sched.h source code [Linux/include/linux/sched.h]

/* SPDX-License-Identifier: GPL-2.0 */
2	#ifndef _LINUX_SCHED_H
3	#define _LINUX_SCHED_H
4
5	/*
6	* Define 'struct task_struct' and provide the main scheduler
7	* APIs (schedule(), wakeup variants, etc.)
8	*/
9
10	#include <uapi/linux/sched.h>
11
12	#include <asm/current.h>
13	#include <asm/processor.h>
14	#include <linux/thread_info.h>
15	#include <linux/preempt.h>
16	#include <linux/cpumask_types.h>
17
18	#include <linux/cache.h>
19	#include <linux/irqflags_types.h>
20	#include <linux/smp_types.h>
21	#include <linux/pid_types.h>
22	#include <linux/sem_types.h>
23	#include <linux/shm.h>
24	#include <linux/kmsan_types.h>
25	#include <linux/mutex_types.h>
26	#include <linux/plist_types.h>
27	#include <linux/hrtimer_types.h>
28	#include <linux/timer_types.h>
29	#include <linux/seccomp_types.h>
30	#include <linux/nodemask_types.h>
31	#include <linux/refcount_types.h>
32	#include <linux/resource.h>
33	#include <linux/latencytop.h>
34	#include <linux/sched/prio.h>
35	#include <linux/sched/types.h>
36	#include <linux/signal_types.h>
37	#include <linux/spinlock.h>
38	#include <linux/syscall_user_dispatch_types.h>
39	#include <linux/mm_types_task.h>
40	#include <linux/netdevice_xmit.h>
41	#include <linux/task_io_accounting.h>
42	#include <linux/posix-timers_types.h>
43	#include <linux/restart_block.h>
44	#include <uapi/linux/rseq.h>
45	#include <linux/seqlock_types.h>
46	#include <linux/kcsan.h>
47	#include <linux/rv.h>
48	#include <linux/uidgid_types.h>
49	#include <linux/tracepoint-defs.h>
50	#include <linux/unwind_deferred_types.h>
51	#include <asm/kmap_size.h>
52	#ifndef COMPILE_OFFSETS
53	#include <generated/rq-offsets.h>
54	#endif
55
/* task_struct member predeclarations (sorted alphabetically): */
57	struct audit_context;
58	struct bio_list;
59	struct blk_plug;
60	struct bpf_local_storage;
61	struct bpf_run_ctx;
62	struct bpf_net_context;
63	struct capture_control;
64	struct cfs_rq;
65	struct fs_struct;
66	struct futex_pi_state;
67	struct io_context;
68	struct io_uring_task;
69	struct mempolicy;
70	struct nameidata;
71	struct nsproxy;
72	struct perf_event_context;
73	struct perf_ctx_data;
74	struct pid_namespace;
75	struct pipe_inode_info;
76	struct rcu_node;
77	struct reclaim_state;
78	struct robust_list_head;
79	struct root_domain;
80	struct rq;
81	struct sched_attr;
82	struct sched_dl_entity;
83	struct seq_file;
84	struct sighand_struct;
85	struct signal_struct;
86	struct task_delay_info;
87	struct task_group;
88	struct task_struct;
89	struct user_event_mm;
90
91	#include <linux/sched/ext.h>
92
/*
 * Task state bitmask. NOTE! These bits are also
 * encoded in fs/proc/array.c: get_task_state().
 *
 * We have two separate sets of flags: task->__state
 * is about runnability, while task->exit_state are
 * about the task exiting. Confusing, but this way
 * modifying one set can't modify the other one by
 * mistake.
 */

/* Used in tsk->__state: */
#define TASK_RUNNING			0x00000000
#define TASK_INTERRUPTIBLE		0x00000001
#define TASK_UNINTERRUPTIBLE		0x00000002
#define __TASK_STOPPED			0x00000004
#define __TASK_TRACED			0x00000008
/* Used in tsk->exit_state: */
#define EXIT_DEAD			0x00000010
#define EXIT_ZOMBIE			0x00000020
#define EXIT_TRACE			(EXIT_ZOMBIE | EXIT_DEAD)
/* Used in tsk->__state again: */
#define TASK_PARKED			0x00000040
#define TASK_DEAD			0x00000080
#define TASK_WAKEKILL			0x00000100
#define TASK_WAKING			0x00000200
#define TASK_NOLOAD			0x00000400
#define TASK_NEW			0x00000800
#define TASK_RTLOCK_WAIT		0x00001000
#define TASK_FREEZABLE			0x00002000
/* Evaluates to 0 unless CONFIG_LOCKDEP is enabled. */
#define __TASK_FREEZABLE_UNSAFE		(0x00004000 * IS_ENABLED(CONFIG_LOCKDEP))
#define TASK_FROZEN			0x00008000
#define TASK_STATE_MAX			0x00010000

/* Mask covering every state bit below TASK_STATE_MAX. */
#define TASK_ANY			(TASK_STATE_MAX-1)

/*
 * DO NOT ADD ANY NEW USERS !
 */
#define TASK_FREEZABLE_UNSAFE		(TASK_FREEZABLE | __TASK_FREEZABLE_UNSAFE)

/* Convenience macros for the sake of set_current_state: */
#define TASK_KILLABLE			(TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
#define TASK_STOPPED			(TASK_WAKEKILL | __TASK_STOPPED)
#define TASK_TRACED			__TASK_TRACED

#define TASK_IDLE			(TASK_UNINTERRUPTIBLE | TASK_NOLOAD)

/* Convenience macros for the sake of wake_up(): */
#define TASK_NORMAL			(TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)

/* get_task_state(): */
#define TASK_REPORT			(TASK_RUNNING | TASK_INTERRUPTIBLE | \
					 TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
					 __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \
					 TASK_PARKED)
149
/* True when @task's __state word reads exactly TASK_RUNNING. */
#define task_is_running(task)		(READ_ONCE((task)->__state) == TASK_RUNNING)

/*
 * Job-control predicates: test the JOBCTL_TRACED / JOBCTL_STOPPED bits of
 * @task->jobctl. The argument is parenthesized so that any pointer-valued
 * expression (e.g. "p + 1") can be passed safely.
 */
#define task_is_traced(task)		((READ_ONCE((task)->jobctl) & JOBCTL_TRACED) != 0)
#define task_is_stopped(task)		((READ_ONCE((task)->jobctl) & JOBCTL_STOPPED) != 0)
#define task_is_stopped_or_traced(task)	((READ_ONCE((task)->jobctl) & (JOBCTL_STOPPED | JOBCTL_TRACED)) != 0)
155
/*
 * Special states are those that do not use the normal wait-loop pattern. See
 * the comment with set_special_state().
 *
 * Evaluates to the subset of special-state bits set in @state (non-zero when
 * at least one of them is present).
 */
#define is_special_task_state(state)					\
	((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED |	\
		    TASK_DEAD | TASK_FROZEN))
163
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
/*
 * Debug bookkeeping for task state changes. current->task_state_change holds
 * the instruction pointer (_THIS_IP_) of the most recent state change.
 */

/* Warn once if a special state is being set through the normal path. */
# define debug_normal_state_change(state_value)				\
	do {								\
		WARN_ON_ONCE(is_special_task_state(state_value));	\
		current->task_state_change = _THIS_IP_;			\
	} while (0)

/* Warn once if a non-special state is being set through the special path. */
# define debug_special_state_change(state_value)			\
	do {								\
		WARN_ON_ONCE(!is_special_task_state(state_value));	\
		current->task_state_change = _THIS_IP_;			\
	} while (0)

/* Stash the previous change site before recording the new one. */
# define debug_rtlock_wait_set_state()					\
	do {								\
		current->saved_state_change = current->task_state_change;\
		current->task_state_change = _THIS_IP_;			\
	} while (0)

/* Put back the change site stashed by debug_rtlock_wait_set_state(). */
# define debug_rtlock_wait_restore_state()				\
	do {								\
		current->task_state_change = current->saved_state_change;\
	} while (0)

#else
/* Without CONFIG_DEBUG_ATOMIC_SLEEP the hooks expand to empty statements. */
# define debug_normal_state_change(state_value)		do { } while (0)
# define debug_special_state_change(state_value)	do { } while (0)
# define debug_rtlock_wait_set_state()			do { } while (0)
# define debug_rtlock_wait_restore_state()		do { } while (0)
#endif
194
/*
 * Emit the sched_set_state_tp tracepoint for @state_value, but only when that
 * tracepoint is currently enabled; otherwise @state_value is not passed on.
 */
#define trace_set_current_state(state_value)			\
	do {							\
		if (tracepoint_enabled(sched_set_state_tp))	\
			__trace_set_current_state(state_value);	\
	} while (0)
200
201	/*
202	* set_current_state() includes a barrier so that the write of current->__state
203	* is correctly serialised wrt the caller's subsequent test of whether to
204	* actually sleep:
205	*
206	* for (;;) {
207	* set_current_state(TASK_UNINTERRUPTIBLE);
208	* if (CONDITION)
209	* break;
210	*
211	* schedule();
212	* }
213	* __set_current_state(TASK_RUNNING);
214	*
215	* If the caller does not need such serialisation (because, for instance, the
216	* CONDITION test and condition change and wakeup are under the same lock) then
217	* use __set_current_state().
218	*
219	* The above is typically ordered against the wakeup, which does:
220	*
221	* CONDITION = 1;
222	* wake_up_state(p, TASK_UNINTERRUPTIBLE);
223	*
224	* where wake_up_state()/try_to_wake_up() executes a full memory barrier before
225	* accessing p->__state.
226	*
227	* Wakeup will do: if (@state & p->__state) p->__state = TASK_RUNNING, that is,
228	* once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a
229	* TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING).
230	*
231	* However, with slightly different timing the wakeup TASK_RUNNING store can
232	* also collide with the TASK_UNINTERRUPTIBLE store. Losing that store is not
233	* a problem either because that will result in one extra go around the loop
234	* and our @cond test will save the day.
235	*
236	* Also see the comments of try_to_wake_up().
237	*/
/*
 * Barrier-less variant: runs the debug and trace hooks, then stores
 * @state_value into current->__state with a plain WRITE_ONCE().
 */
#define __set_current_state(state_value)				\
	do {								\
		debug_normal_state_change((state_value));		\
		trace_set_current_state(state_value);			\
		WRITE_ONCE(current->__state, (state_value));		\
	} while (0)
244
/*
 * Runs the debug and trace hooks, then stores @state_value into
 * current->__state via smp_store_mb(), i.e. the store is followed by a
 * memory barrier.
 */
#define set_current_state(state_value)					\
	do {								\
		debug_normal_state_change((state_value));		\
		trace_set_current_state(state_value);			\
		smp_store_mb(current->__state, (state_value));		\
	} while (0)
251
/*
 * set_special_state() should be used for those states when the blocking task
 * can not use the regular condition based wait-loop. In that case we must
 * serialize against wakeups such that any possible in-flight TASK_RUNNING
 * stores will not collide with our state change.
 *
 * The store to current->__state is done with current->pi_lock held and
 * interrupts saved/restored around it.
 */
#define set_special_state(state_value)					\
	do {								\
		unsigned long flags; /* may shadow */			\
									\
		raw_spin_lock_irqsave(&current->pi_lock, flags);	\
		debug_special_state_change((state_value));		\
		trace_set_current_state(state_value);			\
		WRITE_ONCE(current->__state, (state_value));		\
		raw_spin_unlock_irqrestore(&current->pi_lock, flags);	\
	} while (0)
268
269	/*
270	* PREEMPT_RT specific variants for "sleeping" spin/rwlocks
271	*
272	* RT's spin/rwlock substitutions are state preserving. The state of the
273	* task when blocking on the lock is saved in task_struct::saved_state and
274	* restored after the lock has been acquired. These operations are
275	* serialized by task_struct::pi_lock against try_to_wake_up(). Any non RT
276	* lock related wakeups while the task is blocked on the lock are
277	* redirected to operate on task_struct::saved_state to ensure that these
278	* are not dropped. On restore task_struct::saved_state is set to
279	* TASK_RUNNING so any wakeup attempt redirected to saved_state will fail.
280	*
281	* The lock operation looks like this:
282	*
283	* current_save_and_set_rtlock_wait_state();
284	* for (;;) {
285	* if (try_lock())
286	* break;
287	* raw_spin_unlock_irq(&lock->wait_lock);
288	* schedule_rtlock();
289	* raw_spin_lock_irq(&lock->wait_lock);
290	* set_current_state(TASK_RTLOCK_WAIT);
291	* }
292	* current_restore_rtlock_saved_state();
293	*/
/*
 * Under current->pi_lock: copy current->__state into current->saved_state,
 * then set current->__state to TASK_RTLOCK_WAIT. Asserts (via lockdep) that
 * interrupts are disabled.
 *
 * The expansion deliberately does not end in a semicolon, so the macro
 * behaves like a single statement (e.g. "if (x) macro(); else ...").
 */
#define current_save_and_set_rtlock_wait_state()			\
	do {								\
		lockdep_assert_irqs_disabled();				\
		raw_spin_lock(&current->pi_lock);			\
		current->saved_state = current->__state;		\
		debug_rtlock_wait_set_state();				\
		trace_set_current_state(TASK_RTLOCK_WAIT);		\
		WRITE_ONCE(current->__state, TASK_RTLOCK_WAIT);		\
		raw_spin_unlock(&current->pi_lock);			\
	} while (0)
304
/*
 * Under current->pi_lock: write current->saved_state back into
 * current->__state and reset current->saved_state to TASK_RUNNING. Asserts
 * (via lockdep) that interrupts are disabled.
 *
 * The expansion deliberately does not end in a semicolon, so the macro
 * behaves like a single statement (e.g. "if (x) macro(); else ...").
 */
#define current_restore_rtlock_saved_state()				\
	do {								\
		lockdep_assert_irqs_disabled();				\
		raw_spin_lock(&current->pi_lock);			\
		debug_rtlock_wait_restore_state();			\
		trace_set_current_state(current->saved_state);		\
		WRITE_ONCE(current->__state, current->saved_state);	\
		current->saved_state = TASK_RUNNING;			\
		raw_spin_unlock(&current->pi_lock);			\
	} while (0)
315
/* Read current->__state once, through READ_ONCE(). */
#define get_current_state()	READ_ONCE(current->__state)
317
/*
 * Define the task command name length as enum, then it can be visible to
 * BPF programs.
 */
enum {
	TASK_COMM_LEN = 16,
};
325
326	extern void sched_tick(void);
327
328	#define MAX_SCHEDULE_TIMEOUT LONG_MAX
329
330	extern long schedule_timeout(long timeout);
331	extern long schedule_timeout_interruptible(long timeout);
332	extern long schedule_timeout_killable(long timeout);
333	extern long schedule_timeout_uninterruptible(long timeout);
334	extern long schedule_timeout_idle(long timeout);
335	asmlinkage void schedule(void);
336	extern void schedule_preempt_disabled(void);
337	asmlinkage void preempt_schedule_irq(void);
338	#ifdef CONFIG_PREEMPT_RT
339	extern void schedule_rtlock(void);
340	#endif
341
342	extern int __must_check io_schedule_prepare(void);
343	extern void io_schedule_finish(int token);
344	extern long io_schedule_timeout(long timeout);
345	extern void io_schedule(void);
346
/* wrapper functions to trace from this header file */
DECLARE_TRACEPOINT(sched_set_state_tp);
extern void __trace_set_current_state(int state_value);
DECLARE_TRACEPOINT(sched_set_need_resched_tp);
/*
 * NOTE(review): the pointer declarator was displaced in this copy
 * ("struct task_struct curr, int* tif"); restored so that the task is passed
 * by pointer and the flag by value -- confirm against the definition.
 */
extern void __trace_set_need_resched(struct task_struct *curr, int tif);
352
353	/**
354	* struct prev_cputime - snapshot of system and user cputime
355	* @utime: time spent in user mode
356	* @stime: time spent in system mode
357	* @lock: protects the above two fields
358	*
359	* Stores previous user/system time values such that we can guarantee
360	* monotonicity.
361	*/
362	struct prev_cputime {
363	#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
364	u64 utime;
365	u64 stime;
366	raw_spinlock_t lock;
367	#endif
368	};
369
/* States recorded in struct vtime::state. */
enum vtime_state {
	/* Task is sleeping or running in a CPU with VTIME inactive: */
	VTIME_INACTIVE = 0,
	/* Task is idle */
	VTIME_IDLE,
	/* Task runs in kernelspace in a CPU with VTIME active: */
	VTIME_SYS,
	/* Task runs in userspace in a CPU with VTIME active: */
	VTIME_USER,
	/* Task runs as guests in a CPU with VTIME active: */
	VTIME_GUEST,
};
382
383	struct vtime {
384	seqcount_t seqcount;
385	unsigned long long starttime;
386	enum vtime_state state;
387	unsigned int cpu;
388	u64 utime;
389	u64 stime;
390	u64 gtime;
391	};
392
/*
 * Utilization clamp constraints.
 * @UCLAMP_MIN:	Minimum utilization
 * @UCLAMP_MAX:	Maximum utilization
 * @UCLAMP_CNT:	Utilization clamp constraints count
 */
enum uclamp_id {
	UCLAMP_MIN = 0,
	UCLAMP_MAX,
	UCLAMP_CNT
};
404
/* Default root domain and the sched-domains mutex, defined elsewhere. */
extern struct root_domain def_root_domain;
extern struct mutex sched_domains_mutex;
/* Lock/unlock helpers declared alongside sched_domains_mutex. */
extern void sched_domains_mutex_lock(void);
extern void sched_domains_mutex_unlock(void);
409
/* Scheduling parameters: a single integer priority. */
struct sched_param {
	int sched_priority;
};
413
/*
 * Per-task scheduling statistics. The structure has no members unless
 * CONFIG_SCHED_INFO is enabled.
 */
struct sched_info {
#ifdef CONFIG_SCHED_INFO
	/* Cumulative counters: */

	/* # of times we have run on this CPU: */
	unsigned long			pcount;

	/* Time spent waiting on a runqueue: */
	unsigned long long		run_delay;

	/* Max time spent waiting on a runqueue: */
	unsigned long long		max_run_delay;

	/* Min time spent waiting on a runqueue: */
	unsigned long long		min_run_delay;

	/* Timestamps: */

	/* When did we last run on a CPU? */
	unsigned long long		last_arrival;

	/* When were we last queued to run? */
	unsigned long long		last_queued;

#endif /* CONFIG_SCHED_INFO */
};
440
/*
 * Integer metrics need fixed point arithmetic, e.g., sched/fair
 * has a few: load, load_avg, util_avg, freq, and capacity.
 *
 * We define a basic fixed point arithmetic range, and then formalize
 * all these metrics based on that basic range.
 */
# define SCHED_FIXEDPOINT_SHIFT		10
# define SCHED_FIXEDPOINT_SCALE		(1L << SCHED_FIXEDPOINT_SHIFT)

/* Increase resolution of cpu_capacity calculations */
# define SCHED_CAPACITY_SHIFT		SCHED_FIXEDPOINT_SHIFT
# define SCHED_CAPACITY_SCALE		(1L << SCHED_CAPACITY_SHIFT)
454
455	struct load_weight {
456	unsigned long weight;
457	u32 inv_weight;
458	};
459
460	/*
461	* The load/runnable/util_avg accumulates an infinite geometric series
462	* (see __update_load_avg_cfs_rq() in kernel/sched/pelt.c).
463	*
464	* [load_avg definition]
465	*
466	* load_avg = runnable% * scale_load_down(load)
467	*
468	* [runnable_avg definition]
469	*
470	* runnable_avg = runnable% * SCHED_CAPACITY_SCALE
471	*
472	* [util_avg definition]
473	*
474	* util_avg = running% * SCHED_CAPACITY_SCALE
475	*
476	* where runnable% is the time ratio that a sched_entity is runnable and
477	* running% the time ratio that a sched_entity is running.
478	*
479	* For cfs_rq, they are the aggregated values of all runnable and blocked
480	* sched_entities.
481	*
482	* The load/runnable/util_avg doesn't directly factor frequency scaling and CPU
483	* capacity scaling. The scaling is done through the rq_clock_pelt that is used
484	* for computing those signals (see update_rq_clock_pelt())
485	*
486	* N.B., the above ratios (runnable% and running%) themselves are in the
487	* range of [0, 1]. To do fixed point arithmetics, we therefore scale them
488	* to as large a range as necessary. This is for example reflected by
489	* util_avg's SCHED_CAPACITY_SCALE.
490	*
491	* [Overflow issue]
492	*
493	* The 64-bit load_sum can have 4353082796 (=2^64/47742/88761) entities
494	* with the highest load (=88761), always runnable on a single cfs_rq,
495	* and should not overflow as the number already hits PID_MAX_LIMIT.
496	*
497	* For all other cases (including 32-bit kernels), struct load_weight's
498	* weight will overflow first before we do, because:
499	*
500	* Max(load_avg) <= Max(load.weight)
501	*
502	* Then it is the load_weight's responsibility to consider overflow
503	* issues.
504	*/
505	struct sched_avg {
506	u64 last_update_time;
507	u64 load_sum;
508	u64 runnable_sum;
509	u32 util_sum;
510	u32 period_contrib;
511	unsigned long load_avg;
512	unsigned long runnable_avg;
513	unsigned long util_avg;
514	unsigned int util_est;
515	} ____cacheline_aligned;
516
/*
 * The UTIL_AVG_UNCHANGED flag is used to synchronize util_est with util_avg
 * updates. When a task is dequeued, its util_est should not be updated if its
 * util_avg has not been updated in the meantime.
 * This information is mapped into the MSB bit of util_est at dequeue time.
 * Since max value of util_est for a task is 1024 (PELT util_avg for a task)
 * it is safe to use MSB.
 */
#define UTIL_EST_WEIGHT_SHIFT		2
#define UTIL_AVG_UNCHANGED		0x80000000
527
/*
 * Per-task scheduler statistics counters. The structure has no members
 * unless CONFIG_SCHEDSTATS is enabled, and is cacheline aligned.
 */
struct sched_statistics {
#ifdef CONFIG_SCHEDSTATS
	/* Runqueue wait and I/O wait accounting. */
	u64				wait_start;
	u64				wait_max;
	u64				wait_count;
	u64				wait_sum;
	u64				iowait_count;
	u64				iowait_sum;

	/* Sleep accounting. */
	u64				sleep_start;
	u64				sleep_max;
	s64				sum_sleep_runtime;

	/* Block accounting. */
	u64				block_start;
	u64				block_max;
	s64				sum_block_runtime;

	s64				exec_max;
	u64				slice_max;

	/* Migration counters. */
	u64				nr_migrations_cold;
	u64				nr_failed_migrations_affine;
	u64				nr_failed_migrations_running;
	u64				nr_failed_migrations_hot;
	u64				nr_forced_migrations;

	/* Wakeup counters. */
	u64				nr_wakeups;
	u64				nr_wakeups_sync;
	u64				nr_wakeups_migrate;
	u64				nr_wakeups_local;
	u64				nr_wakeups_remote;
	u64				nr_wakeups_affine;
	u64				nr_wakeups_affine_attempts;
	u64				nr_wakeups_passive;
	u64				nr_wakeups_idle;

#ifdef CONFIG_SCHED_CORE
	u64				core_forceidle_sum;
#endif
#endif /* CONFIG_SCHEDSTATS */
} ____cacheline_aligned;
569
570	struct sched_entity {
571	/ For load-balancing: /
572	struct load_weight load;
573	struct rb_node run_node;
574	u64 deadline;
575	u64 min_vruntime;
576	u64 min_slice;
577
578	struct list_head group_node;
579	unsigned char on_rq;
580	unsigned char sched_delayed;
581	unsigned char rel_deadline;
582	unsigned char custom_slice;
583	/ hole /
584
585	u64 exec_start;
586	u64 sum_exec_runtime;
587	u64 prev_sum_exec_runtime;
588	u64 vruntime;
589	union {
590	/*
591	* When !@on_rq this field is vlag.
592	* When cfs_rq->curr == se (which implies @on_rq)
593	* this field is vprot. See protect_slice().
594	*/
595	s64 vlag;
596	u64 vprot;
597	};
598	u64 slice;
599
600	u64 nr_migrations;
601
602	#ifdef CONFIG_FAIR_GROUP_SCHED
603	int depth;
604	struct sched_entity *parent;
605	/ rq on which this entity is (to be) queued: /
606	struct cfs_rq *cfs_rq;
607	/ rq "owned" by this entity/group: /
608	struct cfs_rq *my_q;
609	/ cached value of my_q->h_nr_running /
610	unsigned long runnable_weight;
611	#endif
612
613	/*
614	* Per entity load average tracking.
615	*
616	* Put into separate cache line so it does not
617	* collide with read-mostly values above.
618	*/
619	struct sched_avg avg;
620	};
621
622	struct sched_rt_entity {
623	struct list_head run_list;
624	unsigned long timeout;
625	unsigned long watchdog_stamp;
626	unsigned int time_slice;
627	unsigned short on_rq;
628	unsigned short on_list;
629
630	struct sched_rt_entity *back;
631	#ifdef CONFIG_RT_GROUP_SCHED
632	struct sched_rt_entity *parent;
633	/ rq on which this entity is (to be) queued: /
634	struct rt_rq *rt_rq;
635	/ rq "owned" by this entity/group: /
636	struct rt_rq *my_q;
637	#endif
638	} __randomize_layout;
639
/*
 * Callback types for a DL server, both taking the server's sched_dl_entity:
 *  - dl_server_has_tasks_f returns a bool;
 *  - dl_server_pick_f returns a task_struct pointer.
 * Both are pointer-to-function types so they can be stored as struct members.
 */
typedef bool (*dl_server_has_tasks_f)(struct sched_dl_entity *);
typedef struct task_struct *(*dl_server_pick_f)(struct sched_dl_entity *);
642
643	struct sched_dl_entity {
644	struct rb_node rb_node;
645
646	/*
647	* Original scheduling parameters. Copied here from sched_attr
648	* during sched_setattr(), they will remain the same until
649	* the next sched_setattr().
650	*/
651	u64 dl_runtime; / Maximum runtime for each instance /
652	u64 dl_deadline; / Relative deadline of each instance /
653	u64 dl_period; / Separation of two instances (period) /
654	u64 dl_bw; / dl_runtime / dl_period /
655	u64 dl_density; / dl_runtime / dl_deadline /
656
657	/*
658	* Actual scheduling parameters. Initialized with the values above,
659	* they are continuously updated during task execution. Note that
660	* the remaining runtime could be < 0 in case we are in overrun.
661	*/
662	s64 runtime; / Remaining runtime for this instance /
663	u64 deadline; / Absolute deadline for this instance /
664	unsigned int flags; / Specifying the scheduler behaviour /
665
666	/*
667	* Some bool flags:
668	*
669	* @dl_throttled tells if we exhausted the runtime. If so, the
670	* task has to wait for a replenishment to be performed at the
671	* next firing of dl_timer.
672	*
673	* @dl_yielded tells if task gave up the CPU before consuming
674	* all its available runtime during the last job.
675	*
676	* @dl_non_contending tells if the task is inactive while still
677	* contributing to the active utilization. In other words, it
678	* indicates if the inactive timer has been armed and its handler
679	* has not been executed yet. This flag is useful to avoid race
680	* conditions between the inactive timer handler and the wakeup
681	* code.
682	*
683	* @dl_overrun tells if the task asked to be informed about runtime
684	* overruns.
685	*
686	* @dl_server tells if this is a server entity.
687	*
688	* @dl_defer tells if this is a deferred or regular server. For
689	* now only defer server exists.
690	*
691	* @dl_defer_armed tells if the deferrable server is waiting
692	* for the replenishment timer to activate it.
693	*
694	* @dl_server_active tells if the dlserver is active(started).
695	* dlserver is started on first cfs enqueue on an idle runqueue
696	* and is stopped when a dequeue results in 0 cfs tasks on the
697	* runqueue. In other words, dlserver is active only when cpu's
698	* runqueue has atleast one cfs task.
699	*
700	* @dl_defer_running tells if the deferrable server is actually
701	* running, skipping the defer phase.
702	*/
703	unsigned int dl_throttled : `1`;
704	unsigned int dl_yielded : `1`;
705	unsigned int dl_non_contending : `1`;
706	unsigned int dl_overrun : `1`;
707	unsigned int dl_server : `1`;
708	unsigned int dl_server_active : `1`;
709	unsigned int dl_defer : `1`;
710	unsigned int dl_defer_armed : `1`;
711	unsigned int dl_defer_running : `1`;
712
713	/*
714	* Bandwidth enforcement timer. Each -deadline task has its
715	* own bandwidth to be enforced, thus we need one timer per task.
716	*/
717	struct hrtimer dl_timer;
718
719	/*
720	* Inactive timer, responsible for decreasing the active utilization
721	* at the "0-lag time". When a -deadline task blocks, it contributes
722	* to GRUB's active utilization until the "0-lag time", hence a
723	* timer is needed to decrease the active utilization at the correct
724	* time.
725	*/
726	struct hrtimer inactive_timer;
727
728	/*
729	* Bits for DL-server functionality. Also see the comment near
730	* dl_server_update().
731	*
732	* @rq the runqueue this server is for
733	*
734	* @server_has_tasks() returns true if @server_pick return a
735	* runnable task.
736	*/
737	struct rq *rq;
738	dl_server_pick_f server_pick_task;
739
740	#ifdef CONFIG_RT_MUTEXES
741	/*
742	* Priority Inheritance. When a DEADLINE scheduling entity is boosted
743	* pi_se points to the donor, otherwise points to the dl_se it belongs
744	* to (the original one/itself).
745	*/
746	struct sched_dl_entity *pi_se;
747	#endif
748	};
749
#ifdef CONFIG_UCLAMP_TASK
/* Number of utilization clamp buckets (shorter alias) */
#define UCLAMP_BUCKETS CONFIG_UCLAMP_BUCKETS_COUNT

/*
 * Utilization clamp for a scheduling entity
 * @value:		clamp value "assigned" to a se
 * @bucket_id:		bucket index corresponding to the "assigned" value
 * @active:		the se is currently refcounted in a rq's bucket
 * @user_defined:	the requested clamp value comes from user-space
 *
 * The bucket_id is the index of the clamp bucket matching the clamp value
 * which is pre-computed and stored to avoid expensive integer divisions from
 * the fast path.
 *
 * The active bit is set whenever a task has got an "effective" value assigned,
 * which can be different from the clamp value "requested" from user-space.
 * This allows to know a task is refcounted in the rq's bucket corresponding
 * to the "effective" bucket_id.
 *
 * The user_defined bit is set whenever a task has got a task-specific clamp
 * value requested from userspace, i.e. the system defaults apply to this task
 * just as a restriction. This allows to relax default clamps when a less
 * restrictive task-specific value has been requested, thus allowing to
 * implement a "nice" semantic. For example, a task running with a 20%
 * default boost can still drop its own boosting to 0%.
 */
struct uclamp_se {
	unsigned int value		: bits_per(SCHED_CAPACITY_SCALE);
	unsigned int bucket_id		: bits_per(UCLAMP_BUCKETS);
	unsigned int active		: 1;
	unsigned int user_defined	: 1;
};
#endif /* CONFIG_UCLAMP_TASK */
784
785	union rcu_special {
786	struct {
787	u8 blocked;
788	u8 need_qs;
789	u8 exp_hint; / Hint for performance. /
790	u8 need_mb; / Readers need smp_mb(). /
791	} b; / Bits. /
792	u32 s; / Set of bits. /
793	};
794
/*
 * Per-task perf context identifiers; perf_nr_task_contexts is the number of
 * valid (non-negative) identifiers.
 */
enum perf_event_task_context {
	perf_invalid_context = -1,
	perf_hw_context = 0,
	perf_sw_context,
	perf_nr_task_contexts,
};
801
/*
 * Number of contexts where an event can trigger:
 *	task, softirq, hardirq, nmi.
 */
#define PERF_NR_CONTEXTS	4
807
/* Link element of a singly linked list of wake_q nodes. */
struct wake_q_node {
	struct wake_q_node *next;
};
811
/*
 * Local kmap bookkeeping: an index plus an array of KM_MAX_IDX pte values.
 * The structure has no members unless CONFIG_KMAP_LOCAL is enabled.
 */
struct kmap_ctrl {
#ifdef CONFIG_KMAP_LOCAL
	int				idx;
	pte_t				pteval[KM_MAX_IDX];
#endif
};
818
819	struct task_struct {
820	#ifdef CONFIG_THREAD_INFO_IN_TASK
821	/*
822	* For reasons of header soup (see current_thread_info()), this
823	* must be the first element of task_struct.
824	*/
825	struct thread_info thread_info;
826	#endif
827	unsigned int __state;
828
829	/ saved state for "spinlock sleepers" /
830	unsigned int saved_state;
831
832	/*
833	* This begins the randomizable portion of task_struct. Only
834	* scheduling-critical items should be added above here.
835	*/
836	randomized_struct_fields_start
837
838	void *stack;
839	refcount_t usage;
840	/ Per task flags (PF_), defined further below: /*
841	unsigned int flags;
842	unsigned int ptrace;
843
844	#ifdef CONFIG_MEM_ALLOC_PROFILING
845	struct alloc_tag *alloc_tag;
846	#endif
847
848	int on_cpu;
849	struct __call_single_node wake_entry;
850	unsigned int wakee_flips;
851	unsigned long wakee_flip_decay_ts;
852	struct task_struct *last_wakee;
853
854	/*
855	* recent_used_cpu is initially set as the last CPU used by a task
856	* that wakes affine another task. Waker/wakee relationships can
857	* push tasks around a CPU where each wakeup moves to the next one.
858	* Tracking a recently used CPU allows a quick search for a recently
859	* used CPU that may be idle.
860	*/
861	int recent_used_cpu;
862	int wake_cpu;
863	int on_rq;
864
865	int prio;
866	int static_prio;
867	int normal_prio;
868	unsigned int rt_priority;
869
870	struct sched_entity se;
871	struct sched_rt_entity rt;
872	struct sched_dl_entity dl;
873	struct sched_dl_entity *dl_server;
874	#ifdef CONFIG_SCHED_CLASS_EXT
875	struct sched_ext_entity scx;
876	#endif
877	const struct sched_class *sched_class;
878
879	#ifdef CONFIG_SCHED_CORE
880	struct rb_node core_node;
881	unsigned long core_cookie;
882	unsigned int core_occupation;
883	#endif
884
885	#ifdef CONFIG_CGROUP_SCHED
886	struct task_group *sched_task_group;
887	#ifdef CONFIG_CFS_BANDWIDTH
888	struct callback_head sched_throttle_work;
889	struct list_head throttle_node;
890	bool throttled;
891	#endif
892	#endif
893
894
895	#ifdef CONFIG_UCLAMP_TASK
896	/*
897	* Clamp values requested for a scheduling entity.
898	* Must be updated with task_rq_lock() held.
899	*/
900	struct uclamp_se uclamp_req[UCLAMP_CNT];
901	/*
902	* Effective clamp values used for a scheduling entity.
903	* Must be updated with task_rq_lock() held.
904	*/
905	struct uclamp_se uclamp[UCLAMP_CNT];
906	#endif
907
908	struct sched_statistics stats;
909
910	#ifdef CONFIG_PREEMPT_NOTIFIERS
911	/ List of struct preempt_notifier: /
912	struct hlist_head preempt_notifiers;
913	#endif
914
915	#ifdef CONFIG_BLK_DEV_IO_TRACE
916	unsigned int btrace_seq;
917	#endif
918
919	unsigned int policy;
920	unsigned long max_allowed_capacity;
921	int nr_cpus_allowed;
922	const cpumask_t *cpus_ptr;
923	cpumask_t *user_cpus_ptr;
924	cpumask_t cpus_mask;
925	void *migration_pending;
926	unsigned short migration_disabled;
927	unsigned short migration_flags;
928
929	#ifdef CONFIG_PREEMPT_RCU
930	int rcu_read_lock_nesting;
931	union rcu_special rcu_read_unlock_special;
932	struct list_head rcu_node_entry;
933	struct rcu_node *rcu_blocked_node;
934	#endif /* #ifdef CONFIG_PREEMPT_RCU */
935
936	#ifdef CONFIG_TASKS_RCU
937	unsigned long rcu_tasks_nvcsw;
938	u8 rcu_tasks_holdout;
939	u8 rcu_tasks_idx;
940	int rcu_tasks_idle_cpu;
941	struct list_head rcu_tasks_holdout_list;
942	int rcu_tasks_exit_cpu;
943	struct list_head rcu_tasks_exit_list;
944	#endif /* #ifdef CONFIG_TASKS_RCU */
945
946	#ifdef CONFIG_TASKS_TRACE_RCU
947	int trc_reader_nesting;
948	int trc_ipi_to_cpu;
949	union rcu_special trc_reader_special;
950	struct list_head trc_holdout_list;
951	struct list_head trc_blkd_node;
952	int trc_blkd_cpu;
953	#endif /* #ifdef CONFIG_TASKS_TRACE_RCU */
954
955	struct sched_info sched_info;
956
957	struct list_head tasks;
958	struct plist_node pushable_tasks;
959	struct rb_node pushable_dl_tasks;
960
961	struct mm_struct *mm;
962	struct mm_struct *active_mm;
963	struct address_space *faults_disabled_mapping;
964
965	int exit_state;
966	int exit_code;
967	int exit_signal;
968	/ The signal sent when the parent dies: /
969	int pdeath_signal;
970	/ JOBCTL_, siglock protected: /*
971	unsigned long jobctl;
972
973	/ Used for emulating ABI behavior of previous Linux versions: /
974	unsigned int personality;
975
976	/ Scheduler bits, serialized by scheduler locks: /
977	unsigned sched_reset_on_fork:`1`;
978	unsigned sched_contributes_to_load:`1`;
979	unsigned sched_migrated:`1`;
980	unsigned sched_task_hot:`1`;
981
982	/ Force alignment to the next boundary: /
983	unsigned :`0`;
984
985	/ Unserialized, strictly 'current' /
986
987	/*
988	* This field must not be in the scheduler word above due to wakelist
989	* queueing no longer being serialized by p->on_cpu. However:
990	*
991	* p->XXX = X; ttwu()
992	* schedule() if (p->on_rq && ..) // false
993	* smp_mb__after_spinlock(); if (smp_load_acquire(&p->on_cpu) && //true
994	* deactivate_task() ttwu_queue_wakelist())
995	* p->on_rq = 0; p->sched_remote_wakeup = Y;
996	*
997	* guarantees all stores of 'current' are visible before
998	* ->sched_remote_wakeup gets used, so it can be in this word.
999	*/
1000	unsigned sched_remote_wakeup:`1`;
1001	#ifdef CONFIG_RT_MUTEXES
1002	unsigned sched_rt_mutex:`1`;
1003	#endif
1004
1005	/ Bit to tell TOMOYO we're in execve(): /
1006	unsigned in_execve:`1`;
1007	unsigned in_iowait:`1`;
1008	#ifndef TIF_RESTORE_SIGMASK
1009	unsigned restore_sigmask:`1`;
1010	#endif
1011	#ifdef CONFIG_MEMCG_V1
1012	unsigned in_user_fault:`1`;
1013	#endif
1014	#ifdef CONFIG_LRU_GEN
1015	/ whether the LRU algorithm may apply to this access /
1016	unsigned in_lru_fault:`1`;
1017	#endif
1018	#ifdef CONFIG_COMPAT_BRK
1019	unsigned brk_randomized:`1`;
1020	#endif
1021	#ifdef CONFIG_CGROUPS
1022	/ disallow userland-initiated cgroup migration /
1023	unsigned no_cgroup_migration:`1`;
1024	/ task is frozen/stopped (used by the cgroup freezer) /
1025	unsigned frozen:`1`;
1026	#endif
1027	#ifdef CONFIG_BLK_CGROUP
1028	unsigned use_memdelay:`1`;
1029	#endif
1030	#ifdef CONFIG_PSI
1031	/ Stalled due to lack of memory /
1032	unsigned in_memstall:`1`;
1033	#endif
1034	#ifdef CONFIG_PAGE_OWNER
1035	/ Used by page_owner=on to detect recursion in page tracking. /
1036	unsigned in_page_owner:`1`;
1037	#endif
1038	#ifdef CONFIG_EVENTFD
1039	/ Recursion prevention for eventfd_signal() /
1040	unsigned in_eventfd:`1`;
1041	#endif
1042	#ifdef CONFIG_ARCH_HAS_CPU_PASID
1043	unsigned pasid_activated:`1`;
1044	#endif
1045	#ifdef CONFIG_X86_BUS_LOCK_DETECT
1046	unsigned reported_split_lock:`1`;
1047	#endif
1048	#ifdef CONFIG_TASK_DELAY_ACCT
1049	/ delay due to memory thrashing /
1050	unsigned in_thrashing:`1`;
1051	#endif
1052	unsigned in_nf_duplicate:`1`;
1053	#ifdef CONFIG_PREEMPT_RT
1054	struct netdev_xmit net_xmit;
1055	#endif
1056	unsigned long atomic_flags; / Flags requiring atomic access. /
1057
1058	struct restart_block restart_block;
1059
1060	pid_t pid;
1061	pid_t tgid;
1062
1063	#ifdef CONFIG_STACKPROTECTOR
1064	/ Canary value for the -fstack-protector GCC feature: /
1065	unsigned long stack_canary;
1066	#endif
1067	/*
1068	* Pointers to the (original) parent process, youngest child, younger sibling,
1069	* older sibling, respectively. (p->father can be replaced with
1070	* p->real_parent->pid)
1071	*/
1072
1073	/ Real parent process: /
1074	struct task_struct __rcu *real_parent;
1075
1076	/ Recipient of SIGCHLD, wait4() reports: /
1077	struct task_struct __rcu *parent;
1078
1079	/*
1080	* Children/sibling form the list of natural children:
1081	*/
1082	struct list_head children;
1083	struct list_head sibling;
1084	struct task_struct *group_leader;
1085
1086	/*
1087	* 'ptraced' is the list of tasks this task is using ptrace() on.
1088	*
1089	* This includes both natural children and PTRACE_ATTACH targets.
1090	* 'ptrace_entry' is this task's link on the p->parent->ptraced list.
1091	*/
1092	struct list_head ptraced;
1093	struct list_head ptrace_entry;
1094
1095	/ PID/PID hash table linkage. /
1096	struct pid *thread_pid;
1097	struct hlist_node pid_links[PIDTYPE_MAX];
1098	struct list_head thread_node;
1099
1100	struct completion *vfork_done;
1101
1102	/ CLONE_CHILD_SETTID: /
1103	int __user *set_child_tid;
1104
1105	/ CLONE_CHILD_CLEARTID: /
1106	int __user *clear_child_tid;
1107
1108	/ PF_KTHREAD \| PF_IO_WORKER /
1109	void *worker_private;
1110
1111	u64 utime;
1112	u64 stime;
1113	#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
1114	u64 utimescaled;
1115	u64 stimescaled;
1116	#endif
1117	u64 gtime;
1118	struct prev_cputime prev_cputime;
1119	#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
1120	struct vtime vtime;
1121	#endif
1122
1123	#ifdef CONFIG_NO_HZ_FULL
1124	atomic_t tick_dep_mask;
1125	#endif
1126	/ Context switch counts: /
1127	unsigned long nvcsw;
1128	unsigned long nivcsw;
1129
1130	/ Monotonic time in nsecs: /
1131	u64 start_time;
1132
1133	/ Boot based time in nsecs: /
1134	u64 start_boottime;
1135
1136	/ MM fault and swap info: this can arguably be seen as either mm-specific or thread-specific: /
1137	unsigned long min_flt;
1138	unsigned long maj_flt;
1139
1140	/ Empty if CONFIG_POSIX_CPUTIMERS=n /
1141	struct posix_cputimers posix_cputimers;
1142
1143	#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
1144	struct posix_cputimers_work posix_cputimers_work;
1145	#endif
1146
1147	/ Process credentials: /
1148
1149	/ Tracer's credentials at attach: /
1150	const struct cred __rcu *ptracer_cred;
1151
1152	/ Objective and real subjective task credentials (COW): /
1153	const struct cred __rcu *real_cred;
1154
1155	/ Effective (overridable) subjective task credentials (COW): /
1156	const struct cred __rcu *cred;
1157
1158	#ifdef CONFIG_KEYS
1159	/ Cached requested key. /
1160	struct key *cached_requested_key;
1161	#endif
1162
1163	/*
1164	* executable name, excluding path.
1165	*
1166	* - normally initialized begin_new_exec()
1167	* - set it with set_task_comm()
1168	* - strscpy_pad() to ensure it is always NUL-terminated and
1169	* zero-padded
1170	* - task_lock() to ensure the operation is atomic and the name is
1171	* fully updated.
1172	*/
1173	char comm[TASK_COMM_LEN];
1174
1175	struct nameidata *nameidata;
1176
1177	#ifdef CONFIG_SYSVIPC
1178	struct sysv_sem sysvsem;
1179	struct sysv_shm sysvshm;
1180	#endif
1181	#ifdef CONFIG_DETECT_HUNG_TASK
1182	unsigned long last_switch_count;
1183	unsigned long last_switch_time;
1184	#endif
1185	/ Filesystem information: /
1186	struct fs_struct *fs;
1187
1188	/ Open file information: /
1189	struct files_struct *files;
1190
1191	#ifdef CONFIG_IO_URING
1192	struct io_uring_task *io_uring;
1193	#endif
1194
1195	/ Namespaces: /
1196	struct nsproxy *nsproxy;
1197
1198	/ Signal handlers: /
1199	struct signal_struct *signal;
1200	struct sighand_struct __rcu *sighand;
1201	sigset_t blocked;
1202	sigset_t real_blocked;
1203	/ Restored if set_restore_sigmask() was used: /
1204	sigset_t saved_sigmask;
1205	struct sigpending pending;
1206	unsigned long sas_ss_sp;
1207	size_t sas_ss_size;
1208	unsigned int sas_ss_flags;
1209
1210	struct callback_head *task_works;
1211
1212	#ifdef CONFIG_AUDIT
1213	#ifdef CONFIG_AUDITSYSCALL
1214	struct audit_context *audit_context;
1215	#endif
1216	kuid_t loginuid;
1217	unsigned int sessionid;
1218	#endif
1219	struct seccomp seccomp;
1220	struct syscall_user_dispatch syscall_dispatch;
1221
1222	/ Thread group tracking: /
1223	u64 parent_exec_id;
1224	u64 self_exec_id;
1225
1226	/ Protection against (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, mempolicy: /
1227	spinlock_t alloc_lock;
1228
1229	/ Protection of the PI data structures: /
1230	raw_spinlock_t pi_lock;
1231
1232	struct wake_q_node wake_q;
1233
1234	#ifdef CONFIG_RT_MUTEXES
1235	/ PI waiters blocked on a rt_mutex held by this task: /
1236	struct rb_root_cached pi_waiters;
1237	/ Updated under owner's pi_lock and rq lock /
1238	struct task_struct *pi_top_task;
1239	/ Deadlock detection and priority inheritance handling: /
1240	struct rt_mutex_waiter *pi_blocked_on;
1241	#endif
1242
1243	struct mutex blocked_on; /* lock we're blocked on /
1244
1245	#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
1246	/*
1247	* Encoded lock address causing task block (lower 2 bits = type from
1248	* <linux/hung_task.h>). Accessed via hung_task_*() helpers.
1249	*/
1250	unsigned long blocker;
1251	#endif
1252
1253	#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
1254	int non_block_count;
1255	#endif
1256
1257	#ifdef CONFIG_TRACE_IRQFLAGS
1258	struct irqtrace_events irqtrace;
1259	unsigned int hardirq_threaded;
1260	u64 hardirq_chain_key;
1261	int softirqs_enabled;
1262	int softirq_context;
1263	int irq_config;
1264	#endif
1265	#ifdef CONFIG_PREEMPT_RT
1266	int softirq_disable_cnt;
1267	#endif
1268
1269	#ifdef CONFIG_LOCKDEP
1270	# define MAX_LOCK_DEPTH 48UL
1271	u64 curr_chain_key;
1272	int lockdep_depth;
1273	unsigned int lockdep_recursion;
1274	struct held_lock held_locks[MAX_LOCK_DEPTH];
1275	#endif
1276
1277	#if defined(CONFIG_UBSAN) && !defined(CONFIG_UBSAN_TRAP)
1278	unsigned int in_ubsan;
1279	#endif
1280
1281	/ Journalling filesystem info: /
1282	void *journal_info;
1283
1284	/ Stacked block device info: /
1285	struct bio_list *bio_list;
1286
1287	/ Stack plugging: /
1288	struct blk_plug *plug;
1289
1290	/ VM state: /
1291	struct reclaim_state *reclaim_state;
1292
1293	struct io_context *io_context;
1294
1295	#ifdef CONFIG_COMPACTION
1296	struct capture_control *capture_control;
1297	#endif
1298	/ Ptrace state: /
1299	unsigned long ptrace_message;
1300	kernel_siginfo_t *last_siginfo;
1301
1302	struct task_io_accounting ioac;
1303	#ifdef CONFIG_PSI
1304	/ Pressure stall state /
1305	unsigned int psi_flags;
1306	#endif
1307	#ifdef CONFIG_TASK_XACCT
1308	/ Accumulated RSS usage: /
1309	u64 acct_rss_mem1;
1310	/ Accumulated virtual memory usage: /
1311	u64 acct_vm_mem1;
1312	/ stime + utime since last update: /
1313	u64 acct_timexpd;
1314	#endif
1315	#ifdef CONFIG_CPUSETS
1316	/ Protected by ->alloc_lock: /
1317	nodemask_t mems_allowed;
1318	/ Sequence number to catch updates: /
1319	seqcount_spinlock_t mems_allowed_seq;
1320	int cpuset_mem_spread_rotor;
1321	#endif
1322	#ifdef CONFIG_CGROUPS
1323	/ Control Group info protected by css_set_lock: /
1324	struct css_set __rcu *cgroups;
1325	/ cg_list protected by css_set_lock and tsk->alloc_lock: /
1326	struct list_head cg_list;
1327	#endif
1328	#ifdef CONFIG_X86_CPU_RESCTRL
1329	u32 closid;
1330	u32 rmid;
1331	#endif
1332	#ifdef CONFIG_FUTEX
1333	struct robust_list_head __user *robust_list;
1334	#ifdef CONFIG_COMPAT
1335	struct compat_robust_list_head __user *compat_robust_list;
1336	#endif
1337	struct list_head pi_state_list;
1338	struct futex_pi_state *pi_state_cache;
1339	struct mutex futex_exit_mutex;
1340	unsigned int futex_state;
1341	#endif
1342	#ifdef CONFIG_PERF_EVENTS
1343	u8 perf_recursion[PERF_NR_CONTEXTS];
1344	struct perf_event_context *perf_event_ctxp;
1345	struct mutex perf_event_mutex;
1346	struct list_head perf_event_list;
1347	struct perf_ctx_data __rcu *perf_ctx_data;
1348	#endif
1349	#ifdef CONFIG_DEBUG_PREEMPT
1350	unsigned long preempt_disable_ip;
1351	#endif
1352	#ifdef CONFIG_NUMA
1353	/ Protected by alloc_lock: /
1354	struct mempolicy *mempolicy;
1355	short il_prev;
1356	u8 il_weight;
1357	short pref_node_fork;
1358	#endif
1359	#ifdef CONFIG_NUMA_BALANCING
1360	int numa_scan_seq;
1361	unsigned int numa_scan_period;
1362	unsigned int numa_scan_period_max;
1363	int numa_preferred_nid;
1364	unsigned long numa_migrate_retry;
1365	/ Migration stamp: /
1366	u64 node_stamp;
1367	u64 last_task_numa_placement;
1368	u64 last_sum_exec_runtime;
1369	struct callback_head numa_work;
1370
1371	/*
1372	* This pointer is only modified for current in syscall and
1373	* pagefault context (and for tasks being destroyed), so it can be read
1374	* from any of the following contexts:
1375	* - RCU read-side critical section
1376	* - current->numa_group from everywhere
1377	* - task's runqueue locked, task not running
1378	*/
1379	struct numa_group __rcu *numa_group;
1380
1381	/*
1382	* numa_faults is an array split into four regions:
1383	* faults_memory, faults_cpu, faults_memory_buffer, faults_cpu_buffer
1384	* in this precise order.
1385	*
1386	* faults_memory: Exponential decaying average of faults on a per-node
1387	* basis. Scheduling placement decisions are made based on these
1388	* counts. The values remain static for the duration of a PTE scan.
1389	* faults_cpu: Track the nodes the process was running on when a NUMA
1390	* hinting fault was incurred.
1391	* faults_memory_buffer and faults_cpu_buffer: Record faults per node
1392	* during the current scan window. When the scan completes, the counts
1393	* in faults_memory and faults_cpu decay and these values are copied.
1394	*/
1395	unsigned long *numa_faults;
1396	unsigned long total_numa_faults;
1397
1398	/*
1399	* numa_faults_locality tracks if faults recorded during the last
1400	* scan window were remote/local or failed to migrate. The task scan
1401	* period is adapted based on the locality of the faults with different
1402	* weights depending on whether they were shared or private faults
1403	*/
1404	unsigned long numa_faults_locality[`3`];
1405
1406	unsigned long numa_pages_migrated;
1407	#endif /* CONFIG_NUMA_BALANCING */
1408
1409	#ifdef CONFIG_RSEQ
1410	struct rseq __user *rseq;
1411	u32 rseq_len;
1412	u32 rseq_sig;
1413	/*
1414	* RmW on rseq_event_mask must be performed atomically
1415	* with respect to preemption.
1416	*/
1417	unsigned long rseq_event_mask;
1418	# ifdef CONFIG_DEBUG_RSEQ
1419	/*
1420	* This is a place holder to save a copy of the rseq fields for
1421	* validation of read-only fields. The struct rseq has a
1422	* variable-length array at the end, so it cannot be used
1423	* directly. Reserve a size large enough for the known fields.
1424	*/
1425	char rseq_fields[sizeof(struct rseq)];
1426	# endif
1427	#endif
1428
1429	#ifdef CONFIG_SCHED_MM_CID
1430	int mm_cid; / Current cid in mm /
1431	int last_mm_cid; / Most recent cid in mm /
1432	int migrate_from_cpu;
1433	int mm_cid_active; / Whether cid bitmap is active /
1434	struct callback_head cid_work;
1435	#endif
1436
1437	struct tlbflush_unmap_batch tlb_ubc;
1438
1439	/ Cache last used pipe for splice(): /
1440	struct pipe_inode_info *splice_pipe;
1441
1442	struct page_frag task_frag;
1443
1444	#ifdef CONFIG_TASK_DELAY_ACCT
1445	struct task_delay_info *delays;
1446	#endif
1447
1448	#ifdef CONFIG_FAULT_INJECTION
1449	int make_it_fail;
1450	unsigned int fail_nth;
1451	#endif
1452	/*
1453	* When (nr_dirtied >= nr_dirtied_pause), it's time to call
1454	* balance_dirty_pages() for a dirty throttling pause:
1455	*/
1456	int nr_dirtied;
1457	int nr_dirtied_pause;
1458	/ Start of a write-and-pause period: /
1459	unsigned long dirty_paused_when;
1460
1461	#ifdef CONFIG_LATENCYTOP
1462	int latency_record_count;
1463	struct latency_record latency_record[LT_SAVECOUNT];
1464	#endif
1465	/*
1466	* Time slack values; these are used to round up poll() and
1467	* select() etc timeout values. These are in nanoseconds.
1468	*/
1469	u64 timer_slack_ns;
1470	u64 default_timer_slack_ns;
1471
1472	#if defined(CONFIG_KASAN_GENERIC) \|\| defined(CONFIG_KASAN_SW_TAGS)
1473	unsigned int kasan_depth;
1474	#endif
1475
1476	#ifdef CONFIG_KCSAN
1477	struct kcsan_ctx kcsan_ctx;
1478	#ifdef CONFIG_TRACE_IRQFLAGS
1479	struct irqtrace_events kcsan_save_irqtrace;
1480	#endif
1481	#ifdef CONFIG_KCSAN_WEAK_MEMORY
1482	int kcsan_stack_depth;
1483	#endif
1484	#endif
1485
1486	#ifdef CONFIG_KMSAN
1487	struct kmsan_ctx kmsan_ctx;
1488	#endif
1489
1490	#if IS_ENABLED(CONFIG_KUNIT)
1491	struct kunit *kunit_test;
1492	#endif
1493
1494	#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1495	/ Index of current stored address in ret_stack: /
1496	int curr_ret_stack;
1497	int curr_ret_depth;
1498
1499	/ Stack of return addresses for return function tracing: /
1500	unsigned long *ret_stack;
1501
1502	/ Timestamp for last schedule: /
1503	unsigned long long ftrace_timestamp;
1504	unsigned long long ftrace_sleeptime;
1505
1506	/*
1507	* Number of functions that haven't been traced
1508	* because of depth overrun:
1509	*/
1510	atomic_t trace_overrun;
1511
1512	/ Pause tracing: /
1513	atomic_t tracing_graph_pause;
1514	#endif
1515
1516	#ifdef CONFIG_TRACING
1517	/ Bitmask and counter of trace recursion: /
1518	unsigned long trace_recursion;
1519	#endif /* CONFIG_TRACING */
1520
1521	#ifdef CONFIG_KCOV
1522	/ See kernel/kcov.c for more details. /
1523
1524	/ Coverage collection mode enabled for this task (0 if disabled): /
1525	unsigned int kcov_mode;
1526
1527	/ Size of the kcov_area: /
1528	unsigned int kcov_size;
1529
1530	/ Buffer for coverage collection: /
1531	void *kcov_area;
1532
1533	/ KCOV descriptor wired with this task or NULL: /
1534	struct kcov *kcov;
1535
1536	/ KCOV common handle for remote coverage collection: /
1537	u64 kcov_handle;
1538
1539	/ KCOV sequence number: /
1540	int kcov_sequence;
1541
1542	/ Collect coverage from softirq context: /
1543	unsigned int kcov_softirq;
1544	#endif
1545
1546	#ifdef CONFIG_MEMCG_V1
1547	struct mem_cgroup *memcg_in_oom;
1548	#endif
1549
1550	#ifdef CONFIG_MEMCG
1551	/ Number of pages to reclaim on returning to userland: /
1552	unsigned int memcg_nr_pages_over_high;
1553
1554	/ Used by memcontrol for targeted memcg charge: /
1555	struct mem_cgroup *active_memcg;
1556
1557	/ Cache for current->cgroups->memcg->objcg lookups: /
1558	struct obj_cgroup *objcg;
1559	#endif
1560
1561	#ifdef CONFIG_BLK_CGROUP
1562	struct gendisk *throttle_disk;
1563	#endif
1564
1565	#ifdef CONFIG_UPROBES
1566	struct uprobe_task *utask;
1567	#endif
1568	#if defined(CONFIG_BCACHE) \|\| defined(CONFIG_BCACHE_MODULE)
1569	unsigned int sequential_io;
1570	unsigned int sequential_io_avg;
1571	#endif
1572	struct kmap_ctrl kmap_ctrl;
1573	#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
1574	unsigned long task_state_change;
1575	# ifdef CONFIG_PREEMPT_RT
1576	unsigned long saved_state_change;
1577	# endif
1578	#endif
1579	struct rcu_head rcu;
1580	refcount_t rcu_users;
1581	int pagefault_disabled;
1582	#ifdef CONFIG_MMU
1583	struct task_struct *oom_reaper_list;
1584	struct timer_list oom_reaper_timer;
1585	#endif
1586	#ifdef CONFIG_VMAP_STACK
1587	struct vm_struct *stack_vm_area;
1588	#endif
1589	#ifdef CONFIG_THREAD_INFO_IN_TASK
1590	/ A live task holds one reference: /
1591	refcount_t stack_refcount;
1592	#endif
1593	#ifdef CONFIG_LIVEPATCH
1594	int patch_state;
1595	#endif
1596	#ifdef CONFIG_SECURITY
1597	/ Used by LSM modules for access restriction: /
1598	void *security;
1599	#endif
1600	#ifdef CONFIG_BPF_SYSCALL
1601	/ Used by BPF task local storage /
1602	struct bpf_local_storage __rcu *bpf_storage;
1603	/ Used for BPF run context /
1604	struct bpf_run_ctx *bpf_ctx;
1605	#endif
1606	/ Used by BPF for per-TASK xdp storage /
1607	struct bpf_net_context *bpf_net_context;
1608
1609	#ifdef CONFIG_KSTACK_ERASE
1610	unsigned long lowest_stack;
1611	#endif
1612	#ifdef CONFIG_KSTACK_ERASE_METRICS
1613	unsigned long prev_lowest_stack;
1614	#endif
1615
1616	#ifdef CONFIG_X86_MCE
1617	void __user *mce_vaddr;
1618	__u64 mce_kflags;
1619	u64 mce_addr;
1620	__u64 mce_ripv : `1`,
1621	mce_whole_page : `1`,
1622	__mce_reserved : `62`;
1623	struct callback_head mce_kill_me;
1624	int mce_count;
1625	#endif
1626
1627	#ifdef CONFIG_KRETPROBES
1628	struct llist_head kretprobe_instances;
1629	#endif
1630	#ifdef CONFIG_RETHOOK
1631	struct llist_head rethooks;
1632	#endif
1633
1634	#ifdef CONFIG_ARCH_HAS_PARANOID_L1D_FLUSH
1635	/*
1636	* If L1D flush is supported on mm context switch
1637	* then we use this callback head to queue kill work
1638	* to kill tasks that are not running on SMT disabled
1639	* cores
1640	*/
1641	struct callback_head l1d_flush_kill;
1642	#endif
1643
1644	#ifdef CONFIG_RV
1645	/*
1646	* Per-task RV monitor, fixed in CONFIG_RV_PER_TASK_MONITORS.
1647	* If memory becomes a concern, we can think about a dynamic method.
1648	*/
1649	union rv_task_monitor rv[CONFIG_RV_PER_TASK_MONITORS];
1650	#endif
1651
1652	#ifdef CONFIG_USER_EVENTS
1653	struct user_event_mm *user_event_mm;
1654	#endif
1655
1656	#ifdef CONFIG_UNWIND_USER
1657	struct unwind_task_info unwind_info;
1658	#endif
1659
1660	/ CPU-specific state of this task: /
1661	struct thread_struct thread;
1662
1663	/*
1664	* New fields for task_struct should be added above here, so that
1665	* they are included in the randomized portion of task_struct.
1666	*/
1667	randomized_struct_fields_end
1668	} __attribute__ ((aligned (`64`)));
1669
/*
 * sched_proxy_exec - report whether proxy execution is enabled.
 *
 * With CONFIG_SCHED_PROXY_EXEC the answer is read from the
 * __sched_proxy_exec static key (declared here as default-true);
 * without it the helper is a constant false.
 */
#ifdef CONFIG_SCHED_PROXY_EXEC
DECLARE_STATIC_KEY_TRUE(__sched_proxy_exec);
static inline bool sched_proxy_exec(void)
{
	return static_branch_likely(&__sched_proxy_exec);
}
#else
static inline bool sched_proxy_exec(void)
{
	return false;
}
#endif
1682
/*
 * TASK_REPORT_IDLE is the value __task_state_index() substitutes for
 * TASK_IDLE tasks; TASK_REPORT_MAX is that value shifted left by one.
 */
#define TASK_REPORT_IDLE	(TASK_REPORT + 1)
#define TASK_REPORT_MAX		(TASK_REPORT_IDLE << 1)
1685
1686	static inline unsigned int __task_state_index(unsigned int tsk_state,
1687	unsigned int tsk_exit_state)
1688	{
1689	unsigned int state = (tsk_state \| tsk_exit_state) & TASK_REPORT;
1690
1691	BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX);
1692
1693	if ((tsk_state & TASK_IDLE) == TASK_IDLE)
1694	state = TASK_REPORT_IDLE;
1695
1696	/*
1697	* We're lying here, but rather than expose a completely new task state
1698	* to userspace, we can make this appear as if the task has gone through
1699	* a regular rt_mutex_lock() call.
1700	* Report frozen tasks as uninterruptible.
1701	*/
1702	if ((tsk_state & TASK_RTLOCK_WAIT) \|\| (tsk_state & TASK_FROZEN))
1703	state = TASK_UNINTERRUPTIBLE;
1704
1705	return fls(x: state);
1706	}
1707
1708	static inline unsigned int task_state_index(struct task_struct *tsk)
1709	{
1710	return __task_state_index(READ_ONCE(tsk->__state), tsk_exit_state: tsk->exit_state);
1711	}
1712
1713	static inline char task_index_to_char(unsigned int state)
1714	{
1715	static const char state_char[] = "RSDTtXZPI";
1716
1717	BUILD_BUG_ON(TASK_REPORT_MAX * `2` != `1` << (sizeof(state_char) - `1`));
1718
1719	return state_char[state];
1720	}
1721
/* One-letter state code for @tsk: task_state_index() fed to task_index_to_char(). */
static inline char task_state_to_char(struct task_struct *tsk)
{
	return task_index_to_char(task_state_index(tsk));
}
1726
extern struct pid *cad_pid;
1728
/*
 * Per process flags
 *
 * One bit each; the PF__HOLE__* names mark bit positions that currently
 * have no user.
 */
#define PF_VCPU			0x00000001	/* I'm a virtual CPU */
#define PF_IDLE			0x00000002	/* I am an IDLE thread */
#define PF_EXITING		0x00000004	/* Getting shut down */
#define PF_POSTCOREDUMP		0x00000008	/* Coredumps should ignore this task */
#define PF_IO_WORKER		0x00000010	/* Task is an IO worker */
#define PF_WQ_WORKER		0x00000020	/* I'm a workqueue worker */
#define PF_FORKNOEXEC		0x00000040	/* Forked but didn't exec */
#define PF_MCE_PROCESS		0x00000080	/* Process policy on mce errors */
#define PF_SUPERPRIV		0x00000100	/* Used super-user privileges */
#define PF_DUMPCORE		0x00000200	/* Dumped core */
#define PF_SIGNALED		0x00000400	/* Killed by a signal */
#define PF_MEMALLOC		0x00000800	/* Allocating memory to free memory. See memalloc_noreclaim_save() */
#define PF_NPROC_EXCEEDED	0x00001000	/* set_user() noticed that RLIMIT_NPROC was exceeded */
#define PF_USED_MATH		0x00002000	/* If unset the fpu must be initialized before use */
#define PF_USER_WORKER		0x00004000	/* Kernel thread cloned from userspace thread */
#define PF_NOFREEZE		0x00008000	/* This thread should not be frozen */
#define PF_KCOMPACTD		0x00010000	/* I am kcompactd */
#define PF_KSWAPD		0x00020000	/* I am kswapd */
#define PF_MEMALLOC_NOFS	0x00040000	/* All allocations inherit GFP_NOFS. See memalloc_nofs_save() */
#define PF_MEMALLOC_NOIO	0x00080000	/* All allocations inherit GFP_NOIO. See memalloc_noio_save() */
#define PF_LOCAL_THROTTLE	0x00100000	/* Throttle writes only against the bdi I write to,
						 * I am cleaning dirty pages from some other bdi. */
#define PF_KTHREAD		0x00200000	/* I am a kernel thread */
#define PF_RANDOMIZE		0x00400000	/* Randomize virtual address space */
#define PF__HOLE__00800000	0x00800000
#define PF__HOLE__01000000	0x01000000
#define PF__HOLE__02000000	0x02000000
#define PF_NO_SETAFFINITY	0x04000000	/* Userland is not allowed to meddle with cpus_mask */
#define PF_MCE_EARLY		0x08000000	/* Early kill for mce process policy */
#define PF_MEMALLOC_PIN		0x10000000	/* Allocations constrained to zones which allow long term pinning.
						 * See memalloc_pin_save() */
#define PF_BLOCK_TS		0x20000000	/* plug has ts that needs updating */
#define PF__HOLE__40000000	0x40000000
#define PF_SUSPEND_TASK		0x80000000	/* This thread called freeze_processes() and should not be frozen */
1766
/*
 * Only the _current_ task can read/write to tsk->flags, but other
 * tasks can access tsk->flags in readonly mode for example
 * with tsk_used_math (like during threaded core dumping).
 * There is however an exception to this rule during ptrace
 * or during fork: the ptracer task is allowed to write to the
 * child->flags of its traced child (same goes for fork, the parent
 * can write to the child->flags), because we're guaranteed the
 * child is not running and in turn not changing child->flags
 * at the same time the parent does it.
 */
#define clear_stopped_child_used_math(child)	do { (child)->flags &= ~PF_USED_MATH; } while (0)
#define set_stopped_child_used_math(child)	do { (child)->flags |= PF_USED_MATH; } while (0)
#define clear_used_math()			clear_stopped_child_used_math(current)
#define set_used_math()				set_stopped_child_used_math(current)

/* Set PF_USED_MATH on @child iff @condition is true; clear it otherwise. */
#define conditional_stopped_child_used_math(condition, child) \
	do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= (condition) ? PF_USED_MATH : 0; } while (0)

#define conditional_used_math(condition)	conditional_stopped_child_used_math(condition, current)

/* Make @child's PF_USED_MATH bit equal to current's. */
#define copy_to_stopped_child_used_math(child) \
	do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= current->flags & PF_USED_MATH; } while (0)

/* NOTE: this will return 0 or PF_USED_MATH, it will never return 1 */
#define tsk_used_math(p)			((p)->flags & PF_USED_MATH)
#define used_math()				tsk_used_math(current)
1794
1795	static __always_inline bool is_percpu_thread(void)
1796	{
1797	return (current->flags & PF_NO_SETAFFINITY) &&
1798	(current->nr_cpus_allowed == `1`);
1799	}
1800
/*
 * Per-process atomic flags.
 *
 * These are bit numbers (not masks) within task_struct::atomic_flags,
 * consumed by the TASK_PFA_*() accessor generators.
 */
#define PFA_NO_NEW_PRIVS		0	/* May not gain new privileges. */
#define PFA_SPREAD_PAGE			1	/* Spread page cache over cpuset */
#define PFA_SPREAD_SLAB			2	/* Spread some slab caches over cpuset */
#define PFA_SPEC_SSB_DISABLE		3	/* Speculative Store Bypass disabled */
#define PFA_SPEC_SSB_FORCE_DISABLE	4	/* Speculative Store Bypass force disabled */
#define PFA_SPEC_IB_DISABLE		5	/* Indirect branch speculation restricted */
#define PFA_SPEC_IB_FORCE_DISABLE	6	/* Indirect branch speculation permanently restricted */
#define PFA_SPEC_SSB_NOEXEC		7	/* Speculative Store Bypass clear on execve() */
1810
/*
 * Accessor generators for the PFA_* bits in task_struct::atomic_flags.
 * @name is the PFA_ suffix, @func the suffix of the generated helper:
 *   TASK_PFA_TEST  -> bool task_<func>(p)        via test_bit()
 *   TASK_PFA_SET   -> void task_set_<func>(p)    via set_bit()
 *   TASK_PFA_CLEAR -> void task_clear_<func>(p)  via clear_bit()
 */
#define TASK_PFA_TEST(name, func)					\
	static inline bool task_##func(struct task_struct *p)		\
	{ return test_bit(PFA_##name, &p->atomic_flags); }

#define TASK_PFA_SET(name, func)					\
	static inline void task_set_##func(struct task_struct *p)	\
	{ set_bit(PFA_##name, &p->atomic_flags); }

#define TASK_PFA_CLEAR(name, func)					\
	static inline void task_clear_##func(struct task_struct *p)	\
	{ clear_bit(PFA_##name, &p->atomic_flags); }
1822
1823	TASK_PFA_TEST(NO_NEW_PRIVS, no_new_privs)
1824	TASK_PFA_SET(NO_NEW_PRIVS, no_new_privs)
1825
1826	TASK_PFA_TEST(SPREAD_PAGE, spread_page)
1827	TASK_PFA_SET(SPREAD_PAGE, spread_page)
1828	TASK_PFA_CLEAR(SPREAD_PAGE, spread_page)
1829
1830	TASK_PFA_TEST(SPREAD_SLAB, spread_slab)
1831	TASK_PFA_SET(SPREAD_SLAB, spread_slab)
1832	TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
1833
1834	TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable)
1835	TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable)
1836	TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable)
1837
1838	TASK_PFA_TEST(SPEC_SSB_NOEXEC, spec_ssb_noexec)
1839	TASK_PFA_SET(SPEC_SSB_NOEXEC, spec_ssb_noexec)
1840	TASK_PFA_CLEAR(SPEC_SSB_NOEXEC, spec_ssb_noexec)
1841
1842	TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
1843	TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
1844
1845	TASK_PFA_TEST(SPEC_IB_DISABLE, spec_ib_disable)
1846	TASK_PFA_SET(SPEC_IB_DISABLE, spec_ib_disable)
1847	TASK_PFA_CLEAR(SPEC_IB_DISABLE, spec_ib_disable)
1848
1849	TASK_PFA_TEST(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)
1850	TASK_PFA_SET(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)
1851
1852	static inline void
1853	current_restore_flags(unsigned long orig_flags, unsigned long flags)
1854	{
1855	current->flags &= ~flags;
1856	current->flags \|= orig_flags & flags;
1857	}
1858
1859	extern int cpuset_cpumask_can_shrink(const struct cpumask cur, const* struct cpumask *trial);
1860	extern int task_can_attach(struct task_struct *p);
1861	extern int dl_bw_alloc(int cpu, u64 dl_bw);
1862	extern void dl_bw_free(int cpu, u64 dl_bw);
1863
1864	/ do_set_cpus_allowed() - consider using set_cpus_allowed_ptr() instead /
1865	extern void do_set_cpus_allowed(struct task_struct p, const* struct cpumask *new_mask);
1866
1867	/**
1868	* set_cpus_allowed_ptr - set CPU affinity mask of a task
1869	* @p: the task
1870	* @new_mask: CPU affinity mask
1871	*
1872	* Return: zero if successful, or a negative error code
1873	*/
1874	extern int set_cpus_allowed_ptr(struct task_struct p, const* struct cpumask *new_mask);
1875	extern int dup_user_cpus_ptr(struct task_struct dst, struct* task_struct src, int* node);
1876	extern void release_user_cpus_ptr(struct task_struct *p);
1877	extern int dl_task_check_affinity(struct task_struct p, const* struct cpumask *mask);
1878	extern void force_compatible_cpus_allowed_ptr(struct task_struct *p);
1879	extern void relax_compatible_cpus_allowed_ptr(struct task_struct *p);
1880
1881	extern int yield_to(struct task_struct *p, bool preempt);
1882	extern void set_user_nice(struct task_struct p, long* nice);
1883	extern int task_prio(const struct task_struct *p);
1884
1885	/**
1886	* task_nice - return the nice value of a given task.
1887	* @p: the task in question.
1888	*
1889	* Return: The nice value [ -20 ... 0 ... 19 ].
1890	*/
1891	static inline int task_nice(const struct task_struct *p)
1892	{
1893	return PRIO_TO_NICE((p)->static_prio);
1894	}
1895
extern int can_nice(const struct task_struct *p, const int nice);
extern int task_curr(const struct task_struct *p);
extern int idle_cpu(int cpu);
extern int available_idle_cpu(int cpu);
extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *);
extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *);
extern void sched_set_fifo(struct task_struct *p);
extern void sched_set_fifo_low(struct task_struct *p);
extern void sched_set_normal(struct task_struct *p, int nice);
extern int sched_setattr(struct task_struct *, const struct sched_attr *);
extern int sched_setattr_nocheck(struct task_struct *, const struct sched_attr *);
extern struct task_struct *idle_task(int cpu);
1908
1909	/**
1910	* is_idle_task - is the specified task an idle task?
1911	* @p: the task in question.
1912	*
1913	* Return: 1 if @p is an idle task. 0 otherwise.
1914	*/
1915	static __always_inline bool is_idle_task(const struct task_struct *p)
1916	{
1917	return !!(p->flags & PF_IDLE);
1918	}
1919
extern struct task_struct *curr_task(int cpu);
extern void ia64_set_curr_task(int cpu, struct task_struct *p);

void yield(void);
1924
1925	union thread_union {
1926	struct task_struct task;
1927	#ifndef CONFIG_THREAD_INFO_IN_TASK
1928	struct thread_info thread_info;
1929	#endif
1930	unsigned long stack[THREAD_SIZE/sizeof(long)];
1931	};
1932
1933	#ifndef CONFIG_THREAD_INFO_IN_TASK
1934	extern struct thread_info init_thread_info;
1935	#endif
1936
1937	extern unsigned long init_stack[THREAD_SIZE / sizeof(unsigned long)];
1938
1939	#ifdef CONFIG_THREAD_INFO_IN_TASK
1940	# define task_thread_info(task) (&(task)->thread_info)
1941	#else
1942	# define task_thread_info(task) ((struct thread_info *)(task)->stack)
1943	#endif
1944
/*
 * find a task by one of its numerical ids
 *
 * find_task_by_pid_ns():
 *      finds a task by its pid in the specified namespace
 * find_task_by_vpid():
 *      finds a task by its virtual pid
 *
 * see also find_vpid() etc in include/linux/pid.h
 */

extern struct task_struct *find_task_by_vpid(pid_t nr);
extern struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns);

/*
 * find a task by its virtual pid and get the task struct
 */
extern struct task_struct *find_get_task_by_vpid(pid_t nr);

extern int wake_up_state(struct task_struct *tsk, unsigned int state);
extern int wake_up_process(struct task_struct *tsk);
extern void wake_up_new_task(struct task_struct *tsk);

extern void kick_process(struct task_struct *tsk);
1969
extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec);

/*
 * set_task_comm - set @tsk's comm from @from (non-exec path).
 *
 * @from must be an array of exactly TASK_COMM_LEN bytes; anything else,
 * including a plain pointer of a different size, fails the build.
 */
#define set_task_comm(tsk, from) ({			\
	BUILD_BUG_ON(sizeof(from) != TASK_COMM_LEN);	\
	__set_task_comm(tsk, from, false);		\
})
1975
/*
 * - Why not use task_lock()?
 *   User space can randomly change their names anyway, so locking for readers
 *   doesn't make sense. For writers, locking is probably necessary, as a race
 *   condition could lead to long-term mixed results.
 *   The strscpy_pad() in __set_task_comm() can ensure that the task comm is
 *   always NUL-terminated and zero-padded. Therefore the race condition between
 *   reader and writer is not an issue.
 *
 * - BUILD_BUG_ON() can help prevent the buf from being truncated.
 *   Since the callers don't perform any return value checks, this safeguard is
 *   necessary.
 *
 * The macro evaluates to @buf.
 */
#define get_task_comm(buf, tsk) ({			\
	BUILD_BUG_ON(sizeof(buf) < TASK_COMM_LEN);	\
	strscpy_pad(buf, (tsk)->comm);			\
	buf;						\
})
1994
1995	static __always_inline void scheduler_ipi(void)
1996	{
1997	/*
1998	* Fold TIF_NEED_RESCHED into the preempt_count; anybody setting
1999	* TIF_NEED_RESCHED remotely (for the first time) will also send
2000	* this IPI.
2001	*/
2002	preempt_fold_need_resched();
2003	}
2004
extern unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state);
2006
/*
 * Set thread flags in other task's structures.
 * See asm/thread_info.h for TIF_xxxx flags available:
 */

/* Set thread flag @flag in the thread_info of @tsk. */
static inline void set_tsk_thread_flag(struct task_struct *tsk, int flag)
{
	set_ti_thread_flag(task_thread_info(tsk), flag);
}
2015
/* Clear thread flag @flag in the thread_info of @tsk. */
static inline void clear_tsk_thread_flag(struct task_struct *tsk, int flag)
{
	clear_ti_thread_flag(task_thread_info(tsk), flag);
}
2020
2021	static inline void update_tsk_thread_flag(struct task_struct tsk, int* flag,
2022	bool value)
2023	{
2024	update_ti_thread_flag(task_thread_info(tsk), flag, value);
2025	}
2026
/*
 * Test-and-set @flag in the thread_info of @tsk; returns whatever
 * test_and_set_ti_thread_flag() reports.
 */
static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag)
{
	return test_and_set_ti_thread_flag(task_thread_info(tsk), flag);
}
2031
/*
 * Test-and-clear @flag in the thread_info of @tsk; returns whatever
 * test_and_clear_ti_thread_flag() reports.
 */
static inline int test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag)
{
	return test_and_clear_ti_thread_flag(task_thread_info(tsk), flag);
}
2036
/* Test thread flag @flag in the thread_info of @tsk. */
static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag)
{
	return test_ti_thread_flag(task_thread_info(tsk), flag);
}
2041
2042	static inline void set_tsk_need_resched(struct task_struct *tsk)
2043	{
2044	if (tracepoint_enabled(sched_set_need_resched_tp) &&
2045	!test_tsk_thread_flag(tsk, TIF_NEED_RESCHED))
2046	__trace_set_need_resched(curr: tsk, TIF_NEED_RESCHED);
2047	set_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
2048	}
2049
2050	static inline void clear_tsk_need_resched(struct task_struct *tsk)
2051	{
2052	atomic_long_andnot(_TIF_NEED_RESCHED \| _TIF_NEED_RESCHED_LAZY,
2053	v: (atomic_long_t *)&task_thread_info(tsk)->flags);
2054	}
2055
2056	static inline int test_tsk_need_resched(struct task_struct *tsk)
2057	{
2058	return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
2059	}
2060
/*
 * cond_resched() and cond_resched_lock(): latency reduction via
 * explicit rescheduling in places that are safe. The return
 * value indicates whether a reschedule was done in fact.
 * cond_resched_lock() will drop the spinlock before scheduling.
 *
 * _cond_resched() is picked at build time:
 *  - PREEMPT_DYNAMIC with HAVE_PREEMPT_DYNAMIC_CALL: static call;
 *  - PREEMPT_DYNAMIC with HAVE_PREEMPT_DYNAMIC_KEY: dynamic_cond_resched();
 *  - !PREEMPTION: plain __cond_resched();
 *  - PREEMPTION without PREEMPT_DYNAMIC: constant 0.
 */
#if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC)
extern int __cond_resched(void);

#if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)

DECLARE_STATIC_CALL(cond_resched, __cond_resched);

static __always_inline int _cond_resched(void)
{
	return static_call_mod(cond_resched)();
}

#elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)

extern int dynamic_cond_resched(void);

static __always_inline int _cond_resched(void)
{
	return dynamic_cond_resched();
}

#else /* !CONFIG_PREEMPTION */

static inline int _cond_resched(void)
{
	return __cond_resched();
}

#endif /* PREEMPT_DYNAMIC && CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */

#else /* CONFIG_PREEMPTION && !CONFIG_PREEMPT_DYNAMIC */

static inline int _cond_resched(void)
{
	return 0;
}

#endif /* !CONFIG_PREEMPTION || CONFIG_PREEMPT_DYNAMIC */
2105
/*
 * cond_resched(): run the __might_resched() check for the call site with
 * an offsets argument of 0, then evaluate to the result of _cond_resched().
 */
#define cond_resched()					\
({							\
	__might_resched(__FILE__, __LINE__, 0);		\
	_cond_resched();				\
})
2110
2111	extern int __cond_resched_lock(spinlock_t *lock);
2112	extern int __cond_resched_rwlock_read(rwlock_t *lock);
2113	extern int __cond_resched_rwlock_write(rwlock_t *lock);
2114
/*
 * Encoding of the offsets value passed to __might_resched(): the bits
 * below MIGHT_RESCHED_RCU_SHIFT are covered by MIGHT_RESCHED_PREEMPT_MASK.
 */
#define MIGHT_RESCHED_RCU_SHIFT		8
#define MIGHT_RESCHED_PREEMPT_MASK	((1U << MIGHT_RESCHED_RCU_SHIFT) - 1)

#ifdef CONFIG_PREEMPT_RT
/*
 * spin/rw_lock() on RT implies rcu_read_lock(). The might_sleep() check in
 * cond_resched*lock() has to take that into account because it checks for
 * preempt_count() and rcu_preempt_depth().
 */
# define PREEMPT_LOCK_RESCHED_OFFSETS	\
	(PREEMPT_LOCK_OFFSET + (1U << MIGHT_RESCHED_RCU_SHIFT))
#else
/*
 * Non RT kernels have an elevated preempt count due to the held lock,
 * but are not allowed to be inside a RCU read side critical section
 */
# define PREEMPT_LOCK_RESCHED_OFFSETS	PREEMPT_LOCK_OFFSET
#endif
2133
/*
 * Lock-aware cond_resched() variants. Each one runs the __might_resched()
 * check for the call site with PREEMPT_LOCK_RESCHED_OFFSETS and then
 * evaluates to the result of the matching __cond_resched_*() helper.
 */
#define cond_resched_lock(lock)							\
({										\
	__might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS);	\
	__cond_resched_lock(lock);						\
})

#define cond_resched_rwlock_read(lock)						\
({										\
	__might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS);	\
	__cond_resched_rwlock_read(lock);					\
})

#define cond_resched_rwlock_write(lock)						\
({										\
	__might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS);	\
	__cond_resched_rwlock_write(lock);					\
})
2148
2149	#ifndef CONFIG_PREEMPT_RT
2150	static inline struct mutex __get_task_blocked_on(struct* task_struct *p)
2151	{
2152	struct mutex *m = p->blocked_on;
2153
2154	if (m)
2155	lockdep_assert_held_once(&m->wait_lock);
2156	return m;
2157	}
2158
2159	static inline void __set_task_blocked_on(struct task_struct p, struct* mutex *m)
2160	{
2161	struct mutex *blocked_on = READ_ONCE(p->blocked_on);
2162
2163	WARN_ON_ONCE(!m);
2164	/ The task should only be setting itself as blocked /
2165	WARN_ON_ONCE(p != current);
2166	/ Currently we serialize blocked_on under the mutex::wait_lock /
2167	lockdep_assert_held_once(&m->wait_lock);
2168	/*
2169	* Check ensure we don't overwrite existing mutex value
2170	* with a different mutex. Note, setting it to the same
2171	* lock repeatedly is ok.
2172	*/
2173	WARN_ON_ONCE(blocked_on && blocked_on != m);
2174	WRITE_ONCE(p->blocked_on, m);
2175	}
2176
2177	static inline void set_task_blocked_on(struct task_struct p, struct* mutex *m)
2178	{
2179	guard(raw_spinlock_irqsave)(l: &m->wait_lock);
2180	__set_task_blocked_on(p, m);
2181	}
2182
2183	static inline void __clear_task_blocked_on(struct task_struct p, struct* mutex *m)
2184	{
2185	if (m) {
2186	struct mutex *blocked_on = READ_ONCE(p->blocked_on);
2187
2188	/ Currently we serialize blocked_on under the mutex::wait_lock /
2189	lockdep_assert_held_once(&m->wait_lock);
2190	/*
2191	* There may be cases where we re-clear already cleared
2192	* blocked_on relationships, but make sure we are not
2193	* clearing the relationship with a different lock.
2194	*/
2195	WARN_ON_ONCE(blocked_on && blocked_on != m);
2196	}
2197	WRITE_ONCE(p->blocked_on, NULL);
2198	}
2199
2200	static inline void clear_task_blocked_on(struct task_struct p, struct* mutex *m)
2201	{
2202	guard(raw_spinlock_irqsave)(l: &m->wait_lock);
2203	__clear_task_blocked_on(p, m);
2204	}
2205	#else
/* CONFIG_PREEMPT_RT: the blocked_on clear helpers are empty stubs. */
static inline void __clear_task_blocked_on(struct task_struct *p, struct rt_mutex *m)
{
}

static inline void clear_task_blocked_on(struct task_struct *p, struct rt_mutex *m)
{
}
2213	#endif /* !CONFIG_PREEMPT_RT */
2214
2215	static __always_inline bool need_resched(void)
2216	{
2217	return unlikely(tif_need_resched());
2218	}
2219
/*
 * Wrappers for p->thread_info->cpu access. No-op on UP.
 *
 * On SMP, task_cpu() reads the cpu field of @p's thread_info with
 * READ_ONCE() and set_task_cpu() is implemented out of line. On UP,
 * task_cpu() always returns 0 and set_task_cpu() does nothing.
 */
#ifdef CONFIG_SMP

static inline unsigned int task_cpu(const struct task_struct *p)
{
	return READ_ONCE(task_thread_info(p)->cpu);
}

extern void set_task_cpu(struct task_struct *p, unsigned int cpu);

#else

static inline unsigned int task_cpu(const struct task_struct *p)
{
	return 0;
}

static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
{
}

#endif /* CONFIG_SMP */
2244
2245	static inline bool task_is_runnable(struct task_struct *p)
2246	{
2247	return p->on_rq && !p->se.sched_delayed;
2248	}
2249
2250	extern bool sched_task_on_rq(struct task_struct *p);
2251	extern unsigned long get_wchan(struct task_struct *p);
2252	extern struct task_struct cpu_curr_snapshot(int* cpu);
2253
/*
 * Lock holder preemption adds latency in several places; to reduce it,
 * offer a way to ask whether a vCPU is currently running.
 *
 * Optimistic spin loops can use the answer to stop spinning and block,
 * much like the native heuristic that tests whether the lock owner task
 * is running.
 *
 * This generic fallback is only compiled when no vcpu_is_preempted macro
 * has been defined earlier; it reports "not preempted" for every @cpu.
 */
#if !defined(vcpu_is_preempted)
static inline bool vcpu_is_preempted(int cpu)
{
	return false;
}
#endif
2268
2269	extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
2270	extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
2271
/* Fallback when no TASK_SIZE_OF is defined yet: ignore @tsk, use TASK_SIZE. */
#if !defined(TASK_SIZE_OF)
# define TASK_SIZE_OF(tsk)	TASK_SIZE
#endif
2275
2276	static inline bool owner_on_cpu(struct task_struct *owner)
2277	{
2278	/*
2279	* As lock holder preemption issue, we both skip spinning if
2280	* task is not on cpu or its cpu is preempted
2281	*/
2282	return READ_ONCE(owner->on_cpu) && !vcpu_is_preempted(cpu: task_cpu(p: owner));
2283	}
2284
/* Returns effective CPU energy utilization, as seen by the scheduler */
unsigned long sched_cpu_util(int cpu);
2287
/*
 * Core-scheduling hooks. Without CONFIG_SCHED_CORE the free/fork hooks are
 * empty and sched_core_idle_cpu() simply forwards to idle_cpu().
 */
#ifndef CONFIG_SCHED_CORE
static inline void sched_core_free(struct task_struct *tsk)
{
}

static inline void sched_core_fork(struct task_struct *p)
{
}

static inline int sched_core_idle_cpu(int cpu)
{
	return idle_cpu(cpu);
}
#else
void sched_core_free(struct task_struct *tsk);
void sched_core_fork(struct task_struct *p);
int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type,
			 unsigned long uaddr);
int sched_core_idle_cpu(int cpu);
#endif

void sched_set_stop_task(int cpu, struct task_struct *stop);
2301
#ifdef CONFIG_MEM_ALLOC_PROFILING
/*
 * Swap @tag with current->alloc_tag and return the value that was
 * installed before, so the caller can hand it to alloc_tag_restore().
 */
static __always_inline struct alloc_tag *alloc_tag_save(struct alloc_tag *tag)
{
	swap(current->alloc_tag, tag);
	return tag;
}

/*
 * Put @old back into current->alloc_tag. With
 * CONFIG_MEM_ALLOC_PROFILING_DEBUG a warning is raised if the currently
 * installed tag is not @tag.
 */
static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct alloc_tag *old)
{
#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
	WARN(current->alloc_tag != tag, "current->alloc_tag was changed:\n");
#endif
	current->alloc_tag = old;
}
#else
/* Profiling disabled: save evaluates to NULL, restore is a no-op. */
#define alloc_tag_save(_tag)			NULL
#define alloc_tag_restore(_tag, _old)		do {} while (0)
#endif
2320
2321	#ifndef MODULE
2322	#ifndef COMPILE_OFFSETS
2323
2324	extern void ___migrate_enable(void);
2325
2326	struct rq;
2327	DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
2328
/*
 * The "struct rq" is not available here, so we can't access the
 * "runqueues" with this_cpu_ptr(), as the compilation will fail in
 * this_cpu_ptr() -> raw_cpu_ptr() -> __verify_pcpu_ptr():
 *   typeof((ptr) + 0)
 *
 * So use arch_raw_cpu_ptr()/PERCPU_PTR() directly here.
 *
 * this_rq_pinned() yields an lvalue: the unsigned int located RQ_nr_pinned
 * bytes into this CPU's runqueue. Callers increment and decrement it.
 */
#ifdef CONFIG_SMP
#define this_rq_raw() arch_raw_cpu_ptr(&runqueues)
#else
#define this_rq_raw() PERCPU_PTR(&runqueues)
#endif
#define this_rq_pinned() (*(unsigned int *)((void *)this_rq_raw() + RQ_nr_pinned))
2343
2344	static inline void __migrate_enable(void)
2345	{
2346	struct task_struct *p = current;
2347
2348	#ifdef CONFIG_DEBUG_PREEMPT
2349	/*
2350	* Check both overflow from migrate_disable() and superfluous
2351	* migrate_enable().
2352	*/
2353	if (WARN_ON_ONCE((s16)p->migration_disabled <= `0`))
2354	return;
2355	#endif
2356
2357	if (p->migration_disabled > `1`) {
2358	p->migration_disabled--;
2359	return;
2360	}
2361
2362	/*
2363	* Ensure stop_task runs either before or after this, and that
2364	* __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule().
2365	*/
2366	guard(preempt)();
2367	if (unlikely(p->cpus_ptr != &p->cpus_mask))
2368	___migrate_enable();
2369	/*
2370	* Mustn't clear migration_disabled() until cpus_ptr points back at the
2371	* regular cpus_mask, otherwise things that race (eg.
2372	* select_fallback_rq) get confused.
2373	*/
2374	barrier();
2375	p->migration_disabled = `0`;
2376	this_rq_pinned()--;
2377	}
2378
2379	static inline void __migrate_disable(void)
2380	{
2381	struct task_struct *p = current;
2382
2383	if (p->migration_disabled) {
2384	#ifdef CONFIG_DEBUG_PREEMPT
2385	/*
2386	*Warn about overflow half-way through the range.
2387	*/
2388	WARN_ON_ONCE((s16)p->migration_disabled < `0`);
2389	#endif
2390	p->migration_disabled++;
2391	return;
2392	}
2393
2394	guard(preempt)();
2395	this_rq_pinned()++;
2396	p->migration_disabled = `1`;
2397	}
2398	#else /* !COMPILE_OFFSETS */
/* COMPILE_OFFSETS build: both helpers are empty stubs. */
static inline void __migrate_disable(void)
{
}

static inline void __migrate_enable(void)
{
}
2401	#endif /* !COMPILE_OFFSETS */
2402
/*
 * The runqueues variable should not have to be exported. To make that
 * possible, kernel/sched/core.c also defines and exports
 * migrate_enable()/migrate_disable() for use by modules; it does so by
 * defining INSTANTIATE_EXPORTED_MIGRATE_DISABLE before including this
 * header, in which case only the prototypes are provided here.
 */
#ifdef INSTANTIATE_EXPORTED_MIGRATE_DISABLE
void migrate_disable(void);
void migrate_enable(void);
#else /* !INSTANTIATE_EXPORTED_MIGRATE_DISABLE */
/* Inline wrappers around the __migrate_*() helpers. */
static inline void migrate_disable(void)
{
	__migrate_disable();
}

static inline void migrate_enable(void)
{
	__migrate_enable();
}
#endif /* INSTANTIATE_EXPORTED_MIGRATE_DISABLE */
2423
2424	#else /* MODULE */
/* Module builds only get prototypes; the definitions live elsewhere. */
void migrate_disable(void);
void migrate_enable(void);
2427	#endif /* MODULE */
2428
2429	DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable())
2430
2431	#endif
2432

Browse the source code of Linux/include/linux/sched.h