| 1 | /* SPDX-License-Identifier: GPL-2.0-or-later */ | 
|---|
| 2 |  | 
|---|
| 3 | #ifndef __CPUSET_INTERNAL_H | 
|---|
| 4 | #define __CPUSET_INTERNAL_H | 
|---|
| 5 |  | 
|---|
| 6 | #include <linux/cgroup.h> | 
|---|
| 7 | #include <linux/cpu.h> | 
|---|
| 8 | #include <linux/cpumask.h> | 
|---|
| 9 | #include <linux/cpuset.h> | 
|---|
| 10 | #include <linux/spinlock.h> | 
|---|
| 11 | #include <linux/union_find.h> | 
|---|
| 12 |  | 
|---|
/* See "Frequency meter" comments, below. */

/*
 * Per-cpuset event-frequency filter state; used as the memory_pressure
 * filter (see struct cpuset::fmeter).
 */
struct fmeter {
	int cnt;		/* unprocessed events count */
	int val;		/* most recent output value */
	time64_t time;		/* clock (secs) when val computed */
	spinlock_t lock;	/* guards read or write of above */
};
|---|
| 21 |  | 
|---|
/*
 * Invalid partition error code, stored in struct cpuset::prs_err to
 * record why a partition root became invalid.
 */
enum prs_errcode {
	PERR_NONE = 0,
	PERR_INVCPUS,
	PERR_INVPARENT,
	PERR_NOTPART,
	PERR_NOTEXCL,
	PERR_NOCPUS,
	PERR_HOTPLUG,
	PERR_CPUSEMPTY,
	PERR_HKEEPING,
	PERR_ACCESS,
	PERR_REMOTE,
};
|---|
| 38 |  | 
|---|
/*
 * Bits in struct cpuset::flags field; tested/set with the bitops
 * helpers (see the is_*() accessors below).
 */
typedef enum {
	CS_CPU_EXCLUSIVE,
	CS_MEM_EXCLUSIVE,
	CS_MEM_HARDWALL,
	CS_MEMORY_MIGRATE,
	CS_SCHED_LOAD_BALANCE,
	CS_SPREAD_PAGE,
	CS_SPREAD_SLAB,
} cpuset_flagbits_t;
|---|
| 49 |  | 
|---|
/* The various types of files and directories in a cpuset file system */

typedef enum {
	FILE_MEMORY_MIGRATE,
	FILE_CPULIST,
	FILE_MEMLIST,
	FILE_EFFECTIVE_CPULIST,
	FILE_EFFECTIVE_MEMLIST,
	FILE_SUBPARTS_CPULIST,
	FILE_EXCLUSIVE_CPULIST,
	FILE_EFFECTIVE_XCPULIST,
	FILE_ISOLATED_CPULIST,
	FILE_CPU_EXCLUSIVE,
	FILE_MEM_EXCLUSIVE,
	FILE_MEM_HARDWALL,
	FILE_SCHED_LOAD_BALANCE,
	FILE_PARTITION_ROOT,
	FILE_SCHED_RELAX_DOMAIN_LEVEL,
	FILE_MEMORY_PRESSURE_ENABLED,
	FILE_MEMORY_PRESSURE,
	FILE_SPREAD_PAGE,
	FILE_SPREAD_SLAB,
} cpuset_filetype_t;
|---|
| 73 |  | 
|---|
struct cpuset {
	struct cgroup_subsys_state css;

	unsigned long flags;		/* "unsigned long" so bitops work */

	/*
	 * On default hierarchy:
	 *
	 * The user-configured masks can only be changed by writing to
	 * cpuset.cpus and cpuset.mems, and won't be limited by the
	 * parent masks.
	 *
	 * The effective masks is the real masks that apply to the tasks
	 * in the cpuset. They may be changed if the configured masks are
	 * changed or hotplug happens.
	 *
	 * effective_mask == configured_mask & parent's effective_mask,
	 * and if it ends up empty, it will inherit the parent's mask.
	 *
	 *
	 * On legacy hierarchy:
	 *
	 * The user-configured masks are always the same with effective masks.
	 */

	/* user-configured CPUs and Memory Nodes allowed to tasks */
	cpumask_var_t cpus_allowed;
	nodemask_t mems_allowed;

	/* effective CPUs and Memory Nodes allowed to tasks */
	cpumask_var_t effective_cpus;
	nodemask_t effective_mems;

	/*
	 * Exclusive CPUs dedicated to current cgroup (default hierarchy only)
	 *
	 * The effective_cpus of a valid partition root comes solely from its
	 * effective_xcpus and some of the effective_xcpus may be distributed
	 * to sub-partitions below & hence excluded from its effective_cpus.
	 * For a valid partition root, its effective_cpus have no relationship
	 * with cpus_allowed unless its exclusive_cpus isn't set.
	 *
	 * This value will only be set if either exclusive_cpus is set or
	 * when this cpuset becomes a local partition root.
	 */
	cpumask_var_t effective_xcpus;

	/*
	 * Exclusive CPUs as requested by the user (default hierarchy only)
	 *
	 * Its value is independent of cpus_allowed and designates the set of
	 * CPUs that can be granted to the current cpuset or its children when
	 * it becomes a valid partition root. The effective set of exclusive
	 * CPUs granted (effective_xcpus) depends on whether those exclusive
	 * CPUs are passed down by its ancestors and not yet taken up by
	 * another sibling partition root along the way.
	 *
	 * If its value isn't set, it defaults to cpus_allowed.
	 */
	cpumask_var_t exclusive_cpus;

	/*
	 * This is old Memory Nodes tasks took on.
	 *
	 * - top_cpuset.old_mems_allowed is initialized to mems_allowed.
	 * - A new cpuset's old_mems_allowed is initialized when some
	 *   task is moved into it.
	 * - old_mems_allowed is used in cpuset_migrate_mm() when we change
	 *   cpuset.mems_allowed and have tasks' nodemask updated, and
	 *   then old_mems_allowed is updated to mems_allowed.
	 */
	nodemask_t old_mems_allowed;

	struct fmeter fmeter;		/* memory_pressure filter */

	/*
	 * Tasks are being attached to this cpuset.  Used to prevent
	 * zeroing cpus/mems_allowed between ->can_attach() and ->attach().
	 */
	int attach_in_progress;

	/* for custom sched domain */
	int relax_domain_level;

	/* number of valid local child partitions */
	int nr_subparts;

	/* partition root state */
	int partition_root_state;

	/*
	 * number of SCHED_DEADLINE tasks attached to this cpuset, so that we
	 * know when to rebuild associated root domain bandwidth information.
	 */
	int nr_deadline_tasks;
	int nr_migrate_dl_tasks;
	u64 sum_migrate_dl_bw;

	/* Invalid partition error code, not lock protected */
	enum prs_errcode prs_err;

	/* Handle for cpuset.cpus.partition */
	struct cgroup_file partition_file;

	/* Remote partition sibling list anchored at remote_children */
	struct list_head remote_sibling;

	/* Used to merge intersecting subsets for generate_sched_domains */
	struct uf_node node;
};
|---|
| 184 |  | 
|---|
| 185 | static inline struct cpuset *css_cs(struct cgroup_subsys_state *css) | 
|---|
| 186 | { | 
|---|
| 187 | return css ? container_of(css, struct cpuset, css) : NULL; | 
|---|
| 188 | } | 
|---|
| 189 |  | 
|---|
| 190 | /* Retrieve the cpuset for a task */ | 
|---|
| 191 | static inline struct cpuset *task_cs(struct task_struct *task) | 
|---|
| 192 | { | 
|---|
| 193 | return css_cs(css: task_css(task, subsys_id: cpuset_cgrp_id)); | 
|---|
| 194 | } | 
|---|
| 195 |  | 
|---|
| 196 | static inline struct cpuset *parent_cs(struct cpuset *cs) | 
|---|
| 197 | { | 
|---|
| 198 | return css_cs(css: cs->css.parent); | 
|---|
| 199 | } | 
|---|
| 200 |  | 
|---|
| 201 | /* convenient tests for these bits */ | 
|---|
| 202 | static inline bool is_cpuset_online(struct cpuset *cs) | 
|---|
| 203 | { | 
|---|
| 204 | return css_is_online(css: &cs->css) && !css_is_dying(css: &cs->css); | 
|---|
| 205 | } | 
|---|
| 206 |  | 
|---|
/* Is the CS_CPU_EXCLUSIVE flag set on this cpuset? */
static inline int is_cpu_exclusive(const struct cpuset *cs)
{
	return test_bit(CS_CPU_EXCLUSIVE, &cs->flags);
}
|---|
| 211 |  | 
|---|
/* Is the CS_MEM_EXCLUSIVE flag set on this cpuset? */
static inline int is_mem_exclusive(const struct cpuset *cs)
{
	return test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
}
|---|
| 216 |  | 
|---|
/* Is the CS_MEM_HARDWALL flag set on this cpuset? */
static inline int is_mem_hardwall(const struct cpuset *cs)
{
	return test_bit(CS_MEM_HARDWALL, &cs->flags);
}
|---|
| 221 |  | 
|---|
/* Is the CS_SCHED_LOAD_BALANCE flag set on this cpuset? */
static inline int is_sched_load_balance(const struct cpuset *cs)
{
	return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
}
|---|
| 226 |  | 
|---|
/* Is the CS_MEMORY_MIGRATE flag set on this cpuset? */
static inline int is_memory_migrate(const struct cpuset *cs)
{
	return test_bit(CS_MEMORY_MIGRATE, &cs->flags);
}
|---|
| 231 |  | 
|---|
/* Is the CS_SPREAD_PAGE flag set on this cpuset? */
static inline int is_spread_page(const struct cpuset *cs)
{
	return test_bit(CS_SPREAD_PAGE, &cs->flags);
}
|---|
| 236 |  | 
|---|
/* Is the CS_SPREAD_SLAB flag set on this cpuset? */
static inline int is_spread_slab(const struct cpuset *cs)
{
	return test_bit(CS_SPREAD_SLAB, &cs->flags);
}
|---|
| 241 |  | 
|---|
/**
 * cpuset_for_each_child - traverse online children of a cpuset
 * @child_cs: loop cursor pointing to the current child
 * @pos_css: used for iteration
 * @parent_cs: target cpuset to walk children of
 *
 * Walk @child_cs through the online children of @parent_cs.  Must be used
 * with RCU read locked.  Offline/dying children are skipped via
 * is_cpuset_online().
 */
#define cpuset_for_each_child(child_cs, pos_css, parent_cs)		\
	css_for_each_child((pos_css), &(parent_cs)->css)		\
		if (is_cpuset_online(((child_cs) = css_cs((pos_css)))))
|---|
| 254 |  | 
|---|
/**
 * cpuset_for_each_descendant_pre - pre-order walk of a cpuset's descendants
 * @des_cs: loop cursor pointing to the current descendant
 * @pos_css: used for iteration
 * @root_cs: target cpuset to walk descendants of
 *
 * Walk @des_cs through the online descendants of @root_cs.  Must be used
 * with RCU read locked.  The caller may modify @pos_css by calling
 * css_rightmost_descendant() to skip subtree.  @root_cs is included in the
 * iteration and the first node to be visited.
 */
#define cpuset_for_each_descendant_pre(des_cs, pos_css, root_cs)	\
	css_for_each_descendant_pre((pos_css), &(root_cs)->css)		\
		if (is_cpuset_online(((des_cs) = css_cs((pos_css)))))
|---|
| 269 |  | 
|---|
/*
 * Shared cpuset core interfaces, implemented elsewhere in the cpuset
 * code and used by both the v2 core and the v1 compatibility code.
 */
void rebuild_sched_domains_locked(void);
void cpuset_callback_lock_irq(void);
void cpuset_callback_unlock_irq(void);
void cpuset_update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus);
void cpuset_update_tasks_nodemask(struct cpuset *cs);
int cpuset_update_flag(cpuset_flagbits_t bit, struct cpuset *cs, int turning_on);
ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
			     char *buf, size_t nbytes, loff_t off);
int cpuset_common_seq_show(struct seq_file *sf, void *v);
void cpuset_full_lock(void);
void cpuset_full_unlock(void);
|---|
| 281 |  | 
|---|
/*
 * cpuset-v1.c
 *
 * Legacy (v1) hierarchy interfaces.  When CONFIG_CPUSETS_V1 is disabled
 * these collapse into empty/no-op stubs so callers need no #ifdefs.
 */
#ifdef CONFIG_CPUSETS_V1
extern struct cftype cpuset1_files[];
void fmeter_init(struct fmeter *fmp);
void cpuset1_update_task_spread_flags(struct cpuset *cs,
					struct task_struct *tsk);
void cpuset1_update_tasks_flags(struct cpuset *cs);
void cpuset1_hotplug_update_tasks(struct cpuset *cs,
			    struct cpumask *new_cpus, nodemask_t *new_mems,
			    bool cpus_updated, bool mems_updated);
int cpuset1_validate_change(struct cpuset *cur, struct cpuset *trial);
#else
static inline void fmeter_init(struct fmeter *fmp) {}
static inline void cpuset1_update_task_spread_flags(struct cpuset *cs,
						struct task_struct *tsk) {}
static inline void cpuset1_update_tasks_flags(struct cpuset *cs) {}
static inline void cpuset1_hotplug_update_tasks(struct cpuset *cs,
			struct cpumask *new_cpus, nodemask_t *new_mems,
			bool cpus_updated, bool mems_updated) {}
/* Stub: no v1 constraints to validate; always succeeds. */
static inline int cpuset1_validate_change(struct cpuset *cur,
				struct cpuset *trial) { return 0; }
#endif /* CONFIG_CPUSETS_V1 */
|---|
| 306 |  | 
|---|
| 307 | #endif /* __CPUSET_INTERNAL_H */ | 
|---|
| 308 |  | 
|---|