// SPDX-License-Identifier: GPL-2.0
#include <linux/cgroup.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/sched/signal.h>

#include "cgroup-internal.h"

#include <trace/events/cgroup.h>

/*
 * Update CGRP_FROZEN in cgroup->flags.
 * Return true if the flag was updated; false if it already had the
 * requested value.
 */
static bool cgroup_update_frozen_flag(struct cgroup *cgrp, bool frozen)
{
	lockdep_assert_held(&css_set_lock);

	/* Already there? */
	if (test_bit(CGRP_FROZEN, &cgrp->flags) == frozen)
		return false;

	if (frozen)
		set_bit(CGRP_FROZEN, &cgrp->flags);
	else
		clear_bit(CGRP_FROZEN, &cgrp->flags);

	cgroup_file_notify(&cgrp->events_file);
	TRACE_CGROUP_PATH(notify_frozen, cgrp, frozen);
	return true;
}

/*
 * Propagate the cgroup frozen state upwards through the cgroup tree.
 */
static void cgroup_propagate_frozen(struct cgroup *cgrp, bool frozen)
{
	int desc = 1;

	/*
	 * If the new state is frozen, some freezing ancestor cgroups may change
	 * their state too, depending on whether all their descendants are frozen.
	 *
	 * Otherwise, all ancestor cgroups are forced into the non-frozen state.
	 */
	while ((cgrp = cgroup_parent(cgrp))) {
		if (frozen) {
			cgrp->freezer.nr_frozen_descendants += desc;
			if (!test_bit(CGRP_FREEZE, &cgrp->flags) ||
			    (cgrp->freezer.nr_frozen_descendants !=
			     cgrp->nr_descendants))
				continue;
		} else {
			cgrp->freezer.nr_frozen_descendants -= desc;
		}

		if (cgroup_update_frozen_flag(cgrp, frozen))
			desc++;
	}
}

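/*
 * For illustration: in a hierarchy A/B/C where A, B and C all have
 * CGRP_FREEZE set and C just became frozen, the walk above first visits B,
 * whose nr_frozen_descendants now matches nr_descendants, so B gets
 * CGRP_FROZEN and desc grows to 2; A then accounts both newly frozen
 * descendants (B and C) in one step and becomes frozen as well once all
 * of its descendants are frozen.
 */
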
/*
 * Revisit the cgroup frozen state.
 * Checks whether the cgroup is really frozen and performs all state
 * transitions.
 */
void cgroup_update_frozen(struct cgroup *cgrp)
{
	bool frozen;

	/*
	 * If the cgroup has to be frozen (CGRP_FREEZE bit set),
	 * and all tasks are frozen and/or stopped, let's consider
	 * the cgroup frozen. Otherwise it's not frozen.
	 */
	frozen = test_bit(CGRP_FREEZE, &cgrp->flags) &&
		cgrp->freezer.nr_frozen_tasks == __cgroup_task_count(cgrp);

	/* If the flag was updated, update the state of ancestor cgroups. */
	if (cgroup_update_frozen_flag(cgrp, frozen))
		cgroup_propagate_frozen(cgrp, frozen);
}

/*
 * Increment cgroup's nr_frozen_tasks.
 */
static void cgroup_inc_frozen_cnt(struct cgroup *cgrp)
{
	cgrp->freezer.nr_frozen_tasks++;
}

/*
 * Decrement cgroup's nr_frozen_tasks.
 */
static void cgroup_dec_frozen_cnt(struct cgroup *cgrp)
{
	cgrp->freezer.nr_frozen_tasks--;
	WARN_ON_ONCE(cgrp->freezer.nr_frozen_tasks < 0);
}

/*
 * Enter frozen/stopped state, if not yet there. Update cgroup's counters,
 * and revisit the state of the cgroup, if necessary.
 */
void cgroup_enter_frozen(void)
{
	struct cgroup *cgrp;

	if (current->frozen)
		return;

	spin_lock_irq(&css_set_lock);
	current->frozen = true;
	cgrp = task_dfl_cgroup(current);
	cgroup_inc_frozen_cnt(cgrp);
	cgroup_update_frozen(cgrp);
	spin_unlock_irq(&css_set_lock);
}

/*
 * Conditionally leave frozen/stopped state. Update cgroup's counters,
 * and revisit the state of the cgroup, if necessary.
 *
 * If always_leave is not set, and the cgroup is freezing,
 * we're racing with the cgroup freezing. In this case, we don't
 * drop the frozen counter to avoid a transient switch to
 * the unfrozen state.
 */
void cgroup_leave_frozen(bool always_leave)
{
	struct cgroup *cgrp;

	spin_lock_irq(&css_set_lock);
	cgrp = task_dfl_cgroup(current);
	if (always_leave || !test_bit(CGRP_FREEZE, &cgrp->flags)) {
		cgroup_dec_frozen_cnt(cgrp);
		cgroup_update_frozen(cgrp);
		WARN_ON_ONCE(!current->frozen);
		current->frozen = false;
	} else if (!(current->jobctl & JOBCTL_TRAP_FREEZE)) {
		spin_lock(&current->sighand->siglock);
		current->jobctl |= JOBCTL_TRAP_FREEZE;
		set_thread_flag(TIF_SIGPENDING);
		spin_unlock(&current->sighand->siglock);
	}
	spin_unlock_irq(&css_set_lock);
}

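/*
 * Note: setting JOBCTL_TRAP_FREEZE together with TIF_SIGPENDING above makes
 * the task re-enter the signal delivery path, where the freezer trap is
 * expected to park it again and call cgroup_enter_frozen(); this is how the
 * race with an ongoing freeze described above eventually resolves.
 */
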
/*
 * Freeze or unfreeze the task by setting or clearing the JOBCTL_TRAP_FREEZE
 * jobctl bit.
 */
static void cgroup_freeze_task(struct task_struct *task, bool freeze)
{
	unsigned long flags;

	/* If the task is about to die, don't bother with freezing it. */
	if (!lock_task_sighand(task, &flags))
		return;

	if (freeze) {
		task->jobctl |= JOBCTL_TRAP_FREEZE;
		signal_wake_up(task, false);
	} else {
		task->jobctl &= ~JOBCTL_TRAP_FREEZE;
		wake_up_process(task);
	}

	unlock_task_sighand(task, &flags);
}

/*
 * Freeze or unfreeze all tasks in the given cgroup.
 */
static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze, u64 ts_nsec)
{
	struct css_task_iter it;
	struct task_struct *task;

	lockdep_assert_held(&cgroup_mutex);

	spin_lock_irq(&css_set_lock);
	write_seqcount_begin(&cgrp->freezer.freeze_seq);
	if (freeze) {
		set_bit(CGRP_FREEZE, &cgrp->flags);
		cgrp->freezer.freeze_start_nsec = ts_nsec;
	} else {
		clear_bit(CGRP_FREEZE, &cgrp->flags);
		cgrp->freezer.frozen_nsec += (ts_nsec -
			cgrp->freezer.freeze_start_nsec);
	}
	write_seqcount_end(&cgrp->freezer.freeze_seq);
	spin_unlock_irq(&css_set_lock);

	if (freeze)
		TRACE_CGROUP_PATH(freeze, cgrp);
	else
		TRACE_CGROUP_PATH(unfreeze, cgrp);

	css_task_iter_start(&cgrp->self, 0, &it);
	while ((task = css_task_iter_next(&it))) {
		/*
		 * Ignore kernel threads here. Freezing cgroups containing
		 * kthreads isn't supported.
		 */
		if (task->flags & PF_KTHREAD)
			continue;
		cgroup_freeze_task(task, freeze);
	}
	css_task_iter_end(&it);

	/*
	 * Cgroup state should be revisited here to cover empty leaf cgroups
	 * and cgroups whose descendants are already in the desired state.
	 */
	spin_lock_irq(&css_set_lock);
	if (cgrp->nr_descendants == cgrp->freezer.nr_frozen_descendants)
		cgroup_update_frozen(cgrp);
	spin_unlock_irq(&css_set_lock);
}

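/*
 * The freeze_seq write-side section above pairs with seqcount readers that,
 * presumably, sample freeze_start_nsec/frozen_nsec to report how long the
 * cgroup has been frozen without taking css_set_lock; writers are already
 * serialized by css_set_lock, so a plain seqcount is sufficient here.
 */
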
/*
 * Adjust the task state (freeze or unfreeze) and revisit the state of
 * source and destination cgroups.
 */
void cgroup_freezer_migrate_task(struct task_struct *task,
				 struct cgroup *src, struct cgroup *dst)
{
	lockdep_assert_held(&css_set_lock);

	/*
	 * Kernel threads are not supposed to be frozen at all.
	 */
	if (task->flags & PF_KTHREAD)
		return;

	/*
	 * There is nothing to do if neither the source nor the destination
	 * cgroup is freezing and the task is not frozen.
	 */
	if (!test_bit(CGRP_FREEZE, &src->flags) &&
	    !test_bit(CGRP_FREEZE, &dst->flags) &&
	    !task->frozen)
		return;

	/*
	 * Adjust counters of freezing and frozen tasks.
	 * Note, that if the task is frozen, but the destination cgroup is not
	 * frozen, we bump both counters to keep them balanced.
	 */
	if (task->frozen) {
		cgroup_inc_frozen_cnt(dst);
		cgroup_dec_frozen_cnt(src);
	}
	cgroup_update_frozen(dst);
	cgroup_update_frozen(src);

	/*
	 * Force the task to the desired state.
	 */
	cgroup_freeze_task(task, test_bit(CGRP_FREEZE, &dst->flags));
}

void cgroup_freeze(struct cgroup *cgrp, bool freeze)
{
	struct cgroup_subsys_state *css;
	struct cgroup *parent;
	struct cgroup *dsct;
	bool applied = false;
	u64 ts_nsec;
	bool old_e;

	lockdep_assert_held(&cgroup_mutex);

	/*
	 * Nothing changed? Just exit.
	 */
	if (cgrp->freezer.freeze == freeze)
		return;

	cgrp->freezer.freeze = freeze;
	ts_nsec = ktime_get_ns();

	/*
	 * Propagate changes downwards in the cgroup tree.
	 */
	css_for_each_descendant_pre(css, &cgrp->self) {
		dsct = css->cgroup;

		if (cgroup_is_dead(dsct))
			continue;

		/*
		 * e_freeze is determined by the parent's e_freeze and dsct's
		 * own freeze. If the effective state doesn't change, its
		 * children won't be affected either, so skip the whole
		 * subtree.
		 */
		old_e = dsct->freezer.e_freeze;
		parent = cgroup_parent(dsct);
		dsct->freezer.e_freeze = (dsct->freezer.freeze ||
					  parent->freezer.e_freeze);
		if (dsct->freezer.e_freeze == old_e) {
			css = css_rightmost_descendant(css);
			continue;
		}

		/*
		 * Do change actual state: freeze or unfreeze.
		 */
		cgroup_do_freeze(dsct, freeze, ts_nsec);
		applied = true;
	}

	/*
	 * Even if the actual state hasn't changed, let's notify a user.
	 * The state can be enforced by an ancestor cgroup: the cgroup
	 * can already be in the desired state or it can be locked in the
	 * opposite state, so that the transition will never happen.
	 * In both cases it's better to notify a user, that there is
	 * nothing to wait for.
	 */
	if (!applied) {
		TRACE_CGROUP_PATH(notify_frozen, cgrp,
				  test_bit(CGRP_FROZEN, &cgrp->flags));
		cgroup_file_notify(&cgrp->events_file);
	}
}
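
/*
 * Illustrative user-space sketch (not part of this file): the code above
 * backs the cgroup v2 "cgroup.freeze" interface.  Writing "1" to it ends up
 * in cgroup_freeze(cgrp, true); once every task in the subtree is trapped,
 * CGRP_FROZEN is set and "frozen 1" appears in cgroup.events, whose
 * notification is what cgroup_file_notify() serves.
 *
 *	// Minimal sketch, assuming a delegated cgroup at /sys/fs/cgroup/test.
 *	int fd = open("/sys/fs/cgroup/test/cgroup.freeze", O_WRONLY);
 *	write(fd, "1", 1);	// request freezing of the whole subtree
 *	close(fd);
 *	// Poll cgroup.events (POLLPRI) and wait for "frozen 1" to learn
 *	// that the transition has actually completed.
 */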