| 1 | // SPDX-License-Identifier: GPL-2.0 | 
|---|
| 2 |  | 
|---|
| 3 | /* | 
|---|
| 4 | * CPU accounting code for task groups. | 
|---|
| 5 | * | 
|---|
| 6 | * Based on the work by Paul Menage (menage@google.com) and Balbir Singh | 
|---|
| 7 | * (balbir@in.ibm.com). | 
|---|
| 8 | */ | 
|---|
| 9 | #include <linux/sched/cputime.h> | 
|---|
| 10 | #include "sched.h" | 
|---|
| 11 |  | 
|---|
/*
 * Statistics kept per CPU accounting group: time spent by the tasks of
 * the group executing in ...
 */
enum cpuacct_stat_index {
	CPUACCT_STAT_USER,	/* ... user mode */
	CPUACCT_STAT_SYSTEM,	/* ... kernel mode */

	/*
	 * Number of statistics above.  cpuacct_cpuusage_read() also accepts
	 * this value as a selector for the summed usage counter.
	 */
	CPUACCT_STAT_NSTATS,
};
|---|
| 19 |  | 
|---|
| 20 | static const char * const cpuacct_stat_desc[] = { | 
|---|
| 21 | [CPUACCT_STAT_USER] = "user", | 
|---|
| 22 | [CPUACCT_STAT_SYSTEM] = "system", | 
|---|
| 23 | }; | 
|---|
| 24 |  | 
|---|
| 25 | /* track CPU usage of a group of tasks and its child groups */ | 
|---|
| 26 | struct cpuacct { | 
|---|
| 27 | struct cgroup_subsys_state	css; | 
|---|
| 28 | /* cpuusage holds pointer to a u64-type object on every CPU */ | 
|---|
| 29 | u64 __percpu	*cpuusage; | 
|---|
| 30 | struct kernel_cpustat __percpu	*cpustat; | 
|---|
| 31 | }; | 
|---|
| 32 |  | 
|---|
| 33 | static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css) | 
|---|
| 34 | { | 
|---|
| 35 | return css ? container_of(css, struct cpuacct, css) : NULL; | 
|---|
| 36 | } | 
|---|
| 37 |  | 
|---|
| 38 | /* Return CPU accounting group to which this task belongs */ | 
|---|
| 39 | static inline struct cpuacct *task_ca(struct task_struct *tsk) | 
|---|
| 40 | { | 
|---|
| 41 | return css_ca(css: task_css(task: tsk, subsys_id: cpuacct_cgrp_id)); | 
|---|
| 42 | } | 
|---|
| 43 |  | 
|---|
| 44 | static inline struct cpuacct *parent_ca(struct cpuacct *ca) | 
|---|
| 45 | { | 
|---|
| 46 | return css_ca(css: ca->css.parent); | 
|---|
| 47 | } | 
|---|
| 48 |  | 
|---|
| 49 | static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage); | 
|---|
| 50 | static struct cpuacct root_cpuacct = { | 
|---|
| 51 | .cpustat	= &kernel_cpustat, | 
|---|
| 52 | .cpuusage	= &root_cpuacct_cpuusage, | 
|---|
| 53 | }; | 
|---|
| 54 |  | 
|---|
| 55 | /* Create a new CPU accounting group */ | 
|---|
| 56 | static struct cgroup_subsys_state * | 
|---|
| 57 | cpuacct_css_alloc(struct cgroup_subsys_state *parent_css) | 
|---|
| 58 | { | 
|---|
| 59 | struct cpuacct *ca; | 
|---|
| 60 |  | 
|---|
| 61 | if (!parent_css) | 
|---|
| 62 | return &root_cpuacct.css; | 
|---|
| 63 |  | 
|---|
| 64 | ca = kzalloc(sizeof(*ca), GFP_KERNEL); | 
|---|
| 65 | if (!ca) | 
|---|
| 66 | goto out; | 
|---|
| 67 |  | 
|---|
| 68 | ca->cpuusage = alloc_percpu(u64); | 
|---|
| 69 | if (!ca->cpuusage) | 
|---|
| 70 | goto out_free_ca; | 
|---|
| 71 |  | 
|---|
| 72 | ca->cpustat = alloc_percpu(struct kernel_cpustat); | 
|---|
| 73 | if (!ca->cpustat) | 
|---|
| 74 | goto out_free_cpuusage; | 
|---|
| 75 |  | 
|---|
| 76 | return &ca->css; | 
|---|
| 77 |  | 
|---|
| 78 | out_free_cpuusage: | 
|---|
| 79 | free_percpu(pdata: ca->cpuusage); | 
|---|
| 80 | out_free_ca: | 
|---|
| 81 | kfree(objp: ca); | 
|---|
| 82 | out: | 
|---|
| 83 | return ERR_PTR(error: -ENOMEM); | 
|---|
| 84 | } | 
|---|
| 85 |  | 
|---|
| 86 | /* Destroy an existing CPU accounting group */ | 
|---|
| 87 | static void cpuacct_css_free(struct cgroup_subsys_state *css) | 
|---|
| 88 | { | 
|---|
| 89 | struct cpuacct *ca = css_ca(css); | 
|---|
| 90 |  | 
|---|
| 91 | free_percpu(pdata: ca->cpustat); | 
|---|
| 92 | free_percpu(pdata: ca->cpuusage); | 
|---|
| 93 | kfree(objp: ca); | 
|---|
| 94 | } | 
|---|
| 95 |  | 
|---|
| 96 | static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu, | 
|---|
| 97 | enum cpuacct_stat_index index) | 
|---|
| 98 | { | 
|---|
| 99 | u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); | 
|---|
| 100 | u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat; | 
|---|
| 101 | u64 data; | 
|---|
| 102 |  | 
|---|
| 103 | /* | 
|---|
| 104 | * We allow index == CPUACCT_STAT_NSTATS here to read | 
|---|
| 105 | * the sum of usages. | 
|---|
| 106 | */ | 
|---|
| 107 | if (WARN_ON_ONCE(index > CPUACCT_STAT_NSTATS)) | 
|---|
| 108 | return 0; | 
|---|
| 109 |  | 
|---|
| 110 | #ifndef CONFIG_64BIT | 
|---|
| 111 | /* | 
|---|
| 112 | * Take rq->lock to make 64-bit read safe on 32-bit platforms. | 
|---|
| 113 | */ | 
|---|
| 114 | raw_spin_rq_lock_irq(cpu_rq(cpu)); | 
|---|
| 115 | #endif | 
|---|
| 116 |  | 
|---|
| 117 | switch (index) { | 
|---|
| 118 | case CPUACCT_STAT_USER: | 
|---|
| 119 | data = cpustat[CPUTIME_USER] + cpustat[CPUTIME_NICE]; | 
|---|
| 120 | break; | 
|---|
| 121 | case CPUACCT_STAT_SYSTEM: | 
|---|
| 122 | data = cpustat[CPUTIME_SYSTEM] + cpustat[CPUTIME_IRQ] + | 
|---|
| 123 | cpustat[CPUTIME_SOFTIRQ]; | 
|---|
| 124 | break; | 
|---|
| 125 | case CPUACCT_STAT_NSTATS: | 
|---|
| 126 | data = *cpuusage; | 
|---|
| 127 | break; | 
|---|
| 128 | } | 
|---|
| 129 |  | 
|---|
| 130 | #ifndef CONFIG_64BIT | 
|---|
| 131 | raw_spin_rq_unlock_irq(cpu_rq(cpu)); | 
|---|
| 132 | #endif | 
|---|
| 133 |  | 
|---|
| 134 | return data; | 
|---|
| 135 | } | 
|---|
| 136 |  | 
|---|
| 137 | static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu) | 
|---|
| 138 | { | 
|---|
| 139 | u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); | 
|---|
| 140 | u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat; | 
|---|
| 141 |  | 
|---|
| 142 | /* Don't allow to reset global kernel_cpustat */ | 
|---|
| 143 | if (ca == &root_cpuacct) | 
|---|
| 144 | return; | 
|---|
| 145 |  | 
|---|
| 146 | #ifndef CONFIG_64BIT | 
|---|
| 147 | /* | 
|---|
| 148 | * Take rq->lock to make 64-bit write safe on 32-bit platforms. | 
|---|
| 149 | */ | 
|---|
| 150 | raw_spin_rq_lock_irq(cpu_rq(cpu)); | 
|---|
| 151 | #endif | 
|---|
| 152 | *cpuusage = 0; | 
|---|
| 153 | cpustat[CPUTIME_USER] = cpustat[CPUTIME_NICE] = 0; | 
|---|
| 154 | cpustat[CPUTIME_SYSTEM] = cpustat[CPUTIME_IRQ] = 0; | 
|---|
| 155 | cpustat[CPUTIME_SOFTIRQ] = 0; | 
|---|
| 156 |  | 
|---|
| 157 | #ifndef CONFIG_64BIT | 
|---|
| 158 | raw_spin_rq_unlock_irq(cpu_rq(cpu)); | 
|---|
| 159 | #endif | 
|---|
| 160 | } | 
|---|
| 161 |  | 
|---|
| 162 | /* Return total CPU usage (in nanoseconds) of a group */ | 
|---|
| 163 | static u64 __cpuusage_read(struct cgroup_subsys_state *css, | 
|---|
| 164 | enum cpuacct_stat_index index) | 
|---|
| 165 | { | 
|---|
| 166 | struct cpuacct *ca = css_ca(css); | 
|---|
| 167 | u64 totalcpuusage = 0; | 
|---|
| 168 | int i; | 
|---|
| 169 |  | 
|---|
| 170 | for_each_possible_cpu(i) | 
|---|
| 171 | totalcpuusage += cpuacct_cpuusage_read(ca, cpu: i, index); | 
|---|
| 172 |  | 
|---|
| 173 | return totalcpuusage; | 
|---|
| 174 | } | 
|---|
| 175 |  | 
|---|
| 176 | static u64 cpuusage_user_read(struct cgroup_subsys_state *css, | 
|---|
| 177 | struct cftype *cft) | 
|---|
| 178 | { | 
|---|
| 179 | return __cpuusage_read(css, index: CPUACCT_STAT_USER); | 
|---|
| 180 | } | 
|---|
| 181 |  | 
|---|
| 182 | static u64 cpuusage_sys_read(struct cgroup_subsys_state *css, | 
|---|
| 183 | struct cftype *cft) | 
|---|
| 184 | { | 
|---|
| 185 | return __cpuusage_read(css, index: CPUACCT_STAT_SYSTEM); | 
|---|
| 186 | } | 
|---|
| 187 |  | 
|---|
| 188 | static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft) | 
|---|
| 189 | { | 
|---|
| 190 | return __cpuusage_read(css, index: CPUACCT_STAT_NSTATS); | 
|---|
| 191 | } | 
|---|
| 192 |  | 
|---|
| 193 | static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft, | 
|---|
| 194 | u64 val) | 
|---|
| 195 | { | 
|---|
| 196 | struct cpuacct *ca = css_ca(css); | 
|---|
| 197 | int cpu; | 
|---|
| 198 |  | 
|---|
| 199 | /* | 
|---|
| 200 | * Only allow '0' here to do a reset. | 
|---|
| 201 | */ | 
|---|
| 202 | if (val) | 
|---|
| 203 | return -EINVAL; | 
|---|
| 204 |  | 
|---|
| 205 | for_each_possible_cpu(cpu) | 
|---|
| 206 | cpuacct_cpuusage_write(ca, cpu); | 
|---|
| 207 |  | 
|---|
| 208 | return 0; | 
|---|
| 209 | } | 
|---|
| 210 |  | 
|---|
| 211 | static int __cpuacct_percpu_seq_show(struct seq_file *m, | 
|---|
| 212 | enum cpuacct_stat_index index) | 
|---|
| 213 | { | 
|---|
| 214 | struct cpuacct *ca = css_ca(css: seq_css(seq: m)); | 
|---|
| 215 | u64 percpu; | 
|---|
| 216 | int i; | 
|---|
| 217 |  | 
|---|
| 218 | for_each_possible_cpu(i) { | 
|---|
| 219 | percpu = cpuacct_cpuusage_read(ca, cpu: i, index); | 
|---|
| 220 | seq_printf(m, fmt: "%llu ", (unsigned long long) percpu); | 
|---|
| 221 | } | 
|---|
| 222 | seq_printf(m, fmt: "\n"); | 
|---|
| 223 | return 0; | 
|---|
| 224 | } | 
|---|
| 225 |  | 
|---|
| 226 | static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V) | 
|---|
| 227 | { | 
|---|
| 228 | return __cpuacct_percpu_seq_show(m, index: CPUACCT_STAT_USER); | 
|---|
| 229 | } | 
|---|
| 230 |  | 
|---|
| 231 | static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V) | 
|---|
| 232 | { | 
|---|
| 233 | return __cpuacct_percpu_seq_show(m, index: CPUACCT_STAT_SYSTEM); | 
|---|
| 234 | } | 
|---|
| 235 |  | 
|---|
| 236 | static int cpuacct_percpu_seq_show(struct seq_file *m, void *V) | 
|---|
| 237 | { | 
|---|
| 238 | return __cpuacct_percpu_seq_show(m, index: CPUACCT_STAT_NSTATS); | 
|---|
| 239 | } | 
|---|
| 240 |  | 
|---|
| 241 | static int cpuacct_all_seq_show(struct seq_file *m, void *V) | 
|---|
| 242 | { | 
|---|
| 243 | struct cpuacct *ca = css_ca(css: seq_css(seq: m)); | 
|---|
| 244 | int index; | 
|---|
| 245 | int cpu; | 
|---|
| 246 |  | 
|---|
| 247 | seq_puts(m, s: "cpu"); | 
|---|
| 248 | for (index = 0; index < CPUACCT_STAT_NSTATS; index++) | 
|---|
| 249 | seq_printf(m, fmt: " %s", cpuacct_stat_desc[index]); | 
|---|
| 250 | seq_puts(m, s: "\n"); | 
|---|
| 251 |  | 
|---|
| 252 | for_each_possible_cpu(cpu) { | 
|---|
| 253 | seq_printf(m, fmt: "%d", cpu); | 
|---|
| 254 | for (index = 0; index < CPUACCT_STAT_NSTATS; index++) | 
|---|
| 255 | seq_printf(m, fmt: " %llu", | 
|---|
| 256 | cpuacct_cpuusage_read(ca, cpu, index)); | 
|---|
| 257 | seq_puts(m, s: "\n"); | 
|---|
| 258 | } | 
|---|
| 259 | return 0; | 
|---|
| 260 | } | 
|---|
| 261 |  | 
|---|
| 262 | static int cpuacct_stats_show(struct seq_file *sf, void *v) | 
|---|
| 263 | { | 
|---|
| 264 | struct cpuacct *ca = css_ca(css: seq_css(seq: sf)); | 
|---|
| 265 | struct task_cputime cputime; | 
|---|
| 266 | u64 val[CPUACCT_STAT_NSTATS]; | 
|---|
| 267 | int cpu; | 
|---|
| 268 | int stat; | 
|---|
| 269 |  | 
|---|
| 270 | memset(s: &cputime, c: 0, n: sizeof(cputime)); | 
|---|
| 271 | for_each_possible_cpu(cpu) { | 
|---|
| 272 | u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat; | 
|---|
| 273 |  | 
|---|
| 274 | cputime.utime += cpustat[CPUTIME_USER]; | 
|---|
| 275 | cputime.utime += cpustat[CPUTIME_NICE]; | 
|---|
| 276 | cputime.stime += cpustat[CPUTIME_SYSTEM]; | 
|---|
| 277 | cputime.stime += cpustat[CPUTIME_IRQ]; | 
|---|
| 278 | cputime.stime += cpustat[CPUTIME_SOFTIRQ]; | 
|---|
| 279 |  | 
|---|
| 280 | cputime.sum_exec_runtime += *per_cpu_ptr(ca->cpuusage, cpu); | 
|---|
| 281 | } | 
|---|
| 282 |  | 
|---|
| 283 | cputime_adjust(curr: &cputime, prev: &seq_css(seq: sf)->cgroup->prev_cputime, | 
|---|
| 284 | ut: &val[CPUACCT_STAT_USER], st: &val[CPUACCT_STAT_SYSTEM]); | 
|---|
| 285 |  | 
|---|
| 286 | for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) { | 
|---|
| 287 | seq_printf(m: sf, fmt: "%s %llu\n", cpuacct_stat_desc[stat], | 
|---|
| 288 | nsec_to_clock_t(x: val[stat])); | 
|---|
| 289 | } | 
|---|
| 290 |  | 
|---|
| 291 | return 0; | 
|---|
| 292 | } | 
|---|
| 293 |  | 
|---|
| 294 | static struct cftype files[] = { | 
|---|
| 295 | { | 
|---|
| 296 | .name = "usage", | 
|---|
| 297 | .read_u64 = cpuusage_read, | 
|---|
| 298 | .write_u64 = cpuusage_write, | 
|---|
| 299 | }, | 
|---|
| 300 | { | 
|---|
| 301 | .name = "usage_user", | 
|---|
| 302 | .read_u64 = cpuusage_user_read, | 
|---|
| 303 | }, | 
|---|
| 304 | { | 
|---|
| 305 | .name = "usage_sys", | 
|---|
| 306 | .read_u64 = cpuusage_sys_read, | 
|---|
| 307 | }, | 
|---|
| 308 | { | 
|---|
| 309 | .name = "usage_percpu", | 
|---|
| 310 | .seq_show = cpuacct_percpu_seq_show, | 
|---|
| 311 | }, | 
|---|
| 312 | { | 
|---|
| 313 | .name = "usage_percpu_user", | 
|---|
| 314 | .seq_show = cpuacct_percpu_user_seq_show, | 
|---|
| 315 | }, | 
|---|
| 316 | { | 
|---|
| 317 | .name = "usage_percpu_sys", | 
|---|
| 318 | .seq_show = cpuacct_percpu_sys_seq_show, | 
|---|
| 319 | }, | 
|---|
| 320 | { | 
|---|
| 321 | .name = "usage_all", | 
|---|
| 322 | .seq_show = cpuacct_all_seq_show, | 
|---|
| 323 | }, | 
|---|
| 324 | { | 
|---|
| 325 | .name = "stat", | 
|---|
| 326 | .seq_show = cpuacct_stats_show, | 
|---|
| 327 | }, | 
|---|
| 328 | { }	/* terminate */ | 
|---|
| 329 | }; | 
|---|
| 330 |  | 
|---|
| 331 | /* | 
|---|
| 332 | * charge this task's execution time to its accounting group. | 
|---|
| 333 | * | 
|---|
| 334 | * called with rq->lock held. | 
|---|
| 335 | */ | 
|---|
| 336 | void cpuacct_charge(struct task_struct *tsk, u64 cputime) | 
|---|
| 337 | { | 
|---|
| 338 | unsigned int cpu = task_cpu(p: tsk); | 
|---|
| 339 | struct cpuacct *ca; | 
|---|
| 340 |  | 
|---|
| 341 | lockdep_assert_rq_held(cpu_rq(cpu)); | 
|---|
| 342 |  | 
|---|
| 343 | for (ca = task_ca(tsk); ca; ca = parent_ca(ca)) | 
|---|
| 344 | *per_cpu_ptr(ca->cpuusage, cpu) += cputime; | 
|---|
| 345 | } | 
|---|
| 346 |  | 
|---|
| 347 | /* | 
|---|
| 348 | * Add user/system time to cpuacct. | 
|---|
| 349 | * | 
|---|
| 350 | * Note: it's the caller that updates the account of the root cgroup. | 
|---|
| 351 | */ | 
|---|
| 352 | void cpuacct_account_field(struct task_struct *tsk, int index, u64 val) | 
|---|
| 353 | { | 
|---|
| 354 | struct cpuacct *ca; | 
|---|
| 355 |  | 
|---|
| 356 | for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca)) | 
|---|
| 357 | __this_cpu_add(ca->cpustat->cpustat[index], val); | 
|---|
| 358 | } | 
|---|
| 359 |  | 
|---|
| 360 | struct cgroup_subsys cpuacct_cgrp_subsys = { | 
|---|
| 361 | .css_alloc	= cpuacct_css_alloc, | 
|---|
| 362 | .css_free	= cpuacct_css_free, | 
|---|
| 363 | .legacy_cftypes	= files, | 
|---|
| 364 | .early_init	= true, | 
|---|
| 365 | }; | 
|---|
| 366 |  | 
|---|