1// SPDX-License-Identifier: GPL-2.0
2
3/*
4 * CPU accounting code for task groups.
5 *
6 * Based on the work by Paul Menage (menage@google.com) and Balbir Singh
7 * (balbir@in.ibm.com).
8 */
9#include <linux/sched/cputime.h>
10#include "sched.h"
11
/*
 * Categories of time spent executing by the tasks of a CPU accounting
 * group. The values double as indices into cpuacct_stat_desc[].
 */
enum cpuacct_stat_index {
	/* time spent in user mode */
	CPUACCT_STAT_USER,
	/* time spent in kernel mode */
	CPUACCT_STAT_SYSTEM,

	/* number of categories above; keep last */
	CPUACCT_STAT_NSTATS,
};
19
20static const char * const cpuacct_stat_desc[] = {
21 [CPUACCT_STAT_USER] = "user",
22 [CPUACCT_STAT_SYSTEM] = "system",
23};
24
25/* track CPU usage of a group of tasks and its child groups */
26struct cpuacct {
27 struct cgroup_subsys_state css;
28 /* cpuusage holds pointer to a u64-type object on every CPU */
29 u64 __percpu *cpuusage;
30 struct kernel_cpustat __percpu *cpustat;
31};
32
33static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css)
34{
35 return css ? container_of(css, struct cpuacct, css) : NULL;
36}
37
38/* Return CPU accounting group to which this task belongs */
39static inline struct cpuacct *task_ca(struct task_struct *tsk)
40{
41 return css_ca(css: task_css(task: tsk, subsys_id: cpuacct_cgrp_id));
42}
43
44static inline struct cpuacct *parent_ca(struct cpuacct *ca)
45{
46 return css_ca(css: ca->css.parent);
47}
48
49static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage);
50static struct cpuacct root_cpuacct = {
51 .cpustat = &kernel_cpustat,
52 .cpuusage = &root_cpuacct_cpuusage,
53};
54
55/* Create a new CPU accounting group */
56static struct cgroup_subsys_state *
57cpuacct_css_alloc(struct cgroup_subsys_state *parent_css)
58{
59 struct cpuacct *ca;
60
61 if (!parent_css)
62 return &root_cpuacct.css;
63
64 ca = kzalloc(sizeof(*ca), GFP_KERNEL);
65 if (!ca)
66 goto out;
67
68 ca->cpuusage = alloc_percpu(u64);
69 if (!ca->cpuusage)
70 goto out_free_ca;
71
72 ca->cpustat = alloc_percpu(struct kernel_cpustat);
73 if (!ca->cpustat)
74 goto out_free_cpuusage;
75
76 return &ca->css;
77
78out_free_cpuusage:
79 free_percpu(pdata: ca->cpuusage);
80out_free_ca:
81 kfree(objp: ca);
82out:
83 return ERR_PTR(error: -ENOMEM);
84}
85
86/* Destroy an existing CPU accounting group */
87static void cpuacct_css_free(struct cgroup_subsys_state *css)
88{
89 struct cpuacct *ca = css_ca(css);
90
91 free_percpu(pdata: ca->cpustat);
92 free_percpu(pdata: ca->cpuusage);
93 kfree(objp: ca);
94}
95
96static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
97 enum cpuacct_stat_index index)
98{
99 u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
100 u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
101 u64 data;
102
103 /*
104 * We allow index == CPUACCT_STAT_NSTATS here to read
105 * the sum of usages.
106 */
107 if (WARN_ON_ONCE(index > CPUACCT_STAT_NSTATS))
108 return 0;
109
110#ifndef CONFIG_64BIT
111 /*
112 * Take rq->lock to make 64-bit read safe on 32-bit platforms.
113 */
114 raw_spin_rq_lock_irq(cpu_rq(cpu));
115#endif
116
117 switch (index) {
118 case CPUACCT_STAT_USER:
119 data = cpustat[CPUTIME_USER] + cpustat[CPUTIME_NICE];
120 break;
121 case CPUACCT_STAT_SYSTEM:
122 data = cpustat[CPUTIME_SYSTEM] + cpustat[CPUTIME_IRQ] +
123 cpustat[CPUTIME_SOFTIRQ];
124 break;
125 case CPUACCT_STAT_NSTATS:
126 data = *cpuusage;
127 break;
128 }
129
130#ifndef CONFIG_64BIT
131 raw_spin_rq_unlock_irq(cpu_rq(cpu));
132#endif
133
134 return data;
135}
136
137static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu)
138{
139 u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
140 u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
141
142 /* Don't allow to reset global kernel_cpustat */
143 if (ca == &root_cpuacct)
144 return;
145
146#ifndef CONFIG_64BIT
147 /*
148 * Take rq->lock to make 64-bit write safe on 32-bit platforms.
149 */
150 raw_spin_rq_lock_irq(cpu_rq(cpu));
151#endif
152 *cpuusage = 0;
153 cpustat[CPUTIME_USER] = cpustat[CPUTIME_NICE] = 0;
154 cpustat[CPUTIME_SYSTEM] = cpustat[CPUTIME_IRQ] = 0;
155 cpustat[CPUTIME_SOFTIRQ] = 0;
156
157#ifndef CONFIG_64BIT
158 raw_spin_rq_unlock_irq(cpu_rq(cpu));
159#endif
160}
161
162/* Return total CPU usage (in nanoseconds) of a group */
163static u64 __cpuusage_read(struct cgroup_subsys_state *css,
164 enum cpuacct_stat_index index)
165{
166 struct cpuacct *ca = css_ca(css);
167 u64 totalcpuusage = 0;
168 int i;
169
170 for_each_possible_cpu(i)
171 totalcpuusage += cpuacct_cpuusage_read(ca, cpu: i, index);
172
173 return totalcpuusage;
174}
175
176static u64 cpuusage_user_read(struct cgroup_subsys_state *css,
177 struct cftype *cft)
178{
179 return __cpuusage_read(css, index: CPUACCT_STAT_USER);
180}
181
182static u64 cpuusage_sys_read(struct cgroup_subsys_state *css,
183 struct cftype *cft)
184{
185 return __cpuusage_read(css, index: CPUACCT_STAT_SYSTEM);
186}
187
188static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
189{
190 return __cpuusage_read(css, index: CPUACCT_STAT_NSTATS);
191}
192
193static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
194 u64 val)
195{
196 struct cpuacct *ca = css_ca(css);
197 int cpu;
198
199 /*
200 * Only allow '0' here to do a reset.
201 */
202 if (val)
203 return -EINVAL;
204
205 for_each_possible_cpu(cpu)
206 cpuacct_cpuusage_write(ca, cpu);
207
208 return 0;
209}
210
211static int __cpuacct_percpu_seq_show(struct seq_file *m,
212 enum cpuacct_stat_index index)
213{
214 struct cpuacct *ca = css_ca(css: seq_css(seq: m));
215 u64 percpu;
216 int i;
217
218 for_each_possible_cpu(i) {
219 percpu = cpuacct_cpuusage_read(ca, cpu: i, index);
220 seq_printf(m, fmt: "%llu ", (unsigned long long) percpu);
221 }
222 seq_printf(m, fmt: "\n");
223 return 0;
224}
225
226static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V)
227{
228 return __cpuacct_percpu_seq_show(m, index: CPUACCT_STAT_USER);
229}
230
231static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V)
232{
233 return __cpuacct_percpu_seq_show(m, index: CPUACCT_STAT_SYSTEM);
234}
235
236static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
237{
238 return __cpuacct_percpu_seq_show(m, index: CPUACCT_STAT_NSTATS);
239}
240
241static int cpuacct_all_seq_show(struct seq_file *m, void *V)
242{
243 struct cpuacct *ca = css_ca(css: seq_css(seq: m));
244 int index;
245 int cpu;
246
247 seq_puts(m, s: "cpu");
248 for (index = 0; index < CPUACCT_STAT_NSTATS; index++)
249 seq_printf(m, fmt: " %s", cpuacct_stat_desc[index]);
250 seq_puts(m, s: "\n");
251
252 for_each_possible_cpu(cpu) {
253 seq_printf(m, fmt: "%d", cpu);
254 for (index = 0; index < CPUACCT_STAT_NSTATS; index++)
255 seq_printf(m, fmt: " %llu",
256 cpuacct_cpuusage_read(ca, cpu, index));
257 seq_puts(m, s: "\n");
258 }
259 return 0;
260}
261
262static int cpuacct_stats_show(struct seq_file *sf, void *v)
263{
264 struct cpuacct *ca = css_ca(css: seq_css(seq: sf));
265 struct task_cputime cputime;
266 u64 val[CPUACCT_STAT_NSTATS];
267 int cpu;
268 int stat;
269
270 memset(s: &cputime, c: 0, n: sizeof(cputime));
271 for_each_possible_cpu(cpu) {
272 u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
273
274 cputime.utime += cpustat[CPUTIME_USER];
275 cputime.utime += cpustat[CPUTIME_NICE];
276 cputime.stime += cpustat[CPUTIME_SYSTEM];
277 cputime.stime += cpustat[CPUTIME_IRQ];
278 cputime.stime += cpustat[CPUTIME_SOFTIRQ];
279
280 cputime.sum_exec_runtime += *per_cpu_ptr(ca->cpuusage, cpu);
281 }
282
283 cputime_adjust(curr: &cputime, prev: &seq_css(seq: sf)->cgroup->prev_cputime,
284 ut: &val[CPUACCT_STAT_USER], st: &val[CPUACCT_STAT_SYSTEM]);
285
286 for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) {
287 seq_printf(m: sf, fmt: "%s %llu\n", cpuacct_stat_desc[stat],
288 nsec_to_clock_t(x: val[stat]));
289 }
290
291 return 0;
292}
293
294static struct cftype files[] = {
295 {
296 .name = "usage",
297 .read_u64 = cpuusage_read,
298 .write_u64 = cpuusage_write,
299 },
300 {
301 .name = "usage_user",
302 .read_u64 = cpuusage_user_read,
303 },
304 {
305 .name = "usage_sys",
306 .read_u64 = cpuusage_sys_read,
307 },
308 {
309 .name = "usage_percpu",
310 .seq_show = cpuacct_percpu_seq_show,
311 },
312 {
313 .name = "usage_percpu_user",
314 .seq_show = cpuacct_percpu_user_seq_show,
315 },
316 {
317 .name = "usage_percpu_sys",
318 .seq_show = cpuacct_percpu_sys_seq_show,
319 },
320 {
321 .name = "usage_all",
322 .seq_show = cpuacct_all_seq_show,
323 },
324 {
325 .name = "stat",
326 .seq_show = cpuacct_stats_show,
327 },
328 { } /* terminate */
329};
330
331/*
332 * charge this task's execution time to its accounting group.
333 *
334 * called with rq->lock held.
335 */
336void cpuacct_charge(struct task_struct *tsk, u64 cputime)
337{
338 unsigned int cpu = task_cpu(p: tsk);
339 struct cpuacct *ca;
340
341 lockdep_assert_rq_held(cpu_rq(cpu));
342
343 for (ca = task_ca(tsk); ca; ca = parent_ca(ca))
344 *per_cpu_ptr(ca->cpuusage, cpu) += cputime;
345}
346
347/*
348 * Add user/system time to cpuacct.
349 *
350 * Note: it's the caller that updates the account of the root cgroup.
351 */
352void cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
353{
354 struct cpuacct *ca;
355
356 for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca))
357 __this_cpu_add(ca->cpustat->cpustat[index], val);
358}
359
360struct cgroup_subsys cpuacct_cgrp_subsys = {
361 .css_alloc = cpuacct_css_alloc,
362 .css_free = cpuacct_css_free,
363 .legacy_cftypes = files,
364 .early_init = true,
365};
366