cgroup-v1.c source code [Linux/kernel/cgroup/cgroup-v1.c]

1	// SPDX-License-Identifier: GPL-2.0-only
2	#include "cgroup-internal.h"
3
4	#include <linux/ctype.h>
5	#include <linux/kmod.h>
6	#include <linux/sort.h>
7	#include <linux/delay.h>
8	#include <linux/mm.h>
9	#include <linux/sched/signal.h>
10	#include <linux/sched/task.h>
11	#include <linux/magic.h>
12	#include <linux/slab.h>
13	#include <linux/string.h>
14	#include <linux/vmalloc.h>
15	#include <linux/delayacct.h>
16	#include <linux/pid_namespace.h>
17	#include <linux/cgroupstats.h>
18	#include <linux/fs_parser.h>
19
20	#include <trace/events/cgroup.h>
21
22	/*
23	* pidlists linger the following amount before being destroyed. The goal
24	* is avoiding frequent destruction in the middle of consecutive read calls
25	* Expiring in the middle is a performance problem not a correctness one.
26	* 1 sec should be enough.
27	*/
28	#define CGROUP_PIDLIST_DESTROY_DELAY HZ
29
30	/ Controllers blocked by the commandline in v1 /
31	static u16 cgroup_no_v1_mask;
32
33	/ disable named v1 mounts /
34	static bool cgroup_no_v1_named;
35
36	/ Show unavailable controllers in /proc/cgroups /
37	static bool proc_show_all;
38
39	/*
40	* pidlist destructions need to be flushed on cgroup destruction. Use a
41	* separate workqueue as flush domain.
42	*/
43	static struct workqueue_struct *cgroup_pidlist_destroy_wq;
44
45	/ protects cgroup_subsys->release_agent_path /
46	static DEFINE_SPINLOCK(release_agent_path_lock);
47
48	bool cgroup1_ssid_disabled(int ssid)
49	{
50	return cgroup_no_v1_mask & (`1` << ssid);
51	}
52
53	static bool cgroup1_subsys_absent(struct cgroup_subsys *ss)
54	{
55	/ Check also dfl_cftypes for file-less controllers, i.e. perf_event /
56	return ss->legacy_cftypes == NULL && ss->dfl_cftypes;
57	}
58
59	/**
60	* cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from'
61	* @from: attach to all cgroups of a given task
62	* @tsk: the task to be attached
63	*
64	* Return: %0 on success or a negative errno code on failure
65	*/
66	int cgroup_attach_task_all(struct task_struct from, struct* task_struct *tsk)
67	{
68	struct cgroup_root *root;
69	int retval = `0`;
70
71	cgroup_lock();
72	cgroup_attach_lock(lock_mode: CGRP_ATTACH_LOCK_GLOBAL, NULL);
73	for_each_root(root) {
74	struct cgroup *from_cgrp;
75
76	spin_lock_irq(lock: &css_set_lock);
77	from_cgrp = task_cgroup_from_root(task: from, root);
78	spin_unlock_irq(lock: &css_set_lock);
79
80	retval = cgroup_attach_task(dst_cgrp: from_cgrp, leader: tsk, threadgroup: false);
81	if (retval)
82	break;
83	}
84	cgroup_attach_unlock(lock_mode: CGRP_ATTACH_LOCK_GLOBAL, NULL);
85	cgroup_unlock();
86
87	return retval;
88	}
89	EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
90
91	/**
92	* cgroup_transfer_tasks - move tasks from one cgroup to another
93	* @to: cgroup to which the tasks will be moved
94	* @from: cgroup in which the tasks currently reside
95	*
96	* Locking rules between cgroup_post_fork() and the migration path
97	* guarantee that, if a task is forking while being migrated, the new child
98	* is guaranteed to be either visible in the source cgroup after the
99	* parent's migration is complete or put into the target cgroup. No task
100	* can slip out of migration through forking.
101	*
102	* Return: %0 on success or a negative errno code on failure
103	*/
104	int cgroup_transfer_tasks(struct cgroup to, struct* cgroup *from)
105	{
106	DEFINE_CGROUP_MGCTX(mgctx);
107	struct cgrp_cset_link *link;
108	struct css_task_iter it;
109	struct task_struct *task;
110	int ret;
111
112	if (cgroup_on_dfl(cgrp: to))
113	return -EINVAL;
114
115	ret = cgroup_migrate_vet_dst(dst_cgrp: to);
116	if (ret)
117	return ret;
118
119	cgroup_lock();
120
121	cgroup_attach_lock(lock_mode: CGRP_ATTACH_LOCK_GLOBAL, NULL);
122
123	/ all tasks in @from are being moved, all csets are source /
124	spin_lock_irq(lock: &css_set_lock);
125	list_for_each_entry(link, &from->cset_links, cset_link)
126	cgroup_migrate_add_src(src_cset: link->cset, dst_cgrp: to, mgctx: &mgctx);
127	spin_unlock_irq(lock: &css_set_lock);
128
129	ret = cgroup_migrate_prepare_dst(mgctx: &mgctx);
130	if (ret)
131	goto out_err;
132
133	/*
134	* Migrate tasks one-by-one until @from is empty. This fails iff
135	* ->can_attach() fails.
136	*/
137	do {
138	css_task_iter_start(css: &from->self, flags: `0`, it: &it);
139
140	do {
141	task = css_task_iter_next(it: &it);
142	} while (task && (task->flags & PF_EXITING));
143
144	if (task)
145	get_task_struct(t: task);
146	css_task_iter_end(it: &it);
147
148	if (task) {
149	ret = cgroup_migrate(leader: task, threadgroup: false, mgctx: &mgctx);
150	if (!ret)
151	TRACE_CGROUP_PATH(transfer_tasks, to, task, false);
152	put_task_struct(t: task);
153	}
154	} while (task && !ret);
155	out_err:
156	cgroup_migrate_finish(mgctx: &mgctx);
157	cgroup_attach_unlock(lock_mode: CGRP_ATTACH_LOCK_GLOBAL, NULL);
158	cgroup_unlock();
159	return ret;
160	}
161
/*
 * Stuff for reading the 'tasks'/'procs' files.
 *
 * Reading this file can return large amounts of data if a cgroup has
 * *lots* of attached tasks. So it may need several calls to read(),
 * but we cannot guarantee that the information we produce is correct
 * unless we produce it entirely atomically.
 *
 */

/* which pidlist file are we talking about? */
enum cgroup_filetype {
	CGROUP_FILE_PROCS,
	CGROUP_FILE_TASKS,
};
177
178	/*
179	* A pidlist is a list of pids that virtually represents the contents of one
180	* of the cgroup files ("procs" or "tasks"). We keep a list of such pidlists,
181	* a pair (one each for procs, tasks) for each pid namespace that's relevant
182	* to the cgroup.
183	*/
184	struct cgroup_pidlist {
185	/*
186	* used to find which pidlist is wanted. doesn't change as long as
187	* this particular list stays in the list.
188	*/
189	struct { enum cgroup_filetype type; struct pid_namespace *ns; } key;
190	/ array of xids /
191	pid_t *list;
192	/ how many elements the above list has /
193	int length;
194	/ each of these stored in a list by its cgroup /
195	struct list_head links;
196	/ pointer to the cgroup we belong to, for list removal purposes /
197	struct cgroup *owner;
198	/ for delayed destruction /
199	struct delayed_work destroy_dwork;
200	};
201
202	/*
203	* Used to destroy all pidlists lingering waiting for destroy timer. None
204	* should be left afterwards.
205	*/
206	void cgroup1_pidlist_destroy_all(struct cgroup *cgrp)
207	{
208	struct cgroup_pidlist l, tmp_l;
209
210	mutex_lock(lock: &cgrp->pidlist_mutex);
211	list_for_each_entry_safe(l, tmp_l, &cgrp->pidlists, links)
212	mod_delayed_work(wq: cgroup_pidlist_destroy_wq, dwork: &l->destroy_dwork, delay: `0`);
213	mutex_unlock(lock: &cgrp->pidlist_mutex);
214
215	flush_workqueue(cgroup_pidlist_destroy_wq);
216	BUG_ON(!list_empty(&cgrp->pidlists));
217	}
218
219	static void cgroup_pidlist_destroy_work_fn(struct work_struct *work)
220	{
221	struct delayed_work *dwork = to_delayed_work(work);
222	struct cgroup_pidlist l = container_of(dwork, struct* cgroup_pidlist,
223	destroy_dwork);
224	struct cgroup_pidlist *tofree = NULL;
225
226	mutex_lock(lock: &l->owner->pidlist_mutex);
227
228	/*
229	* Destroy iff we didn't get queued again. The state won't change
230	* as destroy_dwork can only be queued while locked.
231	*/
232	if (!delayed_work_pending(dwork)) {
233	list_del(entry: &l->links);
234	kvfree(addr: l->list);
235	put_pid_ns(ns: l->key.ns);
236	tofree = l;
237	}
238
239	mutex_unlock(lock: &l->owner->pidlist_mutex);
240	kfree(objp: tofree);
241	}
242
/*
 * pidlist_uniq - given a kmalloc()ed list, strip out all duplicate entries
 * @list: array of pids; equal values must be adjacent (i.e. sorted input)
 * @length: number of entries in @list
 *
 * Compacts @list in place so that the unique values occupy the front.
 *
 * Returns the number of unique elements.
 */
static int pidlist_uniq(pid_t *list, int length)
{
	int src, dest = 1;

	/*
	 * we presume the 0th element is unique, so i starts at 1. trivial
	 * edge cases first; no work needs to be done for either
	 */
	if (length == 0 || length == 1)
		return length;
	/* src and dest walk down the list; dest counts unique elements */
	for (src = 1; src < length; src++) {
		/* find next unique element */
		while (list[src] == list[src-1]) {
			src++;
			if (src == length)
				goto after;
		}
		/* dest always points to where the next unique element goes */
		list[dest] = list[src];
		dest++;
	}
after:
	return dest;
}
272
/*
 * The two pid files - task and cgroup.procs - guaranteed that the result
 * is sorted, which forced this whole pidlist fiasco.  As pid order is
 * different per namespace, each namespace needs differently sorted list,
 * making it impossible to use, for example, single rbtree of member tasks
 * sorted by task pointer.  As pidlists can be fairly large, allocating one
 * per open file is dangerous, so cgroup had to implement shared pool of
 * pidlists keyed by cgroup and namespace.
 */

/*
 * cmppid - three-way comparison of two pid_t values for sort()
 * @a: pointer to the first pid_t
 * @b: pointer to the second pid_t
 *
 * Returns a negative value, zero or a positive value if *@a is less than,
 * equal to or greater than *@b.  Uses comparisons rather than subtraction
 * so the result cannot overflow for any pair of values.
 */
static int cmppid(const void *a, const void *b)
{
	const pid_t lhs = *(const pid_t *)a;
	const pid_t rhs = *(const pid_t *)b;

	return (lhs > rhs) - (lhs < rhs);
}
286
287	static struct cgroup_pidlist cgroup_pidlist_find(struct* cgroup *cgrp,
288	enum cgroup_filetype type)
289	{
290	struct cgroup_pidlist *l;
291	/ don't need task_nsproxy() if we're looking at ourself /
292	struct pid_namespace *ns = task_active_pid_ns(current);
293
294	lockdep_assert_held(&cgrp->pidlist_mutex);
295
296	list_for_each_entry(l, &cgrp->pidlists, links)
297	if (l->key.type == type && l->key.ns == ns)
298	return l;
299	return NULL;
300	}
301
302	/*
303	* find the appropriate pidlist for our purpose (given procs vs tasks)
304	* returns with the lock on that pidlist already held, and takes care
305	* of the use count, or returns NULL with no locks held if we're out of
306	* memory.
307	*/
308	static struct cgroup_pidlist cgroup_pidlist_find_create(struct* cgroup *cgrp,
309	enum cgroup_filetype type)
310	{
311	struct cgroup_pidlist *l;
312
313	lockdep_assert_held(&cgrp->pidlist_mutex);
314
315	l = cgroup_pidlist_find(cgrp, type);
316	if (l)
317	return l;
318
319	/ entry not found; create a new one /
320	l = kzalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
321	if (!l)
322	return l;
323
324	INIT_DELAYED_WORK(&l->destroy_dwork, cgroup_pidlist_destroy_work_fn);
325	l->key.type = type;
326	/ don't need task_nsproxy() if we're looking at ourself /
327	l->key.ns = get_pid_ns(ns: task_active_pid_ns(current));
328	l->owner = cgrp;
329	list_add(new: &l->links, head: &cgrp->pidlists);
330	return l;
331	}
332
333	/*
334	* Load a cgroup's pidarray with either procs' tgids or tasks' pids
335	*/
336	static int pidlist_array_load(struct cgroup cgrp, enum* cgroup_filetype type,
337	struct cgroup_pidlist **lp)
338	{
339	pid_t *array;
340	int length;
341	int pid, n = `0`; / used for populating the array /
342	struct css_task_iter it;
343	struct task_struct *tsk;
344	struct cgroup_pidlist *l;
345
346	lockdep_assert_held(&cgrp->pidlist_mutex);
347
348	/*
349	* If cgroup gets more users after we read count, we won't have
350	* enough space - tough. This race is indistinguishable to the
351	* caller from the case that the additional cgroup users didn't
352	* show up until sometime later on.
353	*/
354	length = cgroup_task_count(cgrp);
355	array = kvmalloc_array(length, sizeof(pid_t), GFP_KERNEL);
356	if (!array)
357	return -ENOMEM;
358	/ now, populate the array /
359	css_task_iter_start(css: &cgrp->self, flags: `0`, it: &it);
360	while ((tsk = css_task_iter_next(it: &it))) {
361	if (unlikely(n == length))
362	break;
363	/ get tgid or pid for procs or tasks file respectively /
364	if (type == CGROUP_FILE_PROCS)
365	pid = task_tgid_vnr(tsk);
366	else
367	pid = task_pid_vnr(tsk);
368	if (pid > `0`) / make sure to only use valid results /
369	array[n++] = pid;
370	}
371	css_task_iter_end(it: &it);
372	length = n;
373	/ now sort & strip out duplicates (tgids or recycled thread PIDs) /
374	sort(base: array, num: length, size: sizeof(pid_t), cmp_func: cmppid, NULL);
375	length = pidlist_uniq(list: array, length);
376
377	l = cgroup_pidlist_find_create(cgrp, type);
378	if (!l) {
379	kvfree(addr: array);
380	return -ENOMEM;
381	}
382
383	/ store array, freeing old if necessary /
384	kvfree(addr: l->list);
385	l->list = array;
386	l->length = length;
387	*lp = l;
388	return `0`;
389	}
390
391	/*
392	* seq_file methods for the tasks/procs files. The seq_file position is the
393	* next pid to display; the seq_file iterator is a pointer to the pid
394	* in the cgroup->l->list array.
395	*/
396
397	static void cgroup_pidlist_start(struct* seq_file s, loff_t pos)
398	{
399	/*
400	* Initially we receive a position value that corresponds to
401	* one more than the last pid shown (or 0 on the first call or
402	* after a seek to the start). Use a binary-search to find the
403	* next pid to display, if any
404	*/
405	struct kernfs_open_file *of = s->private;
406	struct cgroup_file_ctx *ctx = of->priv;
407	struct cgroup *cgrp = seq_css(seq: s)->cgroup;
408	struct cgroup_pidlist *l;
409	enum cgroup_filetype type = seq_cft(seq: s)->private;
410	int index = `0`, pid = *pos;
411	int *iter, ret;
412
413	mutex_lock(lock: &cgrp->pidlist_mutex);
414
415	/*
416	* !NULL @ctx->procs1.pidlist indicates that this isn't the first
417	* start() after open. If the matching pidlist is around, we can use
418	* that. Look for it. Note that @ctx->procs1.pidlist can't be used
419	* directly. It could already have been destroyed.
420	*/
421	if (ctx->procs1.pidlist)
422	ctx->procs1.pidlist = cgroup_pidlist_find(cgrp, type);
423
424	/*
425	* Either this is the first start() after open or the matching
426	* pidlist has been destroyed inbetween. Create a new one.
427	*/
428	if (!ctx->procs1.pidlist) {
429	ret = pidlist_array_load(cgrp, type, lp: &ctx->procs1.pidlist);
430	if (ret)
431	return ERR_PTR(error: ret);
432	}
433	l = ctx->procs1.pidlist;
434
435	if (pid) {
436	int end = l->length;
437
438	while (index < end) {
439	int mid = (index + end) / `2`;
440	if (l->list[mid] == pid) {
441	index = mid;
442	break;
443	} else if (l->list[mid] < pid)
444	index = mid + `1`;
445	else
446	end = mid;
447	}
448	}
449	/ If we're off the end of the array, we're done /
450	if (index >= l->length)
451	return NULL;
452	/ Update the abstract position to be the actual pid that we found /
453	iter = l->list + index;
454	pos = iter;
455	return iter;
456	}
457
458	static void cgroup_pidlist_stop(struct seq_file s, void* *v)
459	{
460	struct kernfs_open_file *of = s->private;
461	struct cgroup_file_ctx *ctx = of->priv;
462	struct cgroup_pidlist *l = ctx->procs1.pidlist;
463
464	if (l)
465	mod_delayed_work(wq: cgroup_pidlist_destroy_wq, dwork: &l->destroy_dwork,
466	CGROUP_PIDLIST_DESTROY_DELAY);
467	mutex_unlock(lock: &seq_css(seq: s)->cgroup->pidlist_mutex);
468	}
469
470	static void cgroup_pidlist_next(struct* seq_file s, void* v, loff_t pos)
471	{
472	struct kernfs_open_file *of = s->private;
473	struct cgroup_file_ctx *ctx = of->priv;
474	struct cgroup_pidlist *l = ctx->procs1.pidlist;
475	pid_t *p = v;
476	pid_t *end = l->list + l->length;
477	/*
478	* Advance to the next pid in the array. If this goes off the
479	* end, we're done
480	*/
481	p++;
482	if (p >= end) {
483	(*pos)++;
484	return NULL;
485	} else {
486	pos = p;
487	return p;
488	}
489	}
490
/* seq_file show method: print the pid the iterator @v points at. */
static int cgroup_pidlist_show(struct seq_file *s, void *v)
{
	seq_printf(s, "%d\n", *(int *)v);

	return 0;
}
497
498	static ssize_t __cgroup1_procs_write(struct kernfs_open_file *of,
499	char *buf, size_t nbytes, loff_t off,
500	bool threadgroup)
501	{
502	struct cgroup *cgrp;
503	struct task_struct *task;
504	const struct cred cred, tcred;
505	ssize_t ret;
506	enum cgroup_attach_lock_mode lock_mode;
507
508	cgrp = cgroup_kn_lock_live(kn: of->kn, drain_offline: false);
509	if (!cgrp)
510	return -ENODEV;
511
512	task = cgroup_procs_write_start(buf, threadgroup, lock_mode: &lock_mode);
513	ret = PTR_ERR_OR_ZERO(ptr: task);
514	if (ret)
515	goto out_unlock;
516
517	/*
518	* Even if we're attaching all tasks in the thread group, we only need
519	* to check permissions on one of them. Check permissions using the
520	* credentials from file open to protect against inherited fd attacks.
521	*/
522	cred = of->file->f_cred;
523	tcred = get_task_cred(task);
524	if (!uid_eq(left: cred->euid, GLOBAL_ROOT_UID) &&
525	!uid_eq(cred->euid, tcred->uid) &&
526	!uid_eq(cred->euid, tcred->suid))
527	ret = -EACCES;
528	put_cred(tcred);
529	if (ret)
530	goto out_finish;
531
532	ret = cgroup_attach_task(cgrp, task, threadgroup);
533
534	out_finish:
535	cgroup_procs_write_finish(task, lock_mode);
536	out_unlock:
537	cgroup_kn_unlock(of->kn);
538
539	return ret ?: nbytes;
540	}
541
542	static ssize_t cgroup1_procs_write(struct kernfs_open_file *of,
543	char *buf, size_t nbytes, loff_t off)
544	{
545	return __cgroup1_procs_write(of, buf, nbytes, off, threadgroup: true);
546	}
547
548	static ssize_t cgroup1_tasks_write(struct kernfs_open_file *of,
549	char *buf, size_t nbytes, loff_t off)
550	{
551	return __cgroup1_procs_write(of, buf, nbytes, off, threadgroup: false);
552	}
553
554	static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of,
555	char *buf, size_t nbytes, loff_t off)
556	{
557	struct cgroup *cgrp;
558	struct cgroup_file_ctx *ctx;
559
560	BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
561
562	/*
563	* Release agent gets called with all capabilities,
564	* require capabilities to set release agent.
565	*/
566	ctx = of->priv;
567	if ((ctx->ns->user_ns != &init_user_ns) \|\|
568	!file_ns_capable(file: of->file, ns: &init_user_ns, CAP_SYS_ADMIN))
569	return -EPERM;
570
571	cgrp = cgroup_kn_lock_live(kn: of->kn, drain_offline: false);
572	if (!cgrp)
573	return -ENODEV;
574	spin_lock(lock: &release_agent_path_lock);
575	strscpy(cgrp->root->release_agent_path, strstrip(buf),
576	sizeof(cgrp->root->release_agent_path));
577	spin_unlock(lock: &release_agent_path_lock);
578	cgroup_kn_unlock(kn: of->kn);
579	return nbytes;
580	}
581
582	static int cgroup_release_agent_show(struct seq_file seq, void* *v)
583	{
584	struct cgroup *cgrp = seq_css(seq)->cgroup;
585
586	spin_lock(lock: &release_agent_path_lock);
587	seq_puts(m: seq, s: cgrp->root->release_agent_path);
588	spin_unlock(lock: &release_agent_path_lock);
589	seq_putc(m: seq, c: `'\n'`);
590	return `0`;
591	}
592
/* Show handler for "cgroup.sane_behavior": always prints "0". */
static int cgroup_sane_behavior_show(struct seq_file *seq, void *v)
{
	seq_puts(seq, "0\n");
	return 0;
}
598
599	static u64 cgroup_read_notify_on_release(struct cgroup_subsys_state *css,
600	struct cftype *cft)
601	{
602	return notify_on_release(cgrp: css->cgroup);
603	}
604
605	static int cgroup_write_notify_on_release(struct cgroup_subsys_state *css,
606	struct cftype *cft, u64 val)
607	{
608	if (val)
609	set_bit(nr: CGRP_NOTIFY_ON_RELEASE, addr: &css->cgroup->flags);
610	else
611	clear_bit(nr: CGRP_NOTIFY_ON_RELEASE, addr: &css->cgroup->flags);
612	return `0`;
613	}
614
615	static u64 cgroup_clone_children_read(struct cgroup_subsys_state *css,
616	struct cftype *cft)
617	{
618	return test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags);
619	}
620
621	static int cgroup_clone_children_write(struct cgroup_subsys_state *css,
622	struct cftype *cft, u64 val)
623	{
624	if (val)
625	set_bit(nr: CGRP_CPUSET_CLONE_CHILDREN, addr: &css->cgroup->flags);
626	else
627	clear_bit(nr: CGRP_CPUSET_CLONE_CHILDREN, addr: &css->cgroup->flags);
628	return `0`;
629	}
630
631	/ cgroup core interface files for the legacy hierarchies /
632	struct cftype cgroup1_base_files[] = {
633	{
634	.name = "cgroup.procs",
635	.seq_start = cgroup_pidlist_start,
636	.seq_next = cgroup_pidlist_next,
637	.seq_stop = cgroup_pidlist_stop,
638	.seq_show = cgroup_pidlist_show,
639	.private = CGROUP_FILE_PROCS,
640	.write = cgroup1_procs_write,
641	},
642	{
643	.name = "cgroup.clone_children",
644	.read_u64 = cgroup_clone_children_read,
645	.write_u64 = cgroup_clone_children_write,
646	},
647	{
648	.name = "cgroup.sane_behavior",
649	.flags = CFTYPE_ONLY_ON_ROOT,
650	.seq_show = cgroup_sane_behavior_show,
651	},
652	{
653	.name = "tasks",
654	.seq_start = cgroup_pidlist_start,
655	.seq_next = cgroup_pidlist_next,
656	.seq_stop = cgroup_pidlist_stop,
657	.seq_show = cgroup_pidlist_show,
658	.private = CGROUP_FILE_TASKS,
659	.write = cgroup1_tasks_write,
660	},
661	{
662	.name = "notify_on_release",
663	.read_u64 = cgroup_read_notify_on_release,
664	.write_u64 = cgroup_write_notify_on_release,
665	},
666	{
667	.name = "release_agent",
668	.flags = CFTYPE_ONLY_ON_ROOT,
669	.seq_show = cgroup_release_agent_show,
670	.write = cgroup_release_agent_write,
671	.max_write_len = PATH_MAX - `1`,
672	},
673	{ } / terminate /
674	};
675
676	/ Display information about each subsystem and each hierarchy /
677	int proc_cgroupstats_show(struct seq_file m, void* *v)
678	{
679	struct cgroup_subsys *ss;
680	bool cgrp_v1_visible = false;
681	int i;
682
683	seq_puts(m, s: "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
684	/*
685	* Grab the subsystems state racily. No need to add avenue to
686	* cgroup_mutex contention.
687	*/
688
689	for_each_subsys(ss, i) {
690	cgrp_v1_visible \|= ss->root != &cgrp_dfl_root;
691
692	if (!proc_show_all && cgroup1_subsys_absent(ss))
693	continue;
694
695	seq_printf(m, fmt: "%s\t%d\t%d\t%d\n",
696	ss->legacy_name, ss->root->hierarchy_id,
697	atomic_read(v: &ss->root->nr_cgrps),
698	cgroup_ssid_enabled(ssid: i));
699	}
700
701	if (cgrp_dfl_visible && !cgrp_v1_visible)
702	pr_info_once("/proc/cgroups lists only v1 controllers, use cgroup.controllers of root cgroup for v2 info\n");
703
704
705	return `0`;
706	}
707
708	/**
709	* cgroupstats_build - build and fill cgroupstats
710	* @stats: cgroupstats to fill information into
711	* @dentry: A dentry entry belonging to the cgroup for which stats have
712	* been requested.
713	*
714	* Build and fill cgroupstats so that taskstats can export it to user
715	* space.
716	*
717	* Return: %0 on success or a negative errno code on failure
718	*/
719	int cgroupstats_build(struct cgroupstats stats, struct* dentry *dentry)
720	{
721	struct kernfs_node *kn = kernfs_node_from_dentry(dentry);
722	struct cgroup *cgrp;
723	struct css_task_iter it;
724	struct task_struct *tsk;
725
726	/ it should be kernfs_node belonging to cgroupfs and is a directory /
727	if (dentry->d_sb->s_type != &cgroup_fs_type \|\| !kn \|\|
728	kernfs_type(kn) != KERNFS_DIR)
729	return -EINVAL;
730
731	/*
732	* We aren't being called from kernfs and there's no guarantee on
733	* @kn->priv's validity. For this and css_tryget_online_from_dir(),
734	* @kn->priv is RCU safe. Let's do the RCU dancing.
735	*/
736	rcu_read_lock();
737	cgrp = rcu_dereference((void* __rcu __force **)&kn->priv);
738	if (!cgrp \|\| !cgroup_tryget(cgrp)) {
739	rcu_read_unlock();
740	return -ENOENT;
741	}
742	rcu_read_unlock();
743
744	css_task_iter_start(css: &cgrp->self, flags: `0`, it: &it);
745	while ((tsk = css_task_iter_next(it: &it))) {
746	switch (READ_ONCE(tsk->__state)) {
747	case TASK_RUNNING:
748	stats->nr_running++;
749	break;
750	case TASK_INTERRUPTIBLE:
751	stats->nr_sleeping++;
752	break;
753	case TASK_UNINTERRUPTIBLE:
754	stats->nr_uninterruptible++;
755	break;
756	case TASK_STOPPED:
757	stats->nr_stopped++;
758	break;
759	default:
760	if (tsk->in_iowait)
761	stats->nr_io_wait++;
762	break;
763	}
764	}
765	css_task_iter_end(it: &it);
766
767	cgroup_put(cgrp);
768	return `0`;
769	}
770
771	void cgroup1_check_for_release(struct cgroup *cgrp)
772	{
773	if (notify_on_release(cgrp) && !cgroup_is_populated(cgrp) &&
774	!css_has_online_children(css: &cgrp->self) && !cgroup_is_dead(cgrp))
775	schedule_work(work: &cgrp->release_agent_work);
776	}
777
778	/*
779	* Notify userspace when a cgroup is released, by running the
780	* configured release agent with the name of the cgroup (path
781	* relative to the root of cgroup file system) as the argument.
782	*
783	* Most likely, this user command will try to rmdir this cgroup.
784	*
785	* This races with the possibility that some other task will be
786	* attached to this cgroup before it is removed, or that some other
787	* user task will 'mkdir' a child cgroup of this cgroup. That's ok.
788	* The presumed 'rmdir' will fail quietly if this cgroup is no longer
789	* unused, and this cgroup will be reprieved from its death sentence,
790	* to continue to serve a useful existence. Next time it's released,
791	* we will get notified again, if it still has 'notify_on_release' set.
792	*
793	* The final arg to call_usermodehelper() is UMH_WAIT_EXEC, which
794	* means only wait until the task is successfully execve()'d. The
795	* separate release agent task is forked by call_usermodehelper(),
796	* then control in this thread returns here, without waiting for the
797	* release agent task. We don't bother to wait because the caller of
798	* this routine has no use for the exit status of the release agent
799	* task, so no sense holding our caller up for that.
800	*/
801	void cgroup1_release_agent(struct work_struct *work)
802	{
803	struct cgroup *cgrp =
804	container_of(work, struct cgroup, release_agent_work);
805	char pathbuf, agentbuf;
806	char argv[`3`], envp[`3`];
807	int ret;
808
809	/ snoop agent path and exit early if empty /
810	if (!cgrp->root->release_agent_path[`0`])
811	return;
812
813	/ prepare argument buffers /
814	pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
815	agentbuf = kmalloc(PATH_MAX, GFP_KERNEL);
816	if (!pathbuf \|\| !agentbuf)
817	goto out_free;
818
819	spin_lock(lock: &release_agent_path_lock);
820	strscpy(agentbuf, cgrp->root->release_agent_path, PATH_MAX);
821	spin_unlock(lock: &release_agent_path_lock);
822	if (!agentbuf[`0`])
823	goto out_free;
824
825	ret = cgroup_path_ns(cgrp, buf: pathbuf, PATH_MAX, ns: &init_cgroup_ns);
826	if (ret < `0`)
827	goto out_free;
828
829	argv[`0`] = agentbuf;
830	argv[`1`] = pathbuf;
831	argv[`2`] = NULL;
832
833	/ minimal command environment /
834	envp[`0`] = "HOME=/";
835	envp[`1`] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
836	envp[`2`] = NULL;
837
838	call_usermodehelper(path: argv[`0`], argv, envp, UMH_WAIT_EXEC);
839	out_free:
840	kfree(objp: agentbuf);
841	kfree(objp: pathbuf);
842	}
843
844	/*
845	* cgroup_rename - Only allow simple rename of directories in place.
846	*/
847	static int cgroup1_rename(struct kernfs_node kn, struct* kernfs_node *new_parent,
848	const char *new_name_str)
849	{
850	struct cgroup *cgrp = kn->priv;
851	int ret;
852
853	/ do not accept '\n' to prevent making /proc/<pid>/cgroup unparsable /
854	if (strchr(new_name_str, `'\n'`))
855	return -EINVAL;
856
857	if (kernfs_type(kn) != KERNFS_DIR)
858	return -ENOTDIR;
859	if (rcu_access_pointer(kn->__parent) != new_parent)
860	return -EIO;
861
862	/*
863	* We're gonna grab cgroup_mutex which nests outside kernfs
864	* active_ref. kernfs_rename() doesn't require active_ref
865	* protection. Break them before grabbing cgroup_mutex.
866	*/
867	kernfs_break_active_protection(kn: new_parent);
868	kernfs_break_active_protection(kn);
869
870	cgroup_lock();
871
872	ret = kernfs_rename(kn, new_parent, new_name: new_name_str);
873	if (!ret)
874	TRACE_CGROUP_PATH(rename, cgrp);
875
876	cgroup_unlock();
877
878	kernfs_unbreak_active_protection(kn);
879	kernfs_unbreak_active_protection(kn: new_parent);
880	return ret;
881	}
882
883	static int cgroup1_show_options(struct seq_file seq, struct* kernfs_root *kf_root)
884	{
885	struct cgroup_root *root = cgroup_root_from_kf(kf_root);
886	struct cgroup_subsys *ss;
887	int ssid;
888
889	for_each_subsys(ss, ssid)
890	if (root->subsys_mask & (`1` << ssid))
891	seq_show_option(m: seq, name: ss->legacy_name, NULL);
892	if (root->flags & CGRP_ROOT_NOPREFIX)
893	seq_puts(m: seq, s: ",noprefix");
894	if (root->flags & CGRP_ROOT_XATTR)
895	seq_puts(m: seq, s: ",xattr");
896	if (root->flags & CGRP_ROOT_CPUSET_V2_MODE)
897	seq_puts(m: seq, s: ",cpuset_v2_mode");
898	if (root->flags & CGRP_ROOT_FAVOR_DYNMODS)
899	seq_puts(m: seq, s: ",favordynmods");
900
901	spin_lock(lock: &release_agent_path_lock);
902	if (strlen(root->release_agent_path))
903	seq_show_option(m: seq, name: "release_agent",
904	value: root->release_agent_path);
905	spin_unlock(lock: &release_agent_path_lock);
906
907	if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags))
908	seq_puts(m: seq, s: ",clone_children");
909	if (strlen(root->name))
910	seq_show_option(m: seq, name: "name", value: root->name);
911	return `0`;
912	}
913
/* Option IDs matched by cgroup1_fs_parameters and cgroup1_parse_param() */
enum cgroup1_param {
	Opt_all,
	Opt_clone_children,
	Opt_cpuset_v2_mode,
	Opt_name,
	Opt_none,
	Opt_noprefix,
	Opt_release_agent,
	Opt_xattr,
	Opt_favordynmods,
	Opt_nofavordynmods,
};
926
927	const struct fs_parameter_spec cgroup1_fs_parameters[] = {
928	fsparam_flag ("all", Opt_all),
929	fsparam_flag ("clone_children", Opt_clone_children),
930	fsparam_flag ("cpuset_v2_mode", Opt_cpuset_v2_mode),
931	fsparam_string("name", Opt_name),
932	fsparam_flag ("none", Opt_none),
933	fsparam_flag ("noprefix", Opt_noprefix),
934	fsparam_string("release_agent", Opt_release_agent),
935	fsparam_flag ("xattr", Opt_xattr),
936	fsparam_flag ("favordynmods", Opt_favordynmods),
937	fsparam_flag ("nofavordynmods", Opt_nofavordynmods),
938	{}
939	};
940
941	int cgroup1_parse_param(struct fs_context fc, struct* fs_parameter *param)
942	{
943	struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
944	struct cgroup_subsys *ss;
945	struct fs_parse_result result;
946	int opt, i;
947
948	opt = fs_parse(fc, desc: cgroup1_fs_parameters, param, result: &result);
949	if (opt == -ENOPARAM) {
950	int ret;
951
952	ret = vfs_parse_fs_param_source(fc, param);
953	if (ret != -ENOPARAM)
954	return ret;
955	for_each_subsys(ss, i) {
956	if (strcmp(param->key, ss->legacy_name) \|\|
957	cgroup1_subsys_absent(ss))
958	continue;
959	if (!cgroup_ssid_enabled(ssid: i) \|\| cgroup1_ssid_disabled(ssid: i))
960	return invalfc(fc, "Disabled controller '%s'",
961	param->key);
962	ctx->subsys_mask \|= (`1` << i);
963	return `0`;
964	}
965	return invalfc(fc, "Unknown subsys name '%s'", param->key);
966	}
967	if (opt < `0`)
968	return opt;
969
970	switch (opt) {
971	case Opt_none:
972	/ Explicitly have no subsystems /
973	ctx->none = true;
974	break;
975	case Opt_all:
976	ctx->all_ss = true;
977	break;
978	case Opt_noprefix:
979	ctx->flags \|= CGRP_ROOT_NOPREFIX;
980	break;
981	case Opt_clone_children:
982	ctx->cpuset_clone_children = true;
983	break;
984	case Opt_cpuset_v2_mode:
985	ctx->flags \|= CGRP_ROOT_CPUSET_V2_MODE;
986	break;
987	case Opt_xattr:
988	ctx->flags \|= CGRP_ROOT_XATTR;
989	break;
990	case Opt_favordynmods:
991	ctx->flags \|= CGRP_ROOT_FAVOR_DYNMODS;
992	break;
993	case Opt_nofavordynmods:
994	ctx->flags &= ~CGRP_ROOT_FAVOR_DYNMODS;
995	break;
996	case Opt_release_agent:
997	/ Specifying two release agents is forbidden /
998	if (ctx->release_agent)
999	return invalfc(fc, "release_agent respecified");
1000	/*
1001	* Release agent gets called with all capabilities,
1002	* require capabilities to set release agent.
1003	*/
1004	if ((fc->user_ns != &init_user_ns) \|\| !capable(CAP_SYS_ADMIN))
1005	return invalfc(fc, "Setting release_agent not allowed");
1006	ctx->release_agent = param->string;
1007	param->string = NULL;
1008	break;
1009	case Opt_name:
1010	/ blocked by boot param? /
1011	if (cgroup_no_v1_named)
1012	return -ENOENT;
1013	/ Can't specify an empty name /
1014	if (!param->size)
1015	return invalfc(fc, "Empty name");
1016	if (param->size > MAX_CGROUP_ROOT_NAMELEN - `1`)
1017	return invalfc(fc, "Name too long");
1018	/ Must match [\w.-]+ /
1019	for (i = `0`; i < param->size; i++) {
1020	char c = param->string[i];
1021	if (isalnum(c))
1022	continue;
1023	if ((c == `'.'`) \|\| (c == `'-'`) \|\| (c == `'_'`))
1024	continue;
1025	return invalfc(fc, "Invalid name");
1026	}
1027	/ Specifying two names is forbidden /
1028	if (ctx->name)
1029	return invalfc(fc, "name respecified");
1030	ctx->name = param->string;
1031	param->string = NULL;
1032	break;
1033	}
1034	return `0`;
1035	}
1036
1037	static int check_cgroupfs_options(struct fs_context *fc)
1038	{
1039	struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
1040	u16 mask = U16_MAX;
1041	u16 enabled = `0`;
1042	struct cgroup_subsys *ss;
1043	int i;
1044
1045	#ifdef CONFIG_CPUSETS
1046	mask = ~((u16)`1` << cpuset_cgrp_id);
1047	#endif
1048	for_each_subsys(ss, i)
1049	if (cgroup_ssid_enabled(ssid: i) && !cgroup1_ssid_disabled(ssid: i) &&
1050	!cgroup1_subsys_absent(ss))
1051	enabled \|= `1` << i;
1052
1053	ctx->subsys_mask &= enabled;
1054
1055	/*
1056	* In absence of 'none', 'name=' and subsystem name options,
1057	* let's default to 'all'.
1058	*/
1059	if (!ctx->subsys_mask && !ctx->none && !ctx->name)
1060	ctx->all_ss = true;
1061
1062	if (ctx->all_ss) {
1063	/ Mutually exclusive option 'all' + subsystem name /
1064	if (ctx->subsys_mask)
1065	return invalfc(fc, "subsys name conflicts with all");
1066	/ 'all' => select all the subsystems /
1067	ctx->subsys_mask = enabled;
1068	}
1069
1070	/*
1071	* We either have to specify by name or by subsystems. (So all
1072	* empty hierarchies must have a name).
1073	*/
1074	if (!ctx->subsys_mask && !ctx->name)
1075	return invalfc(fc, "Need name or subsystem set");
1076
1077	/*
1078	* Option noprefix was introduced just for backward compatibility
1079	* with the old cpuset, so we allow noprefix only if mounting just
1080	* the cpuset subsystem.
1081	*/
1082	if ((ctx->flags & CGRP_ROOT_NOPREFIX) && (ctx->subsys_mask & mask))
1083	return invalfc(fc, "noprefix used incorrectly");
1084
1085	/ Can't specify "none" and some subsystems /
1086	if (ctx->subsys_mask && ctx->none)
1087	return invalfc(fc, "none used incorrectly");
1088
1089	return `0`;
1090	}
1091
1092	int cgroup1_reconfigure(struct fs_context *fc)
1093	{
1094	struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
1095	struct kernfs_root *kf_root = kernfs_root_from_sb(sb: fc->root->d_sb);
1096	struct cgroup_root *root = cgroup_root_from_kf(kf_root);
1097	int ret = `0`;
1098	u16 added_mask, removed_mask;
1099
1100	cgroup_lock_and_drain_offline(cgrp: &cgrp_dfl_root.cgrp);
1101
1102	/ See what subsystems are wanted /
1103	ret = check_cgroupfs_options(fc);
1104	if (ret)
1105	goto out_unlock;
1106
1107	if (ctx->subsys_mask != root->subsys_mask \|\| ctx->release_agent)
1108	pr_warn("option changes via remount are deprecated (pid=%d comm=%s)\n",
1109	task_tgid_nr(current), current->comm);
1110
1111	added_mask = ctx->subsys_mask & ~root->subsys_mask;
1112	removed_mask = root->subsys_mask & ~ctx->subsys_mask;
1113
1114	/ Don't allow flags or name to change at remount /
1115	if ((ctx->flags ^ root->flags) \|\|
1116	(ctx->name && strcmp(ctx->name, root->name))) {
1117	errorfc(fc, "option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"",
1118	ctx->flags, ctx->name ?: "", root->flags, root->name);
1119	ret = -EINVAL;
1120	goto out_unlock;
1121	}
1122
1123	/ remounting is not allowed for populated hierarchies /
1124	if (!list_empty(head: &root->cgrp.self.children)) {
1125	ret = -EBUSY;
1126	goto out_unlock;
1127	}
1128
1129	ret = rebind_subsystems(dst_root: root, ss_mask: added_mask);
1130	if (ret)
1131	goto out_unlock;
1132
1133	WARN_ON(rebind_subsystems(&cgrp_dfl_root, removed_mask));
1134
1135	if (ctx->release_agent) {
1136	spin_lock(lock: &release_agent_path_lock);
1137	strscpy(root->release_agent_path, ctx->release_agent);
1138	spin_unlock(lock: &release_agent_path_lock);
1139	}
1140
1141	trace_cgroup_remount(root);
1142
1143	out_unlock:
1144	cgroup_unlock();
1145	return ret;
1146	}
1147
1148	struct kernfs_syscall_ops cgroup1_kf_syscall_ops = {
1149	.rename = cgroup1_rename,
1150	.show_options = cgroup1_show_options,
1151	.mkdir = cgroup_mkdir,
1152	.rmdir = cgroup_rmdir,
1153	.show_path = cgroup_show_path,
1154	};
1155
1156	/*
1157	* The guts of cgroup1 mount - find or create cgroup_root to use.
1158	* Called with cgroup_mutex held; returns 0 on success, -E... on
1159	* error and positive - in case when the candidate is busy dying.
1160	* On success it stashes a reference to cgroup_root into given
1161	* cgroup_fs_context; that reference is NOT counting towards the
1162	* cgroup_root refcount.
1163	*/
1164	static int cgroup1_root_to_use(struct fs_context *fc)
1165	{
1166	struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
1167	struct cgroup_root *root;
1168	struct cgroup_subsys *ss;
1169	int i, ret;
1170
1171	/ First find the desired set of subsystems /
1172	ret = check_cgroupfs_options(fc);
1173	if (ret)
1174	return ret;
1175
1176	/*
1177	* Destruction of cgroup root is asynchronous, so subsystems may
1178	* still be dying after the previous unmount. Let's drain the
1179	* dying subsystems. We just need to ensure that the ones
1180	* unmounted previously finish dying and don't care about new ones
1181	* starting. Testing ref liveliness is good enough.
1182	*/
1183	for_each_subsys(ss, i) {
1184	if (!(ctx->subsys_mask & (`1` << i)) \|\|
1185	ss->root == &cgrp_dfl_root)
1186	continue;
1187
1188	if (!percpu_ref_tryget_live(ref: &ss->root->cgrp.self.refcnt))
1189	return `1`; / restart /
1190	cgroup_put(cgrp: &ss->root->cgrp);
1191	}
1192
1193	for_each_root(root) {
1194	bool name_match = false;
1195
1196	if (root == &cgrp_dfl_root)
1197	continue;
1198
1199	/*
1200	* If we asked for a name then it must match. Also, if
1201	* name matches but sybsys_mask doesn't, we should fail.
1202	* Remember whether name matched.
1203	*/
1204	if (ctx->name) {
1205	if (strcmp(ctx->name, root->name))
1206	continue;
1207	name_match = true;
1208	}
1209
1210	/*
1211	* If we asked for subsystems (or explicitly for no
1212	* subsystems) then they must match.
1213	*/
1214	if ((ctx->subsys_mask \|\| ctx->none) &&
1215	(ctx->subsys_mask != root->subsys_mask)) {
1216	if (!name_match)
1217	continue;
1218	return -EBUSY;
1219	}
1220
1221	if (root->flags ^ ctx->flags)
1222	pr_warn("new mount options do not match the existing superblock, will be ignored\n");
1223
1224	ctx->root = root;
1225	return `0`;
1226	}
1227
1228	/*
1229	* No such thing, create a new one. name= matching without subsys
1230	* specification is allowed for already existing hierarchies but we
1231	* can't create new one without subsys specification.
1232	*/
1233	if (!ctx->subsys_mask && !ctx->none)
1234	return invalfc(fc, "No subsys list or none specified");
1235
1236	/ Hierarchies may only be created in the initial cgroup namespace. /
1237	if (ctx->ns != &init_cgroup_ns)
1238	return -EPERM;
1239
1240	root = kzalloc(sizeof(*root), GFP_KERNEL);
1241	if (!root)
1242	return -ENOMEM;
1243
1244	ctx->root = root;
1245	init_cgroup_root(ctx);
1246
1247	ret = cgroup_setup_root(root, ss_mask: ctx->subsys_mask);
1248	if (!ret)
1249	cgroup_favor_dynmods(root, favor: ctx->flags & CGRP_ROOT_FAVOR_DYNMODS);
1250	else
1251	cgroup_free_root(root);
1252
1253	return ret;
1254	}
1255
1256	int cgroup1_get_tree(struct fs_context *fc)
1257	{
1258	struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
1259	int ret;
1260
1261	/ Check if the caller has permission to mount. /
1262	if (!ns_capable(ns: ctx->ns->user_ns, CAP_SYS_ADMIN))
1263	return -EPERM;
1264
1265	cgroup_lock_and_drain_offline(cgrp: &cgrp_dfl_root.cgrp);
1266
1267	ret = cgroup1_root_to_use(fc);
1268	if (!ret && !percpu_ref_tryget_live(ref: &ctx->root->cgrp.self.refcnt))
1269	ret = `1`; / restart /
1270
1271	cgroup_unlock();
1272
1273	if (!ret)
1274	ret = cgroup_do_get_tree(fc);
1275
1276	if (!ret && percpu_ref_is_dying(ref: &ctx->root->cgrp.self.refcnt)) {
1277	fc_drop_locked(fc);
1278	ret = `1`;
1279	}
1280
1281	if (unlikely(ret > `0`)) {
1282	msleep(msecs: `10`);
1283	return restart_syscall();
1284	}
1285	return ret;
1286	}
1287
1288	/**
1289	* task_get_cgroup1 - Acquires the associated cgroup of a task within a
1290	* specific cgroup1 hierarchy. The cgroup1 hierarchy is identified by its
1291	* hierarchy ID.
1292	* @tsk: The target task
1293	* @hierarchy_id: The ID of a cgroup1 hierarchy
1294	*
1295	* On success, the cgroup is returned. On failure, ERR_PTR is returned.
1296	* We limit it to cgroup1 only.
1297	*/
1298	struct cgroup task_get_cgroup1(struct* task_struct tsk, int* hierarchy_id)
1299	{
1300	struct cgroup *cgrp = ERR_PTR(error: -ENOENT);
1301	struct cgroup_root *root;
1302	unsigned long flags;
1303
1304	rcu_read_lock();
1305	for_each_root(root) {
1306	/ cgroup1 only/
1307	if (root == &cgrp_dfl_root)
1308	continue;
1309	if (root->hierarchy_id != hierarchy_id)
1310	continue;
1311	spin_lock_irqsave(&css_set_lock, flags);
1312	cgrp = task_cgroup_from_root(task: tsk, root);
1313	if (!cgrp \|\| !cgroup_tryget(cgrp))
1314	cgrp = ERR_PTR(error: -ENOENT);
1315	spin_unlock_irqrestore(lock: &css_set_lock, flags);
1316	break;
1317	}
1318	rcu_read_unlock();
1319	return cgrp;
1320	}
1321
1322	static int __init cgroup1_wq_init(void)
1323	{
1324	/*
1325	* Used to destroy pidlists and separate to serve as flush domain.
1326	* Cap @max_active to 1 too.
1327	*/
1328	cgroup_pidlist_destroy_wq = alloc_workqueue("cgroup_pidlist_destroy",
1329	WQ_PERCPU, `1`);
1330	BUG_ON(!cgroup_pidlist_destroy_wq);
1331	return `0`;
1332	}
1333	core_initcall(cgroup1_wq_init);
1334
1335	static int __init cgroup_no_v1(char *str)
1336	{
1337	struct cgroup_subsys *ss;
1338	char *token;
1339	int i;
1340
1341	while ((token = strsep(&str, ",")) != NULL) {
1342	if (!*token)
1343	continue;
1344
1345	if (!strcmp(token, "all")) {
1346	cgroup_no_v1_mask = U16_MAX;
1347	continue;
1348	}
1349
1350	if (!strcmp(token, "named")) {
1351	cgroup_no_v1_named = true;
1352	continue;
1353	}
1354
1355	for_each_subsys(ss, i) {
1356	if (strcmp(token, ss->name) &&
1357	strcmp(token, ss->legacy_name))
1358	continue;
1359
1360	cgroup_no_v1_mask \|= `1` << i;
1361	break;
1362	}
1363	}
1364	return `1`;
1365	}
1366	__setup("cgroup_no_v1=", cgroup_no_v1);
1367
1368	static int __init cgroup_v1_proc(char *str)
1369	{
1370	return (kstrtobool(s: str, res: &proc_show_all) == `0`);
1371	}
1372	__setup("cgroup_v1_proc=", cgroup_v1_proc);
1373

Browse the source code of Linux/kernel/cgroup/cgroup-v1.c