// SPDX-License-Identifier: GPL-2.0
/*
 * padata.c - generic interface to process data streams in parallel
 *
 * See Documentation/core-api/padata.rst for more information.
 *
 * Copyright (C) 2008, 2009 secunet Security Networks AG
 * Copyright (C) 2008, 2009 Steffen Klassert <steffen.klassert@secunet.com>
 *
 * Copyright (c) 2020 Oracle and/or its affiliates.
 * Author: Daniel Jordan <daniel.m.jordan@oracle.com>
 */

#include <linux/completion.h>
#include <linux/export.h>
#include <linux/cpumask.h>
#include <linux/err.h>
#include <linux/cpu.h>
#include <linux/padata.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/sysfs.h>
#include <linux/rcupdate.h>

#define	PADATA_WORK_ONSTACK	1	/* Work's memory is on stack */

struct padata_work {
	struct work_struct	pw_work;
	struct list_head	pw_list;  /* padata_free_works linkage */
	void			*pw_data;
};

static DEFINE_SPINLOCK(padata_works_lock);
static struct padata_work *padata_works;
static LIST_HEAD(padata_free_works);

struct padata_mt_job_state {
	spinlock_t		lock;
	struct completion	completion;
	struct padata_mt_job	*job;
	int			nworks;
	int			nworks_fini;
	unsigned long		chunk_size;
};

static void padata_free_pd(struct parallel_data *pd);
static void __init padata_mt_helper(struct work_struct *work);

static inline void padata_get_pd(struct parallel_data *pd)
{
	refcount_inc(&pd->refcnt);
}

static inline void padata_put_pd_cnt(struct parallel_data *pd, int cnt)
{
	if (refcount_sub_and_test(cnt, &pd->refcnt))
		padata_free_pd(pd);
}

static inline void padata_put_pd(struct parallel_data *pd)
{
	padata_put_pd_cnt(pd, 1);
}

static int padata_cpu_hash(struct parallel_data *pd, unsigned int seq_nr)
{
	/*
	 * Hash the sequence number to a cpu by taking seq_nr modulo the
	 * number of cpus in use.
	 */
	int cpu_index = seq_nr % cpumask_weight(pd->cpumask.pcpu);

	return cpumask_nth(cpu_index, pd->cpumask.pcpu);
}
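
/*
 * For example (illustrative numbers): with CPUs {1,3,5,7} in cpumask.pcpu,
 * padata_cpu_hash() above maps seq_nr 10 to cpu_index = 10 % 4 = 2, and
 * cpumask_nth(2, ...) yields CPU 5.
 */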

static struct padata_work *padata_work_alloc(void)
{
	struct padata_work *pw;

	lockdep_assert_held(&padata_works_lock);

	if (list_empty(&padata_free_works))
		return NULL;	/* No more work items allowed to be queued. */

	pw = list_first_entry(&padata_free_works, struct padata_work, pw_list);
	list_del(&pw->pw_list);
	return pw;
}

/*
 * This function is marked __ref because it may be optimized in such a way
 * that it directly refers to work_fn's address, which causes modpost to
 * complain when work_fn is marked __init. This scenario was observed with
 * clang LTO, where padata_work_init() was optimized to refer directly to
 * padata_mt_helper() because the calls to padata_work_init() with other
 * work_fn values were eliminated or inlined.
 */
static void __ref padata_work_init(struct padata_work *pw, work_func_t work_fn,
				   void *data, int flags)
{
	if (flags & PADATA_WORK_ONSTACK)
		INIT_WORK_ONSTACK(&pw->pw_work, work_fn);
	else
		INIT_WORK(&pw->pw_work, work_fn);
	pw->pw_data = data;
}

static int __init padata_work_alloc_mt(int nworks, void *data,
				       struct list_head *head)
{
	int i;

	spin_lock_bh(&padata_works_lock);
	/* Start at 1 because the current task participates in the job. */
	for (i = 1; i < nworks; ++i) {
		struct padata_work *pw = padata_work_alloc();

		if (!pw)
			break;
		padata_work_init(pw, padata_mt_helper, data, 0);
		list_add(&pw->pw_list, head);
	}
	spin_unlock_bh(&padata_works_lock);

	return i;
}

static void padata_work_free(struct padata_work *pw)
{
	lockdep_assert_held(&padata_works_lock);
	list_add(&pw->pw_list, &padata_free_works);
}

static void __init padata_works_free(struct list_head *works)
{
	struct padata_work *cur, *next;

	if (list_empty(works))
		return;

	spin_lock_bh(&padata_works_lock);
	list_for_each_entry_safe(cur, next, works, pw_list) {
		list_del(&cur->pw_list);
		padata_work_free(cur);
	}
	spin_unlock_bh(&padata_works_lock);
}

static void padata_parallel_worker(struct work_struct *parallel_work)
{
	struct padata_work *pw = container_of(parallel_work, struct padata_work,
					      pw_work);
	struct padata_priv *padata = pw->pw_data;

	local_bh_disable();
	padata->parallel(padata);
	spin_lock(&padata_works_lock);
	padata_work_free(pw);
	spin_unlock(&padata_works_lock);
	local_bh_enable();
}

/**
 * padata_do_parallel - padata parallelization function
 *
 * @ps: padata shell
 * @padata: object to be parallelized
 * @cb_cpu: pointer to the CPU that the serialization callback function should
 *          run on.  If it's not in the serial cpumask of @pinst
 *          (i.e. cpumask.cbcpu), this function selects a fallback CPU and,
 *          if none is found, returns -EINVAL.
 *
 * The parallelization callback function will run with BHs off.
 * Note: Every object which is parallelized by padata_do_parallel
 * must be seen by padata_do_serial.
 *
 * Return: 0 on success or else negative error code.
 */
int padata_do_parallel(struct padata_shell *ps,
		       struct padata_priv *padata, int *cb_cpu)
{
	struct padata_instance *pinst = ps->pinst;
	struct parallel_data *pd;
	struct padata_work *pw;
	int cpu_index, err;

	rcu_read_lock_bh();

	pd = rcu_dereference_bh(ps->pd);

	err = -EINVAL;
	if (!(pinst->flags & PADATA_INIT) || pinst->flags & PADATA_INVALID)
		goto out;

	if (!cpumask_test_cpu(*cb_cpu, pd->cpumask.cbcpu)) {
		if (cpumask_empty(pd->cpumask.cbcpu))
			goto out;

		/* Select an alternate fallback CPU and notify the caller. */
		cpu_index = *cb_cpu % cpumask_weight(pd->cpumask.cbcpu);
		*cb_cpu = cpumask_nth(cpu_index, pd->cpumask.cbcpu);
	}

	err = -EBUSY;
	if ((pinst->flags & PADATA_RESET))
		goto out;

	padata_get_pd(pd);
	padata->pd = pd;
	padata->cb_cpu = *cb_cpu;

	spin_lock(&padata_works_lock);
	padata->seq_nr = ++pd->seq_nr;
	pw = padata_work_alloc();
	spin_unlock(&padata_works_lock);

	if (!pw) {
		/* Maximum works limit exceeded, run in the current task. */
		padata->parallel(padata);
	}

	rcu_read_unlock_bh();

	if (pw) {
		padata_work_init(pw, padata_parallel_worker, padata, 0);
		queue_work(pinst->parallel_wq, &pw->pw_work);
	}

	return 0;
out:
	rcu_read_unlock_bh();

	return err;
}
EXPORT_SYMBOL(padata_do_parallel);
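
/*
 * Example usage of padata_do_parallel() (an illustrative sketch, not part of
 * the padata API; struct my_request, my_parallel() and my_serial() are
 * made-up names):
 *
 *	struct my_request {
 *		struct padata_priv padata;	// must be embedded
 *		// ... request-specific fields ...
 *	};
 *
 *	int submit(struct padata_shell *ps, struct my_request *req)
 *	{
 *		int cb_cpu = get_cpu();		// any preferred callback CPU
 *		int err;
 *
 *		req->padata.parallel = my_parallel;
 *		req->padata.serial = my_serial;
 *		err = padata_do_parallel(ps, &req->padata, &cb_cpu);
 *		put_cpu();
 *		return err;	// -EBUSY means the instance is resetting
 *	}
 */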

/*
 * padata_find_next - Find the next object that needs serialization.
 *
 * Return:
 * * A pointer to the control struct of the next object that needs
 *   serialization, if present in one of the percpu reorder queues.
 * * NULL, if the next object that needs serialization will
 *   be parallel processed by another cpu and is not yet present in
 *   the cpu's reorder queue.
 */
static struct padata_priv *padata_find_next(struct parallel_data *pd, int cpu,
					    unsigned int processed)
{
	struct padata_priv *padata;
	struct padata_list *reorder;

	reorder = per_cpu_ptr(pd->reorder_list, cpu);

	spin_lock(&reorder->lock);
	if (list_empty(&reorder->list))
		goto notfound;

	padata = list_entry(reorder->list.next, struct padata_priv, list);

	/*
	 * Checks the rare case where two or more parallel jobs have hashed to
	 * the same CPU and one of the later ones finishes first.
	 */
	if (padata->seq_nr != processed)
		goto notfound;

	list_del_init(&padata->list);
	spin_unlock(&reorder->lock);
	return padata;

notfound:
	pd->processed = processed;
	pd->cpu = cpu;
	spin_unlock(&reorder->lock);
	return NULL;
}

static void padata_reorder(struct padata_priv *padata)
{
	struct parallel_data *pd = padata->pd;
	struct padata_instance *pinst = pd->ps->pinst;
	unsigned int processed;
	int cpu;

	processed = pd->processed;
	cpu = pd->cpu;

	do {
		struct padata_serial_queue *squeue;
		int cb_cpu;

		processed++;
		/* When the sequence number wraps around, reset to the first CPU. */
		if (unlikely(processed == 0))
			cpu = cpumask_first(pd->cpumask.pcpu);
		else
			cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu);

		cb_cpu = padata->cb_cpu;
		squeue = per_cpu_ptr(pd->squeue, cb_cpu);

		spin_lock(&squeue->serial.lock);
		list_add_tail(&padata->list, &squeue->serial.list);
		queue_work_on(cb_cpu, pinst->serial_wq, &squeue->work);

		/*
		 * If the next object that needs serialization is parallel
		 * processed by another cpu and is still on its way to the
		 * cpu's reorder queue, end the loop.
		 */
		padata = padata_find_next(pd, cpu, processed);
		spin_unlock(&squeue->serial.lock);
	} while (padata);
}

static void padata_serial_worker(struct work_struct *serial_work)
{
	struct padata_serial_queue *squeue;
	struct parallel_data *pd;
	LIST_HEAD(local_list);
	int cnt;

	local_bh_disable();
	squeue = container_of(serial_work, struct padata_serial_queue, work);
	pd = squeue->pd;

	spin_lock(&squeue->serial.lock);
	list_replace_init(&squeue->serial.list, &local_list);
	spin_unlock(&squeue->serial.lock);

	cnt = 0;

	while (!list_empty(&local_list)) {
		struct padata_priv *padata;

		padata = list_entry(local_list.next,
				    struct padata_priv, list);

		list_del_init(&padata->list);

		padata->serial(padata);
		cnt++;
	}
	local_bh_enable();

	padata_put_pd_cnt(pd, cnt);
}

/**
 * padata_do_serial - padata serialization function
 *
 * @padata: object to be serialized.
 *
 * padata_do_serial must be called for every parallelized object.
 * The serialization callback function will run with BHs off.
 */
void padata_do_serial(struct padata_priv *padata)
{
	struct parallel_data *pd = padata->pd;
	int hashed_cpu = padata_cpu_hash(pd, padata->seq_nr);
	struct padata_list *reorder = per_cpu_ptr(pd->reorder_list, hashed_cpu);
	struct padata_priv *cur;
	struct list_head *pos;
	bool gotit = true;

	spin_lock(&reorder->lock);
	/* Sort in ascending order of sequence number. */
	list_for_each_prev(pos, &reorder->list) {
		cur = list_entry(pos, struct padata_priv, list);
		/* Compare by difference to handle integer wraparound. */
		if ((signed int)(cur->seq_nr - padata->seq_nr) < 0)
			break;
	}
	if (padata->seq_nr != pd->processed) {
		gotit = false;
		list_add(&padata->list, pos);
	}
	spin_unlock(&reorder->lock);

	if (gotit)
		padata_reorder(padata);
}
EXPORT_SYMBOL(padata_do_serial);
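
/*
 * Example callback pair for the sketch above (illustrative; my_parallel(),
 * my_serial() and my_process() are made-up names):
 *
 *	static void my_parallel(struct padata_priv *padata)
 *	{
 *		struct my_request *req =
 *			container_of(padata, struct my_request, padata);
 *
 *		my_process(req);		// runs with BHs off
 *		padata_do_serial(padata);	// mandatory for every object
 *	}
 *
 *	static void my_serial(struct padata_priv *padata)
 *	{
 *		// Runs with BHs off on cb_cpu, in submission order.
 *	}
 */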

static int padata_setup_cpumasks(struct padata_instance *pinst)
{
	struct workqueue_attrs *attrs;
	int err;

	attrs = alloc_workqueue_attrs();
	if (!attrs)
		return -ENOMEM;

	/* Restrict parallel_wq workers to pd->cpumask.pcpu. */
	cpumask_copy(attrs->cpumask, pinst->cpumask.pcpu);
	err = apply_workqueue_attrs(pinst->parallel_wq, attrs);
	free_workqueue_attrs(attrs);

	return err;
}

static void __init padata_mt_helper(struct work_struct *w)
{
	struct padata_work *pw = container_of(w, struct padata_work, pw_work);
	struct padata_mt_job_state *ps = pw->pw_data;
	struct padata_mt_job *job = ps->job;
	bool done;

	spin_lock(&ps->lock);

	while (job->size > 0) {
		unsigned long start, size, end;

		start = job->start;
		/* So end is chunk size aligned if enough work remains. */
		size = roundup(start + 1, ps->chunk_size) - start;
		size = min(size, job->size);
		end = start + size;

		job->start = end;
		job->size -= size;

		spin_unlock(&ps->lock);
		job->thread_fn(start, end, job->fn_arg);
		spin_lock(&ps->lock);
	}

	++ps->nworks_fini;
	done = (ps->nworks_fini == ps->nworks);
	spin_unlock(&ps->lock);

	if (done)
		complete(&ps->completion);
}

/**
 * padata_do_multithreaded - run a multithreaded job
 * @job: Description of the job.
 *
 * See the definition of struct padata_mt_job for more details.
 */
void __init padata_do_multithreaded(struct padata_mt_job *job)
{
	/* In case threads finish at different times. */
	static const unsigned long load_balance_factor = 4;
	struct padata_work my_work, *pw;
	struct padata_mt_job_state ps;
	LIST_HEAD(works);
	int nworks, nid;
	static atomic_t last_used_nid __initdata;

	if (job->size == 0)
		return;

	/* Ensure at least one thread when size < min_chunk. */
	nworks = max(job->size / max(job->min_chunk, job->align), 1ul);
	nworks = min(nworks, job->max_threads);

	if (nworks == 1) {
		/* Single thread, no coordination needed, cut to the chase. */
		job->thread_fn(job->start, job->start + job->size, job->fn_arg);
		return;
	}

	spin_lock_init(&ps.lock);
	init_completion(&ps.completion);
	ps.job = job;
	ps.nworks = padata_work_alloc_mt(nworks, &ps, &works);
	ps.nworks_fini = 0;

	/*
	 * Chunk size is the amount of work a helper does per call to the
	 * thread function.  Load balance large jobs between threads by
	 * increasing the number of chunks, guarantee at least the minimum
	 * chunk size from the caller, and honor the caller's alignment.
	 * Ensure chunk_size is at least 1 to prevent divide-by-0
	 * panic in padata_mt_helper().
	 */
	ps.chunk_size = job->size / (ps.nworks * load_balance_factor);
	ps.chunk_size = max(ps.chunk_size, job->min_chunk);
	ps.chunk_size = max(ps.chunk_size, 1ul);
	ps.chunk_size = roundup(ps.chunk_size, job->align);

	list_for_each_entry(pw, &works, pw_list)
		if (job->numa_aware) {
			int old_node = atomic_read(&last_used_nid);

			do {
				nid = next_node_in(old_node, node_states[N_CPU]);
			} while (!atomic_try_cmpxchg(&last_used_nid, &old_node, nid));
			queue_work_node(nid, system_dfl_wq, &pw->pw_work);
		} else {
			queue_work(system_dfl_wq, &pw->pw_work);
		}

	/* Use the current thread, which saves starting a workqueue worker. */
	padata_work_init(&my_work, padata_mt_helper, &ps, PADATA_WORK_ONSTACK);
	padata_mt_helper(&my_work.pw_work);

	/* Wait for all the helpers to finish. */
	wait_for_completion(&ps.completion);

	destroy_work_on_stack(&my_work.pw_work);
	padata_works_free(&works);
}
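
/*
 * Example multithreaded job (an illustrative sketch; init_range() and
 * nr_items are made-up names, the chunk and thread limits are arbitrary):
 *
 *	static void __init init_range(unsigned long start, unsigned long end,
 *				      void *arg)
 *	{
 *		// Initialize items in [start, end); may run concurrently
 *		// with other chunks of the same job.
 *	}
 *
 *	struct padata_mt_job job = {
 *		.thread_fn   = init_range,
 *		.fn_arg      = NULL,
 *		.start       = 0,
 *		.size        = nr_items,
 *		.align       = 1,
 *		.min_chunk   = 1024,
 *		.max_threads = num_online_cpus(),
 *		.numa_aware  = false,
 *	};
 *
 *	padata_do_multithreaded(&job);
 */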

static void __padata_list_init(struct padata_list *pd_list)
{
	INIT_LIST_HEAD(&pd_list->list);
	spin_lock_init(&pd_list->lock);
}

/* Initialize all percpu queues used by serial workers */
static void padata_init_squeues(struct parallel_data *pd)
{
	int cpu;
	struct padata_serial_queue *squeue;

	for_each_cpu(cpu, pd->cpumask.cbcpu) {
		squeue = per_cpu_ptr(pd->squeue, cpu);
		squeue->pd = pd;
		__padata_list_init(&squeue->serial);
		INIT_WORK(&squeue->work, padata_serial_worker);
	}
}

/* Initialize per-CPU reorder lists */
static void padata_init_reorder_list(struct parallel_data *pd)
{
	int cpu;
	struct padata_list *list;

	for_each_cpu(cpu, pd->cpumask.pcpu) {
		list = per_cpu_ptr(pd->reorder_list, cpu);
		__padata_list_init(list);
	}
}

/* Allocate and initialize the internal cpumask dependent resources. */
static struct parallel_data *padata_alloc_pd(struct padata_shell *ps)
{
	struct padata_instance *pinst = ps->pinst;
	struct parallel_data *pd;

	pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL);
	if (!pd)
		goto err;

	pd->reorder_list = alloc_percpu(struct padata_list);
	if (!pd->reorder_list)
		goto err_free_pd;

	pd->squeue = alloc_percpu(struct padata_serial_queue);
	if (!pd->squeue)
		goto err_free_reorder_list;

	pd->ps = ps;

	if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL))
		goto err_free_squeue;
	if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL))
		goto err_free_pcpu;

	cpumask_and(pd->cpumask.pcpu, pinst->cpumask.pcpu, cpu_online_mask);
	cpumask_and(pd->cpumask.cbcpu, pinst->cpumask.cbcpu, cpu_online_mask);

	padata_init_reorder_list(pd);
	padata_init_squeues(pd);
	pd->seq_nr = -1;
	refcount_set(&pd->refcnt, 1);
	pd->cpu = cpumask_first(pd->cpumask.pcpu);

	return pd;

err_free_pcpu:
	free_cpumask_var(pd->cpumask.pcpu);
err_free_squeue:
	free_percpu(pd->squeue);
err_free_reorder_list:
	free_percpu(pd->reorder_list);
err_free_pd:
	kfree(pd);
err:
	return NULL;
}

static void padata_free_pd(struct parallel_data *pd)
{
	free_cpumask_var(pd->cpumask.pcpu);
	free_cpumask_var(pd->cpumask.cbcpu);
	free_percpu(pd->reorder_list);
	free_percpu(pd->squeue);
	kfree(pd);
}

static void __padata_start(struct padata_instance *pinst)
{
	pinst->flags |= PADATA_INIT;
}

static void __padata_stop(struct padata_instance *pinst)
{
	if (!(pinst->flags & PADATA_INIT))
		return;

	pinst->flags &= ~PADATA_INIT;

	synchronize_rcu();
}

/* Replace the internal control structure with a new one. */
static int padata_replace_one(struct padata_shell *ps)
{
	struct parallel_data *pd_new;

	pd_new = padata_alloc_pd(ps);
	if (!pd_new)
		return -ENOMEM;

	ps->opd = rcu_dereference_protected(ps->pd, 1);
	rcu_assign_pointer(ps->pd, pd_new);

	return 0;
}

static int padata_replace(struct padata_instance *pinst)
{
	struct padata_shell *ps;
	int err = 0;

	pinst->flags |= PADATA_RESET;

	list_for_each_entry(ps, &pinst->pslist, list) {
		err = padata_replace_one(ps);
		if (err)
			break;
	}

	synchronize_rcu();

	list_for_each_entry_continue_reverse(ps, &pinst->pslist, list)
		padata_put_pd(ps->opd);

	pinst->flags &= ~PADATA_RESET;

	return err;
}

/* If cpumask contains no active cpu, we mark the instance as invalid. */
static bool padata_validate_cpumask(struct padata_instance *pinst,
				    const struct cpumask *cpumask)
{
	if (!cpumask_intersects(cpumask, cpu_online_mask)) {
		pinst->flags |= PADATA_INVALID;
		return false;
	}

	pinst->flags &= ~PADATA_INVALID;
	return true;
}

static int __padata_set_cpumasks(struct padata_instance *pinst,
				 cpumask_var_t pcpumask,
				 cpumask_var_t cbcpumask)
{
	int valid;
	int err;

	valid = padata_validate_cpumask(pinst, pcpumask);
	if (!valid) {
		__padata_stop(pinst);
		goto out_replace;
	}

	valid = padata_validate_cpumask(pinst, cbcpumask);
	if (!valid)
		__padata_stop(pinst);

out_replace:
	cpumask_copy(pinst->cpumask.pcpu, pcpumask);
	cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);

	err = padata_setup_cpumasks(pinst) ?: padata_replace(pinst);

	if (valid)
		__padata_start(pinst);

	return err;
}

/**
 * padata_set_cpumask - Set the cpumask specified by @cpumask_type to the
 *                      value of @cpumask.
 * @pinst: padata instance
 * @cpumask_type: PADATA_CPU_SERIAL or PADATA_CPU_PARALLEL, selecting the
 *                serial or parallel cpumask respectively.
 * @cpumask: the cpumask to use
 *
 * Return: 0 on success or negative error code
 */
int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
		       cpumask_var_t cpumask)
{
	struct cpumask *serial_mask, *parallel_mask;
	int err = -EINVAL;

	cpus_read_lock();
	mutex_lock(&pinst->lock);

	switch (cpumask_type) {
	case PADATA_CPU_PARALLEL:
		serial_mask = pinst->cpumask.cbcpu;
		parallel_mask = cpumask;
		break;
	case PADATA_CPU_SERIAL:
		parallel_mask = pinst->cpumask.pcpu;
		serial_mask = cpumask;
		break;
	default:
		goto out;
	}

	err = __padata_set_cpumasks(pinst, parallel_mask, serial_mask);

out:
	mutex_unlock(&pinst->lock);
	cpus_read_unlock();

	return err;
}
EXPORT_SYMBOL(padata_set_cpumask);
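
/*
 * Example (an illustrative sketch; restricting parallel workers to the CPUs
 * of node 0 is an arbitrary policy chosen for the example):
 *
 *	cpumask_var_t mask;
 *	int err;
 *
 *	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
 *		return -ENOMEM;
 *	cpumask_copy(mask, cpumask_of_node(0));
 *	err = padata_set_cpumask(pinst, PADATA_CPU_PARALLEL, mask);
 *	free_cpumask_var(mask);
 */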

#ifdef CONFIG_HOTPLUG_CPU

static int __padata_add_cpu(struct padata_instance *pinst, int cpu)
{
	int err = 0;

	if (cpumask_test_cpu(cpu, cpu_online_mask)) {
		err = padata_replace(pinst);

		if (padata_validate_cpumask(pinst, pinst->cpumask.pcpu) &&
		    padata_validate_cpumask(pinst, pinst->cpumask.cbcpu))
			__padata_start(pinst);
	}

	return err;
}

static int __padata_remove_cpu(struct padata_instance *pinst, int cpu)
{
	int err = 0;

	if (!cpumask_test_cpu(cpu, cpu_online_mask)) {
		if (!padata_validate_cpumask(pinst, pinst->cpumask.pcpu) ||
		    !padata_validate_cpumask(pinst, pinst->cpumask.cbcpu))
			__padata_stop(pinst);

		err = padata_replace(pinst);
	}

	return err;
}

static inline int pinst_has_cpu(struct padata_instance *pinst, int cpu)
{
	return cpumask_test_cpu(cpu, pinst->cpumask.pcpu) ||
	       cpumask_test_cpu(cpu, pinst->cpumask.cbcpu);
}

static int padata_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct padata_instance *pinst;
	int ret;

	pinst = hlist_entry_safe(node, struct padata_instance, cpu_online_node);
	if (!pinst_has_cpu(pinst, cpu))
		return 0;

	mutex_lock(&pinst->lock);
	ret = __padata_add_cpu(pinst, cpu);
	mutex_unlock(&pinst->lock);
	return ret;
}

static int padata_cpu_dead(unsigned int cpu, struct hlist_node *node)
{
	struct padata_instance *pinst;
	int ret;

	pinst = hlist_entry_safe(node, struct padata_instance, cpu_dead_node);
	if (!pinst_has_cpu(pinst, cpu))
		return 0;

	mutex_lock(&pinst->lock);
	ret = __padata_remove_cpu(pinst, cpu);
	mutex_unlock(&pinst->lock);
	return ret;
}

static enum cpuhp_state hp_online;
#endif

static void __padata_free(struct padata_instance *pinst)
{
#ifdef CONFIG_HOTPLUG_CPU
	cpuhp_state_remove_instance_nocalls(CPUHP_PADATA_DEAD,
					    &pinst->cpu_dead_node);
	cpuhp_state_remove_instance_nocalls(hp_online, &pinst->cpu_online_node);
#endif

	WARN_ON(!list_empty(&pinst->pslist));

	free_cpumask_var(pinst->cpumask.pcpu);
	free_cpumask_var(pinst->cpumask.cbcpu);
	destroy_workqueue(pinst->serial_wq);
	destroy_workqueue(pinst->parallel_wq);
	kfree(pinst);
}

#define kobj2pinst(_kobj)					\
	container_of(_kobj, struct padata_instance, kobj)
#define attr2pentry(_attr)					\
	container_of(_attr, struct padata_sysfs_entry, attr)

static void padata_sysfs_release(struct kobject *kobj)
{
	struct padata_instance *pinst = kobj2pinst(kobj);
	__padata_free(pinst);
}

struct padata_sysfs_entry {
	struct attribute attr;
	ssize_t (*show)(struct padata_instance *, struct attribute *, char *);
	ssize_t (*store)(struct padata_instance *, struct attribute *,
			 const char *, size_t);
};

static ssize_t show_cpumask(struct padata_instance *pinst,
			    struct attribute *attr, char *buf)
{
	struct cpumask *cpumask;
	ssize_t len;

	mutex_lock(&pinst->lock);
	if (!strcmp(attr->name, "serial_cpumask"))
		cpumask = pinst->cpumask.cbcpu;
	else
		cpumask = pinst->cpumask.pcpu;

	len = snprintf(buf, PAGE_SIZE, "%*pb\n",
		       nr_cpu_ids, cpumask_bits(cpumask));
	mutex_unlock(&pinst->lock);
	return len < PAGE_SIZE ? len : -EINVAL;
}

static ssize_t store_cpumask(struct padata_instance *pinst,
			     struct attribute *attr,
			     const char *buf, size_t count)
{
	cpumask_var_t new_cpumask;
	ssize_t ret;
	int mask_type;

	if (!alloc_cpumask_var(&new_cpumask, GFP_KERNEL))
		return -ENOMEM;

	ret = bitmap_parse(buf, count, cpumask_bits(new_cpumask),
			   nr_cpumask_bits);
	if (ret < 0)
		goto out;

	mask_type = !strcmp(attr->name, "serial_cpumask") ?
		PADATA_CPU_SERIAL : PADATA_CPU_PARALLEL;
	ret = padata_set_cpumask(pinst, mask_type, new_cpumask);
	if (!ret)
		ret = count;

out:
	free_cpumask_var(new_cpumask);
	return ret;
}

#define PADATA_ATTR_RW(_name, _show_name, _store_name)		\
	static struct padata_sysfs_entry _name##_attr =		\
		__ATTR(_name, 0644, _show_name, _store_name)
#define PADATA_ATTR_RO(_name, _show_name)			\
	static struct padata_sysfs_entry _name##_attr =		\
		__ATTR(_name, 0400, _show_name, NULL)

PADATA_ATTR_RW(serial_cpumask, show_cpumask, store_cpumask);
PADATA_ATTR_RW(parallel_cpumask, show_cpumask, store_cpumask);

/*
 * Padata sysfs provides the following objects:
 * serial_cpumask   [RW] - cpumask for serial workers
 * parallel_cpumask [RW] - cpumask for parallel workers
 */
static struct attribute *padata_default_attrs[] = {
	&serial_cpumask_attr.attr,
	&parallel_cpumask_attr.attr,
	NULL,
};
ATTRIBUTE_GROUPS(padata_default);
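
/*
 * The masks are parsed with bitmap_parse(), so they are written as hex.
 * For example, assuming an instance exposed by pcrypt under
 * /sys/kernel/pcrypt/pencrypt, the following restricts parallel workers
 * to CPUs 0-3:
 *
 *	# echo f > /sys/kernel/pcrypt/pencrypt/parallel_cpumask
 */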

static ssize_t padata_sysfs_show(struct kobject *kobj,
				 struct attribute *attr, char *buf)
{
	struct padata_instance *pinst;
	struct padata_sysfs_entry *pentry;
	ssize_t ret = -EIO;

	pinst = kobj2pinst(kobj);
	pentry = attr2pentry(attr);
	if (pentry->show)
		ret = pentry->show(pinst, attr, buf);

	return ret;
}

static ssize_t padata_sysfs_store(struct kobject *kobj, struct attribute *attr,
				  const char *buf, size_t count)
{
	struct padata_instance *pinst;
	struct padata_sysfs_entry *pentry;
	ssize_t ret = -EIO;

	pinst = kobj2pinst(kobj);
	pentry = attr2pentry(attr);
	if (pentry->store)
		ret = pentry->store(pinst, attr, buf, count);

	return ret;
}

static const struct sysfs_ops padata_sysfs_ops = {
	.show = padata_sysfs_show,
	.store = padata_sysfs_store,
};

static const struct kobj_type padata_attr_type = {
	.sysfs_ops = &padata_sysfs_ops,
	.default_groups = padata_default_groups,
	.release = padata_sysfs_release,
};

/**
 * padata_alloc - allocate and initialize a padata instance
 * @name: used to identify the instance
 *
 * Return: new instance on success, NULL on error
 */
struct padata_instance *padata_alloc(const char *name)
{
	struct padata_instance *pinst;

	pinst = kzalloc(sizeof(struct padata_instance), GFP_KERNEL);
	if (!pinst)
		goto err;

	pinst->parallel_wq = alloc_workqueue("%s_parallel", WQ_UNBOUND, 0,
					     name);
	if (!pinst->parallel_wq)
		goto err_free_inst;

	cpus_read_lock();

	pinst->serial_wq = alloc_workqueue("%s_serial",
					   WQ_MEM_RECLAIM | WQ_CPU_INTENSIVE | WQ_PERCPU,
					   1, name);
	if (!pinst->serial_wq)
		goto err_put_cpus;

	if (!alloc_cpumask_var(&pinst->cpumask.pcpu, GFP_KERNEL))
		goto err_free_serial_wq;
	if (!alloc_cpumask_var(&pinst->cpumask.cbcpu, GFP_KERNEL)) {
		free_cpumask_var(pinst->cpumask.pcpu);
		goto err_free_serial_wq;
	}

	INIT_LIST_HEAD(&pinst->pslist);

	cpumask_copy(pinst->cpumask.pcpu, cpu_possible_mask);
	cpumask_copy(pinst->cpumask.cbcpu, cpu_possible_mask);

	if (padata_setup_cpumasks(pinst))
		goto err_free_masks;

	__padata_start(pinst);

	kobject_init(&pinst->kobj, &padata_attr_type);
	mutex_init(&pinst->lock);

#ifdef CONFIG_HOTPLUG_CPU
	cpuhp_state_add_instance_nocalls_cpuslocked(hp_online,
						    &pinst->cpu_online_node);
	cpuhp_state_add_instance_nocalls_cpuslocked(CPUHP_PADATA_DEAD,
						    &pinst->cpu_dead_node);
#endif

	cpus_read_unlock();

	return pinst;

err_free_masks:
	free_cpumask_var(pinst->cpumask.pcpu);
	free_cpumask_var(pinst->cpumask.cbcpu);
err_free_serial_wq:
	destroy_workqueue(pinst->serial_wq);
err_put_cpus:
	cpus_read_unlock();
	destroy_workqueue(pinst->parallel_wq);
err_free_inst:
	kfree(pinst);
err:
	return NULL;
}
EXPORT_SYMBOL(padata_alloc);

/**
 * padata_free - free a padata instance
 *
 * @pinst: padata instance to free
 */
void padata_free(struct padata_instance *pinst)
{
	kobject_put(&pinst->kobj);
}
EXPORT_SYMBOL(padata_free);

/**
 * padata_alloc_shell - Allocate and initialize padata shell.
 *
 * @pinst: Parent padata_instance object.
 *
 * Return: new shell on success, NULL on error
 */
struct padata_shell *padata_alloc_shell(struct padata_instance *pinst)
{
	struct parallel_data *pd;
	struct padata_shell *ps;

	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
	if (!ps)
		goto out;

	ps->pinst = pinst;

	cpus_read_lock();
	pd = padata_alloc_pd(ps);
	cpus_read_unlock();

	if (!pd)
		goto out_free_ps;

	mutex_lock(&pinst->lock);
	RCU_INIT_POINTER(ps->pd, pd);
	list_add(&ps->list, &pinst->pslist);
	mutex_unlock(&pinst->lock);

	return ps;

out_free_ps:
	kfree(ps);
out:
	return NULL;
}
EXPORT_SYMBOL(padata_alloc_shell);

/**
 * padata_free_shell - free a padata shell
 *
 * @ps: padata shell to free
 */
void padata_free_shell(struct padata_shell *ps)
{
	struct parallel_data *pd;

	if (!ps)
		return;

	mutex_lock(&ps->pinst->lock);
	list_del(&ps->list);
	pd = rcu_dereference_protected(ps->pd, 1);
	padata_put_pd(pd);
	mutex_unlock(&ps->pinst->lock);

	kfree(ps);
}
EXPORT_SYMBOL(padata_free_shell);
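
/*
 * Example lifecycle (an illustrative sketch with error handling omitted;
 * "my_instance" is a made-up name):
 *
 *	struct padata_instance *pinst = padata_alloc("my_instance");
 *	struct padata_shell *ps = padata_alloc_shell(pinst);
 *
 *	// ... submit jobs with padata_do_parallel(ps, ...) ...
 *
 *	padata_free_shell(ps);
 *	padata_free(pinst);
 */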

void __init padata_init(void)
{
	unsigned int i, possible_cpus;
#ifdef CONFIG_HOTPLUG_CPU
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "padata:online",
				      padata_cpu_online, NULL);
	if (ret < 0)
		goto err;
	hp_online = ret;

	ret = cpuhp_setup_state_multi(CPUHP_PADATA_DEAD, "padata:dead",
				      NULL, padata_cpu_dead);
	if (ret < 0)
		goto remove_online_state;
#endif

	possible_cpus = num_possible_cpus();
	padata_works = kmalloc_array(possible_cpus, sizeof(struct padata_work),
				     GFP_KERNEL);
	if (!padata_works)
		goto remove_dead_state;

	for (i = 0; i < possible_cpus; ++i)
		list_add(&padata_works[i].pw_list, &padata_free_works);

	return;

remove_dead_state:
#ifdef CONFIG_HOTPLUG_CPU
	cpuhp_remove_multi_state(CPUHP_PADATA_DEAD);
remove_online_state:
	cpuhp_remove_multi_state(hp_online);
err:
#endif
	pr_warn("padata: initialization failed\n");
}