| 1 | // SPDX-License-Identifier: GPL-2.0+ | 
|---|
| 2 | /* | 
|---|
| 3 | * Sleepable Read-Copy Update mechanism for mutual exclusion. | 
|---|
| 4 | * | 
|---|
| 5 | * Copyright (C) IBM Corporation, 2006 | 
|---|
| 6 | * Copyright (C) Fujitsu, 2012 | 
|---|
| 7 | * | 
|---|
| 8 | * Authors: Paul McKenney <paulmck@linux.ibm.com> | 
|---|
| 9 | *	   Lai Jiangshan <laijs@cn.fujitsu.com> | 
|---|
| 10 | * | 
|---|
| 11 | * For detailed explanation of Read-Copy Update mechanism see - | 
|---|
| 12 | *		Documentation/RCU/ *.txt | 
|---|
| 13 | * | 
|---|
| 14 | */ | 
|---|
| 15 |  | 
|---|
| 16 | #define pr_fmt(fmt) "rcu: " fmt | 
|---|
| 17 |  | 
|---|
| 18 | #include <linux/export.h> | 
|---|
| 19 | #include <linux/mutex.h> | 
|---|
| 20 | #include <linux/percpu.h> | 
|---|
| 21 | #include <linux/preempt.h> | 
|---|
| 22 | #include <linux/rcupdate_wait.h> | 
|---|
| 23 | #include <linux/sched.h> | 
|---|
| 24 | #include <linux/smp.h> | 
|---|
| 25 | #include <linux/delay.h> | 
|---|
| 26 | #include <linux/module.h> | 
|---|
| 27 | #include <linux/slab.h> | 
|---|
| 28 | #include <linux/srcu.h> | 
|---|
| 29 |  | 
|---|
| 30 | #include "rcu.h" | 
|---|
| 31 | #include "rcu_segcblist.h" | 
|---|
| 32 |  | 
|---|
| 33 | /* Holdoff in nanoseconds for auto-expediting. */ | 
|---|
| 34 | #define DEFAULT_SRCU_EXP_HOLDOFF (25 * 1000) | 
|---|
| 35 | static ulong exp_holdoff = DEFAULT_SRCU_EXP_HOLDOFF; | 
|---|
| 36 | module_param(exp_holdoff, ulong, 0444); | 
|---|
| 37 |  | 
|---|
| 38 | /* Overflow-check frequency.  N bits roughly says every 2**N grace periods. */ | 
|---|
| 39 | static ulong counter_wrap_check = (ULONG_MAX >> 2); | 
|---|
| 40 | module_param(counter_wrap_check, ulong, 0444); | 
|---|
| 41 |  | 
|---|
| 42 | /* | 
|---|
| 43 | * Control conversion to SRCU_SIZE_BIG: | 
|---|
| 44 | *    0: Don't convert at all. | 
|---|
| 45 | *    1: Convert at init_srcu_struct() time. | 
|---|
| 46 | *    2: Convert when rcutorture invokes srcu_torture_stats_print(). | 
|---|
| 47 | *    3: Decide at boot time based on system shape (default). | 
|---|
| 48 | * 0x1x: Convert when excessive contention encountered. | 
|---|
| 49 | */ | 
|---|
| 50 | #define SRCU_SIZING_NONE	0 | 
|---|
| 51 | #define SRCU_SIZING_INIT	1 | 
|---|
| 52 | #define SRCU_SIZING_TORTURE	2 | 
|---|
| 53 | #define SRCU_SIZING_AUTO	3 | 
|---|
| 54 | #define SRCU_SIZING_CONTEND	0x10 | 
|---|
| 55 | #define SRCU_SIZING_IS(x) ((convert_to_big & ~SRCU_SIZING_CONTEND) == x) | 
|---|
| 56 | #define SRCU_SIZING_IS_NONE() (SRCU_SIZING_IS(SRCU_SIZING_NONE)) | 
|---|
| 57 | #define SRCU_SIZING_IS_INIT() (SRCU_SIZING_IS(SRCU_SIZING_INIT)) | 
|---|
| 58 | #define SRCU_SIZING_IS_TORTURE() (SRCU_SIZING_IS(SRCU_SIZING_TORTURE)) | 
|---|
| 59 | #define SRCU_SIZING_IS_CONTEND() (convert_to_big & SRCU_SIZING_CONTEND) | 
|---|
| 60 | static int convert_to_big = SRCU_SIZING_AUTO; | 
|---|
| 61 | module_param(convert_to_big, int, 0444); | 
|---|
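/*
 * For example (illustrative only, derived from the table above), booting
 * with "srcutree.convert_to_big=2" converts only when rcutorture prints
 * statistics, while "srcutree.convert_to_big=0x10" disables the
 * system-shape-based conversion but still converts to SRCU_SIZE_BIG on
 * excessive contention.
 */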
| 62 |  | 
|---|
| 63 | /* Number of CPUs to trigger init_srcu_struct()-time transition to big. */ | 
|---|
| 64 | static int big_cpu_lim __read_mostly = 128; | 
|---|
| 65 | module_param(big_cpu_lim, int, 0444); | 
|---|
| 66 |  | 
|---|
| 67 | /* Contention events per jiffy to initiate transition to big. */ | 
|---|
| 68 | static int small_contention_lim __read_mostly = 100; | 
|---|
| 69 | module_param(small_contention_lim, int, 0444); | 
|---|
| 70 |  | 
|---|
| 71 | /* Early-boot callback-management, so early that no lock is required! */ | 
|---|
| 72 | static LIST_HEAD(srcu_boot_list); | 
|---|
| 73 | static bool __read_mostly srcu_init_done; | 
|---|
| 74 |  | 
|---|
| 75 | static void srcu_invoke_callbacks(struct work_struct *work); | 
|---|
| 76 | static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay); | 
|---|
| 77 | static void process_srcu(struct work_struct *work); | 
|---|
| 78 | static void srcu_delay_timer(struct timer_list *t); | 
|---|
| 79 |  | 
|---|
| 80 | /* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */ | 
|---|
| 81 | #define spin_lock_rcu_node(p)							\ | 
|---|
| 82 | do {										\ | 
|---|
| 83 | spin_lock(&ACCESS_PRIVATE(p, lock));					\ | 
|---|
| 84 | smp_mb__after_unlock_lock();						\ | 
|---|
| 85 | } while (0) | 
|---|
| 86 |  | 
|---|
| 87 | #define spin_unlock_rcu_node(p) spin_unlock(&ACCESS_PRIVATE(p, lock)) | 
|---|
| 88 |  | 
|---|
| 89 | #define spin_lock_irq_rcu_node(p)						\ | 
|---|
| 90 | do {										\ | 
|---|
| 91 | spin_lock_irq(&ACCESS_PRIVATE(p, lock));				\ | 
|---|
| 92 | smp_mb__after_unlock_lock();						\ | 
|---|
| 93 | } while (0) | 
|---|
| 94 |  | 
|---|
| 95 | #define spin_unlock_irq_rcu_node(p)						\ | 
|---|
| 96 | spin_unlock_irq(&ACCESS_PRIVATE(p, lock)) | 
|---|
| 97 |  | 
|---|
| 98 | #define spin_lock_irqsave_rcu_node(p, flags)					\ | 
|---|
| 99 | do {										\ | 
|---|
| 100 | spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags);			\ | 
|---|
| 101 | smp_mb__after_unlock_lock();						\ | 
|---|
| 102 | } while (0) | 
|---|
| 103 |  | 
|---|
| 104 | #define spin_trylock_irqsave_rcu_node(p, flags)					\ | 
|---|
| 105 | ({										\ | 
|---|
| 106 | bool ___locked = spin_trylock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \ | 
|---|
| 107 | \ | 
|---|
| 108 | if (___locked)								\ | 
|---|
| 109 | smp_mb__after_unlock_lock();					\ | 
|---|
| 110 | ___locked;								\ | 
|---|
| 111 | }) | 
|---|
| 112 |  | 
|---|
| 113 | #define spin_unlock_irqrestore_rcu_node(p, flags)				\ | 
|---|
| 114 | spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags)			\ | 
|---|
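/*
 * Usage sketch (illustrative): these wrappers guard srcu_data, srcu_node,
 * and srcu_usage structures throughout this file, for example:
 *
 *	spin_lock_irqsave_rcu_node(sdp, flags);
 *	...update fields of sdp under the lock...
 *	spin_unlock_irqrestore_rcu_node(sdp, flags);
 *
 * The smp_mb__after_unlock_lock() in the acquisition wrappers mirrors
 * raw_spin_lock_rcu_node(), providing full ordering across a prior
 * unlock and this lock.
 */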
| 115 |  | 
|---|
| 116 | /* | 
|---|
| 117 | * Initialize SRCU per-CPU data.  Note that statically allocated | 
|---|
| 118 | * srcu_struct structures might already have srcu_read_lock() and | 
|---|
| 119 | * srcu_read_unlock() running against them.  So if the is_static | 
|---|
| 120 | * parameter is set, don't initialize ->srcu_ctrs[].srcu_locks and | 
|---|
| 121 | * ->srcu_ctrs[].srcu_unlocks. | 
|---|
| 122 | */ | 
|---|
| 123 | static void init_srcu_struct_data(struct srcu_struct *ssp) | 
|---|
| 124 | { | 
|---|
| 125 | int cpu; | 
|---|
| 126 | struct srcu_data *sdp; | 
|---|
| 127 |  | 
|---|
| 128 | /* | 
|---|
| 129 | * Initialize the per-CPU srcu_data array, which feeds into the | 
|---|
| 130 | * leaves of the srcu_node tree. | 
|---|
| 131 | */ | 
|---|
| 132 | for_each_possible_cpu(cpu) { | 
|---|
| 133 | sdp = per_cpu_ptr(ssp->sda, cpu); | 
|---|
| 134 | spin_lock_init(&ACCESS_PRIVATE(sdp, lock)); | 
|---|
| 135 | rcu_segcblist_init(&sdp->srcu_cblist); | 
|---|
| 136 | sdp->srcu_cblist_invoking = false; | 
|---|
| 137 | sdp->srcu_gp_seq_needed = ssp->srcu_sup->srcu_gp_seq; | 
|---|
| 138 | sdp->srcu_gp_seq_needed_exp = ssp->srcu_sup->srcu_gp_seq; | 
|---|
| 139 | sdp->srcu_barrier_head.next = &sdp->srcu_barrier_head; | 
|---|
| 140 | sdp->mynode = NULL; | 
|---|
| 141 | sdp->cpu = cpu; | 
|---|
| 142 | INIT_WORK(&sdp->work, srcu_invoke_callbacks); | 
|---|
| 143 | timer_setup(&sdp->delay_work, srcu_delay_timer, 0); | 
|---|
| 144 | sdp->ssp = ssp; | 
|---|
| 145 | } | 
|---|
| 146 | } | 
|---|
| 147 |  | 
|---|
| 148 | /* Invalid seq state, used during snp node initialization */ | 
|---|
| 149 | #define SRCU_SNP_INIT_SEQ		0x2 | 
|---|
| 150 |  | 
|---|
| 151 | /* | 
|---|
| 152 | * Check whether the sequence number corresponding to the snp node | 
|---|
| 153 | * is invalid. | 
|---|
| 154 | */ | 
|---|
| 155 | static inline bool srcu_invl_snp_seq(unsigned long s) | 
|---|
| 156 | { | 
|---|
| 157 | return s == SRCU_SNP_INIT_SEQ; | 
|---|
| 158 | } | 
|---|
| 159 |  | 
|---|
| 160 | /* | 
|---|
| 161 | * Allocate and initialize the SRCU combining tree.  Returns @true if | 
|---|
| 162 | * allocation succeeded and @false otherwise. | 
|---|
| 163 | */ | 
|---|
| 164 | static bool init_srcu_struct_nodes(struct srcu_struct *ssp, gfp_t gfp_flags) | 
|---|
| 165 | { | 
|---|
| 166 | int cpu; | 
|---|
| 167 | int i; | 
|---|
| 168 | int level = 0; | 
|---|
| 169 | int levelspread[RCU_NUM_LVLS]; | 
|---|
| 170 | struct srcu_data *sdp; | 
|---|
| 171 | struct srcu_node *snp; | 
|---|
| 172 | struct srcu_node *snp_first; | 
|---|
| 173 |  | 
|---|
| 174 | /* Initialize geometry if it has not already been initialized. */ | 
|---|
| 175 | rcu_init_geometry(); | 
|---|
| 176 | ssp->srcu_sup->node = kcalloc(rcu_num_nodes, sizeof(*ssp->srcu_sup->node), gfp_flags); | 
|---|
| 177 | if (!ssp->srcu_sup->node) | 
|---|
| 178 | return false; | 
|---|
| 179 |  | 
|---|
| 180 | /* Work out the overall tree geometry. */ | 
|---|
| 181 | ssp->srcu_sup->level[0] = &ssp->srcu_sup->node[0]; | 
|---|
| 182 | for (i = 1; i < rcu_num_lvls; i++) | 
|---|
| 183 | ssp->srcu_sup->level[i] = ssp->srcu_sup->level[i - 1] + num_rcu_lvl[i - 1]; | 
|---|
| 184 | rcu_init_levelspread(levelspread, num_rcu_lvl); | 
|---|
| 185 |  | 
|---|
| 186 | /* Each pass through this loop initializes one srcu_node structure. */ | 
|---|
| 187 | srcu_for_each_node_breadth_first(ssp, snp) { | 
|---|
| 188 | spin_lock_init(&ACCESS_PRIVATE(snp, lock)); | 
|---|
| 189 | BUILD_BUG_ON(ARRAY_SIZE(snp->srcu_have_cbs) != | 
|---|
| 190 | ARRAY_SIZE(snp->srcu_data_have_cbs)); | 
|---|
| 191 | for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) { | 
|---|
| 192 | snp->srcu_have_cbs[i] = SRCU_SNP_INIT_SEQ; | 
|---|
| 193 | snp->srcu_data_have_cbs[i] = 0; | 
|---|
| 194 | } | 
|---|
| 195 | snp->srcu_gp_seq_needed_exp = SRCU_SNP_INIT_SEQ; | 
|---|
| 196 | snp->grplo = -1; | 
|---|
| 197 | snp->grphi = -1; | 
|---|
| 198 | if (snp == &ssp->srcu_sup->node[0]) { | 
|---|
| 199 | /* Root node, special case. */ | 
|---|
| 200 | snp->srcu_parent = NULL; | 
|---|
| 201 | continue; | 
|---|
| 202 | } | 
|---|
| 203 |  | 
|---|
| 204 | /* Non-root node. */ | 
|---|
| 205 | if (snp == ssp->srcu_sup->level[level + 1]) | 
|---|
| 206 | level++; | 
|---|
| 207 | snp->srcu_parent = ssp->srcu_sup->level[level - 1] + | 
|---|
| 208 | (snp - ssp->srcu_sup->level[level]) / | 
|---|
| 209 | levelspread[level - 1]; | 
|---|
| 210 | } | 
|---|
| 211 |  | 
|---|
| 212 | /* | 
|---|
| 213 | * Initialize the per-CPU srcu_data array, which feeds into the | 
|---|
| 214 | * leaves of the srcu_node tree. | 
|---|
| 215 | */ | 
|---|
| 216 | level = rcu_num_lvls - 1; | 
|---|
| 217 | snp_first = ssp->srcu_sup->level[level]; | 
|---|
| 218 | for_each_possible_cpu(cpu) { | 
|---|
| 219 | sdp = per_cpu_ptr(ssp->sda, cpu); | 
|---|
| 220 | sdp->mynode = &snp_first[cpu / levelspread[level]]; | 
|---|
| 221 | for (snp = sdp->mynode; snp != NULL; snp = snp->srcu_parent) { | 
|---|
| 222 | if (snp->grplo < 0) | 
|---|
| 223 | snp->grplo = cpu; | 
|---|
| 224 | snp->grphi = cpu; | 
|---|
| 225 | } | 
|---|
| 226 | sdp->grpmask = 1UL << (cpu - sdp->mynode->grplo); | 
|---|
| 227 | } | 
|---|
| 228 | smp_store_release(&ssp->srcu_sup->srcu_size_state, SRCU_SIZE_WAIT_BARRIER); | 
|---|
| 229 | return true; | 
|---|
| 230 | } | 
|---|
| 231 |  | 
|---|
| 232 | /* | 
|---|
| 233 | * Initialize non-compile-time initialized fields, including the | 
|---|
| 234 | * associated srcu_node and srcu_data structures.  The is_static parameter | 
|---|
| 235 | * tells us that ->sda has already been wired up to srcu_data. | 
|---|
| 236 | */ | 
|---|
| 237 | static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) | 
|---|
| 238 | { | 
|---|
| 239 | if (!is_static) | 
|---|
| 240 | ssp->srcu_sup = kzalloc(sizeof(*ssp->srcu_sup), GFP_KERNEL); | 
|---|
| 241 | if (!ssp->srcu_sup) | 
|---|
| 242 | return -ENOMEM; | 
|---|
| 243 | if (!is_static) | 
|---|
| 244 | spin_lock_init(&ACCESS_PRIVATE(ssp->srcu_sup, lock)); | 
|---|
| 245 | ssp->srcu_sup->srcu_size_state = SRCU_SIZE_SMALL; | 
|---|
| 246 | ssp->srcu_sup->node = NULL; | 
|---|
| 247 | mutex_init(&ssp->srcu_sup->srcu_cb_mutex); | 
|---|
| 248 | mutex_init(&ssp->srcu_sup->srcu_gp_mutex); | 
|---|
| 249 | ssp->srcu_sup->srcu_gp_seq = SRCU_GP_SEQ_INITIAL_VAL; | 
|---|
| 250 | ssp->srcu_sup->srcu_barrier_seq = 0; | 
|---|
| 251 | mutex_init(&ssp->srcu_sup->srcu_barrier_mutex); | 
|---|
| 252 | atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 0); | 
|---|
| 253 | INIT_DELAYED_WORK(&ssp->srcu_sup->work, process_srcu); | 
|---|
| 254 | ssp->srcu_sup->sda_is_static = is_static; | 
|---|
| 255 | if (!is_static) { | 
|---|
| 256 | ssp->sda = alloc_percpu(struct srcu_data); | 
|---|
| 257 | ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0]; | 
|---|
| 258 | } | 
|---|
| 259 | if (!ssp->sda) | 
|---|
| 260 | goto err_free_sup; | 
|---|
| 261 | init_srcu_struct_data(ssp); | 
|---|
| 262 | ssp->srcu_sup->srcu_gp_seq_needed_exp = SRCU_GP_SEQ_INITIAL_VAL; | 
|---|
| 263 | ssp->srcu_sup->srcu_last_gp_end = ktime_get_mono_fast_ns(); | 
|---|
| 264 | if (READ_ONCE(ssp->srcu_sup->srcu_size_state) == SRCU_SIZE_SMALL && SRCU_SIZING_IS_INIT()) { | 
|---|
| 265 | if (!init_srcu_struct_nodes(ssp, GFP_ATOMIC)) | 
|---|
| 266 | goto err_free_sda; | 
|---|
| 267 | WRITE_ONCE(ssp->srcu_sup->srcu_size_state, SRCU_SIZE_BIG); | 
|---|
| 268 | } | 
|---|
| 269 | ssp->srcu_sup->srcu_ssp = ssp; | 
|---|
| 270 | smp_store_release(&ssp->srcu_sup->srcu_gp_seq_needed, | 
|---|
| 271 | SRCU_GP_SEQ_INITIAL_VAL); /* Init done. */ | 
|---|
| 272 | return 0; | 
|---|
| 273 |  | 
|---|
| 274 | err_free_sda: | 
|---|
| 275 | if (!is_static) { | 
|---|
| 276 | free_percpu(ssp->sda); | 
|---|
| 277 | ssp->sda = NULL; | 
|---|
| 278 | } | 
|---|
| 279 | err_free_sup: | 
|---|
| 280 | if (!is_static) { | 
|---|
| 281 | kfree(ssp->srcu_sup); | 
|---|
| 282 | ssp->srcu_sup = NULL; | 
|---|
| 283 | } | 
|---|
| 284 | return -ENOMEM; | 
|---|
| 285 | } | 
|---|
| 286 |  | 
|---|
| 287 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 
|---|
| 288 |  | 
|---|
| 289 | int __init_srcu_struct(struct srcu_struct *ssp, const char *name, | 
|---|
| 290 | struct lock_class_key *key) | 
|---|
| 291 | { | 
|---|
| 292 | /* Don't re-initialize a lock while it is held. */ | 
|---|
| 293 | debug_check_no_locks_freed((void *)ssp, sizeof(*ssp)); | 
|---|
| 294 | lockdep_init_map(&ssp->dep_map, name, key, 0); | 
|---|
| 295 | return init_srcu_struct_fields(ssp, false); | 
|---|
| 296 | } | 
|---|
| 297 | EXPORT_SYMBOL_GPL(__init_srcu_struct); | 
|---|
| 298 |  | 
|---|
| 299 | #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | 
|---|
| 300 |  | 
|---|
| 301 | /** | 
|---|
| 302 | * init_srcu_struct - initialize a sleep-RCU structure | 
|---|
| 303 | * @ssp: structure to initialize. | 
|---|
| 304 | * | 
|---|
| 305 | * Must invoke this on a given srcu_struct before passing that srcu_struct | 
|---|
| 306 | * to any other function.  Each srcu_struct represents a separate domain | 
|---|
| 307 | * of SRCU protection. | 
|---|
| 308 | */ | 
|---|
| 309 | int init_srcu_struct(struct srcu_struct *ssp) | 
|---|
| 310 | { | 
|---|
| 311 | return init_srcu_struct_fields(ssp, false); | 
|---|
| 312 | } | 
|---|
| 313 | EXPORT_SYMBOL_GPL(init_srcu_struct); | 
|---|
| 314 |  | 
|---|
| 315 | #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | 
|---|
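/*
 * Illustrative usage sketch (not part of this file): "my_srcu" is a
 * hypothetical caller-defined SRCU domain.  Statically allocated domains
 * would instead use DEFINE_SRCU() or DEFINE_STATIC_SRCU() and skip the
 * init_srcu_struct() call.
 *
 *	struct srcu_struct my_srcu;
 *
 *	if (init_srcu_struct(&my_srcu))
 *		return -ENOMEM;
 *	...
 *	synchronize_srcu(&my_srcu);	// Wait for pre-existing readers.
 *	...
 *	cleanup_srcu_struct(&my_srcu);	// Tear the domain back down.
 */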
| 316 |  | 
|---|
| 317 | /* | 
|---|
| 318 | * Initiate a transition to SRCU_SIZE_BIG with lock held. | 
|---|
| 319 | */ | 
|---|
| 320 | static void __srcu_transition_to_big(struct srcu_struct *ssp) | 
|---|
| 321 | { | 
|---|
| 322 | lockdep_assert_held(&ACCESS_PRIVATE(ssp->srcu_sup, lock)); | 
|---|
| 323 | smp_store_release(&ssp->srcu_sup->srcu_size_state, SRCU_SIZE_ALLOC); | 
|---|
| 324 | } | 
|---|
| 325 |  | 
|---|
| 326 | /* | 
|---|
| 327 | * Initiate an idempotent transition to SRCU_SIZE_BIG. | 
|---|
| 328 | */ | 
|---|
| 329 | static void srcu_transition_to_big(struct srcu_struct *ssp) | 
|---|
| 330 | { | 
|---|
| 331 | unsigned long flags; | 
|---|
| 332 |  | 
|---|
| 333 | /* Double-checked locking on ->srcu_size_state. */ | 
|---|
| 334 | if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) != SRCU_SIZE_SMALL) | 
|---|
| 335 | return; | 
|---|
| 336 | spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags); | 
|---|
| 337 | if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) != SRCU_SIZE_SMALL) { | 
|---|
| 338 | spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); | 
|---|
| 339 | return; | 
|---|
| 340 | } | 
|---|
| 341 | __srcu_transition_to_big(ssp); | 
|---|
| 342 | spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); | 
|---|
| 343 | } | 
|---|
| 344 |  | 
|---|
| 345 | /* | 
|---|
| 346 | * Check to see if the just-encountered contention event justifies | 
|---|
| 347 | * a transition to SRCU_SIZE_BIG. | 
|---|
| 348 | */ | 
|---|
| 349 | static void spin_lock_irqsave_check_contention(struct srcu_struct *ssp) | 
|---|
| 350 | { | 
|---|
| 351 | unsigned long j; | 
|---|
| 352 |  | 
|---|
| 353 | if (!SRCU_SIZING_IS_CONTEND() || ssp->srcu_sup->srcu_size_state) | 
|---|
| 354 | return; | 
|---|
| 355 | j = jiffies; | 
|---|
| 356 | if (ssp->srcu_sup->srcu_size_jiffies != j) { | 
|---|
| 357 | ssp->srcu_sup->srcu_size_jiffies = j; | 
|---|
| 358 | ssp->srcu_sup->srcu_n_lock_retries = 0; | 
|---|
| 359 | } | 
|---|
| 360 | if (++ssp->srcu_sup->srcu_n_lock_retries <= small_contention_lim) | 
|---|
| 361 | return; | 
|---|
| 362 | __srcu_transition_to_big(ssp); | 
|---|
| 363 | } | 
|---|
| 364 |  | 
|---|
| 365 | /* | 
|---|
| 366 | * Acquire the specified srcu_data structure's ->lock, but check for | 
|---|
| 367 | * excessive contention, which results in initiation of a transition | 
|---|
| 368 | * to SRCU_SIZE_BIG.  But only if the srcutree.convert_to_big module | 
|---|
| 369 | * parameter permits this. | 
|---|
| 370 | */ | 
|---|
| 371 | static void spin_lock_irqsave_sdp_contention(struct srcu_data *sdp, unsigned long *flags) | 
|---|
| 372 | { | 
|---|
| 373 | struct srcu_struct *ssp = sdp->ssp; | 
|---|
| 374 |  | 
|---|
| 375 | if (spin_trylock_irqsave_rcu_node(sdp, *flags)) | 
|---|
| 376 | return; | 
|---|
| 377 | spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags); | 
|---|
| 378 | spin_lock_irqsave_check_contention(ssp); | 
|---|
| 379 | spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, *flags); | 
|---|
| 380 | spin_lock_irqsave_rcu_node(sdp, *flags); | 
|---|
| 381 | } | 
|---|
| 382 |  | 
|---|
| 383 | /* | 
|---|
| 384 | * Acquire the specified srcu_struct structure's ->lock, but check for | 
|---|
| 385 | * excessive contention, which results in initiation of a transition | 
|---|
| 386 | * to SRCU_SIZE_BIG.  But only if the srcutree.convert_to_big module | 
|---|
| 387 | * parameter permits this. | 
|---|
| 388 | */ | 
|---|
| 389 | static void spin_lock_irqsave_ssp_contention(struct srcu_struct *ssp, unsigned long *flags) | 
|---|
| 390 | { | 
|---|
| 391 | if (spin_trylock_irqsave_rcu_node(ssp->srcu_sup, *flags)) | 
|---|
| 392 | return; | 
|---|
| 393 | spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags); | 
|---|
| 394 | spin_lock_irqsave_check_contention(ssp); | 
|---|
| 395 | } | 
|---|
| 396 |  | 
|---|
| 397 | /* | 
|---|
| 398 | * First-use initialization of statically allocated srcu_struct | 
|---|
| 399 | * structure.  Wiring up the combining tree is more than can be | 
|---|
| 400 | * done with compile-time initialization, so this check is added | 
|---|
| 401 | * to each update-side SRCU primitive.  Use ssp->lock, which -is- | 
|---|
| 402 | * compile-time initialized, to resolve races involving multiple | 
|---|
| 403 | * CPUs trying to garner first-use privileges. | 
|---|
| 404 | */ | 
|---|
| 405 | static void check_init_srcu_struct(struct srcu_struct *ssp) | 
|---|
| 406 | { | 
|---|
| 407 | unsigned long flags; | 
|---|
| 408 |  | 
|---|
| 409 | /* The smp_load_acquire() pairs with the smp_store_release(). */ | 
|---|
| 410 | if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq_needed))) /*^^^*/ | 
|---|
| 411 | return; /* Already initialized. */ | 
|---|
| 412 | spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags); | 
|---|
| 413 | if (!rcu_seq_state(ssp->srcu_sup->srcu_gp_seq_needed)) { | 
|---|
| 414 | spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); | 
|---|
| 415 | return; | 
|---|
| 416 | } | 
|---|
| 417 | init_srcu_struct_fields(ssp, true); | 
|---|
| 418 | spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); | 
|---|
| 419 | } | 
|---|
| 420 |  | 
|---|
| 421 | /* | 
|---|
| 422 | * Is the current or any upcoming grace period to be expedited? | 
|---|
| 423 | */ | 
|---|
| 424 | static bool srcu_gp_is_expedited(struct srcu_struct *ssp) | 
|---|
| 425 | { | 
|---|
| 426 | struct srcu_usage *sup = ssp->srcu_sup; | 
|---|
| 427 |  | 
|---|
| 428 | return ULONG_CMP_LT(READ_ONCE(sup->srcu_gp_seq), READ_ONCE(sup->srcu_gp_seq_needed_exp)); | 
|---|
| 429 | } | 
|---|
| 430 |  | 
|---|
| 431 | /* | 
|---|
| 432 | * Computes approximate total of the readers' ->srcu_ctrs[].srcu_locks | 
|---|
| 433 | * values for the rank of per-CPU counters specified by idx, and returns | 
|---|
| 434 | * true if the caller did the proper barrier (gp), and if the count of | 
|---|
| 435 | * the locks matches that of the unlocks passed in. | 
|---|
| 436 | */ | 
|---|
| 437 | static bool srcu_readers_lock_idx(struct srcu_struct *ssp, int idx, bool gp, unsigned long unlocks) | 
|---|
| 438 | { | 
|---|
| 439 | int cpu; | 
|---|
| 440 | unsigned long mask = 0; | 
|---|
| 441 | unsigned long sum = 0; | 
|---|
| 442 |  | 
|---|
| 443 | for_each_possible_cpu(cpu) { | 
|---|
| 444 | struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu); | 
|---|
| 445 |  | 
|---|
| 446 | sum += atomic_long_read(&sdp->srcu_ctrs[idx].srcu_locks); | 
|---|
| 447 | if (IS_ENABLED(CONFIG_PROVE_RCU)) | 
|---|
| 448 | mask = mask | READ_ONCE(sdp->srcu_reader_flavor); | 
|---|
| 449 | } | 
|---|
| 450 | WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && (mask & (mask - 1)), | 
|---|
| 451 | "Mixed reader flavors for srcu_struct at %ps.\n", ssp); | 
|---|
| 452 | if (mask & SRCU_READ_FLAVOR_SLOWGP && !gp) | 
|---|
| 453 | return false; | 
|---|
| 454 | return sum == unlocks; | 
|---|
| 455 | } | 
|---|
| 456 |  | 
|---|
| 457 | /* | 
|---|
| 458 | * Returns approximate total of the readers' ->srcu_ctrs[].srcu_unlocks | 
|---|
| 459 | * values for the rank of per-CPU counters specified by idx. | 
|---|
| 460 | */ | 
|---|
| 461 | static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx, unsigned long *rdm) | 
|---|
| 462 | { | 
|---|
| 463 | int cpu; | 
|---|
| 464 | unsigned long mask = 0; | 
|---|
| 465 | unsigned long sum = 0; | 
|---|
| 466 |  | 
|---|
| 467 | for_each_possible_cpu(cpu) { | 
|---|
| 468 | struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu); | 
|---|
| 469 |  | 
|---|
| 470 | sum += atomic_long_read(&sdp->srcu_ctrs[idx].srcu_unlocks); | 
|---|
| 471 | mask = mask | READ_ONCE(sdp->srcu_reader_flavor); | 
|---|
| 472 | } | 
|---|
| 473 | WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && (mask & (mask - 1)), | 
|---|
| 474 | "Mixed reader flavors for srcu_struct at %ps.\n", ssp); | 
|---|
| 475 | *rdm = mask; | 
|---|
| 476 | return sum; | 
|---|
| 477 | } | 
|---|
| 478 |  | 
|---|
| 479 | /* | 
|---|
| 480 | * Return true if the number of pre-existing readers is determined to | 
|---|
| 481 | * be zero. | 
|---|
| 482 | */ | 
|---|
| 483 | static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx) | 
|---|
| 484 | { | 
|---|
| 485 | bool did_gp; | 
|---|
| 486 | unsigned long rdm; | 
|---|
| 487 | unsigned long unlocks; | 
|---|
| 488 |  | 
|---|
| 489 | unlocks = srcu_readers_unlock_idx(ssp, idx, &rdm); | 
|---|
| 490 | did_gp = !!(rdm & SRCU_READ_FLAVOR_SLOWGP); | 
|---|
| 491 |  | 
|---|
| 492 | /* | 
|---|
| 493 | * Make sure that a lock is always counted if the corresponding | 
|---|
| 494 | * unlock is counted. Needs to be a smp_mb() as the read side may | 
|---|
| 495 | * contain a read from a variable that is written to before the | 
|---|
| 496 | * synchronize_srcu() in the write side. In this case smp_mb()s | 
|---|
| 497 | * A and B (or X and Y) act like the store buffering pattern. | 
|---|
| 498 | * | 
|---|
| 499 | * This smp_mb() also pairs with smp_mb() C (or, in the case of X, | 
|---|
| 500 | * Z) to prevent accesses after the synchronize_srcu() from being | 
|---|
| 501 | * executed before the grace period ends. | 
|---|
| 502 | */ | 
|---|
| 503 | if (!did_gp) | 
|---|
| 504 | smp_mb(); /* A */ | 
|---|
| 505 | else if (srcu_gp_is_expedited(ssp)) | 
|---|
| 506 | synchronize_rcu_expedited(); /* X */ | 
|---|
| 507 | else | 
|---|
| 508 | synchronize_rcu(); /* X */ | 
|---|
| 509 |  | 
|---|
| 510 | /* | 
|---|
| 511 | * If the locks are the same as the unlocks, then there must have | 
|---|
| 512 | * been no readers on this index at some point in this function. | 
|---|
| 513 | * But there might be more readers, as a task might have read | 
|---|
| 514 | * the current ->srcu_ctrp but not yet have incremented its CPU's | 
|---|
| 515 | * ->srcu_ctrs[idx].srcu_locks counter.  In fact, it is possible | 
|---|
| 516 | * that most of the tasks have been preempted between fetching | 
|---|
| 517 | * ->srcu_ctrp and incrementing ->srcu_ctrs[idx].srcu_locks.  And | 
|---|
| 518 | * there could be almost (ULONG_MAX / sizeof(struct task_struct)) | 
|---|
| 519 | * tasks in a system whose address space was fully populated | 
|---|
| 520 | * with memory.  Call this quantity Nt. | 
|---|
| 521 | * | 
|---|
| 522 | * So suppose that the updater is preempted at this | 
|---|
| 523 | * point in the code for a long time.  That now-preempted | 
|---|
| 524 | * updater has already flipped ->srcu_ctrp (possibly during | 
|---|
| 525 | * the preceding grace period), done an smp_mb() (again, | 
|---|
| 526 | * possibly during the preceding grace period), and summed up | 
|---|
| 527 | * the ->srcu_ctrs[idx].srcu_unlocks counters.  How many times | 
|---|
| 528 | * can a given one of the aforementioned Nt tasks increment the | 
|---|
| 529 | * old ->srcu_ctrp value's ->srcu_ctrs[idx].srcu_locks counter, | 
|---|
| 530 | * in the absence of nesting? | 
|---|
| 531 | * | 
|---|
| 532 | * It can clearly do so once, given that it has already fetched | 
|---|
| 533 | * the old value of ->srcu_ctrp and is just about to use that | 
|---|
| 534 | * value to index its increment of ->srcu_ctrs[idx].srcu_locks. | 
|---|
| 535 | * But as soon as it leaves that SRCU read-side critical section, | 
|---|
| 536 | * it will increment ->srcu_ctrs[idx].srcu_unlocks, which must | 
|---|
| 537 | * follow the updater's above read from that same value.  Thus, | 
|---|
| 538 | * as soon as the reading task does an smp_mb() and a later fetch from | 
|---|
| 539 | * ->srcu_ctrp, that task will be guaranteed to get the new index. | 
|---|
| 540 | * Except that the increment of ->srcu_ctrs[idx].srcu_unlocks | 
|---|
| 541 | * in __srcu_read_unlock() is after the smp_mb(), and the fetch | 
|---|
| 542 | * from ->srcu_ctrp in __srcu_read_lock() is before the smp_mb(). | 
|---|
| 543 | * Thus, that task might not see the new value of ->srcu_ctrp until | 
|---|
| 544 | * the -second- __srcu_read_lock(), which in turn means that this | 
|---|
| 545 | * task might well increment ->srcu_ctrs[idx].srcu_locks for the | 
|---|
| 546 | * old value of ->srcu_ctrp twice, not just once. | 
|---|
| 547 | * | 
|---|
| 548 | * However, it is important to note that a given smp_mb() takes | 
|---|
| 549 | * effect not just for the task executing it, but also for any | 
|---|
| 550 | * later task running on that same CPU. | 
|---|
| 551 | * | 
|---|
| 552 | * That is, there can be almost Nt + Nc further increments | 
|---|
| 553 | * of ->srcu_ctrs[idx].srcu_locks for the old index, where Nc | 
|---|
| 554 | * is the number of CPUs.  But this is OK because the size of | 
|---|
| 555 | * the task_struct structure limits the value of Nt and current | 
|---|
| 556 | * systems limit Nc to a few thousand. | 
|---|
| 557 | * | 
|---|
| 558 | * OK, but what about nesting?  This does impose a limit on | 
|---|
| 559 | * nesting of half of the size of the task_struct structure | 
|---|
| 560 | * (measured in bytes), which should be sufficient.  A late 2022 | 
|---|
| 561 | * TREE01 rcutorture run reported this size to be no less than | 
|---|
| 562 | * 9408 bytes, allowing up to 4704 levels of nesting, which is | 
|---|
| 563 | * comfortably beyond excessive.  Especially on 64-bit systems, | 
|---|
| 564 | * which are unlikely to be configured with an address space fully | 
|---|
| 565 | * populated with memory, at least not anytime soon. | 
|---|
| 566 | */ | 
|---|
| 567 | return srcu_readers_lock_idx(ssp, idx, did_gp, unlocks); | 
|---|
| 568 | } | 
|---|
| 569 |  | 
|---|
| 570 | /** | 
|---|
| 571 | * srcu_readers_active - returns true if there are readers, and false | 
|---|
| 572 | *                       otherwise | 
|---|
| 573 | * @ssp: which srcu_struct to count active readers (holding srcu_read_lock). | 
|---|
| 574 | * | 
|---|
| 575 | * Note that this is not an atomic primitive, and can therefore suffer | 
|---|
| 576 | * severe errors when invoked on an active srcu_struct.  That said, it | 
|---|
| 577 | * can be useful as an error check at cleanup time. | 
|---|
| 578 | */ | 
|---|
| 579 | static bool srcu_readers_active(struct srcu_struct *ssp) | 
|---|
| 580 | { | 
|---|
| 581 | int cpu; | 
|---|
| 582 | unsigned long sum = 0; | 
|---|
| 583 |  | 
|---|
| 584 | for_each_possible_cpu(cpu) { | 
|---|
| 585 | struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu); | 
|---|
| 586 |  | 
|---|
| 587 | sum += atomic_long_read(&sdp->srcu_ctrs[0].srcu_locks); | 
|---|
| 588 | sum += atomic_long_read(&sdp->srcu_ctrs[1].srcu_locks); | 
|---|
| 589 | sum -= atomic_long_read(&sdp->srcu_ctrs[0].srcu_unlocks); | 
|---|
| 590 | sum -= atomic_long_read(&sdp->srcu_ctrs[1].srcu_unlocks); | 
|---|
| 591 | } | 
|---|
| 592 | return sum; | 
|---|
| 593 | } | 
|---|
| 594 |  | 
|---|
| 595 | /* | 
|---|
| 596 | * We use an adaptive strategy for synchronize_srcu() and especially for | 
|---|
| 597 | * synchronize_srcu_expedited().  We spin for a fixed time period | 
|---|
| 598 | * (defined below, boot time configurable) to allow SRCU readers to exit | 
|---|
| 599 | * their read-side critical sections.  If there are still some readers | 
|---|
| 600 | * after one jiffy, we repeatedly block for 1-jiffy time periods. | 
|---|
| 601 | * The blocking time is increased as the grace-period age increases, | 
|---|
| 602 | * with max blocking time capped at 10 jiffies. | 
|---|
| 603 | */ | 
|---|
| 604 | #define SRCU_DEFAULT_RETRY_CHECK_DELAY		5 | 
|---|
| 605 |  | 
|---|
| 606 | static ulong srcu_retry_check_delay = SRCU_DEFAULT_RETRY_CHECK_DELAY; | 
|---|
| 607 | module_param(srcu_retry_check_delay, ulong, 0444); | 
|---|
| 608 |  | 
|---|
| 609 | #define SRCU_INTERVAL		1		// Base delay if no expedited GPs pending. | 
|---|
| 610 | #define SRCU_MAX_INTERVAL	10		// Maximum incremental delay from slow readers. | 
|---|
| 611 |  | 
|---|
| 612 | #define SRCU_DEFAULT_MAX_NODELAY_PHASE_LO	3UL	// Lowmark on default per-GP-phase | 
|---|
| 613 | // no-delay instances. | 
|---|
| 614 | #define SRCU_DEFAULT_MAX_NODELAY_PHASE_HI	1000UL	// Highmark on default per-GP-phase | 
|---|
| 615 | // no-delay instances. | 
|---|
| 616 |  | 
|---|
| 617 | #define SRCU_UL_CLAMP_LO(val, low)	((val) > (low) ? (val) : (low)) | 
|---|
| 618 | #define SRCU_UL_CLAMP_HI(val, high)	((val) < (high) ? (val) : (high)) | 
|---|
| 619 | #define SRCU_UL_CLAMP(val, low, high)	SRCU_UL_CLAMP_HI(SRCU_UL_CLAMP_LO((val), (low)), (high)) | 
|---|
| 620 | // per-GP-phase no-delay instances adjusted to allow non-sleeping poll for up to | 
|---|
| 621 | // one jiffy's worth of time. The multiplication by 2 factors in the srcu_get_delay() | 
|---|
| 622 | // called from process_srcu(). | 
|---|
| 623 | #define SRCU_DEFAULT_MAX_NODELAY_PHASE_ADJUSTED	\ | 
|---|
| 624 | (2UL * USEC_PER_SEC / HZ / SRCU_DEFAULT_RETRY_CHECK_DELAY) | 
|---|
| 625 |  | 
|---|
| 626 | // Maximum per-GP-phase consecutive no-delay instances. | 
|---|
| 627 | #define SRCU_DEFAULT_MAX_NODELAY_PHASE	\ | 
|---|
| 628 | SRCU_UL_CLAMP(SRCU_DEFAULT_MAX_NODELAY_PHASE_ADJUSTED,	\ | 
|---|
| 629 | SRCU_DEFAULT_MAX_NODELAY_PHASE_LO,	\ | 
|---|
| 630 | SRCU_DEFAULT_MAX_NODELAY_PHASE_HI) | 
|---|
| 631 |  | 
|---|
| 632 | static ulong srcu_max_nodelay_phase = SRCU_DEFAULT_MAX_NODELAY_PHASE; | 
|---|
| 633 | module_param(srcu_max_nodelay_phase, ulong, 0444); | 
|---|
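/*
 * Worked example (illustrative): with HZ=1000 and the default
 * SRCU_DEFAULT_RETRY_CHECK_DELAY of 5 microseconds, the adjusted value is
 * 2 * 1000000 / 1000 / 5 = 400, which lies within the [3UL, 1000UL] clamp,
 * so SRCU_DEFAULT_MAX_NODELAY_PHASE is 400.  With HZ=100 the raw value
 * would be 4000, which the clamp caps at 1000.
 */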
| 634 |  | 
|---|
| 635 | // Maximum consecutive no-delay instances. | 
|---|
| 636 | #define SRCU_DEFAULT_MAX_NODELAY	(SRCU_DEFAULT_MAX_NODELAY_PHASE > 100 ?	\ | 
|---|
| 637 | SRCU_DEFAULT_MAX_NODELAY_PHASE : 100) | 
|---|
| 638 |  | 
|---|
| 639 | static ulong srcu_max_nodelay = SRCU_DEFAULT_MAX_NODELAY; | 
|---|
| 640 | module_param(srcu_max_nodelay, ulong, 0444); | 
|---|
| 641 |  | 
|---|
| 642 | /* | 
|---|
| 643 | * Return grace-period delay, zero if there are expedited grace | 
|---|
| 644 | * periods pending, SRCU_INTERVAL otherwise. | 
|---|
| 645 | */ | 
|---|
| 646 | static unsigned long srcu_get_delay(struct srcu_struct *ssp) | 
|---|
| 647 | { | 
|---|
| 648 | unsigned long gpstart; | 
|---|
| 649 | unsigned long j; | 
|---|
| 650 | unsigned long jbase = SRCU_INTERVAL; | 
|---|
| 651 | struct srcu_usage *sup = ssp->srcu_sup; | 
|---|
| 652 |  | 
|---|
| 653 | lockdep_assert_held(&ACCESS_PRIVATE(ssp->srcu_sup, lock)); | 
|---|
| 654 | if (srcu_gp_is_expedited(ssp)) | 
|---|
| 655 | jbase = 0; | 
|---|
| 656 | if (rcu_seq_state(READ_ONCE(sup->srcu_gp_seq))) { | 
|---|
| 657 | j = jiffies - 1; | 
|---|
| 658 | gpstart = READ_ONCE(sup->srcu_gp_start); | 
|---|
| 659 | if (time_after(j, gpstart)) | 
|---|
| 660 | jbase += j - gpstart; | 
|---|
| 661 | if (!jbase) { | 
|---|
| 662 | ASSERT_EXCLUSIVE_WRITER(sup->srcu_n_exp_nodelay); | 
|---|
| 663 | WRITE_ONCE(sup->srcu_n_exp_nodelay, READ_ONCE(sup->srcu_n_exp_nodelay) + 1); | 
|---|
| 664 | if (READ_ONCE(sup->srcu_n_exp_nodelay) > srcu_max_nodelay_phase) | 
|---|
| 665 | jbase = 1; | 
|---|
| 666 | } | 
|---|
| 667 | } | 
|---|
| 668 | return jbase > SRCU_MAX_INTERVAL ? SRCU_MAX_INTERVAL : jbase; | 
|---|
| 669 | } | 
|---|
| 670 |  | 
|---|
| 671 | /** | 
|---|
| 672 | * cleanup_srcu_struct - deconstruct a sleep-RCU structure | 
|---|
| 673 | * @ssp: structure to clean up. | 
|---|
| 674 | * | 
|---|
| 675 | * Must invoke this after you are finished using a given srcu_struct that | 
|---|
| 676 | * was initialized via init_srcu_struct(), else you leak memory. | 
|---|
| 677 | */ | 
|---|
| 678 | void cleanup_srcu_struct(struct srcu_struct *ssp) | 
|---|
| 679 | { | 
|---|
| 680 | int cpu; | 
|---|
| 681 | unsigned long delay; | 
|---|
| 682 | struct srcu_usage *sup = ssp->srcu_sup; | 
|---|
| 683 |  | 
|---|
| 684 | spin_lock_irq_rcu_node(ssp->srcu_sup); | 
|---|
| 685 | delay = srcu_get_delay(ssp); | 
|---|
| 686 | spin_unlock_irq_rcu_node(ssp->srcu_sup); | 
|---|
| 687 | if (WARN_ON(!delay)) | 
|---|
| 688 | return; /* Just leak it! */ | 
|---|
| 689 | if (WARN_ON(srcu_readers_active(ssp))) | 
|---|
| 690 | return; /* Just leak it! */ | 
|---|
| 691 | flush_delayed_work(&sup->work); | 
|---|
| 692 | for_each_possible_cpu(cpu) { | 
|---|
| 693 | struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu); | 
|---|
| 694 |  | 
|---|
| 695 | timer_delete_sync(&sdp->delay_work); | 
|---|
| 696 | flush_work(&sdp->work); | 
|---|
| 697 | if (WARN_ON(rcu_segcblist_n_cbs(&sdp->srcu_cblist))) | 
|---|
| 698 | return; /* Forgot srcu_barrier(), so just leak it! */ | 
|---|
| 699 | } | 
|---|
| 700 | if (WARN_ON(rcu_seq_state(READ_ONCE(sup->srcu_gp_seq)) != SRCU_STATE_IDLE) || | 
|---|
| 701 | WARN_ON(rcu_seq_current(&sup->srcu_gp_seq) != sup->srcu_gp_seq_needed) || | 
|---|
| 702 | WARN_ON(srcu_readers_active(ssp))) { | 
|---|
| 703 | pr_info("%s: Active srcu_struct %p read state: %d gp state: %lu/%lu\n", | 
|---|
| 704 | __func__, ssp, rcu_seq_state(READ_ONCE(sup->srcu_gp_seq)), | 
|---|
| 705 | rcu_seq_current(&sup->srcu_gp_seq), sup->srcu_gp_seq_needed); | 
|---|
| 706 | return; // Caller forgot to stop doing call_srcu()? | 
|---|
| 707 | // Or caller invoked start_poll_synchronize_srcu() | 
|---|
| 708 | // and then cleanup_srcu_struct() before that grace | 
|---|
| 709 | // period ended? | 
|---|
| 710 | } | 
|---|
| 711 | kfree(sup->node); | 
|---|
| 712 | sup->node = NULL; | 
|---|
| 713 | sup->srcu_size_state = SRCU_SIZE_SMALL; | 
|---|
| 714 | if (!sup->sda_is_static) { | 
|---|
| 715 | free_percpu(ssp->sda); | 
|---|
| 716 | ssp->sda = NULL; | 
|---|
| 717 | kfree(sup); | 
|---|
| 718 | ssp->srcu_sup = NULL; | 
|---|
| 719 | } | 
|---|
| 720 | } | 
|---|
| 721 | EXPORT_SYMBOL_GPL(cleanup_srcu_struct); | 
|---|
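/*
 * Illustrative sketch (not part of this file): when call_srcu() has been
 * used on a domain, the caller must wait for those callbacks before
 * tearing the domain down, which is what the rcu_segcblist_n_cbs()
 * WARN_ON() above checks for.  "my_srcu", "obj", and "obj_free_cb" are
 * hypothetical caller-side names.
 *
 *	call_srcu(&my_srcu, &obj->rcu_head, obj_free_cb);
 *	...
 *	srcu_barrier(&my_srcu);		// Wait for pending callbacks.
 *	cleanup_srcu_struct(&my_srcu);	// Now safe to tear down.
 */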
| 722 |  | 
|---|
| 723 | /* | 
|---|
| 724 | * Check for consistent reader flavor. | 
|---|
| 725 | */ | 
|---|
| 726 | void __srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor) | 
|---|
| 727 | { | 
|---|
| 728 | int old_read_flavor; | 
|---|
| 729 | struct srcu_data *sdp; | 
|---|
| 730 |  | 
|---|
| 731 | /* NMI-unsafe use in NMI is a bad sign, as are multi-bit read_flavor values. */ | 
|---|
| 732 | WARN_ON_ONCE((read_flavor != SRCU_READ_FLAVOR_NMI) && in_nmi()); | 
|---|
| 733 | WARN_ON_ONCE(read_flavor & (read_flavor - 1)); | 
|---|
| 734 |  | 
|---|
| 735 | sdp = raw_cpu_ptr(ssp->sda); | 
|---|
| 736 | old_read_flavor = READ_ONCE(sdp->srcu_reader_flavor); | 
|---|
| 737 | if (!old_read_flavor) { | 
|---|
| 738 | old_read_flavor = cmpxchg(&sdp->srcu_reader_flavor, 0, read_flavor); | 
|---|
| 739 | if (!old_read_flavor) | 
|---|
| 740 | return; | 
|---|
| 741 | } | 
|---|
| 742 | WARN_ONCE(old_read_flavor != read_flavor, "CPU %d old state %d new state %d\n", sdp->cpu, old_read_flavor, read_flavor); | 
|---|
| 743 | } | 
|---|
| 744 | EXPORT_SYMBOL_GPL(__srcu_check_read_flavor); | 
|---|
| 745 |  | 
|---|
| 746 | /* | 
|---|
| 747 | * Counts the new reader in the appropriate per-CPU element of the | 
|---|
| 748 | * srcu_struct. | 
|---|
| 749 | * Returns a guaranteed non-negative index that must be passed to the | 
|---|
| 750 | * matching __srcu_read_unlock(). | 
|---|
| 751 | */ | 
|---|
| 752 | int __srcu_read_lock(struct srcu_struct *ssp) | 
|---|
| 753 | { | 
|---|
| 754 | struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp); | 
|---|
| 755 |  | 
|---|
| 756 | this_cpu_inc(scp->srcu_locks.counter); | 
|---|
| 757 | smp_mb(); /* B */  /* Avoid leaking the critical section. */ | 
|---|
| 758 | return __srcu_ptr_to_ctr(ssp, scp); | 
|---|
| 759 | } | 
|---|
| 760 | EXPORT_SYMBOL_GPL(__srcu_read_lock); | 
|---|
| 761 |  | 
|---|
| 762 | /* | 
|---|
| 763 | * Removes the count for the old reader from the appropriate per-CPU | 
|---|
| 764 | * element of the srcu_struct.  Note that this may well be a different | 
|---|
| 765 | * CPU than that which was incremented by the corresponding srcu_read_lock(). | 
|---|
| 766 | */ | 
|---|
| 767 | void __srcu_read_unlock(struct srcu_struct *ssp, int idx) | 
|---|
| 768 | { | 
|---|
| 769 | smp_mb(); /* C */  /* Avoid leaking the critical section. */ | 
|---|
| 770 | this_cpu_inc(__srcu_ctr_to_ptr(ssp, idx)->srcu_unlocks.counter); | 
|---|
| 771 | } | 
|---|
| 772 | EXPORT_SYMBOL_GPL(__srcu_read_unlock); | 
|---|
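/*
 * Illustrative read-side sketch (not part of this file): readers normally
 * go through the srcu_read_lock()/srcu_read_unlock() wrappers rather than
 * calling the __srcu_*() functions above directly.  "my_srcu", "gp", and
 * "do_something_with()" are hypothetical caller-side names.
 *
 *	int idx;
 *	struct foo *p;
 *
 *	idx = srcu_read_lock(&my_srcu);
 *	p = srcu_dereference(gp, &my_srcu);
 *	if (p)
 *		do_something_with(p);	// May block, unlike plain RCU readers.
 *	srcu_read_unlock(&my_srcu, idx);
 */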
| 773 |  | 
|---|
| 774 | #ifdef CONFIG_NEED_SRCU_NMI_SAFE | 
|---|
| 775 |  | 
|---|
| 776 | /* | 
|---|
| 777 | * Counts the new reader in the appropriate per-CPU element of the | 
|---|
| 778 | * srcu_struct, but in an NMI-safe manner using RMW atomics. | 
|---|
| 779 | * Returns an index that must be passed to the matching srcu_read_unlock(). | 
|---|
| 780 | */ | 
|---|
| 781 | int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) | 
|---|
| 782 | { | 
|---|
| 783 | struct srcu_ctr __percpu *scpp = READ_ONCE(ssp->srcu_ctrp); | 
|---|
| 784 | struct srcu_ctr *scp = raw_cpu_ptr(scpp); | 
|---|
| 785 |  | 
|---|
| 786 | atomic_long_inc(&scp->srcu_locks); | 
|---|
| 787 | smp_mb__after_atomic(); /* B */  /* Avoid leaking the critical section. */ | 
|---|
| 788 | return __srcu_ptr_to_ctr(ssp, scpp); | 
|---|
| 789 | } | 
|---|
| 790 | EXPORT_SYMBOL_GPL(__srcu_read_lock_nmisafe); | 
|---|
| 791 |  | 
|---|
| 792 | /* | 
|---|
| 793 | * Removes the count for the old reader from the appropriate per-CPU | 
|---|
| 794 | * element of the srcu_struct.  Note that this may well be a different | 
|---|
| 795 | * CPU than that which was incremented by the corresponding srcu_read_lock(). | 
|---|
| 796 | */ | 
|---|
| 797 | void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) | 
|---|
| 798 | { | 
|---|
| 799 | smp_mb__before_atomic(); /* C */  /* Avoid leaking the critical section. */ | 
|---|
| 800 | atomic_long_inc(&raw_cpu_ptr(__srcu_ctr_to_ptr(ssp, idx))->srcu_unlocks); | 
|---|
| 801 | } | 
|---|
| 802 | EXPORT_SYMBOL_GPL(__srcu_read_unlock_nmisafe); | 
|---|
| 803 |  | 
|---|
| 804 | #endif // CONFIG_NEED_SRCU_NMI_SAFE | 
|---|
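/*
 * Illustrative sketch (not part of this file): NMI-context readers use the
 * _nmisafe wrappers, and a given srcu_struct must then use that flavor
 * consistently, as checked by __srcu_check_read_flavor() above.  "my_srcu"
 * is a hypothetical caller-side domain.
 *
 *	idx = srcu_read_lock_nmisafe(&my_srcu);
 *	...NMI-safe read-side critical section...
 *	srcu_read_unlock_nmisafe(&my_srcu, idx);
 */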
| 805 |  | 
|---|
| 806 | /* | 
|---|
| 807 | * Start an SRCU grace period. | 
|---|
| 808 | */ | 
|---|
| 809 | static void srcu_gp_start(struct srcu_struct *ssp) | 
|---|
| 810 | { | 
|---|
| 811 | int state; | 
|---|
| 812 |  | 
|---|
| 813 | lockdep_assert_held(&ACCESS_PRIVATE(ssp->srcu_sup, lock)); | 
|---|
| 814 | WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)); | 
|---|
| 815 | WRITE_ONCE(ssp->srcu_sup->srcu_gp_start, jiffies); | 
|---|
| 816 | WRITE_ONCE(ssp->srcu_sup->srcu_n_exp_nodelay, 0); | 
|---|
| 817 | smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */ | 
|---|
| 818 | rcu_seq_start(&ssp->srcu_sup->srcu_gp_seq); | 
|---|
| 819 | state = rcu_seq_state(ssp->srcu_sup->srcu_gp_seq); | 
|---|
| 820 | WARN_ON_ONCE(state != SRCU_STATE_SCAN1); | 
|---|
| 821 | } | 
|---|
| 822 |  | 
|---|
| 823 |  | 
|---|
| 824 | static void srcu_delay_timer(struct timer_list *t) | 
|---|
| 825 | { | 
|---|
| 826 | struct srcu_data *sdp = container_of(t, struct srcu_data, delay_work); | 
|---|
| 827 |  | 
|---|
| 828 | queue_work_on(sdp->cpu, rcu_gp_wq, &sdp->work); | 
|---|
| 829 | } | 
|---|
| 830 |  | 
|---|
| 831 | static void srcu_queue_delayed_work_on(struct srcu_data *sdp, | 
|---|
| 832 | unsigned long delay) | 
|---|
| 833 | { | 
|---|
| 834 | if (!delay) { | 
|---|
| 835 | queue_work_on(sdp->cpu, rcu_gp_wq, &sdp->work); | 
|---|
| 836 | return; | 
|---|
| 837 | } | 
|---|
| 838 |  | 
|---|
| 839 | timer_reduce(&sdp->delay_work, jiffies + delay); | 
|---|
| 840 | } | 
|---|
| 841 |  | 
|---|
| 842 | /* | 
|---|
| 843 | * Schedule callback invocation for the specified srcu_data structure, | 
|---|
| 844 | * if possible, on the corresponding CPU. | 
|---|
| 845 | */ | 
|---|
| 846 | static void srcu_schedule_cbs_sdp(struct srcu_data *sdp, unsigned long delay) | 
|---|
| 847 | { | 
|---|
| 848 | srcu_queue_delayed_work_on(sdp, delay); | 
|---|
| 849 | } | 
|---|
| 850 |  | 
|---|
| 851 | /* | 
|---|
| 852 | * Schedule callback invocation for all srcu_data structures associated | 
|---|
| 853 | * with the specified srcu_node structure that have callbacks for the | 
|---|
| 854 | * just-completed grace period, the one corresponding to idx.  If possible, | 
|---|
| 855 | * schedule this invocation on the corresponding CPUs. | 
|---|
| 856 | */ | 
|---|
| 857 | static void srcu_schedule_cbs_snp(struct srcu_struct *ssp, struct srcu_node *snp, | 
|---|
| 858 | unsigned long mask, unsigned long delay) | 
|---|
| 859 | { | 
|---|
| 860 | int cpu; | 
|---|
| 861 |  | 
|---|
| 862 | for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) { | 
|---|
| 863 | if (!(mask & (1UL << (cpu - snp->grplo)))) | 
|---|
| 864 | continue; | 
|---|
| 865 | srcu_schedule_cbs_sdp(per_cpu_ptr(ssp->sda, cpu), delay); | 
|---|
| 866 | } | 
|---|
| 867 | } | 
|---|
| 868 |  | 
|---|
| 869 | /* | 
|---|
| 870 | * Note the end of an SRCU grace period.  Initiates callback invocation | 
|---|
| 871 | * and starts a new grace period if needed. | 
|---|
| 872 | * | 
|---|
| 873 | * The ->srcu_cb_mutex acquisition does not protect any data, but | 
|---|
| 874 | * instead prevents more than one grace period from starting while we | 
|---|
| 875 | * are initiating callback invocation.  This allows the ->srcu_have_cbs[] | 
|---|
| 876 | * array to have a finite number of elements. | 
|---|
| 877 | */ | 
|---|
| 878 | static void srcu_gp_end(struct srcu_struct *ssp) | 
|---|
| 879 | { | 
|---|
| 880 | unsigned long cbdelay = 1; | 
|---|
| 881 | bool cbs; | 
|---|
| 882 | bool last_lvl; | 
|---|
| 883 | int cpu; | 
|---|
| 884 | unsigned long gpseq; | 
|---|
| 885 | int idx; | 
|---|
| 886 | unsigned long mask; | 
|---|
| 887 | struct srcu_data *sdp; | 
|---|
| 888 | unsigned long sgsne; | 
|---|
| 889 | struct srcu_node *snp; | 
|---|
| 890 | int ss_state; | 
|---|
| 891 | struct srcu_usage *sup = ssp->srcu_sup; | 
|---|
| 892 |  | 
|---|
| 893 | /* Prevent more than one additional grace period. */ | 
|---|
| 894 | mutex_lock(&sup->srcu_cb_mutex); | 
|---|
| 895 |  | 
|---|
| 896 | /* End the current grace period. */ | 
|---|
| 897 | spin_lock_irq_rcu_node(sup); | 
|---|
| 898 | idx = rcu_seq_state(sup->srcu_gp_seq); | 
|---|
| 899 | WARN_ON_ONCE(idx != SRCU_STATE_SCAN2); | 
|---|
| 900 | if (srcu_gp_is_expedited(ssp)) | 
|---|
| 901 | cbdelay = 0; | 
|---|
| 902 |  | 
|---|
| 903 | WRITE_ONCE(sup->srcu_last_gp_end, ktime_get_mono_fast_ns()); | 
|---|
| 904 | rcu_seq_end(&sup->srcu_gp_seq); | 
|---|
| 905 | gpseq = rcu_seq_current(&sup->srcu_gp_seq); | 
|---|
| 906 | if (ULONG_CMP_LT(sup->srcu_gp_seq_needed_exp, gpseq)) | 
|---|
| 907 | WRITE_ONCE(sup->srcu_gp_seq_needed_exp, gpseq); | 
|---|
| 908 | spin_unlock_irq_rcu_node(sup); | 
|---|
| 909 | mutex_unlock(&sup->srcu_gp_mutex); | 
|---|
| 910 | /* A new grace period can start at this point.  But only one. */ | 
|---|
| 911 |  | 
|---|
| 912 | /* Initiate callback invocation as needed. */ | 
|---|
| 913 | ss_state = smp_load_acquire(&sup->srcu_size_state); | 
|---|
| 914 | if (ss_state < SRCU_SIZE_WAIT_BARRIER) { | 
|---|
| 915 | srcu_schedule_cbs_sdp(per_cpu_ptr(ssp->sda, get_boot_cpu_id()), | 
|---|
| 916 | cbdelay); | 
|---|
| 917 | } else { | 
|---|
| 918 | idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs); | 
|---|
| 919 | srcu_for_each_node_breadth_first(ssp, snp) { | 
|---|
| 920 | spin_lock_irq_rcu_node(snp); | 
|---|
| 921 | cbs = false; | 
|---|
| 922 | last_lvl = snp >= sup->level[rcu_num_lvls - 1]; | 
|---|
| 923 | if (last_lvl) | 
|---|
| 924 | cbs = ss_state < SRCU_SIZE_BIG || snp->srcu_have_cbs[idx] == gpseq; | 
|---|
| 925 | snp->srcu_have_cbs[idx] = gpseq; | 
|---|
| 926 | rcu_seq_set_state(&snp->srcu_have_cbs[idx], 1); | 
|---|
| 927 | sgsne = snp->srcu_gp_seq_needed_exp; | 
|---|
| 928 | if (srcu_invl_snp_seq(sgsne) || ULONG_CMP_LT(sgsne, gpseq)) | 
|---|
| 929 | WRITE_ONCE(snp->srcu_gp_seq_needed_exp, gpseq); | 
|---|
| 930 | if (ss_state < SRCU_SIZE_BIG) | 
|---|
| 931 | mask = ~0; | 
|---|
| 932 | else | 
|---|
| 933 | mask = snp->srcu_data_have_cbs[idx]; | 
|---|
| 934 | snp->srcu_data_have_cbs[idx] = 0; | 
|---|
| 935 | spin_unlock_irq_rcu_node(snp); | 
|---|
| 936 | if (cbs) | 
|---|
| 937 | srcu_schedule_cbs_snp(ssp, snp, mask, cbdelay); | 
|---|
| 938 | } | 
|---|
| 939 | } | 
|---|
| 940 |  | 
|---|
| 941 | /* Occasionally prevent srcu_data counter wrap. */ | 
|---|
| 942 | if (!(gpseq & counter_wrap_check)) | 
|---|
| 943 | for_each_possible_cpu(cpu) { | 
|---|
| 944 | sdp = per_cpu_ptr(ssp->sda, cpu); | 
|---|
| 945 | spin_lock_irq_rcu_node(sdp); | 
|---|
| 946 | if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed + 100)) | 
|---|
| 947 | sdp->srcu_gp_seq_needed = gpseq; | 
|---|
| 948 | if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed_exp + 100)) | 
|---|
| 949 | sdp->srcu_gp_seq_needed_exp = gpseq; | 
|---|
| 950 | spin_unlock_irq_rcu_node(sdp); | 
|---|
| 951 | } | 
|---|
| 952 |  | 
|---|
| 953 | /* Callback initiation done, allow grace periods after next. */ | 
|---|
| 954 | mutex_unlock(&sup->srcu_cb_mutex); | 
|---|
| 955 |  | 
|---|
| 956 | /* Start a new grace period if needed. */ | 
|---|
| 957 | spin_lock_irq_rcu_node(sup); | 
|---|
| 958 | gpseq = rcu_seq_current(&sup->srcu_gp_seq); | 
|---|
| 959 | if (!rcu_seq_state(gpseq) && | 
|---|
| 960 | ULONG_CMP_LT(gpseq, sup->srcu_gp_seq_needed)) { | 
|---|
| 961 | srcu_gp_start(ssp); | 
|---|
| 962 | spin_unlock_irq_rcu_node(sup); | 
|---|
| 963 | srcu_reschedule(ssp, 0); | 
|---|
| 964 | } else { | 
|---|
| 965 | spin_unlock_irq_rcu_node(sup); | 
|---|
| 966 | } | 
|---|
| 967 |  | 
|---|
| 968 | /* Transition to big if needed. */ | 
|---|
| 969 | if (ss_state != SRCU_SIZE_SMALL && ss_state != SRCU_SIZE_BIG) { | 
|---|
| 970 | if (ss_state == SRCU_SIZE_ALLOC) | 
|---|
| 971 | init_srcu_struct_nodes(ssp, GFP_KERNEL); | 
|---|
| 972 | else | 
|---|
| 973 | smp_store_release(&sup->srcu_size_state, ss_state + 1); | 
|---|
| 974 | } | 
|---|
| 975 | } | 
|---|
| 976 |  | 
|---|
| 977 | /* | 
|---|
| 978 | * Funnel-locking scheme to scalably mediate many concurrent expedited | 
|---|
| 979 | * grace-period requests.  This function is invoked for the first known | 
|---|
| 980 | * expedited request for a grace period that has already been requested, | 
|---|
| 981 | * but without expediting.  To start a completely new grace period, | 
|---|
| 982 | * whether expedited or not, use srcu_funnel_gp_start() instead. | 
|---|
| 983 | */ | 
|---|
| 984 | static void srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp, | 
|---|
| 985 | unsigned long s) | 
|---|
| 986 | { | 
|---|
| 987 | unsigned long flags; | 
|---|
| 988 | unsigned long sgsne; | 
|---|
| 989 |  | 
|---|
| 990 | if (snp) | 
|---|
| 991 | for (; snp != NULL; snp = snp->srcu_parent) { | 
|---|
| 992 | sgsne = READ_ONCE(snp->srcu_gp_seq_needed_exp); | 
|---|
| 993 | if (WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, s)) || | 
|---|
| 994 | (!srcu_invl_snp_seq(sgsne) && ULONG_CMP_GE(sgsne, s))) | 
|---|
| 995 | return; | 
|---|
| 996 | spin_lock_irqsave_rcu_node(snp, flags); | 
|---|
| 997 | sgsne = snp->srcu_gp_seq_needed_exp; | 
|---|
| 998 | if (!srcu_invl_snp_seq(sgsne) && ULONG_CMP_GE(sgsne, s)) { | 
|---|
| 999 | spin_unlock_irqrestore_rcu_node(snp, flags); | 
|---|
| 1000 | return; | 
|---|
| 1001 | } | 
|---|
| 1002 | WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s); | 
|---|
| 1003 | spin_unlock_irqrestore_rcu_node(snp, flags); | 
|---|
| 1004 | } | 
|---|
| 1005 | spin_lock_irqsave_ssp_contention(ssp, &flags); | 
|---|
| 1006 | if (ULONG_CMP_LT(ssp->srcu_sup->srcu_gp_seq_needed_exp, s)) | 
|---|
| 1007 | WRITE_ONCE(ssp->srcu_sup->srcu_gp_seq_needed_exp, s); | 
|---|
| 1008 | spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags); | 
|---|
| 1009 | } | 
|---|
| 1010 |  | 
|---|
| 1011 | /* | 
|---|
| 1012 | * Funnel-locking scheme to scalably mediate many concurrent grace-period | 
|---|
| 1013 | * requests.  The winner has to do the work of actually starting grace | 
|---|
| 1014 | * period s.  Losers must either ensure that their desired grace-period | 
|---|
| 1015 | * number is recorded on at least their leaf srcu_node structure, or they | 
|---|
| 1016 | * must take steps to invoke their own callbacks. | 
|---|
| 1017 | * | 
|---|
| 1018 | * Note that this function also does the work of srcu_funnel_exp_start(), | 
|---|
| 1019 | * in some cases by directly invoking it. | 
|---|
| 1020 | * | 
|---|
| 1021 | * The SRCU read lock should be held around this function, and s is a seq snap | 
|---|
| 1022 | * taken after acquiring that lock. | 
|---|
| 1023 | */ | 
|---|
| 1024 | static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp, | 
|---|
| 1025 | unsigned long s, bool do_norm) | 
|---|
| 1026 | { | 
|---|
| 1027 | unsigned long flags; | 
|---|
| 1028 | int idx = rcu_seq_ctr(s) % ARRAY_SIZE(sdp->mynode->srcu_have_cbs); | 
|---|
| 1029 | unsigned long sgsne; | 
|---|
| 1030 | struct srcu_node *snp; | 
|---|
| 1031 | struct srcu_node *snp_leaf; | 
|---|
| 1032 | unsigned long snp_seq; | 
|---|
| 1033 | struct srcu_usage *sup = ssp->srcu_sup; | 
|---|
| 1034 |  | 
|---|
| 1035 | /* Ensure that snp node tree is fully initialized before traversing it */ | 
|---|
| 1036 | if (smp_load_acquire(&sup->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER) | 
|---|
| 1037 | snp_leaf = NULL; | 
|---|
| 1038 | else | 
|---|
| 1039 | snp_leaf = sdp->mynode; | 
|---|
| 1040 |  | 
|---|
| 1041 | if (snp_leaf) | 
|---|
| 1042 | /* Each pass through the loop does one level of the srcu_node tree. */ | 
|---|
| 1043 | for (snp = snp_leaf; snp != NULL; snp = snp->srcu_parent) { | 
|---|
| 1044 | if (WARN_ON_ONCE(rcu_seq_done(&sup->srcu_gp_seq, s)) && snp != snp_leaf) | 
|---|
| 1045 | return; /* GP already done and CBs recorded. */ | 
|---|
| 1046 | spin_lock_irqsave_rcu_node(snp, flags); | 
|---|
| 1047 | snp_seq = snp->srcu_have_cbs[idx]; | 
|---|
| 1048 | if (!srcu_invl_snp_seq(snp_seq) && ULONG_CMP_GE(snp_seq, s)) { | 
|---|
| 1049 | if (snp == snp_leaf && snp_seq == s) | 
|---|
| 1050 | snp->srcu_data_have_cbs[idx] |= sdp->grpmask; | 
|---|
| 1051 | spin_unlock_irqrestore_rcu_node(snp, flags); | 
|---|
| 1052 | if (snp == snp_leaf && snp_seq != s) { | 
|---|
| 1053 | srcu_schedule_cbs_sdp(sdp, do_norm ? SRCU_INTERVAL : 0); | 
|---|
| 1054 | return; | 
|---|
| 1055 | } | 
|---|
| 1056 | if (!do_norm) | 
|---|
| 1057 | srcu_funnel_exp_start(ssp, snp, s); | 
|---|
| 1058 | return; | 
|---|
| 1059 | } | 
|---|
| 1060 | snp->srcu_have_cbs[idx] = s; | 
|---|
| 1061 | if (snp == snp_leaf) | 
|---|
| 1062 | snp->srcu_data_have_cbs[idx] |= sdp->grpmask; | 
|---|
| 1063 | sgsne = snp->srcu_gp_seq_needed_exp; | 
|---|
| 1064 | if (!do_norm && (srcu_invl_snp_seq(sgsne) || ULONG_CMP_LT(sgsne, s))) | 
|---|
| 1065 | WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s); | 
|---|
| 1066 | spin_unlock_irqrestore_rcu_node(snp, flags); | 
|---|
| 1067 | } | 
|---|
| 1068 |  | 
|---|
| 1069 | /* Top of tree, must ensure the grace period will be started. */ | 
|---|
| 1070 | spin_lock_irqsave_ssp_contention(ssp, &flags); | 
|---|
| 1071 | if (ULONG_CMP_LT(sup->srcu_gp_seq_needed, s)) { | 
|---|
| 1072 | /* | 
|---|
| 1073 | * Record need for grace period s.  Pair with load | 
|---|
| 1074 | * acquire setting up for initialization. | 
|---|
| 1075 | */ | 
|---|
| 1076 | smp_store_release(&sup->srcu_gp_seq_needed, s); /*^^^*/ | 
|---|
| 1077 | } | 
|---|
| 1078 | if (!do_norm && ULONG_CMP_LT(sup->srcu_gp_seq_needed_exp, s)) | 
|---|
| 1079 | WRITE_ONCE(sup->srcu_gp_seq_needed_exp, s); | 
|---|
| 1080 |  | 
|---|
| 1081 | /* If grace period not already in progress, start it. */ | 
|---|
| 1082 | if (!WARN_ON_ONCE(rcu_seq_done(&sup->srcu_gp_seq, s)) && | 
|---|
| 1083 | rcu_seq_state(sup->srcu_gp_seq) == SRCU_STATE_IDLE) { | 
|---|
| 1084 | srcu_gp_start(ssp); | 
|---|
| 1085 |  | 
|---|
| 1086 | // And how can that list_add() in the "else" clause | 
|---|
| 1087 | // possibly be safe for concurrent execution?  Well, | 
|---|
| 1088 | // it isn't.  And it does not have to be.  After all, it | 
|---|
| 1089 | // can only be executed during early boot when there is only | 
|---|
| 1090 | // the one boot CPU running with interrupts still disabled. | 
|---|
| 1091 | if (likely(srcu_init_done)) | 
|---|
| 1092 | queue_delayed_work(rcu_gp_wq, &sup->work, | 
|---|
| 1093 | !!srcu_get_delay(ssp)); | 
|---|
| 1094 | else if (list_empty(&sup->work.work.entry)) | 
|---|
| 1095 | list_add(&sup->work.work.entry, &srcu_boot_list); | 
|---|
| 1096 | } | 
|---|
| 1097 | spin_unlock_irqrestore_rcu_node(sup, flags); | 
|---|
| 1098 | } | 
|---|
| 1099 |  | 
|---|
| 1100 | /* | 
|---|
| 1101 | * Wait until all readers counted by array index idx complete, but | 
|---|
| 1102 | * loop an additional time if there is an expedited grace period pending. | 
|---|
| 1103 | * The caller must ensure that ->srcu_ctrp is not changed while checking. | 
|---|
| 1104 | */ | 
|---|
| 1105 | static bool try_check_zero(struct srcu_struct *ssp, int idx, int trycount) | 
|---|
| 1106 | { | 
|---|
| 1107 | unsigned long curdelay; | 
|---|
| 1108 |  | 
|---|
| 1109 | spin_lock_irq_rcu_node(ssp->srcu_sup); | 
|---|
| 1110 | curdelay = !srcu_get_delay(ssp); | 
|---|
| 1111 | spin_unlock_irq_rcu_node(ssp->srcu_sup); | 
|---|
| 1112 |  | 
|---|
| 1113 | for (;;) { | 
|---|
| 1114 | if (srcu_readers_active_idx_check(ssp, idx)) | 
|---|
| 1115 | return true; | 
|---|
| 1116 | if ((--trycount + curdelay) <= 0) | 
|---|
| 1117 | return false; | 
|---|
| 1118 | udelay(srcu_retry_check_delay); | 
|---|
| 1119 | } | 
|---|
| 1120 | } | 
|---|
| 1121 |  | 
|---|
| 1122 | /* | 
|---|
| 1123 | * Increment the ->srcu_ctrp counter so that future SRCU readers will | 
|---|
| 1124 | * use the other rank of the ->srcu_(un)lock_count[] arrays.  This allows | 
|---|
| 1125 | * us to wait for pre-existing readers in a starvation-free manner. | 
|---|
| 1126 | */ | 
|---|
| 1127 | static void srcu_flip(struct srcu_struct *ssp) | 
|---|
| 1128 | { | 
|---|
| 1129 | /* | 
|---|
| 1130 | * Because the flip of ->srcu_ctrp is executed only if the | 
|---|
| 1131 | * preceding call to srcu_readers_active_idx_check() found that | 
|---|
| 1132 | * the ->srcu_ctrs[].srcu_unlocks and ->srcu_ctrs[].srcu_locks sums | 
|---|
| 1133 | * matched and because that summing uses atomic_long_read(), | 
|---|
| 1134 | * there is ordering due to a control dependency between that | 
|---|
| 1135 | * summing and the WRITE_ONCE() in this call to srcu_flip(). | 
|---|
| 1136 | * This ordering ensures that if this updater saw a given reader's | 
|---|
| 1137 | * increment from __srcu_read_lock(), that reader was using a value | 
|---|
| 1138 | * of ->srcu_ctrp from before the previous call to srcu_flip(), | 
|---|
| 1139 | * which should be quite rare.  This ordering thus helps forward | 
|---|
| 1140 | * progress because the grace period could otherwise be delayed | 
|---|
| 1141 | * by additional calls to __srcu_read_lock() using that old (soon | 
|---|
| 1142 | * to be new) value of ->srcu_ctrp. | 
|---|
| 1143 | * | 
|---|
| 1144 | * This sum-equality check and ordering also ensures that if | 
|---|
| 1145 | * a given call to __srcu_read_lock() uses the new value of | 
|---|
| 1146 | * ->srcu_ctrp, this updater's earlier scans cannot have seen | 
|---|
| 1147 | * that reader's increments, which is all to the good, because | 
|---|
| 1148 | * this grace period need not wait on that reader.  After all, | 
|---|
| 1149 | * if those earlier scans had seen that reader, there would have | 
|---|
| 1150 | * been a sum mismatch and this code would not be reached. | 
|---|
| 1151 | * | 
|---|
| 1152 | * This means that the following smp_mb() is redundant, but | 
|---|
| 1153 | * it stays until either (1) Compilers learn about this sort of | 
|---|
| 1154 | * control dependency or (2) Some production workload running on | 
|---|
| 1155 | * a production system is unduly delayed by this slowpath smp_mb(). | 
|---|
| 1156 | * Except for _lite() readers, where it is inoperative, which | 
|---|
| 1157 | * means that it is a good thing that it is redundant. | 
|---|
| 1158 | */ | 
|---|
| 1159 | smp_mb(); /* E */  /* Pairs with B and C. */ | 
|---|
| 1160 |  | 
|---|
| 1161 | WRITE_ONCE(ssp->srcu_ctrp, | 
|---|
| 1162 | &ssp->sda->srcu_ctrs[!(ssp->srcu_ctrp - &ssp->sda->srcu_ctrs[0])]); | 
|---|
| 1163 |  | 
|---|
| 1164 | /* | 
|---|
| 1165 | * Ensure that if the updater misses an __srcu_read_unlock() | 
|---|
| 1166 | * increment, that task's __srcu_read_lock() following its next | 
|---|
| 1167 | * __srcu_read_lock() or __srcu_read_unlock() will see the above | 
|---|
| 1168 | * counter update.  Note that both this memory barrier and the | 
|---|
| 1169 | * one in srcu_readers_active_idx_check() provide the guarantee | 
|---|
| 1170 | * for __srcu_read_lock(). | 
|---|
| 1171 | * | 
|---|
| 1172 | * Note that this is a performance optimization, in which we spend | 
|---|
| 1173 | * an otherwise unnecessary smp_mb() in order to reduce the number | 
|---|
| 1174 | * of full per-CPU-variable scans in srcu_readers_lock_idx() and | 
|---|
| 1175 | * srcu_readers_unlock_idx().  But this performance optimization | 
|---|
| 1176 | * is not so optimal for SRCU-fast, where we would be spending | 
|---|
| 1177 | * not smp_mb(), but rather synchronize_rcu().  At the same time, | 
|---|
| 1178 | * the overhead of the smp_mb() is in the noise, so there is no | 
|---|
| 1179 | * point in omitting it in the SRCU-fast case.  So the same code | 
|---|
| 1180 | * is executed either way. | 
|---|
| 1181 | */ | 
|---|
| 1182 | smp_mb(); /* D */  /* Pairs with C. */ | 
|---|
| 1183 | } | 
|---|
| 1184 |  | 
|---|
| 1185 | /* | 
|---|
| 1186 | * If SRCU is likely idle, in other words, the next SRCU grace period | 
|---|
| 1187 | * should be expedited, return true, otherwise return false.  Except that | 
|---|
| 1188 | * in the presence of _lite() readers, always return false. | 
|---|
| 1189 | * | 
|---|
| 1190 | * Note that it is OK for several concurrent from-idle requests for a new | 
|---|
| 1191 | * grace period to specify expediting because they will all end | 
|---|
| 1192 | * up requesting the same grace period anyhow.  So no loss. | 
|---|
| 1193 | * | 
|---|
| 1194 | * Note also that if any CPU (including the current one) is still invoking | 
|---|
| 1195 | * callbacks, this function will nevertheless say "idle".  This is not | 
|---|
| 1196 | * ideal, but the overhead of checking all CPUs' callback lists is even | 
|---|
| 1197 | * less ideal, especially on large systems.  Furthermore, the wakeup | 
|---|
| 1198 | * can happen before the callback is fully removed, so we have no choice | 
|---|
| 1199 | * but to accept this type of error. | 
|---|
| 1200 | * | 
|---|
| 1201 | * This function is also subject to counter-wrap errors, but let's face | 
|---|
| 1202 | * it, if this function was preempted for enough time for the counters | 
|---|
| 1203 | * to wrap, it really doesn't matter whether or not we expedite the grace | 
|---|
| 1204 | * period.  The extra overhead of a needlessly expedited grace period is | 
|---|
| 1205 | * negligible when amortized over that time period, and the extra latency | 
|---|
| 1206 | * of a needlessly non-expedited grace period is similarly negligible. | 
|---|
| 1207 | */ | 
|---|
| 1208 | static bool srcu_should_expedite(struct srcu_struct *ssp) | 
|---|
| 1209 | { | 
|---|
| 1210 | unsigned long curseq; | 
|---|
| 1211 | unsigned long flags; | 
|---|
| 1212 | struct srcu_data *sdp; | 
|---|
| 1213 | unsigned long t; | 
|---|
| 1214 | unsigned long tlast; | 
|---|
| 1215 |  | 
|---|
| 1216 | check_init_srcu_struct(ssp); | 
|---|
| 1217 | /* If _lite() readers, don't do unsolicited expediting. */ | 
|---|
| 1218 | if (this_cpu_read(ssp->sda->srcu_reader_flavor) & SRCU_READ_FLAVOR_SLOWGP) | 
|---|
| 1219 | return false; | 
|---|
| 1220 | /* If the local srcu_data structure has callbacks, not idle.  */ | 
|---|
| 1221 | sdp = raw_cpu_ptr(ssp->sda); | 
|---|
| 1222 | spin_lock_irqsave_rcu_node(sdp, flags); | 
|---|
| 1223 | if (rcu_segcblist_pend_cbs(&sdp->srcu_cblist)) { | 
|---|
| 1224 | spin_unlock_irqrestore_rcu_node(sdp, flags); | 
|---|
| 1225 | return false; /* Callbacks already present, so not idle. */ | 
|---|
| 1226 | } | 
|---|
| 1227 | spin_unlock_irqrestore_rcu_node(sdp, flags); | 
|---|
| 1228 |  | 
|---|
| 1229 | /* | 
|---|
| 1230 | * No local callbacks, so probabilistically probe global state. | 
|---|
| 1231 | * Exact information would require acquiring locks, which would | 
|---|
| 1232 | * kill scalability, hence the probabilistic nature of the probe. | 
|---|
| 1233 | */ | 
|---|
| 1234 |  | 
|---|
| 1235 | /* First, see if enough time has passed since the last GP. */ | 
|---|
| 1236 | t = ktime_get_mono_fast_ns(); | 
|---|
| 1237 | tlast = READ_ONCE(ssp->srcu_sup->srcu_last_gp_end); | 
|---|
| 1238 | if (exp_holdoff == 0 || | 
|---|
| 1239 | time_in_range_open(t, tlast, tlast + exp_holdoff)) | 
|---|
| 1240 | return false; /* Too soon after last GP. */ | 
|---|
| 1241 |  | 
|---|
| 1242 | /* Next, check for probable idleness. */ | 
|---|
| 1243 | curseq = rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq); | 
|---|
| 1244 | smp_mb(); /* Order ->srcu_gp_seq with ->srcu_gp_seq_needed. */ | 
|---|
| 1245 | if (ULONG_CMP_LT(curseq, READ_ONCE(ssp->srcu_sup->srcu_gp_seq_needed))) | 
|---|
| 1246 | return false; /* Grace period in progress, so not idle. */ | 
|---|
| 1247 | smp_mb(); /* Order ->srcu_gp_seq with prior access. */ | 
|---|
| 1248 | if (curseq != rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq)) | 
|---|
| 1249 | return false; /* GP # changed, so not idle. */ | 
|---|
| 1250 | return true; /* With reasonable probability, idle! */ | 
|---|
| 1251 | } | 
|---|
| 1252 |  | 
|---|
| 1253 | /* | 
|---|
| 1254 | * SRCU callback function to leak a callback. | 
|---|
| 1255 | */ | 
|---|
| 1256 | static void srcu_leak_callback(struct rcu_head *rhp) | 
|---|
| 1257 | { | 
|---|
| 1258 | } | 
|---|
| 1259 |  | 
|---|
| 1260 | /* | 
|---|
| 1261 | * Start an SRCU grace period, and also queue the callback if non-NULL. | 
|---|
| 1262 | */ | 
|---|
| 1263 | static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp, | 
|---|
| 1264 | struct rcu_head *rhp, bool do_norm) | 
|---|
| 1265 | { | 
|---|
| 1266 | unsigned long flags; | 
|---|
| 1267 | int idx; | 
|---|
| 1268 | bool needexp = false; | 
|---|
| 1269 | bool needgp = false; | 
|---|
| 1270 | unsigned long s; | 
|---|
| 1271 | struct srcu_data *sdp; | 
|---|
| 1272 | struct srcu_node *sdp_mynode; | 
|---|
| 1273 | int ss_state; | 
|---|
| 1274 |  | 
|---|
| 1275 | check_init_srcu_struct(ssp); | 
|---|
| 1276 | /* | 
|---|
| 1277 | * While starting a new grace period, make sure we are in an | 
|---|
| 1278 | * SRCU read-side critical section so that the grace-period | 
|---|
| 1279 | * sequence number cannot wrap around in the meantime. | 
|---|
| 1280 | */ | 
|---|
| 1281 | idx = __srcu_read_lock_nmisafe(ssp); | 
|---|
| 1282 | ss_state = smp_load_acquire(&ssp->srcu_sup->srcu_size_state); | 
|---|
| 1283 | if (ss_state < SRCU_SIZE_WAIT_CALL) | 
|---|
| 1284 | sdp = per_cpu_ptr(ssp->sda, get_boot_cpu_id()); | 
|---|
| 1285 | else | 
|---|
| 1286 | sdp = raw_cpu_ptr(ssp->sda); | 
|---|
| 1287 | spin_lock_irqsave_sdp_contention(sdp, &flags); | 
|---|
| 1288 | if (rhp) | 
|---|
| 1289 | rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp); | 
|---|
| 1290 | /* | 
|---|
| 1291 | * It's crucial to capture the snapshot 's' for acceleration before | 
|---|
| 1292 | * reading the current gp_seq that is used for advancing. This is | 
|---|
| 1293 | * essential because if the acceleration snapshot is taken after a | 
|---|
| 1294 | * failed advancement attempt, there's a risk that a grace period may | 
|---|
| 1295 | * conclude and a new one may start in the interim. If the snapshot is | 
|---|
| 1296 | * captured after this sequence of events, the acceleration snapshot 's' | 
|---|
| 1297 | * could be excessively advanced, leading to acceleration failure. | 
|---|
| 1298 | * In such a scenario, an 'acceleration leak' can occur, where new | 
|---|
| 1299 | * callbacks become indefinitely stuck in the RCU_NEXT_TAIL segment. | 
|---|
| 1300 | * Also note that encountering advancing failures is a normal | 
|---|
| 1301 | * occurrence when the grace period for RCU_WAIT_TAIL is in progress. | 
|---|
| 1302 | * | 
|---|
| 1303 | * To see this, consider the following events which occur if | 
|---|
| 1304 | * rcu_seq_snap() were to be called after advance: | 
|---|
| 1305 | * | 
|---|
| 1306 | *  1) The RCU_WAIT_TAIL segment has callbacks (gp_num = X + 4) and the | 
|---|
| 1307 | *     RCU_NEXT_READY_TAIL also has callbacks (gp_num = X + 8). | 
|---|
| 1308 | * | 
|---|
| 1309 | *  2) The grace period for RCU_WAIT_TAIL is seen as started but not | 
|---|
| 1310 | *     completed so rcu_seq_current() returns X + SRCU_STATE_SCAN1. | 
|---|
| 1311 | * | 
|---|
| 1312 | *  3) This value is passed to rcu_segcblist_advance() which can't move | 
|---|
| 1313 | *     any segment forward and fails. | 
|---|
| 1314 | * | 
|---|
| 1315 | *  4) srcu_gp_start_if_needed() still proceeds with callback acceleration. | 
|---|
| 1316 | *     But then the call to rcu_seq_snap() observes the grace period for the | 
|---|
| 1317 | *     RCU_WAIT_TAIL segment as completed and the subsequent one for the | 
|---|
| 1318 | *     RCU_NEXT_READY_TAIL segment as started (ie: X + 4 + SRCU_STATE_SCAN1) | 
|---|
| 1319 | *     so it returns a snapshot of the next grace period, which is X + 12. | 
|---|
| 1320 | * | 
|---|
| 1321 | *  5) The value of X + 12 is passed to rcu_segcblist_accelerate() but the | 
|---|
| 1322 | *     freshly enqueued callback in RCU_NEXT_TAIL can't move to | 
|---|
| 1323 | *     RCU_NEXT_READY_TAIL which already has callbacks for a previous grace | 
|---|
| 1324 | *     period (gp_num = X + 8). So acceleration fails. | 
|---|
| 1325 | */ | 
|---|
| 1326 | s = rcu_seq_snap(&ssp->srcu_sup->srcu_gp_seq); | 
|---|
| 1327 | if (rhp) { | 
|---|
| 1328 | rcu_segcblist_advance(&sdp->srcu_cblist, | 
|---|
| 1329 | rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq)); | 
|---|
| 1330 | /* | 
|---|
| 1331 | * Acceleration can never fail because the base current gp_seq | 
|---|
| 1332 | * used for acceleration is <= the value of gp_seq used for | 
|---|
| 1333 | * advancing. This means that RCU_NEXT_TAIL segment will | 
|---|
| 1334 | * always be able to be emptied by the acceleration into the | 
|---|
| 1335 | * RCU_NEXT_READY_TAIL or RCU_WAIT_TAIL segments. | 
|---|
| 1336 | */ | 
|---|
| 1337 | WARN_ON_ONCE(!rcu_segcblist_accelerate(&sdp->srcu_cblist, s)); | 
|---|
| 1338 | } | 
|---|
| 1339 | if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) { | 
|---|
| 1340 | sdp->srcu_gp_seq_needed = s; | 
|---|
| 1341 | needgp = true; | 
|---|
| 1342 | } | 
|---|
| 1343 | if (!do_norm && ULONG_CMP_LT(sdp->srcu_gp_seq_needed_exp, s)) { | 
|---|
| 1344 | sdp->srcu_gp_seq_needed_exp = s; | 
|---|
| 1345 | needexp = true; | 
|---|
| 1346 | } | 
|---|
| 1347 | spin_unlock_irqrestore_rcu_node(sdp, flags); | 
|---|
| 1348 |  | 
|---|
| 1349 | /* Ensure that snp node tree is fully initialized before traversing it */ | 
|---|
| 1350 | if (ss_state < SRCU_SIZE_WAIT_BARRIER) | 
|---|
| 1351 | sdp_mynode = NULL; | 
|---|
| 1352 | else | 
|---|
| 1353 | sdp_mynode = sdp->mynode; | 
|---|
| 1354 |  | 
|---|
| 1355 | if (needgp) | 
|---|
| 1356 | srcu_funnel_gp_start(ssp, sdp, s, do_norm); | 
|---|
| 1357 | else if (needexp) | 
|---|
| 1358 | srcu_funnel_exp_start(ssp, sdp_mynode, s); | 
|---|
| 1359 | __srcu_read_unlock_nmisafe(ssp, idx); | 
|---|
| 1360 | return s; | 
|---|
| 1361 | } | 
|---|
| 1362 |  | 
|---|
| 1363 | /* | 
|---|
| 1364 | * Enqueue an SRCU callback on the srcu_data structure associated with | 
|---|
| 1365 | * the current CPU and the specified srcu_struct structure, initiating | 
|---|
| 1366 | * grace-period processing if it is not already running. | 
|---|
| 1367 | * | 
|---|
| 1368 | * Note that all CPUs must agree that the grace period extended beyond | 
|---|
| 1369 | * all pre-existing SRCU read-side critical sections.  On systems with | 
|---|
| 1370 | * more than one CPU, this means that when "func()" is invoked, each CPU | 
|---|
| 1371 | * is guaranteed to have executed a full memory barrier since the end of | 
|---|
| 1372 | * its last corresponding SRCU read-side critical section whose beginning | 
|---|
| 1373 | * preceded the call to call_srcu().  It also means that each CPU executing | 
|---|
| 1374 | * an SRCU read-side critical section that continues beyond the start of | 
|---|
| 1375 | * "func()" must have executed a memory barrier after the call_srcu() | 
|---|
| 1376 | * but before the beginning of that SRCU read-side critical section. | 
|---|
| 1377 | * Note that these guarantees include CPUs that are offline, idle, or | 
|---|
| 1378 | * executing in user mode, as well as CPUs that are executing in the kernel. | 
|---|
| 1379 | * | 
|---|
| 1380 | * Furthermore, if CPU A invoked call_srcu() and CPU B invoked the | 
|---|
| 1381 | * resulting SRCU callback function "func()", then both CPU A and CPU | 
|---|
| 1382 | * B are guaranteed to execute a full memory barrier during the time | 
|---|
| 1383 | * interval between the call to call_srcu() and the invocation of "func()". | 
|---|
| 1384 | * This guarantee applies even if CPU A and CPU B are the same CPU (but | 
|---|
| 1385 | * again only if the system has more than one CPU). | 
|---|
| 1386 | * | 
|---|
| 1387 | * Of course, these guarantees apply only for invocations of call_srcu(), | 
|---|
| 1388 | * srcu_read_lock(), and srcu_read_unlock() that are all passed the same | 
|---|
| 1389 | * srcu_struct structure. | 
|---|
| 1390 | */ | 
|---|
| 1391 | static void __call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp, | 
|---|
| 1392 | rcu_callback_t func, bool do_norm) | 
|---|
| 1393 | { | 
|---|
| 1394 | if (debug_rcu_head_queue(rhp)) { | 
|---|
| 1395 | /* Probable double call_srcu(), so leak the callback. */ | 
|---|
| 1396 | WRITE_ONCE(rhp->func, srcu_leak_callback); | 
|---|
| 1397 | WARN_ONCE(1, "call_srcu(): Leaked duplicate callback\n"); | 
|---|
| 1398 | return; | 
|---|
| 1399 | } | 
|---|
| 1400 | rhp->func = func; | 
|---|
| 1401 | (void)srcu_gp_start_if_needed(ssp, rhp, do_norm); | 
|---|
| 1402 | } | 
|---|
| 1403 |  | 
|---|
| 1404 | /** | 
|---|
| 1405 | * call_srcu() - Queue a callback for invocation after an SRCU grace period | 
|---|
| 1406 | * @ssp: srcu_struct on which to queue the callback | 
|---|
| 1407 | * @rhp: structure to be used for queueing the SRCU callback. | 
|---|
| 1408 | * @func: function to be invoked after the SRCU grace period | 
|---|
| 1409 | * | 
|---|
| 1410 | * The callback function will be invoked some time after a full SRCU | 
|---|
| 1411 | * grace period elapses, in other words after all pre-existing SRCU | 
|---|
| 1412 | * read-side critical sections have completed.  However, the callback | 
|---|
| 1413 | * function might well execute concurrently with other SRCU read-side | 
|---|
| 1414 | * critical sections that started after call_srcu() was invoked.  SRCU | 
|---|
| 1415 | * read-side critical sections are delimited by srcu_read_lock() and | 
|---|
| 1416 | * srcu_read_unlock(), and may be nested. | 
|---|
| 1417 | * | 
|---|
| 1418 | * The callback will be invoked from process context, but with bh | 
|---|
| 1419 | * disabled.  The callback function must therefore be fast and must | 
|---|
| 1420 | * not block. | 
|---|
| 1421 | * | 
|---|
| 1422 | * See the description of call_rcu() for more detailed information on | 
|---|
| 1423 | * memory ordering guarantees. | 
|---|
| 1424 | */ | 
|---|
| 1425 | void call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp, | 
|---|
| 1426 | rcu_callback_t func) | 
|---|
| 1427 | { | 
|---|
| 1428 | __call_srcu(ssp, rhp, func, true); | 
|---|
| 1429 | } | 
|---|
| 1430 | EXPORT_SYMBOL_GPL(call_srcu); | 
|---|
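|  |  | 
|---|
|  | /* | 
|---|
|  | * Illustrative sketch (not part of the original sources): a minimal | 
|---|
|  | * call_srcu() usage pattern.  The names foo, foo_srcu, foo_rcu_free(), | 
|---|
|  | * and foo_retire() are assumptions made only for this example. | 
|---|
|  | * | 
|---|
|  | *	DEFINE_SRCU(foo_srcu); | 
|---|
|  | * | 
|---|
|  | *	struct foo { | 
|---|
|  | *		int data; | 
|---|
|  | *		struct rcu_head rh; | 
|---|
|  | *	}; | 
|---|
|  | * | 
|---|
|  | *	static void foo_rcu_free(struct rcu_head *rhp) | 
|---|
|  | *	{ | 
|---|
|  | *		kfree(container_of(rhp, struct foo, rh)); | 
|---|
|  | *	} | 
|---|
|  | * | 
|---|
|  | *	static void foo_retire(struct foo *fp) | 
|---|
|  | *	{ | 
|---|
|  | *		// Freed only after pre-existing foo_srcu readers finish. | 
|---|
|  | *		call_srcu(&foo_srcu, &fp->rh, foo_rcu_free); | 
|---|
|  | *	} | 
|---|
|  | * | 
|---|
|  | * The callback runs in process context with bottom halves disabled, so | 
|---|
|  | * it must neither sleep nor take much time. | 
|---|
|  | */ | 
|---|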
| 1431 |  | 
|---|
| 1432 | /* | 
|---|
| 1433 | * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). | 
|---|
| 1434 | */ | 
|---|
| 1435 | static void __synchronize_srcu(struct srcu_struct *ssp, bool do_norm) | 
|---|
| 1436 | { | 
|---|
| 1437 | struct rcu_synchronize rcu; | 
|---|
| 1438 |  | 
|---|
| 1439 | srcu_lock_sync(&ssp->dep_map); | 
|---|
| 1440 |  | 
|---|
| 1441 | RCU_LOCKDEP_WARN(lockdep_is_held(ssp) || | 
|---|
| 1442 | lock_is_held(&rcu_bh_lock_map) || | 
|---|
| 1443 | lock_is_held(&rcu_lock_map) || | 
|---|
| 1444 | lock_is_held(&rcu_sched_lock_map), | 
|---|
| 1445 | "Illegal synchronize_srcu() in same-type SRCU (or in RCU) read-side critical section"); | 
|---|
| 1446 |  | 
|---|
| 1447 | if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE) | 
|---|
| 1448 | return; | 
|---|
| 1449 | might_sleep(); | 
|---|
| 1450 | check_init_srcu_struct(ssp); | 
|---|
| 1451 | init_completion(&rcu.completion); | 
|---|
| 1452 | init_rcu_head_on_stack(&rcu.head); | 
|---|
| 1453 | __call_srcu(ssp, &rcu.head, wakeme_after_rcu, do_norm); | 
|---|
| 1454 | wait_for_completion(&rcu.completion); | 
|---|
| 1455 | destroy_rcu_head_on_stack(&rcu.head); | 
|---|
| 1456 |  | 
|---|
| 1457 | /* | 
|---|
| 1458 | * Make sure that later code is ordered after the SRCU grace | 
|---|
| 1459 | * period.  This pairs with the spin_lock_irq_rcu_node() | 
|---|
| 1460 | * in srcu_invoke_callbacks().  Unlike Tree RCU, this is needed | 
|---|
| 1461 | * because the current CPU might have been totally uninvolved with | 
|---|
| 1462 | * (and thus unordered against) that grace period. | 
|---|
| 1463 | */ | 
|---|
| 1464 | smp_mb(); | 
|---|
| 1465 | } | 
|---|
| 1466 |  | 
|---|
| 1467 | /** | 
|---|
| 1468 | * synchronize_srcu_expedited - Brute-force SRCU grace period | 
|---|
| 1469 | * @ssp: srcu_struct with which to synchronize. | 
|---|
| 1470 | * | 
|---|
| 1471 | * Wait for an SRCU grace period to elapse, but be more aggressive about | 
|---|
| 1472 | * spinning rather than blocking when waiting. | 
|---|
| 1473 | * | 
|---|
| 1474 | * Note that synchronize_srcu_expedited() has the same deadlock and | 
|---|
| 1475 | * memory-ordering properties as does synchronize_srcu(). | 
|---|
| 1476 | */ | 
|---|
| 1477 | void synchronize_srcu_expedited(struct srcu_struct *ssp) | 
|---|
| 1478 | { | 
|---|
| 1479 | __synchronize_srcu(ssp, rcu_gp_is_normal()); | 
|---|
| 1480 | } | 
|---|
| 1481 | EXPORT_SYMBOL_GPL(synchronize_srcu_expedited); | 
|---|
| 1482 |  | 
|---|
| 1483 | /** | 
|---|
| 1484 | * synchronize_srcu - wait for prior SRCU read-side critical-section completion | 
|---|
| 1485 | * @ssp: srcu_struct with which to synchronize. | 
|---|
| 1486 | * | 
|---|
| 1487 | * Wait for the counts of both indexes to drain to zero.  To avoid | 
|---|
| 1488 | * possible starvation of synchronize_srcu(), it first waits for the | 
|---|
| 1489 | * count of the index=!(ssp->srcu_ctrp - &ssp->sda->srcu_ctrs[0]) to | 
|---|
| 1490 | * drain to zero, then flips ->srcu_ctrp and waits for the count of the | 
|---|
| 1491 | * other index. | 
|---|
| 1492 | * | 
|---|
| 1493 | * Can block; must be called from process context. | 
|---|
| 1494 | * | 
|---|
| 1495 | * Note that it is illegal to call synchronize_srcu() from the corresponding | 
|---|
| 1496 | * SRCU read-side critical section; doing so will result in deadlock. | 
|---|
| 1497 | * However, it is perfectly legal to call synchronize_srcu() on one | 
|---|
| 1498 | * srcu_struct from some other srcu_struct's read-side critical section, | 
|---|
| 1499 | * as long as the resulting graph of srcu_structs is acyclic. | 
|---|
| 1500 | * | 
|---|
| 1501 | * There are memory-ordering constraints implied by synchronize_srcu(). | 
|---|
| 1502 | * On systems with more than one CPU, when synchronize_srcu() returns, | 
|---|
| 1503 | * each CPU is guaranteed to have executed a full memory barrier since | 
|---|
| 1504 | * the end of its last corresponding SRCU read-side critical section | 
|---|
| 1505 | * whose beginning preceded the call to synchronize_srcu().  In addition, | 
|---|
| 1506 | * each CPU having an SRCU read-side critical section that extends beyond | 
|---|
| 1507 | * the return from synchronize_srcu() is guaranteed to have executed a | 
|---|
| 1508 | * full memory barrier after the beginning of synchronize_srcu() and before | 
|---|
| 1509 | * the beginning of that SRCU read-side critical section.  Note that these | 
|---|
| 1510 | * guarantees include CPUs that are offline, idle, or executing in user mode, | 
|---|
| 1511 | * as well as CPUs that are executing in the kernel. | 
|---|
| 1512 | * | 
|---|
| 1513 | * Furthermore, if CPU A invoked synchronize_srcu(), which returned | 
|---|
| 1514 | * to its caller on CPU B, then both CPU A and CPU B are guaranteed | 
|---|
| 1515 | * to have executed a full memory barrier during the execution of | 
|---|
| 1516 | * synchronize_srcu().  This guarantee applies even if CPU A and CPU B | 
|---|
| 1517 | * are the same CPU, but again only if the system has more than one CPU. | 
|---|
| 1518 | * | 
|---|
| 1519 | * Of course, these memory-ordering guarantees apply only when | 
|---|
| 1520 | * synchronize_srcu(), srcu_read_lock(), and srcu_read_unlock() are | 
|---|
| 1521 | * passed the same srcu_struct structure. | 
|---|
| 1522 | * | 
|---|
| 1523 | * Implementation of these memory-ordering guarantees is similar to | 
|---|
| 1524 | * that of synchronize_rcu(). | 
|---|
| 1525 | * | 
|---|
| 1526 | * If SRCU is likely idle as determined by srcu_should_expedite(), | 
|---|
| 1527 | * expedite the first request.  This semantic was provided by Classic SRCU, | 
|---|
| 1528 | * and is relied upon by its users, so TREE SRCU must also provide it. | 
|---|
| 1529 | * Note that detecting idleness is heuristic and subject to both false | 
|---|
| 1530 | * positives and negatives. | 
|---|
| 1531 | */ | 
|---|
| 1532 | void synchronize_srcu(struct srcu_struct *ssp) | 
|---|
| 1533 | { | 
|---|
| 1534 | if (srcu_should_expedite(ssp) || rcu_gp_is_expedited()) | 
|---|
| 1535 | synchronize_srcu_expedited(ssp); | 
|---|
| 1536 | else | 
|---|
| 1537 | __synchronize_srcu(ssp, true); | 
|---|
| 1538 | } | 
|---|
| 1539 | EXPORT_SYMBOL_GPL(synchronize_srcu); | 
|---|
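|  |  | 
|---|
|  | /* | 
|---|
|  | * Illustrative sketch (not part of the original sources): the classic | 
|---|
|  | * unpublish-then-free pattern that synchronize_srcu() supports.  The | 
|---|
|  | * names foo_srcu, foo_ptr, foo_lock, and do_something_sleepable() are | 
|---|
|  | * assumptions made only for this example. | 
|---|
|  | * | 
|---|
|  | *	// Reader: may block inside the SRCU read-side critical section. | 
|---|
|  | *	int idx = srcu_read_lock(&foo_srcu); | 
|---|
|  | *	struct foo *fp = srcu_dereference(foo_ptr, &foo_srcu); | 
|---|
|  | * | 
|---|
|  | *	if (fp) | 
|---|
|  | *		do_something_sleepable(fp); | 
|---|
|  | *	srcu_read_unlock(&foo_srcu, idx); | 
|---|
|  | * | 
|---|
|  | *	// Updater: unpublish, wait out pre-existing readers, then free. | 
|---|
|  | *	spin_lock(&foo_lock); | 
|---|
|  | *	old = rcu_replace_pointer(foo_ptr, NULL, lockdep_is_held(&foo_lock)); | 
|---|
|  | *	spin_unlock(&foo_lock); | 
|---|
|  | *	synchronize_srcu(&foo_srcu); | 
|---|
|  | *	kfree(old); | 
|---|
|  | */ | 
|---|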
| 1540 |  | 
|---|
| 1541 | /** | 
|---|
| 1542 | * get_state_synchronize_srcu - Provide an end-of-grace-period cookie | 
|---|
| 1543 | * @ssp: srcu_struct to provide cookie for. | 
|---|
| 1544 | * | 
|---|
| 1545 | * This function returns a cookie that can be passed to | 
|---|
| 1546 | * poll_state_synchronize_srcu(), which will return true if a full grace | 
|---|
| 1547 | * period has elapsed in the meantime.  It is the caller's responsibility | 
|---|
| 1548 | * to make sure that grace period happens, for example, by invoking | 
|---|
| 1549 | * call_srcu() after return from get_state_synchronize_srcu(). | 
|---|
| 1550 | */ | 
|---|
| 1551 | unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp) | 
|---|
| 1552 | { | 
|---|
| 1553 | // Any prior manipulation of SRCU-protected data must happen | 
|---|
| 1554 | // before the load from ->srcu_gp_seq. | 
|---|
| 1555 | smp_mb(); | 
|---|
| 1556 | return rcu_seq_snap(&ssp->srcu_sup->srcu_gp_seq); | 
|---|
| 1557 | } | 
|---|
| 1558 | EXPORT_SYMBOL_GPL(get_state_synchronize_srcu); | 
|---|
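|  |  | 
|---|
|  | /* | 
|---|
|  | * Illustrative sketch (not part of the original sources): pairing | 
|---|
|  | * get_state_synchronize_srcu() with call_srcu() so that the needed | 
|---|
|  | * grace period is actually started.  The names foo_srcu, fp, and | 
|---|
|  | * foo_rcu_free() are assumptions made only for this example. | 
|---|
|  | * | 
|---|
|  | *	unsigned long cookie = get_state_synchronize_srcu(&foo_srcu); | 
|---|
|  | * | 
|---|
|  | *	call_srcu(&foo_srcu, &fp->rh, foo_rcu_free);	// Ensures a GP starts. | 
|---|
|  | * | 
|---|
|  | *	// Some time later: | 
|---|
|  | *	if (poll_state_synchronize_srcu(&foo_srcu, cookie)) | 
|---|
|  | *		pr_debug("grace period has elapsed since the cookie\n"); | 
|---|
|  | */ | 
|---|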
| 1559 |  | 
|---|
| 1560 | /** | 
|---|
| 1561 | * start_poll_synchronize_srcu - Provide cookie and start grace period | 
|---|
| 1562 | * @ssp: srcu_struct to provide cookie for. | 
|---|
| 1563 | * | 
|---|
| 1564 | * This function returns a cookie that can be passed to | 
|---|
| 1565 | * poll_state_synchronize_srcu(), which will return true if a full grace | 
|---|
| 1566 | * period has elapsed in the meantime.  Unlike get_state_synchronize_srcu(), | 
|---|
| 1567 | * this function also ensures that any needed SRCU grace period will be | 
|---|
| 1568 | * started.  This convenience does come at a cost in terms of CPU overhead. | 
|---|
| 1569 | */ | 
|---|
| 1570 | unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp) | 
|---|
| 1571 | { | 
|---|
| 1572 | return srcu_gp_start_if_needed(ssp, NULL, true); | 
|---|
| 1573 | } | 
|---|
| 1574 | EXPORT_SYMBOL_GPL(start_poll_synchronize_srcu); | 
|---|
| 1575 |  | 
|---|
| 1576 | /** | 
|---|
| 1577 | * poll_state_synchronize_srcu - Has cookie's grace period ended? | 
|---|
| 1578 | * @ssp: srcu_struct to provide cookie for. | 
|---|
| 1579 | * @cookie: Return value from get_state_synchronize_srcu() or start_poll_synchronize_srcu(). | 
|---|
| 1580 | * | 
|---|
| 1581 | * This function takes the cookie that was returned from either | 
|---|
| 1582 | * get_state_synchronize_srcu() or start_poll_synchronize_srcu(), and | 
|---|
| 1583 | * returns @true if an SRCU grace period elapsed since the time that the | 
|---|
| 1584 | * cookie was created. | 
|---|
| 1585 | * | 
|---|
| 1586 | * Because cookies are finite in size, wrapping/overflow is possible. | 
|---|
| 1587 | * This is more pronounced on 32-bit systems where cookies are 32 bits, | 
|---|
| 1588 | * where in theory wrapping could happen in about 14 hours assuming | 
|---|
| 1589 | * 25-microsecond expedited SRCU grace periods.  However, a more likely | 
|---|
| 1590 | * overflow lower bound is on the order of 24 days in the case of | 
|---|
| 1591 | * one-millisecond SRCU grace periods.  Of course, wrapping in a 64-bit | 
|---|
| 1592 | * system requires geologic timespans, as in more than seven million years | 
|---|
| 1593 | * even for expedited SRCU grace periods. | 
|---|
| 1594 | * | 
|---|
| 1595 | * Wrapping/overflow is much more of an issue for CONFIG_SMP=n systems | 
|---|
| 1596 | * that also have CONFIG_PREEMPTION=n, which selects Tiny SRCU.  This uses | 
|---|
| 1597 | * a 16-bit cookie, which rcutorture routinely wraps in a matter of a | 
|---|
| 1598 | * few minutes.  If this proves to be a problem, this counter will be | 
|---|
| 1599 | * expanded to the same size as for Tree SRCU. | 
|---|
| 1600 | */ | 
|---|
| 1601 | bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie) | 
|---|
| 1602 | { | 
|---|
| 1603 | if (cookie != SRCU_GET_STATE_COMPLETED && | 
|---|
| 1604 | !rcu_seq_done_exact(&ssp->srcu_sup->srcu_gp_seq, cookie)) | 
|---|
| 1605 | return false; | 
|---|
| 1606 | // Ensure that the end of the SRCU grace period happens before | 
|---|
| 1607 | // any subsequent code that the caller might execute. | 
|---|
| 1608 | smp_mb(); // ^^^ | 
|---|
| 1609 | return true; | 
|---|
| 1610 | } | 
|---|
| 1611 | EXPORT_SYMBOL_GPL(poll_state_synchronize_srcu); | 
|---|
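|  |  | 
|---|
|  | /* | 
|---|
|  | * Illustrative sketch (not part of the original sources): polled | 
|---|
|  | * grace-period waiting, which avoids both sleeping in synchronize_srcu() | 
|---|
|  | * and the storage cost of an rcu_head.  The names foo_srcu and | 
|---|
|  | * foo_retire_item() are assumptions made only for this example. | 
|---|
|  | * | 
|---|
|  | *	// Start a grace period (if needed) and record a cookie for it. | 
|---|
|  | *	unsigned long cookie = start_poll_synchronize_srcu(&foo_srcu); | 
|---|
|  | * | 
|---|
|  | *	// Later, for example from a periodic work item: | 
|---|
|  | *	if (poll_state_synchronize_srcu(&foo_srcu, cookie)) | 
|---|
|  | *		foo_retire_item();	// All pre-existing readers are done. | 
|---|
|  | *	// Otherwise, simply check again later rather than busy-waiting. | 
|---|
|  | */ | 
|---|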
| 1612 |  | 
|---|
| 1613 | /* | 
|---|
| 1614 | * Callback function for srcu_barrier() use. | 
|---|
| 1615 | */ | 
|---|
| 1616 | static void srcu_barrier_cb(struct rcu_head *rhp) | 
|---|
| 1617 | { | 
|---|
| 1618 | struct srcu_data *sdp; | 
|---|
| 1619 | struct srcu_struct *ssp; | 
|---|
| 1620 |  | 
|---|
| 1621 | rhp->next = rhp; // Mark the callback as having been invoked. | 
|---|
| 1622 | sdp = container_of(rhp, struct srcu_data, srcu_barrier_head); | 
|---|
| 1623 | ssp = sdp->ssp; | 
|---|
| 1624 | if (atomic_dec_and_test(&ssp->srcu_sup->srcu_barrier_cpu_cnt)) | 
|---|
| 1625 | complete(&ssp->srcu_sup->srcu_barrier_completion); | 
|---|
| 1626 | } | 
|---|
| 1627 |  | 
|---|
| 1628 | /* | 
|---|
| 1629 | * Enqueue an srcu_barrier() callback on the specified srcu_data | 
|---|
| 1630 | * structure's ->cblist, but only if that ->cblist already has at least one | 
|---|
| 1631 | * callback enqueued.  Note that if a CPU already has callbacks enqueued, | 
|---|
| 1632 | * it must have already registered the need for a future grace period, | 
|---|
| 1633 | * so all we need do is enqueue a callback that will use the same grace | 
|---|
| 1634 | * period as the last callback already in the queue. | 
|---|
| 1635 | */ | 
|---|
| 1636 | static void srcu_barrier_one_cpu(struct srcu_struct *ssp, struct srcu_data *sdp) | 
|---|
| 1637 | { | 
|---|
| 1638 | spin_lock_irq_rcu_node(sdp); | 
|---|
| 1639 | atomic_inc(&ssp->srcu_sup->srcu_barrier_cpu_cnt); | 
|---|
| 1640 | sdp->srcu_barrier_head.func = srcu_barrier_cb; | 
|---|
| 1641 | debug_rcu_head_queue(&sdp->srcu_barrier_head); | 
|---|
| 1642 | if (!rcu_segcblist_entrain(&sdp->srcu_cblist, | 
|---|
| 1643 | &sdp->srcu_barrier_head)) { | 
|---|
| 1644 | debug_rcu_head_unqueue(&sdp->srcu_barrier_head); | 
|---|
| 1645 | atomic_dec(&ssp->srcu_sup->srcu_barrier_cpu_cnt); | 
|---|
| 1646 | } | 
|---|
| 1647 | spin_unlock_irq_rcu_node(sdp); | 
|---|
| 1648 | } | 
|---|
| 1649 |  | 
|---|
| 1650 | /** | 
|---|
| 1651 | * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete. | 
|---|
| 1652 | * @ssp: srcu_struct on which to wait for in-flight callbacks. | 
|---|
| 1653 | */ | 
|---|
| 1654 | void srcu_barrier(struct srcu_struct *ssp) | 
|---|
| 1655 | { | 
|---|
| 1656 | int cpu; | 
|---|
| 1657 | int idx; | 
|---|
| 1658 | unsigned long s = rcu_seq_snap(&ssp->srcu_sup->srcu_barrier_seq); | 
|---|
| 1659 |  | 
|---|
| 1660 | check_init_srcu_struct(ssp); | 
|---|
| 1661 | mutex_lock(&ssp->srcu_sup->srcu_barrier_mutex); | 
|---|
| 1662 | if (rcu_seq_done(&ssp->srcu_sup->srcu_barrier_seq, s)) { | 
|---|
| 1663 | smp_mb(); /* Force ordering following return. */ | 
|---|
| 1664 | mutex_unlock(&ssp->srcu_sup->srcu_barrier_mutex); | 
|---|
| 1665 | return; /* Someone else did our work for us. */ | 
|---|
| 1666 | } | 
|---|
| 1667 | rcu_seq_start(&ssp->srcu_sup->srcu_barrier_seq); | 
|---|
| 1668 | init_completion(&ssp->srcu_sup->srcu_barrier_completion); | 
|---|
| 1669 |  | 
|---|
| 1670 | /* Initial count prevents reaching zero until all CBs are posted. */ | 
|---|
| 1671 | atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 1); | 
|---|
| 1672 |  | 
|---|
| 1673 | idx = __srcu_read_lock_nmisafe(ssp); | 
|---|
| 1674 | if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER) | 
|---|
| 1675 | srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda,	get_boot_cpu_id())); | 
|---|
| 1676 | else | 
|---|
| 1677 | for_each_possible_cpu(cpu) | 
|---|
| 1678 | srcu_barrier_one_cpu(ssp, per_cpu_ptr(ssp->sda, cpu)); | 
|---|
| 1679 | __srcu_read_unlock_nmisafe(ssp, idx); | 
|---|
| 1680 |  | 
|---|
| 1681 | /* Remove the initial count, at which point reaching zero can happen. */ | 
|---|
| 1682 | if (atomic_dec_and_test(&ssp->srcu_sup->srcu_barrier_cpu_cnt)) | 
|---|
| 1683 | complete(&ssp->srcu_sup->srcu_barrier_completion); | 
|---|
| 1684 | wait_for_completion(&ssp->srcu_sup->srcu_barrier_completion); | 
|---|
| 1685 |  | 
|---|
| 1686 | rcu_seq_end(&ssp->srcu_sup->srcu_barrier_seq); | 
|---|
| 1687 | mutex_unlock(&ssp->srcu_sup->srcu_barrier_mutex); | 
|---|
| 1688 | } | 
|---|
| 1689 | EXPORT_SYMBOL_GPL(srcu_barrier); | 
|---|
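|  |  | 
|---|
|  | /* | 
|---|
|  | * Illustrative sketch (not part of the original sources): a typical | 
|---|
|  | * teardown sequence using srcu_barrier() to flush in-flight call_srcu() | 
|---|
|  | * callbacks before their code and data go away.  The name foo_srcu is | 
|---|
|  | * an assumption made only for this example. | 
|---|
|  | * | 
|---|
|  | *	// Caller has already ensured that no new call_srcu() invocations | 
|---|
|  | *	// against foo_srcu can start at this point. | 
|---|
|  | *	srcu_barrier(&foo_srcu);	// Wait for queued callbacks to run. | 
|---|
|  | *	cleanup_srcu_struct(&foo_srcu);	// If foo_srcu was set up with | 
|---|
|  | *					// init_srcu_struct(). | 
|---|
|  | */ | 
|---|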
| 1690 |  | 
|---|
| 1691 | /** | 
|---|
| 1692 | * srcu_batches_completed - return batches completed. | 
|---|
| 1693 | * @ssp: srcu_struct on which to report batch completion. | 
|---|
| 1694 | * | 
|---|
| 1695 | * Report the number of batches, correlated with, but not necessarily | 
|---|
| 1696 | * precisely the same as, the number of grace periods that have elapsed. | 
|---|
| 1697 | */ | 
|---|
| 1698 | unsigned long srcu_batches_completed(struct srcu_struct *ssp) | 
|---|
| 1699 | { | 
|---|
| 1700 | return READ_ONCE(ssp->srcu_sup->srcu_gp_seq); | 
|---|
| 1701 | } | 
|---|
| 1702 | EXPORT_SYMBOL_GPL(srcu_batches_completed); | 
|---|
| 1703 |  | 
|---|
| 1704 | /* | 
|---|
| 1705 | * Core SRCU state machine.  Push state bits of ->srcu_gp_seq | 
|---|
| 1706 | * to SRCU_STATE_SCAN2, and invoke srcu_gp_end() when scan has | 
|---|
| 1707 | * completed in that state. | 
|---|
| 1708 | */ | 
|---|
| 1709 | static void srcu_advance_state(struct srcu_struct *ssp) | 
|---|
| 1710 | { | 
|---|
| 1711 | int idx; | 
|---|
| 1712 |  | 
|---|
| 1713 | mutex_lock(&ssp->srcu_sup->srcu_gp_mutex); | 
|---|
| 1714 |  | 
|---|
| 1715 | /* | 
|---|
| 1716 | * Because readers might be delayed for an extended period after | 
|---|
| 1717 | * fetching ->srcu_ctrp for their index, at any point in time there | 
|---|
| 1718 | * might well be readers using both idx=0 and idx=1.  We therefore | 
|---|
| 1719 | * need to wait for readers to clear from both index values before | 
|---|
| 1720 | * invoking a callback. | 
|---|
| 1721 | * | 
|---|
| 1722 | * The load-acquire ensures that we see the accesses performed | 
|---|
| 1723 | * by the prior grace period. | 
|---|
| 1724 | */ | 
|---|
| 1725 | idx = rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq)); /* ^^^ */ | 
|---|
| 1726 | if (idx == SRCU_STATE_IDLE) { | 
|---|
| 1727 | spin_lock_irq_rcu_node(ssp->srcu_sup); | 
|---|
| 1728 | if (ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)) { | 
|---|
| 1729 | WARN_ON_ONCE(rcu_seq_state(ssp->srcu_sup->srcu_gp_seq)); | 
|---|
| 1730 | spin_unlock_irq_rcu_node(ssp->srcu_sup); | 
|---|
| 1731 | mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex); | 
|---|
| 1732 | return; | 
|---|
| 1733 | } | 
|---|
| 1734 | idx = rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)); | 
|---|
| 1735 | if (idx == SRCU_STATE_IDLE) | 
|---|
| 1736 | srcu_gp_start(ssp); | 
|---|
| 1737 | spin_unlock_irq_rcu_node(ssp->srcu_sup); | 
|---|
| 1738 | if (idx != SRCU_STATE_IDLE) { | 
|---|
| 1739 | mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex); | 
|---|
| 1740 | return; /* Someone else started the grace period. */ | 
|---|
| 1741 | } | 
|---|
| 1742 | } | 
|---|
| 1743 |  | 
|---|
| 1744 | if (rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)) == SRCU_STATE_SCAN1) { | 
|---|
| 1745 | idx = !(ssp->srcu_ctrp - &ssp->sda->srcu_ctrs[0]); | 
|---|
| 1746 | if (!try_check_zero(ssp, idx, 1)) { | 
|---|
| 1747 | mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex); | 
|---|
| 1748 | return; /* readers present, retry later. */ | 
|---|
| 1749 | } | 
|---|
| 1750 | srcu_flip(ssp); | 
|---|
| 1751 | spin_lock_irq_rcu_node(ssp->srcu_sup); | 
|---|
| 1752 | rcu_seq_set_state(&ssp->srcu_sup->srcu_gp_seq, SRCU_STATE_SCAN2); | 
|---|
| 1753 | ssp->srcu_sup->srcu_n_exp_nodelay = 0; | 
|---|
| 1754 | spin_unlock_irq_rcu_node(ssp->srcu_sup); | 
|---|
| 1755 | } | 
|---|
| 1756 |  | 
|---|
| 1757 | if (rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)) == SRCU_STATE_SCAN2) { | 
|---|
| 1758 |  | 
|---|
| 1759 | /* | 
|---|
| 1760 | * SRCU read-side critical sections are normally short, | 
|---|
| 1761 | * so check at least twice in quick succession after a flip. | 
|---|
| 1762 | */ | 
|---|
| 1763 | idx = !(ssp->srcu_ctrp - &ssp->sda->srcu_ctrs[0]); | 
|---|
| 1764 | if (!try_check_zero(ssp, idx, 2)) { | 
|---|
| 1765 | mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex); | 
|---|
| 1766 | return; /* readers present, retry later. */ | 
|---|
| 1767 | } | 
|---|
| 1768 | ssp->srcu_sup->srcu_n_exp_nodelay = 0; | 
|---|
| 1769 | srcu_gp_end(ssp);  /* Releases ->srcu_gp_mutex. */ | 
|---|
| 1770 | } | 
|---|
| 1771 | } | 
|---|
| 1772 |  | 
|---|
| 1773 | /* | 
|---|
| 1774 | * Invoke a limited number of SRCU callbacks that have passed through | 
|---|
| 1775 | * their grace period.  If there are more to do, SRCU will reschedule | 
|---|
| 1776 | * the workqueue.  Note that needed memory barriers have been executed | 
|---|
| 1777 | * in this task's context by srcu_readers_active_idx_check(). | 
|---|
| 1778 | */ | 
|---|
| 1779 | static void srcu_invoke_callbacks(struct work_struct *work) | 
|---|
| 1780 | { | 
|---|
| 1781 | long len; | 
|---|
| 1782 | bool more; | 
|---|
| 1783 | struct rcu_cblist ready_cbs; | 
|---|
| 1784 | struct rcu_head *rhp; | 
|---|
| 1785 | struct srcu_data *sdp; | 
|---|
| 1786 | struct srcu_struct *ssp; | 
|---|
| 1787 |  | 
|---|
| 1788 | sdp = container_of(work, struct srcu_data, work); | 
|---|
| 1789 |  | 
|---|
| 1790 | ssp = sdp->ssp; | 
|---|
| 1791 | rcu_cblist_init(&ready_cbs); | 
|---|
| 1792 | spin_lock_irq_rcu_node(sdp); | 
|---|
| 1793 | WARN_ON_ONCE(!rcu_segcblist_segempty(&sdp->srcu_cblist, RCU_NEXT_TAIL)); | 
|---|
| 1794 | rcu_segcblist_advance(&sdp->srcu_cblist, | 
|---|
| 1795 | rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq)); | 
|---|
| 1796 | /* | 
|---|
| 1797 | * Although this function is theoretically re-entrant, concurrent | 
|---|
| 1798 | * callbacks invocation is disallowed to avoid executing an SRCU barrier | 
|---|
| 1799 | * too early. | 
|---|
| 1800 | */ | 
|---|
| 1801 | if (sdp->srcu_cblist_invoking || | 
|---|
| 1802 | !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) { | 
|---|
| 1803 | spin_unlock_irq_rcu_node(sdp); | 
|---|
| 1804 | return;  /* Someone else on the job or nothing to do. */ | 
|---|
| 1805 | } | 
|---|
| 1806 |  | 
|---|
| 1807 | /* We are on the job!  Extract and invoke ready callbacks. */ | 
|---|
| 1808 | sdp->srcu_cblist_invoking = true; | 
|---|
| 1809 | rcu_segcblist_extract_done_cbs(&sdp->srcu_cblist, &ready_cbs); | 
|---|
| 1810 | len = ready_cbs.len; | 
|---|
| 1811 | spin_unlock_irq_rcu_node(sdp); | 
|---|
| 1812 | rhp = rcu_cblist_dequeue(&ready_cbs); | 
|---|
| 1813 | for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) { | 
|---|
| 1814 | debug_rcu_head_unqueue(rhp); | 
|---|
| 1815 | debug_rcu_head_callback(rhp); | 
|---|
| 1816 | local_bh_disable(); | 
|---|
| 1817 | rhp->func(rhp); | 
|---|
| 1818 | local_bh_enable(); | 
|---|
| 1819 | } | 
|---|
| 1820 | WARN_ON_ONCE(ready_cbs.len); | 
|---|
| 1821 |  | 
|---|
| 1822 | /* | 
|---|
| 1823 | * Update counts, accelerate new callbacks, and if needed, | 
|---|
| 1824 | * schedule another round of callback invocation. | 
|---|
| 1825 | */ | 
|---|
| 1826 | spin_lock_irq_rcu_node(sdp); | 
|---|
| 1827 | rcu_segcblist_add_len(&sdp->srcu_cblist, -len); | 
|---|
| 1828 | sdp->srcu_cblist_invoking = false; | 
|---|
| 1829 | more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist); | 
|---|
| 1830 | spin_unlock_irq_rcu_node(sdp); | 
|---|
| 1831 | /* An SRCU barrier or callbacks from previous nesting work pending */ | 
|---|
| 1832 | if (more) | 
|---|
| 1833 | srcu_schedule_cbs_sdp(sdp, 0); | 
|---|
| 1834 | } | 
|---|
| 1835 |  | 
|---|
| 1836 | /* | 
|---|
| 1837 | * Finished one round of SRCU grace period.  Start another if there are | 
|---|
| 1838 | * more SRCU callbacks queued, otherwise put SRCU into not-running state. | 
|---|
| 1839 | */ | 
|---|
| 1840 | static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay) | 
|---|
| 1841 | { | 
|---|
| 1842 | bool pushgp = true; | 
|---|
| 1843 |  | 
|---|
| 1844 | spin_lock_irq_rcu_node(ssp->srcu_sup); | 
|---|
| 1845 | if (ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)) { | 
|---|
| 1846 | if (!WARN_ON_ONCE(rcu_seq_state(ssp->srcu_sup->srcu_gp_seq))) { | 
|---|
| 1847 | /* All requests fulfilled, time to go idle. */ | 
|---|
| 1848 | pushgp = false; | 
|---|
| 1849 | } | 
|---|
| 1850 | } else if (!rcu_seq_state(ssp->srcu_sup->srcu_gp_seq)) { | 
|---|
| 1851 | /* Outstanding request and no GP.  Start one. */ | 
|---|
| 1852 | srcu_gp_start(ssp); | 
|---|
| 1853 | } | 
|---|
| 1854 | spin_unlock_irq_rcu_node(ssp->srcu_sup); | 
|---|
| 1855 |  | 
|---|
| 1856 | if (pushgp) | 
|---|
| 1857 | queue_delayed_work(rcu_gp_wq, &ssp->srcu_sup->work, delay); | 
|---|
| 1858 | } | 
|---|
| 1859 |  | 
|---|
| 1860 | /* | 
|---|
| 1861 | * This is the work-queue function that handles SRCU grace periods. | 
|---|
| 1862 | */ | 
|---|
| 1863 | static void process_srcu(struct work_struct *work) | 
|---|
| 1864 | { | 
|---|
| 1865 | unsigned long curdelay; | 
|---|
| 1866 | unsigned long j; | 
|---|
| 1867 | struct srcu_struct *ssp; | 
|---|
| 1868 | struct srcu_usage *sup; | 
|---|
| 1869 |  | 
|---|
| 1870 | sup = container_of(work, struct srcu_usage, work.work); | 
|---|
| 1871 | ssp = sup->srcu_ssp; | 
|---|
| 1872 |  | 
|---|
| 1873 | srcu_advance_state(ssp); | 
|---|
| 1874 | spin_lock_irq_rcu_node(ssp->srcu_sup); | 
|---|
| 1875 | curdelay = srcu_get_delay(ssp); | 
|---|
| 1876 | spin_unlock_irq_rcu_node(ssp->srcu_sup); | 
|---|
| 1877 | if (curdelay) { | 
|---|
| 1878 | WRITE_ONCE(sup->reschedule_count, 0); | 
|---|
| 1879 | } else { | 
|---|
| 1880 | j = jiffies; | 
|---|
| 1881 | if (READ_ONCE(sup->reschedule_jiffies) == j) { | 
|---|
| 1882 | ASSERT_EXCLUSIVE_WRITER(sup->reschedule_count); | 
|---|
| 1883 | WRITE_ONCE(sup->reschedule_count, READ_ONCE(sup->reschedule_count) + 1); | 
|---|
| 1884 | if (READ_ONCE(sup->reschedule_count) > srcu_max_nodelay) | 
|---|
| 1885 | curdelay = 1; | 
|---|
| 1886 | } else { | 
|---|
| 1887 | WRITE_ONCE(sup->reschedule_count, 1); | 
|---|
| 1888 | WRITE_ONCE(sup->reschedule_jiffies, j); | 
|---|
| 1889 | } | 
|---|
| 1890 | } | 
|---|
| 1891 | srcu_reschedule(ssp, curdelay); | 
|---|
| 1892 | } | 
|---|
| 1893 |  | 
|---|
| 1894 | void srcutorture_get_gp_data(struct srcu_struct *ssp, int *flags, | 
|---|
| 1895 | unsigned long *gp_seq) | 
|---|
| 1896 | { | 
|---|
| 1897 | *flags = 0; | 
|---|
| 1898 | *gp_seq = rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq); | 
|---|
| 1899 | } | 
|---|
| 1900 | EXPORT_SYMBOL_GPL(srcutorture_get_gp_data); | 
|---|
| 1901 |  | 
|---|
| 1902 | static const char * const srcu_size_state_name[] = { | 
|---|
| 1903 | "SRCU_SIZE_SMALL", | 
|---|
| 1904 | "SRCU_SIZE_ALLOC", | 
|---|
| 1905 | "SRCU_SIZE_WAIT_BARRIER", | 
|---|
| 1906 | "SRCU_SIZE_WAIT_CALL", | 
|---|
| 1907 | "SRCU_SIZE_WAIT_CBS1", | 
|---|
| 1908 | "SRCU_SIZE_WAIT_CBS2", | 
|---|
| 1909 | "SRCU_SIZE_WAIT_CBS3", | 
|---|
| 1910 | "SRCU_SIZE_WAIT_CBS4", | 
|---|
| 1911 | "SRCU_SIZE_BIG", | 
|---|
| 1912 | "SRCU_SIZE_???", | 
|---|
| 1913 | }; | 
|---|
| 1914 |  | 
|---|
| 1915 | void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf) | 
|---|
| 1916 | { | 
|---|
| 1917 | int cpu; | 
|---|
| 1918 | int idx; | 
|---|
| 1919 | unsigned long s0 = 0, s1 = 0; | 
|---|
| 1920 | int ss_state = READ_ONCE(ssp->srcu_sup->srcu_size_state); | 
|---|
| 1921 | int ss_state_idx = ss_state; | 
|---|
| 1922 |  | 
|---|
| 1923 | idx = ssp->srcu_ctrp - &ssp->sda->srcu_ctrs[0]; | 
|---|
| 1924 | if (ss_state < 0 || ss_state >= ARRAY_SIZE(srcu_size_state_name)) | 
|---|
| 1925 | ss_state_idx = ARRAY_SIZE(srcu_size_state_name) - 1; | 
|---|
| 1926 | pr_alert( "%s%s Tree SRCU g%ld state %d (%s)", | 
|---|
| 1927 | tt, tf, rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq), ss_state, | 
|---|
| 1928 | srcu_size_state_name[ss_state_idx]); | 
|---|
| 1929 | if (!ssp->sda) { | 
|---|
| 1930 | // Called after cleanup_srcu_struct(), perhaps. | 
|---|
| 1931 | pr_cont( " No per-CPU srcu_data structures (->sda == NULL).\n"); | 
|---|
| 1932 | } else { | 
|---|
| 1933 | pr_cont( " per-CPU(idx=%d):", idx); | 
|---|
| 1934 | for_each_possible_cpu(cpu) { | 
|---|
| 1935 | unsigned long l0, l1; | 
|---|
| 1936 | unsigned long u0, u1; | 
|---|
| 1937 | long c0, c1; | 
|---|
| 1938 | struct srcu_data *sdp; | 
|---|
| 1939 |  | 
|---|
| 1940 | sdp = per_cpu_ptr(ssp->sda, cpu); | 
|---|
| 1941 | u0 = data_race(atomic_long_read(&sdp->srcu_ctrs[!idx].srcu_unlocks)); | 
|---|
| 1942 | u1 = data_race(atomic_long_read(&sdp->srcu_ctrs[idx].srcu_unlocks)); | 
|---|
| 1943 |  | 
|---|
| 1944 | /* | 
|---|
| 1945 | * Make sure that a lock is always counted if the corresponding | 
|---|
| 1946 | * unlock is counted. | 
|---|
| 1947 | */ | 
|---|
| 1948 | smp_rmb(); | 
|---|
| 1949 |  | 
|---|
| 1950 | l0 = data_race(atomic_long_read(&sdp->srcu_ctrs[!idx].srcu_locks)); | 
|---|
| 1951 | l1 = data_race(atomic_long_read(&sdp->srcu_ctrs[idx].srcu_locks)); | 
|---|
| 1952 |  | 
|---|
| 1953 | c0 = l0 - u0; | 
|---|
| 1954 | c1 = l1 - u1; | 
|---|
| 1955 | pr_cont( " %d(%ld,%ld %c)", | 
|---|
| 1956 | cpu, c0, c1, | 
|---|
| 1957 | "C."[rcu_segcblist_empty(&sdp->srcu_cblist)]); | 
|---|
| 1958 | s0 += c0; | 
|---|
| 1959 | s1 += c1; | 
|---|
| 1960 | } | 
|---|
| 1961 | pr_cont( " T(%ld,%ld)\n", s0, s1); | 
|---|
| 1962 | } | 
|---|
| 1963 | if (SRCU_SIZING_IS_TORTURE()) | 
|---|
| 1964 | srcu_transition_to_big(ssp); | 
|---|
| 1965 | } | 
|---|
| 1966 | EXPORT_SYMBOL_GPL(srcu_torture_stats_print); | 
|---|
| 1967 |  | 
|---|
| 1968 | static int __init srcu_bootup_announce(void) | 
|---|
| 1969 | { | 
|---|
| 1970 | pr_info( "Hierarchical SRCU implementation.\n"); | 
|---|
| 1971 | if (exp_holdoff != DEFAULT_SRCU_EXP_HOLDOFF) | 
|---|
| 1972 | pr_info( "\tNon-default auto-expedite holdoff of %lu ns.\n", exp_holdoff); | 
|---|
| 1973 | if (srcu_retry_check_delay != SRCU_DEFAULT_RETRY_CHECK_DELAY) | 
|---|
| 1974 | pr_info( "\tNon-default retry check delay of %lu us.\n", srcu_retry_check_delay); | 
|---|
| 1975 | if (srcu_max_nodelay != SRCU_DEFAULT_MAX_NODELAY) | 
|---|
| 1976 | pr_info( "\tNon-default max no-delay of %lu.\n", srcu_max_nodelay); | 
|---|
| 1977 | pr_info( "\tMax phase no-delay instances is %lu.\n", srcu_max_nodelay_phase); | 
|---|
| 1978 | return 0; | 
|---|
| 1979 | } | 
|---|
| 1980 | early_initcall(srcu_bootup_announce); | 
|---|
| 1981 |  | 
|---|
| 1982 | void __init srcu_init(void) | 
|---|
| 1983 | { | 
|---|
| 1984 | struct srcu_usage *sup; | 
|---|
| 1985 |  | 
|---|
| 1986 | /* Decide on srcu_struct-size strategy. */ | 
|---|
| 1987 | if (SRCU_SIZING_IS(SRCU_SIZING_AUTO)) { | 
|---|
| 1988 | if (nr_cpu_ids >= big_cpu_lim) { | 
|---|
| 1989 | convert_to_big = SRCU_SIZING_INIT; // Don't bother waiting for contention. | 
|---|
| 1990 | pr_info( "%s: Setting srcu_struct sizes to big.\n", __func__); | 
|---|
| 1991 | } else { | 
|---|
| 1992 | convert_to_big = SRCU_SIZING_NONE | SRCU_SIZING_CONTEND; | 
|---|
| 1993 | pr_info( "%s: Setting srcu_struct sizes based on contention.\n", __func__); | 
|---|
| 1994 | } | 
|---|
| 1995 | } | 
|---|
| 1996 |  | 
|---|
| 1997 | /* | 
|---|
| 1998 | * Once that is set, call_srcu() can follow the normal path and | 
|---|
| 1999 | * queue delayed work. This must follow RCU workqueues creation | 
|---|
| 2000 | * and timers initialization. | 
|---|
| 2001 | */ | 
|---|
| 2002 | srcu_init_done = true; | 
|---|
| 2003 | while (!list_empty(&srcu_boot_list)) { | 
|---|
| 2004 | sup = list_first_entry(&srcu_boot_list, struct srcu_usage, | 
|---|
| 2005 | work.work.entry); | 
|---|
| 2006 | list_del_init(&sup->work.work.entry); | 
|---|
| 2007 | if (SRCU_SIZING_IS(SRCU_SIZING_INIT) && | 
|---|
| 2008 | sup->srcu_size_state == SRCU_SIZE_SMALL) | 
|---|
| 2009 | sup->srcu_size_state = SRCU_SIZE_ALLOC; | 
|---|
| 2010 | queue_work(rcu_gp_wq, &sup->work.work); | 
|---|
| 2011 | } | 
|---|
| 2012 | } | 
|---|
| 2013 |  | 
|---|
| 2014 | #ifdef CONFIG_MODULES | 
|---|
| 2015 |  | 
|---|
| 2016 | /* Initialize any global-scope srcu_struct structures used by this module. */ | 
|---|
| 2017 | static int srcu_module_coming(struct module *mod) | 
|---|
| 2018 | { | 
|---|
| 2019 | int i; | 
|---|
| 2020 | struct srcu_struct *ssp; | 
|---|
| 2021 | struct srcu_struct **sspp = mod->srcu_struct_ptrs; | 
|---|
| 2022 |  | 
|---|
| 2023 | for (i = 0; i < mod->num_srcu_structs; i++) { | 
|---|
| 2024 | ssp = *(sspp++); | 
|---|
| 2025 | ssp->sda = alloc_percpu(struct srcu_data); | 
|---|
| 2026 | if (WARN_ON_ONCE(!ssp->sda)) | 
|---|
| 2027 | return -ENOMEM; | 
|---|
| 2028 | ssp->srcu_ctrp = &ssp->sda->srcu_ctrs[0]; | 
|---|
| 2029 | } | 
|---|
| 2030 | return 0; | 
|---|
| 2031 | } | 
|---|
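|  |  | 
|---|
|  | /* | 
|---|
|  | * Illustrative sketch (not part of the original sources): the kind of | 
|---|
|  | * module-global srcu_struct that srcu_module_coming() and | 
|---|
|  | * srcu_module_going() manage.  The names foo_srcu and foo_read() are | 
|---|
|  | * assumptions made only for this example. | 
|---|
|  | * | 
|---|
|  | *	// In some module's source file: | 
|---|
|  | *	DEFINE_SRCU(foo_srcu);	// Recorded in the module's srcu_struct_ptrs, | 
|---|
|  | *				// so ->sda is allocated at load time by | 
|---|
|  | *				// srcu_module_coming() and freed at unload | 
|---|
|  | *				// time by srcu_module_going(). | 
|---|
|  | * | 
|---|
|  | *	static int foo_read(void) | 
|---|
|  | *	{ | 
|---|
|  | *		int idx = srcu_read_lock(&foo_srcu); | 
|---|
|  | * | 
|---|
|  | *		// ... read SRCU-protected data, possibly sleeping ... | 
|---|
|  | *		srcu_read_unlock(&foo_srcu, idx); | 
|---|
|  | *		return 0; | 
|---|
|  | *	} | 
|---|
|  | */ | 
|---|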
| 2032 |  | 
|---|
| 2033 | /* Clean up any global-scope srcu_struct structures used by this module. */ | 
|---|
| 2034 | static void srcu_module_going(struct module *mod) | 
|---|
| 2035 | { | 
|---|
| 2036 | int i; | 
|---|
| 2037 | struct srcu_struct *ssp; | 
|---|
| 2038 | struct srcu_struct **sspp = mod->srcu_struct_ptrs; | 
|---|
| 2039 |  | 
|---|
| 2040 | for (i = 0; i < mod->num_srcu_structs; i++) { | 
|---|
| 2041 | ssp = *(sspp++); | 
|---|
| 2042 | if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq_needed)) && | 
|---|
| 2043 | !WARN_ON_ONCE(!ssp->srcu_sup->sda_is_static)) | 
|---|
| 2044 | cleanup_srcu_struct(ssp); | 
|---|
| 2045 | if (!WARN_ON(srcu_readers_active(ssp))) | 
|---|
| 2046 | free_percpu(ssp->sda); | 
|---|
| 2047 | } | 
|---|
| 2048 | } | 
|---|
| 2049 |  | 
|---|
| 2050 | /* Handle one module, either coming or going. */ | 
|---|
| 2051 | static int srcu_module_notify(struct notifier_block *self, | 
|---|
| 2052 | unsigned long val, void *data) | 
|---|
| 2053 | { | 
|---|
| 2054 | struct module *mod = data; | 
|---|
| 2055 | int ret = 0; | 
|---|
| 2056 |  | 
|---|
| 2057 | switch (val) { | 
|---|
| 2058 | case MODULE_STATE_COMING: | 
|---|
| 2059 | ret = srcu_module_coming(mod); | 
|---|
| 2060 | break; | 
|---|
| 2061 | case MODULE_STATE_GOING: | 
|---|
| 2062 | srcu_module_going(mod); | 
|---|
| 2063 | break; | 
|---|
| 2064 | default: | 
|---|
| 2065 | break; | 
|---|
| 2066 | } | 
|---|
| 2067 | return ret; | 
|---|
| 2068 | } | 
|---|
| 2069 |  | 
|---|
| 2070 | static struct notifier_block srcu_module_nb = { | 
|---|
| 2071 | .notifier_call = srcu_module_notify, | 
|---|
| 2072 | .priority = 0, | 
|---|
| 2073 | }; | 
|---|
| 2074 |  | 
|---|
| 2075 | static __init int init_srcu_module_notifier(void) | 
|---|
| 2076 | { | 
|---|
| 2077 | int ret; | 
|---|
| 2078 |  | 
|---|
| 2079 | ret = register_module_notifier(&srcu_module_nb); | 
|---|
| 2080 | if (ret) | 
|---|
| 2081 | pr_warn( "Failed to register srcu module notifier\n"); | 
|---|
| 2082 | return ret; | 
|---|
| 2083 | } | 
|---|
| 2084 | late_initcall(init_srcu_module_notifier); | 
|---|
| 2085 |  | 
|---|
| 2086 | #endif /* #ifdef CONFIG_MODULES */ | 
|---|
| 2087 |  | 
|---|