/* SPDX-License-Identifier: GPL-2.0+ */
/*
 * Sleepable Read-Copy Update mechanism for mutual exclusion,
 *	tree variant.
 *
 * Copyright (C) IBM Corporation, 2017
 *
 * Author: Paul McKenney <paulmck@linux.ibm.com>
 */

#ifndef _LINUX_SRCU_TREE_H
#define _LINUX_SRCU_TREE_H

#include <linux/rcu_node_tree.h>
#include <linux/completion.h>

struct srcu_node;
struct srcu_struct;

/* One element of the srcu_data srcu_ctrs array. */
struct srcu_ctr {
	atomic_long_t srcu_locks;	/* Locks per CPU. */
	atomic_long_t srcu_unlocks;	/* Unlocks per CPU. */
};
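
/*
 * A reader increments ->srcu_locks at the start of an SRCU read-side
 * critical section and ->srcu_unlocks at the end (possibly on a different
 * CPU).  The grace-period machinery then compares per-index sums of these
 * counters across all CPUs, along the lines of the following sketch
 * (illustrative only; the real check in kernel/rcu/srcutree.c must also
 * handle memory ordering and counter wrap):
 *
 *	unsigned long sum = 0;
 *
 *	for_each_possible_cpu(cpu) {
 *		struct srcu_ctr *scp = &per_cpu_ptr(ssp->sda, cpu)->srcu_ctrs[idx];
 *
 *		sum += atomic_long_read(&scp->srcu_locks);
 *		sum -= atomic_long_read(&scp->srcu_unlocks);
 *	}
 *	// Readers might still be present in this index if sum != 0.
 */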

/*
 * Per-CPU structure feeding into leaf srcu_node, similar in function
 * to rcu_data.
 */
struct srcu_data {
	/* Read-side state. */
	struct srcu_ctr srcu_ctrs[2];		/* Locks and unlocks per CPU. */
	int srcu_reader_flavor;			/* Reader flavor for srcu_struct structure? */
						/* Values: SRCU_READ_FLAVOR_.*  */

	/* Update-side state. */
	spinlock_t __private lock ____cacheline_internodealigned_in_smp;
	struct rcu_segcblist srcu_cblist;	/* List of callbacks. */
	unsigned long srcu_gp_seq_needed;	/* Furthest future GP needed. */
	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
	bool srcu_cblist_invoking;		/* Invoking these CBs? */
	struct timer_list delay_work;		/* Delay for CB invoking */
	struct work_struct work;		/* Context for CB invoking. */
	struct rcu_head srcu_barrier_head;	/* For srcu_barrier() use. */
	struct srcu_node *mynode;		/* Leaf srcu_node. */
	unsigned long grpmask;			/* Mask for leaf srcu_node */
						/*  ->srcu_data_have_cbs[]. */
	int cpu;
	struct srcu_struct *ssp;
};

/*
 * Node in SRCU combining tree, similar in function to rcu_node.
 */
struct srcu_node {
	spinlock_t __private lock;
	unsigned long srcu_have_cbs[4];		/* GP seq for children having CBs, but only */
						/*  if greater than ->srcu_gp_seq. */
	unsigned long srcu_data_have_cbs[4];	/* Which srcu_data structs have CBs for given GP? */
	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
	struct srcu_node *srcu_parent;		/* Next up in tree. */
	int grplo;				/* Least CPU for node. */
	int grphi;				/* Biggest CPU for node. */
};

/*
 * Per-SRCU-domain structure, update-side data linked from srcu_struct.
 */
struct srcu_usage {
	struct srcu_node *node;			/* Combining tree. */
	struct srcu_node *level[RCU_NUM_LVLS + 1];
						/* First node at each level. */
	int srcu_size_state;			/* Small-to-big transition state. */
	struct mutex srcu_cb_mutex;		/* Serialize CB preparation. */
	spinlock_t __private lock;		/* Protect counters and size state. */
	struct mutex srcu_gp_mutex;		/* Serialize GP work. */
	unsigned long srcu_gp_seq;		/* Grace-period seq #. */
	unsigned long srcu_gp_seq_needed;	/* Latest gp_seq needed. */
	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
	unsigned long srcu_gp_start;		/* Last GP start timestamp (jiffies) */
	unsigned long srcu_last_gp_end;		/* Last GP end timestamp (ns) */
	unsigned long srcu_size_jiffies;	/* Current contention-measurement interval. */
	unsigned long srcu_n_lock_retries;	/* Contention events in current interval. */
	unsigned long srcu_n_exp_nodelay;	/* # expedited no-delays in current GP phase. */
	bool sda_is_static;			/* May ->sda be passed to free_percpu()? */
	unsigned long srcu_barrier_seq;		/* srcu_barrier seq #. */
	struct mutex srcu_barrier_mutex;	/* Serialize barrier ops. */
	struct completion srcu_barrier_completion;
						/* Awaken barrier rq at end. */
	atomic_t srcu_barrier_cpu_cnt;		/* # CPUs not yet posting a */
						/*  callback for the barrier */
						/*  operation. */
	unsigned long reschedule_jiffies;
	unsigned long reschedule_count;
	struct delayed_work work;
	struct srcu_struct *srcu_ssp;
};

/*
 * Per-SRCU-domain structure, similar in function to rcu_state.
 */
struct srcu_struct {
	struct srcu_ctr __percpu *srcu_ctrp;
	struct srcu_data __percpu *sda;		/* Per-CPU srcu_data array. */
	struct lockdep_map dep_map;
	struct srcu_usage *srcu_sup;		/* Update-side data. */
};

// Values for size state variable (->srcu_size_state).  Once the state
// has been set to SRCU_SIZE_ALLOC, the grace-period code advances through
// this state machine one step per grace period until the SRCU_SIZE_BIG state
// is reached.  Otherwise, the state machine remains in the SRCU_SIZE_SMALL
// state indefinitely.
#define SRCU_SIZE_SMALL		0	// No srcu_node combining tree, ->node == NULL.
#define SRCU_SIZE_ALLOC		1	// An srcu_node tree is being allocated, initialized,
					//  and then referenced by ->node.  It will not yet be used.
#define SRCU_SIZE_WAIT_BARRIER	2	// The srcu_node tree starts being used by everything
					//  except call_srcu(), especially by srcu_barrier().
					//  By the end of this state, all CPUs and threads
					//  are aware of this tree's existence.
#define SRCU_SIZE_WAIT_CALL	3	// The srcu_node tree starts being used by call_srcu().
					//  By the end of this state, all of the call_srcu()
					//  invocations that were running on a non-boot CPU
					//  and using the boot CPU's callback queue will have
					//  completed.
#define SRCU_SIZE_WAIT_CBS1	4	// Don't trust the ->srcu_have_cbs[] grace-period
#define SRCU_SIZE_WAIT_CBS2	5	//  sequence elements or the ->srcu_data_have_cbs[]
#define SRCU_SIZE_WAIT_CBS3	6	//  CPU-bitmask elements until all four elements of
#define SRCU_SIZE_WAIT_CBS4	7	//  each array have been initialized.
#define SRCU_SIZE_BIG		8	// The srcu_node combining tree is fully initialized
					//  and all aspects of it are being put to use.
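
// For reference, once a transition out of SRCU_SIZE_SMALL has been
// requested, the state advances one step per grace period:
//
//	SRCU_SIZE_SMALL -> SRCU_SIZE_ALLOC -> SRCU_SIZE_WAIT_BARRIER ->
//	SRCU_SIZE_WAIT_CALL -> SRCU_SIZE_WAIT_CBS1 -> SRCU_SIZE_WAIT_CBS2 ->
//	SRCU_SIZE_WAIT_CBS3 -> SRCU_SIZE_WAIT_CBS4 -> SRCU_SIZE_BIG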

/* Values for state variable (bottom bits of ->srcu_gp_seq). */
#define SRCU_STATE_IDLE		0
#define SRCU_STATE_SCAN1	1
#define SRCU_STATE_SCAN2	2

/*
 * Values for initializing gp sequence fields. Higher values allow wraparounds
 * to occur earlier.
 * The second value with state is useful in the case of static initialization
 * of srcu_usage, where srcu_gp_seq_needed is expected to have some state value
 * in its lower bits (or else it will appear to be already initialized within
 * the call to check_init_srcu_struct()).
 */
#define SRCU_GP_SEQ_INITIAL_VAL ((0UL - 100UL) << RCU_SEQ_CTR_SHIFT)
#define SRCU_GP_SEQ_INITIAL_VAL_WITH_STATE (SRCU_GP_SEQ_INITIAL_VAL - 1)
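
/*
 * For a rough sense of scale (illustrative only, assuming the usual
 * RCU_SEQ_CTR_SHIFT value of 2): SRCU_GP_SEQ_INITIAL_VAL then works out
 * to -400 modulo the unsigned-long range, so the grace-period sequence
 * counter wraps after roughly 100 grace periods instead of after
 * billions, exercising the wraparound paths early and often.
 */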

#define __SRCU_USAGE_INIT(name)									\
{												\
	.lock = __SPIN_LOCK_UNLOCKED(name.lock),						\
	.srcu_gp_seq = SRCU_GP_SEQ_INITIAL_VAL,							\
	.srcu_gp_seq_needed = SRCU_GP_SEQ_INITIAL_VAL_WITH_STATE,				\
	.srcu_gp_seq_needed_exp = SRCU_GP_SEQ_INITIAL_VAL,					\
	.work = __DELAYED_WORK_INITIALIZER(name.work, NULL, 0),					\
}

#define __SRCU_STRUCT_INIT_COMMON(name, usage_name)						\
	.srcu_sup = &usage_name,								\
	__SRCU_DEP_MAP_INIT(name)

#define __SRCU_STRUCT_INIT_MODULE(name, usage_name)						\
{												\
	__SRCU_STRUCT_INIT_COMMON(name, usage_name)						\
}

#define __SRCU_STRUCT_INIT(name, usage_name, pcpu_name)						\
{												\
	.sda = &pcpu_name,									\
	.srcu_ctrp = &pcpu_name.srcu_ctrs[0],							\
	__SRCU_STRUCT_INIT_COMMON(name, usage_name)						\
}

/*
 * Define and initialize a srcu struct at build time.
 * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it.
 *
 * Note that although DEFINE_STATIC_SRCU() hides the name from other
 * files, the per-CPU variable rules nevertheless require that the
 * chosen name be globally unique.  These rules also prohibit use of
 * DEFINE_STATIC_SRCU() within a function.  If these rules are too
 * restrictive, declare the srcu_struct manually.  For example, in
 * each file:
 *
 *	static struct srcu_struct my_srcu;
 *
 * Then, before the first use of each my_srcu, manually initialize it:
 *
 *	init_srcu_struct(&my_srcu);
 *
 * See include/linux/percpu-defs.h for the rules on per-CPU variables.
 */
#ifdef MODULE
# define __DEFINE_SRCU(name, is_static)								\
	static struct srcu_usage name##_srcu_usage = __SRCU_USAGE_INIT(name##_srcu_usage);	\
	is_static struct srcu_struct name = __SRCU_STRUCT_INIT_MODULE(name, name##_srcu_usage);	\
	extern struct srcu_struct * const __srcu_struct_##name;					\
	struct srcu_struct * const __srcu_struct_##name						\
		__section("___srcu_struct_ptrs") = &name
#else
# define __DEFINE_SRCU(name, is_static)								\
	static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data);				\
	static struct srcu_usage name##_srcu_usage = __SRCU_USAGE_INIT(name##_srcu_usage);	\
	is_static struct srcu_struct name =							\
		__SRCU_STRUCT_INIT(name, name##_srcu_usage, name##_srcu_data)
#endif
#define DEFINE_SRCU(name)		__DEFINE_SRCU(name, /* not static */)
#define DEFINE_STATIC_SRCU(name)	__DEFINE_SRCU(name, static)
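
/*
 * For example, a sketch of typical usage (my_srcu, gp, old_p, new_p, and
 * struct foo are hypothetical names; the reader/updater pattern shown is
 * ordinary SRCU usage via include/linux/srcu.h, not anything specific to
 * this header):
 *
 *	DEFINE_STATIC_SRCU(my_srcu);
 *
 *	// Reader:
 *	int idx = srcu_read_lock(&my_srcu);
 *	struct foo *p = srcu_dereference(gp, &my_srcu);
 *	// ... use p ...
 *	srcu_read_unlock(&my_srcu, idx);
 *
 *	// Updater:
 *	rcu_assign_pointer(gp, new_p);
 *	synchronize_srcu(&my_srcu);	// Wait for pre-existing readers.
 *	kfree(old_p);
 */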

int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp);
void synchronize_srcu_expedited(struct srcu_struct *ssp);
void srcu_barrier(struct srcu_struct *ssp);
void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf);

// Converts a per-CPU pointer to an ->srcu_ctrs[] array element to that
// element's index.
static inline bool __srcu_ptr_to_ctr(struct srcu_struct *ssp, struct srcu_ctr __percpu *scpp)
{
	return scpp - &ssp->sda->srcu_ctrs[0];
}

// Converts an integer to a per-CPU pointer to the corresponding
// ->srcu_ctrs[] array element.
static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ssp, int idx)
{
	return &ssp->sda->srcu_ctrs[idx];
}
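
// The two helpers above are inverses over the two valid indexes, so for
// idx in {0, 1} and scpp pointing at either ->srcu_ctrs[] element:
//
//	__srcu_ptr_to_ctr(ssp, __srcu_ctr_to_ptr(ssp, idx)) == idx
//	__srcu_ctr_to_ptr(ssp, __srcu_ptr_to_ctr(ssp, scpp)) == scpp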

/*
 * Counts the new reader in the appropriate per-CPU element of the
 * srcu_struct.  Returns a pointer that must be passed to the matching
 * srcu_read_unlock_fast().
 *
 * Note that both this_cpu_inc() and atomic_long_inc() are RCU read-side
 * critical sections either because they disable interrupts, because
 * they are a single instruction, or because they are read-modify-write
 * atomic operations, depending on the whims of the architecture.
 * This matters because the SRCU-fast grace-period mechanism uses either
 * synchronize_rcu() or synchronize_rcu_expedited(), that is, RCU,
 * *not* SRCU, in order to eliminate the need for the read-side smp_mb()
 * invocations that are used by srcu_read_lock() and srcu_read_unlock().
 * The __srcu_read_unlock_fast() function also relies on this same RCU
 * (again, *not* SRCU) trick to eliminate the need for smp_mb().
 *
 * The key point behind this RCU trick is that if any part of a given
 * RCU reader precedes the beginning of a given RCU grace period, then
 * the entirety of that RCU reader and everything preceding it happens
 * before the end of that same RCU grace period.  Similarly, if any part
 * of a given RCU reader follows the end of a given RCU grace period,
 * then the entirety of that RCU reader and everything following it
 * happens after the beginning of that same RCU grace period.  Therefore,
 * the operations labeled Y in __srcu_read_lock_fast() and those labeled Z
 * in __srcu_read_unlock_fast() are ordered against the corresponding SRCU
 * read-side critical section from the viewpoint of the SRCU grace period.
 * This is all the ordering that is required, hence no calls to smp_mb().
 *
 * This means that __srcu_read_lock_fast() is not all that fast
 * on architectures that support NMIs but do not supply NMI-safe
 * implementations of this_cpu_inc().
 */
static inline struct srcu_ctr __percpu notrace *__srcu_read_lock_fast(struct srcu_struct *ssp)
{
	struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);

	if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
		this_cpu_inc(scp->srcu_locks.counter); // Y, and implicit RCU reader.
	else
		atomic_long_inc(raw_cpu_ptr(&scp->srcu_locks));  // Y, and implicit RCU reader.
	barrier(); /* Avoid leaking the critical section. */
	return scp;
}

/*
 * Removes the count for the old reader from the appropriate
 * per-CPU element of the srcu_struct.  Note that this may well be a
 * different CPU than that which was incremented by the corresponding
 * srcu_read_lock_fast(), but it must be within the same task.
 *
 * Please see the __srcu_read_lock_fast() function's header comment for
 * information on implicit RCU readers and NMI safety.
 */
static inline void notrace
__srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp)
{
	barrier();  /* Avoid leaking the critical section. */
	if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
		this_cpu_inc(scp->srcu_unlocks.counter);  // Z, and implicit RCU reader.
	else
		atomic_long_inc(raw_cpu_ptr(&scp->srcu_unlocks));  // Z, and implicit RCU reader.
}
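
/*
 * Putting the two together, a minimal reader sketch (illustrative only;
 * my_srcu and gp are hypothetical, and most callers should use the
 * srcu_read_lock_fast()/srcu_read_unlock_fast() wrappers rather than
 * calling these double-underscore functions directly):
 *
 *	struct srcu_ctr __percpu *scp;
 *
 *	scp = srcu_read_lock_fast(&my_srcu);
 *	p = srcu_dereference(gp, &my_srcu);	// Access SRCU-protected data.
 *	srcu_read_unlock_fast(&my_srcu, scp);	// Pass back the same cookie.
 */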

void __srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor);

// Record reader usage even for CONFIG_PROVE_RCU=n kernels.  This is
// needed only for flavors that require grace-period smp_mb() calls to be
// promoted to synchronize_rcu().
static inline void srcu_check_read_flavor_force(struct srcu_struct *ssp, int read_flavor)
{
	struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);

	if (likely(READ_ONCE(sdp->srcu_reader_flavor) & read_flavor))
		return;

	// Note that the cmpxchg() in __srcu_check_read_flavor() is fully ordered.
	__srcu_check_read_flavor(ssp, read_flavor);
}

// Record non-_lite() usage only for CONFIG_PROVE_RCU=y kernels.
static inline void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor)
{
	if (IS_ENABLED(CONFIG_PROVE_RCU))
		__srcu_check_read_flavor(ssp, read_flavor);
}

#endif // _LINUX_SRCU_TREE_H