/* SPDX-License-Identifier: GPL-2.0+ */
/*
 * Read-Copy Update mechanism for mutual exclusion (tree-based version)
 * Internal non-public definitions that provide either classic
 * or preemptible semantics.
 *
 * Copyright Red Hat, 2009
 * Copyright IBM Corporation, 2009
 * Copyright SUSE, 2021
 *
 * Author: Ingo Molnar <mingo@elte.hu>
 *	   Paul E. McKenney <paulmck@linux.ibm.com>
 *	   Frederic Weisbecker <frederic@kernel.org>
 */

#ifdef CONFIG_RCU_NOCB_CPU
static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
static bool __read_mostly rcu_nocb_poll;    /* Offload kthreads are to poll. */

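/*
 * Is the current task one of the rcuog or rcuoc kthreads associated with
 * the specified rcu_data structure?
 */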
static inline bool rcu_current_is_nocb_kthread(struct rcu_data *rdp)
{
	/* Race on early boot between thread creation and assignment */
	if (!rdp->nocb_cb_kthread || !rdp->nocb_gp_kthread)
		return true;

	if (current == rdp->nocb_cb_kthread || current == rdp->nocb_gp_kthread)
		if (in_task())
			return true;
	return false;
}

/*
 * Offload callback processing from the boot-time-specified set of CPUs
 * specified by rcu_nocb_mask.  For the CPUs in the set, there are kthreads
 * created that pull the callbacks from the corresponding CPU, wait for
 * a grace period to elapse, and invoke the callbacks.  These kthreads
 * are organized into GP kthreads, which manage incoming callbacks, wait for
 * grace periods, and awaken CB kthreads, and the CB kthreads, which only
 * invoke callbacks.  Each GP kthread invokes its own CBs.  The no-CBs CPUs
 * do a wake_up() on their GP kthread when they insert a callback into any
 * empty list, unless the rcu_nocb_poll boot parameter has been specified,
 * in which case each kthread actively polls its CPU.  (Which isn't so great
 * for energy efficiency, but which does reduce RCU's overhead on that CPU.)
 *
 * This is intended to be used in conjunction with Frederic Weisbecker's
 * adaptive-idle work, which would seriously reduce OS jitter on CPUs
 * running CPU-bound user-mode computations.
 *
 * Offloading of callbacks can also be used as an energy-efficiency
 * measure because CPUs with no RCU callbacks queued are more aggressive
 * about entering dyntick-idle mode.
 */


/*
 * Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters.
 * If the list is invalid, a warning is emitted and all CPUs are offloaded.
 */
static int __init rcu_nocb_setup(char *str)
{
	alloc_bootmem_cpumask_var(&rcu_nocb_mask);
	if (*str == '=') {
		if (cpulist_parse(++str, rcu_nocb_mask)) {
			pr_warn("rcu_nocbs= bad CPU range, all CPUs set\n");
			cpumask_setall(rcu_nocb_mask);
		}
	}
	rcu_state.nocb_is_setup = true;
	return 1;
}
__setup("rcu_nocbs", rcu_nocb_setup);
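/*
 * For example, booting with "rcu_nocbs=1-7" offloads callbacks from CPUs
 * 1-7 (the CPU numbers here are only an illustration), while a bare
 * "rcu_nocbs" with no CPU list merely marks offloading as set up so that
 * CPUs implied by other means (such as nohz_full=) can be offloaded by
 * rcu_init_nohz().
 */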

static int __init parse_rcu_nocb_poll(char *arg)
{
	rcu_nocb_poll = true;
	return 1;
}
__setup("rcu_nocb_poll", parse_rcu_nocb_poll);

/*
 * Don't bother bypassing ->cblist if the call_rcu() rate is low.
 * After all, the main point of bypassing is to avoid lock contention
 * on ->nocb_lock, which only can happen at high call_rcu() rates.
 */
static int nocb_nobypass_lim_per_jiffy = 16 * 1000 / HZ;
module_param(nocb_nobypass_lim_per_jiffy, int, 0);
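/*
 * For example, with HZ=1000 this limit works out to 16 direct ->cblist
 * enqueues per jiffy, that is, roughly 16,000 call_rcu() invocations per
 * second before enqueues start being diverted to the bypass list.
 */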

/*
 * Acquire the specified rcu_data structure's ->nocb_bypass_lock.  If the
 * lock isn't immediately available, perform minimal sanity check.
 */
static void rcu_nocb_bypass_lock(struct rcu_data *rdp)
	__acquires(&rdp->nocb_bypass_lock)
{
	lockdep_assert_irqs_disabled();
	if (raw_spin_trylock(&rdp->nocb_bypass_lock))
		return;
	/*
	 * Contention is expected only when a local enqueue collides with
	 * a remote flush from the kthreads.
	 */
	WARN_ON_ONCE(smp_processor_id() != rdp->cpu);
	raw_spin_lock(&rdp->nocb_bypass_lock);
}

/*
 * Conditionally acquire the specified rcu_data structure's
 * ->nocb_bypass_lock.
 */
static bool rcu_nocb_bypass_trylock(struct rcu_data *rdp)
{
	lockdep_assert_irqs_disabled();
	return raw_spin_trylock(&rdp->nocb_bypass_lock);
}

/*
 * Release the specified rcu_data structure's ->nocb_bypass_lock.
 */
static void rcu_nocb_bypass_unlock(struct rcu_data *rdp)
	__releases(&rdp->nocb_bypass_lock)
{
	lockdep_assert_irqs_disabled();
	raw_spin_unlock(&rdp->nocb_bypass_lock);
}

/*
 * Acquire the specified rcu_data structure's ->nocb_lock, but only
 * if it corresponds to a no-CBs CPU.
 */
static void rcu_nocb_lock(struct rcu_data *rdp)
{
	lockdep_assert_irqs_disabled();
	if (!rcu_rdp_is_offloaded(rdp))
		return;
	raw_spin_lock(&rdp->nocb_lock);
}

/*
 * Release the specified rcu_data structure's ->nocb_lock, but only
 * if it corresponds to a no-CBs CPU.
 */
static void rcu_nocb_unlock(struct rcu_data *rdp)
{
	if (rcu_rdp_is_offloaded(rdp)) {
		lockdep_assert_irqs_disabled();
		raw_spin_unlock(&rdp->nocb_lock);
	}
}

/*
 * Release the specified rcu_data structure's ->nocb_lock and restore
 * interrupts, but only if it corresponds to a no-CBs CPU.
 */
static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp,
				       unsigned long flags)
{
	if (rcu_rdp_is_offloaded(rdp)) {
		lockdep_assert_irqs_disabled();
		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
	} else {
		local_irq_restore(flags);
	}
}

/* Lockdep check that ->cblist may be safely accessed. */
static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp)
{
	lockdep_assert_irqs_disabled();
	if (rcu_rdp_is_offloaded(rdp))
		lockdep_assert_held(&rdp->nocb_lock);
}

/*
 * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended
 * grace period.
 */
static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
{
	swake_up_all(sq);
}

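/*
 * Return the no-CBs wait queue that corresponds to the specified rcu_node
 * structure's current grace-period sequence number.
 */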
static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
{
	return &rnp->nocb_gp_wq[rcu_seq_ctr(rnp->gp_seq) & 0x1];
}

static void rcu_init_one_nocb(struct rcu_node *rnp)
{
	init_swait_queue_head(&rnp->nocb_gp_wq[0]);
	init_swait_queue_head(&rnp->nocb_gp_wq[1]);
}

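/*
 * Wake the rcuog kthread for the specified rcu_data group leader if it is
 * asleep (or if @force), cancelling any now-redundant deferred wakeup.
 * Caller must hold rdp_gp->nocb_gp_lock, which this function releases.
 */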
static bool __wake_nocb_gp(struct rcu_data *rdp_gp,
			   struct rcu_data *rdp,
			   bool force, unsigned long flags)
	__releases(rdp_gp->nocb_gp_lock)
{
	bool needwake = false;

	if (!READ_ONCE(rdp_gp->nocb_gp_kthread)) {
		raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
				    TPS("AlreadyAwake"));
		return false;
	}

	if (rdp_gp->nocb_defer_wakeup > RCU_NOCB_WAKE_NOT) {
		WRITE_ONCE(rdp_gp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
		timer_delete(&rdp_gp->nocb_timer);
	}

	if (force || READ_ONCE(rdp_gp->nocb_gp_sleep)) {
		WRITE_ONCE(rdp_gp->nocb_gp_sleep, false);
		needwake = true;
	}
	raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
	if (needwake) {
		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DoWake"));
		swake_up_one(&rdp_gp->nocb_gp_wq);
	}

	return needwake;
}

/*
 * Kick the GP kthread for this NOCB group.
 */
static bool wake_nocb_gp(struct rcu_data *rdp, bool force)
{
	unsigned long flags;
	struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;

	raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
	return __wake_nocb_gp(rdp_gp, rdp, force, flags);
}

#ifdef CONFIG_RCU_LAZY
/*
 * LAZY_FLUSH_JIFFIES decides the maximum amount of time that can elapse
 * before lazy callbacks are flushed.  Lazy callbacks could be flushed much
 * earlier for a number of other reasons; however, LAZY_FLUSH_JIFFIES
 * ensures that no lazy callbacks are left unsubmitted to RCU after that
 * many jiffies.
 */
#define LAZY_FLUSH_JIFFIES (10 * HZ)
static unsigned long jiffies_lazy_flush = LAZY_FLUSH_JIFFIES;
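/* The 10 * HZ default corresponds to roughly ten seconds of wall-clock time. */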

// To be called only from test code.
void rcu_set_jiffies_lazy_flush(unsigned long jif)
{
	jiffies_lazy_flush = jif;
}
EXPORT_SYMBOL(rcu_set_jiffies_lazy_flush);

unsigned long rcu_get_jiffies_lazy_flush(void)
{
	return jiffies_lazy_flush;
}
EXPORT_SYMBOL(rcu_get_jiffies_lazy_flush);
#endif

/*
 * Arrange to wake the GP kthread for this NOCB group at some future
 * time when it is safe to do so.
 */
static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype,
			       const char *reason)
{
	unsigned long flags;
	struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;

	raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);

	/*
	 * Bypass wakeup overrides previous deferments. In case of
	 * callback storms, no need to wake up too early.
	 */
	if (waketype == RCU_NOCB_WAKE_LAZY &&
	    rdp_gp->nocb_defer_wakeup == RCU_NOCB_WAKE_NOT) {
		mod_timer(&rdp_gp->nocb_timer, jiffies + rcu_get_jiffies_lazy_flush());
		WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype);
	} else if (waketype == RCU_NOCB_WAKE_BYPASS) {
		mod_timer(&rdp_gp->nocb_timer, jiffies + 2);
		WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype);
	} else {
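		/*
		 * Other wakeup types: arm the timer for the next jiffy unless
		 * a deferred wakeup of at least RCU_NOCB_WAKE strength is
		 * already pending, and never downgrade the pending wakeup
		 * type.
		 */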
		if (rdp_gp->nocb_defer_wakeup < RCU_NOCB_WAKE)
			mod_timer(&rdp_gp->nocb_timer, jiffies + 1);
		if (rdp_gp->nocb_defer_wakeup < waketype)
			WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype);
	}

	raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);

	trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, reason);
}

/*
 * Flush the ->nocb_bypass queue into ->cblist, enqueuing rhp if non-NULL.
 * However, if there is a callback to be enqueued and if ->nocb_bypass
 * proves to be initially empty, just return false because the no-CB GP
 * kthread may need to be awakened in this case.
 *
 * Return true if there was something to be flushed and it succeeded, otherwise
 * false.
 *
 * Note that this function always returns true if rhp is NULL.
 */
static bool rcu_nocb_do_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp_in,
				     unsigned long j, bool lazy)
{
	struct rcu_cblist rcl;
	struct rcu_head *rhp = rhp_in;

	WARN_ON_ONCE(!rcu_rdp_is_offloaded(rdp));
	rcu_lockdep_assert_cblist_protected(rdp);
	lockdep_assert_held(&rdp->nocb_bypass_lock);
	if (rhp && !rcu_cblist_n_cbs(&rdp->nocb_bypass)) {
		raw_spin_unlock(&rdp->nocb_bypass_lock);
		return false;
	}
	/* Note: ->cblist.len already accounts for ->nocb_bypass contents. */
	if (rhp)
		rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */

	/*
	 * If the new CB requested was a lazy one, queue it onto the main
	 * ->cblist so that we can take advantage of the grace-period that will
	 * happen regardless. But queue it onto the bypass list first so that
	 * the lazy CB is ordered with the existing CBs in the bypass list.
	 */
	if (lazy && rhp) {
		rcu_cblist_enqueue(&rdp->nocb_bypass, rhp);
		rhp = NULL;
	}
	rcu_cblist_flush_enqueue(&rcl, &rdp->nocb_bypass, rhp);
	WRITE_ONCE(rdp->lazy_len, 0);

	rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rcl);
	WRITE_ONCE(rdp->nocb_bypass_first, j);
	rcu_nocb_bypass_unlock(rdp);
	return true;
}

/*
 * Flush the ->nocb_bypass queue into ->cblist, enqueuing rhp if non-NULL.
 * However, if there is a callback to be enqueued and if ->nocb_bypass
 * proves to be initially empty, just return false because the no-CB GP
 * kthread may need to be awakened in this case.
 *
 * Note that this function always returns true if rhp is NULL.
 */
static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
				  unsigned long j, bool lazy)
{
	if (!rcu_rdp_is_offloaded(rdp))
		return true;
	rcu_lockdep_assert_cblist_protected(rdp);
	rcu_nocb_bypass_lock(rdp);
	return rcu_nocb_do_flush_bypass(rdp, rhp, j, lazy);
}

/*
 * If the ->nocb_bypass_lock is immediately available, flush the
 * ->nocb_bypass queue into ->cblist.
 */
static void rcu_nocb_try_flush_bypass(struct rcu_data *rdp, unsigned long j)
{
	rcu_lockdep_assert_cblist_protected(rdp);
	if (!rcu_rdp_is_offloaded(rdp) ||
	    !rcu_nocb_bypass_trylock(rdp))
		return;
	WARN_ON_ONCE(!rcu_nocb_do_flush_bypass(rdp, NULL, j, false));
}

/*
 * See whether it is appropriate to use the ->nocb_bypass list in order
 * to control contention on ->nocb_lock.  A limited number of direct
 * enqueues are permitted into ->cblist per jiffy.  If ->nocb_bypass
 * is non-empty, further callbacks must be placed into ->nocb_bypass,
 * otherwise rcu_barrier() breaks.  Use rcu_nocb_flush_bypass() to switch
 * back to direct use of ->cblist.  However, ->nocb_bypass should not be
 * used if ->cblist is empty, because otherwise callbacks can be stranded
 * on ->nocb_bypass because we cannot count on the current CPU ever again
 * invoking call_rcu().  The general rule is that if ->nocb_bypass is
 * non-empty, the corresponding no-CBs grace-period kthread must not be
 * in an indefinite sleep state.
 *
 * Finally, it is not permitted to use the bypass during early boot,
 * as doing so would confuse the auto-initialization code.  Besides
 * which, there is no point in worrying about lock contention while
 * there is only one CPU in operation.
 */
static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
				bool *was_alldone, unsigned long flags,
				bool lazy)
{
	unsigned long c;
	unsigned long cur_gp_seq;
	unsigned long j = jiffies;
	long ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
	bool bypass_is_lazy = (ncbs == READ_ONCE(rdp->lazy_len));

	lockdep_assert_irqs_disabled();

	// Pure softirq/rcuc based processing: no bypassing, no
	// locking.
	if (!rcu_rdp_is_offloaded(rdp)) {
		*was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
		return false;
	}

	// Don't use ->nocb_bypass during early boot.
	if (rcu_scheduler_active != RCU_SCHEDULER_RUNNING) {
		rcu_nocb_lock(rdp);
		WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
		*was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
		return false;
	}

	// If we have advanced to a new jiffy, reset counts to allow
	// moving back from ->nocb_bypass to ->cblist.
	if (j == rdp->nocb_nobypass_last) {
		c = rdp->nocb_nobypass_count + 1;
	} else {
		WRITE_ONCE(rdp->nocb_nobypass_last, j);
		c = rdp->nocb_nobypass_count - nocb_nobypass_lim_per_jiffy;
		if (ULONG_CMP_LT(rdp->nocb_nobypass_count,
				 nocb_nobypass_lim_per_jiffy))
			c = 0;
		else if (c > nocb_nobypass_lim_per_jiffy)
			c = nocb_nobypass_lim_per_jiffy;
	}
	WRITE_ONCE(rdp->nocb_nobypass_count, c);

	// If there hasn't yet been all that many ->cblist enqueues
	// this jiffy, tell the caller to enqueue onto ->cblist.  But flush
	// ->nocb_bypass first.
	// Lazy CBs throttle this back and do immediate bypass queuing.
	if (rdp->nocb_nobypass_count < nocb_nobypass_lim_per_jiffy && !lazy) {
		rcu_nocb_lock(rdp);
		*was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
		if (*was_alldone)
			trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
					    TPS("FirstQ"));

		WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, j, false));
		WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
		return false; // Caller must enqueue the callback.
	}

	// If ->nocb_bypass has been used too long or is too full,
	// flush ->nocb_bypass to ->cblist.
	if ((ncbs && !bypass_is_lazy && j != READ_ONCE(rdp->nocb_bypass_first)) ||
	    (ncbs && bypass_is_lazy &&
	     (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + rcu_get_jiffies_lazy_flush()))) ||
	    ncbs >= qhimark) {
		rcu_nocb_lock(rdp);
		*was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);

		if (!rcu_nocb_flush_bypass(rdp, rhp, j, lazy)) {
			if (*was_alldone)
				trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
						    TPS("FirstQ"));
			WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
			return false; // Caller must enqueue the callback.
		}
		if (j != rdp->nocb_gp_adv_time &&
		    rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
		    rcu_seq_done(&rdp->mynode->gp_seq, cur_gp_seq)) {
			rcu_advance_cbs_nowake(rdp->mynode, rdp);
			rdp->nocb_gp_adv_time = j;
		}

		// The flush succeeded and we moved CBs into the regular list.
		// Don't wait for the wake up timer as it may be too far ahead.
		// Wake up the GP thread now instead, if the cblist was empty.
		__call_rcu_nocb_wake(rdp, *was_alldone, flags);

		return true; // Callback already enqueued.
	}

	// We need to use the bypass.
	rcu_nocb_bypass_lock(rdp);
	ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
	rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */
	rcu_cblist_enqueue(&rdp->nocb_bypass, rhp);

	if (lazy)
		WRITE_ONCE(rdp->lazy_len, rdp->lazy_len + 1);

	if (!ncbs) {
		WRITE_ONCE(rdp->nocb_bypass_first, j);
		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("FirstBQ"));
	}
	rcu_nocb_bypass_unlock(rdp);

	// A wake up of the grace period kthread or timer adjustment
	// needs to be done only if:
	// 1. Bypass list was fully empty before (this is the first
	//    bypass list entry), or:
	// 2. Both of these conditions are met:
	//    a. The bypass list previously had only lazy CBs, and:
	//    b. The new CB is non-lazy.
	if (!ncbs || (bypass_is_lazy && !lazy)) {
		// No-CBs GP kthread might be indefinitely asleep, if so, wake.
		rcu_nocb_lock(rdp); // Rare during call_rcu() flood.
		if (!rcu_segcblist_pend_cbs(&rdp->cblist)) {
			trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
					    TPS("FirstBQwake"));
			__call_rcu_nocb_wake(rdp, true, flags);
		} else {
			trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
					    TPS("FirstBQnoWake"));
			rcu_nocb_unlock(rdp);
		}
	}
	return true; // Callback already enqueued.
}

/*
 * Awaken the no-CBs grace-period kthread if needed, either due to it
 * legitimately being asleep or due to overload conditions.
 *
 * If warranted, also wake up the kthread servicing this CPU's queues.
 */
static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
				 unsigned long flags)
	__releases(rdp->nocb_lock)
{
	long bypass_len;
	unsigned long cur_gp_seq;
	unsigned long j;
	long lazy_len;
	long len;
	struct task_struct *t;
	struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;

	// If we are being polled or there is no kthread, just leave.
	t = READ_ONCE(rdp->nocb_gp_kthread);
	if (rcu_nocb_poll || !t) {
		rcu_nocb_unlock(rdp);
		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
				    TPS("WakeNotPoll"));
		return;
	}
	// Need to actually do a wakeup.
	len = rcu_segcblist_n_cbs(&rdp->cblist);
	bypass_len = rcu_cblist_n_cbs(&rdp->nocb_bypass);
	lazy_len = READ_ONCE(rdp->lazy_len);
	if (was_alldone) {
		rdp->qlen_last_fqs_check = len;
		// Only lazy CBs in bypass list
		if (lazy_len && bypass_len == lazy_len) {
			rcu_nocb_unlock(rdp);
			wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_LAZY,
					   TPS("WakeLazy"));
		} else if (!irqs_disabled_flags(flags)) {
			/* ... if queue was empty ... */
			rcu_nocb_unlock(rdp);
			wake_nocb_gp(rdp, false);
			trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
					    TPS("WakeEmpty"));
		} else {
			rcu_nocb_unlock(rdp);
			wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE,
					   TPS("WakeEmptyIsDeferred"));
		}
	} else if (len > rdp->qlen_last_fqs_check + qhimark) {
		/* ... or if many callbacks queued. */
		rdp->qlen_last_fqs_check = len;
		j = jiffies;
		if (j != rdp->nocb_gp_adv_time &&
		    rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
		    rcu_seq_done(&rdp->mynode->gp_seq, cur_gp_seq)) {
			rcu_advance_cbs_nowake(rdp->mynode, rdp);
			rdp->nocb_gp_adv_time = j;
		}
		smp_mb(); /* Enqueue before timer_pending(). */
		if ((rdp->nocb_cb_sleep ||
		     !rcu_segcblist_ready_cbs(&rdp->cblist)) &&
		    !timer_pending(&rdp_gp->nocb_timer)) {
			rcu_nocb_unlock(rdp);
			wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_FORCE,
					   TPS("WakeOvfIsDeferred"));
		} else {
			rcu_nocb_unlock(rdp);
			trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
		}
	} else {
		rcu_nocb_unlock(rdp);
		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
	}
}

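/*
 * Enqueue the specified callback on the specified (presumably offloaded)
 * CPU, either by diverting it to the ->nocb_bypass list or by enqueuing
 * it directly onto ->cblist and then waking the rcuog kthread as needed.
 */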
static void call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *head,
			  rcu_callback_t func, unsigned long flags, bool lazy)
{
	bool was_alldone;

	if (!rcu_nocb_try_bypass(rdp, head, &was_alldone, flags, lazy)) {
		/* Not enqueued on bypass but locked, do regular enqueue */
		rcutree_enqueue(rdp, head, func);
		__call_rcu_nocb_wake(rdp, was_alldone, flags); /* unlocks */
	}
}

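/*
 * Add the specified rcu_data structure to or remove it from the rcuog
 * kthread's list, depending on whether it is being offloaded or
 * de-offloaded, and update its SEGCBLIST_OFFLOADED flag to match.
 */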
static void nocb_gp_toggle_rdp(struct rcu_data *rdp_gp, struct rcu_data *rdp)
{
	struct rcu_segcblist *cblist = &rdp->cblist;
	unsigned long flags;

	/*
	 * Locking orders future de-offloaded callback enqueues against previous
	 * handling of this rdp.  That is, it makes sure rcuog is done with this
	 * rdp before de-offloaded callbacks can be enqueued.
	 */
	raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
	if (!rcu_segcblist_test_flags(cblist, SEGCBLIST_OFFLOADED)) {
		/*
		 * Offloading. Set our flag and notify the offload worker.
		 * We will handle this rdp until it ever gets de-offloaded.
		 */
		list_add_tail(&rdp->nocb_entry_rdp, &rdp_gp->nocb_head_rdp);
		rcu_segcblist_set_flags(cblist, SEGCBLIST_OFFLOADED);
	} else {
		/*
		 * De-offloading. Clear our flag and notify the de-offload worker.
		 * We will ignore this rdp until it ever gets re-offloaded.
		 */
		list_del(&rdp->nocb_entry_rdp);
		rcu_segcblist_clear_flags(cblist, SEGCBLIST_OFFLOADED);
	}
	raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
}

static void nocb_gp_sleep(struct rcu_data *my_rdp, int cpu)
{
	trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Sleep"));
	swait_event_interruptible_exclusive(my_rdp->nocb_gp_wq,
					    !READ_ONCE(my_rdp->nocb_gp_sleep));
	trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("EndSleep"));
}

/*
 * No-CBs GP kthreads come here to wait for additional callbacks to show up
 * or for grace periods to end.
 */
static void nocb_gp_wait(struct rcu_data *my_rdp)
{
	bool bypass = false;
	int __maybe_unused cpu = my_rdp->cpu;
	unsigned long cur_gp_seq;
	unsigned long flags;
	bool gotcbs = false;
	unsigned long j = jiffies;
	bool lazy = false;
	bool needwait_gp = false; // This prevents actual uninitialized use.
	bool needwake;
	bool needwake_gp;
	struct rcu_data *rdp, *rdp_toggling = NULL;
	struct rcu_node *rnp;
	unsigned long wait_gp_seq = 0; // Suppress "use uninitialized" warning.
	bool wasempty = false;

	/*
	 * Each pass through the following loop checks for CBs and for the
	 * nearest grace period (if any) to wait for next.  The CB kthreads
	 * and the global grace-period kthread are awakened if needed.
	 */
	WARN_ON_ONCE(my_rdp->nocb_gp_rdp != my_rdp);
	/*
	 * An rcu_data structure is removed from the list after its
	 * CPU is de-offloaded and added to the list before that CPU is
	 * (re-)offloaded.  If the following loop happens to be referencing
	 * that rcu_data structure during the time that the corresponding
	 * CPU is de-offloaded and then immediately re-offloaded, this
	 * loop's rdp pointer will be carried to the end of the list by
	 * the resulting pair of list operations.  This can cause the loop
	 * to skip over some of the rcu_data structures that were supposed
	 * to have been scanned.  Fortunately a new iteration through the
	 * entire loop is forced after a given CPU's rcu_data structure
	 * is added to the list, so the skipped-over rcu_data structures
	 * won't be ignored for long.
	 */
	list_for_each_entry(rdp, &my_rdp->nocb_head_rdp, nocb_entry_rdp) {
		long bypass_ncbs;
		bool flush_bypass = false;
		long lazy_ncbs;

		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Check"));
		rcu_nocb_lock_irqsave(rdp, flags);
		lockdep_assert_held(&rdp->nocb_lock);
		bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
		lazy_ncbs = READ_ONCE(rdp->lazy_len);

		if (bypass_ncbs && (lazy_ncbs == bypass_ncbs) &&
		    (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + rcu_get_jiffies_lazy_flush()) ||
		     bypass_ncbs > 2 * qhimark)) {
			flush_bypass = true;
		} else if (bypass_ncbs && (lazy_ncbs != bypass_ncbs) &&
		    (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + 1) ||
		     bypass_ncbs > 2 * qhimark)) {
			flush_bypass = true;
		} else if (!bypass_ncbs && rcu_segcblist_empty(&rdp->cblist)) {
			rcu_nocb_unlock_irqrestore(rdp, flags);
			continue; /* No callbacks here, try next. */
		}

		if (flush_bypass) {
			// Bypass full or old, so flush it.
			(void)rcu_nocb_try_flush_bypass(rdp, j);
			bypass_ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
			lazy_ncbs = READ_ONCE(rdp->lazy_len);
		}

		if (bypass_ncbs) {
			trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
					    bypass_ncbs == lazy_ncbs ? TPS("Lazy") : TPS("Bypass"));
			if (bypass_ncbs == lazy_ncbs)
				lazy = true;
			else
				bypass = true;
		}
		rnp = rdp->mynode;

		// Advance callbacks if helpful and low contention.
		needwake_gp = false;
		if (!rcu_segcblist_restempty(&rdp->cblist,
					     RCU_NEXT_READY_TAIL) ||
		    (rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq) &&
		     rcu_seq_done(&rnp->gp_seq, cur_gp_seq))) {
			raw_spin_lock_rcu_node(rnp); /* irqs disabled. */
			needwake_gp = rcu_advance_cbs(rnp, rdp);
			wasempty = rcu_segcblist_restempty(&rdp->cblist,
							   RCU_NEXT_READY_TAIL);
			raw_spin_unlock_rcu_node(rnp); /* irqs disabled. */
		}
		// Need to wait on some grace period?
		WARN_ON_ONCE(wasempty &&
			     !rcu_segcblist_restempty(&rdp->cblist,
						      RCU_NEXT_READY_TAIL));
		if (rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq)) {
			if (!needwait_gp ||
			    ULONG_CMP_LT(cur_gp_seq, wait_gp_seq))
				wait_gp_seq = cur_gp_seq;
			needwait_gp = true;
			trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
					    TPS("NeedWaitGP"));
		}
		if (rcu_segcblist_ready_cbs(&rdp->cblist)) {
			needwake = rdp->nocb_cb_sleep;
			WRITE_ONCE(rdp->nocb_cb_sleep, false);
		} else {
			needwake = false;
		}
		rcu_nocb_unlock_irqrestore(rdp, flags);
		if (needwake) {
			swake_up_one(&rdp->nocb_cb_wq);
			gotcbs = true;
		}
		if (needwake_gp)
			rcu_gp_kthread_wake();
	}

	my_rdp->nocb_gp_bypass = bypass;
	my_rdp->nocb_gp_gp = needwait_gp;
	my_rdp->nocb_gp_seq = needwait_gp ? wait_gp_seq : 0;

	// At least one child with non-empty ->nocb_bypass, so set
	// timer in order to avoid stranding its callbacks.
	if (!rcu_nocb_poll) {
		// If the bypass list has only lazy CBs, add a deferred lazy wake up.
		if (lazy && !bypass) {
			wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_LAZY,
					   TPS("WakeLazyIsDeferred"));
		// Otherwise add a deferred bypass wake up.
		} else if (bypass) {
			wake_nocb_gp_defer(my_rdp, RCU_NOCB_WAKE_BYPASS,
					   TPS("WakeBypassIsDeferred"));
		}
	}

	if (rcu_nocb_poll) {
		/* Polling, so trace if first poll in the series. */
		if (gotcbs)
			trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Poll"));
		if (list_empty(&my_rdp->nocb_head_rdp)) {
			raw_spin_lock_irqsave(&my_rdp->nocb_gp_lock, flags);
			if (!my_rdp->nocb_toggling_rdp)
				WRITE_ONCE(my_rdp->nocb_gp_sleep, true);
			raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags);
			/* Wait for any offloading rdp */
			nocb_gp_sleep(my_rdp, cpu);
		} else {
			schedule_timeout_idle(1);
		}
	} else if (!needwait_gp) {
		/* Wait for callbacks to appear. */
		nocb_gp_sleep(my_rdp, cpu);
	} else {
		rnp = my_rdp->mynode;
		trace_rcu_this_gp(rnp, my_rdp, wait_gp_seq, TPS("StartWait"));
		swait_event_interruptible_exclusive(
			rnp->nocb_gp_wq[rcu_seq_ctr(wait_gp_seq) & 0x1],
			rcu_seq_done(&rnp->gp_seq, wait_gp_seq) ||
			!READ_ONCE(my_rdp->nocb_gp_sleep));
		trace_rcu_this_gp(rnp, my_rdp, wait_gp_seq, TPS("EndWait"));
	}

	if (!rcu_nocb_poll) {
		raw_spin_lock_irqsave(&my_rdp->nocb_gp_lock, flags);
		// (De-)queue an rdp to/from the group if its nocb state is changing
		rdp_toggling = my_rdp->nocb_toggling_rdp;
		if (rdp_toggling)
			my_rdp->nocb_toggling_rdp = NULL;

		if (my_rdp->nocb_defer_wakeup > RCU_NOCB_WAKE_NOT) {
			WRITE_ONCE(my_rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
			timer_delete(&my_rdp->nocb_timer);
		}
		WRITE_ONCE(my_rdp->nocb_gp_sleep, true);
		raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags);
	} else {
		rdp_toggling = READ_ONCE(my_rdp->nocb_toggling_rdp);
		if (rdp_toggling) {
			/*
			 * Paranoid locking to make sure nocb_toggling_rdp is well
			 * reset *before* we (re)set SEGCBLIST_KTHREAD_GP, or we could
			 * race with another round of nocb toggling for this rdp.
			 * Nocb locking should already prevent that, but we stick
			 * to paranoia, especially on this rare path.
			 */
			raw_spin_lock_irqsave(&my_rdp->nocb_gp_lock, flags);
			my_rdp->nocb_toggling_rdp = NULL;
			raw_spin_unlock_irqrestore(&my_rdp->nocb_gp_lock, flags);
		}
	}

	if (rdp_toggling) {
		nocb_gp_toggle_rdp(my_rdp, rdp_toggling);
		swake_up_one(&rdp_toggling->nocb_state_wq);
	}

	my_rdp->nocb_gp_seq = -1;
	WARN_ON(signal_pending(current));
}

/*
 * No-CBs grace-period-wait kthread.  There is one of these per group
 * of CPUs, but only once at least one CPU in that group has come online
 * at least once since boot.  This kthread checks for newly posted
 * callbacks from any of the CPUs it is responsible for, waits for a
 * grace period, then awakens all of the rcu_nocb_cb_kthread() instances
 * that then have callback-invocation work to do.
 */
static int rcu_nocb_gp_kthread(void *arg)
{
	struct rcu_data *rdp = arg;

	for (;;) {
		WRITE_ONCE(rdp->nocb_gp_loops, rdp->nocb_gp_loops + 1);
		nocb_gp_wait(rdp);
		cond_resched_tasks_rcu_qs();
	}
	return 0;
}

static inline bool nocb_cb_wait_cond(struct rcu_data *rdp)
{
	return !READ_ONCE(rdp->nocb_cb_sleep) || kthread_should_park();
}

/*
 * Invoke any ready callbacks from the corresponding no-CBs CPU,
 * then, if there are no more, wait for more to appear.
 */
static void nocb_cb_wait(struct rcu_data *rdp)
{
	struct rcu_segcblist *cblist = &rdp->cblist;
	unsigned long cur_gp_seq;
	unsigned long flags;
	bool needwake_gp = false;
	struct rcu_node *rnp = rdp->mynode;

	swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
					    nocb_cb_wait_cond(rdp));
	if (kthread_should_park()) {
		/*
		 * kthread_park() must be preceded by an rcu_barrier().
		 * But yet another rcu_barrier() might have sneaked in between
		 * the barrier callback execution and the callbacks counter
		 * decrement.
		 */
		if (rdp->nocb_cb_sleep) {
			rcu_nocb_lock_irqsave(rdp, flags);
			WARN_ON_ONCE(rcu_segcblist_n_cbs(&rdp->cblist));
			rcu_nocb_unlock_irqrestore(rdp, flags);
			kthread_parkme();
		}
	} else if (READ_ONCE(rdp->nocb_cb_sleep)) {
		WARN_ON(signal_pending(current));
		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty"));
	}

	WARN_ON_ONCE(!rcu_rdp_is_offloaded(rdp));

	local_irq_save(flags);
	rcu_momentary_eqs();
	local_irq_restore(flags);
	/*
	 * Disable BH to provide the expected environment.  Also, when
	 * transitioning to/from NOCB mode, a self-requeuing callback might
	 * be invoked from softirq.  A short grace period could cause both
	 * instances of this callback to execute concurrently.
	 */
	local_bh_disable();
	rcu_do_batch(rdp);
	local_bh_enable();
	lockdep_assert_irqs_enabled();
	rcu_nocb_lock_irqsave(rdp, flags);
	if (rcu_segcblist_nextgp(cblist, &cur_gp_seq) &&
	    rcu_seq_done(&rnp->gp_seq, cur_gp_seq) &&
	    raw_spin_trylock_rcu_node(rnp)) { /* irqs already disabled. */
		needwake_gp = rcu_advance_cbs(rdp->mynode, rdp);
		raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
	}

	if (!rcu_segcblist_ready_cbs(cblist)) {
		WRITE_ONCE(rdp->nocb_cb_sleep, true);
		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("CBSleep"));
	} else {
		WRITE_ONCE(rdp->nocb_cb_sleep, false);
	}

	rcu_nocb_unlock_irqrestore(rdp, flags);
	if (needwake_gp)
		rcu_gp_kthread_wake();
}

/*
 * Per-rcu_data kthread, but only for no-CBs CPUs.  Repeatedly invoke
 * nocb_cb_wait() to do the dirty work.
 */
static int rcu_nocb_cb_kthread(void *arg)
{
	struct rcu_data *rdp = arg;

	// Each pass through this loop does one callback batch, and,
	// if there are no more ready callbacks, waits for them.
	for (;;) {
		nocb_cb_wait(rdp);
		cond_resched_tasks_rcu_qs();
	}
	return 0;
}

/* Is a deferred wakeup of rcu_nocb_kthread() required? */
static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level)
{
	return READ_ONCE(rdp->nocb_defer_wakeup) >= level;
}

/* Do a deferred wakeup of rcu_nocb_kthread(). */
static bool do_nocb_deferred_wakeup_common(struct rcu_data *rdp_gp,
					   struct rcu_data *rdp, int level,
					   unsigned long flags)
	__releases(rdp_gp->nocb_gp_lock)
{
	int ndw;
	int ret;

	if (!rcu_nocb_need_deferred_wakeup(rdp_gp, level)) {
		raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
		return false;
	}

	ndw = rdp_gp->nocb_defer_wakeup;
	ret = __wake_nocb_gp(rdp_gp, rdp, ndw == RCU_NOCB_WAKE_FORCE, flags);
	trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DeferredWake"));

	return ret;
}

/* Do a deferred wakeup of rcu_nocb_kthread() from a timer handler. */
static void do_nocb_deferred_wakeup_timer(struct timer_list *t)
{
	unsigned long flags;
	struct rcu_data *rdp = timer_container_of(rdp, t, nocb_timer);

	WARN_ON_ONCE(rdp->nocb_gp_rdp != rdp);
	trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Timer"));

	raw_spin_lock_irqsave(&rdp->nocb_gp_lock, flags);
	smp_mb__after_spinlock(); /* Timer expire before wakeup. */
	do_nocb_deferred_wakeup_common(rdp, rdp, RCU_NOCB_WAKE_BYPASS, flags);
}

/*
 * Do a deferred wakeup of rcu_nocb_kthread() from fastpath.
 * This means we do an inexact common-case check.  Note that if
 * we miss, ->nocb_timer will eventually clean things up.
 */
static bool do_nocb_deferred_wakeup(struct rcu_data *rdp)
{
	unsigned long flags;
	struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;

	if (!rdp_gp || !rcu_nocb_need_deferred_wakeup(rdp_gp, RCU_NOCB_WAKE))
		return false;

	raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
	return do_nocb_deferred_wakeup_common(rdp_gp, rdp, RCU_NOCB_WAKE, flags);
}

void rcu_nocb_flush_deferred_wakeup(void)
{
	do_nocb_deferred_wakeup(this_cpu_ptr(&rcu_data));
}
EXPORT_SYMBOL_GPL(rcu_nocb_flush_deferred_wakeup);

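/*
 * Hand the specified rcu_data structure off to its rcuog kthread for
 * (de-)offloading, and report whether that kthread needs to be awakened.
 */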
static int rcu_nocb_queue_toggle_rdp(struct rcu_data *rdp)
{
	struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;
	bool wake_gp = false;
	unsigned long flags;

	raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
	// Queue this rdp for add/del to/from the list to iterate on rcuog
	WRITE_ONCE(rdp_gp->nocb_toggling_rdp, rdp);
	if (rdp_gp->nocb_gp_sleep) {
		rdp_gp->nocb_gp_sleep = false;
		wake_gp = true;
	}
	raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);

	return wake_gp;
}

static bool rcu_nocb_rdp_deoffload_wait_cond(struct rcu_data *rdp)
{
	unsigned long flags;
	bool ret;

	/*
	 * Locking makes sure rcuog is done handling this rdp before deoffloaded
	 * enqueue can happen. Also it keeps the SEGCBLIST_OFFLOADED flag stable
	 * while the ->nocb_lock is held.
	 */
	raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
	ret = !rcu_segcblist_test_flags(&rdp->cblist, SEGCBLIST_OFFLOADED);
	raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);

	return ret;
}

static int rcu_nocb_rdp_deoffload(struct rcu_data *rdp)
{
	unsigned long flags;
	int wake_gp;
	struct rcu_data *rdp_gp = rdp->nocb_gp_rdp;

	/* CPU must be offline, unless it's early boot */
	WARN_ON_ONCE(cpu_online(rdp->cpu) && rdp->cpu != raw_smp_processor_id());

	pr_info("De-offloading %d\n", rdp->cpu);

	/* Flush all callbacks from segcblist and bypass */
	rcu_barrier();

	/*
	 * Make sure the rcuoc kthread isn't in the middle of a nocb locked
	 * sequence while offloading is deactivated, along with nocb locking.
	 */
	if (rdp->nocb_cb_kthread)
		kthread_park(rdp->nocb_cb_kthread);

	rcu_nocb_lock_irqsave(rdp, flags);
	WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
	WARN_ON_ONCE(rcu_segcblist_n_cbs(&rdp->cblist));
	rcu_nocb_unlock_irqrestore(rdp, flags);

	wake_gp = rcu_nocb_queue_toggle_rdp(rdp);

	mutex_lock(&rdp_gp->nocb_gp_kthread_mutex);

	if (rdp_gp->nocb_gp_kthread) {
		if (wake_gp)
			wake_up_process(rdp_gp->nocb_gp_kthread);

		swait_event_exclusive(rdp->nocb_state_wq,
				      rcu_nocb_rdp_deoffload_wait_cond(rdp));
	} else {
		/*
		 * No kthread to clear the flags for us or to remove the rdp from
		 * the nocb list to iterate.  Do it here instead.  Locking doesn't
		 * look strictly necessary, but we stick to paranoia on this rare
		 * path.
		 */
		raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
		rcu_segcblist_clear_flags(&rdp->cblist, SEGCBLIST_OFFLOADED);
		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);

		list_del(&rdp->nocb_entry_rdp);
	}

	mutex_unlock(&rdp_gp->nocb_gp_kthread_mutex);

	return 0;
}

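/*
 * De-offload callback processing for the specified CPU.  Returns 0 on
 * success (including when the CPU was already de-offloaded), or -EINVAL
 * if the CPU is still online and offloaded.
 */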
int rcu_nocb_cpu_deoffload(int cpu)
{
	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
	int ret = 0;

	cpus_read_lock();
	mutex_lock(&rcu_state.nocb_mutex);
	if (rcu_rdp_is_offloaded(rdp)) {
		if (!cpu_online(cpu)) {
			ret = rcu_nocb_rdp_deoffload(rdp);
			if (!ret)
				cpumask_clear_cpu(cpu, rcu_nocb_mask);
		} else {
			pr_info("NOCB: Cannot CB-deoffload online CPU %d\n", rdp->cpu);
			ret = -EINVAL;
		}
	}
	mutex_unlock(&rcu_state.nocb_mutex);
	cpus_read_unlock();

	return ret;
}
EXPORT_SYMBOL_GPL(rcu_nocb_cpu_deoffload);

static bool rcu_nocb_rdp_offload_wait_cond(struct rcu_data *rdp)
{
	unsigned long flags;
	bool ret;

	raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
	ret = rcu_segcblist_test_flags(&rdp->cblist, SEGCBLIST_OFFLOADED);
	raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);

	return ret;
}

static int rcu_nocb_rdp_offload(struct rcu_data *rdp)
{
	int wake_gp;

	WARN_ON_ONCE(cpu_online(rdp->cpu));
	/*
	 * For now we only support re-offload, i.e., the rdp must have been
	 * offloaded at boot first.
	 */
	if (!rdp->nocb_gp_rdp)
		return -EINVAL;

	if (WARN_ON_ONCE(!rdp->nocb_gp_kthread))
		return -EINVAL;

	pr_info("Offloading %d\n", rdp->cpu);

	WARN_ON_ONCE(rcu_cblist_n_cbs(&rdp->nocb_bypass));
	WARN_ON_ONCE(rcu_segcblist_n_cbs(&rdp->cblist));

	wake_gp = rcu_nocb_queue_toggle_rdp(rdp);
	if (wake_gp)
		wake_up_process(rdp->nocb_gp_kthread);

	swait_event_exclusive(rdp->nocb_state_wq,
			      rcu_nocb_rdp_offload_wait_cond(rdp));

	kthread_unpark(rdp->nocb_cb_kthread);

	return 0;
}

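/*
 * Offload callback processing for the specified CPU, which must not be
 * online and must have been offloaded at boot (re-offload only).
 */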
int rcu_nocb_cpu_offload(int cpu)
{
	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
	int ret = 0;

	cpus_read_lock();
	mutex_lock(&rcu_state.nocb_mutex);
	if (!rcu_rdp_is_offloaded(rdp)) {
		if (!cpu_online(cpu)) {
			ret = rcu_nocb_rdp_offload(rdp);
			if (!ret)
				cpumask_set_cpu(cpu, rcu_nocb_mask);
		} else {
			pr_info("NOCB: Cannot CB-offload online CPU %d\n", rdp->cpu);
			ret = -EINVAL;
		}
	}
	mutex_unlock(&rcu_state.nocb_mutex);
	cpus_read_unlock();

	return ret;
}
EXPORT_SYMBOL_GPL(rcu_nocb_cpu_offload);

#ifdef CONFIG_RCU_LAZY
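/*
 * Shrinker callbacks: the count callback reports the number of lazy
 * callbacks queued on the bypass lists, and the scan callback flushes
 * those bypass lists and wakes the rcuog kthreads so that memory
 * pressure can force otherwise-lazy callbacks to be invoked promptly.
 */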
| 1203 | static unsigned long | 
|---|
| 1204 | lazy_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc) | 
|---|
| 1205 | { | 
|---|
| 1206 | int cpu; | 
|---|
| 1207 | unsigned long count = 0; | 
|---|
| 1208 |  | 
|---|
| 1209 | if (WARN_ON_ONCE(!cpumask_available(rcu_nocb_mask))) | 
|---|
| 1210 | return 0; | 
|---|
| 1211 |  | 
|---|
| 1212 | /*  Protect rcu_nocb_mask against concurrent (de-)offloading. */ | 
|---|
| 1213 | if (!mutex_trylock(&rcu_state.nocb_mutex)) | 
|---|
| 1214 | return 0; | 
|---|
| 1215 |  | 
|---|
| 1216 | /* Snapshot count of all CPUs */ | 
|---|
| 1217 | for_each_cpu(cpu, rcu_nocb_mask) { | 
|---|
| 1218 | struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); | 
|---|
| 1219 |  | 
|---|
| 1220 | count += READ_ONCE(rdp->lazy_len); | 
|---|
| 1221 | } | 
|---|
| 1222 |  | 
|---|
| 1223 | mutex_unlock(&rcu_state.nocb_mutex); | 
|---|
| 1224 |  | 
|---|
| 1225 | return count ? count : SHRINK_EMPTY; | 
|---|
| 1226 | } | 
|---|
| 1227 |  | 
|---|
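|  | /* | 
|---|
|  | * Shrinker ->scan_objects callback: flush the bypass list of each | 
|---|
|  | * offloaded CPU holding lazy callbacks and wake its rcuog kthread so | 
|---|
|  | * that the memory held by those callbacks is freed sooner under memory | 
|---|
|  | * pressure. | 
|---|
|  | */ | 
|---|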
| 1228 | static unsigned long | 
|---|
| 1229 | lazy_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) | 
|---|
| 1230 | { | 
|---|
| 1231 | int cpu; | 
|---|
| 1232 | unsigned long flags; | 
|---|
| 1233 | unsigned long count = 0; | 
|---|
| 1234 |  | 
|---|
| 1235 | if (WARN_ON_ONCE(!cpumask_available(rcu_nocb_mask))) | 
|---|
| 1236 | return 0; | 
|---|
| 1237 | /* | 
|---|
| 1238 | * Protect against concurrent (de-)offloading. Otherwise nocb locking | 
|---|
| 1239 | * may be ignored or imbalanced. | 
|---|
| 1240 | */ | 
|---|
| 1241 | if (!mutex_trylock(&rcu_state.nocb_mutex)) { | 
|---|
| 1242 | /* | 
|---|
| 1243 | * Don't insist if nocb_mutex is contended, since we cannot | 
|---|
| 1244 | * guarantee that acquiring it never takes part in a dependency | 
|---|
| 1245 | * chain involving memory allocation. The lock is seldom contended | 
|---|
| 1246 | * anyway. | 
|---|
| 1247 | */ | 
|---|
| 1248 | return 0; | 
|---|
| 1249 | } | 
|---|
| 1250 |  | 
|---|
| 1251 | /* Snapshot count of all CPUs */ | 
|---|
| 1252 | for_each_cpu(cpu, rcu_nocb_mask) { | 
|---|
| 1253 | struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); | 
|---|
| 1254 | int _count; | 
|---|
| 1255 |  | 
|---|
| 1256 | if (WARN_ON_ONCE(!rcu_rdp_is_offloaded(rdp))) | 
|---|
| 1257 | continue; | 
|---|
| 1258 |  | 
|---|
| 1259 | if (!READ_ONCE(rdp->lazy_len)) | 
|---|
| 1260 | continue; | 
|---|
| 1261 |  | 
|---|
| 1262 | rcu_nocb_lock_irqsave(rdp, flags); | 
|---|
| 1263 | /* | 
|---|
| 1264 | * Recheck under the nocb lock. Since we are not holding the bypass | 
|---|
| 1265 | * lock, we may still race with increments from the enqueuer, but we | 
|---|
| 1266 | * do know for sure whether there is at least one lazy callback. | 
|---|
| 1267 | */ | 
|---|
| 1268 | _count = READ_ONCE(rdp->lazy_len); | 
|---|
| 1269 | if (!_count) { | 
|---|
| 1270 | rcu_nocb_unlock_irqrestore(rdp, flags); | 
|---|
| 1271 | continue; | 
|---|
| 1272 | } | 
|---|
| 1273 | rcu_nocb_try_flush_bypass(rdp, jiffies); | 
|---|
| 1274 | rcu_nocb_unlock_irqrestore(rdp, flags); | 
|---|
| 1275 | wake_nocb_gp(rdp, false); | 
|---|
| 1276 | sc->nr_to_scan -= _count; | 
|---|
| 1277 | count += _count; | 
|---|
| 1278 | if (sc->nr_to_scan <= 0) | 
|---|
| 1279 | break; | 
|---|
| 1280 | } | 
|---|
| 1281 |  | 
|---|
| 1282 | mutex_unlock(&rcu_state.nocb_mutex); | 
|---|
| 1283 |  | 
|---|
| 1284 | return count ? count : SHRINK_STOP; | 
|---|
| 1285 | } | 
|---|
| 1286 | #endif // #ifdef CONFIG_RCU_LAZY | 
|---|
| 1287 |  | 
|---|
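|  | /* | 
|---|
|  | * Boot-time setup of callback offloading: combine the rcu_nocbs= mask | 
|---|
|  | * with nohz_full and the CONFIG_RCU_NOCB_CPU_DEFAULT_ALL default, | 
|---|
|  | * register the lazy-RCU shrinker (if configured), mark the chosen | 
|---|
|  | * CPUs' segcblists as offloaded, and group them under their rcuog | 
|---|
|  | * GP kthreads. | 
|---|
|  | */ | 
|---|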
| 1288 | void __init rcu_init_nohz(void) | 
|---|
| 1289 | { | 
|---|
| 1290 | int cpu; | 
|---|
| 1291 | struct rcu_data *rdp; | 
|---|
| 1292 | const struct cpumask *cpumask = NULL; | 
|---|
| 1293 | struct shrinker * __maybe_unused lazy_rcu_shrinker; | 
|---|
| 1294 |  | 
|---|
| 1295 | #if defined(CONFIG_NO_HZ_FULL) | 
|---|
| 1296 | if (tick_nohz_full_running && !cpumask_empty(tick_nohz_full_mask)) | 
|---|
| 1297 | cpumask = tick_nohz_full_mask; | 
|---|
| 1298 | #endif | 
|---|
| 1299 |  | 
|---|
| 1300 | if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_DEFAULT_ALL) && | 
|---|
| 1301 | !rcu_state.nocb_is_setup && !cpumask) | 
|---|
| 1302 | cpumask = cpu_possible_mask; | 
|---|
| 1303 |  | 
|---|
| 1304 | if (cpumask) { | 
|---|
| 1305 | if (!cpumask_available(rcu_nocb_mask)) { | 
|---|
| 1306 | if (!zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL)) { | 
|---|
| 1307 | pr_info( "rcu_nocb_mask allocation failed, callback offloading disabled.\n"); | 
|---|
| 1308 | return; | 
|---|
| 1309 | } | 
|---|
| 1310 | } | 
|---|
| 1311 |  | 
|---|
| 1312 | cpumask_or(rcu_nocb_mask, rcu_nocb_mask, cpumask); | 
|---|
| 1313 | rcu_state.nocb_is_setup = true; | 
|---|
| 1314 | } | 
|---|
| 1315 |  | 
|---|
| 1316 | if (!rcu_state.nocb_is_setup) | 
|---|
| 1317 | return; | 
|---|
| 1318 |  | 
|---|
| 1319 | #ifdef CONFIG_RCU_LAZY | 
|---|
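|  | /* Register the shrinker that flushes lazy callbacks under memory pressure. */ | 
|---|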
| 1320 | lazy_rcu_shrinker = shrinker_alloc(0, "rcu-lazy"); | 
|---|
| 1321 | if (!lazy_rcu_shrinker) { | 
|---|
| 1322 | pr_err( "Failed to allocate lazy_rcu shrinker!\n"); | 
|---|
| 1323 | } else { | 
|---|
| 1324 | lazy_rcu_shrinker->count_objects = lazy_rcu_shrink_count; | 
|---|
| 1325 | lazy_rcu_shrinker->scan_objects = lazy_rcu_shrink_scan; | 
|---|
| 1326 |  | 
|---|
| 1327 | shrinker_register(lazy_rcu_shrinker); | 
|---|
| 1328 | } | 
|---|
| 1329 | #endif // #ifdef CONFIG_RCU_LAZY | 
|---|
| 1330 |  | 
|---|
| 1331 | if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) { | 
|---|
| 1332 | pr_info( "\tNote: kernel parameter 'rcu_nocbs=', 'nohz_full', or 'isolcpus=' contains nonexistent CPUs.\n"); | 
|---|
| 1333 | cpumask_and(rcu_nocb_mask, cpu_possible_mask, | 
|---|
| 1334 | rcu_nocb_mask); | 
|---|
| 1335 | } | 
|---|
| 1336 | if (cpumask_empty(rcu_nocb_mask)) | 
|---|
| 1337 | pr_info( "\tOffload RCU callbacks from CPUs: (none).\n"); | 
|---|
| 1338 | else | 
|---|
| 1339 | pr_info( "\tOffload RCU callbacks from CPUs: %*pbl.\n", | 
|---|
| 1340 | cpumask_pr_args(rcu_nocb_mask)); | 
|---|
| 1341 | if (rcu_nocb_poll) | 
|---|
| 1342 | pr_info( "\tPoll for callbacks from no-CBs CPUs.\n"); | 
|---|
| 1343 |  | 
|---|
| 1344 | for_each_cpu(cpu, rcu_nocb_mask) { | 
|---|
| 1345 | rdp = per_cpu_ptr(&rcu_data, cpu); | 
|---|
| 1346 | if (rcu_segcblist_empty(&rdp->cblist)) | 
|---|
| 1347 | rcu_segcblist_init(&rdp->cblist); | 
|---|
| 1348 | rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_OFFLOADED); | 
|---|
| 1349 | } | 
|---|
| 1350 | rcu_organize_nocb_kthreads(); | 
|---|
| 1351 | } | 
|---|
| 1352 |  | 
|---|
| 1353 | /* Initialize per-rcu_data variables for no-CBs CPUs. */ | 
|---|
| 1354 | static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) | 
|---|
| 1355 | { | 
|---|
| 1356 | init_swait_queue_head(&rdp->nocb_cb_wq); | 
|---|
| 1357 | init_swait_queue_head(&rdp->nocb_gp_wq); | 
|---|
| 1358 | init_swait_queue_head(&rdp->nocb_state_wq); | 
|---|
| 1359 | raw_spin_lock_init(&rdp->nocb_lock); | 
|---|
| 1360 | raw_spin_lock_init(&rdp->nocb_bypass_lock); | 
|---|
| 1361 | raw_spin_lock_init(&rdp->nocb_gp_lock); | 
|---|
| 1362 | timer_setup(&rdp->nocb_timer, do_nocb_deferred_wakeup_timer, 0); | 
|---|
| 1363 | rcu_cblist_init(&rdp->nocb_bypass); | 
|---|
| 1364 | WRITE_ONCE(rdp->lazy_len, 0); | 
|---|
| 1365 | mutex_init(&rdp->nocb_gp_kthread_mutex); | 
|---|
| 1366 | } | 
|---|
| 1367 |  | 
|---|
| 1368 | /* | 
|---|
| 1369 | * If the specified CPU is a no-CBs CPU that does not already have its | 
|---|
| 1370 | * rcuo CB kthread, spawn it.  Additionally, if the rcuo GP kthread | 
|---|
| 1371 | * for this CPU's group has not yet been created, spawn it as well. | 
|---|
| 1372 | */ | 
|---|
| 1373 | static void rcu_spawn_cpu_nocb_kthread(int cpu) | 
|---|
| 1374 | { | 
|---|
| 1375 | struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); | 
|---|
| 1376 | struct rcu_data *rdp_gp; | 
|---|
| 1377 | struct task_struct *t; | 
|---|
| 1378 | struct sched_param sp; | 
|---|
| 1379 |  | 
|---|
| 1380 | if (!rcu_scheduler_fully_active || !rcu_state.nocb_is_setup) | 
|---|
| 1381 | return; | 
|---|
| 1382 |  | 
|---|
| 1383 | /* If there already is an rcuo kthread, then nothing to do. */ | 
|---|
| 1384 | if (rdp->nocb_cb_kthread) | 
|---|
| 1385 | return; | 
|---|
| 1386 |  | 
|---|
| 1387 | /* If we didn't spawn the GP kthread first, reorganize! */ | 
|---|
| 1388 | sp.sched_priority = kthread_prio; | 
|---|
| 1389 | rdp_gp = rdp->nocb_gp_rdp; | 
|---|
| 1390 | mutex_lock(&rdp_gp->nocb_gp_kthread_mutex); | 
|---|
| 1391 | if (!rdp_gp->nocb_gp_kthread) { | 
|---|
| 1392 | t = kthread_run(rcu_nocb_gp_kthread, rdp_gp, | 
|---|
| 1393 | "rcuog/%d", rdp_gp->cpu); | 
|---|
| 1394 | if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo GP kthread, OOM is now expected behavior\n", __func__)) { | 
|---|
| 1395 | mutex_unlock(&rdp_gp->nocb_gp_kthread_mutex); | 
|---|
| 1396 | goto err; | 
|---|
| 1397 | } | 
|---|
| 1398 | WRITE_ONCE(rdp_gp->nocb_gp_kthread, t); | 
|---|
| 1399 | if (kthread_prio) | 
|---|
| 1400 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); | 
|---|
| 1401 | } | 
|---|
| 1402 | mutex_unlock(&rdp_gp->nocb_gp_kthread_mutex); | 
|---|
| 1403 |  | 
|---|
| 1404 | /* Spawn the kthread for this CPU. */ | 
|---|
| 1405 | t = kthread_create(rcu_nocb_cb_kthread, rdp, | 
|---|
| 1406 | "rcuo%c/%d", rcu_state.abbr, cpu); | 
|---|
| 1407 | if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo CB kthread, OOM is now expected behavior\n", __func__)) | 
|---|
| 1408 | goto err; | 
|---|
| 1409 |  | 
|---|
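|  | /* | 
|---|
|  | * kthread_create() does not start the new kthread: wake it now if the | 
|---|
|  | * CPU is currently offloaded, otherwise park it until a later offload | 
|---|
|  | * request unparks it. | 
|---|
|  | */ | 
|---|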
| 1410 | if (rcu_rdp_is_offloaded(rdp)) | 
|---|
| 1411 | wake_up_process(t); | 
|---|
| 1412 | else | 
|---|
| 1413 | kthread_park(t); | 
|---|
| 1414 |  | 
|---|
| 1415 | if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_CB_BOOST) && kthread_prio) | 
|---|
| 1416 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); | 
|---|
| 1417 |  | 
|---|
| 1418 | WRITE_ONCE(rdp->nocb_cb_kthread, t); | 
|---|
| 1419 | WRITE_ONCE(rdp->nocb_gp_kthread, rdp_gp->nocb_gp_kthread); | 
|---|
| 1420 | return; | 
|---|
| 1421 |  | 
|---|
| 1422 | err: | 
|---|
| 1423 | /* | 
|---|
| 1424 | * No need to protect against concurrent rcu_barrier() | 
|---|
| 1425 | * because the number of callbacks should be 0 for a non-boot CPU, | 
|---|
| 1426 | * therefore rcu_barrier() shouldn't even try to grab the nocb_lock. | 
|---|
| 1427 | * But hold nocb_mutex to avoid nocb_lock imbalance from shrinker. | 
|---|
| 1428 | */ | 
|---|
| 1429 | WARN_ON_ONCE(system_state > SYSTEM_BOOTING && rcu_segcblist_n_cbs(&rdp->cblist)); | 
|---|
| 1430 | mutex_lock(&rcu_state.nocb_mutex); | 
|---|
| 1431 | if (rcu_rdp_is_offloaded(rdp)) { | 
|---|
| 1432 | rcu_nocb_rdp_deoffload(rdp); | 
|---|
| 1433 | cpumask_clear_cpu(cpu, rcu_nocb_mask); | 
|---|
| 1434 | } | 
|---|
| 1435 | mutex_unlock(&rcu_state.nocb_mutex); | 
|---|
| 1436 | } | 
|---|
| 1437 |  | 
|---|
| 1438 | /* How many CB CPU IDs per GP kthread?  Default of -1 for sqrt(nr_cpu_ids). */ | 
|---|
| 1439 | static int rcu_nocb_gp_stride = -1; | 
|---|
| 1440 | module_param(rcu_nocb_gp_stride, int, 0444); | 
|---|
| 1441 |  | 
|---|
| 1442 | /* | 
|---|
| 1443 | * Initialize GP-CB relationships for all no-CBs CPUs. | 
|---|
| 1444 | */ | 
|---|
| 1445 | static void __init rcu_organize_nocb_kthreads(void) | 
|---|
| 1446 | { | 
|---|
| 1447 | int cpu; | 
|---|
| 1448 | bool firsttime = true; | 
|---|
| 1449 | bool gotnocbs = false; | 
|---|
| 1450 | bool gotnocbscbs = true; | 
|---|
| 1451 | int ls = rcu_nocb_gp_stride; | 
|---|
| 1452 | int nl = 0;  /* Next GP kthread. */ | 
|---|
| 1453 | struct rcu_data *rdp; | 
|---|
| 1454 | struct rcu_data *rdp_gp = NULL;  /* Suppress misguided gcc warn. */ | 
|---|
| 1455 |  | 
|---|
| 1456 | if (!cpumask_available(rcu_nocb_mask)) | 
|---|
| 1457 | return; | 
|---|
| 1458 | if (ls == -1) { | 
|---|
| 1459 | ls = nr_cpu_ids / int_sqrt(nr_cpu_ids); | 
|---|
| 1460 | rcu_nocb_gp_stride = ls; | 
|---|
| 1461 | } | 
|---|
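|  | /* | 
|---|
|  | * For example, with nr_cpu_ids == 64 the default stride is | 
|---|
|  | * 64 / int_sqrt(64) == 8, so CPUs 0-7 share the rcuog kthread | 
|---|
|  | * associated with CPU 0, CPUs 8-15 the one associated with CPU 8, | 
|---|
|  | * and so on. | 
|---|
|  | */ | 
|---|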
| 1462 |  | 
|---|
| 1463 | /* | 
|---|
| 1464 | * Each pass through this loop sets up one rcu_data structure. | 
|---|
| 1465 | * Should the corresponding CPU come online in the future, then | 
|---|
| 1466 | * we will spawn the needed set of rcu_nocb_kthread() kthreads. | 
|---|
| 1467 | */ | 
|---|
| 1468 | for_each_possible_cpu(cpu) { | 
|---|
| 1469 | rdp = per_cpu_ptr(&rcu_data, cpu); | 
|---|
| 1470 | if (rdp->cpu >= nl) { | 
|---|
| 1471 | /* New GP kthread, set up for CBs & next GP. */ | 
|---|
| 1472 | gotnocbs = true; | 
|---|
| 1473 | nl = DIV_ROUND_UP(rdp->cpu + 1, ls) * ls; | 
|---|
| 1474 | rdp_gp = rdp; | 
|---|
| 1475 | INIT_LIST_HEAD(&rdp->nocb_head_rdp); | 
|---|
| 1476 | if (dump_tree) { | 
|---|
| 1477 | if (!firsttime) | 
|---|
| 1478 | pr_cont( "%s\n", gotnocbscbs | 
|---|
| 1479 | ? "": " (self only)"); | 
|---|
| 1480 | gotnocbscbs = false; | 
|---|
| 1481 | firsttime = false; | 
|---|
| 1482 | pr_alert( "%s: No-CB GP kthread CPU %d:", | 
|---|
| 1483 | __func__, cpu); | 
|---|
| 1484 | } | 
|---|
| 1485 | } else { | 
|---|
| 1486 | /* Another CB kthread, link to previous GP kthread. */ | 
|---|
| 1487 | gotnocbscbs = true; | 
|---|
| 1488 | if (dump_tree) | 
|---|
| 1489 | pr_cont( " %d", cpu); | 
|---|
| 1490 | } | 
|---|
| 1491 | rdp->nocb_gp_rdp = rdp_gp; | 
|---|
| 1492 | if (cpumask_test_cpu(cpu, rcu_nocb_mask)) | 
|---|
| 1493 | list_add_tail(&rdp->nocb_entry_rdp, &rdp_gp->nocb_head_rdp); | 
|---|
| 1494 | } | 
|---|
| 1495 | if (gotnocbs && dump_tree) | 
|---|
| 1496 | pr_cont( "%s\n", gotnocbscbs ? "": " (self only)"); | 
|---|
| 1497 | } | 
|---|
| 1498 |  | 
|---|
| 1499 | /* | 
|---|
| 1500 | * Bind the current task to the offloaded CPUs.  If there are no offloaded | 
|---|
| 1501 | * CPUs, leave the task unbound.  Splat if the bind attempt fails. | 
|---|
| 1502 | */ | 
|---|
| 1503 | void rcu_bind_current_to_nocb(void) | 
|---|
| 1504 | { | 
|---|
| 1505 | if (cpumask_available(rcu_nocb_mask) && !cpumask_empty(rcu_nocb_mask)) | 
|---|
| 1506 | WARN_ON(sched_setaffinity(current->pid, rcu_nocb_mask)); | 
|---|
| 1507 | } | 
|---|
| 1508 | EXPORT_SYMBOL_GPL(rcu_bind_current_to_nocb); | 
|---|
| 1509 |  | 
|---|
| 1510 | // The ->on_cpu field is available only in CONFIG_SMP=y, so... | 
|---|
| 1511 | #ifdef CONFIG_SMP | 
|---|
| 1512 | static char *show_rcu_should_be_on_cpu(struct task_struct *tsp) | 
|---|
| 1513 | { | 
|---|
| 1514 | return tsp && task_is_running(tsp) && !tsp->on_cpu ? "!" : ""; | 
|---|
| 1515 | } | 
|---|
| 1516 | #else // #ifdef CONFIG_SMP | 
|---|
| 1517 | static char *show_rcu_should_be_on_cpu(struct task_struct *tsp) | 
|---|
| 1518 | { | 
|---|
| 1519 | return ""; | 
|---|
| 1520 | } | 
|---|
| 1521 | #endif // #else #ifdef CONFIG_SMP | 
|---|
| 1522 |  | 
|---|
| 1523 | /* | 
|---|
| 1524 | * Dump out nocb grace-period kthread state for the specified rcu_data | 
|---|
| 1525 | * structure. | 
|---|
| 1526 | */ | 
|---|
| 1527 | static void show_rcu_nocb_gp_state(struct rcu_data *rdp) | 
|---|
| 1528 | { | 
|---|
| 1529 | struct rcu_node *rnp = rdp->mynode; | 
|---|
| 1530 |  | 
|---|
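|  | /* | 
|---|
|  | * For each "xY"[cond] argument below, the uppercase character is | 
|---|
|  | * printed when the condition holds and the lowercase character (or | 
|---|
|  | * '.') when it does not. | 
|---|
|  | */ | 
|---|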
| 1531 | pr_info( "nocb GP %d %c%c%c%c%c %c[%c%c] %c%c:%ld rnp %d:%d %lu %c CPU %d%s\n", | 
|---|
| 1532 | rdp->cpu, | 
|---|
| 1533 | "kK"[!!rdp->nocb_gp_kthread], | 
|---|
| 1534 | "lL"[raw_spin_is_locked(&rdp->nocb_gp_lock)], | 
|---|
| 1535 | "dD"[!!rdp->nocb_defer_wakeup], | 
|---|
| 1536 | "tT"[timer_pending(&rdp->nocb_timer)], | 
|---|
| 1537 | "sS"[!!rdp->nocb_gp_sleep], | 
|---|
| 1538 | ".W"[swait_active(&rdp->nocb_gp_wq)], | 
|---|
| 1539 | ".W"[swait_active(&rnp->nocb_gp_wq[0])], | 
|---|
| 1540 | ".W"[swait_active(&rnp->nocb_gp_wq[1])], | 
|---|
| 1541 | ".B"[!!rdp->nocb_gp_bypass], | 
|---|
| 1542 | ".G"[!!rdp->nocb_gp_gp], | 
|---|
| 1543 | (long)rdp->nocb_gp_seq, | 
|---|
| 1544 | rnp->grplo, rnp->grphi, READ_ONCE(rdp->nocb_gp_loops), | 
|---|
| 1545 | rdp->nocb_gp_kthread ? task_state_to_char(rdp->nocb_gp_kthread) : '.', | 
|---|
| 1546 | rdp->nocb_gp_kthread ? (int)task_cpu(rdp->nocb_gp_kthread) : -1, | 
|---|
| 1547 | show_rcu_should_be_on_cpu(rdp->nocb_gp_kthread)); | 
|---|
| 1548 | } | 
|---|
| 1549 |  | 
|---|
| 1550 | /* Dump out nocb kthread state for the specified rcu_data structure. */ | 
|---|
| 1551 | static void show_rcu_nocb_state(struct rcu_data *rdp) | 
|---|
| 1552 | { | 
|---|
| 1553 | char bufd[22]; | 
|---|
| 1554 | char bufw[45]; | 
|---|
| 1555 | char bufr[45]; | 
|---|
| 1556 | char bufn[22]; | 
|---|
| 1557 | char bufb[22]; | 
|---|
| 1558 | struct rcu_data *nocb_next_rdp; | 
|---|
| 1559 | struct rcu_segcblist *rsclp = &rdp->cblist; | 
|---|
| 1560 | bool waslocked; | 
|---|
| 1561 | bool wassleep; | 
|---|
| 1562 |  | 
|---|
| 1563 | if (rdp->nocb_gp_rdp == rdp) | 
|---|
| 1564 | show_rcu_nocb_gp_state(rdp); | 
|---|
| 1565 |  | 
|---|
| 1566 | if (!rcu_segcblist_is_offloaded(&rdp->cblist)) | 
|---|
| 1567 | return; | 
|---|
| 1568 |  | 
|---|
| 1569 | nocb_next_rdp = list_next_or_null_rcu(&rdp->nocb_gp_rdp->nocb_head_rdp, | 
|---|
| 1570 | &rdp->nocb_entry_rdp, | 
|---|
| 1571 | typeof(*rdp), | 
|---|
| 1572 | nocb_entry_rdp); | 
|---|
| 1573 |  | 
|---|
| 1574 | sprintf(bufd, "%ld", rsclp->seglen[RCU_DONE_TAIL]); | 
|---|
| 1575 | sprintf(bufw, "%ld(%ld)", rsclp->seglen[RCU_WAIT_TAIL], rsclp->gp_seq[RCU_WAIT_TAIL]); | 
|---|
| 1576 | sprintf(bufr, "%ld(%ld)", rsclp->seglen[RCU_NEXT_READY_TAIL], | 
|---|
| 1577 | rsclp->gp_seq[RCU_NEXT_READY_TAIL]); | 
|---|
| 1578 | sprintf(bufn, "%ld", rsclp->seglen[RCU_NEXT_TAIL]); | 
|---|
| 1579 | sprintf(bufb, "%ld", rcu_cblist_n_cbs(&rdp->nocb_bypass)); | 
|---|
| 1580 | pr_info( "   CB %d^%d->%d %c%c%c%c%c F%ld L%ld C%d %c%s%c%s%c%s%c%s%c%s q%ld %c CPU %d%s\n", | 
|---|
| 1581 | rdp->cpu, rdp->nocb_gp_rdp->cpu, | 
|---|
| 1582 | nocb_next_rdp ? nocb_next_rdp->cpu : -1, | 
|---|
| 1583 | "kK"[!!rdp->nocb_cb_kthread], | 
|---|
| 1584 | "bB"[raw_spin_is_locked(&rdp->nocb_bypass_lock)], | 
|---|
| 1585 | "lL"[raw_spin_is_locked(&rdp->nocb_lock)], | 
|---|
| 1586 | "sS"[!!rdp->nocb_cb_sleep], | 
|---|
| 1587 | ".W"[swait_active(&rdp->nocb_cb_wq)], | 
|---|
| 1588 | jiffies - rdp->nocb_bypass_first, | 
|---|
| 1589 | jiffies - rdp->nocb_nobypass_last, | 
|---|
| 1590 | rdp->nocb_nobypass_count, | 
|---|
| 1591 | ".D"[rcu_segcblist_ready_cbs(rsclp)], | 
|---|
| 1592 | rcu_segcblist_segempty(rsclp, RCU_DONE_TAIL) ? "" : bufd, | 
|---|
| 1593 | ".W"[!rcu_segcblist_segempty(rsclp, RCU_WAIT_TAIL)], | 
|---|
| 1594 | rcu_segcblist_segempty(rsclp, RCU_WAIT_TAIL) ? "" : bufw, | 
|---|
| 1595 | ".R"[!rcu_segcblist_segempty(rsclp, RCU_NEXT_READY_TAIL)], | 
|---|
| 1596 | rcu_segcblist_segempty(rsclp, RCU_NEXT_READY_TAIL) ? "" : bufr, | 
|---|
| 1597 | ".N"[!rcu_segcblist_segempty(rsclp, RCU_NEXT_TAIL)], | 
|---|
| 1598 | rcu_segcblist_segempty(rsclp, RCU_NEXT_TAIL) ? "" : bufn, | 
|---|
| 1599 | ".B"[!!rcu_cblist_n_cbs(&rdp->nocb_bypass)], | 
|---|
| 1600 | !rcu_cblist_n_cbs(&rdp->nocb_bypass) ? "" : bufb, | 
|---|
| 1601 | rcu_segcblist_n_cbs(&rdp->cblist), | 
|---|
| 1602 | rdp->nocb_cb_kthread ? task_state_to_char(rdp->nocb_cb_kthread) : '.', | 
|---|
| 1603 | rdp->nocb_cb_kthread ? (int)task_cpu(rdp->nocb_cb_kthread) : -1, | 
|---|
| 1604 | show_rcu_should_be_on_cpu(rdp->nocb_cb_kthread)); | 
|---|
| 1605 |  | 
|---|
| 1606 | /* It is OK for GP kthreads to have GP state. */ | 
|---|
| 1607 | if (rdp->nocb_gp_rdp == rdp) | 
|---|
| 1608 | return; | 
|---|
| 1609 |  | 
|---|
| 1610 | waslocked = raw_spin_is_locked(&rdp->nocb_gp_lock); | 
|---|
| 1611 | wassleep = swait_active(&rdp->nocb_gp_wq); | 
|---|
| 1612 | if (!rdp->nocb_gp_sleep && !waslocked && !wassleep) | 
|---|
| 1613 | return;  /* Nothing untoward. */ | 
|---|
| 1614 |  | 
|---|
| 1615 | pr_info( "   nocb GP activity on CB-only CPU!!! %c%c%c %c\n", | 
|---|
| 1616 | "lL"[waslocked], | 
|---|
| 1617 | "dD"[!!rdp->nocb_defer_wakeup], | 
|---|
| 1618 | "sS"[!!rdp->nocb_gp_sleep], | 
|---|
| 1619 | ".W"[wassleep]); | 
|---|
| 1620 | } | 
|---|
| 1621 |  | 
|---|
| 1622 | #else /* #ifdef CONFIG_RCU_NOCB_CPU */ | 
|---|
| 1623 |  | 
|---|
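|  | /* | 
|---|
|  | * Without CONFIG_RCU_NOCB_CPU there is no callback offloading, so the | 
|---|
|  | * functions below reduce to trivial stubs. | 
|---|
|  | */ | 
|---|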
| 1624 | /* No ->nocb_lock to acquire.  */ | 
|---|
| 1625 | static void rcu_nocb_lock(struct rcu_data *rdp) | 
|---|
| 1626 | { | 
|---|
| 1627 | } | 
|---|
| 1628 |  | 
|---|
| 1629 | /* No ->nocb_lock to release.  */ | 
|---|
| 1630 | static void rcu_nocb_unlock(struct rcu_data *rdp) | 
|---|
| 1631 | { | 
|---|
| 1632 | } | 
|---|
| 1633 |  | 
|---|
| 1634 | /* No ->nocb_lock to release.  */ | 
|---|
| 1635 | static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp, | 
|---|
| 1636 | unsigned long flags) | 
|---|
| 1637 | { | 
|---|
| 1638 | local_irq_restore(flags); | 
|---|
| 1639 | } | 
|---|
| 1640 |  | 
|---|
| 1641 | /* Lockdep check that ->cblist may be safely accessed. */ | 
|---|
| 1642 | static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp) | 
|---|
| 1643 | { | 
|---|
| 1644 | lockdep_assert_irqs_disabled(); | 
|---|
| 1645 | } | 
|---|
| 1646 |  | 
|---|
| 1647 | static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq) | 
|---|
| 1648 | { | 
|---|
| 1649 | } | 
|---|
| 1650 |  | 
|---|
| 1651 | static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp) | 
|---|
| 1652 | { | 
|---|
| 1653 | return NULL; | 
|---|
| 1654 | } | 
|---|
| 1655 |  | 
|---|
| 1656 | static void rcu_init_one_nocb(struct rcu_node *rnp) | 
|---|
| 1657 | { | 
|---|
| 1658 | } | 
|---|
| 1659 |  | 
|---|
| 1660 | static bool wake_nocb_gp(struct rcu_data *rdp, bool force) | 
|---|
| 1661 | { | 
|---|
| 1662 | return false; | 
|---|
| 1663 | } | 
|---|
| 1664 |  | 
|---|
| 1665 | static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, | 
|---|
| 1666 | unsigned long j, bool lazy) | 
|---|
| 1667 | { | 
|---|
| 1668 | return true; | 
|---|
| 1669 | } | 
|---|
| 1670 |  | 
|---|
| 1671 | static void call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *head, | 
|---|
| 1672 | rcu_callback_t func, unsigned long flags, bool lazy) | 
|---|
| 1673 | { | 
|---|
| 1674 | WARN_ON_ONCE(1);  /* Should be dead code! */ | 
|---|
| 1675 | } | 
|---|
| 1676 |  | 
|---|
| 1677 | static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty, | 
|---|
| 1678 | unsigned long flags) | 
|---|
| 1679 | { | 
|---|
| 1680 | WARN_ON_ONCE(1);  /* Should be dead code! */ | 
|---|
| 1681 | } | 
|---|
| 1682 |  | 
|---|
| 1683 | static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) | 
|---|
| 1684 | { | 
|---|
| 1685 | } | 
|---|
| 1686 |  | 
|---|
| 1687 | static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level) | 
|---|
| 1688 | { | 
|---|
| 1689 | return false; | 
|---|
| 1690 | } | 
|---|
| 1691 |  | 
|---|
| 1692 | static bool do_nocb_deferred_wakeup(struct rcu_data *rdp) | 
|---|
| 1693 | { | 
|---|
| 1694 | return false; | 
|---|
| 1695 | } | 
|---|
| 1696 |  | 
|---|
| 1697 | static void rcu_spawn_cpu_nocb_kthread(int cpu) | 
|---|
| 1698 | { | 
|---|
| 1699 | } | 
|---|
| 1700 |  | 
|---|
| 1701 | static void show_rcu_nocb_state(struct rcu_data *rdp) | 
|---|
| 1702 | { | 
|---|
| 1703 | } | 
|---|
| 1704 |  | 
|---|
| 1705 | #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ | 
|---|
| 1706 |  | 
|---|