| 1 | /* SPDX-License-Identifier: GPL-2.0+ */ | 
|---|
| 2 | /* | 
|---|
| 3 | * Read-Copy Update mechanism for mutual exclusion (tree-based version) | 
|---|
| 4 | * Internal non-public definitions. | 
|---|
| 5 | * | 
|---|
| 6 | * Copyright IBM Corporation, 2008 | 
|---|
| 7 | * | 
|---|
| 8 | * Author: Ingo Molnar <mingo@elte.hu> | 
|---|
| 9 | *	   Paul E. McKenney <paulmck@linux.ibm.com> | 
|---|
| 10 | */ | 
|---|
| 11 |  | 
|---|
| 12 | #include <linux/cache.h> | 
|---|
| 13 | #include <linux/kthread.h> | 
|---|
| 14 | #include <linux/spinlock.h> | 
|---|
| 15 | #include <linux/rtmutex.h> | 
|---|
| 16 | #include <linux/threads.h> | 
|---|
| 17 | #include <linux/cpumask.h> | 
|---|
| 18 | #include <linux/seqlock.h> | 
|---|
| 19 | #include <linux/swait.h> | 
|---|
| 20 | #include <linux/rcu_node_tree.h> | 
|---|
| 21 |  | 
|---|
| 22 | #include "rcu_segcblist.h" | 
|---|
| 23 |  | 
|---|
| 24 | /* Communicate arguments to a kthread worker handler. */ | 
|---|
| 25 | struct rcu_exp_work { | 
|---|
| 26 | unsigned long rew_s; | 
|---|
| 27 | struct kthread_work rew_work; | 
|---|
| 28 | }; | 
|---|
| 29 |  | 
|---|
/*
 * States of RCU's kthreads, recorded for tracing purposes.
 * RCU_KTHREAD_MAX equals the largest state value defined here.
 */
#define RCU_KTHREAD_STOPPED	0
#define RCU_KTHREAD_RUNNING	1
#define RCU_KTHREAD_WAITING	2
#define RCU_KTHREAD_OFFCPU	3
#define RCU_KTHREAD_YIELDING	4
#define RCU_KTHREAD_MAX		4
|---|
| 37 |  | 
|---|
| 38 | /* | 
|---|
| 39 | * Definition for node within the RCU grace-period-detection hierarchy. | 
|---|
| 40 | */ | 
|---|
| 41 | struct rcu_node { | 
|---|
| 42 | raw_spinlock_t __private lock;	/* Root rcu_node's lock protects */ | 
|---|
| 43 | /*  some rcu_state fields as well as */ | 
|---|
| 44 | /*  following. */ | 
|---|
| 45 | unsigned long gp_seq;	/* Track rsp->gp_seq. */ | 
|---|
| 46 | unsigned long gp_seq_needed; /* Track furthest future GP request. */ | 
|---|
| 47 | unsigned long completedqs; /* All QSes done for this node. */ | 
|---|
| 48 | unsigned long qsmask;	/* CPUs or groups that need to switch in */ | 
|---|
| 49 | /*  order for current grace period to proceed.*/ | 
|---|
| 50 | /*  In leaf rcu_node, each bit corresponds to */ | 
|---|
| 51 | /*  an rcu_data structure, otherwise, each */ | 
|---|
| 52 | /*  bit corresponds to a child rcu_node */ | 
|---|
| 53 | /*  structure. */ | 
|---|
| 54 | unsigned long rcu_gp_init_mask;	/* Mask of offline CPUs at GP init. */ | 
|---|
| 55 | unsigned long qsmaskinit; | 
|---|
| 56 | /* Per-GP initial value for qsmask. */ | 
|---|
| 57 | /*  Initialized from ->qsmaskinitnext at the */ | 
|---|
| 58 | /*  beginning of each grace period. */ | 
|---|
| 59 | unsigned long qsmaskinitnext; | 
|---|
| 60 | unsigned long expmask;	/* CPUs or groups that need to check in */ | 
|---|
| 61 | /*  to allow the current expedited GP */ | 
|---|
| 62 | /*  to complete. */ | 
|---|
| 63 | unsigned long expmaskinit; | 
|---|
| 64 | /* Per-GP initial values for expmask. */ | 
|---|
| 65 | /*  Initialized from ->expmaskinitnext at the */ | 
|---|
| 66 | /*  beginning of each expedited GP. */ | 
|---|
| 67 | unsigned long expmaskinitnext; | 
|---|
| 68 | /* Online CPUs for next expedited GP. */ | 
|---|
| 69 | /*  Any CPU that has ever been online will */ | 
|---|
| 70 | /*  have its bit set. */ | 
|---|
| 71 | struct kthread_worker *exp_kworker; | 
|---|
| 72 | /* Workers performing per node expedited GP */ | 
|---|
| 73 | /* initialization. */ | 
|---|
| 74 | unsigned long cbovldmask; | 
|---|
| 75 | /* CPUs experiencing callback overload. */ | 
|---|
| 76 | unsigned long ffmask;	/* Fully functional CPUs. */ | 
|---|
| 77 | unsigned long grpmask;	/* Mask to apply to parent qsmask. */ | 
|---|
| 78 | /*  Only one bit will be set in this mask. */ | 
|---|
| 79 | int	grplo;		/* lowest-numbered CPU here. */ | 
|---|
| 80 | int	grphi;		/* highest-numbered CPU here. */ | 
|---|
| 81 | u8	grpnum;		/* group number for next level up. */ | 
|---|
| 82 | u8	level;		/* root is at level 0. */ | 
|---|
| 83 | bool	wait_blkd_tasks;/* Necessary to wait for blocked tasks to */ | 
|---|
| 84 | /*  exit RCU read-side critical sections */ | 
|---|
| 85 | /*  before propagating offline up the */ | 
|---|
| 86 | /*  rcu_node tree? */ | 
|---|
| 87 | struct rcu_node *parent; | 
|---|
| 88 | struct list_head blkd_tasks; | 
|---|
| 89 | /* Tasks blocked in RCU read-side critical */ | 
|---|
| 90 | /*  section.  Tasks are placed at the head */ | 
|---|
| 91 | /*  of this list and age towards the tail. */ | 
|---|
| 92 | struct list_head *gp_tasks; | 
|---|
| 93 | /* Pointer to the first task blocking the */ | 
|---|
| 94 | /*  current grace period, or NULL if there */ | 
|---|
| 95 | /*  is no such task. */ | 
|---|
| 96 | struct list_head *exp_tasks; | 
|---|
| 97 | /* Pointer to the first task blocking the */ | 
|---|
| 98 | /*  current expedited grace period, or NULL */ | 
|---|
| 99 | /*  if there is no such task.  If there */ | 
|---|
| 100 | /*  is no current expedited grace period, */ | 
|---|
| 101 | /*  then there can cannot be any such task. */ | 
|---|
| 102 | struct list_head *boost_tasks; | 
|---|
| 103 | /* Pointer to first task that needs to be */ | 
|---|
| 104 | /*  priority boosted, or NULL if no priority */ | 
|---|
| 105 | /*  boosting is needed for this rcu_node */ | 
|---|
| 106 | /*  structure.  If there are no tasks */ | 
|---|
| 107 | /*  queued on this rcu_node structure that */ | 
|---|
| 108 | /*  are blocking the current grace period, */ | 
|---|
| 109 | /*  there can be no such task. */ | 
|---|
| 110 | struct rt_mutex boost_mtx; | 
|---|
| 111 | /* Used only for the priority-boosting */ | 
|---|
| 112 | /*  side effect, not as a lock. */ | 
|---|
| 113 | unsigned long boost_time; | 
|---|
| 114 | /* When to start boosting (jiffies). */ | 
|---|
| 115 | struct mutex kthread_mutex; | 
|---|
| 116 | /* Exclusion for thread spawning and affinity */ | 
|---|
| 117 | /*  manipulation. */ | 
|---|
| 118 | struct task_struct *boost_kthread_task; | 
|---|
| 119 | /* kthread that takes care of priority */ | 
|---|
| 120 | /*  boosting for this rcu_node structure. */ | 
|---|
| 121 | unsigned int boost_kthread_status; | 
|---|
| 122 | /* State of boost_kthread_task for tracing. */ | 
|---|
| 123 | unsigned long n_boosts;	/* Number of boosts for this rcu_node structure. */ | 
|---|
| 124 | #ifdef CONFIG_RCU_NOCB_CPU | 
|---|
| 125 | struct swait_queue_head nocb_gp_wq[2]; | 
|---|
| 126 | /* Place for rcu_nocb_kthread() to wait GP. */ | 
|---|
| 127 | #endif /* #ifdef CONFIG_RCU_NOCB_CPU */ | 
|---|
| 128 | raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp; | 
|---|
| 129 |  | 
|---|
| 130 | spinlock_t exp_lock ____cacheline_internodealigned_in_smp; | 
|---|
| 131 | unsigned long exp_seq_rq; | 
|---|
| 132 | wait_queue_head_t exp_wq[4]; | 
|---|
| 133 | struct rcu_exp_work rew; | 
|---|
| 134 | bool exp_need_flush;	/* Need to flush workitem? */ | 
|---|
| 135 | raw_spinlock_t exp_poll_lock; | 
|---|
| 136 | /* Lock and data for polled expedited grace periods. */ | 
|---|
| 137 | unsigned long exp_seq_poll_rq; | 
|---|
| 138 | struct work_struct exp_poll_wq; | 
|---|
| 139 | } ____cacheline_internodealigned_in_smp; | 
|---|
| 140 |  | 
|---|
/*
 * The bitmasks held by an rcu_node span the CPU-ID interval
 * [grplo, grphi] and are indexed relative to that interval, not relative
 * to the global CPU ID space.  This macro produces the bit that stands
 * for @cpu in the node-local masks of @rnp.
 */
#define leaf_node_cpu_bit(rnp, cpu) \
	(BIT((cpu) - (rnp)->grplo))
|---|
| 147 |  | 
|---|
| 148 | /* | 
|---|
| 149 | * Union to allow "aggregate OR" operation on the need for a quiescent | 
|---|
| 150 | * state by the normal and expedited grace periods. | 
|---|
| 151 | */ | 
|---|
| 152 | union rcu_noqs { | 
|---|
| 153 | struct { | 
|---|
| 154 | u8 norm; | 
|---|
| 155 | u8 exp; | 
|---|
| 156 | } b; /* Bits. */ | 
|---|
| 157 | u16 s; /* Set of bits, aggregate OR here. */ | 
|---|
| 158 | }; | 
|---|
| 159 |  | 
|---|
| 160 | /* | 
|---|
| 161 | * Record the snapshot of the core stats at half of the first RCU stall timeout. | 
|---|
| 162 | * The member gp_seq is used to ensure that all members are updated only once | 
|---|
| 163 | * during the sampling period. The snapshot is taken only if this gp_seq is not | 
|---|
| 164 | * equal to rdp->gp_seq. | 
|---|
| 165 | */ | 
|---|
| 166 | struct rcu_snap_record { | 
|---|
| 167 | unsigned long	gp_seq;		/* Track rdp->gp_seq counter */ | 
|---|
| 168 | u64		cputime_irq;	/* Accumulated cputime of hard irqs */ | 
|---|
| 169 | u64		cputime_softirq;/* Accumulated cputime of soft irqs */ | 
|---|
| 170 | u64		cputime_system; /* Accumulated cputime of kernel tasks */ | 
|---|
| 171 | u64		nr_hardirqs;	/* Accumulated number of hard irqs */ | 
|---|
| 172 | unsigned int	nr_softirqs;	/* Accumulated number of soft irqs */ | 
|---|
| 173 | unsigned long long nr_csw;	/* Accumulated number of task switches */ | 
|---|
| 174 | unsigned long   jiffies;	/* Track jiffies value */ | 
|---|
| 175 | }; | 
|---|
| 176 |  | 
|---|
/*
 * RCU uses an IRQ work (defer_qs_iw) to get the scheduler's attention
 * so that quiescent states are reported at the soonest possible time.
 * Such a request is in one of the following states:
 * - DEFER_QS_IDLE:    An IRQ work is yet to be scheduled.
 * - DEFER_QS_PENDING: An IRQ work was scheduled, but either it has not
 *                     yet run, or it ran and a quiescent state still
 *                     has not been reported.
 */
#define DEFER_QS_IDLE		0
#define DEFER_QS_PENDING	1
|---|
| 187 |  | 
|---|
| 188 | /* Per-CPU data for read-copy update. */ | 
|---|
| 189 | struct rcu_data { | 
|---|
| 190 | /* 1) quiescent-state and grace-period handling : */ | 
|---|
| 191 | unsigned long	gp_seq;		/* Track rsp->gp_seq counter. */ | 
|---|
| 192 | unsigned long	gp_seq_needed;	/* Track furthest future GP request. */ | 
|---|
| 193 | union rcu_noqs	cpu_no_qs;	/* No QSes yet for this CPU. */ | 
|---|
| 194 | bool		core_needs_qs;	/* Core waits for quiescent state. */ | 
|---|
| 195 | bool		beenonline;	/* CPU online at least once. */ | 
|---|
| 196 | bool		gpwrap;		/* Possible ->gp_seq wrap. */ | 
|---|
| 197 | unsigned int	gpwrap_count;	/* Count of GP sequence wrap. */ | 
|---|
| 198 | bool		cpu_started;	/* RCU watching this onlining CPU. */ | 
|---|
| 199 | struct rcu_node *mynode;	/* This CPU's leaf of hierarchy */ | 
|---|
| 200 | unsigned long grpmask;		/* Mask to apply to leaf qsmask. */ | 
|---|
| 201 | unsigned long	ticks_this_gp;	/* The number of scheduling-clock */ | 
|---|
| 202 | /*  ticks this CPU has handled */ | 
|---|
| 203 | /*  during and after the last grace */ | 
|---|
| 204 | /* period it is aware of. */ | 
|---|
| 205 | struct irq_work defer_qs_iw;	/* Obtain later scheduler attention. */ | 
|---|
| 206 | int defer_qs_iw_pending;	/* Scheduler attention pending? */ | 
|---|
| 207 | struct work_struct strict_work;	/* Schedule readers for strict GPs. */ | 
|---|
| 208 |  | 
|---|
| 209 | /* 2) batch handling */ | 
|---|
| 210 | struct rcu_segcblist cblist;	/* Segmented callback list, with */ | 
|---|
| 211 | /* different callbacks waiting for */ | 
|---|
| 212 | /* different grace periods. */ | 
|---|
| 213 | long		qlen_last_fqs_check; | 
|---|
| 214 | /* qlen at last check for QS forcing */ | 
|---|
| 215 | unsigned long	n_cbs_invoked;	/* # callbacks invoked since boot. */ | 
|---|
| 216 | unsigned long	n_force_qs_snap; | 
|---|
| 217 | /* did other CPU force QS recently? */ | 
|---|
| 218 | long		blimit;		/* Upper limit on a processed batch */ | 
|---|
| 219 |  | 
|---|
| 220 | /* 3) dynticks interface. */ | 
|---|
| 221 | int  watching_snap;		/* Per-GP tracking for dynticks. */ | 
|---|
| 222 | bool rcu_need_heavy_qs;		/* GP old, so heavy quiescent state! */ | 
|---|
| 223 | bool rcu_urgent_qs;		/* GP old need light quiescent state. */ | 
|---|
| 224 | bool rcu_forced_tick;		/* Forced tick to provide QS. */ | 
|---|
| 225 | bool rcu_forced_tick_exp;	/*   ... provide QS to expedited GP. */ | 
|---|
| 226 |  | 
|---|
| 227 | /* 4) rcu_barrier(), OOM callbacks, and expediting. */ | 
|---|
| 228 | unsigned long barrier_seq_snap;	/* Snap of rcu_state.barrier_sequence. */ | 
|---|
| 229 | struct rcu_head barrier_head; | 
|---|
| 230 | int exp_watching_snap;		/* Double-check need for IPI. */ | 
|---|
| 231 |  | 
|---|
| 232 | /* 5) Callback offloading. */ | 
|---|
| 233 | #ifdef CONFIG_RCU_NOCB_CPU | 
|---|
| 234 | struct swait_queue_head nocb_cb_wq; /* For nocb kthreads to sleep on. */ | 
|---|
| 235 | struct swait_queue_head nocb_state_wq; /* For offloading state changes */ | 
|---|
| 236 | struct task_struct *nocb_gp_kthread; | 
|---|
| 237 | raw_spinlock_t nocb_lock;	/* Guard following pair of fields. */ | 
|---|
| 238 | int nocb_defer_wakeup;		/* Defer wakeup of nocb_kthread. */ | 
|---|
| 239 | struct timer_list nocb_timer;	/* Enforce finite deferral. */ | 
|---|
| 240 | unsigned long nocb_gp_adv_time;	/* Last call_rcu() CB adv (jiffies). */ | 
|---|
| 241 | struct mutex nocb_gp_kthread_mutex; /* Exclusion for nocb gp kthread */ | 
|---|
| 242 | /* spawning */ | 
|---|
| 243 |  | 
|---|
| 244 | /* The following fields are used by call_rcu, hence own cacheline. */ | 
|---|
| 245 | raw_spinlock_t nocb_bypass_lock ____cacheline_internodealigned_in_smp; | 
|---|
| 246 | struct rcu_cblist nocb_bypass;	/* Lock-contention-bypass CB list. */ | 
|---|
| 247 | unsigned long nocb_bypass_first; /* Time (jiffies) of first enqueue. */ | 
|---|
| 248 | unsigned long nocb_nobypass_last; /* Last ->cblist enqueue (jiffies). */ | 
|---|
| 249 | int nocb_nobypass_count;	/* # ->cblist enqueues at ^^^ time. */ | 
|---|
| 250 |  | 
|---|
| 251 | /* The following fields are used by GP kthread, hence own cacheline. */ | 
|---|
| 252 | raw_spinlock_t nocb_gp_lock ____cacheline_internodealigned_in_smp; | 
|---|
| 253 | u8 nocb_gp_sleep;		/* Is the nocb GP thread asleep? */ | 
|---|
| 254 | u8 nocb_gp_bypass;		/* Found a bypass on last scan? */ | 
|---|
| 255 | u8 nocb_gp_gp;			/* GP to wait for on last scan? */ | 
|---|
| 256 | unsigned long nocb_gp_seq;	/*  If so, ->gp_seq to wait for. */ | 
|---|
| 257 | unsigned long nocb_gp_loops;	/* # passes through wait code. */ | 
|---|
| 258 | struct swait_queue_head nocb_gp_wq; /* For nocb kthreads to sleep on. */ | 
|---|
| 259 | bool nocb_cb_sleep;		/* Is the nocb CB thread asleep? */ | 
|---|
| 260 | struct task_struct *nocb_cb_kthread; | 
|---|
| 261 | struct list_head nocb_head_rdp; /* | 
|---|
| 262 | * Head of rcu_data list in wakeup chain, | 
|---|
| 263 | * if rdp_gp. | 
|---|
| 264 | */ | 
|---|
| 265 | struct list_head nocb_entry_rdp; /* rcu_data node in wakeup chain. */ | 
|---|
| 266 | struct rcu_data *nocb_toggling_rdp; /* rdp queued for (de-)offloading */ | 
|---|
| 267 |  | 
|---|
| 268 | /* The following fields are used by CB kthread, hence new cacheline. */ | 
|---|
| 269 | struct rcu_data *nocb_gp_rdp ____cacheline_internodealigned_in_smp; | 
|---|
| 270 | /* GP rdp takes GP-end wakeups. */ | 
|---|
| 271 | #endif /* #ifdef CONFIG_RCU_NOCB_CPU */ | 
|---|
| 272 |  | 
|---|
| 273 | /* 6) RCU priority boosting. */ | 
|---|
| 274 | struct task_struct *rcu_cpu_kthread_task; | 
|---|
| 275 | /* rcuc per-CPU kthread or NULL. */ | 
|---|
| 276 | unsigned int rcu_cpu_kthread_status; | 
|---|
| 277 | char rcu_cpu_has_work; | 
|---|
| 278 | unsigned long rcuc_activity; | 
|---|
| 279 |  | 
|---|
| 280 | /* 7) Diagnostic data, including RCU CPU stall warnings. */ | 
|---|
| 281 | unsigned int softirq_snap;	/* Snapshot of softirq activity. */ | 
|---|
| 282 | /* ->rcu_iw* fields protected by leaf rcu_node ->lock. */ | 
|---|
| 283 | struct irq_work rcu_iw;		/* Check for non-irq activity. */ | 
|---|
| 284 | bool rcu_iw_pending;		/* Is ->rcu_iw pending? */ | 
|---|
| 285 | unsigned long rcu_iw_gp_seq;	/* ->gp_seq associated with ->rcu_iw. */ | 
|---|
| 286 | unsigned long rcu_ofl_gp_seq;	/* ->gp_seq at last offline. */ | 
|---|
| 287 | short rcu_ofl_gp_state;		/* ->gp_state at last offline. */ | 
|---|
| 288 | unsigned long rcu_onl_gp_seq;	/* ->gp_seq at last online. */ | 
|---|
| 289 | short rcu_onl_gp_state;		/* ->gp_state at last online. */ | 
|---|
| 290 | unsigned long last_fqs_resched;	/* Time of last rcu_resched(). */ | 
|---|
| 291 | unsigned long last_sched_clock;	/* Jiffies of last rcu_sched_clock_irq(). */ | 
|---|
| 292 | struct rcu_snap_record snap_record; /* Snapshot of core stats at half of */ | 
|---|
| 293 | /* the first RCU stall timeout */ | 
|---|
| 294 |  | 
|---|
| 295 | long lazy_len;			/* Length of buffered lazy callbacks. */ | 
|---|
| 296 | int cpu; | 
|---|
| 297 | }; | 
|---|
| 298 |  | 
|---|
/*
 * Values that the ->nocb_defer_wakeup field of struct rcu_data may hold.
 */
#define RCU_NOCB_WAKE_NOT	0
#define RCU_NOCB_WAKE_BYPASS	1
#define RCU_NOCB_WAKE_LAZY	2
#define RCU_NOCB_WAKE		3
#define RCU_NOCB_WAKE_FORCE	4
|---|
| 305 |  | 
|---|
/*
 * Used for jiffies_till_first_fqs and jiffies_till_next_fqs.
 * Evaluates to 1, plus 1 when HZ exceeds 250, plus 1 more when HZ
 * exceeds 500.
 */
#define RCU_JIFFIES_TILL_FORCE_QS (1 + (HZ > 250) + (HZ > 500))

/*
 * Very large systems need more delay between bouts of quiescent-state
 * forcing.
 */
#define RCU_JIFFIES_FQS_DIV	256

/*
 * Allow other CPUs time to take at least one scheduling clock irq
 * before ratting on them.
 */
#define RCU_STALL_RAT_DELAY	2
|---|
| 317 |  | 
|---|
/*
 * Sleep until @cond becomes true.  The task state is set to
 * TASK_INTERRUPTIBLE before each evaluation of @cond, schedule() is
 * called whenever @cond is false, and the state is put back to
 * TASK_RUNNING once @cond has been observed true.  Note that @cond is
 * evaluated once per pass, so it may be evaluated several times.
 */
#define rcu_wait(cond)							\
do {									\
	set_current_state(TASK_INTERRUPTIBLE);				\
	while (!(cond)) {						\
		schedule();						\
		set_current_state(TASK_INTERRUPTIBLE);			\
	}								\
	__set_current_state(TASK_RUNNING);				\
} while (0)
|---|
| 328 |  | 
|---|
/*
 * Upper bound on the number of synchronize_rcu() users that are
 * awakened directly by rcu_gp_kthread().  The remaining users are
 * deferred to the main worker.
 */
#define SR_MAX_USERS_WAKE_FROM_GP	5
#define SR_NORMAL_GP_WAIT_HEAD_MAX	5
|---|
| 336 |  | 
|---|
| 337 | struct sr_wait_node { | 
|---|
| 338 | atomic_t inuse; | 
|---|
| 339 | struct llist_node node; | 
|---|
| 340 | }; | 
|---|
| 341 |  | 
|---|
| 342 | /* | 
|---|
| 343 | * RCU global state, including node hierarchy.  This hierarchy is | 
|---|
| 344 | * represented in "heap" form in a dense array.  The root (first level) | 
|---|
| 345 | * of the hierarchy is in ->node[0] (referenced by ->level[0]), the second | 
|---|
| 346 | * level in ->node[1] through ->node[m] (->node[1] referenced by ->level[1]), | 
|---|
| 347 | * and the third level in ->node[m+1] and following (->node[m+1] referenced | 
|---|
| 348 | * by ->level[2]).  The number of levels is determined by the number of | 
|---|
| 349 | * CPUs and by CONFIG_RCU_FANOUT.  Small systems will have a "hierarchy" | 
|---|
| 350 | * consisting of a single rcu_node. | 
|---|
| 351 | */ | 
|---|
| 352 | struct rcu_state { | 
|---|
| 353 | struct rcu_node node[NUM_RCU_NODES];	/* Hierarchy. */ | 
|---|
| 354 | struct rcu_node *level[RCU_NUM_LVLS + 1]; | 
|---|
| 355 | /* Hierarchy levels (+1 to */ | 
|---|
| 356 | /*  shut bogus gcc warning) */ | 
|---|
| 357 | int ncpus;				/* # CPUs seen so far. */ | 
|---|
| 358 | int n_online_cpus;			/* # CPUs online for RCU. */ | 
|---|
| 359 |  | 
|---|
| 360 | /* The following fields are guarded by the root rcu_node's lock. */ | 
|---|
| 361 |  | 
|---|
| 362 | unsigned long gp_seq ____cacheline_internodealigned_in_smp; | 
|---|
| 363 | /* Grace-period sequence #. */ | 
|---|
| 364 | unsigned long gp_max;			/* Maximum GP duration in */ | 
|---|
| 365 | /*  jiffies. */ | 
|---|
| 366 | struct task_struct *gp_kthread;		/* Task for grace periods. */ | 
|---|
| 367 | struct swait_queue_head gp_wq;		/* Where GP task waits. */ | 
|---|
| 368 | short gp_flags;				/* Commands for GP task. */ | 
|---|
| 369 | short gp_state;				/* GP kthread sleep state. */ | 
|---|
| 370 | unsigned long gp_wake_time;		/* Last GP kthread wake. */ | 
|---|
| 371 | unsigned long gp_wake_seq;		/* ->gp_seq at ^^^. */ | 
|---|
| 372 | unsigned long gp_seq_polled;		/* GP seq for polled API. */ | 
|---|
| 373 | unsigned long gp_seq_polled_snap;	/* ->gp_seq_polled at normal GP start. */ | 
|---|
| 374 | unsigned long gp_seq_polled_exp_snap;	/* ->gp_seq_polled at expedited GP start. */ | 
|---|
| 375 |  | 
|---|
| 376 | /* End of fields guarded by root rcu_node's lock. */ | 
|---|
| 377 |  | 
|---|
| 378 | struct mutex barrier_mutex;		/* Guards barrier fields. */ | 
|---|
| 379 | atomic_t barrier_cpu_count;		/* # CPUs waiting on. */ | 
|---|
| 380 | struct completion barrier_completion;	/* Wake at barrier end. */ | 
|---|
| 381 | unsigned long barrier_sequence;		/* ++ at start and end of */ | 
|---|
| 382 | /*  rcu_barrier(). */ | 
|---|
| 383 | /* End of fields guarded by barrier_mutex. */ | 
|---|
| 384 |  | 
|---|
| 385 | raw_spinlock_t barrier_lock;		/* Protects ->barrier_seq_snap. */ | 
|---|
| 386 |  | 
|---|
| 387 | struct mutex exp_mutex;			/* Serialize expedited GP. */ | 
|---|
| 388 | struct mutex exp_wake_mutex;		/* Serialize wakeup. */ | 
|---|
| 389 | unsigned long expedited_sequence;	/* Take a ticket. */ | 
|---|
| 390 | atomic_t expedited_need_qs;		/* # CPUs left to check in. */ | 
|---|
| 391 | struct swait_queue_head expedited_wq;	/* Wait for check-ins. */ | 
|---|
| 392 | int ncpus_snap;				/* # CPUs seen last time. */ | 
|---|
| 393 | u8 cbovld;				/* Callback overload now? */ | 
|---|
| 394 | u8 cbovldnext;				/* ^        ^  next time? */ | 
|---|
| 395 |  | 
|---|
| 396 | unsigned long jiffies_force_qs;		/* Time at which to invoke */ | 
|---|
| 397 | /*  force_quiescent_state(). */ | 
|---|
| 398 | unsigned long jiffies_kick_kthreads;	/* Time at which to kick */ | 
|---|
| 399 | /*  kthreads, if configured. */ | 
|---|
| 400 | unsigned long n_force_qs;		/* Number of calls to */ | 
|---|
| 401 | /*  force_quiescent_state(). */ | 
|---|
| 402 | unsigned long gp_start;			/* Time at which GP started, */ | 
|---|
| 403 | /*  but in jiffies. */ | 
|---|
| 404 | unsigned long gp_end;			/* Time last GP ended, again */ | 
|---|
| 405 | /*  in jiffies. */ | 
|---|
| 406 | unsigned long gp_activity;		/* Time of last GP kthread */ | 
|---|
| 407 | /*  activity in jiffies. */ | 
|---|
| 408 | unsigned long gp_req_activity;		/* Time of last GP request */ | 
|---|
| 409 | /*  in jiffies. */ | 
|---|
| 410 | unsigned long jiffies_stall;		/* Time at which to check */ | 
|---|
| 411 | /*  for CPU stalls. */ | 
|---|
| 412 | int nr_fqs_jiffies_stall;		/* Number of fqs loops after | 
|---|
| 413 | * which read jiffies and set | 
|---|
| 414 | * jiffies_stall. Stall | 
|---|
| 415 | * warnings disabled if !0. */ | 
|---|
| 416 | unsigned long jiffies_resched;		/* Time at which to resched */ | 
|---|
| 417 | /*  a reluctant CPU. */ | 
|---|
| 418 | unsigned long n_force_qs_gpstart;	/* Snapshot of n_force_qs at */ | 
|---|
| 419 | /*  GP start. */ | 
|---|
| 420 | const char *name;			/* Name of structure. */ | 
|---|
| 421 | char abbr;				/* Abbreviated name. */ | 
|---|
| 422 |  | 
|---|
| 423 | arch_spinlock_t ofl_lock ____cacheline_internodealigned_in_smp; | 
|---|
| 424 | /* Synchronize offline with */ | 
|---|
| 425 | /*  GP pre-initialization. */ | 
|---|
| 426 |  | 
|---|
| 427 | /* synchronize_rcu() part. */ | 
|---|
| 428 | struct llist_head srs_next;	/* request a GP users. */ | 
|---|
| 429 | struct llist_node *srs_wait_tail; /* wait for GP users. */ | 
|---|
| 430 | struct llist_node *srs_done_tail; /* ready for GP users. */ | 
|---|
| 431 | struct sr_wait_node srs_wait_nodes[SR_NORMAL_GP_WAIT_HEAD_MAX]; | 
|---|
| 432 | struct work_struct srs_cleanup_work; | 
|---|
| 433 | atomic_t srs_cleanups_pending; /* srs inflight worker cleanups. */ | 
|---|
| 434 |  | 
|---|
| 435 | #ifdef CONFIG_RCU_NOCB_CPU | 
|---|
| 436 | struct mutex nocb_mutex;		/* Guards (de-)offloading */ | 
|---|
| 437 | int nocb_is_setup;			/* nocb is setup from boot */ | 
|---|
| 438 | #endif | 
|---|
| 439 | }; | 
|---|
| 440 |  | 
|---|
/*
 * Bit values for the ->gp_flags field of the rcu_state structure.
 */
#define RCU_GP_FLAG_INIT (1 << 0)	/* Grace-period initialization is needed. */
#define RCU_GP_FLAG_FQS  (1 << 1)	/* Grace-period quiescent-state forcing is needed. */
#define RCU_GP_FLAG_OVLD (1 << 2)	/* Callback overload is being experienced. */
|---|
| 445 |  | 
|---|
/*
 * Values for the ->gp_state field of the rcu_state structure.
 */
#define RCU_GP_IDLE		0	/* Initial state; no GP in progress. */
#define RCU_GP_WAIT_GPS		1	/* Waiting for grace-period start. */
#define RCU_GP_DONE_GPS		2	/* Done waiting for grace-period start. */
#define RCU_GP_ONOFF		3	/* Grace-period initialization hotplug. */
#define RCU_GP_INIT		4	/* Grace-period initialization. */
#define RCU_GP_WAIT_FQS		5	/* Waiting for force-quiescent-state time. */
#define RCU_GP_DOING_FQS	6	/* Done waiting for force-quiescent-state time. */
#define RCU_GP_CLEANUP		7	/* Grace-period cleanup started. */
#define RCU_GP_CLEANED		8	/* Grace-period cleanup complete. */
|---|
| 456 |  | 
|---|
/*
 * The rcu_state name must be placed in the __tracepoint_string section
 * for it to be exported to the tracing tools.  Doing so requires a
 * separate variable, tp_<sname>_varname, pointing at the string in use;
 * this lets the tracing userspace tools map the string's address back
 * to the matching string.
 */
#ifdef CONFIG_PREEMPT_RCU
#define RCU_ABBR 'p'
#define RCU_NAME_RAW "rcu_preempt"
#else /* #ifdef CONFIG_PREEMPT_RCU */
#define RCU_ABBR 's'
#define RCU_NAME_RAW "rcu_sched"
#endif /* #else #ifdef CONFIG_PREEMPT_RCU */

#ifdef CONFIG_TRACING
static char rcu_name[] = RCU_NAME_RAW;
static const char *tp_rcu_varname __used __tracepoint_string = rcu_name;
#define RCU_NAME rcu_name
#else /* #ifdef CONFIG_TRACING */
#define RCU_NAME RCU_NAME_RAW
#endif /* #else #ifdef CONFIG_TRACING */
|---|
| 479 |  | 
|---|
| 480 | /* Forward declarations for tree_plugin.h */ | 
|---|
| 481 | static void rcu_bootup_announce(void); | 
|---|
| 482 | static void rcu_qs(void); | 
|---|
| 483 | static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); | 
|---|
| 484 | #ifdef CONFIG_HOTPLUG_CPU | 
|---|
| 485 | static bool rcu_preempt_has_tasks(struct rcu_node *rnp); | 
|---|
| 486 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 
|---|
| 487 | static int rcu_print_task_exp_stall(struct rcu_node *rnp); | 
|---|
| 488 | static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); | 
|---|
| 489 | static void rcu_flavor_sched_clock_irq(int user); | 
|---|
| 490 | static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck); | 
|---|
| 491 | static void rcu_preempt_deferred_qs_init(struct rcu_data *rdp); | 
|---|
| 492 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); | 
|---|
| 493 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); | 
|---|
| 494 | static bool rcu_is_callbacks_kthread(struct rcu_data *rdp); | 
|---|
| 495 | static void rcu_cpu_kthread_setup(unsigned int cpu); | 
|---|
| 496 | static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp); | 
|---|
| 497 | static bool rcu_preempt_has_tasks(struct rcu_node *rnp); | 
|---|
| 498 | static bool rcu_preempt_need_deferred_qs(struct task_struct *t); | 
|---|
| 499 | static void zero_cpu_stall_ticks(struct rcu_data *rdp); | 
|---|
| 500 | static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp); | 
|---|
| 501 | static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq); | 
|---|
| 502 | static void rcu_init_one_nocb(struct rcu_node *rnp); | 
|---|
| 503 | static bool wake_nocb_gp(struct rcu_data *rdp, bool force); | 
|---|
| 504 | static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, | 
|---|
| 505 | unsigned long j, bool lazy); | 
|---|
| 506 | static void call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *head, | 
|---|
| 507 | rcu_callback_t func, unsigned long flags, bool lazy); | 
|---|
| 508 | static void __maybe_unused __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty, | 
|---|
| 509 | unsigned long flags); | 
|---|
| 510 | static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level); | 
|---|
| 511 | static bool do_nocb_deferred_wakeup(struct rcu_data *rdp); | 
|---|
| 512 | static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp); | 
|---|
| 513 | static void rcu_spawn_cpu_nocb_kthread(int cpu); | 
|---|
| 514 | static void show_rcu_nocb_state(struct rcu_data *rdp); | 
|---|
| 515 | static void rcu_nocb_lock(struct rcu_data *rdp); | 
|---|
| 516 | static void rcu_nocb_unlock(struct rcu_data *rdp); | 
|---|
| 517 | static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp, | 
|---|
| 518 | unsigned long flags); | 
|---|
| 519 | static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp); | 
|---|
#ifdef CONFIG_RCU_NOCB_CPU
static void __init rcu_organize_nocb_kthreads(void);

/*
 * Interrupts are disabled first and the offloaded state is checked
 * afterwards, which keeps local locking safe against concurrent
 * de-offloading.  ->nocb_lock is acquired only when the callback list
 * of @rdp is offloaded.
 */
#define rcu_nocb_lock_irqsave(rdp, flags)			\
do {								\
	local_irq_save(flags);					\
	if (rcu_segcblist_is_offloaded(&(rdp)->cblist))	\
		raw_spin_lock(&(rdp)->nocb_lock);		\
} while (0)
#else /* #ifdef CONFIG_RCU_NOCB_CPU */
/* Without callback offloading, only interrupts need to be disabled. */
#define rcu_nocb_lock_irqsave(rdp, flags) local_irq_save(flags)
#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
|---|
| 536 |  | 
|---|
/* Helpers for binding the grace-period kthread and nohz_full checks. */
static void rcu_bind_gp_kthread(void);
static bool rcu_nohz_full_cpu(void);

/*
 * Forward declarations for tree_stall.h.
 */
static void record_gp_stall_check_time(void);
static void rcu_iw_handler(struct irq_work *iwp);
static void check_cpu_stall(struct rcu_data *rdp);
static void rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp,
				     const unsigned long gpssdelay);

/*
 * Forward declarations for tree_exp.h.
 */
static void sync_rcu_do_polled_gp(struct work_struct *wp);
|---|
| 549 |  | 
|---|