/* SPDX-License-Identifier: GPL-2.0+ */
/*
 * RCU expedited grace periods
 *
 * Copyright IBM Corporation, 2016
 *
 * Authors: Paul E. McKenney <paulmck@linux.ibm.com>
 */

#include <linux/console.h>
#include <linux/lockdep.h>

static void rcu_exp_handler(void *unused);
static int rcu_print_task_exp_stall(struct rcu_node *rnp);
static void rcu_exp_print_detail_task_stall_rnp(struct rcu_node *rnp);

/*
 * Record the start of an expedited grace period.
 */
static void rcu_exp_gp_seq_start(void)
{
	rcu_seq_start(&rcu_state.expedited_sequence);
	rcu_poll_gp_seq_start_unlocked(&rcu_state.gp_seq_polled_exp_snap);
}

/*
 * Return the value that the expedited-grace-period counter will have
 * at the end of the current grace period.
 */
static __maybe_unused unsigned long rcu_exp_gp_seq_endval(void)
{
	return rcu_seq_endval(&rcu_state.expedited_sequence);
}

/*
 * Record the end of an expedited grace period.
 */
static void rcu_exp_gp_seq_end(void)
{
	rcu_poll_gp_seq_end_unlocked(&rcu_state.gp_seq_polled_exp_snap);
	rcu_seq_end(&rcu_state.expedited_sequence);
	smp_mb(); /* Ensure that consecutive grace periods serialize. */
}

/*
 * Take a snapshot of the expedited-grace-period counter, which is the
 * earliest value that will indicate that a full grace period has
 * elapsed since the current time.
 */
static unsigned long rcu_exp_gp_seq_snap(void)
{
	unsigned long s;

	smp_mb(); /* Caller's modifications seen first by other CPUs. */
	s = rcu_seq_snap(&rcu_state.expedited_sequence);
	trace_rcu_exp_grace_period(rcu_state.name, s, TPS("snap"));
	return s;
}

/*
 * Given a counter snapshot from rcu_exp_gp_seq_snap(), return true
 * if a full expedited grace period has elapsed since that snapshot
 * was taken.
 */
static bool rcu_exp_gp_seq_done(unsigned long s)
{
	return rcu_seq_done(&rcu_state.expedited_sequence, s);
}

/*
 * Reset the ->expmaskinit values in the rcu_node tree to reflect any
 * recent CPU-online activity.  Note that these masks are not cleared
 * when CPUs go offline, so they reflect the union of all CPUs that have
 * ever been online.  This means that this function normally takes its
 * no-work-to-do fastpath.
 */
static void sync_exp_reset_tree_hotplug(void)
{
	bool done;
	unsigned long flags;
	unsigned long mask;
	unsigned long oldmask;
	int ncpus = smp_load_acquire(&rcu_state.ncpus); /* Order vs. locking. */
	struct rcu_node *rnp;
	struct rcu_node *rnp_up;

	/* If no new CPUs onlined since last time, nothing to do. */
	if (likely(ncpus == rcu_state.ncpus_snap))
		return;
	rcu_state.ncpus_snap = ncpus;

	/*
	 * Each pass through the following loop propagates newly onlined
	 * CPUs for the current rcu_node structure up the rcu_node tree.
	 */
	rcu_for_each_leaf_node(rnp) {
		raw_spin_lock_irqsave_rcu_node(rnp, flags);
		if (rnp->expmaskinit == rnp->expmaskinitnext) {
			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
			continue;  /* No new CPUs, nothing to do. */
		}

		/* Update this node's mask, track old value for propagation. */
		oldmask = rnp->expmaskinit;
		rnp->expmaskinit = rnp->expmaskinitnext;
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
		/* If it was already nonzero, nothing to propagate. */
		if (oldmask)
			continue;

		/* Propagate the new CPU up the tree. */
		mask = rnp->grpmask;
		rnp_up = rnp->parent;
		done = false;
		while (rnp_up) {
			raw_spin_lock_irqsave_rcu_node(rnp_up, flags);
			if (rnp_up->expmaskinit)
				done = true;
			rnp_up->expmaskinit |= mask;
			raw_spin_unlock_irqrestore_rcu_node(rnp_up, flags);
			if (done)
				break;
			mask = rnp_up->grpmask;
			rnp_up = rnp_up->parent;
		}
	}
}

/*
 * Reset the ->expmask values in the rcu_node tree in preparation for
 * a new expedited grace period.
 */
static void __maybe_unused sync_exp_reset_tree(void)
{
	unsigned long flags;
	struct rcu_node *rnp;

	sync_exp_reset_tree_hotplug();
	rcu_for_each_node_breadth_first(rnp) {
		raw_spin_lock_irqsave_rcu_node(rnp, flags);
		WARN_ON_ONCE(rnp->expmask);
		WRITE_ONCE(rnp->expmask, rnp->expmaskinit);
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
	}
}
/*
 * Return true if there is no RCU expedited grace period in progress
 * for the specified rcu_node structure, in other words, if all CPUs and
 * tasks covered by the specified rcu_node structure have done their bit
 * for the current expedited grace period.
 */
static bool sync_rcu_exp_done(struct rcu_node *rnp)
{
	raw_lockdep_assert_held_rcu_node(rnp);
	return READ_ONCE(rnp->exp_tasks) == NULL &&
	       READ_ONCE(rnp->expmask) == 0;
}

/*
 * Like sync_rcu_exp_done(), but where the caller does not hold the
 * rcu_node's ->lock.
 */
static bool sync_rcu_exp_done_unlocked(struct rcu_node *rnp)
{
	unsigned long flags;
	bool ret;

	raw_spin_lock_irqsave_rcu_node(rnp, flags);
	ret = sync_rcu_exp_done(rnp);
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);

	return ret;
}

/*
 * Report the exit from RCU read-side critical section for the last task
 * that queued itself during or before the current expedited preemptible-RCU
 * grace period.  This event is reported either to the rcu_node structure on
 * which the task was queued or to one of that rcu_node structure's ancestors,
 * recursively up the tree.  (Calm down, calm down, we do the recursion
 * iteratively!)
 */
static void __rcu_report_exp_rnp(struct rcu_node *rnp,
				 bool wake, unsigned long flags)
	__releases(rnp->lock)
{
	unsigned long mask;

	raw_lockdep_assert_held_rcu_node(rnp);
	for (;;) {
		if (!sync_rcu_exp_done(rnp)) {
			if (!rnp->expmask)
				rcu_initiate_boost(rnp, flags);
			else
				raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
			break;
		}
		if (rnp->parent == NULL) {
			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
			if (wake)
				swake_up_one_online(&rcu_state.expedited_wq);

			break;
		}
		mask = rnp->grpmask;
		raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled */
		rnp = rnp->parent;
		raw_spin_lock_rcu_node(rnp); /* irqs already disabled */
		WARN_ON_ONCE(!(rnp->expmask & mask));
		WRITE_ONCE(rnp->expmask, rnp->expmask & ~mask);
	}
}

/*
 * Report expedited quiescent state for specified node.  This is a
 * lock-acquisition wrapper function for __rcu_report_exp_rnp().
 */
static void __maybe_unused rcu_report_exp_rnp(struct rcu_node *rnp, bool wake)
{
	unsigned long flags;

	raw_spin_lock_irqsave_rcu_node(rnp, flags);
	__rcu_report_exp_rnp(rnp, wake, flags);
}

/*
 * Report expedited quiescent state for multiple CPUs, all covered by the
 * specified leaf rcu_node structure.
 */
static void rcu_report_exp_cpu_mult(struct rcu_node *rnp,
				    unsigned long mask, bool wake)
{
	int cpu;
	unsigned long flags;
	struct rcu_data *rdp;

	raw_spin_lock_irqsave_rcu_node(rnp, flags);
	if (!(rnp->expmask & mask)) {
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
		return;
	}
	WRITE_ONCE(rnp->expmask, rnp->expmask & ~mask);
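	/*
	 * Clear the expedited-GP tick dependency for any CPUs in the mask
	 * that had the tick forced on to help this grace period along.
	 */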
	for_each_leaf_node_cpu_mask(rnp, cpu, mask) {
		rdp = per_cpu_ptr(&rcu_data, cpu);
		if (!IS_ENABLED(CONFIG_NO_HZ_FULL) || !rdp->rcu_forced_tick_exp)
			continue;
		rdp->rcu_forced_tick_exp = false;
		tick_dep_clear_cpu(cpu, TICK_DEP_BIT_RCU_EXP);
	}
	__rcu_report_exp_rnp(rnp, wake, flags); /* Releases rnp->lock. */
}

/*
 * Report expedited quiescent state for specified rcu_data (CPU).
 */
static void rcu_report_exp_rdp(struct rcu_data *rdp)
{
	WRITE_ONCE(rdp->cpu_no_qs.b.exp, false);
	rcu_report_exp_cpu_mult(rdp->mynode, rdp->grpmask, true);
}

/* Common code for work-done checking. */
static bool sync_exp_work_done(unsigned long s)
{
	if (rcu_exp_gp_seq_done(s)) {
		trace_rcu_exp_grace_period(rcu_state.name, s, TPS("done"));
		/*
		 * Order GP completion with preceding accesses. Order also GP
		 * completion with post GP update side accesses. Pairs with
		 * rcu_seq_end().
		 */
		smp_mb();
		return true;
	}
	return false;
}

/*
 * Funnel-lock acquisition for expedited grace periods.  Returns true
 * if some other task completed an expedited grace period that this task
 * can piggy-back on, and with no mutex held.  Otherwise, returns false
 * with the mutex held, indicating that the caller must actually do the
 * expedited grace period.
 */
static bool exp_funnel_lock(unsigned long s)
{
	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, raw_smp_processor_id());
	struct rcu_node *rnp = rdp->mynode;
	struct rcu_node *rnp_root = rcu_get_root();

	/* Low-contention fastpath. */
	if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s) &&
	    (rnp == rnp_root ||
	     ULONG_CMP_LT(READ_ONCE(rnp_root->exp_seq_rq), s)) &&
	    mutex_trylock(&rcu_state.exp_mutex))
		goto fastpath;

	/*
	 * Each pass through the following loop works its way up
	 * the rcu_node tree, returning if others have done the work;
	 * otherwise, it falls through to acquire ->exp_mutex.  The mapping
	 * from CPU to rcu_node structure can be inexact, as it is just
	 * promoting locality and is not strictly needed for correctness.
	 */
	for (; rnp != NULL; rnp = rnp->parent) {
		if (sync_exp_work_done(s))
			return true;

		/* Work not done, either wait here or go up. */
		spin_lock(&rnp->exp_lock);
		if (ULONG_CMP_GE(rnp->exp_seq_rq, s)) {

			/* Someone else doing GP, so wait for them. */
			spin_unlock(&rnp->exp_lock);
			trace_rcu_exp_funnel_lock(rcu_state.name, rnp->level,
						  rnp->grplo, rnp->grphi,
						  TPS("wait"));
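			/*
			 * Each rcu_node structure has four expedited wait
			 * queues, indexed by the two low-order bits of the
			 * sequence-counter snapshot, so that wakeups for one
			 * grace period do not disturb waiters on another.
			 */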
			wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
				   sync_exp_work_done(s));
			return true;
		}
		WRITE_ONCE(rnp->exp_seq_rq, s); /* Followers can wait on us. */
		spin_unlock(&rnp->exp_lock);
		trace_rcu_exp_funnel_lock(rcu_state.name, rnp->level,
					  rnp->grplo, rnp->grphi, TPS("nxtlvl"));
	}
	mutex_lock(&rcu_state.exp_mutex);
fastpath:
	if (sync_exp_work_done(s)) {
		mutex_unlock(&rcu_state.exp_mutex);
		return true;
	}
	rcu_exp_gp_seq_start();
	trace_rcu_exp_grace_period(rcu_state.name, s, TPS("start"));
	return false;
}

/*
 * Select the CPUs within the specified rcu_node that the upcoming
 * expedited grace period needs to wait for.
 */
static void __sync_rcu_exp_select_node_cpus(struct rcu_exp_work *rewp)
{
	int cpu;
	unsigned long flags;
	unsigned long mask_ofl_test;
	unsigned long mask_ofl_ipi;
	int ret;
	struct rcu_node *rnp = container_of(rewp, struct rcu_node, rew);

	raw_spin_lock_irqsave_rcu_node(rnp, flags);

	/* Each pass checks a CPU for identity, offline, and idle. */
	mask_ofl_test = 0;
	for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
		struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
		unsigned long mask = rdp->grpmask;
		int snap;

		if (raw_smp_processor_id() == cpu ||
		    !(rnp->qsmaskinitnext & mask)) {
			mask_ofl_test |= mask;
		} else {
			/*
			 * Full ordering between remote CPU's post idle accesses
			 * and updater's accesses prior to current GP (and also
			 * the started GP sequence number) is enforced by
			 * rcu_seq_start() implicit barrier, relayed by kworkers
			 * locking and even further by smp_mb__after_unlock_lock()
			 * barriers chained all the way throughout the rnp locking
			 * tree since sync_exp_reset_tree() and up to the current
			 * leaf rnp locking.
			 *
			 * Ordering between remote CPU's pre idle accesses and
			 * post grace period updater's accesses is enforced by the
			 * below acquire semantic.
			 */
			snap = ct_rcu_watching_cpu_acquire(cpu);
			if (rcu_watching_snap_in_eqs(snap))
				mask_ofl_test |= mask;
			else
				rdp->exp_watching_snap = snap;
		}
	}
	mask_ofl_ipi = rnp->expmask & ~mask_ofl_test;

	/*
	 * Need to wait for any blocked tasks as well.  Note that
	 * additional blocking tasks will also block the expedited GP
	 * until such time as the ->expmask bits are cleared.
	 */
	if (rcu_preempt_has_tasks(rnp))
		WRITE_ONCE(rnp->exp_tasks, rnp->blkd_tasks.next);
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);

	/* IPI the remaining CPUs for expedited quiescent state. */
	for_each_leaf_node_cpu_mask(rnp, cpu, mask_ofl_ipi) {
		struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
		unsigned long mask = rdp->grpmask;

retry_ipi:
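		/*
		 * If the CPU has passed through an RCU-idle period since the
		 * snapshot was taken, that already counts as a quiescent
		 * state, so no IPI is needed.
		 */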
		if (rcu_watching_snap_stopped_since(rdp, rdp->exp_watching_snap)) {
			mask_ofl_test |= mask;
			continue;
		}
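		/*
		 * The current CPU is running the expedited-GP machinery, so
		 * it cannot be within an RCU read-side critical section:
		 * report its quiescent state rather than IPIing ourselves.
		 */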
		if (get_cpu() == cpu) {
			mask_ofl_test |= mask;
			put_cpu();
			continue;
		}
		ret = smp_call_function_single(cpu, rcu_exp_handler, NULL, 0);
		put_cpu();
		/* The CPU will report the QS in response to the IPI. */
		if (!ret)
			continue;

		/* Failed, raced with CPU hotplug operation. */
		raw_spin_lock_irqsave_rcu_node(rnp, flags);
		if ((rnp->qsmaskinitnext & mask) &&
		    (rnp->expmask & mask)) {
			/* Online, so delay for a bit and try again. */
			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
			trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("selectofl"));
			schedule_timeout_idle(1);
			goto retry_ipi;
		}
		/* CPU really is offline, so we must report its QS. */
		if (rnp->expmask & mask)
			mask_ofl_test |= mask;
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
	}
	/* Report quiescent states for those that went offline. */
	if (mask_ofl_test)
		rcu_report_exp_cpu_mult(rnp, mask_ofl_test, false);
}

static void rcu_exp_sel_wait_wake(unsigned long s);

static void sync_rcu_exp_select_node_cpus(struct kthread_work *wp)
{
	struct rcu_exp_work *rewp =
		container_of(wp, struct rcu_exp_work, rew_work);

	__sync_rcu_exp_select_node_cpus(rewp);
}

static inline bool rcu_exp_worker_started(void)
{
	return !!READ_ONCE(rcu_exp_gp_kworker);
}

static inline bool rcu_exp_par_worker_started(struct rcu_node *rnp)
{
	return !!READ_ONCE(rnp->exp_kworker);
}

static inline void sync_rcu_exp_select_cpus_queue_work(struct rcu_node *rnp)
{
	kthread_init_work(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus);
	/*
	 * Use the per-node kworker (rnp->exp_kworker), because flushing a
	 * work item from another work item on the same kthread worker can
	 * result in deadlock.
	 */
	kthread_queue_work(READ_ONCE(rnp->exp_kworker), &rnp->rew.rew_work);
}

static inline void sync_rcu_exp_select_cpus_flush_work(struct rcu_node *rnp)
{
	kthread_flush_work(&rnp->rew.rew_work);
}

/*
 * Work-queue handler to drive an expedited grace period forward.
 */
static void wait_rcu_exp_gp(struct kthread_work *wp)
{
	struct rcu_exp_work *rewp;

	rewp = container_of(wp, struct rcu_exp_work, rew_work);
	rcu_exp_sel_wait_wake(rewp->rew_s);
}

static inline void synchronize_rcu_expedited_queue_work(struct rcu_exp_work *rew)
{
	kthread_init_work(&rew->rew_work, wait_rcu_exp_gp);
	kthread_queue_work(rcu_exp_gp_kworker, &rew->rew_work);
}

/*
 * Select the nodes that the upcoming expedited grace period needs
 * to wait for.
 */
static void sync_rcu_exp_select_cpus(void)
{
	struct rcu_node *rnp;

	trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("reset"));
	sync_exp_reset_tree();
	trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("select"));

	/* Schedule work for each leaf rcu_node structure. */
	rcu_for_each_leaf_node(rnp) {
		rnp->exp_need_flush = false;
		if (!READ_ONCE(rnp->expmask))
			continue; /* Avoid early boot non-existent wq. */
		if (!rcu_exp_par_worker_started(rnp) ||
		    rcu_scheduler_active != RCU_SCHEDULER_RUNNING ||
		    rcu_is_last_leaf_node(rnp)) {
			/* No worker started yet or last leaf, do direct call. */
			sync_rcu_exp_select_node_cpus(&rnp->rew.rew_work);
			continue;
		}
		sync_rcu_exp_select_cpus_queue_work(rnp);
		rnp->exp_need_flush = true;
	}

	/* Wait for jobs (if any) to complete. */
	rcu_for_each_leaf_node(rnp)
		if (rnp->exp_need_flush)
			sync_rcu_exp_select_cpus_flush_work(rnp);
}

/*
 * Wait for the expedited grace period to elapse, within time limit.
 * If the time limit is exceeded without the grace period elapsing,
 * return false, otherwise return true.
 */
static bool synchronize_rcu_expedited_wait_once(long tlimit)
{
	int t;
	struct rcu_node *rnp_root = rcu_get_root();

	t = swait_event_timeout_exclusive(rcu_state.expedited_wq,
					  sync_rcu_exp_done_unlocked(rnp_root),
					  tlimit);
	// Workqueues should not be signaled.
	if (t > 0 || sync_rcu_exp_done_unlocked(rnp_root))
		return true;
	WARN_ON(t < 0);  /* workqueues should not be signaled. */
	return false;
}

/*
 * Print out an expedited RCU CPU stall warning message.
 */
static void synchronize_rcu_expedited_stall(unsigned long jiffies_start, unsigned long j)
{
	int cpu;
	unsigned long mask;
	int ndetected;
	struct rcu_node *rnp;
	struct rcu_node *rnp_root = rcu_get_root();

	if (READ_ONCE(csd_lock_suppress_rcu_stall) && csd_lock_is_stuck()) {
		pr_err("INFO: %s detected expedited stalls, but suppressed full report due to a stuck CSD-lock.\n", rcu_state.name);
		return;
	}
	pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {", rcu_state.name);
	ndetected = 0;
	rcu_for_each_leaf_node(rnp) {
		ndetected += rcu_print_task_exp_stall(rnp);
		for_each_leaf_node_possible_cpu(rnp, cpu) {
			struct rcu_data *rdp;

			mask = leaf_node_cpu_bit(rnp, cpu);
			if (!(READ_ONCE(rnp->expmask) & mask))
				continue;
			ndetected++;
			rdp = per_cpu_ptr(&rcu_data, cpu);
			pr_cont(" %d-%c%c%c%c", cpu,
				"O."[!!cpu_online(cpu)],
				"o."[!!(rdp->grpmask & rnp->expmaskinit)],
				"N."[!!(rdp->grpmask & rnp->expmaskinitnext)],
				"D."[!!data_race(rdp->cpu_no_qs.b.exp)]);
		}
	}
	pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
		j - jiffies_start, rcu_state.expedited_sequence, data_race(rnp_root->expmask),
		".T"[!!data_race(rnp_root->exp_tasks)]);
	if (ndetected) {
		pr_err("blocking rcu_node structures (internal RCU debug):");
		rcu_for_each_node_breadth_first(rnp) {
			if (rnp == rnp_root)
				continue; /* printed unconditionally */
			if (sync_rcu_exp_done_unlocked(rnp))
				continue;
			pr_cont(" l=%u:%d-%d:%#lx/%c",
				rnp->level, rnp->grplo, rnp->grphi, data_race(rnp->expmask),
				".T"[!!data_race(rnp->exp_tasks)]);
		}
		pr_cont("\n");
	}
	rcu_for_each_leaf_node(rnp) {
		for_each_leaf_node_possible_cpu(rnp, cpu) {
			mask = leaf_node_cpu_bit(rnp, cpu);
			if (!(READ_ONCE(rnp->expmask) & mask))
				continue;
			dump_cpu_task(cpu);
		}
		rcu_exp_print_detail_task_stall_rnp(rnp);
	}
}

/*
 * Wait for the expedited grace period to elapse, issuing any needed
 * RCU CPU stall warnings along the way.
 */
static void synchronize_rcu_expedited_wait(void)
{
	int cpu;
	unsigned long j;
	unsigned long jiffies_stall;
	unsigned long jiffies_start;
	unsigned long mask;
	struct rcu_data *rdp;
	struct rcu_node *rnp;
	unsigned long flags;

	trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("startwait"));
	jiffies_stall = rcu_exp_jiffies_till_stall_check();
	jiffies_start = jiffies;
	if (tick_nohz_full_enabled() && rcu_inkernel_boot_has_ended()) {
		if (synchronize_rcu_expedited_wait_once(1))
			return;
		rcu_for_each_leaf_node(rnp) {
			raw_spin_lock_irqsave_rcu_node(rnp, flags);
			mask = READ_ONCE(rnp->expmask);
			for_each_leaf_node_cpu_mask(rnp, cpu, mask) {
				rdp = per_cpu_ptr(&rcu_data, cpu);
				if (rdp->rcu_forced_tick_exp)
					continue;
				rdp->rcu_forced_tick_exp = true;
				if (cpu_online(cpu))
					tick_dep_set_cpu(cpu, TICK_DEP_BIT_RCU_EXP);
			}
			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
		}
		j = READ_ONCE(jiffies_till_first_fqs);
		if (synchronize_rcu_expedited_wait_once(j + HZ))
			return;
	}

	for (;;) {
		unsigned long j;

		if (synchronize_rcu_expedited_wait_once(jiffies_stall))
			return;
		if (rcu_stall_is_suppressed())
			continue;

		nbcon_cpu_emergency_enter();

		j = jiffies;
		rcu_stall_notifier_call_chain(RCU_STALL_NOTIFY_EXP, (void *)(j - jiffies_start));
		trace_rcu_stall_warning(rcu_state.name, TPS("ExpeditedStall"));
		synchronize_rcu_expedited_stall(jiffies_start, j);
		jiffies_stall = 3 * rcu_exp_jiffies_till_stall_check() + 3;

		nbcon_cpu_emergency_exit();

		panic_on_rcu_stall();
	}
}

/*
 * Wait for the current expedited grace period to complete, and then
 * wake up everyone who piggybacked on the just-completed expedited
 * grace period.  Also update all the ->exp_seq_rq counters as needed
 * in order to avoid counter-wrap problems.
 */
static void rcu_exp_wait_wake(unsigned long s)
{
	struct rcu_node *rnp;

	synchronize_rcu_expedited_wait();

	// Switch over to wakeup mode, allowing the next GP to proceed.
	// End the previous grace period only after acquiring the mutex
	// to ensure that only one GP runs concurrently with wakeups.
	mutex_lock(&rcu_state.exp_wake_mutex);
	rcu_exp_gp_seq_end();
	trace_rcu_exp_grace_period(rcu_state.name, s, TPS("end"));

	rcu_for_each_node_breadth_first(rnp) {
		if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s)) {
			spin_lock(&rnp->exp_lock);
			/* Recheck, avoid hang in case someone just arrived. */
			if (ULONG_CMP_LT(rnp->exp_seq_rq, s))
				WRITE_ONCE(rnp->exp_seq_rq, s);
			spin_unlock(&rnp->exp_lock);
		}
		smp_mb(); /* All above changes before wakeup. */
		wake_up_all(&rnp->exp_wq[rcu_seq_ctr(s) & 0x3]);
	}
	trace_rcu_exp_grace_period(rcu_state.name, s, TPS("endwake"));
	mutex_unlock(&rcu_state.exp_wake_mutex);
}

/*
 * Common code to drive an expedited grace period forward, used by
 * workqueues and mid-boot-time tasks.
 */
static void rcu_exp_sel_wait_wake(unsigned long s)
{
	/* Initialize the rcu_node tree in preparation for the wait. */
	sync_rcu_exp_select_cpus();

	/* Wait and clean up, including waking everyone. */
	rcu_exp_wait_wake(s);
}

#ifdef CONFIG_PREEMPT_RCU

/*
 * Remote handler for smp_call_function_single().  If there is an
 * RCU read-side critical section in effect, request that the
 * next rcu_read_unlock() record the quiescent state up the
 * ->expmask fields in the rcu_node tree.  Otherwise, immediately
 * report the quiescent state.
 */
static void rcu_exp_handler(void *unused)
{
	int depth = rcu_preempt_depth();
	unsigned long flags;
	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
	struct rcu_node *rnp = rdp->mynode;
	struct task_struct *t = current;

	/*
	 * First, the common case of not being in an RCU read-side
	 * critical section.  If preemption and softirqs are also enabled,
	 * or if this interrupt arrived from idle, immediately report the
	 * quiescent state; otherwise defer it.
	 */
	if (!depth) {
		if (!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK)) ||
		    rcu_is_cpu_rrupt_from_idle()) {
			rcu_report_exp_rdp(rdp);
		} else {
			WRITE_ONCE(rdp->cpu_no_qs.b.exp, true);
			set_tsk_need_resched(t);
			set_preempt_need_resched();
		}
		return;
	}

	/*
	 * Second, the less-common case of being in an RCU read-side
	 * critical section.  In this case we can count on a future
	 * rcu_read_unlock().  However, this rcu_read_unlock() might
	 * execute on some other CPU, but in that case there will be
	 * a future context switch.  Either way, if the expedited
	 * grace period is still waiting on this CPU, set ->cpu_no_qs.b.exp
	 * so that the eventual quiescent state will be reported.
	 * Note that there is a large group of race conditions that
	 * can have caused this quiescent state to already have been
	 * reported, so we really do need to check ->expmask.
	 */
	if (depth > 0) {
		raw_spin_lock_irqsave_rcu_node(rnp, flags);
		if (rnp->expmask & rdp->grpmask) {
			WRITE_ONCE(rdp->cpu_no_qs.b.exp, true);
			t->rcu_read_unlock_special.b.exp_hint = true;
		}
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
		return;
	}

	// Finally, negative nesting depth should not happen.
	WARN_ON_ONCE(1);
}

/* PREEMPTION=y, so no PREEMPTION=n expedited grace period to clean up after. */
static void sync_sched_exp_online_cleanup(int cpu)
{
}

/*
 * Scan the current list of tasks blocked within RCU read-side critical
 * sections, printing out the tid of each that is blocking the current
 * expedited grace period.
 */
static int rcu_print_task_exp_stall(struct rcu_node *rnp)
{
	unsigned long flags;
	int ndetected = 0;
	struct task_struct *t;

	raw_spin_lock_irqsave_rcu_node(rnp, flags);
	if (!rnp->exp_tasks) {
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
		return 0;
	}
	t = list_entry(rnp->exp_tasks->prev,
		       struct task_struct, rcu_node_entry);
	list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
		pr_cont(" P%d", t->pid);
		ndetected++;
	}
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
	return ndetected;
}

/*
 * Scan the current list of tasks blocked within RCU read-side critical
 * sections, dumping the stack of each that is blocking the current
 * expedited grace period.
 */
static void rcu_exp_print_detail_task_stall_rnp(struct rcu_node *rnp)
{
	unsigned long flags;
	struct task_struct *t;

	if (!rcu_exp_stall_task_details)
		return;
	raw_spin_lock_irqsave_rcu_node(rnp, flags);
	if (!READ_ONCE(rnp->exp_tasks)) {
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
		return;
	}
	t = list_entry(rnp->exp_tasks->prev,
		       struct task_struct, rcu_node_entry);
	list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
		/*
		 * We could be printing a lot while holding a spinlock.
		 * Avoid triggering hard lockup.
		 */
		touch_nmi_watchdog();
		sched_show_task(t);
	}
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}

#else /* #ifdef CONFIG_PREEMPT_RCU */

/* Request an expedited quiescent state. */
static void rcu_exp_need_qs(void)
{
	__this_cpu_write(rcu_data.cpu_no_qs.b.exp, true);
	/* Store .exp before .rcu_urgent_qs. */
	smp_store_release(this_cpu_ptr(&rcu_data.rcu_urgent_qs), true);
	set_tsk_need_resched(current);
	set_preempt_need_resched();
}

/* Invoked on each online non-idle CPU for expedited quiescent state. */
static void rcu_exp_handler(void *unused)
{
	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
	struct rcu_node *rnp = rdp->mynode;
	bool preempt_bh_enabled = !(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK));

	if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
	    __this_cpu_read(rcu_data.cpu_no_qs.b.exp))
		return;
	if (rcu_is_cpu_rrupt_from_idle() ||
	    (IS_ENABLED(CONFIG_PREEMPT_COUNT) && preempt_bh_enabled)) {
		rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));
		return;
	}
	rcu_exp_need_qs();
}

/* Send IPI for expedited cleanup if needed at end of CPU-hotplug operation. */
static void sync_sched_exp_online_cleanup(int cpu)
{
	unsigned long flags;
	int my_cpu;
	struct rcu_data *rdp;
	int ret;
	struct rcu_node *rnp;

	rdp = per_cpu_ptr(&rcu_data, cpu);
	rnp = rdp->mynode;
	my_cpu = get_cpu();
	/* Quiescent state either not needed or already requested, leave. */
	if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
	    READ_ONCE(rdp->cpu_no_qs.b.exp)) {
		put_cpu();
		return;
	}
	/* Quiescent state needed on current CPU, so set it up locally. */
	if (my_cpu == cpu) {
		local_irq_save(flags);
		rcu_exp_need_qs();
		local_irq_restore(flags);
		put_cpu();
		return;
	}
	/* Quiescent state needed on some other CPU, send IPI. */
	ret = smp_call_function_single(cpu, rcu_exp_handler, NULL, 0);
	put_cpu();
	WARN_ON_ONCE(ret);
}

/*
 * Because preemptible RCU does not exist, we never have to check for
 * tasks blocked within RCU read-side critical sections that are
 * blocking the current expedited grace period.
 */
static int rcu_print_task_exp_stall(struct rcu_node *rnp)
{
	return 0;
}

/*
 * Because preemptible RCU does not exist, we never have to print out
 * tasks blocked within RCU read-side critical sections that are blocking
 * the current expedited grace period.
 */
static void rcu_exp_print_detail_task_stall_rnp(struct rcu_node *rnp)
{
}

#endif /* #else #ifdef CONFIG_PREEMPT_RCU */

/**
 * synchronize_rcu_expedited - Brute-force RCU grace period
 *
 * Wait for an RCU grace period, but expedite it.  The basic idea is to
 * IPI all non-idle non-nohz online CPUs.  The IPI handler checks whether
 * the CPU is in an RCU critical section, and if so, it sets a flag that
 * causes the outermost rcu_read_unlock() to report the quiescent state
 * for RCU-preempt or asks the scheduler for help for RCU-sched.  On the
 * other hand, if the CPU is not in an RCU read-side critical section,
 * the IPI handler reports the quiescent state immediately.
 *
 * Although this is a great improvement over previous expedited
 * implementations, it is still unfriendly to real-time workloads, and is
 * thus not recommended for any sort of common-case code.  In fact, if
 * you are using synchronize_rcu_expedited() in a loop, please restructure
 * your code to batch your updates, and then use a single synchronize_rcu()
 * instead.
 *
 * This has the same semantics as (but is more brutal than) synchronize_rcu().
 */
void synchronize_rcu_expedited(void)
{
	unsigned long flags;
	struct rcu_exp_work rew;
	struct rcu_node *rnp;
	unsigned long s;

	RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
			 lock_is_held(&rcu_lock_map) ||
			 lock_is_held(&rcu_sched_lock_map),
			 "Illegal synchronize_rcu_expedited() in RCU read-side critical section");

	/* Is the state such that the call is a grace period? */
	if (rcu_blocking_is_gp()) {
		// Note well that this code runs with !PREEMPT && !SMP.
		// In addition, all code that advances grace periods runs
		// at process level.  Therefore, this expedited GP overlaps
		// with other expedited GPs only by being fully nested within
		// them, which allows reuse of ->gp_seq_polled_exp_snap.
		rcu_poll_gp_seq_start_unlocked(&rcu_state.gp_seq_polled_exp_snap);
		rcu_poll_gp_seq_end_unlocked(&rcu_state.gp_seq_polled_exp_snap);

		local_irq_save(flags);
		WARN_ON_ONCE(num_online_cpus() > 1);
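		// Advance the counter by a full grace period so that earlier
		// snapshots of ->expedited_sequence register as completed.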
		rcu_state.expedited_sequence += (1 << RCU_SEQ_CTR_SHIFT);
		local_irq_restore(flags);
		return;  // Context allows vacuous grace periods.
	}

	/* If expedited grace periods are prohibited, fall back to normal. */
	if (rcu_gp_is_normal()) {
		synchronize_rcu_normal();
		return;
	}

	/* Take a snapshot of the sequence number.  */
	s = rcu_exp_gp_seq_snap();
	if (exp_funnel_lock(s))
		return;  /* Someone else did our work for us. */

	/* Ensure that load happens before action based on it. */
	if (unlikely((rcu_scheduler_active == RCU_SCHEDULER_INIT) || !rcu_exp_worker_started())) {
		/* Direct call during scheduler init and early_initcalls(). */
		rcu_exp_sel_wait_wake(s);
	} else {
		/* Marshall arguments & schedule the expedited grace period. */
		rew.rew_s = s;
		synchronize_rcu_expedited_queue_work(&rew);
	}

	/* Wait for expedited grace period to complete. */
	rnp = rcu_get_root();
	wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
		   sync_exp_work_done(s));

	/* Let the next expedited grace period start. */
	mutex_unlock(&rcu_state.exp_mutex);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);

/*
 * Ensure that start_poll_synchronize_rcu_expedited() has the expedited
 * RCU grace periods that it needs.
 */
static void sync_rcu_do_polled_gp(struct work_struct *wp)
{
	unsigned long flags;
	int i = 0;
	struct rcu_node *rnp = container_of(wp, struct rcu_node, exp_poll_wq);
	unsigned long s;

	raw_spin_lock_irqsave(&rnp->exp_poll_lock, flags);
	s = rnp->exp_seq_poll_rq;
	rnp->exp_seq_poll_rq = RCU_GET_STATE_COMPLETED;
	raw_spin_unlock_irqrestore(&rnp->exp_poll_lock, flags);
	if (s == RCU_GET_STATE_COMPLETED)
		return;
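	/*
	 * Keep driving expedited grace periods until the polled request
	 * is satisfied, complaining if it takes too many attempts.
	 */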
	while (!poll_state_synchronize_rcu(s)) {
		synchronize_rcu_expedited();
		if (i == 10 || i == 20)
			pr_info("%s: i = %d s = %lx gp_seq_polled = %lx\n", __func__, i, s, READ_ONCE(rcu_state.gp_seq_polled));
		i++;
	}
	raw_spin_lock_irqsave(&rnp->exp_poll_lock, flags);
	s = rnp->exp_seq_poll_rq;
	if (poll_state_synchronize_rcu(s))
		rnp->exp_seq_poll_rq = RCU_GET_STATE_COMPLETED;
	raw_spin_unlock_irqrestore(&rnp->exp_poll_lock, flags);
}

/**
 * start_poll_synchronize_rcu_expedited - Snapshot current RCU state and start expedited grace period
 *
 * Returns a cookie to pass to a call to cond_synchronize_rcu(),
 * cond_synchronize_rcu_expedited(), or poll_state_synchronize_rcu(),
 * allowing them to determine whether or not any sort of grace period has
 * elapsed in the meantime.  If the needed expedited grace period is not
 * already slated to start, initiates that grace period.
 */
unsigned long start_poll_synchronize_rcu_expedited(void)
{
	unsigned long flags;
	struct rcu_data *rdp;
	struct rcu_node *rnp;
	unsigned long s;

	s = get_state_synchronize_rcu();
	rdp = per_cpu_ptr(&rcu_data, raw_smp_processor_id());
	rnp = rdp->mynode;
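	/*
	 * Before rcu_init(), the rcu_node tree (including ->exp_poll_lock)
	 * and rcu_gp_wq are not yet set up, so skip the locking and the
	 * deferred-work path during early boot.
	 */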
	if (rcu_init_invoked())
		raw_spin_lock_irqsave(&rnp->exp_poll_lock, flags);
	if (!poll_state_synchronize_rcu(s)) {
		if (rcu_init_invoked()) {
			rnp->exp_seq_poll_rq = s;
			queue_work(rcu_gp_wq, &rnp->exp_poll_wq);
		}
	}
	if (rcu_init_invoked())
		raw_spin_unlock_irqrestore(&rnp->exp_poll_lock, flags);

	return s;
}
EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu_expedited);

/**
 * start_poll_synchronize_rcu_expedited_full - Take a full snapshot and start expedited grace period
 * @rgosp: Place to put snapshot of grace-period state
 *
 * Places the normal and expedited grace-period states in rgosp.  This
 * state value can be passed to a later call to cond_synchronize_rcu_full()
 * or poll_state_synchronize_rcu_full() to determine whether or not a
 * grace period (whether normal or expedited) has elapsed in the meantime.
 * If the needed expedited grace period is not already slated to start,
 * initiates that grace period.
 */
void start_poll_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp)
{
	get_state_synchronize_rcu_full(rgosp);
	(void)start_poll_synchronize_rcu_expedited();
}
EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu_expedited_full);

/**
 * cond_synchronize_rcu_expedited - Conditionally wait for an expedited RCU grace period
 *
 * @oldstate: value from get_state_synchronize_rcu(), start_poll_synchronize_rcu(), or start_poll_synchronize_rcu_expedited()
 *
 * If any type of full RCU grace period has elapsed since the earlier
 * call to get_state_synchronize_rcu(), start_poll_synchronize_rcu(),
 * or start_poll_synchronize_rcu_expedited(), just return.  Otherwise,
 * invoke synchronize_rcu_expedited() to wait for a full grace period.
 *
 * Yes, this function does not take counter wrap into account.
 * But counter wrap is harmless.  If the counter wraps, we have waited for
 * more than 2 billion grace periods (and way more on a 64-bit system!),
 * so waiting for a couple of additional grace periods should be just fine.
 *
 * This function provides the same memory-ordering guarantees that
 * would be provided by a synchronize_rcu() that was invoked at the call
 * to the function that provided @oldstate and that returned at the end
 * of this function.
 */
void cond_synchronize_rcu_expedited(unsigned long oldstate)
{
	if (!poll_state_synchronize_rcu(oldstate))
		synchronize_rcu_expedited();
}
EXPORT_SYMBOL_GPL(cond_synchronize_rcu_expedited);

/**
 * cond_synchronize_rcu_expedited_full - Conditionally wait for an expedited RCU grace period
 * @rgosp: value from get_state_synchronize_rcu_full(), start_poll_synchronize_rcu_full(), or start_poll_synchronize_rcu_expedited_full()
 *
 * If a full RCU grace period has elapsed since the call to
 * get_state_synchronize_rcu_full(), start_poll_synchronize_rcu_full(),
 * or start_poll_synchronize_rcu_expedited_full() from which @rgosp was
 * obtained, just return.  Otherwise, invoke synchronize_rcu_expedited()
 * to wait for a full grace period.
 *
 * Yes, this function does not take counter wrap into account.
 * But counter wrap is harmless.  If the counter wraps, we have waited for
 * more than 2 billion grace periods (and way more on a 64-bit system!),
 * so waiting for a couple of additional grace periods should be just fine.
 *
 * This function provides the same memory-ordering guarantees that
 * would be provided by a synchronize_rcu() that was invoked at the call
 * to the function that provided @rgosp and that returned at the end of
 * this function.
 */
void cond_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp)
{
	if (!poll_state_synchronize_rcu_full(rgosp))
		synchronize_rcu_expedited();
}
EXPORT_SYMBOL_GPL(cond_synchronize_rcu_expedited_full);