// SPDX-License-Identifier: GPL-2.0-only
/*
 * Generic waiting primitives.
 *
 * (C) 2004 Nadia Yvette Chambers, Oracle
 */

void __init_waitqueue_head(struct wait_queue_head *wq_head, const char *name, struct lock_class_key *key)
{
	spin_lock_init(&wq_head->lock);
	lockdep_set_class_and_name(&wq_head->lock, key, name);
	INIT_LIST_HEAD(&wq_head->head);
}
EXPORT_SYMBOL(__init_waitqueue_head);

void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
	unsigned long flags;

	wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE;
	spin_lock_irqsave(&wq_head->lock, flags);
	__add_wait_queue(wq_head, wq_entry);
	spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL(add_wait_queue);

void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
	unsigned long flags;

	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
	spin_lock_irqsave(&wq_head->lock, flags);
	__add_wait_queue_entry_tail(wq_head, wq_entry);
	spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL(add_wait_queue_exclusive);

void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
	unsigned long flags;

	wq_entry->flags |= WQ_FLAG_EXCLUSIVE | WQ_FLAG_PRIORITY;
	spin_lock_irqsave(&wq_head->lock, flags);
	__add_wait_queue(wq_head, wq_entry);
	spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL_GPL(add_wait_queue_priority);

void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
	unsigned long flags;

	spin_lock_irqsave(&wq_head->lock, flags);
	__remove_wait_queue(wq_head, wq_entry);
	spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL(remove_wait_queue);
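
/*
 * Usage sketch for the add/remove primitives above, assuming a
 * hypothetical waitqueue @my_wq and a condition @my_cond set by another
 * context; most code should prefer the wait_event*() macros to
 * open-coding this:
 *
 *	DECLARE_WAITQUEUE(wait, current);
 *
 *	add_wait_queue(&my_wq, &wait);
 *	for (;;) {
 *		set_current_state(TASK_INTERRUPTIBLE);
 *		if (my_cond)
 *			break;
 *		schedule();
 *	}
 *	__set_current_state(TASK_RUNNING);
 *	remove_wait_queue(&my_wq, &wait);
 */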

/*
 * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
 * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve
 * number) then we wake that number of exclusive tasks, and potentially all
 * the non-exclusive tasks. Normally, exclusive tasks will be at the end of
 * the list and any non-exclusive tasks will be woken first. A priority task
 * may be at the head of the list, and can consume the event without any other
 * tasks being woken.
 *
 * There are circumstances in which we can try to wake a task which has already
 * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
 * zero in this (rare) case, and we handle it by continuing to scan the queue.
 */
static int __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
			int nr_exclusive, int wake_flags, void *key)
{
	wait_queue_entry_t *curr, *next;

	lockdep_assert_held(&wq_head->lock);

	curr = list_first_entry(&wq_head->head, wait_queue_entry_t, entry);

	if (&curr->entry == &wq_head->head)
		return nr_exclusive;

	list_for_each_entry_safe_from(curr, next, &wq_head->head, entry) {
		unsigned flags = curr->flags;
		int ret;

		ret = curr->func(curr, mode, wake_flags, key);
		if (ret < 0)
			break;
		if (ret && (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
			break;
	}

	return nr_exclusive;
}
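
/*
 * Sketch of the counting above, assuming a hypothetical waitqueue @my_wq
 * with non-exclusive waiters queued at the head, exclusive waiters at the
 * tail, and every waiter in a matching sleep state:
 *
 *	wake_up(&my_wq);	// nr_exclusive == 1: all non-exclusive
 *				// waiters plus at most one exclusive waiter
 *	wake_up_nr(&my_wq, 2);	// at most two exclusive waiters
 *	wake_up_all(&my_wq);	// nr_exclusive == 0: everybody
 */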

static int __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int mode,
			int nr_exclusive, int wake_flags, void *key)
{
	unsigned long flags;
	int remaining;

	spin_lock_irqsave(&wq_head->lock, flags);
	remaining = __wake_up_common(wq_head, mode, nr_exclusive, wake_flags,
			key);
	spin_unlock_irqrestore(&wq_head->lock, flags);

	return nr_exclusive - remaining;
}

/**
 * __wake_up - wake up threads blocked on a waitqueue.
 * @wq_head: the waitqueue
 * @mode: which threads
 * @nr_exclusive: how many wake-one or wake-many threads to wake up
 * @key: is directly passed to the wakeup function
 *
 * If this function wakes up a task, it executes a full memory barrier
 * before accessing the task state.  Returns the number of exclusive
 * tasks that were awakened.
 */
int __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
	      int nr_exclusive, void *key)
{
	return __wake_up_common_lock(wq_head, mode, nr_exclusive, 0, key);
}
EXPORT_SYMBOL(__wake_up);
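
/*
 * Typical pairing, sketched with an assumed flag @done and waitqueue
 * @my_wq: the waker publishes the condition before waking, and the full
 * barrier mentioned above orders that store against the sleeper's check
 * of its task state:
 *
 *	// waker				// sleeper
 *	WRITE_ONCE(done, 1);			wait_event(my_wq, READ_ONCE(done));
 *	wake_up(&my_wq);
 */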

void __wake_up_on_current_cpu(struct wait_queue_head *wq_head, unsigned int mode, void *key)
{
	__wake_up_common_lock(wq_head, mode, 1, WF_CURRENT_CPU, key);
}

/*
 * Same as __wake_up but called with the spinlock in wait_queue_head_t held.
 */
void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr)
{
	__wake_up_common(wq_head, mode, nr, 0, NULL);
}
EXPORT_SYMBOL_GPL(__wake_up_locked);

void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key)
{
	__wake_up_common(wq_head, mode, 1, 0, key);
}
EXPORT_SYMBOL_GPL(__wake_up_locked_key);

/**
 * __wake_up_sync_key - wake up threads blocked on a waitqueue.
 * @wq_head: the waitqueue
 * @mode: which threads
 * @key: opaque value to be passed to wakeup targets
 *
 * The sync wakeup differs in that the waker knows that it will schedule
 * away soon, so while the target thread will be woken up, it will not
 * be migrated to another CPU - ie. the two threads are 'synchronized'
 * with each other. This can prevent needless bouncing between CPUs.
 *
 * On UP it can prevent extra preemption.
 *
 * If this function wakes up a task, it executes a full memory barrier before
 * accessing the task state.
 */
void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode,
			void *key)
{
	if (unlikely(!wq_head))
		return;

	__wake_up_common_lock(wq_head, mode, 1, WF_SYNC, key);
}
EXPORT_SYMBOL_GPL(__wake_up_sync_key);
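
/*
 * Sketch of typical callers (assumed waitqueue @my_wq): a producer that
 * is itself about to sleep can use the _sync variants so the woken
 * consumer is not needlessly migrated to another CPU:
 *
 *	wake_up_interruptible_sync(&my_wq);
 *	wake_up_interruptible_sync_poll(&my_wq, EPOLLIN);
 *
 * Both funnel into __wake_up_sync_key(), waking at most one exclusive
 * waiter with WF_SYNC set.
 */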

/**
 * __wake_up_locked_sync_key - wake up a thread blocked on a locked waitqueue.
 * @wq_head: the waitqueue
 * @mode: which threads
 * @key: opaque value to be passed to wakeup targets
 *
 * The sync wakeup differs in that the waker knows that it will schedule
 * away soon, so while the target thread will be woken up, it will not
 * be migrated to another CPU - ie. the two threads are 'synchronized'
 * with each other. This can prevent needless bouncing between CPUs.
 *
 * On UP it can prevent extra preemption.
 *
 * If this function wakes up a task, it executes a full memory barrier before
 * accessing the task state.
 */
void __wake_up_locked_sync_key(struct wait_queue_head *wq_head,
			       unsigned int mode, void *key)
{
	__wake_up_common(wq_head, mode, 1, WF_SYNC, key);
}
EXPORT_SYMBOL_GPL(__wake_up_locked_sync_key);

/*
 * __wake_up_sync - see __wake_up_sync_key()
 */
void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode)
{
	__wake_up_sync_key(wq_head, mode, NULL);
}
EXPORT_SYMBOL_GPL(__wake_up_sync);	/* For internal use only */

void __wake_up_pollfree(struct wait_queue_head *wq_head)
{
	__wake_up(wq_head, TASK_NORMAL, 0, poll_to_key(EPOLLHUP | POLLFREE));
	/* POLLFREE must have cleared the queue. */
	WARN_ON_ONCE(waitqueue_active(wq_head));
}

/*
 * Note: we use "set_current_state()" _after_ the wait-queue add,
 * because we need a memory barrier there on SMP, so that any
 * wake-function that tests for the wait-queue being active
 * will be guaranteed to see waitqueue addition _or_ subsequent
 * tests in this thread will see the wakeup having taken place.
 *
 * The spin_unlock() itself is semi-permeable and only protects
 * one way (it only protects stuff inside the critical region and
 * stops them from bleeding out - it would still allow subsequent
 * loads to move into the critical region).
 */
void
prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
{
	unsigned long flags;

	wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE;
	spin_lock_irqsave(&wq_head->lock, flags);
	if (list_empty(&wq_entry->entry))
		__add_wait_queue(wq_head, wq_entry);
	set_current_state(state);
	spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL(prepare_to_wait);
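
/*
 * The classic open-coded wait loop these helpers exist for, sketched
 * with an assumed waitqueue @my_wq and condition @my_cond; DEFINE_WAIT()
 * uses autoremove_wake_function(), so a genuine wakeup also removes the
 * entry from the queue:
 *
 *	DEFINE_WAIT(wait);
 *
 *	for (;;) {
 *		prepare_to_wait(&my_wq, &wait, TASK_UNINTERRUPTIBLE);
 *		if (my_cond)
 *			break;
 *		schedule();
 *	}
 *	finish_wait(&my_wq, &wait);
 */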

/* Returns true if we are the first waiter in the queue, false otherwise. */
bool
prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
{
	unsigned long flags;
	bool was_empty = false;

	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
	spin_lock_irqsave(&wq_head->lock, flags);
	if (list_empty(&wq_entry->entry)) {
		was_empty = list_empty(&wq_head->head);
		__add_wait_queue_entry_tail(wq_head, wq_entry);
	}
	set_current_state(state);
	spin_unlock_irqrestore(&wq_head->lock, flags);
	return was_empty;
}
EXPORT_SYMBOL(prepare_to_wait_exclusive);

void init_wait_entry(struct wait_queue_entry *wq_entry, int flags)
{
	wq_entry->flags = flags;
	wq_entry->private = current;
	wq_entry->func = autoremove_wake_function;
	INIT_LIST_HEAD(&wq_entry->entry);
}
EXPORT_SYMBOL(init_wait_entry);

long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
{
	unsigned long flags;
	long ret = 0;

	spin_lock_irqsave(&wq_head->lock, flags);
	if (signal_pending_state(state, current)) {
		/*
		 * An exclusive waiter must not fail if it was selected by a
		 * wakeup; it should "consume" the condition we were waiting
		 * for.
		 *
		 * The caller will recheck the condition and return success if
		 * we were already woken up; we cannot miss the event because
		 * wakeup locks/unlocks the same wq_head->lock.
		 *
		 * But we need to ensure that set-condition + wakeup after that
		 * can't see us; it should wake up another exclusive waiter if
		 * we fail.
		 */
		list_del_init(&wq_entry->entry);
		ret = -ERESTARTSYS;
	} else {
		if (list_empty(&wq_entry->entry)) {
			if (wq_entry->flags & WQ_FLAG_EXCLUSIVE)
				__add_wait_queue_entry_tail(wq_head, wq_entry);
			else
				__add_wait_queue(wq_head, wq_entry);
		}
		set_current_state(state);
	}
	spin_unlock_irqrestore(&wq_head->lock, flags);

	return ret;
}
EXPORT_SYMBOL(prepare_to_wait_event);
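
/*
 * Rough sketch of how the ___wait_event() machinery in <linux/wait.h>
 * drives the helper above (simplified; the real macro also handles
 * exclusive waits and custom wait commands), assuming @my_wq and
 * @my_cond:
 *
 *	struct wait_queue_entry wq_entry;
 *	long __ret = 0;
 *
 *	init_wait_entry(&wq_entry, 0);
 *	for (;;) {
 *		long __int = prepare_to_wait_event(&my_wq, &wq_entry,
 *						   TASK_INTERRUPTIBLE);
 *		if (my_cond)
 *			break;
 *		if (__int) {
 *			__ret = __int;
 *			break;
 *		}
 *		schedule();
 *	}
 *	finish_wait(&my_wq, &wq_entry);
 */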

/*
 * Note! These two wait functions are entered with the
 * wait-queue lock held (and interrupts off in the _irq
 * case), so there is no race with testing the wakeup
 * condition in the caller before they add the wait
 * entry to the wake queue.
 */
int do_wait_intr(wait_queue_head_t *wq, wait_queue_entry_t *wait)
{
	if (likely(list_empty(&wait->entry)))
		__add_wait_queue_entry_tail(wq, wait);

	set_current_state(TASK_INTERRUPTIBLE);
	if (signal_pending(current))
		return -ERESTARTSYS;

	spin_unlock(&wq->lock);
	schedule();
	spin_lock(&wq->lock);

	return 0;
}
EXPORT_SYMBOL(do_wait_intr);

int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_entry_t *wait)
{
	if (likely(list_empty(&wait->entry)))
		__add_wait_queue_entry_tail(wq, wait);

	set_current_state(TASK_INTERRUPTIBLE);
	if (signal_pending(current))
		return -ERESTARTSYS;

	spin_unlock_irq(&wq->lock);
	schedule();
	spin_lock_irq(&wq->lock);

	return 0;
}
EXPORT_SYMBOL(do_wait_intr_irq);
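
/*
 * Sketch of the calling convention, roughly what the
 * wait_event_interruptible_locked*() macros build on top of these
 * helpers (assumed waitqueue @my_wq and condition @my_cond; wq->lock is
 * held across the whole sequence and only dropped around schedule()):
 *
 *	DEFINE_WAIT(wait);
 *	int err;
 *
 *	spin_lock(&my_wq.lock);
 *	do {
 *		err = do_wait_intr(&my_wq, &wait);
 *		if (err)
 *			break;
 *	} while (!my_cond);
 *	__remove_wait_queue(&my_wq, &wait);
 *	__set_current_state(TASK_RUNNING);
 *	spin_unlock(&my_wq.lock);
 */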

/**
 * finish_wait - clean up after waiting in a queue
 * @wq_head: waitqueue waited on
 * @wq_entry: wait descriptor
 *
 * Sets current thread back to running state and removes
 * the wait descriptor from the given waitqueue if still
 * queued.
 */
void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
	unsigned long flags;

	__set_current_state(TASK_RUNNING);
	/*
	 * We can check for list emptiness outside the lock
	 * IFF:
	 *  - we use the "careful" check that verifies both
	 *    the next and prev pointers, so that there cannot
	 *    be any half-pending updates in progress on other
	 *    CPUs that we haven't seen yet (and that might
	 *    still change the stack area),
	 * and
	 *  - all other users take the lock (ie we can only
	 *    have _one_ other CPU that looks at or modifies
	 *    the list).
	 */
	if (!list_empty_careful(&wq_entry->entry)) {
		spin_lock_irqsave(&wq_head->lock, flags);
		list_del_init(&wq_entry->entry);
		spin_unlock_irqrestore(&wq_head->lock, flags);
	}
}
EXPORT_SYMBOL(finish_wait);

int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key)
{
	int ret = default_wake_function(wq_entry, mode, sync, key);

	if (ret)
		list_del_init_careful(&wq_entry->entry);

	return ret;
}
EXPORT_SYMBOL(autoremove_wake_function);

/*
 * DEFINE_WAIT_FUNC(wait, woken_wake_function);
 *
 * add_wait_queue(&wq_head, &wait);
 * for (;;) {
 *     if (condition)
 *         break;
 *
 *     // in wait_woken()			// in woken_wake_function()
 *
 *     p->state = mode;				wq_entry->flags |= WQ_FLAG_WOKEN;
 *     smp_mb(); // A				try_to_wake_up():
 *     if (!(wq_entry->flags & WQ_FLAG_WOKEN))	   <full barrier>
 *         schedule()				   if (p->state & mode)
 *     p->state = TASK_RUNNING;			      p->state = TASK_RUNNING;
 *     wq_entry->flags &= ~WQ_FLAG_WOKEN;	~~~~~~~~~~~~~~~~~~
 *     smp_mb(); // B				condition = true;
 * }						smp_mb(); // C
 * remove_wait_queue(&wq_head, &wait);		wq_entry->flags |= WQ_FLAG_WOKEN;
 */
long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout)
{
	/*
	 * The below executes an smp_mb(), which matches with the full barrier
	 * executed by the try_to_wake_up() in woken_wake_function() such that
	 * either we see the store to wq_entry->flags in woken_wake_function()
	 * or woken_wake_function() sees our store to current->state.
	 */
	set_current_state(mode); /* A */
	if (!(wq_entry->flags & WQ_FLAG_WOKEN) && !kthread_should_stop_or_park())
		timeout = schedule_timeout(timeout);
	__set_current_state(TASK_RUNNING);

	/*
	 * The below executes an smp_mb(), which matches with the smp_mb() (C)
	 * in woken_wake_function() such that either we see the wait condition
	 * being true or the store to wq_entry->flags in woken_wake_function()
	 * follows ours in the coherence order.
	 */
	smp_store_mb(wq_entry->flags, wq_entry->flags & ~WQ_FLAG_WOKEN); /* B */

	return timeout;
}
EXPORT_SYMBOL(wait_woken);
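
/*
 * Concrete form of the pattern sketched above, as used for instance by
 * socket receive paths (assumed waitqueue @my_wq, condition @my_cond and
 * @timeout in jiffies):
 *
 *	DEFINE_WAIT_FUNC(wait, woken_wake_function);
 *
 *	add_wait_queue(&my_wq, &wait);
 *	while (!my_cond && timeout)
 *		timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout);
 *	remove_wait_queue(&my_wq, &wait);
 */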

int woken_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key)
{
	/* Pairs with the smp_store_mb() in wait_woken(). */
	smp_mb(); /* C */
	wq_entry->flags |= WQ_FLAG_WOKEN;

	return default_wake_function(wq_entry, mode, sync, key);
}
EXPORT_SYMBOL(woken_wake_function);