// SPDX-License-Identifier: GPL-2.0-only
/*
 * Generic waiting primitives.
 *
 * (C) 2004 Nadia Yvette Chambers, Oracle
 */
#include "sched.h"

void __init_waitqueue_head(struct wait_queue_head *wq_head, const char *name, struct lock_class_key *key)
{
	spin_lock_init(&wq_head->lock);
	lockdep_set_class_and_name(&wq_head->lock, key, name);
	INIT_LIST_HEAD(&wq_head->head);
}

EXPORT_SYMBOL(__init_waitqueue_head);

void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
	unsigned long flags;

	wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE;
	spin_lock_irqsave(&wq_head->lock, flags);
	__add_wait_queue(wq_head, wq_entry);
	spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL(add_wait_queue);

void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
	unsigned long flags;

	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
	spin_lock_irqsave(&wq_head->lock, flags);
	__add_wait_queue_entry_tail(wq_head, wq_entry);
	spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL(add_wait_queue_exclusive);

void add_wait_queue_priority(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
	unsigned long flags;

	wq_entry->flags |= WQ_FLAG_EXCLUSIVE | WQ_FLAG_PRIORITY;
	spin_lock_irqsave(&wq_head->lock, flags);
	__add_wait_queue(wq_head, wq_entry);
	spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL_GPL(add_wait_queue_priority);

void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
	unsigned long flags;

	spin_lock_irqsave(&wq_head->lock, flags);
	__remove_wait_queue(wq_head, wq_entry);
	spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL(remove_wait_queue);

/*
 * Scan threshold to break wait queue walk.
 * This allows a waker to take a break from holding the
 * wait queue lock during the wait queue walk.
 */
#define WAITQUEUE_WALK_BREAK_CNT 64

/*
 * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
 * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve
 * number) then we wake that number of exclusive tasks, and potentially all
 * the non-exclusive tasks. Normally, exclusive tasks will be at the end of
 * the list and any non-exclusive tasks will be woken first. A priority task
 * may be at the head of the list, and can consume the event without any other
 * tasks being woken.
 *
 * There are circumstances in which we can try to wake a task which has already
 * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
 * zero in this (rare) case, and we handle it by continuing to scan the queue.
 */
static int __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
			int nr_exclusive, int wake_flags, void *key,
			wait_queue_entry_t *bookmark)
{
	wait_queue_entry_t *curr, *next;
	int cnt = 0;

	lockdep_assert_held(&wq_head->lock);

	if (bookmark && (bookmark->flags & WQ_FLAG_BOOKMARK)) {
		curr = list_next_entry(bookmark, entry);

		list_del(&bookmark->entry);
		bookmark->flags = 0;
	} else
		curr = list_first_entry(&wq_head->head, wait_queue_entry_t, entry);

	if (&curr->entry == &wq_head->head)
		return nr_exclusive;

	list_for_each_entry_safe_from(curr, next, &wq_head->head, entry) {
		unsigned flags = curr->flags;
		int ret;

		if (flags & WQ_FLAG_BOOKMARK)
			continue;

		ret = curr->func(curr, mode, wake_flags, key);
		if (ret < 0)
			break;
		if (ret && (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
			break;

		if (bookmark && (++cnt > WAITQUEUE_WALK_BREAK_CNT) &&
				(&next->entry != &wq_head->head)) {
			bookmark->flags = WQ_FLAG_BOOKMARK;
			list_add_tail(&bookmark->entry, &next->entry);
			break;
		}
	}

	return nr_exclusive;
}
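
/*
 * Illustrative sketch of the walk above (the queue layout and the call are
 * assumptions for this example, not a fixed kernel scenario): with waiters
 * queued as
 *
 *	[ A (non-exclusive), B (non-exclusive), C (WQ_FLAG_EXCLUSIVE) ]
 *
 * a wakeup such as
 *
 *	__wake_up(&wq_head, TASK_NORMAL, 1, NULL);
 *
 * wakes A and B without decrementing nr_exclusive, then wakes C and stops,
 * because C is the first entry whose wake function returns nonzero with
 * WQ_FLAG_EXCLUSIVE set. An entry added with add_wait_queue_priority() sits
 * at the head of the list instead, so if it accepts the wakeup it consumes
 * that single exclusive slot before any other waiter runs.
 */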

static void __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int mode,
			int nr_exclusive, int wake_flags, void *key)
{
	unsigned long flags;
	wait_queue_entry_t bookmark;

	bookmark.flags = 0;
	bookmark.private = NULL;
	bookmark.func = NULL;
	INIT_LIST_HEAD(&bookmark.entry);

	do {
		spin_lock_irqsave(&wq_head->lock, flags);
		nr_exclusive = __wake_up_common(wq_head, mode, nr_exclusive,
						wake_flags, key, &bookmark);
		spin_unlock_irqrestore(&wq_head->lock, flags);
	} while (bookmark.flags & WQ_FLAG_BOOKMARK);
}

/**
 * __wake_up - wake up threads blocked on a waitqueue.
 * @wq_head: the waitqueue
 * @mode: which threads
 * @nr_exclusive: how many wake-one or wake-many threads to wake up
 * @key: is directly passed to the wakeup function
 *
 * If this function wakes up a task, it executes a full memory barrier before
 * accessing the task state.
 */
void __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
			int nr_exclusive, void *key)
{
	__wake_up_common_lock(wq_head, mode, nr_exclusive, 0, key);
}
EXPORT_SYMBOL(__wake_up);
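
/*
 * A minimal usage sketch (driver-style; 'my_wq' and 'my_cond' are made up
 * for illustration). The waiter side normally goes through the wait_event*()
 * macros rather than calling __wake_up() and friends directly:
 *
 *	static DECLARE_WAIT_QUEUE_HEAD(my_wq);
 *	static bool my_cond;
 *
 *	// waiter
 *	err = wait_event_interruptible(my_wq, my_cond);
 *
 *	// waker
 *	my_cond = true;
 *	wake_up(&my_wq);	// __wake_up(&my_wq, TASK_NORMAL, 1, NULL)
 *
 * wake_up() passes nr_exclusive == 1; wake_up_all() passes 0, which wakes
 * every exclusive waiter as well.
 */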

/*
 * Same as __wake_up but called with the spinlock in wait_queue_head_t held.
 */
void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr)
{
	__wake_up_common(wq_head, mode, nr, 0, NULL, NULL);
}
EXPORT_SYMBOL_GPL(__wake_up_locked);

void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key)
{
	__wake_up_common(wq_head, mode, 1, 0, key, NULL);
}
EXPORT_SYMBOL_GPL(__wake_up_locked_key);

void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head,
		unsigned int mode, void *key, wait_queue_entry_t *bookmark)
{
	__wake_up_common(wq_head, mode, 1, 0, key, bookmark);
}
EXPORT_SYMBOL_GPL(__wake_up_locked_key_bookmark);

/**
 * __wake_up_sync_key - wake up threads blocked on a waitqueue.
 * @wq_head: the waitqueue
 * @mode: which threads
 * @key: opaque value to be passed to wakeup targets
 *
 * The sync wakeup differs in that the waker knows that it will schedule
 * away soon, so while the target thread will be woken up, it will not
 * be migrated to another CPU - ie. the two threads are 'synchronized'
 * with each other. This can prevent needless bouncing between CPUs.
 *
 * On UP it can prevent extra preemption.
 *
 * If this function wakes up a task, it executes a full memory barrier before
 * accessing the task state.
 */
void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode,
			void *key)
{
	if (unlikely(!wq_head))
		return;

	__wake_up_common_lock(wq_head, mode, 1, WF_SYNC, key);
}
EXPORT_SYMBOL_GPL(__wake_up_sync_key);
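
/*
 * Hedged usage sketch (the producer structure and helpers below are
 * assumptions, not a real caller): a waker that is about to schedule away
 * anyway can pass the WF_SYNC hint through the _sync wrappers:
 *
 *	spin_lock(&buf->lock);
 *	enqueue(buf, item);			// made-up helpers
 *	spin_unlock(&buf->lock);
 *	wake_up_interruptible_sync(&buf->wq);	// ends up in __wake_up_sync_key()
 *	schedule();				// waker blocks shortly after
 *
 * Without the hint, the scheduler is free to move the woken task to another
 * CPU right away, which the comment above describes as needless bouncing.
 */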

/**
 * __wake_up_locked_sync_key - wake up a thread blocked on a locked waitqueue.
 * @wq_head: the waitqueue
 * @mode: which threads
 * @key: opaque value to be passed to wakeup targets
 *
 * The sync wakeup differs in that the waker knows that it will schedule
 * away soon, so while the target thread will be woken up, it will not
 * be migrated to another CPU - ie. the two threads are 'synchronized'
 * with each other. This can prevent needless bouncing between CPUs.
 *
 * On UP it can prevent extra preemption.
 *
 * If this function wakes up a task, it executes a full memory barrier before
 * accessing the task state.
 */
void __wake_up_locked_sync_key(struct wait_queue_head *wq_head,
			       unsigned int mode, void *key)
{
	__wake_up_common(wq_head, mode, 1, WF_SYNC, key, NULL);
}
EXPORT_SYMBOL_GPL(__wake_up_locked_sync_key);

/*
 * __wake_up_sync - see __wake_up_sync_key()
 */
void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode)
{
	__wake_up_sync_key(wq_head, mode, NULL);
}
EXPORT_SYMBOL_GPL(__wake_up_sync);	/* For internal use only */

void __wake_up_pollfree(struct wait_queue_head *wq_head)
{
	__wake_up(wq_head, TASK_NORMAL, 0, poll_to_key(EPOLLHUP | POLLFREE));
	/* POLLFREE must have cleared the queue. */
	WARN_ON_ONCE(waitqueue_active(wq_head));
}

/*
 * Note: we use "set_current_state()" _after_ the wait-queue add,
 * because we need a memory barrier there on SMP, so that any
 * wake-function that tests for the wait-queue being active
 * will be guaranteed to see waitqueue addition _or_ subsequent
 * tests in this thread will see the wakeup having taken place.
 *
 * The spin_unlock() itself is semi-permeable and only protects
 * one way (it only protects stuff inside the critical region and
 * stops them from bleeding out - it would still allow subsequent
 * loads to move into the critical region).
 */
void
prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
{
	unsigned long flags;

	wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE;
	spin_lock_irqsave(&wq_head->lock, flags);
	if (list_empty(&wq_entry->entry))
		__add_wait_queue(wq_head, wq_entry);
	set_current_state(state);
	spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL(prepare_to_wait);
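
/*
 * A minimal open-coded wait loop, as a sketch (the waitqueue and the
 * condition are assumed to exist elsewhere); most callers use the
 * wait_event*() macros, which generate an equivalent loop:
 *
 *	DEFINE_WAIT(wait);
 *
 *	for (;;) {
 *		prepare_to_wait(&wq_head, &wait, TASK_UNINTERRUPTIBLE);
 *		if (condition)
 *			break;
 *		schedule();
 *	}
 *	finish_wait(&wq_head, &wait);
 *
 * The condition test sits between set_current_state() and schedule(), so a
 * wakeup that arrives after the test still leaves the task runnable and
 * schedule() returns promptly instead of missing the event.
 */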

/* Returns true if we are the first waiter in the queue, false otherwise. */
bool
prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
{
	unsigned long flags;
	bool was_empty = false;

	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
	spin_lock_irqsave(&wq_head->lock, flags);
	if (list_empty(&wq_entry->entry)) {
		was_empty = list_empty(&wq_head->head);
		__add_wait_queue_entry_tail(wq_head, wq_entry);
	}
	set_current_state(state);
	spin_unlock_irqrestore(&wq_head->lock, flags);
	return was_empty;
}
EXPORT_SYMBOL(prepare_to_wait_exclusive);
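
/*
 * Sketch of how the return value might be used (assumed caller, not from the
 * tree): it only reports whether the queue was empty before we were added,
 * which a caller may treat as a hint once the wait is over:
 *
 *	DEFINE_WAIT(wait);
 *	bool first;
 *
 *	first = prepare_to_wait_exclusive(&wq_head, &wait, TASK_UNINTERRUPTIBLE);
 *	while (!condition) {
 *		schedule();
 *		prepare_to_wait_exclusive(&wq_head, &wait, TASK_UNINTERRUPTIBLE);
 *	}
 *	finish_wait(&wq_head, &wait);
 *	if (first)
 *		account_first_waiter();		// made-up accounting hook
 */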

void init_wait_entry(struct wait_queue_entry *wq_entry, int flags)
{
	wq_entry->flags = flags;
	wq_entry->private = current;
	wq_entry->func = autoremove_wake_function;
	INIT_LIST_HEAD(&wq_entry->entry);
}
EXPORT_SYMBOL(init_wait_entry);

long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
{
	unsigned long flags;
	long ret = 0;

	spin_lock_irqsave(&wq_head->lock, flags);
	if (signal_pending_state(state, current)) {
		/*
		 * An exclusive waiter must not fail if it was selected by
		 * wakeup; it should "consume" the condition we were waiting for.
		 *
		 * The caller will recheck the condition and return success if
		 * we were already woken up; we cannot miss the event because
		 * wakeup locks/unlocks the same wq_head->lock.
		 *
		 * But we need to ensure that set-condition + wakeup after that
		 * can't see us; it should wake up another exclusive waiter if
		 * we fail.
		 */
		list_del_init(&wq_entry->entry);
		ret = -ERESTARTSYS;
	} else {
		if (list_empty(&wq_entry->entry)) {
			if (wq_entry->flags & WQ_FLAG_EXCLUSIVE)
				__add_wait_queue_entry_tail(wq_head, wq_entry);
			else
				__add_wait_queue(wq_head, wq_entry);
		}
		set_current_state(state);
	}
	spin_unlock_irqrestore(&wq_head->lock, flags);

	return ret;
}
EXPORT_SYMBOL(prepare_to_wait_event);
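
/*
 * Rough sketch of how the wait_event*() macros drive this helper (simplified
 * from the ___wait_event() loop; error handling details elided):
 *
 *	init_wait_entry(&wq_entry, exclusive ? WQ_FLAG_EXCLUSIVE : 0);
 *	for (;;) {
 *		long ret = prepare_to_wait_event(&wq_head, &wq_entry, state);
 *
 *		if (condition)
 *			break;
 *		if (ret)		// signal in an interruptible state
 *			return ret;	// -ERESTARTSYS, entry already dequeued
 *		schedule();
 *	}
 *	finish_wait(&wq_head, &wq_entry);
 *
 * On the signal path prepare_to_wait_event() has already removed the entry
 * and never changed the task state, so the macro skips finish_wait() there.
 */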

/*
 * Note! These two wait functions are entered with the
 * wait-queue lock held (and interrupts off in the _irq
 * case), so there is no race with testing the wakeup
 * condition in the caller before they add the wait
 * entry to the wake queue.
 */
int do_wait_intr(wait_queue_head_t *wq, wait_queue_entry_t *wait)
{
	if (likely(list_empty(&wait->entry)))
		__add_wait_queue_entry_tail(wq, wait);

	set_current_state(TASK_INTERRUPTIBLE);
	if (signal_pending(current))
		return -ERESTARTSYS;

	spin_unlock(&wq->lock);
	schedule();
	spin_lock(&wq->lock);

	return 0;
}
EXPORT_SYMBOL(do_wait_intr);

int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_entry_t *wait)
{
	if (likely(list_empty(&wait->entry)))
		__add_wait_queue_entry_tail(wq, wait);

	set_current_state(TASK_INTERRUPTIBLE);
	if (signal_pending(current))
		return -ERESTARTSYS;

	spin_unlock_irq(&wq->lock);
	schedule();
	spin_lock_irq(&wq->lock);

	return 0;
}
EXPORT_SYMBOL(do_wait_intr_irq);
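
/*
 * Hedged usage sketch: these helpers back the wait_event_interruptible_locked*()
 * macros, where the caller already holds wq.lock around both the condition
 * update and the wait (the condition and helper below are assumptions):
 *
 *	spin_lock(&wq_head.lock);
 *	err = wait_event_interruptible_locked(wq_head, condition);
 *	if (!err)
 *		consume_event();	// made-up helper, still under the lock
 *	spin_unlock(&wq_head.lock);
 *
 * The lock is dropped only across schedule(), inside do_wait_intr(), and is
 * re-acquired before the condition is re-tested.
 */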

/**
 * finish_wait - clean up after waiting in a queue
 * @wq_head: waitqueue waited on
 * @wq_entry: wait descriptor
 *
 * Sets current thread back to running state and removes
 * the wait descriptor from the given waitqueue if still
 * queued.
 */
void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
	unsigned long flags;

	__set_current_state(TASK_RUNNING);
	/*
	 * We can check for list emptiness outside the lock
	 * IFF:
	 *  - we use the "careful" check that verifies both
	 *    the next and prev pointers, so that there cannot
	 *    be any half-pending updates in progress on other
	 *    CPUs that we haven't seen yet (and that might
	 *    still change the stack area).
	 * and
	 *  - all other users take the lock (ie we can only
	 *    have _one_ other CPU that looks at or modifies
	 *    the list).
	 */
	if (!list_empty_careful(&wq_entry->entry)) {
		spin_lock_irqsave(&wq_head->lock, flags);
		list_del_init(&wq_entry->entry);
		spin_unlock_irqrestore(&wq_head->lock, flags);
	}
}
EXPORT_SYMBOL(finish_wait);

int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key)
{
	int ret = default_wake_function(wq_entry, mode, sync, key);

	if (ret)
		list_del_init_careful(&wq_entry->entry);

	return ret;
}
EXPORT_SYMBOL(autoremove_wake_function);

static inline bool is_kthread_should_stop(void)
{
	return (current->flags & PF_KTHREAD) && kthread_should_stop();
}

/*
 * DEFINE_WAIT_FUNC(wait, woken_wake_function);
 *
 * add_wait_queue(&wq_head, &wait);
 * for (;;) {
 *     if (condition)
 *         break;
 *
 *     // in wait_woken()			// in woken_wake_function()
 *
 *     p->state = mode;				wq_entry->flags |= WQ_FLAG_WOKEN;
 *     smp_mb(); // A				try_to_wake_up():
 *     if (!(wq_entry->flags & WQ_FLAG_WOKEN))	   <full barrier>
 *         schedule()				   if (p->state & mode)
 *     p->state = TASK_RUNNING;			      p->state = TASK_RUNNING;
 *     wq_entry->flags &= ~WQ_FLAG_WOKEN;	~~~~~~~~~~~~~~~~~~
 *     smp_mb(); // B				condition = true;
 * }						smp_mb(); // C
 * remove_wait_queue(&wq_head, &wait);		wq_entry->flags |= WQ_FLAG_WOKEN;
 */
long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout)
{
	/*
	 * The below executes an smp_mb(), which matches with the full barrier
	 * executed by the try_to_wake_up() in woken_wake_function() such that
	 * either we see the store to wq_entry->flags in woken_wake_function()
	 * or woken_wake_function() sees our store to current->state.
	 */
	set_current_state(mode); /* A */
	if (!(wq_entry->flags & WQ_FLAG_WOKEN) && !is_kthread_should_stop())
		timeout = schedule_timeout(timeout);
	__set_current_state(TASK_RUNNING);

	/*
	 * The below executes an smp_mb(), which matches with the smp_mb() (C)
	 * in woken_wake_function() such that either we see the wait condition
	 * being true or the store to wq_entry->flags in woken_wake_function()
	 * follows ours in the coherence order.
	 */
	smp_store_mb(wq_entry->flags, wq_entry->flags & ~WQ_FLAG_WOKEN); /* B */

	return timeout;
}
EXPORT_SYMBOL(wait_woken);
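
/*
 * Concrete caller sketch following the skeleton above (the condition and the
 * timeout handling are assumptions for this example):
 *
 *	DEFINE_WAIT_FUNC(wait, woken_wake_function);
 *	long timeout = msecs_to_jiffies(100);
 *
 *	add_wait_queue(&wq_head, &wait);
 *	while (!condition && timeout) {
 *		timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout);
 *		if (signal_pending(current))
 *			break;
 *	}
 *	remove_wait_queue(&wq_head, &wait);
 *
 * The WQ_FLAG_WOKEN flag records a wakeup that arrives while the loop is
 * outside wait_woken() (for example during the condition test), so the next
 * wait_woken() call returns immediately instead of sleeping through it.
 */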

int woken_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key)
{
	/* Pairs with the smp_store_mb() in wait_woken(). */
	smp_mb(); /* C */
	wq_entry->flags |= WQ_FLAG_WOKEN;

	return default_wake_function(wq_entry, mode, sync, key);
}
EXPORT_SYMBOL(woken_wake_function);