/*
 * Generic waiting primitives.
 *
 * (C) 2004 Nadia Yvette Chambers, Oracle
 */
#include "sched.h"

void __init_waitqueue_head(struct wait_queue_head *wq_head, const char *name, struct lock_class_key *key)
{
	spin_lock_init(&wq_head->lock);
	lockdep_set_class_and_name(&wq_head->lock, key, name);
	INIT_LIST_HEAD(&wq_head->head);
}

EXPORT_SYMBOL(__init_waitqueue_head);

void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
	unsigned long flags;

	wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE;
	spin_lock_irqsave(&wq_head->lock, flags);
	__add_wait_queue(wq_head, wq_entry);
	spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL(add_wait_queue);

void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
	unsigned long flags;

	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
	spin_lock_irqsave(&wq_head->lock, flags);
	__add_wait_queue_entry_tail(wq_head, wq_entry);
	spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL(add_wait_queue_exclusive);

void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
	unsigned long flags;

	spin_lock_irqsave(&wq_head->lock, flags);
	__remove_wait_queue(wq_head, wq_entry);
	spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL(remove_wait_queue);
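
/*
 * Illustrative sketch (not part of this file): a caller would normally
 * obtain a wait queue head via DECLARE_WAIT_QUEUE_HEAD() or
 * init_waitqueue_head(), and pair add_wait_queue()/remove_wait_queue()
 * around its wait loop, e.g. with a DEFINE_WAIT_FUNC() entry as in the
 * wait_woken() example further down.  The names my_wq, my_wait and
 * my_condition are placeholders for this sketch only:
 *
 *	static DECLARE_WAIT_QUEUE_HEAD(my_wq);
 *
 *	DEFINE_WAIT_FUNC(my_wait, woken_wake_function);
 *
 *	add_wait_queue(&my_wq, &my_wait);
 *	while (!my_condition)
 *		wait_woken(&my_wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
 *	remove_wait_queue(&my_wq, &my_wait);
 */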

/*
 * Scan threshold to break wait queue walk.
 * This allows a waker to take a break from holding the
 * wait queue lock during the wait queue walk.
 */
#define WAITQUEUE_WALK_BREAK_CNT 64

/*
 * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
 * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve
 * number) then we wake all the non-exclusive tasks and one exclusive task.
 *
 * There are circumstances in which we can try to wake a task which has already
 * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
 * zero in this (rare) case, and we handle it by continuing to scan the queue.
 */
static int __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
			int nr_exclusive, int wake_flags, void *key,
			wait_queue_entry_t *bookmark)
{
	wait_queue_entry_t *curr, *next;
	int cnt = 0;

	lockdep_assert_held(&wq_head->lock);

	if (bookmark && (bookmark->flags & WQ_FLAG_BOOKMARK)) {
		curr = list_next_entry(bookmark, entry);

		list_del(&bookmark->entry);
		bookmark->flags = 0;
	} else
		curr = list_first_entry(&wq_head->head, wait_queue_entry_t, entry);

	if (&curr->entry == &wq_head->head)
		return nr_exclusive;

	list_for_each_entry_safe_from(curr, next, &wq_head->head, entry) {
		unsigned flags = curr->flags;
		int ret;

		if (flags & WQ_FLAG_BOOKMARK)
			continue;

		ret = curr->func(curr, mode, wake_flags, key);
		if (ret < 0)
			break;
		if (ret && (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
			break;

		if (bookmark && (++cnt > WAITQUEUE_WALK_BREAK_CNT) &&
				(&next->entry != &wq_head->head)) {
			bookmark->flags = WQ_FLAG_BOOKMARK;
			list_add_tail(&bookmark->entry, &next->entry);
			break;
		}
	}

	return nr_exclusive;
}

static void __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int mode,
			int nr_exclusive, int wake_flags, void *key)
{
	unsigned long flags;
	wait_queue_entry_t bookmark;

	bookmark.flags = 0;
	bookmark.private = NULL;
	bookmark.func = NULL;
	INIT_LIST_HEAD(&bookmark.entry);

	spin_lock_irqsave(&wq_head->lock, flags);
	nr_exclusive = __wake_up_common(wq_head, mode, nr_exclusive, wake_flags, key, &bookmark);
	spin_unlock_irqrestore(&wq_head->lock, flags);

	while (bookmark.flags & WQ_FLAG_BOOKMARK) {
		spin_lock_irqsave(&wq_head->lock, flags);
		nr_exclusive = __wake_up_common(wq_head, mode, nr_exclusive,
						wake_flags, key, &bookmark);
		spin_unlock_irqrestore(&wq_head->lock, flags);
	}
}

/**
 * __wake_up - wake up threads blocked on a waitqueue.
 * @wq_head: the waitqueue
 * @mode: which threads
 * @nr_exclusive: how many wake-one or wake-many threads to wake up
 * @key: is directly passed to the wakeup function
 *
 * If this function wakes up a task, it executes a full memory barrier before
 * accessing the task state.
 */
void __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
			int nr_exclusive, void *key)
{
	__wake_up_common_lock(wq_head, mode, nr_exclusive, 0, key);
}
EXPORT_SYMBOL(__wake_up);

/*
 * Same as __wake_up but called with the spinlock in wait_queue_head_t held.
 */
void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr)
{
	__wake_up_common(wq_head, mode, nr, 0, NULL, NULL);
}
EXPORT_SYMBOL_GPL(__wake_up_locked);

void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key)
{
	__wake_up_common(wq_head, mode, 1, 0, key, NULL);
}
EXPORT_SYMBOL_GPL(__wake_up_locked_key);

void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head,
		unsigned int mode, void *key, wait_queue_entry_t *bookmark)
{
	__wake_up_common(wq_head, mode, 1, 0, key, bookmark);
}
EXPORT_SYMBOL_GPL(__wake_up_locked_key_bookmark);
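
/*
 * Illustrative sketch (not part of this file) of the exclusive-wakeup
 * behaviour documented above __wake_up_common(): if several waiters queue
 * themselves with prepare_to_wait_exclusive() (or via the *_exclusive()
 * variants of the wait_event*() macros), a plain wake_up() - which passes
 * nr_exclusive == 1 - wakes every non-exclusive waiter but only the first
 * exclusive one, while wake_up_all() passes nr_exclusive == 0 and wakes
 * them all.  my_wq and my_condition are placeholders for this sketch only:
 *
 *	// several threads each do:
 *	wait_event_interruptible_exclusive(my_wq, my_condition);
 *
 *	// the waker then does:
 *	my_condition = true;
 *	wake_up(&my_wq);	// exactly one exclusive waiter runs
 */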

/**
 * __wake_up_sync_key - wake up threads blocked on a waitqueue.
 * @wq_head: the waitqueue
 * @mode: which threads
 * @nr_exclusive: how many wake-one or wake-many threads to wake up
 * @key: opaque value to be passed to wakeup targets
 *
 * The sync wakeup differs in that the waker knows that it will schedule
 * away soon, so while the target thread will be woken up, it will not
 * be migrated to another CPU - ie. the two threads are 'synchronized'
 * with each other. This can prevent needless bouncing between CPUs.
 *
 * On UP it can prevent extra preemption.
 *
 * If this function wakes up a task, it executes a full memory barrier before
 * accessing the task state.
 */
void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode,
			int nr_exclusive, void *key)
{
	int wake_flags = 1; /* XXX WF_SYNC */

	if (unlikely(!wq_head))
		return;

	if (unlikely(nr_exclusive != 1))
		wake_flags = 0;

	__wake_up_common_lock(wq_head, mode, nr_exclusive, wake_flags, key);
}
EXPORT_SYMBOL_GPL(__wake_up_sync_key);

/*
 * __wake_up_sync - see __wake_up_sync_key()
 */
void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr_exclusive)
{
	__wake_up_sync_key(wq_head, mode, nr_exclusive, NULL);
}
EXPORT_SYMBOL_GPL(__wake_up_sync);	/* For internal use only */

/*
 * Note: we use "set_current_state()" _after_ the wait-queue add,
 * because we need a memory barrier there on SMP, so that any
 * wake-function that tests for the wait-queue being active
 * will be guaranteed to see waitqueue addition _or_ subsequent
 * tests in this thread will see the wakeup having taken place.
 *
 * The spin_unlock() itself is semi-permeable and only protects
 * one way (it only protects stuff inside the critical region and
 * stops them from bleeding out - it would still allow subsequent
 * loads to move into the critical region).
 */
void
prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
{
	unsigned long flags;

	wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE;
	spin_lock_irqsave(&wq_head->lock, flags);
	if (list_empty(&wq_entry->entry))
		__add_wait_queue(wq_head, wq_entry);
	set_current_state(state);
	spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL(prepare_to_wait);

void
prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
{
	unsigned long flags;

	wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
	spin_lock_irqsave(&wq_head->lock, flags);
	if (list_empty(&wq_entry->entry))
		__add_wait_queue_entry_tail(wq_head, wq_entry);
	set_current_state(state);
	spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL(prepare_to_wait_exclusive);
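
/*
 * Illustrative sketch (not part of this file): the canonical open-coded
 * wait loop that the ordering comment above prepare_to_wait() is about.
 * DEFINE_WAIT() creates an entry whose wake function is
 * autoremove_wake_function(); my_wq and my_condition are placeholders
 * for this sketch only:
 *
 *	DEFINE_WAIT(wait);
 *
 *	for (;;) {
 *		prepare_to_wait(&my_wq, &wait, TASK_UNINTERRUPTIBLE);
 *		if (my_condition)
 *			break;
 *		schedule();
 *	}
 *	finish_wait(&my_wq, &wait);
 */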

void init_wait_entry(struct wait_queue_entry *wq_entry, int flags)
{
	wq_entry->flags = flags;
	wq_entry->private = current;
	wq_entry->func = autoremove_wake_function;
	INIT_LIST_HEAD(&wq_entry->entry);
}
EXPORT_SYMBOL(init_wait_entry);

long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
{
	unsigned long flags;
	long ret = 0;

	spin_lock_irqsave(&wq_head->lock, flags);
	if (unlikely(signal_pending_state(state, current))) {
		/*
		 * Exclusive waiter must not fail if it was selected by wakeup,
		 * it should "consume" the condition we were waiting for.
		 *
		 * The caller will recheck the condition and return success if
		 * we were already woken up, we can not miss the event because
		 * wakeup locks/unlocks the same wq_head->lock.
		 *
		 * But we need to ensure that set-condition + wakeup after that
		 * can't see us, it should wake up another exclusive waiter if
		 * we fail.
		 */
		list_del_init(&wq_entry->entry);
		ret = -ERESTARTSYS;
	} else {
		if (list_empty(&wq_entry->entry)) {
			if (wq_entry->flags & WQ_FLAG_EXCLUSIVE)
				__add_wait_queue_entry_tail(wq_head, wq_entry);
			else
				__add_wait_queue(wq_head, wq_entry);
		}
		set_current_state(state);
	}
	spin_unlock_irqrestore(&wq_head->lock, flags);

	return ret;
}
EXPORT_SYMBOL(prepare_to_wait_event);

/*
 * Note! These two wait functions are entered with the
 * wait-queue lock held (and interrupts off in the _irq
 * case), so there is no race with testing the wakeup
 * condition in the caller before they add the wait
 * entry to the wake queue.
 */
int do_wait_intr(wait_queue_head_t *wq, wait_queue_entry_t *wait)
{
	if (likely(list_empty(&wait->entry)))
		__add_wait_queue_entry_tail(wq, wait);

	set_current_state(TASK_INTERRUPTIBLE);
	if (signal_pending(current))
		return -ERESTARTSYS;

	spin_unlock(&wq->lock);
	schedule();
	spin_lock(&wq->lock);

	return 0;
}
EXPORT_SYMBOL(do_wait_intr);

int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_entry_t *wait)
{
	if (likely(list_empty(&wait->entry)))
		__add_wait_queue_entry_tail(wq, wait);

	set_current_state(TASK_INTERRUPTIBLE);
	if (signal_pending(current))
		return -ERESTARTSYS;

	spin_unlock_irq(&wq->lock);
	schedule();
	spin_lock_irq(&wq->lock);

	return 0;
}
EXPORT_SYMBOL(do_wait_intr_irq);
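
/*
 * Illustrative sketch (not part of this file): roughly how the
 * wait_event_interruptible_locked() family drives do_wait_intr().  The
 * caller already holds wq.lock while it tests its condition, so the test
 * and the queueing done above cannot race with a waker.  my_wq and
 * my_condition are placeholders for this sketch only:
 *
 *	DEFINE_WAIT(wait);
 *	int err = 0;
 *
 *	spin_lock(&my_wq.lock);
 *	if (!my_condition) {
 *		do {
 *			err = do_wait_intr(&my_wq, &wait);
 *			if (err)
 *				break;
 *		} while (!my_condition);
 *		__remove_wait_queue(&my_wq, &wait);
 *		__set_current_state(TASK_RUNNING);
 *	}
 *	spin_unlock(&my_wq.lock);
 */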

/**
 * finish_wait - clean up after waiting in a queue
 * @wq_head: waitqueue waited on
 * @wq_entry: wait descriptor
 *
 * Sets current thread back to running state and removes
 * the wait descriptor from the given waitqueue if still
 * queued.
 */
void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
	unsigned long flags;

	__set_current_state(TASK_RUNNING);
	/*
	 * We can check for list emptiness outside the lock
	 * IFF:
	 *  - we use the "careful" check that verifies both
	 *    the next and prev pointers, so that there cannot
	 *    be any half-pending updates in progress on other
	 *    CPUs that we haven't seen yet (and that might
	 *    still change the stack area).
	 * and
	 *  - all other users take the lock (ie we can only
	 *    have _one_ other CPU that looks at or modifies
	 *    the list).
	 */
	if (!list_empty_careful(&wq_entry->entry)) {
		spin_lock_irqsave(&wq_head->lock, flags);
		list_del_init(&wq_entry->entry);
		spin_unlock_irqrestore(&wq_head->lock, flags);
	}
}
EXPORT_SYMBOL(finish_wait);

int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key)
{
	int ret = default_wake_function(wq_entry, mode, sync, key);

	if (ret)
		list_del_init(&wq_entry->entry);

	return ret;
}
EXPORT_SYMBOL(autoremove_wake_function);

static inline bool is_kthread_should_stop(void)
{
	return (current->flags & PF_KTHREAD) && kthread_should_stop();
}

/*
 * DEFINE_WAIT_FUNC(wait, woken_wake_func);
 *
 * add_wait_queue(&wq_head, &wait);
 * for (;;) {
 *     if (condition)
 *         break;
 *
 *     // in wait_woken()			// in woken_wake_function()
 *
 *     p->state = mode;				wq_entry->flags |= WQ_FLAG_WOKEN;
 *     smp_mb(); // A				try_to_wake_up():
 *     if (!(wq_entry->flags & WQ_FLAG_WOKEN))	   <full barrier>
 *         schedule()				   if (p->state & mode)
 *     p->state = TASK_RUNNING;			      p->state = TASK_RUNNING;
 *     wq_entry->flags &= ~WQ_FLAG_WOKEN;	~~~~~~~~~~~~~~~~~~
 *     smp_mb(); // B				condition = true;
 * }						smp_mb(); // C
 * remove_wait_queue(&wq_head, &wait);		wq_entry->flags |= WQ_FLAG_WOKEN;
 */
long wait_woken(struct wait_queue_entry *wq_entry, unsigned mode, long timeout)
{
	/*
	 * The below executes an smp_mb(), which matches with the full barrier
	 * executed by the try_to_wake_up() in woken_wake_function() such that
	 * either we see the store to wq_entry->flags in woken_wake_function()
	 * or woken_wake_function() sees our store to current->state.
	 */
	set_current_state(mode); /* A */
	if (!(wq_entry->flags & WQ_FLAG_WOKEN) && !is_kthread_should_stop())
		timeout = schedule_timeout(timeout);
	__set_current_state(TASK_RUNNING);

	/*
	 * The below executes an smp_mb(), which matches with the smp_mb() (C)
	 * in woken_wake_function() such that either we see the wait condition
	 * being true or the store to wq_entry->flags in woken_wake_function()
	 * follows ours in the coherence order.
	 */
	smp_store_mb(wq_entry->flags, wq_entry->flags & ~WQ_FLAG_WOKEN); /* B */

	return timeout;
}
EXPORT_SYMBOL(wait_woken);

int woken_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key)
{
	/* Pairs with the smp_store_mb() in wait_woken(). */
	smp_mb(); /* C */
	wq_entry->flags |= WQ_FLAG_WOKEN;

	return default_wake_function(wq_entry, mode, sync, key);
}
EXPORT_SYMBOL(woken_wake_function);