1 /* 2 * Generic waiting primitives. 3 * 4 * (C) 2004 Nadia Yvette Chambers, Oracle 5 */ 6 #include <linux/init.h> 7 #include <linux/export.h> 8 #include <linux/sched.h> 9 #include <linux/mm.h> 10 #include <linux/wait.h> 11 #include <linux/hash.h> 12 13 void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *key) 14 { 15 spin_lock_init(&q->lock); 16 lockdep_set_class_and_name(&q->lock, key, name); 17 INIT_LIST_HEAD(&q->task_list); 18 } 19 20 EXPORT_SYMBOL(__init_waitqueue_head); 21 22 void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) 23 { 24 unsigned long flags; 25 26 wait->flags &= ~WQ_FLAG_EXCLUSIVE; 27 spin_lock_irqsave(&q->lock, flags); 28 __add_wait_queue(q, wait); 29 spin_unlock_irqrestore(&q->lock, flags); 30 } 31 EXPORT_SYMBOL(add_wait_queue); 32 33 void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait) 34 { 35 unsigned long flags; 36 37 wait->flags |= WQ_FLAG_EXCLUSIVE; 38 spin_lock_irqsave(&q->lock, flags); 39 __add_wait_queue_tail(q, wait); 40 spin_unlock_irqrestore(&q->lock, flags); 41 } 42 EXPORT_SYMBOL(add_wait_queue_exclusive); 43 44 void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) 45 { 46 unsigned long flags; 47 48 spin_lock_irqsave(&q->lock, flags); 49 __remove_wait_queue(q, wait); 50 spin_unlock_irqrestore(&q->lock, flags); 51 } 52 EXPORT_SYMBOL(remove_wait_queue); 53 54 55 /* 56 * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just 57 * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve 58 * number) then we wake all the non-exclusive tasks and one exclusive task. 59 * 60 * There are circumstances in which we can try to wake a task which has already 61 * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns 62 * zero in this (rare) case, and we handle it by continuing to scan the queue. 63 */ 64 static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, 65 int nr_exclusive, int wake_flags, void *key) 66 { 67 wait_queue_t *curr, *next; 68 69 list_for_each_entry_safe(curr, next, &q->task_list, task_list) { 70 unsigned flags = curr->flags; 71 72 if (curr->func(curr, mode, wake_flags, key) && 73 (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) 74 break; 75 } 76 } 77 78 /** 79 * __wake_up - wake up threads blocked on a waitqueue. 80 * @q: the waitqueue 81 * @mode: which threads 82 * @nr_exclusive: how many wake-one or wake-many threads to wake up 83 * @key: is directly passed to the wakeup function 84 * 85 * It may be assumed that this function implies a write memory barrier before 86 * changing the task state if and only if any tasks are woken up. 87 */ 88 void __wake_up(wait_queue_head_t *q, unsigned int mode, 89 int nr_exclusive, void *key) 90 { 91 unsigned long flags; 92 93 spin_lock_irqsave(&q->lock, flags); 94 __wake_up_common(q, mode, nr_exclusive, 0, key); 95 spin_unlock_irqrestore(&q->lock, flags); 96 } 97 EXPORT_SYMBOL(__wake_up); 98 99 /* 100 * Same as __wake_up but called with the spinlock in wait_queue_head_t held. 101 */ 102 void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr) 103 { 104 __wake_up_common(q, mode, nr, 0, NULL); 105 } 106 EXPORT_SYMBOL_GPL(__wake_up_locked); 107 108 void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key) 109 { 110 __wake_up_common(q, mode, 1, 0, key); 111 } 112 EXPORT_SYMBOL_GPL(__wake_up_locked_key); 113 114 /** 115 * __wake_up_sync_key - wake up threads blocked on a waitqueue. 116 * @q: the waitqueue 117 * @mode: which threads 118 * @nr_exclusive: how many wake-one or wake-many threads to wake up 119 * @key: opaque value to be passed to wakeup targets 120 * 121 * The sync wakeup differs that the waker knows that it will schedule 122 * away soon, so while the target thread will be woken up, it will not 123 * be migrated to another CPU - ie. the two threads are 'synchronized' 124 * with each other. This can prevent needless bouncing between CPUs. 125 * 126 * On UP it can prevent extra preemption. 127 * 128 * It may be assumed that this function implies a write memory barrier before 129 * changing the task state if and only if any tasks are woken up. 130 */ 131 void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, 132 int nr_exclusive, void *key) 133 { 134 unsigned long flags; 135 int wake_flags = 1; /* XXX WF_SYNC */ 136 137 if (unlikely(!q)) 138 return; 139 140 if (unlikely(nr_exclusive != 1)) 141 wake_flags = 0; 142 143 spin_lock_irqsave(&q->lock, flags); 144 __wake_up_common(q, mode, nr_exclusive, wake_flags, key); 145 spin_unlock_irqrestore(&q->lock, flags); 146 } 147 EXPORT_SYMBOL_GPL(__wake_up_sync_key); 148 149 /* 150 * __wake_up_sync - see __wake_up_sync_key() 151 */ 152 void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive) 153 { 154 __wake_up_sync_key(q, mode, nr_exclusive, NULL); 155 } 156 EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */ 157 158 /* 159 * Note: we use "set_current_state()" _after_ the wait-queue add, 160 * because we need a memory barrier there on SMP, so that any 161 * wake-function that tests for the wait-queue being active 162 * will be guaranteed to see waitqueue addition _or_ subsequent 163 * tests in this thread will see the wakeup having taken place. 164 * 165 * The spin_unlock() itself is semi-permeable and only protects 166 * one way (it only protects stuff inside the critical region and 167 * stops them from bleeding out - it would still allow subsequent 168 * loads to move into the critical region). 169 */ 170 void 171 prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state) 172 { 173 unsigned long flags; 174 175 wait->flags &= ~WQ_FLAG_EXCLUSIVE; 176 spin_lock_irqsave(&q->lock, flags); 177 if (list_empty(&wait->task_list)) 178 __add_wait_queue(q, wait); 179 set_current_state(state); 180 spin_unlock_irqrestore(&q->lock, flags); 181 } 182 EXPORT_SYMBOL(prepare_to_wait); 183 184 void 185 prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state) 186 { 187 unsigned long flags; 188 189 wait->flags |= WQ_FLAG_EXCLUSIVE; 190 spin_lock_irqsave(&q->lock, flags); 191 if (list_empty(&wait->task_list)) 192 __add_wait_queue_tail(q, wait); 193 set_current_state(state); 194 spin_unlock_irqrestore(&q->lock, flags); 195 } 196 EXPORT_SYMBOL(prepare_to_wait_exclusive); 197 198 long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state) 199 { 200 unsigned long flags; 201 202 if (signal_pending_state(state, current)) 203 return -ERESTARTSYS; 204 205 wait->private = current; 206 wait->func = autoremove_wake_function; 207 208 spin_lock_irqsave(&q->lock, flags); 209 if (list_empty(&wait->task_list)) { 210 if (wait->flags & WQ_FLAG_EXCLUSIVE) 211 __add_wait_queue_tail(q, wait); 212 else 213 __add_wait_queue(q, wait); 214 } 215 set_current_state(state); 216 spin_unlock_irqrestore(&q->lock, flags); 217 218 return 0; 219 } 220 EXPORT_SYMBOL(prepare_to_wait_event); 221 222 /** 223 * finish_wait - clean up after waiting in a queue 224 * @q: waitqueue waited on 225 * @wait: wait descriptor 226 * 227 * Sets current thread back to running state and removes 228 * the wait descriptor from the given waitqueue if still 229 * queued. 230 */ 231 void finish_wait(wait_queue_head_t *q, wait_queue_t *wait) 232 { 233 unsigned long flags; 234 235 __set_current_state(TASK_RUNNING); 236 /* 237 * We can check for list emptiness outside the lock 238 * IFF: 239 * - we use the "careful" check that verifies both 240 * the next and prev pointers, so that there cannot 241 * be any half-pending updates in progress on other 242 * CPU's that we haven't seen yet (and that might 243 * still change the stack area. 244 * and 245 * - all other users take the lock (ie we can only 246 * have _one_ other CPU that looks at or modifies 247 * the list). 248 */ 249 if (!list_empty_careful(&wait->task_list)) { 250 spin_lock_irqsave(&q->lock, flags); 251 list_del_init(&wait->task_list); 252 spin_unlock_irqrestore(&q->lock, flags); 253 } 254 } 255 EXPORT_SYMBOL(finish_wait); 256 257 /** 258 * abort_exclusive_wait - abort exclusive waiting in a queue 259 * @q: waitqueue waited on 260 * @wait: wait descriptor 261 * @mode: runstate of the waiter to be woken 262 * @key: key to identify a wait bit queue or %NULL 263 * 264 * Sets current thread back to running state and removes 265 * the wait descriptor from the given waitqueue if still 266 * queued. 267 * 268 * Wakes up the next waiter if the caller is concurrently 269 * woken up through the queue. 270 * 271 * This prevents waiter starvation where an exclusive waiter 272 * aborts and is woken up concurrently and no one wakes up 273 * the next waiter. 274 */ 275 void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, 276 unsigned int mode, void *key) 277 { 278 unsigned long flags; 279 280 __set_current_state(TASK_RUNNING); 281 spin_lock_irqsave(&q->lock, flags); 282 if (!list_empty(&wait->task_list)) 283 list_del_init(&wait->task_list); 284 else if (waitqueue_active(q)) 285 __wake_up_locked_key(q, mode, key); 286 spin_unlock_irqrestore(&q->lock, flags); 287 } 288 EXPORT_SYMBOL(abort_exclusive_wait); 289 290 int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key) 291 { 292 int ret = default_wake_function(wait, mode, sync, key); 293 294 if (ret) 295 list_del_init(&wait->task_list); 296 return ret; 297 } 298 EXPORT_SYMBOL(autoremove_wake_function); 299 300 int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *arg) 301 { 302 struct wait_bit_key *key = arg; 303 struct wait_bit_queue *wait_bit 304 = container_of(wait, struct wait_bit_queue, wait); 305 306 if (wait_bit->key.flags != key->flags || 307 wait_bit->key.bit_nr != key->bit_nr || 308 test_bit(key->bit_nr, key->flags)) 309 return 0; 310 else 311 return autoremove_wake_function(wait, mode, sync, key); 312 } 313 EXPORT_SYMBOL(wake_bit_function); 314 315 /* 316 * To allow interruptible waiting and asynchronous (i.e. nonblocking) 317 * waiting, the actions of __wait_on_bit() and __wait_on_bit_lock() are 318 * permitted return codes. Nonzero return codes halt waiting and return. 319 */ 320 int __sched 321 __wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q, 322 wait_bit_action_f *action, unsigned mode) 323 { 324 int ret = 0; 325 326 do { 327 prepare_to_wait(wq, &q->wait, mode); 328 if (test_bit(q->key.bit_nr, q->key.flags)) 329 ret = (*action)(&q->key); 330 } while (test_bit(q->key.bit_nr, q->key.flags) && !ret); 331 finish_wait(wq, &q->wait); 332 return ret; 333 } 334 EXPORT_SYMBOL(__wait_on_bit); 335 336 int __sched out_of_line_wait_on_bit(void *word, int bit, 337 wait_bit_action_f *action, unsigned mode) 338 { 339 wait_queue_head_t *wq = bit_waitqueue(word, bit); 340 DEFINE_WAIT_BIT(wait, word, bit); 341 342 return __wait_on_bit(wq, &wait, action, mode); 343 } 344 EXPORT_SYMBOL(out_of_line_wait_on_bit); 345 346 int __sched out_of_line_wait_on_bit_timeout( 347 void *word, int bit, wait_bit_action_f *action, 348 unsigned mode, unsigned long timeout) 349 { 350 wait_queue_head_t *wq = bit_waitqueue(word, bit); 351 DEFINE_WAIT_BIT(wait, word, bit); 352 353 wait.key.timeout = jiffies + timeout; 354 return __wait_on_bit(wq, &wait, action, mode); 355 } 356 EXPORT_SYMBOL_GPL(out_of_line_wait_on_bit_timeout); 357 358 int __sched 359 __wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q, 360 wait_bit_action_f *action, unsigned mode) 361 { 362 do { 363 int ret; 364 365 prepare_to_wait_exclusive(wq, &q->wait, mode); 366 if (!test_bit(q->key.bit_nr, q->key.flags)) 367 continue; 368 ret = action(&q->key); 369 if (!ret) 370 continue; 371 abort_exclusive_wait(wq, &q->wait, mode, &q->key); 372 return ret; 373 } while (test_and_set_bit(q->key.bit_nr, q->key.flags)); 374 finish_wait(wq, &q->wait); 375 return 0; 376 } 377 EXPORT_SYMBOL(__wait_on_bit_lock); 378 379 int __sched out_of_line_wait_on_bit_lock(void *word, int bit, 380 wait_bit_action_f *action, unsigned mode) 381 { 382 wait_queue_head_t *wq = bit_waitqueue(word, bit); 383 DEFINE_WAIT_BIT(wait, word, bit); 384 385 return __wait_on_bit_lock(wq, &wait, action, mode); 386 } 387 EXPORT_SYMBOL(out_of_line_wait_on_bit_lock); 388 389 void __wake_up_bit(wait_queue_head_t *wq, void *word, int bit) 390 { 391 struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit); 392 if (waitqueue_active(wq)) 393 __wake_up(wq, TASK_NORMAL, 1, &key); 394 } 395 EXPORT_SYMBOL(__wake_up_bit); 396 397 /** 398 * wake_up_bit - wake up a waiter on a bit 399 * @word: the word being waited on, a kernel virtual address 400 * @bit: the bit of the word being waited on 401 * 402 * There is a standard hashed waitqueue table for generic use. This 403 * is the part of the hashtable's accessor API that wakes up waiters 404 * on a bit. For instance, if one were to have waiters on a bitflag, 405 * one would call wake_up_bit() after clearing the bit. 406 * 407 * In order for this to function properly, as it uses waitqueue_active() 408 * internally, some kind of memory barrier must be done prior to calling 409 * this. Typically, this will be smp_mb__after_atomic(), but in some 410 * cases where bitflags are manipulated non-atomically under a lock, one 411 * may need to use a less regular barrier, such fs/inode.c's smp_mb(), 412 * because spin_unlock() does not guarantee a memory barrier. 413 */ 414 void wake_up_bit(void *word, int bit) 415 { 416 __wake_up_bit(bit_waitqueue(word, bit), word, bit); 417 } 418 EXPORT_SYMBOL(wake_up_bit); 419 420 wait_queue_head_t *bit_waitqueue(void *word, int bit) 421 { 422 const int shift = BITS_PER_LONG == 32 ? 5 : 6; 423 const struct zone *zone = page_zone(virt_to_page(word)); 424 unsigned long val = (unsigned long)word << shift | bit; 425 426 return &zone->wait_table[hash_long(val, zone->wait_table_bits)]; 427 } 428 EXPORT_SYMBOL(bit_waitqueue); 429 430 /* 431 * Manipulate the atomic_t address to produce a better bit waitqueue table hash 432 * index (we're keying off bit -1, but that would produce a horrible hash 433 * value). 434 */ 435 static inline wait_queue_head_t *atomic_t_waitqueue(atomic_t *p) 436 { 437 if (BITS_PER_LONG == 64) { 438 unsigned long q = (unsigned long)p; 439 return bit_waitqueue((void *)(q & ~1), q & 1); 440 } 441 return bit_waitqueue(p, 0); 442 } 443 444 static int wake_atomic_t_function(wait_queue_t *wait, unsigned mode, int sync, 445 void *arg) 446 { 447 struct wait_bit_key *key = arg; 448 struct wait_bit_queue *wait_bit 449 = container_of(wait, struct wait_bit_queue, wait); 450 atomic_t *val = key->flags; 451 452 if (wait_bit->key.flags != key->flags || 453 wait_bit->key.bit_nr != key->bit_nr || 454 atomic_read(val) != 0) 455 return 0; 456 return autoremove_wake_function(wait, mode, sync, key); 457 } 458 459 /* 460 * To allow interruptible waiting and asynchronous (i.e. nonblocking) waiting, 461 * the actions of __wait_on_atomic_t() are permitted return codes. Nonzero 462 * return codes halt waiting and return. 463 */ 464 static __sched 465 int __wait_on_atomic_t(wait_queue_head_t *wq, struct wait_bit_queue *q, 466 int (*action)(atomic_t *), unsigned mode) 467 { 468 atomic_t *val; 469 int ret = 0; 470 471 do { 472 prepare_to_wait(wq, &q->wait, mode); 473 val = q->key.flags; 474 if (atomic_read(val) == 0) 475 break; 476 ret = (*action)(val); 477 } while (!ret && atomic_read(val) != 0); 478 finish_wait(wq, &q->wait); 479 return ret; 480 } 481 482 #define DEFINE_WAIT_ATOMIC_T(name, p) \ 483 struct wait_bit_queue name = { \ 484 .key = __WAIT_ATOMIC_T_KEY_INITIALIZER(p), \ 485 .wait = { \ 486 .private = current, \ 487 .func = wake_atomic_t_function, \ 488 .task_list = \ 489 LIST_HEAD_INIT((name).wait.task_list), \ 490 }, \ 491 } 492 493 __sched int out_of_line_wait_on_atomic_t(atomic_t *p, int (*action)(atomic_t *), 494 unsigned mode) 495 { 496 wait_queue_head_t *wq = atomic_t_waitqueue(p); 497 DEFINE_WAIT_ATOMIC_T(wait, p); 498 499 return __wait_on_atomic_t(wq, &wait, action, mode); 500 } 501 EXPORT_SYMBOL(out_of_line_wait_on_atomic_t); 502 503 /** 504 * wake_up_atomic_t - Wake up a waiter on a atomic_t 505 * @p: The atomic_t being waited on, a kernel virtual address 506 * 507 * Wake up anyone waiting for the atomic_t to go to zero. 508 * 509 * Abuse the bit-waker function and its waitqueue hash table set (the atomic_t 510 * check is done by the waiter's wake function, not the by the waker itself). 511 */ 512 void wake_up_atomic_t(atomic_t *p) 513 { 514 __wake_up_bit(atomic_t_waitqueue(p), p, WAIT_ATOMIC_T_BIT_NR); 515 } 516 EXPORT_SYMBOL(wake_up_atomic_t); 517 518 __sched int bit_wait(struct wait_bit_key *word) 519 { 520 if (signal_pending_state(current->state, current)) 521 return 1; 522 schedule(); 523 return 0; 524 } 525 EXPORT_SYMBOL(bit_wait); 526 527 __sched int bit_wait_io(struct wait_bit_key *word) 528 { 529 if (signal_pending_state(current->state, current)) 530 return 1; 531 io_schedule(); 532 return 0; 533 } 534 EXPORT_SYMBOL(bit_wait_io); 535 536 __sched int bit_wait_timeout(struct wait_bit_key *word) 537 { 538 unsigned long now = ACCESS_ONCE(jiffies); 539 if (signal_pending_state(current->state, current)) 540 return 1; 541 if (time_after_eq(now, word->timeout)) 542 return -EAGAIN; 543 schedule_timeout(word->timeout - now); 544 return 0; 545 } 546 EXPORT_SYMBOL_GPL(bit_wait_timeout); 547 548 __sched int bit_wait_io_timeout(struct wait_bit_key *word) 549 { 550 unsigned long now = ACCESS_ONCE(jiffies); 551 if (signal_pending_state(current->state, current)) 552 return 1; 553 if (time_after_eq(now, word->timeout)) 554 return -EAGAIN; 555 io_schedule_timeout(word->timeout - now); 556 return 0; 557 } 558 EXPORT_SYMBOL_GPL(bit_wait_io_timeout); 559