1 /* 2 * linux/kernel/workqueue.c 3 * 4 * Generic mechanism for defining kernel helper threads for running 5 * arbitrary tasks in process context. 6 * 7 * Started by Ingo Molnar, Copyright (C) 2002 8 * 9 * Derived from the taskqueue/keventd code by: 10 * 11 * David Woodhouse <dwmw2@infradead.org> 12 * Andrew Morton 13 * Kai Petzke <wpp@marie.physik.tu-berlin.de> 14 * Theodore Ts'o <tytso@mit.edu> 15 * 16 * Made to use alloc_percpu by Christoph Lameter. 17 */ 18 19 #include <linux/module.h> 20 #include <linux/kernel.h> 21 #include <linux/sched.h> 22 #include <linux/init.h> 23 #include <linux/signal.h> 24 #include <linux/completion.h> 25 #include <linux/workqueue.h> 26 #include <linux/slab.h> 27 #include <linux/cpu.h> 28 #include <linux/notifier.h> 29 #include <linux/kthread.h> 30 #include <linux/hardirq.h> 31 #include <linux/mempolicy.h> 32 #include <linux/freezer.h> 33 #include <linux/kallsyms.h> 34 #include <linux/debug_locks.h> 35 #include <linux/lockdep.h> 36 #include <linux/idr.h> 37 38 #include "workqueue_sched.h" 39 40 enum { 41 /* global_cwq flags */ 42 GCWQ_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ 43 GCWQ_MANAGING_WORKERS = 1 << 1, /* managing workers */ 44 GCWQ_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */ 45 GCWQ_FREEZING = 1 << 3, /* freeze in progress */ 46 GCWQ_HIGHPRI_PENDING = 1 << 4, /* highpri works on queue */ 47 48 /* worker flags */ 49 WORKER_STARTED = 1 << 0, /* started */ 50 WORKER_DIE = 1 << 1, /* die die die */ 51 WORKER_IDLE = 1 << 2, /* is idle */ 52 WORKER_PREP = 1 << 3, /* preparing to run works */ 53 WORKER_ROGUE = 1 << 4, /* not bound to any cpu */ 54 WORKER_REBIND = 1 << 5, /* mom is home, come back */ 55 WORKER_CPU_INTENSIVE = 1 << 6, /* cpu intensive */ 56 WORKER_UNBOUND = 1 << 7, /* worker is unbound */ 57 58 WORKER_NOT_RUNNING = WORKER_PREP | WORKER_ROGUE | WORKER_REBIND | 59 WORKER_CPU_INTENSIVE | WORKER_UNBOUND, 60 61 /* gcwq->trustee_state */ 62 TRUSTEE_START = 0, /* start */ 63 TRUSTEE_IN_CHARGE = 1, /* trustee in charge of gcwq */ 64 TRUSTEE_BUTCHER = 2, /* butcher workers */ 65 TRUSTEE_RELEASE = 3, /* release workers */ 66 TRUSTEE_DONE = 4, /* trustee is done */ 67 68 BUSY_WORKER_HASH_ORDER = 6, /* 64 pointers */ 69 BUSY_WORKER_HASH_SIZE = 1 << BUSY_WORKER_HASH_ORDER, 70 BUSY_WORKER_HASH_MASK = BUSY_WORKER_HASH_SIZE - 1, 71 72 MAX_IDLE_WORKERS_RATIO = 4, /* 1/4 of busy can be idle */ 73 IDLE_WORKER_TIMEOUT = 300 * HZ, /* keep idle ones for 5 mins */ 74 75 MAYDAY_INITIAL_TIMEOUT = HZ / 100, /* call for help after 10ms */ 76 MAYDAY_INTERVAL = HZ / 10, /* and then every 100ms */ 77 CREATE_COOLDOWN = HZ, /* time to breathe after fail */ 78 TRUSTEE_COOLDOWN = HZ / 10, /* for trustee draining */ 79 80 /* 81 * Rescue workers are used only in emergencies and shared by 82 * all cpus. Give -20. 83 */ 84 RESCUER_NICE_LEVEL = -20, 85 }; 86 87 /* 88 * Structure fields follow one of the following exclusion rules. 89 * 90 * I: Set during initialization and read-only afterwards. 91 * 92 * P: Preemption protected. Disabling preemption is enough; the field should 93 * only be modified and accessed from the local cpu. 94 * 95 * L: gcwq->lock protected. Access with gcwq->lock held. 96 * 97 * X: During normal operation, modification requires gcwq->lock and 98 * should be done only from local cpu. Either disabling preemption 99 * on local cpu or grabbing gcwq->lock is enough for read access. 100 * If GCWQ_DISASSOCIATED is set, it's identical to L. 101 * 102 * F: wq->flush_mutex protected. 103 * 104 * W: workqueue_lock protected.
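 *
 * Illustrative note (editorial, not from the original source): for an
 * "L:" field such as gcwq->worklist, every access looks like
 *
 *	spin_lock_irq(&gcwq->lock);
 *	list_add_tail(&work->entry, &gcwq->worklist);
 *	spin_unlock_irq(&gcwq->lock);
 *
 * whereas an "I:" field such as worker->task is written once at
 * creation time and may be read locklessly afterwards.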
105 */ 106 107 struct global_cwq; 108 109 /* 110 * The poor guys doing the actual heavy lifting. All on-duty workers 111 * are either serving the manager role, on idle list or on busy hash. 112 */ 113 struct worker { 114 /* on idle list while idle, on busy hash table while busy */ 115 union { 116 struct list_head entry; /* L: while idle */ 117 struct hlist_node hentry; /* L: while busy */ 118 }; 119 120 struct work_struct *current_work; /* L: work being processed */ 121 struct cpu_workqueue_struct *current_cwq; /* L: current_work's cwq */ 122 struct list_head scheduled; /* L: scheduled works */ 123 struct task_struct *task; /* I: worker task */ 124 struct global_cwq *gcwq; /* I: the associated gcwq */ 125 /* 64 bytes boundary on 64bit, 32 on 32bit */ 126 unsigned long last_active; /* L: last active timestamp */ 127 unsigned int flags; /* X: flags */ 128 int id; /* I: worker id */ 129 struct work_struct rebind_work; /* L: rebind worker to cpu */ 130 }; 131 132 /* 133 * Global per-cpu workqueue. There's one and only one for each cpu 134 * and all works are queued and processed here regardless of their 135 * target workqueues. 136 */ 137 struct global_cwq { 138 spinlock_t lock; /* the gcwq lock */ 139 struct list_head worklist; /* L: list of pending works */ 140 unsigned int cpu; /* I: the associated cpu */ 141 unsigned int flags; /* L: GCWQ_* flags */ 142 143 int nr_workers; /* L: total number of workers */ 144 int nr_idle; /* L: currently idle ones */ 145 146 /* workers are chained either in the idle_list or busy_hash */ 147 struct list_head idle_list; /* X: list of idle workers */ 148 struct hlist_head busy_hash[BUSY_WORKER_HASH_SIZE]; 149 /* L: hash of busy workers */ 150 151 struct timer_list idle_timer; /* L: worker idle timeout */ 152 struct timer_list mayday_timer; /* L: SOS timer for dworkers */ 153 154 struct ida worker_ida; /* L: for worker IDs */ 155 156 struct task_struct *trustee; /* L: for gcwq shutdown */ 157 unsigned int trustee_state; /* L: trustee state */ 158 wait_queue_head_t trustee_wait; /* trustee wait */ 159 struct worker *first_idle; /* L: first idle worker */ 160 } ____cacheline_aligned_in_smp; 161 162 /* 163 * The per-CPU workqueue. The lower WORK_STRUCT_FLAG_BITS of 164 * work_struct->data are used for flags and thus cwqs need to be 165 * aligned at two's power of the number of flag bits. 166 */ 167 struct cpu_workqueue_struct { 168 struct global_cwq *gcwq; /* I: the associated gcwq */ 169 struct workqueue_struct *wq; /* I: the owning workqueue */ 170 int work_color; /* L: current color */ 171 int flush_color; /* L: flushing color */ 172 int nr_in_flight[WORK_NR_COLORS]; 173 /* L: nr of in_flight works */ 174 int nr_active; /* L: nr of active works */ 175 int max_active; /* L: max active works */ 176 struct list_head delayed_works; /* L: delayed works */ 177 }; 178 179 /* 180 * Structure used to wait for workqueue flush. 181 */ 182 struct wq_flusher { 183 struct list_head list; /* F: list of flushers */ 184 int flush_color; /* F: flush color waiting for */ 185 struct completion done; /* flush completion */ 186 }; 187 188 /* 189 * All cpumasks are assumed to be always set on UP and thus can't be 190 * used to determine whether there's something to be done. 
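 *
 * Editorial sketch (not in the original comment): on UP the mask
 * degenerates to bit 0 of an unsigned long, so
 *
 *	for_each_mayday_cpu(cpu, wq->mayday_mask)
 *		do_something(cpu);
 *
 * (do_something() is a placeholder) runs the body at most once, with
 * cpu == 0, and only when a rescuer was actually requested via
 * mayday_test_and_set_cpu().  That is why this wrapper exists instead
 * of reusing the generic cpumask helpers.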
191 */ 192 #ifdef CONFIG_SMP 193 typedef cpumask_var_t mayday_mask_t; 194 #define mayday_test_and_set_cpu(cpu, mask) \ 195 cpumask_test_and_set_cpu((cpu), (mask)) 196 #define mayday_clear_cpu(cpu, mask) cpumask_clear_cpu((cpu), (mask)) 197 #define for_each_mayday_cpu(cpu, mask) for_each_cpu((cpu), (mask)) 198 #define alloc_mayday_mask(maskp, gfp) alloc_cpumask_var((maskp), (gfp)) 199 #define free_mayday_mask(mask) free_cpumask_var((mask)) 200 #else 201 typedef unsigned long mayday_mask_t; 202 #define mayday_test_and_set_cpu(cpu, mask) test_and_set_bit(0, &(mask)) 203 #define mayday_clear_cpu(cpu, mask) clear_bit(0, &(mask)) 204 #define for_each_mayday_cpu(cpu, mask) if ((cpu) = 0, (mask)) 205 #define alloc_mayday_mask(maskp, gfp) true 206 #define free_mayday_mask(mask) do { } while (0) 207 #endif 208 209 /* 210 * The externally visible workqueue abstraction is an array of 211 * per-CPU workqueues: 212 */ 213 struct workqueue_struct { 214 unsigned int flags; /* I: WQ_* flags */ 215 union { 216 struct cpu_workqueue_struct __percpu *pcpu; 217 struct cpu_workqueue_struct *single; 218 unsigned long v; 219 } cpu_wq; /* I: cwq's */ 220 struct list_head list; /* W: list of all workqueues */ 221 222 struct mutex flush_mutex; /* protects wq flushing */ 223 int work_color; /* F: current work color */ 224 int flush_color; /* F: current flush color */ 225 atomic_t nr_cwqs_to_flush; /* flush in progress */ 226 struct wq_flusher *first_flusher; /* F: first flusher */ 227 struct list_head flusher_queue; /* F: flush waiters */ 228 struct list_head flusher_overflow; /* F: flush overflow list */ 229 230 mayday_mask_t mayday_mask; /* cpus requesting rescue */ 231 struct worker *rescuer; /* I: rescue worker */ 232 233 int saved_max_active; /* W: saved cwq max_active */ 234 const char *name; /* I: workqueue name */ 235 #ifdef CONFIG_LOCKDEP 236 struct lockdep_map lockdep_map; 237 #endif 238 }; 239 240 struct workqueue_struct *system_wq __read_mostly; 241 struct workqueue_struct *system_long_wq __read_mostly; 242 struct workqueue_struct *system_nrt_wq __read_mostly; 243 struct workqueue_struct *system_unbound_wq __read_mostly; 244 EXPORT_SYMBOL_GPL(system_wq); 245 EXPORT_SYMBOL_GPL(system_long_wq); 246 EXPORT_SYMBOL_GPL(system_nrt_wq); 247 EXPORT_SYMBOL_GPL(system_unbound_wq); 248 249 #define for_each_busy_worker(worker, i, pos, gcwq) \ 250 for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++) \ 251 hlist_for_each_entry(worker, pos, &gcwq->busy_hash[i], hentry) 252 253 static inline int __next_gcwq_cpu(int cpu, const struct cpumask *mask, 254 unsigned int sw) 255 { 256 if (cpu < nr_cpu_ids) { 257 if (sw & 1) { 258 cpu = cpumask_next(cpu, mask); 259 if (cpu < nr_cpu_ids) 260 return cpu; 261 } 262 if (sw & 2) 263 return WORK_CPU_UNBOUND; 264 } 265 return WORK_CPU_NONE; 266 } 267 268 static inline int __next_wq_cpu(int cpu, const struct cpumask *mask, 269 struct workqueue_struct *wq) 270 { 271 return __next_gcwq_cpu(cpu, mask, !(wq->flags & WQ_UNBOUND) ? 1 : 2); 272 } 273 274 /* 275 * CPU iterators 276 * 277 * An extra gcwq is defined for an invalid cpu number 278 * (WORK_CPU_UNBOUND) to host workqueues which are not bound to any 279 * specific CPU. The following iterators are similar to 280 * for_each_*_cpu() iterators but also considers the unbound gcwq. 
281 * 282 * for_each_gcwq_cpu() : possible CPUs + WORK_CPU_UNBOUND 283 * for_each_online_gcwq_cpu() : online CPUs + WORK_CPU_UNBOUND 284 * for_each_cwq_cpu() : possible CPUs for bound workqueues, 285 * WORK_CPU_UNBOUND for unbound workqueues 286 */ 287 #define for_each_gcwq_cpu(cpu) \ 288 for ((cpu) = __next_gcwq_cpu(-1, cpu_possible_mask, 3); \ 289 (cpu) < WORK_CPU_NONE; \ 290 (cpu) = __next_gcwq_cpu((cpu), cpu_possible_mask, 3)) 291 292 #define for_each_online_gcwq_cpu(cpu) \ 293 for ((cpu) = __next_gcwq_cpu(-1, cpu_online_mask, 3); \ 294 (cpu) < WORK_CPU_NONE; \ 295 (cpu) = __next_gcwq_cpu((cpu), cpu_online_mask, 3)) 296 297 #define for_each_cwq_cpu(cpu, wq) \ 298 for ((cpu) = __next_wq_cpu(-1, cpu_possible_mask, (wq)); \ 299 (cpu) < WORK_CPU_NONE; \ 300 (cpu) = __next_wq_cpu((cpu), cpu_possible_mask, (wq))) 301 302 #ifdef CONFIG_LOCKDEP 303 /** 304 * in_workqueue_context() - in context of specified workqueue? 305 * @wq: the workqueue of interest 306 * 307 * Checks lockdep state to see if the current task is executing from 308 * within a workqueue item. This function exists only if lockdep is 309 * enabled. 310 */ 311 int in_workqueue_context(struct workqueue_struct *wq) 312 { 313 return lock_is_held(&wq->lockdep_map); 314 } 315 #endif 316 317 #ifdef CONFIG_DEBUG_OBJECTS_WORK 318 319 static struct debug_obj_descr work_debug_descr; 320 321 /* 322 * fixup_init is called when: 323 * - an active object is initialized 324 */ 325 static int work_fixup_init(void *addr, enum debug_obj_state state) 326 { 327 struct work_struct *work = addr; 328 329 switch (state) { 330 case ODEBUG_STATE_ACTIVE: 331 cancel_work_sync(work); 332 debug_object_init(work, &work_debug_descr); 333 return 1; 334 default: 335 return 0; 336 } 337 } 338 339 /* 340 * fixup_activate is called when: 341 * - an active object is activated 342 * - an unknown object is activated (might be a statically initialized object) 343 */ 344 static int work_fixup_activate(void *addr, enum debug_obj_state state) 345 { 346 struct work_struct *work = addr; 347 348 switch (state) { 349 350 case ODEBUG_STATE_NOTAVAILABLE: 351 /* 352 * This is not really a fixup. The work struct was 353 * statically initialized. We just make sure that it 354 * is tracked in the object tracker. 
355 */ 356 if (test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work))) { 357 debug_object_init(work, &work_debug_descr); 358 debug_object_activate(work, &work_debug_descr); 359 return 0; 360 } 361 WARN_ON_ONCE(1); 362 return 0; 363 364 case ODEBUG_STATE_ACTIVE: 365 WARN_ON(1); 366 367 default: 368 return 0; 369 } 370 } 371 372 /* 373 * fixup_free is called when: 374 * - an active object is freed 375 */ 376 static int work_fixup_free(void *addr, enum debug_obj_state state) 377 { 378 struct work_struct *work = addr; 379 380 switch (state) { 381 case ODEBUG_STATE_ACTIVE: 382 cancel_work_sync(work); 383 debug_object_free(work, &work_debug_descr); 384 return 1; 385 default: 386 return 0; 387 } 388 } 389 390 static struct debug_obj_descr work_debug_descr = { 391 .name = "work_struct", 392 .fixup_init = work_fixup_init, 393 .fixup_activate = work_fixup_activate, 394 .fixup_free = work_fixup_free, 395 }; 396 397 static inline void debug_work_activate(struct work_struct *work) 398 { 399 debug_object_activate(work, &work_debug_descr); 400 } 401 402 static inline void debug_work_deactivate(struct work_struct *work) 403 { 404 debug_object_deactivate(work, &work_debug_descr); 405 } 406 407 void __init_work(struct work_struct *work, int onstack) 408 { 409 if (onstack) 410 debug_object_init_on_stack(work, &work_debug_descr); 411 else 412 debug_object_init(work, &work_debug_descr); 413 } 414 EXPORT_SYMBOL_GPL(__init_work); 415 416 void destroy_work_on_stack(struct work_struct *work) 417 { 418 debug_object_free(work, &work_debug_descr); 419 } 420 EXPORT_SYMBOL_GPL(destroy_work_on_stack); 421 422 #else 423 static inline void debug_work_activate(struct work_struct *work) { } 424 static inline void debug_work_deactivate(struct work_struct *work) { } 425 #endif 426 427 /* Serializes the accesses to the list of workqueues. */ 428 static DEFINE_SPINLOCK(workqueue_lock); 429 static LIST_HEAD(workqueues); 430 static bool workqueue_freezing; /* W: have wqs started freezing? */ 431 432 /* 433 * The almighty global cpu workqueues. nr_running is the only field 434 * which is expected to be used frequently by other cpus via 435 * try_to_wake_up(). Put it in a separate cacheline. 436 */ 437 static DEFINE_PER_CPU(struct global_cwq, global_cwq); 438 static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, gcwq_nr_running); 439 440 /* 441 * Global cpu workqueue and nr_running counter for unbound gcwq. The 442 * gcwq is always online, has GCWQ_DISASSOCIATED set, and all its 443 * workers have WORKER_UNBOUND set. 
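 *
 * Editorial example (not in the original comment): get_gcwq() and
 * get_gcwq_nr_running() below hide the special case, e.g.
 *
 *	get_gcwq(WORK_CPU_UNBOUND)  ->  &unbound_global_cwq
 *	get_gcwq(2)                 ->  per-cpu global_cwq of cpu 2
 *
 * and because unbound_gcwq_nr_running is pinned at zero, concurrency
 * management always treats the unbound gcwq as having nothing running
 * and wakes a worker for every queued work.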
444 */ 445 static struct global_cwq unbound_global_cwq; 446 static atomic_t unbound_gcwq_nr_running = ATOMIC_INIT(0); /* always 0 */ 447 448 static int worker_thread(void *__worker); 449 450 static struct global_cwq *get_gcwq(unsigned int cpu) 451 { 452 if (cpu != WORK_CPU_UNBOUND) 453 return &per_cpu(global_cwq, cpu); 454 else 455 return &unbound_global_cwq; 456 } 457 458 static atomic_t *get_gcwq_nr_running(unsigned int cpu) 459 { 460 if (cpu != WORK_CPU_UNBOUND) 461 return &per_cpu(gcwq_nr_running, cpu); 462 else 463 return &unbound_gcwq_nr_running; 464 } 465 466 static struct cpu_workqueue_struct *get_cwq(unsigned int cpu, 467 struct workqueue_struct *wq) 468 { 469 if (!(wq->flags & WQ_UNBOUND)) { 470 if (likely(cpu < nr_cpu_ids)) { 471 #ifdef CONFIG_SMP 472 return per_cpu_ptr(wq->cpu_wq.pcpu, cpu); 473 #else 474 return wq->cpu_wq.single; 475 #endif 476 } 477 } else if (likely(cpu == WORK_CPU_UNBOUND)) 478 return wq->cpu_wq.single; 479 return NULL; 480 } 481 482 static unsigned int work_color_to_flags(int color) 483 { 484 return color << WORK_STRUCT_COLOR_SHIFT; 485 } 486 487 static int get_work_color(struct work_struct *work) 488 { 489 return (*work_data_bits(work) >> WORK_STRUCT_COLOR_SHIFT) & 490 ((1 << WORK_STRUCT_COLOR_BITS) - 1); 491 } 492 493 static int work_next_color(int color) 494 { 495 return (color + 1) % WORK_NR_COLORS; 496 } 497 498 /* 499 * A work's data points to the cwq with WORK_STRUCT_CWQ set while the 500 * work is on queue. Once execution starts, WORK_STRUCT_CWQ is 501 * cleared and the work data contains the cpu number it was last on. 502 * 503 * set_work_{cwq|cpu}() and clear_work_data() can be used to set the 504 * cwq, cpu or clear work->data. These functions should only be 505 * called while the work is owned - ie. while the PENDING bit is set. 506 * 507 * get_work_[g]cwq() can be used to obtain the gcwq or cwq 508 * corresponding to a work. gcwq is available once the work has been 509 * queued anywhere after initialization. cwq is available only from 510 * queueing until execution starts. 
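 *
 * Editorial layout sketch (roughly; widths follow WORK_STRUCT_FLAG_BITS,
 * not part of the original comment):
 *
 *	queued:		[ cwq pointer       ][ flags, CWQ set   ]
 *	executed:	[ last cpu number   ][ flags, CWQ clear ]
 *	never queued:	[ WORK_CPU_NONE     ][ flags            ]
 *
 * set_work_cwq() and set_work_cpu() below produce the first two forms
 * and get_work_cwq()/get_work_gcwq() decode them.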
511 */ 512 static inline void set_work_data(struct work_struct *work, unsigned long data, 513 unsigned long flags) 514 { 515 BUG_ON(!work_pending(work)); 516 atomic_long_set(&work->data, data | flags | work_static(work)); 517 } 518 519 static void set_work_cwq(struct work_struct *work, 520 struct cpu_workqueue_struct *cwq, 521 unsigned long extra_flags) 522 { 523 set_work_data(work, (unsigned long)cwq, 524 WORK_STRUCT_PENDING | WORK_STRUCT_CWQ | extra_flags); 525 } 526 527 static void set_work_cpu(struct work_struct *work, unsigned int cpu) 528 { 529 set_work_data(work, cpu << WORK_STRUCT_FLAG_BITS, WORK_STRUCT_PENDING); 530 } 531 532 static void clear_work_data(struct work_struct *work) 533 { 534 set_work_data(work, WORK_STRUCT_NO_CPU, 0); 535 } 536 537 static struct cpu_workqueue_struct *get_work_cwq(struct work_struct *work) 538 { 539 unsigned long data = atomic_long_read(&work->data); 540 541 if (data & WORK_STRUCT_CWQ) 542 return (void *)(data & WORK_STRUCT_WQ_DATA_MASK); 543 else 544 return NULL; 545 } 546 547 static struct global_cwq *get_work_gcwq(struct work_struct *work) 548 { 549 unsigned long data = atomic_long_read(&work->data); 550 unsigned int cpu; 551 552 if (data & WORK_STRUCT_CWQ) 553 return ((struct cpu_workqueue_struct *) 554 (data & WORK_STRUCT_WQ_DATA_MASK))->gcwq; 555 556 cpu = data >> WORK_STRUCT_FLAG_BITS; 557 if (cpu == WORK_CPU_NONE) 558 return NULL; 559 560 BUG_ON(cpu >= nr_cpu_ids && cpu != WORK_CPU_UNBOUND); 561 return get_gcwq(cpu); 562 } 563 564 /* 565 * Policy functions. These define the policies on how the global 566 * worker pool is managed. Unless noted otherwise, these functions 567 * assume that they're being called with gcwq->lock held. 568 */ 569 570 static bool __need_more_worker(struct global_cwq *gcwq) 571 { 572 return !atomic_read(get_gcwq_nr_running(gcwq->cpu)) || 573 gcwq->flags & GCWQ_HIGHPRI_PENDING; 574 } 575 576 /* 577 * Need to wake up a worker? Called from anything but currently 578 * running workers. 579 */ 580 static bool need_more_worker(struct global_cwq *gcwq) 581 { 582 return !list_empty(&gcwq->worklist) && __need_more_worker(gcwq); 583 } 584 585 /* Can I start working? Called from busy but !running workers. */ 586 static bool may_start_working(struct global_cwq *gcwq) 587 { 588 return gcwq->nr_idle; 589 } 590 591 /* Do I need to keep working? Called from currently running workers. */ 592 static bool keep_working(struct global_cwq *gcwq) 593 { 594 atomic_t *nr_running = get_gcwq_nr_running(gcwq->cpu); 595 596 return !list_empty(&gcwq->worklist) && atomic_read(nr_running) <= 1; 597 } 598 599 /* Do we need a new worker? Called from manager. */ 600 static bool need_to_create_worker(struct global_cwq *gcwq) 601 { 602 return need_more_worker(gcwq) && !may_start_working(gcwq); 603 } 604 605 /* Do I need to be the manager? */ 606 static bool need_to_manage_workers(struct global_cwq *gcwq) 607 { 608 return need_to_create_worker(gcwq) || gcwq->flags & GCWQ_MANAGE_WORKERS; 609 } 610 611 /* Do we have too many workers and should some go away? */ 612 static bool too_many_workers(struct global_cwq *gcwq) 613 { 614 bool managing = gcwq->flags & GCWQ_MANAGING_WORKERS; 615 int nr_idle = gcwq->nr_idle + managing; /* manager is considered idle */ 616 int nr_busy = gcwq->nr_workers - nr_idle; 617 618 return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy; 619 } 620 621 /* 622 * Wake up functions. 623 */ 624 625 /* Return the first worker. 
Safe with preemption disabled */ 626 static struct worker *first_worker(struct global_cwq *gcwq) 627 { 628 if (unlikely(list_empty(&gcwq->idle_list))) 629 return NULL; 630 631 return list_first_entry(&gcwq->idle_list, struct worker, entry); 632 } 633 634 /** 635 * wake_up_worker - wake up an idle worker 636 * @gcwq: gcwq to wake worker for 637 * 638 * Wake up the first idle worker of @gcwq. 639 * 640 * CONTEXT: 641 * spin_lock_irq(gcwq->lock). 642 */ 643 static void wake_up_worker(struct global_cwq *gcwq) 644 { 645 struct worker *worker = first_worker(gcwq); 646 647 if (likely(worker)) 648 wake_up_process(worker->task); 649 } 650 651 /** 652 * wq_worker_waking_up - a worker is waking up 653 * @task: task waking up 654 * @cpu: CPU @task is waking up to 655 * 656 * This function is called during try_to_wake_up() when a worker is 657 * being awoken. 658 * 659 * CONTEXT: 660 * spin_lock_irq(rq->lock) 661 */ 662 void wq_worker_waking_up(struct task_struct *task, unsigned int cpu) 663 { 664 struct worker *worker = kthread_data(task); 665 666 if (likely(!(worker->flags & WORKER_NOT_RUNNING))) 667 atomic_inc(get_gcwq_nr_running(cpu)); 668 } 669 670 /** 671 * wq_worker_sleeping - a worker is going to sleep 672 * @task: task going to sleep 673 * @cpu: CPU in question, must be the current CPU number 674 * 675 * This function is called during schedule() when a busy worker is 676 * going to sleep. Worker on the same cpu can be woken up by 677 * returning pointer to its task. 678 * 679 * CONTEXT: 680 * spin_lock_irq(rq->lock) 681 * 682 * RETURNS: 683 * Worker task on @cpu to wake up, %NULL if none. 684 */ 685 struct task_struct *wq_worker_sleeping(struct task_struct *task, 686 unsigned int cpu) 687 { 688 struct worker *worker = kthread_data(task), *to_wakeup = NULL; 689 struct global_cwq *gcwq = get_gcwq(cpu); 690 atomic_t *nr_running = get_gcwq_nr_running(cpu); 691 692 if (unlikely(worker->flags & WORKER_NOT_RUNNING)) 693 return NULL; 694 695 /* this can only happen on the local cpu */ 696 BUG_ON(cpu != raw_smp_processor_id()); 697 698 /* 699 * The counterpart of the following dec_and_test, implied mb, 700 * worklist not empty test sequence is in insert_work(). 701 * Please read comment there. 702 * 703 * NOT_RUNNING is clear. This means that trustee is not in 704 * charge and we're running on the local cpu w/ rq lock held 705 * and preemption disabled, which in turn means that none else 706 * could be manipulating idle_list, so dereferencing idle_list 707 * without gcwq lock is safe. 708 */ 709 if (atomic_dec_and_test(nr_running) && !list_empty(&gcwq->worklist)) 710 to_wakeup = first_worker(gcwq); 711 return to_wakeup ? to_wakeup->task : NULL; 712 } 713 714 /** 715 * worker_set_flags - set worker flags and adjust nr_running accordingly 716 * @worker: self 717 * @flags: flags to set 718 * @wakeup: wakeup an idle worker if necessary 719 * 720 * Set @flags in @worker->flags and adjust nr_running accordingly. If 721 * nr_running becomes zero and @wakeup is %true, an idle worker is 722 * woken up. 723 * 724 * CONTEXT: 725 * spin_lock_irq(gcwq->lock) 726 */ 727 static inline void worker_set_flags(struct worker *worker, unsigned int flags, 728 bool wakeup) 729 { 730 struct global_cwq *gcwq = worker->gcwq; 731 732 WARN_ON_ONCE(worker->task != current); 733 734 /* 735 * If transitioning into NOT_RUNNING, adjust nr_running and 736 * wake up an idle worker as necessary if requested by 737 * @wakeup. 
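 *
 * Editorial example (not in the original): when process_one_work()
 * marks a running worker CPU_INTENSIVE with @wakeup set, the worker
 * goes from !NOT_RUNNING to NOT_RUNNING, nr_running drops by one and,
 * if that left nothing running while works are still pending, the
 * next idle worker is woken so the gcwq doesn't stall behind the
 * long-running work.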
738 */ 739 if ((flags & WORKER_NOT_RUNNING) && 740 !(worker->flags & WORKER_NOT_RUNNING)) { 741 atomic_t *nr_running = get_gcwq_nr_running(gcwq->cpu); 742 743 if (wakeup) { 744 if (atomic_dec_and_test(nr_running) && 745 !list_empty(&gcwq->worklist)) 746 wake_up_worker(gcwq); 747 } else 748 atomic_dec(nr_running); 749 } 750 751 worker->flags |= flags; 752 } 753 754 /** 755 * worker_clr_flags - clear worker flags and adjust nr_running accordingly 756 * @worker: self 757 * @flags: flags to clear 758 * 759 * Clear @flags in @worker->flags and adjust nr_running accordingly. 760 * 761 * CONTEXT: 762 * spin_lock_irq(gcwq->lock) 763 */ 764 static inline void worker_clr_flags(struct worker *worker, unsigned int flags) 765 { 766 struct global_cwq *gcwq = worker->gcwq; 767 unsigned int oflags = worker->flags; 768 769 WARN_ON_ONCE(worker->task != current); 770 771 worker->flags &= ~flags; 772 773 /* if transitioning out of NOT_RUNNING, increment nr_running */ 774 if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING)) 775 if (!(worker->flags & WORKER_NOT_RUNNING)) 776 atomic_inc(get_gcwq_nr_running(gcwq->cpu)); 777 } 778 779 /** 780 * busy_worker_head - return the busy hash head for a work 781 * @gcwq: gcwq of interest 782 * @work: work to be hashed 783 * 784 * Return hash head of @gcwq for @work. 785 * 786 * CONTEXT: 787 * spin_lock_irq(gcwq->lock). 788 * 789 * RETURNS: 790 * Pointer to the hash head. 791 */ 792 static struct hlist_head *busy_worker_head(struct global_cwq *gcwq, 793 struct work_struct *work) 794 { 795 const int base_shift = ilog2(sizeof(struct work_struct)); 796 unsigned long v = (unsigned long)work; 797 798 /* simple shift and fold hash, do we need something better? */ 799 v >>= base_shift; 800 v += v >> BUSY_WORKER_HASH_ORDER; 801 v &= BUSY_WORKER_HASH_MASK; 802 803 return &gcwq->busy_hash[v]; 804 } 805 806 /** 807 * __find_worker_executing_work - find worker which is executing a work 808 * @gcwq: gcwq of interest 809 * @bwh: hash head as returned by busy_worker_head() 810 * @work: work to find worker for 811 * 812 * Find a worker which is executing @work on @gcwq. @bwh should be 813 * the hash head obtained by calling busy_worker_head() with the same 814 * work. 815 * 816 * CONTEXT: 817 * spin_lock_irq(gcwq->lock). 818 * 819 * RETURNS: 820 * Pointer to worker which is executing @work if found, NULL 821 * otherwise. 822 */ 823 static struct worker *__find_worker_executing_work(struct global_cwq *gcwq, 824 struct hlist_head *bwh, 825 struct work_struct *work) 826 { 827 struct worker *worker; 828 struct hlist_node *tmp; 829 830 hlist_for_each_entry(worker, tmp, bwh, hentry) 831 if (worker->current_work == work) 832 return worker; 833 return NULL; 834 } 835 836 /** 837 * find_worker_executing_work - find worker which is executing a work 838 * @gcwq: gcwq of interest 839 * @work: work to find worker for 840 * 841 * Find a worker which is executing @work on @gcwq. This function is 842 * identical to __find_worker_executing_work() except that this 843 * function calculates @bwh itself. 844 * 845 * CONTEXT: 846 * spin_lock_irq(gcwq->lock). 847 * 848 * RETURNS: 849 * Pointer to worker which is executing @work if found, NULL 850 * otherwise. 
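 *
 * (Editorial note, not in the original: __queue_work() below uses this
 * to keep a WQ_NON_REENTRANT work on the cpu where it is still
 * executing instead of letting a second instance start elsewhere.)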
851 */ 852 static struct worker *find_worker_executing_work(struct global_cwq *gcwq, 853 struct work_struct *work) 854 { 855 return __find_worker_executing_work(gcwq, busy_worker_head(gcwq, work), 856 work); 857 } 858 859 /** 860 * gcwq_determine_ins_pos - find insertion position 861 * @gcwq: gcwq of interest 862 * @cwq: cwq a work is being queued for 863 * 864 * A work for @cwq is about to be queued on @gcwq, determine insertion 865 * position for the work. If @cwq is for HIGHPRI wq, the work is 866 * queued at the head of the queue but in FIFO order with respect to 867 * other HIGHPRI works; otherwise, at the end of the queue. This 868 * function also sets GCWQ_HIGHPRI_PENDING flag to hint @gcwq that 869 * there are HIGHPRI works pending. 870 * 871 * CONTEXT: 872 * spin_lock_irq(gcwq->lock). 873 * 874 * RETURNS: 875 * Pointer to insertion position. 876 */ 877 static inline struct list_head *gcwq_determine_ins_pos(struct global_cwq *gcwq, 878 struct cpu_workqueue_struct *cwq) 879 { 880 struct work_struct *twork; 881 882 if (likely(!(cwq->wq->flags & WQ_HIGHPRI))) 883 return &gcwq->worklist; 884 885 list_for_each_entry(twork, &gcwq->worklist, entry) { 886 struct cpu_workqueue_struct *tcwq = get_work_cwq(twork); 887 888 if (!(tcwq->wq->flags & WQ_HIGHPRI)) 889 break; 890 } 891 892 gcwq->flags |= GCWQ_HIGHPRI_PENDING; 893 return &twork->entry; 894 } 895 896 /** 897 * insert_work - insert a work into gcwq 898 * @cwq: cwq @work belongs to 899 * @work: work to insert 900 * @head: insertion point 901 * @extra_flags: extra WORK_STRUCT_* flags to set 902 * 903 * Insert @work which belongs to @cwq into @gcwq after @head. 904 * @extra_flags is or'd to work_struct flags. 905 * 906 * CONTEXT: 907 * spin_lock_irq(gcwq->lock). 908 */ 909 static void insert_work(struct cpu_workqueue_struct *cwq, 910 struct work_struct *work, struct list_head *head, 911 unsigned int extra_flags) 912 { 913 struct global_cwq *gcwq = cwq->gcwq; 914 915 /* we own @work, set data and link */ 916 set_work_cwq(work, cwq, extra_flags); 917 918 /* 919 * Ensure that we get the right work->data if we see the 920 * result of list_add() below, see try_to_grab_pending(). 921 */ 922 smp_wmb(); 923 924 list_add_tail(&work->entry, head); 925 926 /* 927 * Ensure either wq_worker_sleeping() sees the above 928 * list_add_tail() or we see zero nr_running to avoid workers 929 * lying around lazily while there are works to be processed. 930 */ 931 smp_mb(); 932 933 if (__need_more_worker(gcwq)) 934 wake_up_worker(gcwq); 935 } 936 937 static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, 938 struct work_struct *work) 939 { 940 struct global_cwq *gcwq; 941 struct cpu_workqueue_struct *cwq; 942 struct list_head *worklist; 943 unsigned long flags; 944 945 debug_work_activate(work); 946 947 /* determine gcwq to use */ 948 if (!(wq->flags & WQ_UNBOUND)) { 949 struct global_cwq *last_gcwq; 950 951 if (unlikely(cpu == WORK_CPU_UNBOUND)) 952 cpu = raw_smp_processor_id(); 953 954 /* 955 * It's multi cpu. If @wq is non-reentrant and @work 956 * was previously on a different cpu, it might still 957 * be running there, in which case the work needs to 958 * be queued on that cpu to guarantee non-reentrance.
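 *
 * Editorial example (not in the original): a WQ_NON_REENTRANT work
 * that last ran on cpu 1 and is re-queued from cpu 2 is queued back
 * on cpu 1's gcwq if the lookup below finds it still running there;
 * otherwise it is queued on cpu 2 as usual.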
959 */ 960 gcwq = get_gcwq(cpu); 961 if (wq->flags & WQ_NON_REENTRANT && 962 (last_gcwq = get_work_gcwq(work)) && last_gcwq != gcwq) { 963 struct worker *worker; 964 965 spin_lock_irqsave(&last_gcwq->lock, flags); 966 967 worker = find_worker_executing_work(last_gcwq, work); 968 969 if (worker && worker->current_cwq->wq == wq) 970 gcwq = last_gcwq; 971 else { 972 /* meh... not running there, queue here */ 973 spin_unlock_irqrestore(&last_gcwq->lock, flags); 974 spin_lock_irqsave(&gcwq->lock, flags); 975 } 976 } else 977 spin_lock_irqsave(&gcwq->lock, flags); 978 } else { 979 gcwq = get_gcwq(WORK_CPU_UNBOUND); 980 spin_lock_irqsave(&gcwq->lock, flags); 981 } 982 983 /* gcwq determined, get cwq and queue */ 984 cwq = get_cwq(gcwq->cpu, wq); 985 986 BUG_ON(!list_empty(&work->entry)); 987 988 cwq->nr_in_flight[cwq->work_color]++; 989 990 if (likely(cwq->nr_active < cwq->max_active)) { 991 cwq->nr_active++; 992 worklist = gcwq_determine_ins_pos(gcwq, cwq); 993 } else 994 worklist = &cwq->delayed_works; 995 996 insert_work(cwq, work, worklist, work_color_to_flags(cwq->work_color)); 997 998 spin_unlock_irqrestore(&gcwq->lock, flags); 999 } 1000 1001 /** 1002 * queue_work - queue work on a workqueue 1003 * @wq: workqueue to use 1004 * @work: work to queue 1005 * 1006 * Returns 0 if @work was already on a queue, non-zero otherwise. 1007 * 1008 * We queue the work to the CPU on which it was submitted, but if the CPU dies 1009 * it can be processed by another CPU. 1010 */ 1011 int queue_work(struct workqueue_struct *wq, struct work_struct *work) 1012 { 1013 int ret; 1014 1015 ret = queue_work_on(get_cpu(), wq, work); 1016 put_cpu(); 1017 1018 return ret; 1019 } 1020 EXPORT_SYMBOL_GPL(queue_work); 1021 1022 /** 1023 * queue_work_on - queue work on specific cpu 1024 * @cpu: CPU number to execute work on 1025 * @wq: workqueue to use 1026 * @work: work to queue 1027 * 1028 * Returns 0 if @work was already on a queue, non-zero otherwise. 1029 * 1030 * We queue the work to a specific CPU, the caller must ensure it 1031 * can't go away. 1032 */ 1033 int 1034 queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work) 1035 { 1036 int ret = 0; 1037 1038 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { 1039 __queue_work(cpu, wq, work); 1040 ret = 1; 1041 } 1042 return ret; 1043 } 1044 EXPORT_SYMBOL_GPL(queue_work_on); 1045 1046 static void delayed_work_timer_fn(unsigned long __data) 1047 { 1048 struct delayed_work *dwork = (struct delayed_work *)__data; 1049 struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work); 1050 1051 __queue_work(smp_processor_id(), cwq->wq, &dwork->work); 1052 } 1053 1054 /** 1055 * queue_delayed_work - queue work on a workqueue after delay 1056 * @wq: workqueue to use 1057 * @dwork: delayable work to queue 1058 * @delay: number of jiffies to wait before queueing 1059 * 1060 * Returns 0 if @work was already on a queue, non-zero otherwise. 
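 *
 * Editorial usage sketch (hypothetical names, not part of the original
 * documentation):
 *
 *	static void my_timeout_fn(struct work_struct *work);
 *	static DECLARE_DELAYED_WORK(my_timeout, my_timeout_fn);
 *
 *	queue_delayed_work(system_wq, &my_timeout, HZ);
 *
 * queues my_timeout_fn() on the system workqueue roughly one second
 * later; a zero @delay falls straight through to queue_work().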
1061 */ 1062 int queue_delayed_work(struct workqueue_struct *wq, 1063 struct delayed_work *dwork, unsigned long delay) 1064 { 1065 if (delay == 0) 1066 return queue_work(wq, &dwork->work); 1067 1068 return queue_delayed_work_on(-1, wq, dwork, delay); 1069 } 1070 EXPORT_SYMBOL_GPL(queue_delayed_work); 1071 1072 /** 1073 * queue_delayed_work_on - queue work on specific CPU after delay 1074 * @cpu: CPU number to execute work on 1075 * @wq: workqueue to use 1076 * @dwork: work to queue 1077 * @delay: number of jiffies to wait before queueing 1078 * 1079 * Returns 0 if @work was already on a queue, non-zero otherwise. 1080 */ 1081 int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, 1082 struct delayed_work *dwork, unsigned long delay) 1083 { 1084 int ret = 0; 1085 struct timer_list *timer = &dwork->timer; 1086 struct work_struct *work = &dwork->work; 1087 1088 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { 1089 unsigned int lcpu; 1090 1091 BUG_ON(timer_pending(timer)); 1092 BUG_ON(!list_empty(&work->entry)); 1093 1094 timer_stats_timer_set_start_info(&dwork->timer); 1095 1096 /* 1097 * This stores cwq for the moment, for the timer_fn. 1098 * Note that the work's gcwq is preserved to allow 1099 * reentrance detection for delayed works. 1100 */ 1101 if (!(wq->flags & WQ_UNBOUND)) { 1102 struct global_cwq *gcwq = get_work_gcwq(work); 1103 1104 if (gcwq && gcwq->cpu != WORK_CPU_UNBOUND) 1105 lcpu = gcwq->cpu; 1106 else 1107 lcpu = raw_smp_processor_id(); 1108 } else 1109 lcpu = WORK_CPU_UNBOUND; 1110 1111 set_work_cwq(work, get_cwq(lcpu, wq), 0); 1112 1113 timer->expires = jiffies + delay; 1114 timer->data = (unsigned long)dwork; 1115 timer->function = delayed_work_timer_fn; 1116 1117 if (unlikely(cpu >= 0)) 1118 add_timer_on(timer, cpu); 1119 else 1120 add_timer(timer); 1121 ret = 1; 1122 } 1123 return ret; 1124 } 1125 EXPORT_SYMBOL_GPL(queue_delayed_work_on); 1126 1127 /** 1128 * worker_enter_idle - enter idle state 1129 * @worker: worker which is entering idle state 1130 * 1131 * @worker is entering idle state. Update stats and idle timer if 1132 * necessary. 1133 * 1134 * LOCKING: 1135 * spin_lock_irq(gcwq->lock). 1136 */ 1137 static void worker_enter_idle(struct worker *worker) 1138 { 1139 struct global_cwq *gcwq = worker->gcwq; 1140 1141 BUG_ON(worker->flags & WORKER_IDLE); 1142 BUG_ON(!list_empty(&worker->entry) && 1143 (worker->hentry.next || worker->hentry.pprev)); 1144 1145 /* can't use worker_set_flags(), also called from start_worker() */ 1146 worker->flags |= WORKER_IDLE; 1147 gcwq->nr_idle++; 1148 worker->last_active = jiffies; 1149 1150 /* idle_list is LIFO */ 1151 list_add(&worker->entry, &gcwq->idle_list); 1152 1153 if (likely(!(worker->flags & WORKER_ROGUE))) { 1154 if (too_many_workers(gcwq) && !timer_pending(&gcwq->idle_timer)) 1155 mod_timer(&gcwq->idle_timer, 1156 jiffies + IDLE_WORKER_TIMEOUT); 1157 } else 1158 wake_up_all(&gcwq->trustee_wait); 1159 1160 /* sanity check nr_running */ 1161 WARN_ON_ONCE(gcwq->nr_workers == gcwq->nr_idle && 1162 atomic_read(get_gcwq_nr_running(gcwq->cpu))); 1163 } 1164 1165 /** 1166 * worker_leave_idle - leave idle state 1167 * @worker: worker which is leaving idle state 1168 * 1169 * @worker is leaving idle state. Update stats. 1170 * 1171 * LOCKING: 1172 * spin_lock_irq(gcwq->lock). 
1173 */ 1174 static void worker_leave_idle(struct worker *worker) 1175 { 1176 struct global_cwq *gcwq = worker->gcwq; 1177 1178 BUG_ON(!(worker->flags & WORKER_IDLE)); 1179 worker_clr_flags(worker, WORKER_IDLE); 1180 gcwq->nr_idle--; 1181 list_del_init(&worker->entry); 1182 } 1183 1184 /** 1185 * worker_maybe_bind_and_lock - bind worker to its cpu if possible and lock gcwq 1186 * @worker: self 1187 * 1188 * Works which are scheduled while the cpu is online must at least be 1189 * scheduled to a worker which is bound to the cpu so that if they are 1190 * flushed from cpu callbacks while cpu is going down, they are 1191 * guaranteed to execute on the cpu. 1192 * 1193 * This function is to be used by rogue workers and rescuers to bind 1194 * themselves to the target cpu and may race with cpu going down or 1195 * coming online. kthread_bind() can't be used because it may put the 1196 * worker to already dead cpu and set_cpus_allowed_ptr() can't be used 1197 * verbatim as it's best effort and blocking and gcwq may be 1198 * [dis]associated in the meantime. 1199 * 1200 * This function tries set_cpus_allowed() and locks gcwq and verifies 1201 * the binding against GCWQ_DISASSOCIATED which is set during 1202 * CPU_DYING and cleared during CPU_ONLINE, so if the worker enters 1203 * idle state or fetches works without dropping lock, it can guarantee 1204 * the scheduling requirement described in the first paragraph. 1205 * 1206 * CONTEXT: 1207 * Might sleep. Called without any lock but returns with gcwq->lock 1208 * held. 1209 * 1210 * RETURNS: 1211 * %true if the associated gcwq is online (@worker is successfully 1212 * bound), %false if offline. 1213 */ 1214 static bool worker_maybe_bind_and_lock(struct worker *worker) 1215 { 1216 struct global_cwq *gcwq = worker->gcwq; 1217 struct task_struct *task = worker->task; 1218 1219 while (true) { 1220 /* 1221 * The following call may fail, succeed or succeed 1222 * without actually migrating the task to the cpu if 1223 * it races with cpu hotunplug operation. Verify 1224 * against GCWQ_DISASSOCIATED. 1225 */ 1226 if (!(gcwq->flags & GCWQ_DISASSOCIATED)) 1227 set_cpus_allowed_ptr(task, get_cpu_mask(gcwq->cpu)); 1228 1229 spin_lock_irq(&gcwq->lock); 1230 if (gcwq->flags & GCWQ_DISASSOCIATED) 1231 return false; 1232 if (task_cpu(task) == gcwq->cpu && 1233 cpumask_equal(&current->cpus_allowed, 1234 get_cpu_mask(gcwq->cpu))) 1235 return true; 1236 spin_unlock_irq(&gcwq->lock); 1237 1238 /* CPU has come up in between, retry migration */ 1239 cpu_relax(); 1240 } 1241 } 1242 1243 /* 1244 * Function for worker->rebind_work used to rebind rogue busy workers 1245 * to the associated cpu which is coming back online. This is 1246 * scheduled by cpu up but can race with other cpu hotplug operations 1247 * and may be executed twice without intervening cpu down.
1248 */ 1249 static void worker_rebind_fn(struct work_struct *work) 1250 { 1251 struct worker *worker = container_of(work, struct worker, rebind_work); 1252 struct global_cwq *gcwq = worker->gcwq; 1253 1254 if (worker_maybe_bind_and_lock(worker)) 1255 worker_clr_flags(worker, WORKER_REBIND); 1256 1257 spin_unlock_irq(&gcwq->lock); 1258 } 1259 1260 static struct worker *alloc_worker(void) 1261 { 1262 struct worker *worker; 1263 1264 worker = kzalloc(sizeof(*worker), GFP_KERNEL); 1265 if (worker) { 1266 INIT_LIST_HEAD(&worker->entry); 1267 INIT_LIST_HEAD(&worker->scheduled); 1268 INIT_WORK(&worker->rebind_work, worker_rebind_fn); 1269 /* on creation a worker is in !idle && prep state */ 1270 worker->flags = WORKER_PREP; 1271 } 1272 return worker; 1273 } 1274 1275 /** 1276 * create_worker - create a new workqueue worker 1277 * @gcwq: gcwq the new worker will belong to 1278 * @bind: whether to set affinity to @cpu or not 1279 * 1280 * Create a new worker which is bound to @gcwq. The returned worker 1281 * can be started by calling start_worker() or destroyed using 1282 * destroy_worker(). 1283 * 1284 * CONTEXT: 1285 * Might sleep. Does GFP_KERNEL allocations. 1286 * 1287 * RETURNS: 1288 * Pointer to the newly created worker. 1289 */ 1290 static struct worker *create_worker(struct global_cwq *gcwq, bool bind) 1291 { 1292 bool on_unbound_cpu = gcwq->cpu == WORK_CPU_UNBOUND; 1293 struct worker *worker = NULL; 1294 int id = -1; 1295 1296 spin_lock_irq(&gcwq->lock); 1297 while (ida_get_new(&gcwq->worker_ida, &id)) { 1298 spin_unlock_irq(&gcwq->lock); 1299 if (!ida_pre_get(&gcwq->worker_ida, GFP_KERNEL)) 1300 goto fail; 1301 spin_lock_irq(&gcwq->lock); 1302 } 1303 spin_unlock_irq(&gcwq->lock); 1304 1305 worker = alloc_worker(); 1306 if (!worker) 1307 goto fail; 1308 1309 worker->gcwq = gcwq; 1310 worker->id = id; 1311 1312 if (!on_unbound_cpu) 1313 worker->task = kthread_create(worker_thread, worker, 1314 "kworker/%u:%d", gcwq->cpu, id); 1315 else 1316 worker->task = kthread_create(worker_thread, worker, 1317 "kworker/u:%d", id); 1318 if (IS_ERR(worker->task)) 1319 goto fail; 1320 1321 /* 1322 * A rogue worker will become a regular one if CPU comes 1323 * online later on. Make sure every worker has 1324 * PF_THREAD_BOUND set. 1325 */ 1326 if (bind && !on_unbound_cpu) 1327 kthread_bind(worker->task, gcwq->cpu); 1328 else { 1329 worker->task->flags |= PF_THREAD_BOUND; 1330 if (on_unbound_cpu) 1331 worker->flags |= WORKER_UNBOUND; 1332 } 1333 1334 return worker; 1335 fail: 1336 if (id >= 0) { 1337 spin_lock_irq(&gcwq->lock); 1338 ida_remove(&gcwq->worker_ida, id); 1339 spin_unlock_irq(&gcwq->lock); 1340 } 1341 kfree(worker); 1342 return NULL; 1343 } 1344 1345 /** 1346 * start_worker - start a newly created worker 1347 * @worker: worker to start 1348 * 1349 * Make the gcwq aware of @worker and start it. 1350 * 1351 * CONTEXT: 1352 * spin_lock_irq(gcwq->lock). 1353 */ 1354 static void start_worker(struct worker *worker) 1355 { 1356 worker->flags |= WORKER_STARTED; 1357 worker->gcwq->nr_workers++; 1358 worker_enter_idle(worker); 1359 wake_up_process(worker->task); 1360 } 1361 1362 /** 1363 * destroy_worker - destroy a workqueue worker 1364 * @worker: worker to be destroyed 1365 * 1366 * Destroy @worker and adjust @gcwq stats accordingly. 1367 * 1368 * CONTEXT: 1369 * spin_lock_irq(gcwq->lock) which is released and regrabbed. 
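 *
 * (Editorial note, not in the original: the counterpart creation
 * sequence, as used by the manager path, is
 *
 *	worker = create_worker(gcwq, true);	- may sleep, called unlocked
 *	spin_lock_irq(&gcwq->lock);
 *	start_worker(worker);			- now idle and runnable
 *
 * while destroy_worker() below must be entered with gcwq->lock held
 * and drops and retakes it around kthread_stop().)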
1370 */ 1371 static void destroy_worker(struct worker *worker) 1372 { 1373 struct global_cwq *gcwq = worker->gcwq; 1374 int id = worker->id; 1375 1376 /* sanity check frenzy */ 1377 BUG_ON(worker->current_work); 1378 BUG_ON(!list_empty(&worker->scheduled)); 1379 1380 if (worker->flags & WORKER_STARTED) 1381 gcwq->nr_workers--; 1382 if (worker->flags & WORKER_IDLE) 1383 gcwq->nr_idle--; 1384 1385 list_del_init(&worker->entry); 1386 worker->flags |= WORKER_DIE; 1387 1388 spin_unlock_irq(&gcwq->lock); 1389 1390 kthread_stop(worker->task); 1391 kfree(worker); 1392 1393 spin_lock_irq(&gcwq->lock); 1394 ida_remove(&gcwq->worker_ida, id); 1395 } 1396 1397 static void idle_worker_timeout(unsigned long __gcwq) 1398 { 1399 struct global_cwq *gcwq = (void *)__gcwq; 1400 1401 spin_lock_irq(&gcwq->lock); 1402 1403 if (too_many_workers(gcwq)) { 1404 struct worker *worker; 1405 unsigned long expires; 1406 1407 /* idle_list is kept in LIFO order, check the last one */ 1408 worker = list_entry(gcwq->idle_list.prev, struct worker, entry); 1409 expires = worker->last_active + IDLE_WORKER_TIMEOUT; 1410 1411 if (time_before(jiffies, expires)) 1412 mod_timer(&gcwq->idle_timer, expires); 1413 else { 1414 /* it's been idle for too long, wake up manager */ 1415 gcwq->flags |= GCWQ_MANAGE_WORKERS; 1416 wake_up_worker(gcwq); 1417 } 1418 } 1419 1420 spin_unlock_irq(&gcwq->lock); 1421 } 1422 1423 static bool send_mayday(struct work_struct *work) 1424 { 1425 struct cpu_workqueue_struct *cwq = get_work_cwq(work); 1426 struct workqueue_struct *wq = cwq->wq; 1427 unsigned int cpu; 1428 1429 if (!(wq->flags & WQ_RESCUER)) 1430 return false; 1431 1432 /* mayday mayday mayday */ 1433 cpu = cwq->gcwq->cpu; 1434 /* WORK_CPU_UNBOUND can't be set in cpumask, use cpu 0 instead */ 1435 if (cpu == WORK_CPU_UNBOUND) 1436 cpu = 0; 1437 if (!mayday_test_and_set_cpu(cpu, wq->mayday_mask)) 1438 wake_up_process(wq->rescuer->task); 1439 return true; 1440 } 1441 1442 static void gcwq_mayday_timeout(unsigned long __gcwq) 1443 { 1444 struct global_cwq *gcwq = (void *)__gcwq; 1445 struct work_struct *work; 1446 1447 spin_lock_irq(&gcwq->lock); 1448 1449 if (need_to_create_worker(gcwq)) { 1450 /* 1451 * We've been trying to create a new worker but 1452 * haven't been successful. We might be hitting an 1453 * allocation deadlock. Send distress signals to 1454 * rescuers. 1455 */ 1456 list_for_each_entry(work, &gcwq->worklist, entry) 1457 send_mayday(work); 1458 } 1459 1460 spin_unlock_irq(&gcwq->lock); 1461 1462 mod_timer(&gcwq->mayday_timer, jiffies + MAYDAY_INTERVAL); 1463 } 1464 1465 /** 1466 * maybe_create_worker - create a new worker if necessary 1467 * @gcwq: gcwq to create a new worker for 1468 * 1469 * Create a new worker for @gcwq if necessary. @gcwq is guaranteed to 1470 * have at least one idle worker on return from this function. If 1471 * creating a new worker takes longer than MAYDAY_INTERVAL, mayday is 1472 * sent to all rescuers with works scheduled on @gcwq to resolve 1473 * possible allocation deadlock. 1474 * 1475 * On return, need_to_create_worker() is guaranteed to be false and 1476 * may_start_working() true. 1477 * 1478 * LOCKING: 1479 * spin_lock_irq(gcwq->lock) which may be released and regrabbed 1480 * multiple times. Does GFP_KERNEL allocations. Called only from 1481 * manager. 1482 * 1483 * RETURNS: 1484 * false if no action was taken and gcwq->lock stayed locked, true 1485 * otherwise. 
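 *
 * Editorial timing sketch (derived from the enums above, not a new
 * guarantee): if kthread creation keeps failing, the mayday timer
 * fires MAYDAY_INITIAL_TIMEOUT (~10ms) after this function starts and
 * then every MAYDAY_INTERVAL (~100ms), each time paging the rescuers
 * of all WQ_RESCUER workqueues with work pending on this gcwq, while
 * the create loop retries with CREATE_COOLDOWN (1s) pauses in between.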
1486 */ 1487 static bool maybe_create_worker(struct global_cwq *gcwq) 1488 { 1489 if (!need_to_create_worker(gcwq)) 1490 return false; 1491 restart: 1492 spin_unlock_irq(&gcwq->lock); 1493 1494 /* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */ 1495 mod_timer(&gcwq->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT); 1496 1497 while (true) { 1498 struct worker *worker; 1499 1500 worker = create_worker(gcwq, true); 1501 if (worker) { 1502 del_timer_sync(&gcwq->mayday_timer); 1503 spin_lock_irq(&gcwq->lock); 1504 start_worker(worker); 1505 BUG_ON(need_to_create_worker(gcwq)); 1506 return true; 1507 } 1508 1509 if (!need_to_create_worker(gcwq)) 1510 break; 1511 1512 __set_current_state(TASK_INTERRUPTIBLE); 1513 schedule_timeout(CREATE_COOLDOWN); 1514 1515 if (!need_to_create_worker(gcwq)) 1516 break; 1517 } 1518 1519 del_timer_sync(&gcwq->mayday_timer); 1520 spin_lock_irq(&gcwq->lock); 1521 if (need_to_create_worker(gcwq)) 1522 goto restart; 1523 return true; 1524 } 1525 1526 /** 1527 * maybe_destroy_worker - destroy workers which have been idle for a while 1528 * @gcwq: gcwq to destroy workers for 1529 * 1530 * Destroy @gcwq workers which have been idle for longer than 1531 * IDLE_WORKER_TIMEOUT. 1532 * 1533 * LOCKING: 1534 * spin_lock_irq(gcwq->lock) which may be released and regrabbed 1535 * multiple times. Called only from manager. 1536 * 1537 * RETURNS: 1538 * false if no action was taken and gcwq->lock stayed locked, true 1539 * otherwise. 1540 */ 1541 static bool maybe_destroy_workers(struct global_cwq *gcwq) 1542 { 1543 bool ret = false; 1544 1545 while (too_many_workers(gcwq)) { 1546 struct worker *worker; 1547 unsigned long expires; 1548 1549 worker = list_entry(gcwq->idle_list.prev, struct worker, entry); 1550 expires = worker->last_active + IDLE_WORKER_TIMEOUT; 1551 1552 if (time_before(jiffies, expires)) { 1553 mod_timer(&gcwq->idle_timer, expires); 1554 break; 1555 } 1556 1557 destroy_worker(worker); 1558 ret = true; 1559 } 1560 1561 return ret; 1562 } 1563 1564 /** 1565 * manage_workers - manage worker pool 1566 * @worker: self 1567 * 1568 * Assume the manager role and manage gcwq worker pool @worker belongs 1569 * to. At any given time, there can be only zero or one manager per 1570 * gcwq. The exclusion is handled automatically by this function. 1571 * 1572 * The caller can safely start processing works on false return. On 1573 * true return, it's guaranteed that need_to_create_worker() is false 1574 * and may_start_working() is true. 1575 * 1576 * CONTEXT: 1577 * spin_lock_irq(gcwq->lock) which may be released and regrabbed 1578 * multiple times. Does GFP_KERNEL allocations. 1579 * 1580 * RETURNS: 1581 * false if no action was taken and gcwq->lock stayed locked, true if 1582 * some action was taken. 1583 */ 1584 static bool manage_workers(struct worker *worker) 1585 { 1586 struct global_cwq *gcwq = worker->gcwq; 1587 bool ret = false; 1588 1589 if (gcwq->flags & GCWQ_MANAGING_WORKERS) 1590 return ret; 1591 1592 gcwq->flags &= ~GCWQ_MANAGE_WORKERS; 1593 gcwq->flags |= GCWQ_MANAGING_WORKERS; 1594 1595 /* 1596 * Destroy and then create so that may_start_working() is true 1597 * on return. 1598 */ 1599 ret |= maybe_destroy_workers(gcwq); 1600 ret |= maybe_create_worker(gcwq); 1601 1602 gcwq->flags &= ~GCWQ_MANAGING_WORKERS; 1603 1604 /* 1605 * The trustee might be waiting to take over the manager 1606 * position, tell it we're done. 
1607 */ 1608 if (unlikely(gcwq->trustee)) 1609 wake_up_all(&gcwq->trustee_wait); 1610 1611 return ret; 1612 } 1613 1614 /** 1615 * move_linked_works - move linked works to a list 1616 * @work: start of series of works to be scheduled 1617 * @head: target list to append @work to 1618 * @nextp: out parameter for nested worklist walking 1619 * 1620 * Schedule linked works starting from @work to @head. Work series to 1621 * be scheduled starts at @work and includes any consecutive work with 1622 * WORK_STRUCT_LINKED set in its predecessor. 1623 * 1624 * If @nextp is not NULL, it's updated to point to the next work of 1625 * the last scheduled work. This allows move_linked_works() to be 1626 * nested inside outer list_for_each_entry_safe(). 1627 * 1628 * CONTEXT: 1629 * spin_lock_irq(gcwq->lock). 1630 */ 1631 static void move_linked_works(struct work_struct *work, struct list_head *head, 1632 struct work_struct **nextp) 1633 { 1634 struct work_struct *n; 1635 1636 /* 1637 * Linked worklist will always end before the end of the list, 1638 * use NULL for list head. 1639 */ 1640 list_for_each_entry_safe_from(work, n, NULL, entry) { 1641 list_move_tail(&work->entry, head); 1642 if (!(*work_data_bits(work) & WORK_STRUCT_LINKED)) 1643 break; 1644 } 1645 1646 /* 1647 * If we're already inside safe list traversal and have moved 1648 * multiple works to the scheduled queue, the next position 1649 * needs to be updated. 1650 */ 1651 if (nextp) 1652 *nextp = n; 1653 } 1654 1655 static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) 1656 { 1657 struct work_struct *work = list_first_entry(&cwq->delayed_works, 1658 struct work_struct, entry); 1659 struct list_head *pos = gcwq_determine_ins_pos(cwq->gcwq, cwq); 1660 1661 move_linked_works(work, pos, NULL); 1662 cwq->nr_active++; 1663 } 1664 1665 /** 1666 * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight 1667 * @cwq: cwq of interest 1668 * @color: color of work which left the queue 1669 * 1670 * A work either has completed or is removed from pending queue, 1671 * decrement nr_in_flight of its cwq and handle workqueue flushing. 1672 * 1673 * CONTEXT: 1674 * spin_lock_irq(gcwq->lock). 1675 */ 1676 static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color) 1677 { 1678 /* ignore uncolored works */ 1679 if (color == WORK_NO_COLOR) 1680 return; 1681 1682 cwq->nr_in_flight[color]--; 1683 cwq->nr_active--; 1684 1685 if (!list_empty(&cwq->delayed_works)) { 1686 /* one down, submit a delayed one */ 1687 if (cwq->nr_active < cwq->max_active) 1688 cwq_activate_first_delayed(cwq); 1689 } 1690 1691 /* is flush in progress and are we at the flushing tip? */ 1692 if (likely(cwq->flush_color != color)) 1693 return; 1694 1695 /* are there still in-flight works? */ 1696 if (cwq->nr_in_flight[color]) 1697 return; 1698 1699 /* this cwq is done, clear flush_color */ 1700 cwq->flush_color = -1; 1701 1702 /* 1703 * If this was the last cwq, wake up the first flusher. It 1704 * will handle the rest. 1705 */ 1706 if (atomic_dec_and_test(&cwq->wq->nr_cwqs_to_flush)) 1707 complete(&cwq->wq->first_flusher->done); 1708 } 1709 1710 /** 1711 * process_one_work - process single work 1712 * @worker: self 1713 * @work: work to process 1714 * 1715 * Process @work. This function contains all the logic necessary to 1716 * process a single work including synchronization against and 1717 * interaction with other workers on the same cpu, queueing and 1718 * flushing.
As long as context requirement is met, any worker can 1719 * call this function to process a work. 1720 * 1721 * CONTEXT: 1722 * spin_lock_irq(gcwq->lock) which is released and regrabbed. 1723 */ 1724 static void process_one_work(struct worker *worker, struct work_struct *work) 1725 { 1726 struct cpu_workqueue_struct *cwq = get_work_cwq(work); 1727 struct global_cwq *gcwq = cwq->gcwq; 1728 struct hlist_head *bwh = busy_worker_head(gcwq, work); 1729 bool cpu_intensive = cwq->wq->flags & WQ_CPU_INTENSIVE; 1730 work_func_t f = work->func; 1731 int work_color; 1732 struct worker *collision; 1733 #ifdef CONFIG_LOCKDEP 1734 /* 1735 * It is permissible to free the struct work_struct from 1736 * inside the function that is called from it, this we need to 1737 * take into account for lockdep too. To avoid bogus "held 1738 * lock freed" warnings as well as problems when looking into 1739 * work->lockdep_map, make a copy and use that here. 1740 */ 1741 struct lockdep_map lockdep_map = work->lockdep_map; 1742 #endif 1743 /* 1744 * A single work shouldn't be executed concurrently by 1745 * multiple workers on a single cpu. Check whether anyone is 1746 * already processing the work. If so, defer the work to the 1747 * currently executing one. 1748 */ 1749 collision = __find_worker_executing_work(gcwq, bwh, work); 1750 if (unlikely(collision)) { 1751 move_linked_works(work, &collision->scheduled, NULL); 1752 return; 1753 } 1754 1755 /* claim and process */ 1756 debug_work_deactivate(work); 1757 hlist_add_head(&worker->hentry, bwh); 1758 worker->current_work = work; 1759 worker->current_cwq = cwq; 1760 work_color = get_work_color(work); 1761 1762 /* record the current cpu number in the work data and dequeue */ 1763 set_work_cpu(work, gcwq->cpu); 1764 list_del_init(&work->entry); 1765 1766 /* 1767 * If HIGHPRI_PENDING, check the next work, and, if HIGHPRI, 1768 * wake up another worker; otherwise, clear HIGHPRI_PENDING. 1769 */ 1770 if (unlikely(gcwq->flags & GCWQ_HIGHPRI_PENDING)) { 1771 struct work_struct *nwork = list_first_entry(&gcwq->worklist, 1772 struct work_struct, entry); 1773 1774 if (!list_empty(&gcwq->worklist) && 1775 get_work_cwq(nwork)->wq->flags & WQ_HIGHPRI) 1776 wake_up_worker(gcwq); 1777 else 1778 gcwq->flags &= ~GCWQ_HIGHPRI_PENDING; 1779 } 1780 1781 /* 1782 * CPU intensive works don't participate in concurrency 1783 * management. They're the scheduler's responsibility. 
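 *
 * Editorial example (not in the original): a WQ_CPU_INTENSIVE work
 * that crunches data for tens of milliseconds is flagged NOT_RUNNING
 * here, so nr_running drops and another worker can be woken to keep
 * draining the rest of the gcwq worklist instead of queueing up
 * behind it.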
1784 */ 1785 if (unlikely(cpu_intensive)) 1786 worker_set_flags(worker, WORKER_CPU_INTENSIVE, true); 1787 1788 spin_unlock_irq(&gcwq->lock); 1789 1790 work_clear_pending(work); 1791 lock_map_acquire(&cwq->wq->lockdep_map); 1792 lock_map_acquire(&lockdep_map); 1793 f(work); 1794 lock_map_release(&lockdep_map); 1795 lock_map_release(&cwq->wq->lockdep_map); 1796 1797 if (unlikely(in_atomic() || lockdep_depth(current) > 0)) { 1798 printk(KERN_ERR "BUG: workqueue leaked lock or atomic: " 1799 "%s/0x%08x/%d\n", 1800 current->comm, preempt_count(), task_pid_nr(current)); 1801 printk(KERN_ERR " last function: "); 1802 print_symbol("%s\n", (unsigned long)f); 1803 debug_show_held_locks(current); 1804 dump_stack(); 1805 } 1806 1807 spin_lock_irq(&gcwq->lock); 1808 1809 /* clear cpu intensive status */ 1810 if (unlikely(cpu_intensive)) 1811 worker_clr_flags(worker, WORKER_CPU_INTENSIVE); 1812 1813 /* we're done with it, release */ 1814 hlist_del_init(&worker->hentry); 1815 worker->current_work = NULL; 1816 worker->current_cwq = NULL; 1817 cwq_dec_nr_in_flight(cwq, work_color); 1818 } 1819 1820 /** 1821 * process_scheduled_works - process scheduled works 1822 * @worker: self 1823 * 1824 * Process all scheduled works. Please note that the scheduled list 1825 * may change while processing a work, so this function repeatedly 1826 * fetches a work from the top and executes it. 1827 * 1828 * CONTEXT: 1829 * spin_lock_irq(gcwq->lock) which may be released and regrabbed 1830 * multiple times. 1831 */ 1832 static void process_scheduled_works(struct worker *worker) 1833 { 1834 while (!list_empty(&worker->scheduled)) { 1835 struct work_struct *work = list_first_entry(&worker->scheduled, 1836 struct work_struct, entry); 1837 process_one_work(worker, work); 1838 } 1839 } 1840 1841 /** 1842 * worker_thread - the worker thread function 1843 * @__worker: self 1844 * 1845 * The gcwq worker thread function. There's a single dynamic pool of 1846 * these per each cpu. These workers process all works regardless of 1847 * their specific target workqueue. The only exception is works which 1848 * belong to workqueues with a rescuer which will be explained in 1849 * rescuer_thread(). 1850 */ 1851 static int worker_thread(void *__worker) 1852 { 1853 struct worker *worker = __worker; 1854 struct global_cwq *gcwq = worker->gcwq; 1855 1856 /* tell the scheduler that this is a workqueue worker */ 1857 worker->task->flags |= PF_WQ_WORKER; 1858 woke_up: 1859 spin_lock_irq(&gcwq->lock); 1860 1861 /* DIE can be set only while we're idle, checking here is enough */ 1862 if (worker->flags & WORKER_DIE) { 1863 spin_unlock_irq(&gcwq->lock); 1864 worker->task->flags &= ~PF_WQ_WORKER; 1865 return 0; 1866 } 1867 1868 worker_leave_idle(worker); 1869 recheck: 1870 /* no more worker necessary? */ 1871 if (!need_more_worker(gcwq)) 1872 goto sleep; 1873 1874 /* do we need to manage? */ 1875 if (unlikely(!may_start_working(gcwq)) && manage_workers(worker)) 1876 goto recheck; 1877 1878 /* 1879 * ->scheduled list can only be filled while a worker is 1880 * preparing to process a work or actually processing it. 1881 * Make sure nobody diddled with it while I was sleeping. 1882 */ 1883 BUG_ON(!list_empty(&worker->scheduled)); 1884 1885 /* 1886 * When control reaches this point, we're guaranteed to have 1887 * at least one idle worker or that someone else has already 1888 * assumed the manager role. 
1889 */ 1890 worker_clr_flags(worker, WORKER_PREP); 1891 1892 do { 1893 struct work_struct *work = 1894 list_first_entry(&gcwq->worklist, 1895 struct work_struct, entry); 1896 1897 if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) { 1898 /* optimization path, not strictly necessary */ 1899 process_one_work(worker, work); 1900 if (unlikely(!list_empty(&worker->scheduled))) 1901 process_scheduled_works(worker); 1902 } else { 1903 move_linked_works(work, &worker->scheduled, NULL); 1904 process_scheduled_works(worker); 1905 } 1906 } while (keep_working(gcwq)); 1907 1908 worker_set_flags(worker, WORKER_PREP, false); 1909 sleep: 1910 if (unlikely(need_to_manage_workers(gcwq)) && manage_workers(worker)) 1911 goto recheck; 1912 1913 /* 1914 * gcwq->lock is held and there's no work to process and no 1915 * need to manage, sleep. Workers are woken up only while 1916 * holding gcwq->lock or from local cpu, so setting the 1917 * current state before releasing gcwq->lock is enough to 1918 * prevent losing any event. 1919 */ 1920 worker_enter_idle(worker); 1921 __set_current_state(TASK_INTERRUPTIBLE); 1922 spin_unlock_irq(&gcwq->lock); 1923 schedule(); 1924 goto woke_up; 1925 } 1926 1927 /** 1928 * rescuer_thread - the rescuer thread function 1929 * @__wq: the associated workqueue 1930 * 1931 * Workqueue rescuer thread function. There's one rescuer for each 1932 * workqueue which has WQ_RESCUER set. 1933 * 1934 * Regular work processing on a gcwq may block trying to create a new 1935 * worker which uses GFP_KERNEL allocation which has slight chance of 1936 * developing into deadlock if some works currently on the same queue 1937 * need to be processed to satisfy the GFP_KERNEL allocation. This is 1938 * the problem rescuer solves. 1939 * 1940 * When such condition is possible, the gcwq summons rescuers of all 1941 * workqueues which have works queued on the gcwq and let them process 1942 * those works so that forward progress can be guaranteed. 1943 * 1944 * This should happen rarely. 1945 */ 1946 static int rescuer_thread(void *__wq) 1947 { 1948 struct workqueue_struct *wq = __wq; 1949 struct worker *rescuer = wq->rescuer; 1950 struct list_head *scheduled = &rescuer->scheduled; 1951 bool is_unbound = wq->flags & WQ_UNBOUND; 1952 unsigned int cpu; 1953 1954 set_user_nice(current, RESCUER_NICE_LEVEL); 1955 repeat: 1956 set_current_state(TASK_INTERRUPTIBLE); 1957 1958 if (kthread_should_stop()) 1959 return 0; 1960 1961 /* 1962 * See whether any cpu is asking for help. Unbounded 1963 * workqueues use cpu 0 in mayday_mask for CPU_UNBOUND. 1964 */ 1965 for_each_mayday_cpu(cpu, wq->mayday_mask) { 1966 unsigned int tcpu = is_unbound ? WORK_CPU_UNBOUND : cpu; 1967 struct cpu_workqueue_struct *cwq = get_cwq(tcpu, wq); 1968 struct global_cwq *gcwq = cwq->gcwq; 1969 struct work_struct *work, *n; 1970 1971 __set_current_state(TASK_RUNNING); 1972 mayday_clear_cpu(cpu, wq->mayday_mask); 1973 1974 /* migrate to the target cpu if possible */ 1975 rescuer->gcwq = gcwq; 1976 worker_maybe_bind_and_lock(rescuer); 1977 1978 /* 1979 * Slurp in all works issued via this workqueue and 1980 * process'em. 
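 *
 * Illustration only (made-up name): a workqueue used on the memory
 * reclaim path would be created with WQ_RESCUER so its works can make
 * forward progress even when no new worker can be allocated:
 *
 *	reclaim_wq = alloc_workqueue("foo_reclaim", WQ_RESCUER, 1);
 *
 * Once the mayday timer fires for a starving gcwq, this rescuer is
 * woken up and executes only the works belonging to reclaim_wq.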
1981 */ 1982 BUG_ON(!list_empty(&rescuer->scheduled)); 1983 list_for_each_entry_safe(work, n, &gcwq->worklist, entry) 1984 if (get_work_cwq(work) == cwq) 1985 move_linked_works(work, scheduled, &n); 1986 1987 process_scheduled_works(rescuer); 1988 spin_unlock_irq(&gcwq->lock); 1989 } 1990 1991 schedule(); 1992 goto repeat; 1993 } 1994 1995 struct wq_barrier { 1996 struct work_struct work; 1997 struct completion done; 1998 }; 1999 2000 static void wq_barrier_func(struct work_struct *work) 2001 { 2002 struct wq_barrier *barr = container_of(work, struct wq_barrier, work); 2003 complete(&barr->done); 2004 } 2005 2006 /** 2007 * insert_wq_barrier - insert a barrier work 2008 * @cwq: cwq to insert barrier into 2009 * @barr: wq_barrier to insert 2010 * @target: target work to attach @barr to 2011 * @worker: worker currently executing @target, NULL if @target is not executing 2012 * 2013 * @barr is linked to @target such that @barr is completed only after 2014 * @target finishes execution. Please note that the ordering 2015 * guarantee is observed only with respect to @target and on the local 2016 * cpu. 2017 * 2018 * Currently, a queued barrier can't be canceled. This is because 2019 * try_to_grab_pending() can't determine whether the work to be 2020 * grabbed is at the head of the queue and thus can't clear LINKED 2021 * flag of the previous work while there must be a valid next work 2022 * after a work with LINKED flag set. 2023 * 2024 * Note that when @worker is non-NULL, @target may be modified 2025 * underneath us, so we can't reliably determine cwq from @target. 2026 * 2027 * CONTEXT: 2028 * spin_lock_irq(gcwq->lock). 2029 */ 2030 static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, 2031 struct wq_barrier *barr, 2032 struct work_struct *target, struct worker *worker) 2033 { 2034 struct list_head *head; 2035 unsigned int linked = 0; 2036 2037 /* 2038 * debugobject calls are safe here even with gcwq->lock locked 2039 * as we know for sure that this will not trigger any of the 2040 * checks and call back into the fixup functions where we 2041 * might deadlock. 2042 */ 2043 INIT_WORK_ON_STACK(&barr->work, wq_barrier_func); 2044 __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work)); 2045 init_completion(&barr->done); 2046 2047 /* 2048 * If @target is currently being executed, schedule the 2049 * barrier to the worker; otherwise, put it after @target. 2050 */ 2051 if (worker) 2052 head = worker->scheduled.next; 2053 else { 2054 unsigned long *bits = work_data_bits(target); 2055 2056 head = target->entry.next; 2057 /* there can already be other linked works, inherit and set */ 2058 linked = *bits & WORK_STRUCT_LINKED; 2059 __set_bit(WORK_STRUCT_LINKED_BIT, bits); 2060 } 2061 2062 debug_work_activate(&barr->work); 2063 insert_work(cwq, &barr->work, head, 2064 work_color_to_flags(WORK_NO_COLOR) | linked); 2065 } 2066 2067 /** 2068 * flush_workqueue_prep_cwqs - prepare cwqs for workqueue flushing 2069 * @wq: workqueue being flushed 2070 * @flush_color: new flush color, < 0 for no-op 2071 * @work_color: new work color, < 0 for no-op 2072 * 2073 * Prepare cwqs for workqueue flushing. 2074 * 2075 * If @flush_color is non-negative, flush_color on all cwqs should be 2076 * -1. If no cwq has in-flight commands at the specified color, all 2077 * cwq->flush_color's stay at -1 and %false is returned. 
If any cwq 2078 * has in flight commands, its cwq->flush_color is set to 2079 * @flush_color, @wq->nr_cwqs_to_flush is updated accordingly, cwq 2080 * wakeup logic is armed and %true is returned. 2081 * 2082 * The caller should have initialized @wq->first_flusher prior to 2083 * calling this function with non-negative @flush_color. If 2084 * @flush_color is negative, no flush color update is done and %false 2085 * is returned. 2086 * 2087 * If @work_color is non-negative, all cwqs should have the same 2088 * work_color which is previous to @work_color and all will be 2089 * advanced to @work_color. 2090 * 2091 * CONTEXT: 2092 * mutex_lock(wq->flush_mutex). 2093 * 2094 * RETURNS: 2095 * %true if @flush_color >= 0 and there's something to flush. %false 2096 * otherwise. 2097 */ 2098 static bool flush_workqueue_prep_cwqs(struct workqueue_struct *wq, 2099 int flush_color, int work_color) 2100 { 2101 bool wait = false; 2102 unsigned int cpu; 2103 2104 if (flush_color >= 0) { 2105 BUG_ON(atomic_read(&wq->nr_cwqs_to_flush)); 2106 atomic_set(&wq->nr_cwqs_to_flush, 1); 2107 } 2108 2109 for_each_cwq_cpu(cpu, wq) { 2110 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); 2111 struct global_cwq *gcwq = cwq->gcwq; 2112 2113 spin_lock_irq(&gcwq->lock); 2114 2115 if (flush_color >= 0) { 2116 BUG_ON(cwq->flush_color != -1); 2117 2118 if (cwq->nr_in_flight[flush_color]) { 2119 cwq->flush_color = flush_color; 2120 atomic_inc(&wq->nr_cwqs_to_flush); 2121 wait = true; 2122 } 2123 } 2124 2125 if (work_color >= 0) { 2126 BUG_ON(work_color != work_next_color(cwq->work_color)); 2127 cwq->work_color = work_color; 2128 } 2129 2130 spin_unlock_irq(&gcwq->lock); 2131 } 2132 2133 if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_cwqs_to_flush)) 2134 complete(&wq->first_flusher->done); 2135 2136 return wait; 2137 } 2138 2139 /** 2140 * flush_workqueue - ensure that any scheduled work has run to completion. 2141 * @wq: workqueue to flush 2142 * 2143 * Forces execution of the workqueue and blocks until its completion. 2144 * This is typically used in driver shutdown handlers. 2145 * 2146 * We sleep until all works which were queued on entry have been handled, 2147 * but we are not livelocked by new incoming ones. 2148 */ 2149 void flush_workqueue(struct workqueue_struct *wq) 2150 { 2151 struct wq_flusher this_flusher = { 2152 .list = LIST_HEAD_INIT(this_flusher.list), 2153 .flush_color = -1, 2154 .done = COMPLETION_INITIALIZER_ONSTACK(this_flusher.done), 2155 }; 2156 int next_color; 2157 2158 lock_map_acquire(&wq->lockdep_map); 2159 lock_map_release(&wq->lockdep_map); 2160 2161 mutex_lock(&wq->flush_mutex); 2162 2163 /* 2164 * Start-to-wait phase 2165 */ 2166 next_color = work_next_color(wq->work_color); 2167 2168 if (next_color != wq->flush_color) { 2169 /* 2170 * Color space is not full. The current work_color 2171 * becomes our flush_color and work_color is advanced 2172 * by one. 
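 *
 * Illustration only (made-up names): a typical flush_workqueue() user
 * stops queueing new works first and then flushes before tearing the
 * rest down, e.g. in a driver shutdown path:
 *
 *	static void foo_shutdown(struct foo_dev *dev)
 *	{
 *		dev->stopping = true;
 *		flush_workqueue(dev->wq);
 *	}
 *
 * where dev->stopping is checked by the work functions so nothing is
 * requeued once the flush has started.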
2173 */ 2174 BUG_ON(!list_empty(&wq->flusher_overflow)); 2175 this_flusher.flush_color = wq->work_color; 2176 wq->work_color = next_color; 2177 2178 if (!wq->first_flusher) { 2179 /* no flush in progress, become the first flusher */ 2180 BUG_ON(wq->flush_color != this_flusher.flush_color); 2181 2182 wq->first_flusher = &this_flusher; 2183 2184 if (!flush_workqueue_prep_cwqs(wq, wq->flush_color, 2185 wq->work_color)) { 2186 /* nothing to flush, done */ 2187 wq->flush_color = next_color; 2188 wq->first_flusher = NULL; 2189 goto out_unlock; 2190 } 2191 } else { 2192 /* wait in queue */ 2193 BUG_ON(wq->flush_color == this_flusher.flush_color); 2194 list_add_tail(&this_flusher.list, &wq->flusher_queue); 2195 flush_workqueue_prep_cwqs(wq, -1, wq->work_color); 2196 } 2197 } else { 2198 /* 2199 * Oops, color space is full, wait on overflow queue. 2200 * The next flush completion will assign us 2201 * flush_color and transfer to flusher_queue. 2202 */ 2203 list_add_tail(&this_flusher.list, &wq->flusher_overflow); 2204 } 2205 2206 mutex_unlock(&wq->flush_mutex); 2207 2208 wait_for_completion(&this_flusher.done); 2209 2210 /* 2211 * Wake-up-and-cascade phase 2212 * 2213 * First flushers are responsible for cascading flushes and 2214 * handling overflow. Non-first flushers can simply return. 2215 */ 2216 if (wq->first_flusher != &this_flusher) 2217 return; 2218 2219 mutex_lock(&wq->flush_mutex); 2220 2221 /* we might have raced, check again with mutex held */ 2222 if (wq->first_flusher != &this_flusher) 2223 goto out_unlock; 2224 2225 wq->first_flusher = NULL; 2226 2227 BUG_ON(!list_empty(&this_flusher.list)); 2228 BUG_ON(wq->flush_color != this_flusher.flush_color); 2229 2230 while (true) { 2231 struct wq_flusher *next, *tmp; 2232 2233 /* complete all the flushers sharing the current flush color */ 2234 list_for_each_entry_safe(next, tmp, &wq->flusher_queue, list) { 2235 if (next->flush_color != wq->flush_color) 2236 break; 2237 list_del_init(&next->list); 2238 complete(&next->done); 2239 } 2240 2241 BUG_ON(!list_empty(&wq->flusher_overflow) && 2242 wq->flush_color != work_next_color(wq->work_color)); 2243 2244 /* this flush_color is finished, advance by one */ 2245 wq->flush_color = work_next_color(wq->flush_color); 2246 2247 /* one color has been freed, handle overflow queue */ 2248 if (!list_empty(&wq->flusher_overflow)) { 2249 /* 2250 * Assign the same color to all overflowed 2251 * flushers, advance work_color and append to 2252 * flusher_queue. This is the start-to-wait 2253 * phase for these overflowed flushers. 2254 */ 2255 list_for_each_entry(tmp, &wq->flusher_overflow, list) 2256 tmp->flush_color = wq->work_color; 2257 2258 wq->work_color = work_next_color(wq->work_color); 2259 2260 list_splice_tail_init(&wq->flusher_overflow, 2261 &wq->flusher_queue); 2262 flush_workqueue_prep_cwqs(wq, -1, wq->work_color); 2263 } 2264 2265 if (list_empty(&wq->flusher_queue)) { 2266 BUG_ON(wq->flush_color != wq->work_color); 2267 break; 2268 } 2269 2270 /* 2271 * Need to flush more colors. Make the next flusher 2272 * the new first flusher and arm cwqs. 2273 */ 2274 BUG_ON(wq->flush_color == wq->work_color); 2275 BUG_ON(wq->flush_color != next->flush_color); 2276 2277 list_del_init(&next->list); 2278 wq->first_flusher = next; 2279 2280 if (flush_workqueue_prep_cwqs(wq, wq->flush_color, -1)) 2281 break; 2282 2283 /* 2284 * Meh... this color is already done, clear first 2285 * flusher and repeat cascading. 
2286 */ 2287 wq->first_flusher = NULL; 2288 } 2289 2290 out_unlock: 2291 mutex_unlock(&wq->flush_mutex); 2292 } 2293 EXPORT_SYMBOL_GPL(flush_workqueue); 2294 2295 /** 2296 * flush_work - block until a work_struct's callback has terminated 2297 * @work: the work which is to be flushed 2298 * 2299 * Returns false if @work has already terminated. 2300 * 2301 * It is expected that, prior to calling flush_work(), the caller has 2302 * arranged for the work to not be requeued, otherwise it doesn't make 2303 * sense to use this function. 2304 */ 2305 int flush_work(struct work_struct *work) 2306 { 2307 struct worker *worker = NULL; 2308 struct global_cwq *gcwq; 2309 struct cpu_workqueue_struct *cwq; 2310 struct wq_barrier barr; 2311 2312 might_sleep(); 2313 gcwq = get_work_gcwq(work); 2314 if (!gcwq) 2315 return 0; 2316 2317 spin_lock_irq(&gcwq->lock); 2318 if (!list_empty(&work->entry)) { 2319 /* 2320 * See the comment near try_to_grab_pending()->smp_rmb(). 2321 * If it was re-queued to a different gcwq under us, we 2322 * are not going to wait. 2323 */ 2324 smp_rmb(); 2325 cwq = get_work_cwq(work); 2326 if (unlikely(!cwq || gcwq != cwq->gcwq)) 2327 goto already_gone; 2328 } else { 2329 worker = find_worker_executing_work(gcwq, work); 2330 if (!worker) 2331 goto already_gone; 2332 cwq = worker->current_cwq; 2333 } 2334 2335 insert_wq_barrier(cwq, &barr, work, worker); 2336 spin_unlock_irq(&gcwq->lock); 2337 2338 lock_map_acquire(&cwq->wq->lockdep_map); 2339 lock_map_release(&cwq->wq->lockdep_map); 2340 2341 wait_for_completion(&barr.done); 2342 destroy_work_on_stack(&barr.work); 2343 return 1; 2344 already_gone: 2345 spin_unlock_irq(&gcwq->lock); 2346 return 0; 2347 } 2348 EXPORT_SYMBOL_GPL(flush_work); 2349 2350 /* 2351 * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit, 2352 * so this work can't be re-armed in any way. 2353 */ 2354 static int try_to_grab_pending(struct work_struct *work) 2355 { 2356 struct global_cwq *gcwq; 2357 int ret = -1; 2358 2359 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) 2360 return 0; 2361 2362 /* 2363 * The queueing is in progress, or it is already queued. Try to 2364 * steal it from ->worklist without clearing WORK_STRUCT_PENDING. 2365 */ 2366 gcwq = get_work_gcwq(work); 2367 if (!gcwq) 2368 return ret; 2369 2370 spin_lock_irq(&gcwq->lock); 2371 if (!list_empty(&work->entry)) { 2372 /* 2373 * This work is queued, but perhaps we locked the wrong gcwq. 2374 * In that case we must see the new value after rmb(), see 2375 * insert_work()->wmb(). 
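 *
 * Illustration only (made-up names) for flush_work() above: the caller
 * first makes sure the work won't be requeued, then waits for the
 * instance that may still be queued or running:
 *
 *	dev->stopping = true;
 *	flush_work(&dev->update_work);
 *
 * Unlike flush_workqueue(), only dev->update_work is waited for, not
 * every work on its workqueue.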
2376 */ 2377 smp_rmb(); 2378 if (gcwq == get_work_gcwq(work)) { 2379 debug_work_deactivate(work); 2380 list_del_init(&work->entry); 2381 cwq_dec_nr_in_flight(get_work_cwq(work), 2382 get_work_color(work)); 2383 ret = 1; 2384 } 2385 } 2386 spin_unlock_irq(&gcwq->lock); 2387 2388 return ret; 2389 } 2390 2391 static void wait_on_cpu_work(struct global_cwq *gcwq, struct work_struct *work) 2392 { 2393 struct wq_barrier barr; 2394 struct worker *worker; 2395 2396 spin_lock_irq(&gcwq->lock); 2397 2398 worker = find_worker_executing_work(gcwq, work); 2399 if (unlikely(worker)) 2400 insert_wq_barrier(worker->current_cwq, &barr, work, worker); 2401 2402 spin_unlock_irq(&gcwq->lock); 2403 2404 if (unlikely(worker)) { 2405 wait_for_completion(&barr.done); 2406 destroy_work_on_stack(&barr.work); 2407 } 2408 } 2409 2410 static void wait_on_work(struct work_struct *work) 2411 { 2412 int cpu; 2413 2414 might_sleep(); 2415 2416 lock_map_acquire(&work->lockdep_map); 2417 lock_map_release(&work->lockdep_map); 2418 2419 for_each_gcwq_cpu(cpu) 2420 wait_on_cpu_work(get_gcwq(cpu), work); 2421 } 2422 2423 static int __cancel_work_timer(struct work_struct *work, 2424 struct timer_list* timer) 2425 { 2426 int ret; 2427 2428 do { 2429 ret = (timer && likely(del_timer(timer))); 2430 if (!ret) 2431 ret = try_to_grab_pending(work); 2432 wait_on_work(work); 2433 } while (unlikely(ret < 0)); 2434 2435 clear_work_data(work); 2436 return ret; 2437 } 2438 2439 /** 2440 * cancel_work_sync - block until a work_struct's callback has terminated 2441 * @work: the work which is to be flushed 2442 * 2443 * Returns true if @work was pending. 2444 * 2445 * cancel_work_sync() will cancel the work if it is queued. If the work's 2446 * callback appears to be running, cancel_work_sync() will block until it 2447 * has completed. 2448 * 2449 * It is possible to use this function if the work re-queues itself. It can 2450 * cancel the work even if it migrates to another workqueue, however in that 2451 * case it only guarantees that work->func() has completed on the last queued 2452 * workqueue. 2453 * 2454 * cancel_work_sync(&delayed_work->work) should be used only if ->timer is not 2455 * pending, otherwise it goes into a busy-wait loop until the timer expires. 2456 * 2457 * The caller must ensure that workqueue_struct on which this work was last 2458 * queued can't be destroyed before this function returns. 2459 */ 2460 int cancel_work_sync(struct work_struct *work) 2461 { 2462 return __cancel_work_timer(work, NULL); 2463 } 2464 EXPORT_SYMBOL_GPL(cancel_work_sync); 2465 2466 /** 2467 * cancel_delayed_work_sync - reliably kill off a delayed work. 2468 * @dwork: the delayed work struct 2469 * 2470 * Returns true if @dwork was pending. 2471 * 2472 * It is possible to use this function if @dwork rearms itself via queue_work() 2473 * or queue_delayed_work(). See also the comment for cancel_work_sync(). 2474 */ 2475 int cancel_delayed_work_sync(struct delayed_work *dwork) 2476 { 2477 return __cancel_work_timer(&dwork->work, &dwork->timer); 2478 } 2479 EXPORT_SYMBOL(cancel_delayed_work_sync); 2480 2481 /** 2482 * schedule_work - put work task in global workqueue 2483 * @work: job to be done 2484 * 2485 * Returns zero if @work was already on the kernel-global workqueue and 2486 * non-zero otherwise. 2487 * 2488 * This puts a job in the kernel-global workqueue if it was not already 2489 * queued and leaves it in the same position on the kernel-global 2490 * workqueue otherwise. 
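 *
 * Illustration only (made-up names): a common pattern pairs
 * schedule_work() with cancel_work_sync() on teardown:
 *
 *	INIT_WORK(&dev->event_work, foo_event_fn);
 *	schedule_work(&dev->event_work);
 *
 *	cancel_work_sync(&dev->event_work);
 *
 * The cancel either removes the pending work or waits for the running
 * callback, so once it returns foo_event_fn() isn't running and, as
 * long as nothing requeues the work, won't run again.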
2491 */ 2492 int schedule_work(struct work_struct *work) 2493 { 2494 return queue_work(system_wq, work); 2495 } 2496 EXPORT_SYMBOL(schedule_work); 2497 2498 /* 2499 * schedule_work_on - put work task on a specific cpu 2500 * @cpu: cpu to put the work task on 2501 * @work: job to be done 2502 * 2503 * This puts a job on a specific cpu 2504 */ 2505 int schedule_work_on(int cpu, struct work_struct *work) 2506 { 2507 return queue_work_on(cpu, system_wq, work); 2508 } 2509 EXPORT_SYMBOL(schedule_work_on); 2510 2511 /** 2512 * schedule_delayed_work - put work task in global workqueue after delay 2513 * @dwork: job to be done 2514 * @delay: number of jiffies to wait or 0 for immediate execution 2515 * 2516 * After waiting for a given time this puts a job in the kernel-global 2517 * workqueue. 2518 */ 2519 int schedule_delayed_work(struct delayed_work *dwork, 2520 unsigned long delay) 2521 { 2522 return queue_delayed_work(system_wq, dwork, delay); 2523 } 2524 EXPORT_SYMBOL(schedule_delayed_work); 2525 2526 /** 2527 * flush_delayed_work - block until a dwork_struct's callback has terminated 2528 * @dwork: the delayed work which is to be flushed 2529 * 2530 * Any timeout is cancelled, and any pending work is run immediately. 2531 */ 2532 void flush_delayed_work(struct delayed_work *dwork) 2533 { 2534 if (del_timer_sync(&dwork->timer)) { 2535 __queue_work(get_cpu(), get_work_cwq(&dwork->work)->wq, 2536 &dwork->work); 2537 put_cpu(); 2538 } 2539 flush_work(&dwork->work); 2540 } 2541 EXPORT_SYMBOL(flush_delayed_work); 2542 2543 /** 2544 * schedule_delayed_work_on - queue work in global workqueue on CPU after delay 2545 * @cpu: cpu to use 2546 * @dwork: job to be done 2547 * @delay: number of jiffies to wait 2548 * 2549 * After waiting for a given time this puts a job in the kernel-global 2550 * workqueue on the specified CPU. 2551 */ 2552 int schedule_delayed_work_on(int cpu, 2553 struct delayed_work *dwork, unsigned long delay) 2554 { 2555 return queue_delayed_work_on(cpu, system_wq, dwork, delay); 2556 } 2557 EXPORT_SYMBOL(schedule_delayed_work_on); 2558 2559 /** 2560 * schedule_on_each_cpu - call a function on each online CPU from keventd 2561 * @func: the function to call 2562 * 2563 * Returns zero on success. 2564 * Returns -ve errno on failure. 2565 * 2566 * schedule_on_each_cpu() is very slow. 2567 */ 2568 int schedule_on_each_cpu(work_func_t func) 2569 { 2570 int cpu; 2571 struct work_struct __percpu *works; 2572 2573 works = alloc_percpu(struct work_struct); 2574 if (!works) 2575 return -ENOMEM; 2576 2577 get_online_cpus(); 2578 2579 for_each_online_cpu(cpu) { 2580 struct work_struct *work = per_cpu_ptr(works, cpu); 2581 2582 INIT_WORK(work, func); 2583 schedule_work_on(cpu, work); 2584 } 2585 2586 for_each_online_cpu(cpu) 2587 flush_work(per_cpu_ptr(works, cpu)); 2588 2589 put_online_cpus(); 2590 free_percpu(works); 2591 return 0; 2592 } 2593 2594 /** 2595 * flush_scheduled_work - ensure that any scheduled work has run to completion. 2596 * 2597 * Forces execution of the kernel-global workqueue and blocks until its 2598 * completion. 2599 * 2600 * Think twice before calling this function! It's very easy to get into 2601 * trouble if you don't take great care. Either of the following situations 2602 * will lead to deadlock: 2603 * 2604 * One of the work items currently on the workqueue needs to acquire 2605 * a lock held by your code or its caller. 2606 * 2607 * Your code is running in the context of a work routine. 
2608 * 2609 * They will be detected by lockdep when they occur, but the first might not 2610 * occur very often. It depends on what work items are on the workqueue and 2611 * what locks they need, which you have no control over. 2612 * 2613 * In most situations flushing the entire workqueue is overkill; you merely 2614 * need to know that a particular work item isn't queued and isn't running. 2615 * In such cases you should use cancel_delayed_work_sync() or 2616 * cancel_work_sync() instead. 2617 */ 2618 void flush_scheduled_work(void) 2619 { 2620 flush_workqueue(system_wq); 2621 } 2622 EXPORT_SYMBOL(flush_scheduled_work); 2623 2624 /** 2625 * execute_in_process_context - reliably execute the routine with user context 2626 * @fn: the function to execute 2627 * @ew: guaranteed storage for the execute work structure (must 2628 * be available when the work executes) 2629 * 2630 * Executes the function immediately if process context is available, 2631 * otherwise schedules the function for delayed execution. 2632 * 2633 * Returns: 0 - function was executed 2634 * 1 - function was scheduled for execution 2635 */ 2636 int execute_in_process_context(work_func_t fn, struct execute_work *ew) 2637 { 2638 if (!in_interrupt()) { 2639 fn(&ew->work); 2640 return 0; 2641 } 2642 2643 INIT_WORK(&ew->work, fn); 2644 schedule_work(&ew->work); 2645 2646 return 1; 2647 } 2648 EXPORT_SYMBOL_GPL(execute_in_process_context); 2649 2650 int keventd_up(void) 2651 { 2652 return system_wq != NULL; 2653 } 2654 2655 static int alloc_cwqs(struct workqueue_struct *wq) 2656 { 2657 /* 2658 * cwqs are forced aligned according to WORK_STRUCT_FLAG_BITS. 2659 * Make sure that the alignment isn't lower than that of 2660 * unsigned long long. 2661 */ 2662 const size_t size = sizeof(struct cpu_workqueue_struct); 2663 const size_t align = max_t(size_t, 1 << WORK_STRUCT_FLAG_BITS, 2664 __alignof__(unsigned long long)); 2665 #ifdef CONFIG_SMP 2666 bool percpu = !(wq->flags & WQ_UNBOUND); 2667 #else 2668 bool percpu = false; 2669 #endif 2670 2671 if (percpu) 2672 wq->cpu_wq.pcpu = __alloc_percpu(size, align); 2673 else { 2674 void *ptr; 2675 2676 /* 2677 * Allocate enough room to align cwq and put an extra 2678 * pointer at the end pointing back to the originally 2679 * allocated pointer which will be used for free. 2680 */ 2681 ptr = kzalloc(size + align + sizeof(void *), GFP_KERNEL); 2682 if (ptr) { 2683 wq->cpu_wq.single = PTR_ALIGN(ptr, align); 2684 *(void **)(wq->cpu_wq.single + 1) = ptr; 2685 } 2686 } 2687 2688 /* just in case, make sure it's actually aligned */ 2689 BUG_ON(!IS_ALIGNED(wq->cpu_wq.v, align)); 2690 return wq->cpu_wq.v ? 0 : -ENOMEM; 2691 } 2692 2693 static void free_cwqs(struct workqueue_struct *wq) 2694 { 2695 #ifdef CONFIG_SMP 2696 bool percpu = !(wq->flags & WQ_UNBOUND); 2697 #else 2698 bool percpu = false; 2699 #endif 2700 2701 if (percpu) 2702 free_percpu(wq->cpu_wq.pcpu); 2703 else if (wq->cpu_wq.single) { 2704 /* the pointer to free is stored right after the cwq */ 2705 kfree(*(void **)(wq->cpu_wq.single + 1)); 2706 } 2707 } 2708 2709 static int wq_clamp_max_active(int max_active, unsigned int flags, 2710 const char *name) 2711 { 2712 int lim = flags & WQ_UNBOUND ? 
WQ_UNBOUND_MAX_ACTIVE : WQ_MAX_ACTIVE; 2713 2714 if (max_active < 1 || max_active > lim) 2715 printk(KERN_WARNING "workqueue: max_active %d requested for %s " 2716 "is out of range, clamping between %d and %d\n", 2717 max_active, name, 1, lim); 2718 2719 return clamp_val(max_active, 1, lim); 2720 } 2721 2722 struct workqueue_struct *__alloc_workqueue_key(const char *name, 2723 unsigned int flags, 2724 int max_active, 2725 struct lock_class_key *key, 2726 const char *lock_name) 2727 { 2728 struct workqueue_struct *wq; 2729 unsigned int cpu; 2730 2731 /* 2732 * Unbound workqueues aren't concurrency managed and should be 2733 * dispatched to workers immediately. 2734 */ 2735 if (flags & WQ_UNBOUND) 2736 flags |= WQ_HIGHPRI; 2737 2738 max_active = max_active ?: WQ_DFL_ACTIVE; 2739 max_active = wq_clamp_max_active(max_active, flags, name); 2740 2741 wq = kzalloc(sizeof(*wq), GFP_KERNEL); 2742 if (!wq) 2743 goto err; 2744 2745 wq->flags = flags; 2746 wq->saved_max_active = max_active; 2747 mutex_init(&wq->flush_mutex); 2748 atomic_set(&wq->nr_cwqs_to_flush, 0); 2749 INIT_LIST_HEAD(&wq->flusher_queue); 2750 INIT_LIST_HEAD(&wq->flusher_overflow); 2751 2752 wq->name = name; 2753 lockdep_init_map(&wq->lockdep_map, lock_name, key, 0); 2754 INIT_LIST_HEAD(&wq->list); 2755 2756 if (alloc_cwqs(wq) < 0) 2757 goto err; 2758 2759 for_each_cwq_cpu(cpu, wq) { 2760 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); 2761 struct global_cwq *gcwq = get_gcwq(cpu); 2762 2763 BUG_ON((unsigned long)cwq & WORK_STRUCT_FLAG_MASK); 2764 cwq->gcwq = gcwq; 2765 cwq->wq = wq; 2766 cwq->flush_color = -1; 2767 cwq->max_active = max_active; 2768 INIT_LIST_HEAD(&cwq->delayed_works); 2769 } 2770 2771 if (flags & WQ_RESCUER) { 2772 struct worker *rescuer; 2773 2774 if (!alloc_mayday_mask(&wq->mayday_mask, GFP_KERNEL)) 2775 goto err; 2776 2777 wq->rescuer = rescuer = alloc_worker(); 2778 if (!rescuer) 2779 goto err; 2780 2781 rescuer->task = kthread_create(rescuer_thread, wq, "%s", name); 2782 if (IS_ERR(rescuer->task)) 2783 goto err; 2784 2785 wq->rescuer = rescuer; 2786 rescuer->task->flags |= PF_THREAD_BOUND; 2787 wake_up_process(rescuer->task); 2788 } 2789 2790 /* 2791 * workqueue_lock protects global freeze state and workqueues 2792 * list. Grab it, set max_active accordingly and add the new 2793 * workqueue to workqueues list. 2794 */ 2795 spin_lock(&workqueue_lock); 2796 2797 if (workqueue_freezing && wq->flags & WQ_FREEZEABLE) 2798 for_each_cwq_cpu(cpu, wq) 2799 get_cwq(cpu, wq)->max_active = 0; 2800 2801 list_add(&wq->list, &workqueues); 2802 2803 spin_unlock(&workqueue_lock); 2804 2805 return wq; 2806 err: 2807 if (wq) { 2808 free_cwqs(wq); 2809 free_mayday_mask(wq->mayday_mask); 2810 kfree(wq->rescuer); 2811 kfree(wq); 2812 } 2813 return NULL; 2814 } 2815 EXPORT_SYMBOL_GPL(__alloc_workqueue_key); 2816 2817 /** 2818 * destroy_workqueue - safely terminate a workqueue 2819 * @wq: target workqueue 2820 * 2821 * Safely destroy a workqueue. All work currently pending will be done first. 2822 */ 2823 void destroy_workqueue(struct workqueue_struct *wq) 2824 { 2825 unsigned int cpu; 2826 2827 flush_workqueue(wq); 2828 2829 /* 2830 * wq list is used to freeze wq, remove from list after 2831 * flushing is complete in case freeze races us. 
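 *
 * Illustration only (made-up names) for the allocation path above:
 *
 *	wq = alloc_workqueue("foo_io", WQ_FREEZEABLE | WQ_RESCUER, 16);
 *	if (!wq)
 *		return -ENOMEM;
 *
 * creates a freezeable per-cpu workqueue with a rescuer and at most 16
 * in-flight works per cpu; it is released with destroy_workqueue(wq)
 * once all users are gone.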
2832 */ 2833 spin_lock(&workqueue_lock); 2834 list_del(&wq->list); 2835 spin_unlock(&workqueue_lock); 2836 2837 /* sanity check */ 2838 for_each_cwq_cpu(cpu, wq) { 2839 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); 2840 int i; 2841 2842 for (i = 0; i < WORK_NR_COLORS; i++) 2843 BUG_ON(cwq->nr_in_flight[i]); 2844 BUG_ON(cwq->nr_active); 2845 BUG_ON(!list_empty(&cwq->delayed_works)); 2846 } 2847 2848 if (wq->flags & WQ_RESCUER) { 2849 kthread_stop(wq->rescuer->task); 2850 free_mayday_mask(wq->mayday_mask); 2851 } 2852 2853 free_cwqs(wq); 2854 kfree(wq); 2855 } 2856 EXPORT_SYMBOL_GPL(destroy_workqueue); 2857 2858 /** 2859 * workqueue_set_max_active - adjust max_active of a workqueue 2860 * @wq: target workqueue 2861 * @max_active: new max_active value. 2862 * 2863 * Set max_active of @wq to @max_active. 2864 * 2865 * CONTEXT: 2866 * Don't call from IRQ context. 2867 */ 2868 void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) 2869 { 2870 unsigned int cpu; 2871 2872 max_active = wq_clamp_max_active(max_active, wq->flags, wq->name); 2873 2874 spin_lock(&workqueue_lock); 2875 2876 wq->saved_max_active = max_active; 2877 2878 for_each_cwq_cpu(cpu, wq) { 2879 struct global_cwq *gcwq = get_gcwq(cpu); 2880 2881 spin_lock_irq(&gcwq->lock); 2882 2883 if (!(wq->flags & WQ_FREEZEABLE) || 2884 !(gcwq->flags & GCWQ_FREEZING)) 2885 get_cwq(gcwq->cpu, wq)->max_active = max_active; 2886 2887 spin_unlock_irq(&gcwq->lock); 2888 } 2889 2890 spin_unlock(&workqueue_lock); 2891 } 2892 EXPORT_SYMBOL_GPL(workqueue_set_max_active); 2893 2894 /** 2895 * workqueue_congested - test whether a workqueue is congested 2896 * @cpu: CPU in question 2897 * @wq: target workqueue 2898 * 2899 * Test whether @wq's cpu workqueue for @cpu is congested. There is 2900 * no synchronization around this function and the test result is 2901 * unreliable and only useful as advisory hints or for debugging. 2902 * 2903 * RETURNS: 2904 * %true if congested, %false otherwise. 2905 */ 2906 bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq) 2907 { 2908 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); 2909 2910 return !list_empty(&cwq->delayed_works); 2911 } 2912 EXPORT_SYMBOL_GPL(workqueue_congested); 2913 2914 /** 2915 * work_cpu - return the last known associated cpu for @work 2916 * @work: the work of interest 2917 * 2918 * RETURNS: 2919 * CPU number if @work was ever queued. WORK_CPU_NONE otherwise. 2920 */ 2921 unsigned int work_cpu(struct work_struct *work) 2922 { 2923 struct global_cwq *gcwq = get_work_gcwq(work); 2924 2925 return gcwq ? gcwq->cpu : WORK_CPU_NONE; 2926 } 2927 EXPORT_SYMBOL_GPL(work_cpu); 2928 2929 /** 2930 * work_busy - test whether a work is currently pending or running 2931 * @work: the work to be tested 2932 * 2933 * Test whether @work is currently pending or running. There is no 2934 * synchronization around this function and the test result is 2935 * unreliable and only useful as advisory hints or for debugging. 2936 * Especially for reentrant wqs, the pending state might hide the 2937 * running state. 2938 * 2939 * RETURNS: 2940 * OR'd bitmask of WORK_BUSY_* bits. 
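 *
 * Illustration only (made-up names): because the answers are advisory,
 * work_busy() and workqueue_congested() suit debug/statistics users,
 * e.g.
 *
 *	if (work_busy(&dev->poll_work) & WORK_BUSY_RUNNING)
 *		seq_puts(m, "poll work is running\n");
 *	if (workqueue_congested(raw_smp_processor_id(), dev->wq))
 *		seq_puts(m, "wq congested on this cpu\n");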
2941 */ 2942 unsigned int work_busy(struct work_struct *work) 2943 { 2944 struct global_cwq *gcwq = get_work_gcwq(work); 2945 unsigned long flags; 2946 unsigned int ret = 0; 2947 2948 if (!gcwq) 2949 return false; 2950 2951 spin_lock_irqsave(&gcwq->lock, flags); 2952 2953 if (work_pending(work)) 2954 ret |= WORK_BUSY_PENDING; 2955 if (find_worker_executing_work(gcwq, work)) 2956 ret |= WORK_BUSY_RUNNING; 2957 2958 spin_unlock_irqrestore(&gcwq->lock, flags); 2959 2960 return ret; 2961 } 2962 EXPORT_SYMBOL_GPL(work_busy); 2963 2964 /* 2965 * CPU hotplug. 2966 * 2967 * There are two challenges in supporting CPU hotplug. Firstly, there 2968 * are a lot of assumptions on strong associations among work, cwq and 2969 * gcwq which make migrating pending and scheduled works very 2970 * difficult to implement without impacting hot paths. Secondly, 2971 * gcwqs serve mix of short, long and very long running works making 2972 * blocked draining impractical. 2973 * 2974 * This is solved by allowing a gcwq to be detached from CPU, running 2975 * it with unbound (rogue) workers and allowing it to be reattached 2976 * later if the cpu comes back online. A separate thread is created 2977 * to govern a gcwq in such state and is called the trustee of the 2978 * gcwq. 2979 * 2980 * Trustee states and their descriptions. 2981 * 2982 * START Command state used on startup. On CPU_DOWN_PREPARE, a 2983 * new trustee is started with this state. 2984 * 2985 * IN_CHARGE Once started, trustee will enter this state after 2986 * assuming the manager role and making all existing 2987 * workers rogue. DOWN_PREPARE waits for trustee to 2988 * enter this state. After reaching IN_CHARGE, trustee 2989 * tries to execute the pending worklist until it's empty 2990 * and the state is set to BUTCHER, or the state is set 2991 * to RELEASE. 2992 * 2993 * BUTCHER Command state which is set by the cpu callback after 2994 * the cpu has went down. Once this state is set trustee 2995 * knows that there will be no new works on the worklist 2996 * and once the worklist is empty it can proceed to 2997 * killing idle workers. 2998 * 2999 * RELEASE Command state which is set by the cpu callback if the 3000 * cpu down has been canceled or it has come online 3001 * again. After recognizing this state, trustee stops 3002 * trying to drain or butcher and clears ROGUE, rebinds 3003 * all remaining workers back to the cpu and releases 3004 * manager role. 3005 * 3006 * DONE Trustee will enter this state after BUTCHER or RELEASE 3007 * is complete. 3008 * 3009 * trustee CPU draining 3010 * took over down complete 3011 * START -----------> IN_CHARGE -----------> BUTCHER -----------> DONE 3012 * | | ^ 3013 * | CPU is back online v return workers | 3014 * ----------------> RELEASE -------------- 3015 */ 3016 3017 /** 3018 * trustee_wait_event_timeout - timed event wait for trustee 3019 * @cond: condition to wait for 3020 * @timeout: timeout in jiffies 3021 * 3022 * wait_event_timeout() for trustee to use. Handles locking and 3023 * checks for RELEASE request. 3024 * 3025 * CONTEXT: 3026 * spin_lock_irq(gcwq->lock) which may be released and regrabbed 3027 * multiple times. To be used by trustee. 3028 * 3029 * RETURNS: 3030 * Positive indicating left time if @cond is satisfied, 0 if timed 3031 * out, -1 if canceled. 
3032 */ 3033 #define trustee_wait_event_timeout(cond, timeout) ({ \ 3034 long __ret = (timeout); \ 3035 while (!((cond) || (gcwq->trustee_state == TRUSTEE_RELEASE)) && \ 3036 __ret) { \ 3037 spin_unlock_irq(&gcwq->lock); \ 3038 __wait_event_timeout(gcwq->trustee_wait, (cond) || \ 3039 (gcwq->trustee_state == TRUSTEE_RELEASE), \ 3040 __ret); \ 3041 spin_lock_irq(&gcwq->lock); \ 3042 } \ 3043 gcwq->trustee_state == TRUSTEE_RELEASE ? -1 : (__ret); \ 3044 }) 3045 3046 /** 3047 * trustee_wait_event - event wait for trustee 3048 * @cond: condition to wait for 3049 * 3050 * wait_event() for trustee to use. Automatically handles locking and 3051 * checks for CANCEL request. 3052 * 3053 * CONTEXT: 3054 * spin_lock_irq(gcwq->lock) which may be released and regrabbed 3055 * multiple times. To be used by trustee. 3056 * 3057 * RETURNS: 3058 * 0 if @cond is satisfied, -1 if canceled. 3059 */ 3060 #define trustee_wait_event(cond) ({ \ 3061 long __ret1; \ 3062 __ret1 = trustee_wait_event_timeout(cond, MAX_SCHEDULE_TIMEOUT);\ 3063 __ret1 < 0 ? -1 : 0; \ 3064 }) 3065 3066 static int __cpuinit trustee_thread(void *__gcwq) 3067 { 3068 struct global_cwq *gcwq = __gcwq; 3069 struct worker *worker; 3070 struct work_struct *work; 3071 struct hlist_node *pos; 3072 long rc; 3073 int i; 3074 3075 BUG_ON(gcwq->cpu != smp_processor_id()); 3076 3077 spin_lock_irq(&gcwq->lock); 3078 /* 3079 * Claim the manager position and make all workers rogue. 3080 * Trustee must be bound to the target cpu and can't be 3081 * cancelled. 3082 */ 3083 BUG_ON(gcwq->cpu != smp_processor_id()); 3084 rc = trustee_wait_event(!(gcwq->flags & GCWQ_MANAGING_WORKERS)); 3085 BUG_ON(rc < 0); 3086 3087 gcwq->flags |= GCWQ_MANAGING_WORKERS; 3088 3089 list_for_each_entry(worker, &gcwq->idle_list, entry) 3090 worker->flags |= WORKER_ROGUE; 3091 3092 for_each_busy_worker(worker, i, pos, gcwq) 3093 worker->flags |= WORKER_ROGUE; 3094 3095 /* 3096 * Call schedule() so that we cross rq->lock and thus can 3097 * guarantee sched callbacks see the rogue flag. This is 3098 * necessary as scheduler callbacks may be invoked from other 3099 * cpus. 3100 */ 3101 spin_unlock_irq(&gcwq->lock); 3102 schedule(); 3103 spin_lock_irq(&gcwq->lock); 3104 3105 /* 3106 * Sched callbacks are disabled now. Zap nr_running. After 3107 * this, nr_running stays zero and need_more_worker() and 3108 * keep_working() are always true as long as the worklist is 3109 * not empty. 3110 */ 3111 atomic_set(get_gcwq_nr_running(gcwq->cpu), 0); 3112 3113 spin_unlock_irq(&gcwq->lock); 3114 del_timer_sync(&gcwq->idle_timer); 3115 spin_lock_irq(&gcwq->lock); 3116 3117 /* 3118 * We're now in charge. Notify and proceed to drain. We need 3119 * to keep the gcwq running during the whole CPU down 3120 * procedure as other cpu hotunplug callbacks may need to 3121 * flush currently running tasks. 3122 */ 3123 gcwq->trustee_state = TRUSTEE_IN_CHARGE; 3124 wake_up_all(&gcwq->trustee_wait); 3125 3126 /* 3127 * The original cpu is in the process of dying and may go away 3128 * anytime now. When that happens, we and all workers would 3129 * be migrated to other cpus. Try draining any left work. We 3130 * want to get it over with ASAP - spam rescuers, wake up as 3131 * many idlers as necessary and create new ones till the 3132 * worklist is empty. Note that if the gcwq is frozen, there 3133 * may be frozen works in freezeable cwqs. Don't declare 3134 * completion while frozen. 
3135 */ 3136 while (gcwq->nr_workers != gcwq->nr_idle || 3137 gcwq->flags & GCWQ_FREEZING || 3138 gcwq->trustee_state == TRUSTEE_IN_CHARGE) { 3139 int nr_works = 0; 3140 3141 list_for_each_entry(work, &gcwq->worklist, entry) { 3142 send_mayday(work); 3143 nr_works++; 3144 } 3145 3146 list_for_each_entry(worker, &gcwq->idle_list, entry) { 3147 if (!nr_works--) 3148 break; 3149 wake_up_process(worker->task); 3150 } 3151 3152 if (need_to_create_worker(gcwq)) { 3153 spin_unlock_irq(&gcwq->lock); 3154 worker = create_worker(gcwq, false); 3155 spin_lock_irq(&gcwq->lock); 3156 if (worker) { 3157 worker->flags |= WORKER_ROGUE; 3158 start_worker(worker); 3159 } 3160 } 3161 3162 /* give a breather */ 3163 if (trustee_wait_event_timeout(false, TRUSTEE_COOLDOWN) < 0) 3164 break; 3165 } 3166 3167 /* 3168 * Either all works have been scheduled and cpu is down, or 3169 * cpu down has already been canceled. Wait for and butcher 3170 * all workers till we're canceled. 3171 */ 3172 do { 3173 rc = trustee_wait_event(!list_empty(&gcwq->idle_list)); 3174 while (!list_empty(&gcwq->idle_list)) 3175 destroy_worker(list_first_entry(&gcwq->idle_list, 3176 struct worker, entry)); 3177 } while (gcwq->nr_workers && rc >= 0); 3178 3179 /* 3180 * At this point, either draining has completed and no worker 3181 * is left, or cpu down has been canceled or the cpu is being 3182 * brought back up. There shouldn't be any idle one left. 3183 * Tell the remaining busy ones to rebind once it finishes the 3184 * currently scheduled works by scheduling the rebind_work. 3185 */ 3186 WARN_ON(!list_empty(&gcwq->idle_list)); 3187 3188 for_each_busy_worker(worker, i, pos, gcwq) { 3189 struct work_struct *rebind_work = &worker->rebind_work; 3190 3191 /* 3192 * Rebind_work may race with future cpu hotplug 3193 * operations. Use a separate flag to mark that 3194 * rebinding is scheduled. 3195 */ 3196 worker->flags |= WORKER_REBIND; 3197 worker->flags &= ~WORKER_ROGUE; 3198 3199 /* queue rebind_work, wq doesn't matter, use the default one */ 3200 if (test_and_set_bit(WORK_STRUCT_PENDING_BIT, 3201 work_data_bits(rebind_work))) 3202 continue; 3203 3204 debug_work_activate(rebind_work); 3205 insert_work(get_cwq(gcwq->cpu, system_wq), rebind_work, 3206 worker->scheduled.next, 3207 work_color_to_flags(WORK_NO_COLOR)); 3208 } 3209 3210 /* relinquish manager role */ 3211 gcwq->flags &= ~GCWQ_MANAGING_WORKERS; 3212 3213 /* notify completion */ 3214 gcwq->trustee = NULL; 3215 gcwq->trustee_state = TRUSTEE_DONE; 3216 wake_up_all(&gcwq->trustee_wait); 3217 spin_unlock_irq(&gcwq->lock); 3218 return 0; 3219 } 3220 3221 /** 3222 * wait_trustee_state - wait for trustee to enter the specified state 3223 * @gcwq: gcwq the trustee of interest belongs to 3224 * @state: target state to wait for 3225 * 3226 * Wait for the trustee to reach @state. DONE is already matched. 3227 * 3228 * CONTEXT: 3229 * spin_lock_irq(gcwq->lock) which may be released and regrabbed 3230 * multiple times. To be used by cpu_callback. 
3231 */ 3232 static void __cpuinit wait_trustee_state(struct global_cwq *gcwq, int state) 3233 { 3234 if (!(gcwq->trustee_state == state || 3235 gcwq->trustee_state == TRUSTEE_DONE)) { 3236 spin_unlock_irq(&gcwq->lock); 3237 __wait_event(gcwq->trustee_wait, 3238 gcwq->trustee_state == state || 3239 gcwq->trustee_state == TRUSTEE_DONE); 3240 spin_lock_irq(&gcwq->lock); 3241 } 3242 } 3243 3244 static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, 3245 unsigned long action, 3246 void *hcpu) 3247 { 3248 unsigned int cpu = (unsigned long)hcpu; 3249 struct global_cwq *gcwq = get_gcwq(cpu); 3250 struct task_struct *new_trustee = NULL; 3251 struct worker *uninitialized_var(new_worker); 3252 unsigned long flags; 3253 3254 action &= ~CPU_TASKS_FROZEN; 3255 3256 switch (action) { 3257 case CPU_DOWN_PREPARE: 3258 new_trustee = kthread_create(trustee_thread, gcwq, 3259 "workqueue_trustee/%d\n", cpu); 3260 if (IS_ERR(new_trustee)) 3261 return notifier_from_errno(PTR_ERR(new_trustee)); 3262 kthread_bind(new_trustee, cpu); 3263 /* fall through */ 3264 case CPU_UP_PREPARE: 3265 BUG_ON(gcwq->first_idle); 3266 new_worker = create_worker(gcwq, false); 3267 if (!new_worker) { 3268 if (new_trustee) 3269 kthread_stop(new_trustee); 3270 return NOTIFY_BAD; 3271 } 3272 } 3273 3274 /* some are called w/ irq disabled, don't disturb irq status */ 3275 spin_lock_irqsave(&gcwq->lock, flags); 3276 3277 switch (action) { 3278 case CPU_DOWN_PREPARE: 3279 /* initialize trustee and tell it to acquire the gcwq */ 3280 BUG_ON(gcwq->trustee || gcwq->trustee_state != TRUSTEE_DONE); 3281 gcwq->trustee = new_trustee; 3282 gcwq->trustee_state = TRUSTEE_START; 3283 wake_up_process(gcwq->trustee); 3284 wait_trustee_state(gcwq, TRUSTEE_IN_CHARGE); 3285 /* fall through */ 3286 case CPU_UP_PREPARE: 3287 BUG_ON(gcwq->first_idle); 3288 gcwq->first_idle = new_worker; 3289 break; 3290 3291 case CPU_DYING: 3292 /* 3293 * Before this, the trustee and all workers except for 3294 * the ones which are still executing works from 3295 * before the last CPU down must be on the cpu. After 3296 * this, they'll all be diasporas. 3297 */ 3298 gcwq->flags |= GCWQ_DISASSOCIATED; 3299 break; 3300 3301 case CPU_POST_DEAD: 3302 gcwq->trustee_state = TRUSTEE_BUTCHER; 3303 /* fall through */ 3304 case CPU_UP_CANCELED: 3305 destroy_worker(gcwq->first_idle); 3306 gcwq->first_idle = NULL; 3307 break; 3308 3309 case CPU_DOWN_FAILED: 3310 case CPU_ONLINE: 3311 gcwq->flags &= ~GCWQ_DISASSOCIATED; 3312 if (gcwq->trustee_state != TRUSTEE_DONE) { 3313 gcwq->trustee_state = TRUSTEE_RELEASE; 3314 wake_up_process(gcwq->trustee); 3315 wait_trustee_state(gcwq, TRUSTEE_DONE); 3316 } 3317 3318 /* 3319 * Trustee is done and there might be no worker left. 3320 * Put the first_idle in and request a real manager to 3321 * take a look. 
3322 */ 3323 spin_unlock_irq(&gcwq->lock); 3324 kthread_bind(gcwq->first_idle->task, cpu); 3325 spin_lock_irq(&gcwq->lock); 3326 gcwq->flags |= GCWQ_MANAGE_WORKERS; 3327 start_worker(gcwq->first_idle); 3328 gcwq->first_idle = NULL; 3329 break; 3330 } 3331 3332 spin_unlock_irqrestore(&gcwq->lock, flags); 3333 3334 return notifier_from_errno(0); 3335 } 3336 3337 #ifdef CONFIG_SMP 3338 3339 struct work_for_cpu { 3340 struct completion completion; 3341 long (*fn)(void *); 3342 void *arg; 3343 long ret; 3344 }; 3345 3346 static int do_work_for_cpu(void *_wfc) 3347 { 3348 struct work_for_cpu *wfc = _wfc; 3349 wfc->ret = wfc->fn(wfc->arg); 3350 complete(&wfc->completion); 3351 return 0; 3352 } 3353 3354 /** 3355 * work_on_cpu - run a function in user context on a particular cpu 3356 * @cpu: the cpu to run on 3357 * @fn: the function to run 3358 * @arg: the function arg 3359 * 3360 * This will return the value @fn returns. 3361 * It is up to the caller to ensure that the cpu doesn't go offline. 3362 * The caller must not hold any locks which would prevent @fn from completing. 3363 */ 3364 long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) 3365 { 3366 struct task_struct *sub_thread; 3367 struct work_for_cpu wfc = { 3368 .completion = COMPLETION_INITIALIZER_ONSTACK(wfc.completion), 3369 .fn = fn, 3370 .arg = arg, 3371 }; 3372 3373 sub_thread = kthread_create(do_work_for_cpu, &wfc, "work_for_cpu"); 3374 if (IS_ERR(sub_thread)) 3375 return PTR_ERR(sub_thread); 3376 kthread_bind(sub_thread, cpu); 3377 wake_up_process(sub_thread); 3378 wait_for_completion(&wfc.completion); 3379 return wfc.ret; 3380 } 3381 EXPORT_SYMBOL_GPL(work_on_cpu); 3382 #endif /* CONFIG_SMP */ 3383 3384 #ifdef CONFIG_FREEZER 3385 3386 /** 3387 * freeze_workqueues_begin - begin freezing workqueues 3388 * 3389 * Start freezing workqueues. After this function returns, all 3390 * freezeable workqueues will queue new works to their frozen_works 3391 * list instead of gcwq->worklist. 3392 * 3393 * CONTEXT: 3394 * Grabs and releases workqueue_lock and gcwq->lock's. 3395 */ 3396 void freeze_workqueues_begin(void) 3397 { 3398 unsigned int cpu; 3399 3400 spin_lock(&workqueue_lock); 3401 3402 BUG_ON(workqueue_freezing); 3403 workqueue_freezing = true; 3404 3405 for_each_gcwq_cpu(cpu) { 3406 struct global_cwq *gcwq = get_gcwq(cpu); 3407 struct workqueue_struct *wq; 3408 3409 spin_lock_irq(&gcwq->lock); 3410 3411 BUG_ON(gcwq->flags & GCWQ_FREEZING); 3412 gcwq->flags |= GCWQ_FREEZING; 3413 3414 list_for_each_entry(wq, &workqueues, list) { 3415 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); 3416 3417 if (cwq && wq->flags & WQ_FREEZEABLE) 3418 cwq->max_active = 0; 3419 } 3420 3421 spin_unlock_irq(&gcwq->lock); 3422 } 3423 3424 spin_unlock(&workqueue_lock); 3425 } 3426 3427 /** 3428 * freeze_workqueues_busy - are freezeable workqueues still busy? 3429 * 3430 * Check whether freezing is complete. This function must be called 3431 * between freeze_workqueues_begin() and thaw_workqueues(). 3432 * 3433 * CONTEXT: 3434 * Grabs and releases workqueue_lock. 3435 * 3436 * RETURNS: 3437 * %true if some freezeable workqueues are still busy. %false if 3438 * freezing is complete. 3439 */ 3440 bool freeze_workqueues_busy(void) 3441 { 3442 unsigned int cpu; 3443 bool busy = false; 3444 3445 spin_lock(&workqueue_lock); 3446 3447 BUG_ON(!workqueue_freezing); 3448 3449 for_each_gcwq_cpu(cpu) { 3450 struct workqueue_struct *wq; 3451 /* 3452 * nr_active is monotonically decreasing. It's safe 3453 * to peek without lock. 
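 *
 * Illustration only (made-up names): work_on_cpu() above is handy when
 * a function must run in process context on one particular cpu, e.g.
 * to read state that is only visible locally:
 *
 *	static long read_local_state(void *arg)
 *	{
 *		struct foo *foo = arg;
 *
 *		return foo_read_this_cpu(foo);
 *	}
 *
 *	ret = work_on_cpu(target_cpu, read_local_state, foo);
 *
 * The caller must keep target_cpu online for the duration.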
3454 */ 3455 list_for_each_entry(wq, &workqueues, list) { 3456 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); 3457 3458 if (!cwq || !(wq->flags & WQ_FREEZEABLE)) 3459 continue; 3460 3461 BUG_ON(cwq->nr_active < 0); 3462 if (cwq->nr_active) { 3463 busy = true; 3464 goto out_unlock; 3465 } 3466 } 3467 } 3468 out_unlock: 3469 spin_unlock(&workqueue_lock); 3470 return busy; 3471 } 3472 3473 /** 3474 * thaw_workqueues - thaw workqueues 3475 * 3476 * Thaw workqueues. Normal queueing is restored and all collected 3477 * frozen works are transferred to their respective gcwq worklists. 3478 * 3479 * CONTEXT: 3480 * Grabs and releases workqueue_lock and gcwq->lock's. 3481 */ 3482 void thaw_workqueues(void) 3483 { 3484 unsigned int cpu; 3485 3486 spin_lock(&workqueue_lock); 3487 3488 if (!workqueue_freezing) 3489 goto out_unlock; 3490 3491 for_each_gcwq_cpu(cpu) { 3492 struct global_cwq *gcwq = get_gcwq(cpu); 3493 struct workqueue_struct *wq; 3494 3495 spin_lock_irq(&gcwq->lock); 3496 3497 BUG_ON(!(gcwq->flags & GCWQ_FREEZING)); 3498 gcwq->flags &= ~GCWQ_FREEZING; 3499 3500 list_for_each_entry(wq, &workqueues, list) { 3501 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); 3502 3503 if (!cwq || !(wq->flags & WQ_FREEZEABLE)) 3504 continue; 3505 3506 /* restore max_active and repopulate worklist */ 3507 cwq->max_active = wq->saved_max_active; 3508 3509 while (!list_empty(&cwq->delayed_works) && 3510 cwq->nr_active < cwq->max_active) 3511 cwq_activate_first_delayed(cwq); 3512 } 3513 3514 wake_up_worker(gcwq); 3515 3516 spin_unlock_irq(&gcwq->lock); 3517 } 3518 3519 workqueue_freezing = false; 3520 out_unlock: 3521 spin_unlock(&workqueue_lock); 3522 } 3523 #endif /* CONFIG_FREEZER */ 3524 3525 static int __init init_workqueues(void) 3526 { 3527 unsigned int cpu; 3528 int i; 3529 3530 cpu_notifier(workqueue_cpu_callback, CPU_PRI_WORKQUEUE); 3531 3532 /* initialize gcwqs */ 3533 for_each_gcwq_cpu(cpu) { 3534 struct global_cwq *gcwq = get_gcwq(cpu); 3535 3536 spin_lock_init(&gcwq->lock); 3537 INIT_LIST_HEAD(&gcwq->worklist); 3538 gcwq->cpu = cpu; 3539 if (cpu == WORK_CPU_UNBOUND) 3540 gcwq->flags |= GCWQ_DISASSOCIATED; 3541 3542 INIT_LIST_HEAD(&gcwq->idle_list); 3543 for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++) 3544 INIT_HLIST_HEAD(&gcwq->busy_hash[i]); 3545 3546 init_timer_deferrable(&gcwq->idle_timer); 3547 gcwq->idle_timer.function = idle_worker_timeout; 3548 gcwq->idle_timer.data = (unsigned long)gcwq; 3549 3550 setup_timer(&gcwq->mayday_timer, gcwq_mayday_timeout, 3551 (unsigned long)gcwq); 3552 3553 ida_init(&gcwq->worker_ida); 3554 3555 gcwq->trustee_state = TRUSTEE_DONE; 3556 init_waitqueue_head(&gcwq->trustee_wait); 3557 } 3558 3559 /* create the initial worker */ 3560 for_each_online_gcwq_cpu(cpu) { 3561 struct global_cwq *gcwq = get_gcwq(cpu); 3562 struct worker *worker; 3563 3564 worker = create_worker(gcwq, true); 3565 BUG_ON(!worker); 3566 spin_lock_irq(&gcwq->lock); 3567 start_worker(worker); 3568 spin_unlock_irq(&gcwq->lock); 3569 } 3570 3571 system_wq = alloc_workqueue("events", 0, 0); 3572 system_long_wq = alloc_workqueue("events_long", 0, 0); 3573 system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0); 3574 system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, 3575 WQ_UNBOUND_MAX_ACTIVE); 3576 BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq); 3577 return 0; 3578 } 3579 early_initcall(init_workqueues); 3580
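/*
 * Illustration only (made-up names, not kernel-doc): the system
 * workqueues created above back schedule_work() and friends, and code
 * that doesn't need a private workqueue can queue to them directly:
 *
 *	queue_work(system_wq, &dev->short_work);
 *	queue_work(system_long_wq, &dev->slow_work);
 *	queue_work(system_unbound_wq, &dev->cpu_agnostic_work);
 *
 * system_wq is what schedule_work() uses, system_long_wq is meant for
 * works which may take noticeably long, system_nrt_wq additionally
 * guarantees a work is never reentered on another cpu, and
 * system_unbound_wq skips per-cpu concurrency management entirely.
 */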