kernel/workqueue.c

   1 /*
   2  * kernel/workqueue.c - generic async execution with shared worker pool
   3  *
   4  * Copyright (C) 2002           Ingo Molnar
   5  *
   6  *   Derived from the taskqueue/keventd code by:
   7  *     David Woodhouse <dwmw2@infradead.org>
   8  *     Andrew Morton
   9  *     Kai Petzke <wpp@marie.physik.tu-berlin.de>
  10  *     Theodore Ts'o <tytso@mit.edu>
  11  *
  12  * Made to use alloc_percpu by Christoph Lameter.
  13  *
  14  * Copyright (C) 2010           SUSE Linux Products GmbH
  15  * Copyright (C) 2010           Tejun Heo <tj@kernel.org>
  16  *
  17  * This is the generic async execution mechanism.  Work items as are
  18  * executed in process context.  The worker pool is shared and
  19  * automatically managed.  There is one worker pool for each CPU and
  20  * one extra for works which are better served by workers which are
  21  * not bound to any specific CPU.
  22  *
  23  * Please read Documentation/workqueue.txt for details.
  24  */
  25
  26 #include <linux/export.h>
  27 #include <linux/kernel.h>
  28 #include <linux/sched.h>
  29 #include <linux/init.h>
  30 #include <linux/signal.h>
  31 #include <linux/completion.h>
  32 #include <linux/workqueue.h>
  33 #include <linux/slab.h>
  34 #include <linux/cpu.h>
  35 #include <linux/notifier.h>
  36 #include <linux/kthread.h>
  37 #include <linux/hardirq.h>
  38 #include <linux/mempolicy.h>
  39 #include <linux/freezer.h>
  40 #include <linux/kallsyms.h>
  41 #include <linux/debug_locks.h>
  42 #include <linux/lockdep.h>
  43 #include <linux/idr.h>
  44
  45 #include "workqueue_sched.h"
  46
  47 enum {
  48         /* global_cwq flags */
  49         GCWQ_DISASSOCIATED      = 1 << 0,       /* cpu can't serve workers */
  50         GCWQ_FREEZING           = 1 << 1,       /* freeze in progress */
  51
  52         /* pool flags */
  53         POOL_MANAGE_WORKERS     = 1 << 0,       /* need to manage workers */
  54         POOL_MANAGING_WORKERS   = 1 << 1,       /* managing workers */
  55         POOL_HIGHPRI_PENDING    = 1 << 2,       /* highpri works on queue */
  56
  57         /* worker flags */
  58         WORKER_STARTED          = 1 << 0,       /* started */
  59         WORKER_DIE              = 1 << 1,       /* die die die */
  60         WORKER_IDLE             = 1 << 2,       /* is idle */
  61         WORKER_PREP             = 1 << 3,       /* preparing to run works */
  62         WORKER_ROGUE            = 1 << 4,       /* not bound to any cpu */
  63         WORKER_REBIND           = 1 << 5,       /* mom is home, come back */
  64         WORKER_CPU_INTENSIVE    = 1 << 6,       /* cpu intensive */
  65         WORKER_UNBOUND          = 1 << 7,       /* worker is unbound */
  66
  67         WORKER_NOT_RUNNING      = WORKER_PREP | WORKER_ROGUE | WORKER_REBIND |
  68                                   WORKER_CPU_INTENSIVE | WORKER_UNBOUND,
  69
  70         /* gcwq->trustee_state */
  71         TRUSTEE_START           = 0,            /* start */
  72         TRUSTEE_IN_CHARGE       = 1,            /* trustee in charge of gcwq */
  73         TRUSTEE_BUTCHER         = 2,            /* butcher workers */
  74         TRUSTEE_RELEASE         = 3,            /* release workers */
  75         TRUSTEE_DONE            = 4,            /* trustee is done */
  76
  77         BUSY_WORKER_HASH_ORDER  = 6,            /* 64 pointers */
  78         BUSY_WORKER_HASH_SIZE   = 1 << BUSY_WORKER_HASH_ORDER,
  79         BUSY_WORKER_HASH_MASK   = BUSY_WORKER_HASH_SIZE - 1,
  80
  81         MAX_IDLE_WORKERS_RATIO  = 4,            /* 1/4 of busy can be idle */
  82         IDLE_WORKER_TIMEOUT     = 300 * HZ,     /* keep idle ones for 5 mins */
  83
  84         MAYDAY_INITIAL_TIMEOUT  = HZ / 100 >= 2 ? HZ / 100 : 2,
  85                                                 /* call for help after 10ms
  86                                                    (min two ticks) */
  87         MAYDAY_INTERVAL         = HZ / 10,      /* and then every 100ms */
  88         CREATE_COOLDOWN         = HZ,           /* time to breath after fail */
  89         TRUSTEE_COOLDOWN        = HZ / 10,      /* for trustee draining */
  90
  91         /*
  92          * Rescue workers are used only on emergencies and shared by
  93          * all cpus.  Give -20.
  94          */
  95         RESCUER_NICE_LEVEL      = -20,
  96 };
  97
  98 /*
  99  * Structure fields follow one of the following exclusion rules.
 100  *
 101  * I: Modifiable by initialization/destruction paths and read-only for
 102  *    everyone else.
 103  *
 104  * P: Preemption protected.  Disabling preemption is enough and should
 105  *    only be modified and accessed from the local cpu.
 106  *
 107  * L: gcwq->lock protected.  Access with gcwq->lock held.
 108  *
 109  * X: During normal operation, modification requires gcwq->lock and
 110  *    should be done only from local cpu.  Either disabling preemption
 111  *    on local cpu or grabbing gcwq->lock is enough for read access.
 112  *    If GCWQ_DISASSOCIATED is set, it's identical to L.
 113  *
 114  * F: wq->flush_mutex protected.
 115  *
 116  * W: workqueue_lock protected.
 117  */
 118
 119 struct global_cwq;
 120 struct worker_pool;
 121
 122 /*
 123  * The poor guys doing the actual heavy lifting.  All on-duty workers
 124  * are either serving the manager role, on idle list or on busy hash.
 125  */
 126 struct worker {
 127         /* on idle list while idle, on busy hash table while busy */
 128         union {
 129                 struct list_head        entry;  /* L: while idle */
 130                 struct hlist_node       hentry; /* L: while busy */
 131         };
 132
 133         struct work_struct      *current_work;  /* L: work being processed */
 134         struct cpu_workqueue_struct *current_cwq; /* L: current_work's cwq */
 135         struct list_head        scheduled;      /* L: scheduled works */
 136         struct task_struct      *task;          /* I: worker task */
 137         struct worker_pool      *pool;          /* I: the associated pool */
 138         /* 64 bytes boundary on 64bit, 32 on 32bit */
 139         unsigned long           last_active;    /* L: last active timestamp */
 140         unsigned int            flags;          /* X: flags */
 141         int                     id;             /* I: worker id */
 142         struct work_struct      rebind_work;    /* L: rebind worker to cpu */
 143 };
 144
 145 struct worker_pool {
 146         struct global_cwq       *gcwq;          /* I: the owning gcwq */
 147         unsigned int            flags;          /* X: flags */
 148
 149         struct list_head        worklist;       /* L: list of pending works */
 150         int                     nr_workers;     /* L: total number of workers */
 151         int                     nr_idle;        /* L: currently idle ones */
 152
 153         struct list_head        idle_list;      /* X: list of idle workers */
 154         struct timer_list       idle_timer;     /* L: worker idle timeout */
 155         struct timer_list       mayday_timer;   /* L: SOS timer for workers */
 156
 157         struct ida              worker_ida;     /* L: for worker IDs */
 158         struct worker           *first_idle;    /* L: first idle worker */
 159 };
 160
 161 /*
 162  * Global per-cpu workqueue.  There's one and only one for each cpu
 163  * and all works are queued and processed here regardless of their
 164  * target workqueues.
 165  */
 166 struct global_cwq {
 167         spinlock_t              lock;           /* the gcwq lock */
 168         unsigned int            cpu;            /* I: the associated cpu */
 169         unsigned int            flags;          /* L: GCWQ_* flags */
 170
 171         /* workers are chained either in busy_hash or pool idle_list */
 172         struct hlist_head       busy_hash[BUSY_WORKER_HASH_SIZE];
 173                                                 /* L: hash of busy workers */
 174
 175         struct worker_pool      pool;           /* the worker pools */
 176
 177         struct task_struct      *trustee;       /* L: for gcwq shutdown */
 178         unsigned int            trustee_state;  /* L: trustee state */
 179         wait_queue_head_t       trustee_wait;   /* trustee wait */
 180 } ____cacheline_aligned_in_smp;
 181
 182 /*
 183  * The per-CPU workqueue.  The lower WORK_STRUCT_FLAG_BITS of
 184  * work_struct->data are used for flags and thus cwqs need to be
 185  * aligned at two's power of the number of flag bits.
 186  */
 187 struct cpu_workqueue_struct {
 188         struct worker_pool      *pool;          /* I: the associated pool */
 189         struct workqueue_struct *wq;            /* I: the owning workqueue */
 190         int                     work_color;     /* L: current color */
 191         int                     flush_color;    /* L: flushing color */
 192         int                     nr_in_flight[WORK_NR_COLORS];
 193                                                 /* L: nr of in_flight works */
 194         int                     nr_active;      /* L: nr of active works */
 195         int                     max_active;     /* L: max active works */
 196         struct list_head        delayed_works;  /* L: delayed works */
 197 };
 198
 199 /*
 200  * Structure used to wait for workqueue flush.
 201  */
 202 struct wq_flusher {
 203         struct list_head        list;           /* F: list of flushers */
 204         int                     flush_color;    /* F: flush color waiting for */
 205         struct completion       done;           /* flush completion */
 206 };
 207
 208 /*
 209  * All cpumasks are assumed to be always set on UP and thus can't be
 210  * used to determine whether there's something to be done.
 211  */
 212 #ifdef CONFIG_SMP
 213 typedef cpumask_var_t mayday_mask_t;
 214 #define mayday_test_and_set_cpu(cpu, mask)      \
 215         cpumask_test_and_set_cpu((cpu), (mask))
 216 #define mayday_clear_cpu(cpu, mask)             cpumask_clear_cpu((cpu), (mask))
 217 #define for_each_mayday_cpu(cpu, mask)          for_each_cpu((cpu), (mask))
 218 #define alloc_mayday_mask(maskp, gfp)           zalloc_cpumask_var((maskp), (gfp))
 219 #define free_mayday_mask(mask)                  free_cpumask_var((mask))
 220 #else
 221 typedef unsigned long mayday_mask_t;
 222 #define mayday_test_and_set_cpu(cpu, mask)      test_and_set_bit(0, &(mask))
 223 #define mayday_clear_cpu(cpu, mask)             clear_bit(0, &(mask))
 224 #define for_each_mayday_cpu(cpu, mask)          if ((cpu) = 0, (mask))
 225 #define alloc_mayday_mask(maskp, gfp)           true
 226 #define free_mayday_mask(mask)                  do { } while (0)
 227 #endif
 228
 229 /*
 230  * The externally visible workqueue abstraction is an array of
 231  * per-CPU workqueues:
 232  */
 233 struct workqueue_struct {
 234         unsigned int            flags;          /* W: WQ_* flags */
 235         union {
 236                 struct cpu_workqueue_struct __percpu    *pcpu;
 237                 struct cpu_workqueue_struct             *single;
 238                 unsigned long                           v;
 239         } cpu_wq;                               /* I: cwq's */
 240         struct list_head        list;           /* W: list of all workqueues */
 241
 242         struct mutex            flush_mutex;    /* protects wq flushing */
 243         int                     work_color;     /* F: current work color */
 244         int                     flush_color;    /* F: current flush color */
 245         atomic_t                nr_cwqs_to_flush; /* flush in progress */
 246         struct wq_flusher       *first_flusher; /* F: first flusher */
 247         struct list_head        flusher_queue;  /* F: flush waiters */
 248         struct list_head        flusher_overflow; /* F: flush overflow list */
 249
 250         mayday_mask_t           mayday_mask;    /* cpus requesting rescue */
 251         struct worker           *rescuer;       /* I: rescue worker */
 252
 253         int                     nr_drainers;    /* W: drain in progress */
 254         int                     saved_max_active; /* W: saved cwq max_active */
 255 #ifdef CONFIG_LOCKDEP
 256         struct lockdep_map      lockdep_map;
 257 #endif
 258         char                    name[];         /* I: workqueue name */
 259 };
 260
 261 struct workqueue_struct *system_wq __read_mostly;
 262 struct workqueue_struct *system_long_wq __read_mostly;
 263 struct workqueue_struct *system_nrt_wq __read_mostly;
 264 struct workqueue_struct *system_unbound_wq __read_mostly;
 265 struct workqueue_struct *system_freezable_wq __read_mostly;
 266 struct workqueue_struct *system_nrt_freezable_wq __read_mostly;
 267 EXPORT_SYMBOL_GPL(system_wq);
 268 EXPORT_SYMBOL_GPL(system_long_wq);
 269 EXPORT_SYMBOL_GPL(system_nrt_wq);
 270 EXPORT_SYMBOL_GPL(system_unbound_wq);
 271 EXPORT_SYMBOL_GPL(system_freezable_wq);
 272 EXPORT_SYMBOL_GPL(system_nrt_freezable_wq);
 273
 274 #define CREATE_TRACE_POINTS
 275 #include <trace/events/workqueue.h>
 276
 277 #define for_each_busy_worker(worker, i, pos, gcwq)                      \
 278         for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++)                     \
 279                 hlist_for_each_entry(worker, pos, &gcwq->busy_hash[i], hentry)
 280
 281 static inline int __next_gcwq_cpu(int cpu, const struct cpumask *mask,
 282                                   unsigned int sw)
 283 {
 284         if (cpu < nr_cpu_ids) {
 285                 if (sw & 1) {
 286                         cpu = cpumask_next(cpu, mask);
 287                         if (cpu < nr_cpu_ids)
 288                                 return cpu;
 289                 }
 290                 if (sw & 2)
 291                         return WORK_CPU_UNBOUND;
 292         }
 293         return WORK_CPU_NONE;
 294 }
 295
 296 static inline int __next_wq_cpu(int cpu, const struct cpumask *mask,
 297                                 struct workqueue_struct *wq)
 298 {
 299         return __next_gcwq_cpu(cpu, mask, !(wq->flags & WQ_UNBOUND) ? 1 : 2);
 300 }
 301
 302 /*
 303  * CPU iterators
 304  *
 305  * An extra gcwq is defined for an invalid cpu number
 306  * (WORK_CPU_UNBOUND) to host workqueues which are not bound to any
 307  * specific CPU.  The following iterators are similar to
 308  * for_each_*_cpu() iterators but also considers the unbound gcwq.
 309  *
 310  * for_each_gcwq_cpu()          : possible CPUs + WORK_CPU_UNBOUND
 311  * for_each_online_gcwq_cpu()   : online CPUs + WORK_CPU_UNBOUND
 312  * for_each_cwq_cpu()           : possible CPUs for bound workqueues,
 313  *                                WORK_CPU_UNBOUND for unbound workqueues
 314  */
 315 #define for_each_gcwq_cpu(cpu)                                          \
 316         for ((cpu) = __next_gcwq_cpu(-1, cpu_possible_mask, 3);         \
 317              (cpu) < WORK_CPU_NONE;                                     \
 318              (cpu) = __next_gcwq_cpu((cpu), cpu_possible_mask, 3))
 319
 320 #define for_each_online_gcwq_cpu(cpu)                                   \
 321         for ((cpu) = __next_gcwq_cpu(-1, cpu_online_mask, 3);           \
 322              (cpu) < WORK_CPU_NONE;                                     \
 323              (cpu) = __next_gcwq_cpu((cpu), cpu_online_mask, 3))
 324
 325 #define for_each_cwq_cpu(cpu, wq)                                       \
 326         for ((cpu) = __next_wq_cpu(-1, cpu_possible_mask, (wq));        \
 327              (cpu) < WORK_CPU_NONE;                                     \
 328              (cpu) = __next_wq_cpu((cpu), cpu_possible_mask, (wq)))
 329
 330 #ifdef CONFIG_DEBUG_OBJECTS_WORK
 331
 332 static struct debug_obj_descr work_debug_descr;
 333
 334 static void *work_debug_hint(void *addr)
 335 {
 336         return ((struct work_struct *) addr)->func;
 337 }
 338
 339 /*
 340  * fixup_init is called when:
 341  * - an active object is initialized
 342  */
 343 static int work_fixup_init(void *addr, enum debug_obj_state state)
 344 {
 345         struct work_struct *work = addr;
 346
 347         switch (state) {
 348         case ODEBUG_STATE_ACTIVE:
 349                 cancel_work_sync(work);
 350                 debug_object_init(work, &work_debug_descr);
 351                 return 1;
 352         default:
 353                 return 0;
 354         }
 355 }
 356
 357 /*
 358  * fixup_activate is called when:
 359  * - an active object is activated
 360  * - an unknown object is activated (might be a statically initialized object)
 361  */
 362 static int work_fixup_activate(void *addr, enum debug_obj_state state)
 363 {
 364         struct work_struct *work = addr;
 365
 366         switch (state) {
 367
 368         case ODEBUG_STATE_NOTAVAILABLE:
 369                 /*
 370                  * This is not really a fixup. The work struct was
 371                  * statically initialized. We just make sure that it
 372                  * is tracked in the object tracker.
 373                  */
 374                 if (test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work))) {
 375                         debug_object_init(work, &work_debug_descr);
 376                         debug_object_activate(work, &work_debug_descr);
 377                         return 0;
 378                 }
 379                 WARN_ON_ONCE(1);
 380                 return 0;
 381
 382         case ODEBUG_STATE_ACTIVE:
 383                 WARN_ON(1);
 384
 385         default:
 386                 return 0;
 387         }
 388 }
 389
 390 /*
 391  * fixup_free is called when:
 392  * - an active object is freed
 393  */
 394 static int work_fixup_free(void *addr, enum debug_obj_state state)
 395 {
 396         struct work_struct *work = addr;
 397
 398         switch (state) {
 399         case ODEBUG_STATE_ACTIVE:
 400                 cancel_work_sync(work);
 401                 debug_object_free(work, &work_debug_descr);
 402                 return 1;
 403         default:
 404                 return 0;
 405         }
 406 }
 407
 408 static struct debug_obj_descr work_debug_descr = {
 409         .name           = "work_struct",
 410         .debug_hint     = work_debug_hint,
 411         .fixup_init     = work_fixup_init,
 412         .fixup_activate = work_fixup_activate,
 413         .fixup_free     = work_fixup_free,
 414 };
 415
 416 static inline void debug_work_activate(struct work_struct *work)
 417 {
 418         debug_object_activate(work, &work_debug_descr);
 419 }
 420
 421 static inline void debug_work_deactivate(struct work_struct *work)
 422 {
 423         debug_object_deactivate(work, &work_debug_descr);
 424 }
 425
 426 void __init_work(struct work_struct *work, int onstack)
 427 {
 428         if (onstack)
 429                 debug_object_init_on_stack(work, &work_debug_descr);
 430         else
 431                 debug_object_init(work, &work_debug_descr);
 432 }
 433 EXPORT_SYMBOL_GPL(__init_work);
 434
 435 void destroy_work_on_stack(struct work_struct *work)
 436 {
 437         debug_object_free(work, &work_debug_descr);
 438 }
 439 EXPORT_SYMBOL_GPL(destroy_work_on_stack);
 440
 441 #else
 442 static inline void debug_work_activate(struct work_struct *work) { }
 443 static inline void debug_work_deactivate(struct work_struct *work) { }
 444 #endif
 445
 446 /* Serializes the accesses to the list of workqueues. */
 447 static DEFINE_SPINLOCK(workqueue_lock);
 448 static LIST_HEAD(workqueues);
 449 static bool workqueue_freezing;         /* W: have wqs started freezing? */
 450
 451 /*
 452  * The almighty global cpu workqueues.  nr_running is the only field
 453  * which is expected to be used frequently by other cpus via
 454  * try_to_wake_up().  Put it in a separate cacheline.
 455  */
 456 static DEFINE_PER_CPU(struct global_cwq, global_cwq);
 457 static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, gcwq_nr_running);
 458
 459 /*
 460  * Global cpu workqueue and nr_running counter for unbound gcwq.  The
 461  * gcwq is always online, has GCWQ_DISASSOCIATED set, and all its
 462  * workers have WORKER_UNBOUND set.
 463  */
 464 static struct global_cwq unbound_global_cwq;
 465 static atomic_t unbound_gcwq_nr_running = ATOMIC_INIT(0);       /* always 0 */
 466
 467 static int worker_thread(void *__worker);
 468
 469 static struct global_cwq *get_gcwq(unsigned int cpu)
 470 {
 471         if (cpu != WORK_CPU_UNBOUND)
 472                 return &per_cpu(global_cwq, cpu);
 473         else
 474                 return &unbound_global_cwq;
 475 }
 476
 477 static atomic_t *get_pool_nr_running(struct worker_pool *pool)
 478 {
 479         int cpu = pool->gcwq->cpu;
 480
 481         if (cpu != WORK_CPU_UNBOUND)
 482                 return &per_cpu(gcwq_nr_running, cpu);
 483         else
 484                 return &unbound_gcwq_nr_running;
 485 }
 486
 487 static struct cpu_workqueue_struct *get_cwq(unsigned int cpu,
 488                                             struct workqueue_struct *wq)
 489 {
 490         if (!(wq->flags & WQ_UNBOUND)) {
 491                 if (likely(cpu < nr_cpu_ids))
 492                         return per_cpu_ptr(wq->cpu_wq.pcpu, cpu);
 493         } else if (likely(cpu == WORK_CPU_UNBOUND))
 494                 return wq->cpu_wq.single;
 495         return NULL;
 496 }
 497
 498 static unsigned int work_color_to_flags(int color)
 499 {
 500         return color << WORK_STRUCT_COLOR_SHIFT;
 501 }
 502
 503 static int get_work_color(struct work_struct *work)
 504 {
 505         return (*work_data_bits(work) >> WORK_STRUCT_COLOR_SHIFT) &
 506                 ((1 << WORK_STRUCT_COLOR_BITS) - 1);
 507 }
 508
 509 static int work_next_color(int color)
 510 {
 511         return (color + 1) % WORK_NR_COLORS;
 512 }
 513
 514 /*
 515  * A work's data points to the cwq with WORK_STRUCT_CWQ set while the
 516  * work is on queue.  Once execution starts, WORK_STRUCT_CWQ is
 517  * cleared and the work data contains the cpu number it was last on.
 518  *
 519  * set_work_{cwq|cpu}() and clear_work_data() can be used to set the
 520  * cwq, cpu or clear work->data.  These functions should only be
 521  * called while the work is owned - ie. while the PENDING bit is set.
 522  *
 523  * get_work_[g]cwq() can be used to obtain the gcwq or cwq
 524  * corresponding to a work.  gcwq is available once the work has been
 525  * queued anywhere after initialization.  cwq is available only from
 526  * queueing until execution starts.
 527  */
 528 static inline void set_work_data(struct work_struct *work, unsigned long data,
 529                                  unsigned long flags)
 530 {
 531         BUG_ON(!work_pending(work));
 532         atomic_long_set(&work->data, data | flags | work_static(work));
 533 }
 534
 535 static void set_work_cwq(struct work_struct *work,
 536                          struct cpu_workqueue_struct *cwq,
 537                          unsigned long extra_flags)
 538 {
 539         set_work_data(work, (unsigned long)cwq,
 540                       WORK_STRUCT_PENDING | WORK_STRUCT_CWQ | extra_flags);
 541 }
 542
 543 static void set_work_cpu(struct work_struct *work, unsigned int cpu)
 544 {
 545         set_work_data(work, cpu << WORK_STRUCT_FLAG_BITS, WORK_STRUCT_PENDING);
 546 }
 547
 548 static void clear_work_data(struct work_struct *work)
 549 {
 550         set_work_data(work, WORK_STRUCT_NO_CPU, 0);
 551 }
 552
 553 static struct cpu_workqueue_struct *get_work_cwq(struct work_struct *work)
 554 {
 555         unsigned long data = atomic_long_read(&work->data);
 556
 557         if (data & WORK_STRUCT_CWQ)
 558                 return (void *)(data & WORK_STRUCT_WQ_DATA_MASK);
 559         else
 560                 return NULL;
 561 }
 562
 563 static struct global_cwq *get_work_gcwq(struct work_struct *work)
 564 {
 565         unsigned long data = atomic_long_read(&work->data);
 566         unsigned int cpu;
 567
 568         if (data & WORK_STRUCT_CWQ)
 569                 return ((struct cpu_workqueue_struct *)
 570                         (data & WORK_STRUCT_WQ_DATA_MASK))->pool->gcwq;
 571
 572         cpu = data >> WORK_STRUCT_FLAG_BITS;
 573         if (cpu == WORK_CPU_NONE)
 574                 return NULL;
 575
 576         BUG_ON(cpu >= nr_cpu_ids && cpu != WORK_CPU_UNBOUND);
 577         return get_gcwq(cpu);
 578 }
 579
 580 /*
 581  * Policy functions.  These define the policies on how the global
 582  * worker pool is managed.  Unless noted otherwise, these functions
 583  * assume that they're being called with gcwq->lock held.
 584  */
 585
 586 static bool __need_more_worker(struct worker_pool *pool)
 587 {
 588         return !atomic_read(get_pool_nr_running(pool)) ||
 589                 (pool->flags & POOL_HIGHPRI_PENDING);
 590 }
 591
 592 /*
 593  * Need to wake up a worker?  Called from anything but currently
 594  * running workers.
 595  *
 596  * Note that, because unbound workers never contribute to nr_running, this
 597  * function will always return %true for unbound gcwq as long as the
 598  * worklist isn't empty.
 599  */
 600 static bool need_more_worker(struct worker_pool *pool)
 601 {
 602         return !list_empty(&pool->worklist) && __need_more_worker(pool);
 603 }
 604
 605 /* Can I start working?  Called from busy but !running workers. */
 606 static bool may_start_working(struct worker_pool *pool)
 607 {
 608         return pool->nr_idle;
 609 }
 610
 611 /* Do I need to keep working?  Called from currently running workers. */
 612 static bool keep_working(struct worker_pool *pool)
 613 {
 614         atomic_t *nr_running = get_pool_nr_running(pool);
 615
 616         return !list_empty(&pool->worklist) &&
 617                 (atomic_read(nr_running) <= 1 ||
 618                  (pool->flags & POOL_HIGHPRI_PENDING));
 619 }
 620
 621 /* Do we need a new worker?  Called from manager. */
 622 static bool need_to_create_worker(struct worker_pool *pool)
 623 {
 624         return need_more_worker(pool) && !may_start_working(pool);
 625 }
 626
 627 /* Do I need to be the manager? */
 628 static bool need_to_manage_workers(struct worker_pool *pool)
 629 {
 630         return need_to_create_worker(pool) ||
 631                 (pool->flags & POOL_MANAGE_WORKERS);
 632 }
 633
 634 /* Do we have too many workers and should some go away? */
 635 static bool too_many_workers(struct worker_pool *pool)
 636 {
 637         bool managing = pool->flags & POOL_MANAGING_WORKERS;
 638         int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
 639         int nr_busy = pool->nr_workers - nr_idle;
 640
 641         return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy;
 642 }
 643
 644 /*
 645  * Wake up functions.
 646  */
 647
 648 /* Return the first worker.  Safe with preemption disabled */
 649 static struct worker *first_worker(struct worker_pool *pool)
 650 {
 651         if (unlikely(list_empty(&pool->idle_list)))
 652                 return NULL;
 653
 654         return list_first_entry(&pool->idle_list, struct worker, entry);
 655 }
 656
 657 /**
 658  * wake_up_worker - wake up an idle worker
 659  * @pool: worker pool to wake worker from
 660  *
 661  * Wake up the first idle worker of @pool.
 662  *
 663  * CONTEXT:
 664  * spin_lock_irq(gcwq->lock).
 665  */
 666 static void wake_up_worker(struct worker_pool *pool)
 667 {
 668         struct worker *worker = first_worker(pool);
 669
 670         if (likely(worker))
 671                 wake_up_process(worker->task);
 672 }
 673
 674 /**
 675  * wq_worker_waking_up - a worker is waking up
 676  * @task: task waking up
 677  * @cpu: CPU @task is waking up to
 678  *
 679  * This function is called during try_to_wake_up() when a worker is
 680  * being awoken.
 681  *
 682  * CONTEXT:
 683  * spin_lock_irq(rq->lock)
 684  */
 685 void wq_worker_waking_up(struct task_struct *task, unsigned int cpu)
 686 {
 687         struct worker *worker = kthread_data(task);
 688
 689         if (!(worker->flags & WORKER_NOT_RUNNING))
 690                 atomic_inc(get_pool_nr_running(worker->pool));
 691 }
 692
 693 /**
 694  * wq_worker_sleeping - a worker is going to sleep
 695  * @task: task going to sleep
 696  * @cpu: CPU in question, must be the current CPU number
 697  *
 698  * This function is called during schedule() when a busy worker is
 699  * going to sleep.  Worker on the same cpu can be woken up by
 700  * returning pointer to its task.
 701  *
 702  * CONTEXT:
 703  * spin_lock_irq(rq->lock)
 704  *
 705  * RETURNS:
 706  * Worker task on @cpu to wake up, %NULL if none.
 707  */
 708 struct task_struct *wq_worker_sleeping(struct task_struct *task,
 709                                        unsigned int cpu)
 710 {
 711         struct worker *worker = kthread_data(task), *to_wakeup = NULL;
 712         struct worker_pool *pool = worker->pool;
 713         atomic_t *nr_running = get_pool_nr_running(pool);
 714
 715         if (worker->flags & WORKER_NOT_RUNNING)
 716                 return NULL;
 717
 718         /* this can only happen on the local cpu */
 719         BUG_ON(cpu != raw_smp_processor_id());
 720
 721         /*
 722          * The counterpart of the following dec_and_test, implied mb,
 723          * worklist not empty test sequence is in insert_work().
 724          * Please read comment there.
 725          *
 726          * NOT_RUNNING is clear.  This means that trustee is not in
 727          * charge and we're running on the local cpu w/ rq lock held
 728          * and preemption disabled, which in turn means that none else
 729          * could be manipulating idle_list, so dereferencing idle_list
 730          * without gcwq lock is safe.
 731          */
 732         if (atomic_dec_and_test(nr_running) && !list_empty(&pool->worklist))
 733                 to_wakeup = first_worker(pool);
 734         return to_wakeup ? to_wakeup->task : NULL;
 735 }
 736
 737 /**
 738  * worker_set_flags - set worker flags and adjust nr_running accordingly
 739  * @worker: self
 740  * @flags: flags to set
 741  * @wakeup: wakeup an idle worker if necessary
 742  *
 743  * Set @flags in @worker->flags and adjust nr_running accordingly.  If
 744  * nr_running becomes zero and @wakeup is %true, an idle worker is
 745  * woken up.
 746  *
 747  * CONTEXT:
 748  * spin_lock_irq(gcwq->lock)
 749  */
 750 static inline void worker_set_flags(struct worker *worker, unsigned int flags,
 751                                     bool wakeup)
 752 {
 753         struct worker_pool *pool = worker->pool;
 754
 755         WARN_ON_ONCE(worker->task != current);
 756
 757         /*
 758          * If transitioning into NOT_RUNNING, adjust nr_running and
 759          * wake up an idle worker as necessary if requested by
 760          * @wakeup.
 761          */
 762         if ((flags & WORKER_NOT_RUNNING) &&
 763             !(worker->flags & WORKER_NOT_RUNNING)) {
 764                 atomic_t *nr_running = get_pool_nr_running(pool);
 765
 766                 if (wakeup) {
 767                         if (atomic_dec_and_test(nr_running) &&
 768                             !list_empty(&pool->worklist))
 769                                 wake_up_worker(pool);
 770                 } else
 771                         atomic_dec(nr_running);
 772         }
 773
 774         worker->flags |= flags;
 775 }
 776
 777 /**
 778  * worker_clr_flags - clear worker flags and adjust nr_running accordingly
 779  * @worker: self
 780  * @flags: flags to clear
 781  *
 782  * Clear @flags in @worker->flags and adjust nr_running accordingly.
 783  *
 784  * CONTEXT:
 785  * spin_lock_irq(gcwq->lock)
 786  */
 787 static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
 788 {
 789         struct worker_pool *pool = worker->pool;
 790         unsigned int oflags = worker->flags;
 791
 792         WARN_ON_ONCE(worker->task != current);
 793
 794         worker->flags &= ~flags;
 795
 796         /*
 797          * If transitioning out of NOT_RUNNING, increment nr_running.  Note
 798          * that the nested NOT_RUNNING is not a noop.  NOT_RUNNING is mask
 799          * of multiple flags, not a single flag.
 800          */
 801         if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING))
 802                 if (!(worker->flags & WORKER_NOT_RUNNING))
 803                         atomic_inc(get_pool_nr_running(pool));
 804 }
 805
 806 /**
 807  * busy_worker_head - return the busy hash head for a work
 808  * @gcwq: gcwq of interest
 809  * @work: work to be hashed
 810  *
 811  * Return hash head of @gcwq for @work.
 812  *
 813  * CONTEXT:
 814  * spin_lock_irq(gcwq->lock).
 815  *
 816  * RETURNS:
 817  * Pointer to the hash head.
 818  */
 819 static struct hlist_head *busy_worker_head(struct global_cwq *gcwq,
 820                                            struct work_struct *work)
 821 {
 822         const int base_shift = ilog2(sizeof(struct work_struct));
 823         unsigned long v = (unsigned long)work;
 824
 825         /* simple shift and fold hash, do we need something better? */
 826         v >>= base_shift;
 827         v += v >> BUSY_WORKER_HASH_ORDER;
 828         v &= BUSY_WORKER_HASH_MASK;
 829
 830         return &gcwq->busy_hash[v];
 831 }
 832
 833 /**
 834  * __find_worker_executing_work - find worker which is executing a work
 835  * @gcwq: gcwq of interest
 836  * @bwh: hash head as returned by busy_worker_head()
 837  * @work: work to find worker for
 838  *
 839  * Find a worker which is executing @work on @gcwq.  @bwh should be
 840  * the hash head obtained by calling busy_worker_head() with the same
 841  * work.
 842  *
 843  * CONTEXT:
 844  * spin_lock_irq(gcwq->lock).
 845  *
 846  * RETURNS:
 847  * Pointer to worker which is executing @work if found, NULL
 848  * otherwise.
 849  */
 850 static struct worker *__find_worker_executing_work(struct global_cwq *gcwq,
 851                                                    struct hlist_head *bwh,
 852                                                    struct work_struct *work)
 853 {
 854         struct worker *worker;
 855         struct hlist_node *tmp;
 856
 857         hlist_for_each_entry(worker, tmp, bwh, hentry)
 858                 if (worker->current_work == work)
 859                         return worker;
 860         return NULL;
 861 }
 862
 863 /**
 864  * find_worker_executing_work - find worker which is executing a work
 865  * @gcwq: gcwq of interest
 866  * @work: work to find worker for
 867  *
 868  * Find a worker which is executing @work on @gcwq.  This function is
 869  * identical to __find_worker_executing_work() except that this
 870  * function calculates @bwh itself.
 871  *
 872  * CONTEXT:
 873  * spin_lock_irq(gcwq->lock).
 874  *
 875  * RETURNS:
 876  * Pointer to worker which is executing @work if found, NULL
 877  * otherwise.
 878  */
 879 static struct worker *find_worker_executing_work(struct global_cwq *gcwq,
 880                                                  struct work_struct *work)
 881 {
 882         return __find_worker_executing_work(gcwq, busy_worker_head(gcwq, work),
 883                                             work);
 884 }
 885
 886 /**
 887  * pool_determine_ins_pos - find insertion position
 888  * @pool: pool of interest
 889  * @cwq: cwq a work is being queued for
 890  *
 891  * A work for @cwq is about to be queued on @pool, determine insertion
 892  * position for the work.  If @cwq is for HIGHPRI wq, the work is
 893  * queued at the head of the queue but in FIFO order with respect to
 894  * other HIGHPRI works; otherwise, at the end of the queue.  This
 895  * function also sets POOL_HIGHPRI_PENDING flag to hint @pool that
 896  * there are HIGHPRI works pending.
 897  *
 898  * CONTEXT:
 899  * spin_lock_irq(gcwq->lock).
 900  *
 901  * RETURNS:
 902  * Pointer to inserstion position.
 903  */
 904 static inline struct list_head *pool_determine_ins_pos(struct worker_pool *pool,
 905                                                struct cpu_workqueue_struct *cwq)
 906 {
 907         struct work_struct *twork;
 908
 909         if (likely(!(cwq->wq->flags & WQ_HIGHPRI)))
 910                 return &pool->worklist;
 911
 912         list_for_each_entry(twork, &pool->worklist, entry) {
 913                 struct cpu_workqueue_struct *tcwq = get_work_cwq(twork);
 914
 915                 if (!(tcwq->wq->flags & WQ_HIGHPRI))
 916                         break;
 917         }
 918
 919         pool->flags |= POOL_HIGHPRI_PENDING;
 920         return &twork->entry;
 921 }
 922
 923 /**
 924  * insert_work - insert a work into gcwq
 925  * @cwq: cwq @work belongs to
 926  * @work: work to insert
 927  * @head: insertion point
 928  * @extra_flags: extra WORK_STRUCT_* flags to set
 929  *
 930  * Insert @work which belongs to @cwq into @gcwq after @head.
 931  * @extra_flags is or'd to work_struct flags.
 932  *
 933  * CONTEXT:
 934  * spin_lock_irq(gcwq->lock).
 935  */
 936 static void insert_work(struct cpu_workqueue_struct *cwq,
 937                         struct work_struct *work, struct list_head *head,
 938                         unsigned int extra_flags)
 939 {
 940         struct worker_pool *pool = cwq->pool;
 941
 942         /* we own @work, set data and link */
 943         set_work_cwq(work, cwq, extra_flags);
 944
 945         /*
 946          * Ensure that we get the right work->data if we see the
 947          * result of list_add() below, see try_to_grab_pending().
 948          */
 949         smp_wmb();
 950
 951         list_add_tail(&work->entry, head);
 952
 953         /*
 954          * Ensure either worker_sched_deactivated() sees the above
 955          * list_add_tail() or we see zero nr_running to avoid workers
 956          * lying around lazily while there are works to be processed.
 957          */
 958         smp_mb();
 959
 960         if (__need_more_worker(pool))
 961                 wake_up_worker(pool);
 962 }
 963
 964 /*
 965  * Test whether @work is being queued from another work executing on the
 966  * same workqueue.  This is rather expensive and should only be used from
 967  * cold paths.
 968  */
 969 static bool is_chained_work(struct workqueue_struct *wq)
 970 {
 971         unsigned long flags;
 972         unsigned int cpu;
 973
 974         for_each_gcwq_cpu(cpu) {
 975                 struct global_cwq *gcwq = get_gcwq(cpu);
 976                 struct worker *worker;
 977                 struct hlist_node *pos;
 978                 int i;
 979
 980                 spin_lock_irqsave(&gcwq->lock, flags);
 981                 for_each_busy_worker(worker, i, pos, gcwq) {
 982                         if (worker->task != current)
 983                                 continue;
 984                         spin_unlock_irqrestore(&gcwq->lock, flags);
 985                         /*
 986                          * I'm @worker, no locking necessary.  See if @work
 987                          * is headed to the same workqueue.
 988                          */
 989                         return worker->current_cwq->wq == wq;
 990                 }
 991                 spin_unlock_irqrestore(&gcwq->lock, flags);
 992         }
 993         return false;
 994 }
 995
 996 static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 997                          struct work_struct *work)
 998 {
 999         struct global_cwq *gcwq;
1000         struct cpu_workqueue_struct *cwq;
1001         struct list_head *worklist;
1002         unsigned int work_flags;
1003         unsigned long flags;
1004
1005         debug_work_activate(work);
1006
1007         /* if dying, only works from the same workqueue are allowed */
1008         if (unlikely(wq->flags & WQ_DRAINING) &&
1009             WARN_ON_ONCE(!is_chained_work(wq)))
1010                 return;
1011
1012         /* determine gcwq to use */
1013         if (!(wq->flags & WQ_UNBOUND)) {
1014                 struct global_cwq *last_gcwq;
1015
1016                 if (unlikely(cpu == WORK_CPU_UNBOUND))
1017                         cpu = raw_smp_processor_id();
1018
1019                 /*
1020                  * It's multi cpu.  If @wq is non-reentrant and @work
1021                  * was previously on a different cpu, it might still
1022                  * be running there, in which case the work needs to
1023                  * be queued on that cpu to guarantee non-reentrance.
1024                  */
1025                 gcwq = get_gcwq(cpu);
1026                 if (wq->flags & WQ_NON_REENTRANT &&
1027                     (last_gcwq = get_work_gcwq(work)) && last_gcwq != gcwq) {
1028                         struct worker *worker;
1029
1030                         spin_lock_irqsave(&last_gcwq->lock, flags);
1031
1032                         worker = find_worker_executing_work(last_gcwq, work);
1033
1034                         if (worker && worker->current_cwq->wq == wq)
1035                                 gcwq = last_gcwq;
1036                         else {
1037                                 /* meh... not running there, queue here */
1038                                 spin_unlock_irqrestore(&last_gcwq->lock, flags);
1039                                 spin_lock_irqsave(&gcwq->lock, flags);
1040                         }
1041                 } else
1042                         spin_lock_irqsave(&gcwq->lock, flags);
1043         } else {
1044                 gcwq = get_gcwq(WORK_CPU_UNBOUND);
1045                 spin_lock_irqsave(&gcwq->lock, flags);
1046         }
1047
1048         /* gcwq determined, get cwq and queue */
1049         cwq = get_cwq(gcwq->cpu, wq);
1050         trace_workqueue_queue_work(cpu, cwq, work);
1051
1052         if (WARN_ON(!list_empty(&work->entry))) {
1053                 spin_unlock_irqrestore(&gcwq->lock, flags);
1054                 return;
1055         }
1056
1057         cwq->nr_in_flight[cwq->work_color]++;
1058         work_flags = work_color_to_flags(cwq->work_color);
1059
1060         if (likely(cwq->nr_active < cwq->max_active)) {
1061                 trace_workqueue_activate_work(work);
1062                 cwq->nr_active++;
1063                 worklist = pool_determine_ins_pos(cwq->pool, cwq);
1064         } else {
1065                 work_flags |= WORK_STRUCT_DELAYED;
1066                 worklist = &cwq->delayed_works;
1067         }
1068
1069         insert_work(cwq, work, worklist, work_flags);
1070
1071         spin_unlock_irqrestore(&gcwq->lock, flags);
1072 }
1073
1074 /**
1075  * queue_work - queue work on a workqueue
1076  * @wq: workqueue to use
1077  * @work: work to queue
1078  *
1079  * Returns 0 if @work was already on a queue, non-zero otherwise.
1080  *
1081  * We queue the work to the CPU on which it was submitted, but if the CPU dies
1082  * it can be processed by another CPU.
1083  */
1084 int queue_work(struct workqueue_struct *wq, struct work_struct *work)
1085 {
1086         int ret;
1087
1088         ret = queue_work_on(get_cpu(), wq, work);
1089         put_cpu();
1090
1091         return ret;
1092 }
1093 EXPORT_SYMBOL_GPL(queue_work);
1094
1095 /**
1096  * queue_work_on - queue work on specific cpu
1097  * @cpu: CPU number to execute work on
1098  * @wq: workqueue to use
1099  * @work: work to queue
1100  *
1101  * Returns 0 if @work was already on a queue, non-zero otherwise.
1102  *
1103  * We queue the work to a specific CPU, the caller must ensure it
1104  * can't go away.
1105  */
1106 int
1107 queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work)
1108 {
1109         int ret = 0;
1110
1111         if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1112                 __queue_work(cpu, wq, work);
1113                 ret = 1;
1114         }
1115         return ret;
1116 }
1117 EXPORT_SYMBOL_GPL(queue_work_on);
1118
1119 static void delayed_work_timer_fn(unsigned long __data)
1120 {
1121         struct delayed_work *dwork = (struct delayed_work *)__data;
1122         struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work);
1123
1124         __queue_work(smp_processor_id(), cwq->wq, &dwork->work);
1125 }
1126
1127 /**
1128  * queue_delayed_work - queue work on a workqueue after delay
1129  * @wq: workqueue to use
1130  * @dwork: delayable work to queue
1131  * @delay: number of jiffies to wait before queueing
1132  *
1133  * Returns 0 if @work was already on a queue, non-zero otherwise.
1134  */
1135 int queue_delayed_work(struct workqueue_struct *wq,
1136                         struct delayed_work *dwork, unsigned long delay)
1137 {
1138         if (delay == 0)
1139                 return queue_work(wq, &dwork->work);
1140
1141         return queue_delayed_work_on(-1, wq, dwork, delay);
1142 }
1143 EXPORT_SYMBOL_GPL(queue_delayed_work);
1144
1145 /**
1146  * queue_delayed_work_on - queue work on specific CPU after delay
1147  * @cpu: CPU number to execute work on
1148  * @wq: workqueue to use
1149  * @dwork: work to queue
1150  * @delay: number of jiffies to wait before queueing
1151  *
1152  * Returns 0 if @work was already on a queue, non-zero otherwise.
1153  */
1154 int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
1155                         struct delayed_work *dwork, unsigned long delay)
1156 {
1157         int ret = 0;
1158         struct timer_list *timer = &dwork->timer;
1159         struct work_struct *work = &dwork->work;
1160
1161         if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1162                 unsigned int lcpu;
1163
1164                 BUG_ON(timer_pending(timer));
1165                 BUG_ON(!list_empty(&work->entry));
1166
1167                 timer_stats_timer_set_start_info(&dwork->timer);
1168
1169                 /*
1170                  * This stores cwq for the moment, for the timer_fn.
1171                  * Note that the work's gcwq is preserved to allow
1172                  * reentrance detection for delayed works.
1173                  */
1174                 if (!(wq->flags & WQ_UNBOUND)) {
1175                         struct global_cwq *gcwq = get_work_gcwq(work);
1176
1177                         if (gcwq && gcwq->cpu != WORK_CPU_UNBOUND)
1178                                 lcpu = gcwq->cpu;
1179                         else
1180                                 lcpu = raw_smp_processor_id();
1181                 } else
1182                         lcpu = WORK_CPU_UNBOUND;
1183
1184                 set_work_cwq(work, get_cwq(lcpu, wq), 0);
1185
1186                 timer->expires = jiffies + delay;
1187                 timer->data = (unsigned long)dwork;
1188                 timer->function = delayed_work_timer_fn;
1189
1190                 if (unlikely(cpu >= 0))
1191                         add_timer_on(timer, cpu);
1192                 else
1193                         add_timer(timer);
1194                 ret = 1;
1195         }
1196         return ret;
1197 }
1198 EXPORT_SYMBOL_GPL(queue_delayed_work_on);
1199
1200 /**
1201  * worker_enter_idle - enter idle state
1202  * @worker: worker which is entering idle state
1203  *
1204  * @worker is entering idle state.  Update stats and idle timer if
1205  * necessary.
1206  *
1207  * LOCKING:
1208  * spin_lock_irq(gcwq->lock).
1209  */
1210 static void worker_enter_idle(struct worker *worker)
1211 {
1212         struct worker_pool *pool = worker->pool;
1213         struct global_cwq *gcwq = pool->gcwq;
1214
1215         BUG_ON(worker->flags & WORKER_IDLE);
1216         BUG_ON(!list_empty(&worker->entry) &&
1217                (worker->hentry.next || worker->hentry.pprev));
1218
1219         /* can't use worker_set_flags(), also called from start_worker() */
1220         worker->flags |= WORKER_IDLE;
1221         pool->nr_idle++;
1222         worker->last_active = jiffies;
1223
1224         /* idle_list is LIFO */
1225         list_add(&worker->entry, &pool->idle_list);
1226
1227         if (likely(!(worker->flags & WORKER_ROGUE))) {
1228                 if (too_many_workers(pool) && !timer_pending(&pool->idle_timer))
1229                         mod_timer(&pool->idle_timer,
1230                                   jiffies + IDLE_WORKER_TIMEOUT);
1231         } else
1232                 wake_up_all(&gcwq->trustee_wait);
1233
1234         /*
1235          * Sanity check nr_running.  Because trustee releases gcwq->lock
1236          * between setting %WORKER_ROGUE and zapping nr_running, the
1237          * warning may trigger spuriously.  Check iff trustee is idle.
1238          */
1239         WARN_ON_ONCE(gcwq->trustee_state == TRUSTEE_DONE &&
1240                      pool->nr_workers == pool->nr_idle &&
1241                      atomic_read(get_pool_nr_running(pool)));
1242 }
1243
1244 /**
1245  * worker_leave_idle - leave idle state
1246  * @worker: worker which is leaving idle state
1247  *
1248  * @worker is leaving idle state.  Update stats.
1249  *
1250  * LOCKING:
1251  * spin_lock_irq(gcwq->lock).
1252  */
1253 static void worker_leave_idle(struct worker *worker)
1254 {
1255         struct worker_pool *pool = worker->pool;
1256
1257         BUG_ON(!(worker->flags & WORKER_IDLE));
1258         worker_clr_flags(worker, WORKER_IDLE);
1259         pool->nr_idle--;
1260         list_del_init(&worker->entry);
1261 }
1262
1263 /**
1264  * worker_maybe_bind_and_lock - bind worker to its cpu if possible and lock gcwq
1265  * @worker: self
1266  *
1267  * Works which are scheduled while the cpu is online must at least be
1268  * scheduled to a worker which is bound to the cpu so that if they are
1269  * flushed from cpu callbacks while cpu is going down, they are
1270  * guaranteed to execute on the cpu.
1271  *
1272  * This function is to be used by rogue workers and rescuers to bind
1273  * themselves to the target cpu and may race with cpu going down or
1274  * coming online.  kthread_bind() can't be used because it may put the
1275  * worker to already dead cpu and set_cpus_allowed_ptr() can't be used
1276  * verbatim as it's best effort and blocking and gcwq may be
1277  * [dis]associated in the meantime.
1278  *
1279  * This function tries set_cpus_allowed() and locks gcwq and verifies
1280  * the binding against GCWQ_DISASSOCIATED which is set during
1281  * CPU_DYING and cleared during CPU_ONLINE, so if the worker enters
1282  * idle state or fetches works without dropping lock, it can guarantee
1283  * the scheduling requirement described in the first paragraph.
1284  *
1285  * CONTEXT:
1286  * Might sleep.  Called without any lock but returns with gcwq->lock
1287  * held.
1288  *
1289  * RETURNS:
1290  * %true if the associated gcwq is online (@worker is successfully
1291  * bound), %false if offline.
1292  */
1293 static bool worker_maybe_bind_and_lock(struct worker *worker)
1294 __acquires(&gcwq->lock)
1295 {
1296         struct global_cwq *gcwq = worker->pool->gcwq;
1297         struct task_struct *task = worker->task;
1298
1299         while (true) {
1300                 /*
1301                  * The following call may fail, succeed or succeed
1302                  * without actually migrating the task to the cpu if
1303                  * it races with cpu hotunplug operation.  Verify
1304                  * against GCWQ_DISASSOCIATED.
1305                  */
1306                 if (!(gcwq->flags & GCWQ_DISASSOCIATED))
1307                         set_cpus_allowed_ptr(task, get_cpu_mask(gcwq->cpu));
1308
1309                 spin_lock_irq(&gcwq->lock);
1310                 if (gcwq->flags & GCWQ_DISASSOCIATED)
1311                         return false;
1312                 if (task_cpu(task) == gcwq->cpu &&
1313                     cpumask_equal(&current->cpus_allowed,
1314                                   get_cpu_mask(gcwq->cpu)))
1315                         return true;
1316                 spin_unlock_irq(&gcwq->lock);
1317
1318                 /*
1319                  * We've raced with CPU hot[un]plug.  Give it a breather
1320                  * and retry migration.  cond_resched() is required here;
1321                  * otherwise, we might deadlock against cpu_stop trying to
1322                  * bring down the CPU on non-preemptive kernel.
1323                  */
1324                 cpu_relax();
1325                 cond_resched();
1326         }
1327 }
1328
1329 /*
1330  * Function for worker->rebind_work used to rebind rogue busy workers
1331  * to the associated cpu which is coming back online.  This is
1332  * scheduled by cpu up but can race with other cpu hotplug operations
1333  * and may be executed twice without intervening cpu down.
1334  */
1335 static void worker_rebind_fn(struct work_struct *work)
1336 {
1337         struct worker *worker = container_of(work, struct worker, rebind_work);
1338         struct global_cwq *gcwq = worker->pool->gcwq;
1339
1340         if (worker_maybe_bind_and_lock(worker))
1341                 worker_clr_flags(worker, WORKER_REBIND);
1342
1343         spin_unlock_irq(&gcwq->lock);
1344 }
1345
1346 static struct worker *alloc_worker(void)
1347 {
1348         struct worker *worker;
1349
1350         worker = kzalloc(sizeof(*worker), GFP_KERNEL);
1351         if (worker) {
1352                 INIT_LIST_HEAD(&worker->entry);
1353                 INIT_LIST_HEAD(&worker->scheduled);
1354                 INIT_WORK(&worker->rebind_work, worker_rebind_fn);
1355                 /* on creation a worker is in !idle && prep state */
1356                 worker->flags = WORKER_PREP;
1357         }
1358         return worker;
1359 }
1360
1361 /**
1362  * create_worker - create a new workqueue worker
1363  * @pool: pool the new worker will belong to
1364  * @bind: whether to set affinity to @cpu or not
1365  *
1366  * Create a new worker which is bound to @pool.  The returned worker
1367  * can be started by calling start_worker() or destroyed using
1368  * destroy_worker().
1369  *
1370  * CONTEXT:
1371  * Might sleep.  Does GFP_KERNEL allocations.
1372  *
1373  * RETURNS:
1374  * Pointer to the newly created worker.
1375  */
1376 static struct worker *create_worker(struct worker_pool *pool, bool bind)
1377 {
1378         struct global_cwq *gcwq = pool->gcwq;
1379         bool on_unbound_cpu = gcwq->cpu == WORK_CPU_UNBOUND;
1380         struct worker *worker = NULL;
1381         int id = -1;
1382
1383         spin_lock_irq(&gcwq->lock);
1384         while (ida_get_new(&pool->worker_ida, &id)) {
1385                 spin_unlock_irq(&gcwq->lock);
1386                 if (!ida_pre_get(&pool->worker_ida, GFP_KERNEL))
1387                         goto fail;
1388                 spin_lock_irq(&gcwq->lock);
1389         }
1390         spin_unlock_irq(&gcwq->lock);
1391
1392         worker = alloc_worker();
1393         if (!worker)
1394                 goto fail;
1395
1396         worker->pool = pool;
1397         worker->id = id;
1398
1399         if (!on_unbound_cpu)
1400                 worker->task = kthread_create_on_node(worker_thread,
1401                                                       worker,
1402                                                       cpu_to_node(gcwq->cpu),
1403                                                       "kworker/%u:%d", gcwq->cpu, id);
1404         else
1405                 worker->task = kthread_create(worker_thread, worker,
1406                                               "kworker/u:%d", id);
1407         if (IS_ERR(worker->task))
1408                 goto fail;
1409
1410         /*
1411          * A rogue worker will become a regular one if CPU comes
1412          * online later on.  Make sure every worker has
1413          * PF_THREAD_BOUND set.
1414          */
1415         if (bind && !on_unbound_cpu)
1416                 kthread_bind(worker->task, gcwq->cpu);
1417         else {
1418                 worker->task->flags |= PF_THREAD_BOUND;
1419                 if (on_unbound_cpu)
1420                         worker->flags |= WORKER_UNBOUND;
1421         }
1422
1423         return worker;
1424 fail:
1425         if (id >= 0) {
1426                 spin_lock_irq(&gcwq->lock);
1427                 ida_remove(&pool->worker_ida, id);
1428                 spin_unlock_irq(&gcwq->lock);
1429         }
1430         kfree(worker);
1431         return NULL;
1432 }
1433
1434 /**
1435  * start_worker - start a newly created worker
1436  * @worker: worker to start
1437  *
1438  * Make the gcwq aware of @worker and start it.
1439  *
1440  * CONTEXT:
1441  * spin_lock_irq(gcwq->lock).
1442  */
1443 static void start_worker(struct worker *worker)
1444 {
1445         worker->flags |= WORKER_STARTED;
1446         worker->pool->nr_workers++;
1447         worker_enter_idle(worker);
1448         wake_up_process(worker->task);
1449 }
1450
1451 /**
1452  * destroy_worker - destroy a workqueue worker
1453  * @worker: worker to be destroyed
1454  *
1455  * Destroy @worker and adjust @gcwq stats accordingly.
1456  *
1457  * CONTEXT:
1458  * spin_lock_irq(gcwq->lock) which is released and regrabbed.
1459  */
1460 static void destroy_worker(struct worker *worker)
1461 {
1462         struct worker_pool *pool = worker->pool;
1463         struct global_cwq *gcwq = pool->gcwq;
1464         int id = worker->id;
1465
1466         /* sanity check frenzy */
1467         BUG_ON(worker->current_work);
1468         BUG_ON(!list_empty(&worker->scheduled));
1469
1470         if (worker->flags & WORKER_STARTED)
1471                 pool->nr_workers--;
1472         if (worker->flags & WORKER_IDLE)
1473                 pool->nr_idle--;
1474
1475         list_del_init(&worker->entry);
1476         worker->flags |= WORKER_DIE;
1477
1478         spin_unlock_irq(&gcwq->lock);
1479
1480         kthread_stop(worker->task);
1481         kfree(worker);
1482
1483         spin_lock_irq(&gcwq->lock);
1484         ida_remove(&pool->worker_ida, id);
1485 }
1486
1487 static void idle_worker_timeout(unsigned long __pool)
1488 {
1489         struct worker_pool *pool = (void *)__pool;
1490         struct global_cwq *gcwq = pool->gcwq;
1491
1492         spin_lock_irq(&gcwq->lock);
1493
1494         if (too_many_workers(pool)) {
1495                 struct worker *worker;
1496                 unsigned long expires;
1497
1498                 /* idle_list is kept in LIFO order, check the last one */
1499                 worker = list_entry(pool->idle_list.prev, struct worker, entry);
1500                 expires = worker->last_active + IDLE_WORKER_TIMEOUT;
1501
1502                 if (time_before(jiffies, expires))
1503                         mod_timer(&pool->idle_timer, expires);
1504                 else {
1505                         /* it's been idle for too long, wake up manager */
1506                         pool->flags |= POOL_MANAGE_WORKERS;
1507                         wake_up_worker(pool);
1508                 }
1509         }
1510
1511         spin_unlock_irq(&gcwq->lock);
1512 }
1513
1514 static bool send_mayday(struct work_struct *work)
1515 {
1516         struct cpu_workqueue_struct *cwq = get_work_cwq(work);
1517         struct workqueue_struct *wq = cwq->wq;
1518         unsigned int cpu;
1519
1520         if (!(wq->flags & WQ_RESCUER))
1521                 return false;
1522
1523         /* mayday mayday mayday */
1524         cpu = cwq->pool->gcwq->cpu;
1525         /* WORK_CPU_UNBOUND can't be set in cpumask, use cpu 0 instead */
1526         if (cpu == WORK_CPU_UNBOUND)
1527                 cpu = 0;
1528         if (!mayday_test_and_set_cpu(cpu, wq->mayday_mask))
1529                 wake_up_process(wq->rescuer->task);
1530         return true;
1531 }
1532
1533 static void gcwq_mayday_timeout(unsigned long __pool)
1534 {
1535         struct worker_pool *pool = (void *)__pool;
1536         struct global_cwq *gcwq = pool->gcwq;
1537         struct work_struct *work;
1538
1539         spin_lock_irq(&gcwq->lock);
1540
1541         if (need_to_create_worker(pool)) {
1542                 /*
1543                  * We've been trying to create a new worker but
1544                  * haven't been successful.  We might be hitting an
1545                  * allocation deadlock.  Send distress signals to
1546                  * rescuers.
1547                  */
1548                 list_for_each_entry(work, &pool->worklist, entry)
1549                         send_mayday(work);
1550         }
1551
1552         spin_unlock_irq(&gcwq->lock);
1553
1554         mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL);
1555 }
1556
1557 /**
1558  * maybe_create_worker - create a new worker if necessary
1559  * @pool: pool to create a new worker for
1560  *
1561  * Create a new worker for @pool if necessary.  @pool is guaranteed to
1562  * have at least one idle worker on return from this function.  If
1563  * creating a new worker takes longer than MAYDAY_INTERVAL, mayday is
1564  * sent to all rescuers with works scheduled on @pool to resolve
1565  * possible allocation deadlock.
1566  *
1567  * On return, need_to_create_worker() is guaranteed to be false and
1568  * may_start_working() true.
1569  *
1570  * LOCKING:
1571  * spin_lock_irq(gcwq->lock) which may be released and regrabbed
1572  * multiple times.  Does GFP_KERNEL allocations.  Called only from
1573  * manager.
1574  *
1575  * RETURNS:
1576  * false if no action was taken and gcwq->lock stayed locked, true
1577  * otherwise.
1578  */
1579 static bool maybe_create_worker(struct worker_pool *pool)
1580 __releases(&gcwq->lock)
1581 __acquires(&gcwq->lock)
1582 {
1583         struct global_cwq *gcwq = pool->gcwq;
1584
1585         if (!need_to_create_worker(pool))
1586                 return false;
1587 restart:
1588         spin_unlock_irq(&gcwq->lock);
1589
1590         /* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */
1591         mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);
1592
1593         while (true) {
1594                 struct worker *worker;
1595
1596                 worker = create_worker(pool, true);
1597                 if (worker) {
1598                         del_timer_sync(&pool->mayday_timer);
1599                         spin_lock_irq(&gcwq->lock);
1600                         start_worker(worker);
1601                         BUG_ON(need_to_create_worker(pool));
1602                         return true;
1603                 }
1604
1605                 if (!need_to_create_worker(pool))
1606                         break;
1607
1608                 __set_current_state(TASK_INTERRUPTIBLE);
1609                 schedule_timeout(CREATE_COOLDOWN);
1610
1611                 if (!need_to_create_worker(pool))
1612                         break;
1613         }
1614
1615         del_timer_sync(&pool->mayday_timer);
1616         spin_lock_irq(&gcwq->lock);
1617         if (need_to_create_worker(pool))
1618                 goto restart;
1619         return true;
1620 }
1621
1622 /**
1623  * maybe_destroy_worker - destroy workers which have been idle for a while
1624  * @pool: pool to destroy workers for
1625  *
1626  * Destroy @pool workers which have been idle for longer than
1627  * IDLE_WORKER_TIMEOUT.
1628  *
1629  * LOCKING:
1630  * spin_lock_irq(gcwq->lock) which may be released and regrabbed
1631  * multiple times.  Called only from manager.
1632  *
1633  * RETURNS:
1634  * false if no action was taken and gcwq->lock stayed locked, true
1635  * otherwise.
1636  */
1637 static bool maybe_destroy_workers(struct worker_pool *pool)
1638 {
1639         bool ret = false;
1640
1641         while (too_many_workers(pool)) {
1642                 struct worker *worker;
1643                 unsigned long expires;
1644
1645                 worker = list_entry(pool->idle_list.prev, struct worker, entry);
1646                 expires = worker->last_active + IDLE_WORKER_TIMEOUT;
1647
1648                 if (time_before(jiffies, expires)) {
1649                         mod_timer(&pool->idle_timer, expires);
1650                         break;
1651                 }
1652
1653                 destroy_worker(worker);
1654                 ret = true;
1655         }
1656
1657         return ret;
1658 }
1659
1660 /**
1661  * manage_workers - manage worker pool
1662  * @worker: self
1663  *
1664  * Assume the manager role and manage gcwq worker pool @worker belongs
1665  * to.  At any given time, there can be only zero or one manager per
1666  * gcwq.  The exclusion is handled automatically by this function.
1667  *
1668  * The caller can safely start processing works on false return.  On
1669  * true return, it's guaranteed that need_to_create_worker() is false
1670  * and may_start_working() is true.
1671  *
1672  * CONTEXT:
1673  * spin_lock_irq(gcwq->lock) which may be released and regrabbed
1674  * multiple times.  Does GFP_KERNEL allocations.
1675  *
1676  * RETURNS:
1677  * false if no action was taken and gcwq->lock stayed locked, true if
1678  * some action was taken.
1679  */
1680 static bool manage_workers(struct worker *worker)
1681 {
1682         struct worker_pool *pool = worker->pool;
1683         struct global_cwq *gcwq = pool->gcwq;
1684         bool ret = false;
1685
1686         if (pool->flags & POOL_MANAGING_WORKERS)
1687                 return ret;
1688
1689         pool->flags &= ~POOL_MANAGE_WORKERS;
1690         pool->flags |= POOL_MANAGING_WORKERS;
1691
1692         /*
1693          * Destroy and then create so that may_start_working() is true
1694          * on return.
1695          */
1696         ret |= maybe_destroy_workers(pool);
1697         ret |= maybe_create_worker(pool);
1698
1699         pool->flags &= ~POOL_MANAGING_WORKERS;
1700
1701         /*
1702          * The trustee might be waiting to take over the manager
1703          * position, tell it we're done.
1704          */
1705         if (unlikely(gcwq->trustee))
1706                 wake_up_all(&gcwq->trustee_wait);
1707
1708         return ret;
1709 }
1710
1711 /**
1712  * move_linked_works - move linked works to a list
1713  * @work: start of series of works to be scheduled
1714  * @head: target list to append @work to
1715  * @nextp: out paramter for nested worklist walking
1716  *
1717  * Schedule linked works starting from @work to @head.  Work series to
1718  * be scheduled starts at @work and includes any consecutive work with
1719  * WORK_STRUCT_LINKED set in its predecessor.
1720  *
1721  * If @nextp is not NULL, it's updated to point to the next work of
1722  * the last scheduled work.  This allows move_linked_works() to be
1723  * nested inside outer list_for_each_entry_safe().
1724  *
1725  * CONTEXT:
1726  * spin_lock_irq(gcwq->lock).
1727  */
1728 static void move_linked_works(struct work_struct *work, struct list_head *head,
1729                               struct work_struct **nextp)
1730 {
1731         struct work_struct *n;
1732
1733         /*
1734          * Linked worklist will always end before the end of the list,
1735          * use NULL for list head.
1736          */
1737         list_for_each_entry_safe_from(work, n, NULL, entry) {
1738                 list_move_tail(&work->entry, head);
1739                 if (!(*work_data_bits(work) & WORK_STRUCT_LINKED))
1740                         break;
1741         }
1742
1743         /*
1744          * If we're already inside safe list traversal and have moved
1745          * multiple works to the scheduled queue, the next position
1746          * needs to be updated.
1747          */
1748         if (nextp)
1749                 *nextp = n;
1750 }
1751
1752 static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
1753 {
1754         struct work_struct *work = list_first_entry(&cwq->delayed_works,
1755                                                     struct work_struct, entry);
1756         struct list_head *pos = pool_determine_ins_pos(cwq->pool, cwq);
1757
1758         trace_workqueue_activate_work(work);
1759         move_linked_works(work, pos, NULL);
1760         __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
1761         cwq->nr_active++;
1762 }
1763
1764 /**
1765  * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight
1766  * @cwq: cwq of interest
1767  * @color: color of work which left the queue
1768  * @delayed: for a delayed work
1769  *
1770  * A work either has completed or is removed from pending queue,
1771  * decrement nr_in_flight of its cwq and handle workqueue flushing.
1772  *
1773  * CONTEXT:
1774  * spin_lock_irq(gcwq->lock).
1775  */
1776 static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color,
1777                                  bool delayed)
1778 {
1779         /* ignore uncolored works */
1780         if (color == WORK_NO_COLOR)
1781                 return;
1782
1783         cwq->nr_in_flight[color]--;
1784
1785         if (!delayed) {
1786                 cwq->nr_active--;
1787                 if (!list_empty(&cwq->delayed_works)) {
1788                         /* one down, submit a delayed one */
1789                         if (cwq->nr_active < cwq->max_active)
1790                                 cwq_activate_first_delayed(cwq);
1791                 }
1792         }
1793
1794         /* is flush in progress and are we at the flushing tip? */
1795         if (likely(cwq->flush_color != color))
1796                 return;
1797
1798         /* are there still in-flight works? */
1799         if (cwq->nr_in_flight[color])
1800                 return;
1801
1802         /* this cwq is done, clear flush_color */
1803         cwq->flush_color = -1;
1804
1805         /*
1806          * If this was the last cwq, wake up the first flusher.  It
1807          * will handle the rest.
1808          */
1809         if (atomic_dec_and_test(&cwq->wq->nr_cwqs_to_flush))
1810                 complete(&cwq->wq->first_flusher->done);
1811 }
1812
1813 /**
1814  * process_one_work - process single work
1815  * @worker: self
1816  * @work: work to process
1817  *
1818  * Process @work.  This function contains all the logics necessary to
1819  * process a single work including synchronization against and
1820  * interaction with other workers on the same cpu, queueing and
1821  * flushing.  As long as context requirement is met, any worker can
1822  * call this function to process a work.
1823  *
1824  * CONTEXT:
1825  * spin_lock_irq(gcwq->lock) which is released and regrabbed.
1826  */
1827 static void process_one_work(struct worker *worker, struct work_struct *work)
1828 __releases(&gcwq->lock)
1829 __acquires(&gcwq->lock)
1830 {
1831         struct cpu_workqueue_struct *cwq = get_work_cwq(work);
1832         struct worker_pool *pool = worker->pool;
1833         struct global_cwq *gcwq = pool->gcwq;
1834         struct hlist_head *bwh = busy_worker_head(gcwq, work);
1835         bool cpu_intensive = cwq->wq->flags & WQ_CPU_INTENSIVE;
1836         work_func_t f = work->func;
1837         int work_color;
1838         struct worker *collision;
1839 #ifdef CONFIG_LOCKDEP
1840         /*
1841          * It is permissible to free the struct work_struct from
1842          * inside the function that is called from it, this we need to
1843          * take into account for lockdep too.  To avoid bogus "held
1844          * lock freed" warnings as well as problems when looking into
1845          * work->lockdep_map, make a copy and use that here.
1846          */
1847         struct lockdep_map lockdep_map;
1848
1849         lockdep_copy_map(&lockdep_map, &work->lockdep_map);
1850 #endif
1851         /*
1852          * A single work shouldn't be executed concurrently by
1853          * multiple workers on a single cpu.  Check whether anyone is
1854          * already processing the work.  If so, defer the work to the
1855          * currently executing one.
1856          */
1857         collision = __find_worker_executing_work(gcwq, bwh, work);
1858         if (unlikely(collision)) {
1859                 move_linked_works(work, &collision->scheduled, NULL);
1860                 return;
1861         }
1862
1863         /* claim and process */
1864         debug_work_deactivate(work);
1865         hlist_add_head(&worker->hentry, bwh);
1866         worker->current_work = work;
1867         worker->current_cwq = cwq;
1868         work_color = get_work_color(work);
1869
1870         /* record the current cpu number in the work data and dequeue */
1871         set_work_cpu(work, gcwq->cpu);
1872         list_del_init(&work->entry);
1873
1874         /*
1875          * If HIGHPRI_PENDING, check the next work, and, if HIGHPRI,
1876          * wake up another worker; otherwise, clear HIGHPRI_PENDING.
1877          */
1878         if (unlikely(pool->flags & POOL_HIGHPRI_PENDING)) {
1879                 struct work_struct *nwork = list_first_entry(&pool->worklist,
1880                                          struct work_struct, entry);
1881
1882                 if (!list_empty(&pool->worklist) &&
1883                     get_work_cwq(nwork)->wq->flags & WQ_HIGHPRI)
1884                         wake_up_worker(pool);
1885                 else
1886                         pool->flags &= ~POOL_HIGHPRI_PENDING;
1887         }
1888
1889         /*
1890          * CPU intensive works don't participate in concurrency
1891          * management.  They're the scheduler's responsibility.
1892          */
1893         if (unlikely(cpu_intensive))
1894                 worker_set_flags(worker, WORKER_CPU_INTENSIVE, true);
1895
1896         /*
1897          * Unbound gcwq isn't concurrency managed and work items should be
1898          * executed ASAP.  Wake up another worker if necessary.
1899          */
1900         if ((worker->flags & WORKER_UNBOUND) && need_more_worker(pool))
1901                 wake_up_worker(pool);
1902
1903         spin_unlock_irq(&gcwq->lock);
1904
1905         work_clear_pending(work);
1906         lock_map_acquire_read(&cwq->wq->lockdep_map);
1907         lock_map_acquire(&lockdep_map);
1908         trace_workqueue_execute_start(work);
1909         f(work);
1910         /*
1911          * While we must be careful to not use "work" after this, the trace
1912          * point will only record its address.
1913          */
1914         trace_workqueue_execute_end(work);
1915         lock_map_release(&lockdep_map);
1916         lock_map_release(&cwq->wq->lockdep_map);
1917
1918         if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
1919                 printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
1920                        "%s/0x%08x/%d\n",
1921                        current->comm, preempt_count(), task_pid_nr(current));
1922                 printk(KERN_ERR "    last function: ");
1923                 print_symbol("%s\n", (unsigned long)f);
1924                 debug_show_held_locks(current);
1925                 dump_stack();
1926         }
1927
1928         spin_lock_irq(&gcwq->lock);
1929
1930         /* clear cpu intensive status */
1931         if (unlikely(cpu_intensive))
1932                 worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
1933
1934         /* we're done with it, release */
1935         hlist_del_init(&worker->hentry);
1936         worker->current_work = NULL;
1937         worker->current_cwq = NULL;
1938         cwq_dec_nr_in_flight(cwq, work_color, false);
1939 }
1940
1941 /**
1942  * process_scheduled_works - process scheduled works
1943  * @worker: self
1944  *
1945  * Process all scheduled works.  Please note that the scheduled list
1946  * may change while processing a work, so this function repeatedly
1947  * fetches a work from the top and executes it.
1948  *
1949  * CONTEXT:
1950  * spin_lock_irq(gcwq->lock) which may be released and regrabbed
1951  * multiple times.
1952  */
1953 static void process_scheduled_works(struct worker *worker)
1954 {
1955         while (!list_empty(&worker->scheduled)) {
1956                 struct work_struct *work = list_first_entry(&worker->scheduled,
1957                                                 struct work_struct, entry);
1958                 process_one_work(worker, work);
1959         }
1960 }
1961
1962 /**
1963  * worker_thread - the worker thread function
1964  * @__worker: self
1965  *
1966  * The gcwq worker thread function.  There's a single dynamic pool of
1967  * these per each cpu.  These workers process all works regardless of
1968  * their specific target workqueue.  The only exception is works which
1969  * belong to workqueues with a rescuer which will be explained in
1970  * rescuer_thread().
1971  */
1972 static int worker_thread(void *__worker)
1973 {
1974         struct worker *worker = __worker;
1975         struct worker_pool *pool = worker->pool;
1976         struct global_cwq *gcwq = pool->gcwq;
1977
1978         /* tell the scheduler that this is a workqueue worker */
1979         worker->task->flags |= PF_WQ_WORKER;
1980 woke_up:
1981         spin_lock_irq(&gcwq->lock);
1982
1983         /* DIE can be set only while we're idle, checking here is enough */
1984         if (worker->flags & WORKER_DIE) {
1985                 spin_unlock_irq(&gcwq->lock);
1986                 worker->task->flags &= ~PF_WQ_WORKER;
1987                 return 0;
1988         }
1989
1990         worker_leave_idle(worker);
1991 recheck:
1992         /* no more worker necessary? */
1993         if (!need_more_worker(pool))
1994                 goto sleep;
1995
1996         /* do we need to manage? */
1997         if (unlikely(!may_start_working(pool)) && manage_workers(worker))
1998                 goto recheck;
1999
2000         /*
2001          * ->scheduled list can only be filled while a worker is
2002          * preparing to process a work or actually processing it.
2003          * Make sure nobody diddled with it while I was sleeping.
2004          */
2005         BUG_ON(!list_empty(&worker->scheduled));
2006
2007         /*
2008          * When control reaches this point, we're guaranteed to have
2009          * at least one idle worker or that someone else has already
2010          * assumed the manager role.
2011          */
2012         worker_clr_flags(worker, WORKER_PREP);
2013
2014         do {
2015                 struct work_struct *work =
2016                         list_first_entry(&pool->worklist,
2017                                          struct work_struct, entry);
2018
2019                 if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
2020                         /* optimization path, not strictly necessary */
2021                         process_one_work(worker, work);
2022                         if (unlikely(!list_empty(&worker->scheduled)))
2023                                 process_scheduled_works(worker);
2024                 } else {
2025                         move_linked_works(work, &worker->scheduled, NULL);
2026                         process_scheduled_works(worker);
2027                 }
2028         } while (keep_working(pool));
2029
2030         worker_set_flags(worker, WORKER_PREP, false);
2031 sleep:
2032         if (unlikely(need_to_manage_workers(pool)) && manage_workers(worker))
2033                 goto recheck;
2034
2035         /*
2036          * gcwq->lock is held and there's no work to process and no
2037          * need to manage, sleep.  Workers are woken up only while
2038          * holding gcwq->lock or from local cpu, so setting the
2039          * current state before releasing gcwq->lock is enough to
2040          * prevent losing any event.
2041          */
2042         worker_enter_idle(worker);
2043         __set_current_state(TASK_INTERRUPTIBLE);
2044         spin_unlock_irq(&gcwq->lock);
2045         schedule();
2046         goto woke_up;
2047 }
2048
2049 /**
2050  * rescuer_thread - the rescuer thread function
2051  * @__wq: the associated workqueue
2052  *
2053  * Workqueue rescuer thread function.  There's one rescuer for each
2054  * workqueue which has WQ_RESCUER set.
2055  *
2056  * Regular work processing on a gcwq may block trying to create a new
2057  * worker which uses GFP_KERNEL allocation which has slight chance of
2058  * developing into deadlock if some works currently on the same queue
2059  * need to be processed to satisfy the GFP_KERNEL allocation.  This is
2060  * the problem rescuer solves.
2061  *
2062  * When such condition is possible, the gcwq summons rescuers of all
2063  * workqueues which have works queued on the gcwq and let them process
2064  * those works so that forward progress can be guaranteed.
2065  *
2066  * This should happen rarely.
2067  */
2068 static int rescuer_thread(void *__wq)
2069 {
2070         struct workqueue_struct *wq = __wq;
2071         struct worker *rescuer = wq->rescuer;
2072         struct list_head *scheduled = &rescuer->scheduled;
2073         bool is_unbound = wq->flags & WQ_UNBOUND;
2074         unsigned int cpu;
2075
2076         set_user_nice(current, RESCUER_NICE_LEVEL);
2077 repeat:
2078         set_current_state(TASK_INTERRUPTIBLE);
2079
2080         if (kthread_should_stop())
2081                 return 0;
2082
2083         /*
2084          * See whether any cpu is asking for help.  Unbounded
2085          * workqueues use cpu 0 in mayday_mask for CPU_UNBOUND.
2086          */
2087         for_each_mayday_cpu(cpu, wq->mayday_mask) {
2088                 unsigned int tcpu = is_unbound ? WORK_CPU_UNBOUND : cpu;
2089                 struct cpu_workqueue_struct *cwq = get_cwq(tcpu, wq);
2090                 struct worker_pool *pool = cwq->pool;
2091                 struct global_cwq *gcwq = pool->gcwq;
2092                 struct work_struct *work, *n;
2093
2094                 __set_current_state(TASK_RUNNING);
2095                 mayday_clear_cpu(cpu, wq->mayday_mask);
2096
2097                 /* migrate to the target cpu if possible */
2098                 rescuer->pool = pool;
2099                 worker_maybe_bind_and_lock(rescuer);
2100
2101                 /*
2102                  * Slurp in all works issued via this workqueue and
2103                  * process'em.
2104                  */
2105                 BUG_ON(!list_empty(&rescuer->scheduled));
2106                 list_for_each_entry_safe(work, n, &pool->worklist, entry)
2107                         if (get_work_cwq(work) == cwq)
2108                                 move_linked_works(work, scheduled, &n);
2109
2110                 process_scheduled_works(rescuer);
2111
2112                 /*
2113                  * Leave this gcwq.  If keep_working() is %true, notify a
2114                  * regular worker; otherwise, we end up with 0 concurrency
2115                  * and stalling the execution.
2116                  */
2117                 if (keep_working(pool))
2118                         wake_up_worker(pool);
2119
2120                 spin_unlock_irq(&gcwq->lock);
2121         }
2122
2123         schedule();
2124         goto repeat;
2125 }
2126
2127 struct wq_barrier {
2128         struct work_struct      work;
2129         struct completion       done;
2130 };
2131
2132 static void wq_barrier_func(struct work_struct *work)
2133 {
2134         struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
2135         complete(&barr->done);
2136 }
2137
2138 /**
2139  * insert_wq_barrier - insert a barrier work
2140  * @cwq: cwq to insert barrier into
2141  * @barr: wq_barrier to insert
2142  * @target: target work to attach @barr to
2143  * @worker: worker currently executing @target, NULL if @target is not executing
2144  *
2145  * @barr is linked to @target such that @barr is completed only after
2146  * @target finishes execution.  Please note that the ordering
2147  * guarantee is observed only with respect to @target and on the local
2148  * cpu.
2149  *
2150  * Currently, a queued barrier can't be canceled.  This is because
2151  * try_to_grab_pending() can't determine whether the work to be
2152  * grabbed is at the head of the queue and thus can't clear LINKED
2153  * flag of the previous work while there must be a valid next work
2154  * after a work with LINKED flag set.
2155  *
2156  * Note that when @worker is non-NULL, @target may be modified
2157  * underneath us, so we can't reliably determine cwq from @target.
2158  *
2159  * CONTEXT:
2160  * spin_lock_irq(gcwq->lock).
2161  */
2162 static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
2163                               struct wq_barrier *barr,
2164                               struct work_struct *target, struct worker *worker)
2165 {
2166         struct list_head *head;
2167         unsigned int linked = 0;
2168
2169         /*
2170          * debugobject calls are safe here even with gcwq->lock locked
2171          * as we know for sure that this will not trigger any of the
2172          * checks and call back into the fixup functions where we
2173          * might deadlock.
2174          */
2175         INIT_WORK_ONSTACK(&barr->work, wq_barrier_func);
2176         __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
2177         init_completion(&barr->done);
2178
2179         /*
2180          * If @target is currently being executed, schedule the
2181          * barrier to the worker; otherwise, put it after @target.
2182          */
2183         if (worker)
2184                 head = worker->scheduled.next;
2185         else {
2186                 unsigned long *bits = work_data_bits(target);
2187
2188                 head = target->entry.next;
2189                 /* there can already be other linked works, inherit and set */
2190                 linked = *bits & WORK_STRUCT_LINKED;
2191                 __set_bit(WORK_STRUCT_LINKED_BIT, bits);
2192         }
2193
2194         debug_work_activate(&barr->work);
2195         insert_work(cwq, &barr->work, head,
2196                     work_color_to_flags(WORK_NO_COLOR) | linked);
2197 }
2198
2199 /**
2200  * flush_workqueue_prep_cwqs - prepare cwqs for workqueue flushing
2201  * @wq: workqueue being flushed
2202  * @flush_color: new flush color, < 0 for no-op
2203  * @work_color: new work color, < 0 for no-op
2204  *
2205  * Prepare cwqs for workqueue flushing.
2206  *
2207  * If @flush_color is non-negative, flush_color on all cwqs should be
2208  * -1.  If no cwq has in-flight commands at the specified color, all
2209  * cwq->flush_color's stay at -1 and %false is returned.  If any cwq
2210  * has in flight commands, its cwq->flush_color is set to
2211  * @flush_color, @wq->nr_cwqs_to_flush is updated accordingly, cwq
2212  * wakeup logic is armed and %true is returned.
2213  *
2214  * The caller should have initialized @wq->first_flusher prior to
2215  * calling this function with non-negative @flush_color.  If
2216  * @flush_color is negative, no flush color update is done and %false
2217  * is returned.
2218  *
2219  * If @work_color is non-negative, all cwqs should have the same
2220  * work_color which is previous to @work_color and all will be
2221  * advanced to @work_color.
2222  *
2223  * CONTEXT:
2224  * mutex_lock(wq->flush_mutex).
2225  *
2226  * RETURNS:
2227  * %true if @flush_color >= 0 and there's something to flush.  %false
2228  * otherwise.
2229  */
2230 static bool flush_workqueue_prep_cwqs(struct workqueue_struct *wq,
2231                                       int flush_color, int work_color)
2232 {
2233         bool wait = false;
2234         unsigned int cpu;
2235
2236         if (flush_color >= 0) {
2237                 BUG_ON(atomic_read(&wq->nr_cwqs_to_flush));
2238                 atomic_set(&wq->nr_cwqs_to_flush, 1);
2239         }
2240
2241         for_each_cwq_cpu(cpu, wq) {
2242                 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
2243                 struct global_cwq *gcwq = cwq->pool->gcwq;
2244
2245                 spin_lock_irq(&gcwq->lock);
2246
2247                 if (flush_color >= 0) {
2248                         BUG_ON(cwq->flush_color != -1);
2249
2250                         if (cwq->nr_in_flight[flush_color]) {
2251                                 cwq->flush_color = flush_color;
2252                                 atomic_inc(&wq->nr_cwqs_to_flush);
2253                                 wait = true;
2254                         }
2255                 }
2256
2257                 if (work_color >= 0) {
2258                         BUG_ON(work_color != work_next_color(cwq->work_color));
2259                         cwq->work_color = work_color;
2260                 }
2261
2262                 spin_unlock_irq(&gcwq->lock);
2263         }
2264
2265         if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_cwqs_to_flush))
2266                 complete(&wq->first_flusher->done);
2267
2268         return wait;
2269 }
2270
2271 /**
2272  * flush_workqueue - ensure that any scheduled work has run to completion.
2273  * @wq: workqueue to flush
2274  *
2275  * Forces execution of the workqueue and blocks until its completion.
2276  * This is typically used in driver shutdown handlers.
2277  *
2278  * We sleep until all works which were queued on entry have been handled,
2279  * but we are not livelocked by new incoming ones.
2280  */
2281 void flush_workqueue(struct workqueue_struct *wq)
2282 {
2283         struct wq_flusher this_flusher = {
2284                 .list = LIST_HEAD_INIT(this_flusher.list),
2285                 .flush_color = -1,
2286                 .done = COMPLETION_INITIALIZER_ONSTACK(this_flusher.done),
2287         };
2288         int next_color;
2289
2290         lock_map_acquire(&wq->lockdep_map);
2291         lock_map_release(&wq->lockdep_map);
2292
2293         mutex_lock(&wq->flush_mutex);
2294
2295         /*
2296          * Start-to-wait phase
2297          */
2298         next_color = work_next_color(wq->work_color);
2299
2300         if (next_color != wq->flush_color) {
2301                 /*
2302                  * Color space is not full.  The current work_color
2303                  * becomes our flush_color and work_color is advanced
2304                  * by one.
2305                  */
2306                 BUG_ON(!list_empty(&wq->flusher_overflow));
2307                 this_flusher.flush_color = wq->work_color;
2308                 wq->work_color = next_color;
2309
2310                 if (!wq->first_flusher) {
2311                         /* no flush in progress, become the first flusher */
2312                         BUG_ON(wq->flush_color != this_flusher.flush_color);
2313
2314                         wq->first_flusher = &this_flusher;
2315
2316                         if (!flush_workqueue_prep_cwqs(wq, wq->flush_color,
2317                                                        wq->work_color)) {
2318                                 /* nothing to flush, done */
2319                                 wq->flush_color = next_color;
2320                                 wq->first_flusher = NULL;
2321                                 goto out_unlock;
2322                         }
2323                 } else {
2324                         /* wait in queue */
2325                         BUG_ON(wq->flush_color == this_flusher.flush_color);
2326                         list_add_tail(&this_flusher.list, &wq->flusher_queue);
2327                         flush_workqueue_prep_cwqs(wq, -1, wq->work_color);
2328                 }
2329         } else {
2330                 /*
2331                  * Oops, color space is full, wait on overflow queue.
2332                  * The next flush completion will assign us
2333                  * flush_color and transfer to flusher_queue.
2334                  */
2335                 list_add_tail(&this_flusher.list, &wq->flusher_overflow);
2336         }
2337
2338         mutex_unlock(&wq->flush_mutex);
2339
2340         wait_for_completion(&this_flusher.done);
2341
2342         /*
2343          * Wake-up-and-cascade phase
2344          *
2345          * First flushers are responsible for cascading flushes and
2346          * handling overflow.  Non-first flushers can simply return.
2347          */
2348         if (wq->first_flusher != &this_flusher)
2349                 return;
2350
2351         mutex_lock(&wq->flush_mutex);
2352
2353         /* we might have raced, check again with mutex held */
2354         if (wq->first_flusher != &this_flusher)
2355                 goto out_unlock;
2356
2357         wq->first_flusher = NULL;
2358
2359         BUG_ON(!list_empty(&this_flusher.list));
2360         BUG_ON(wq->flush_color != this_flusher.flush_color);
2361
2362         while (true) {
2363                 struct wq_flusher *next, *tmp;
2364
2365                 /* complete all the flushers sharing the current flush color */
2366                 list_for_each_entry_safe(next, tmp, &wq->flusher_queue, list) {
2367                         if (next->flush_color != wq->flush_color)
2368                                 break;
2369                         list_del_init(&next->list);
2370                         complete(&next->done);
2371                 }
2372
2373                 BUG_ON(!list_empty(&wq->flusher_overflow) &&
2374                        wq->flush_color != work_next_color(wq->work_color));
2375
2376                 /* this flush_color is finished, advance by one */
2377                 wq->flush_color = work_next_color(wq->flush_color);
2378
2379                 /* one color has been freed, handle overflow queue */
2380                 if (!list_empty(&wq->flusher_overflow)) {
2381                         /*
2382                          * Assign the same color to all overflowed
2383                          * flushers, advance work_color and append to
2384                          * flusher_queue.  This is the start-to-wait
2385                          * phase for these overflowed flushers.
2386                          */
2387                         list_for_each_entry(tmp, &wq->flusher_overflow, list)
2388                                 tmp->flush_color = wq->work_color;
2389
2390                         wq->work_color = work_next_color(wq->work_color);
2391
2392                         list_splice_tail_init(&wq->flusher_overflow,
2393                                               &wq->flusher_queue);
2394                         flush_workqueue_prep_cwqs(wq, -1, wq->work_color);
2395                 }
2396
2397                 if (list_empty(&wq->flusher_queue)) {
2398                         BUG_ON(wq->flush_color != wq->work_color);
2399                         break;
2400                 }
2401
2402                 /*
2403                  * Need to flush more colors.  Make the next flusher
2404                  * the new first flusher and arm cwqs.
2405                  */
2406                 BUG_ON(wq->flush_color == wq->work_color);
2407                 BUG_ON(wq->flush_color != next->flush_color);
2408
2409                 list_del_init(&next->list);
2410                 wq->first_flusher = next;
2411
2412                 if (flush_workqueue_prep_cwqs(wq, wq->flush_color, -1))
2413                         break;
2414
2415                 /*
2416                  * Meh... this color is already done, clear first
2417                  * flusher and repeat cascading.
2418                  */
2419                 wq->first_flusher = NULL;
2420         }
2421
2422 out_unlock:
2423         mutex_unlock(&wq->flush_mutex);
2424 }
2425 EXPORT_SYMBOL_GPL(flush_workqueue);
2426
2427 /**
2428  * drain_workqueue - drain a workqueue
2429  * @wq: workqueue to drain
2430  *
2431  * Wait until the workqueue becomes empty.  While draining is in progress,
2432  * only chain queueing is allowed.  IOW, only currently pending or running
2433  * work items on @wq can queue further work items on it.  @wq is flushed
2434  * repeatedly until it becomes empty.  The number of flushing is detemined
2435  * by the depth of chaining and should be relatively short.  Whine if it
2436  * takes too long.
2437  */
2438 void drain_workqueue(struct workqueue_struct *wq)
2439 {
2440         unsigned int flush_cnt = 0;
2441         unsigned int cpu;
2442
2443         /*
2444          * __queue_work() needs to test whether there are drainers, is much
2445          * hotter than drain_workqueue() and already looks at @wq->flags.
2446          * Use WQ_DRAINING so that queue doesn't have to check nr_drainers.
2447          */
2448         spin_lock(&workqueue_lock);
2449         if (!wq->nr_drainers++)
2450                 wq->flags |= WQ_DRAINING;
2451         spin_unlock(&workqueue_lock);
2452 reflush:
2453         flush_workqueue(wq);
2454
2455         for_each_cwq_cpu(cpu, wq) {
2456                 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
2457                 bool drained;
2458
2459                 spin_lock_irq(&cwq->pool->gcwq->lock);
2460                 drained = !cwq->nr_active && list_empty(&cwq->delayed_works);
2461                 spin_unlock_irq(&cwq->pool->gcwq->lock);
2462
2463                 if (drained)
2464                         continue;
2465
2466                 if (++flush_cnt == 10 ||
2467                     (flush_cnt % 100 == 0 && flush_cnt <= 1000))
2468                         pr_warning("workqueue %s: flush on destruction isn't complete after %u tries\n",
2469                                    wq->name, flush_cnt);
2470                 goto reflush;
2471         }
2472
2473         spin_lock(&workqueue_lock);
2474         if (!--wq->nr_drainers)
2475                 wq->flags &= ~WQ_DRAINING;
2476         spin_unlock(&workqueue_lock);
2477 }
2478 EXPORT_SYMBOL_GPL(drain_workqueue);
2479
2480 static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
2481                              bool wait_executing)
2482 {
2483         struct worker *worker = NULL;
2484         struct global_cwq *gcwq;
2485         struct cpu_workqueue_struct *cwq;
2486
2487         might_sleep();
2488         gcwq = get_work_gcwq(work);
2489         if (!gcwq)
2490                 return false;
2491
2492         spin_lock_irq(&gcwq->lock);
2493         if (!list_empty(&work->entry)) {
2494                 /*
2495                  * See the comment near try_to_grab_pending()->smp_rmb().
2496                  * If it was re-queued to a different gcwq under us, we
2497                  * are not going to wait.
2498                  */
2499                 smp_rmb();
2500                 cwq = get_work_cwq(work);
2501                 if (unlikely(!cwq || gcwq != cwq->pool->gcwq))
2502                         goto already_gone;
2503         } else if (wait_executing) {
2504                 worker = find_worker_executing_work(gcwq, work);
2505                 if (!worker)
2506                         goto already_gone;
2507                 cwq = worker->current_cwq;
2508         } else
2509                 goto already_gone;
2510
2511         insert_wq_barrier(cwq, barr, work, worker);
2512         spin_unlock_irq(&gcwq->lock);
2513
2514         /*
2515          * If @max_active is 1 or rescuer is in use, flushing another work
2516          * item on the same workqueue may lead to deadlock.  Make sure the
2517          * flusher is not running on the same workqueue by verifying write
2518          * access.
2519          */
2520         if (cwq->wq->saved_max_active == 1 || cwq->wq->flags & WQ_RESCUER)
2521                 lock_map_acquire(&cwq->wq->lockdep_map);
2522         else
2523                 lock_map_acquire_read(&cwq->wq->lockdep_map);
2524         lock_map_release(&cwq->wq->lockdep_map);
2525
2526         return true;
2527 already_gone:
2528         spin_unlock_irq(&gcwq->lock);
2529         return false;
2530 }
2531
2532 /**
2533  * flush_work - wait for a work to finish executing the last queueing instance
2534  * @work: the work to flush
2535  *
2536  * Wait until @work has finished execution.  This function considers
2537  * only the last queueing instance of @work.  If @work has been
2538  * enqueued across different CPUs on a non-reentrant workqueue or on
2539  * multiple workqueues, @work might still be executing on return on
2540  * some of the CPUs from earlier queueing.
2541  *
2542  * If @work was queued only on a non-reentrant, ordered or unbound
2543  * workqueue, @work is guaranteed to be idle on return if it hasn't
2544  * been requeued since flush started.
2545  *
2546  * RETURNS:
2547  * %true if flush_work() waited for the work to finish execution,
2548  * %false if it was already idle.
2549  */
2550 bool flush_work(struct work_struct *work)
2551 {
2552         struct wq_barrier barr;
2553
2554         lock_map_acquire(&work->lockdep_map);
2555         lock_map_release(&work->lockdep_map);
2556
2557         if (start_flush_work(work, &barr, true)) {
2558                 wait_for_completion(&barr.done);
2559                 destroy_work_on_stack(&barr.work);
2560                 return true;
2561         } else
2562                 return false;
2563 }
2564 EXPORT_SYMBOL_GPL(flush_work);
2565
2566 static bool wait_on_cpu_work(struct global_cwq *gcwq, struct work_struct *work)
2567 {
2568         struct wq_barrier barr;
2569         struct worker *worker;
2570
2571         spin_lock_irq(&gcwq->lock);
2572
2573         worker = find_worker_executing_work(gcwq, work);
2574         if (unlikely(worker))
2575                 insert_wq_barrier(worker->current_cwq, &barr, work, worker);
2576
2577         spin_unlock_irq(&gcwq->lock);
2578
2579         if (unlikely(worker)) {
2580                 wait_for_completion(&barr.done);
2581                 destroy_work_on_stack(&barr.work);
2582                 return true;
2583         } else
2584                 return false;
2585 }
2586
2587 static bool wait_on_work(struct work_struct *work)
2588 {
2589         bool ret = false;
2590         int cpu;
2591
2592         might_sleep();
2593
2594         lock_map_acquire(&work->lockdep_map);
2595         lock_map_release(&work->lockdep_map);
2596
2597         for_each_gcwq_cpu(cpu)
2598                 ret |= wait_on_cpu_work(get_gcwq(cpu), work);
2599         return ret;
2600 }
2601
2602 /**
2603  * flush_work_sync - wait until a work has finished execution
2604  * @work: the work to flush
2605  *
2606  * Wait until @work has finished execution.  On return, it's
2607  * guaranteed that all queueing instances of @work which happened
2608  * before this function is called are finished.  In other words, if
2609  * @work hasn't been requeued since this function was called, @work is
2610  * guaranteed to be idle on return.
2611  *
2612  * RETURNS:
2613  * %true if flush_work_sync() waited for the work to finish execution,
2614  * %false if it was already idle.
2615  */
2616 bool flush_work_sync(struct work_struct *work)
2617 {
2618         struct wq_barrier barr;
2619         bool pending, waited;
2620
2621         /* we'll wait for executions separately, queue barr only if pending */
2622         pending = start_flush_work(work, &barr, false);
2623
2624         /* wait for executions to finish */
2625         waited = wait_on_work(work);
2626
2627         /* wait for the pending one */
2628         if (pending) {
2629                 wait_for_completion(&barr.done);
2630                 destroy_work_on_stack(&barr.work);
2631         }
2632
2633         return pending || waited;
2634 }
2635 EXPORT_SYMBOL_GPL(flush_work_sync);
2636
2637 /*
2638  * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit,
2639  * so this work can't be re-armed in any way.
2640  */
2641 static int try_to_grab_pending(struct work_struct *work)
2642 {
2643         struct global_cwq *gcwq;
2644         int ret = -1;
2645
2646         if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
2647                 return 0;
2648
2649         /*
2650          * The queueing is in progress, or it is already queued. Try to
2651          * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
2652          */
2653         gcwq = get_work_gcwq(work);
2654         if (!gcwq)
2655                 return ret;
2656
2657         spin_lock_irq(&gcwq->lock);
2658         if (!list_empty(&work->entry)) {
2659                 /*
2660                  * This work is queued, but perhaps we locked the wrong gcwq.
2661                  * In that case we must see the new value after rmb(), see
2662                  * insert_work()->wmb().
2663                  */
2664                 smp_rmb();
2665                 if (gcwq == get_work_gcwq(work)) {
2666                         debug_work_deactivate(work);
2667                         list_del_init(&work->entry);
2668                         cwq_dec_nr_in_flight(get_work_cwq(work),
2669                                 get_work_color(work),
2670                                 *work_data_bits(work) & WORK_STRUCT_DELAYED);
2671                         ret = 1;
2672                 }
2673         }
2674         spin_unlock_irq(&gcwq->lock);
2675
2676         return ret;
2677 }
2678
2679 static bool __cancel_work_timer(struct work_struct *work,
2680                                 struct timer_list* timer)
2681 {
2682         int ret;
2683
2684         do {
2685                 ret = (timer && likely(del_timer(timer)));
2686                 if (!ret)
2687                         ret = try_to_grab_pending(work);
2688                 wait_on_work(work);
2689         } while (unlikely(ret < 0));
2690
2691         clear_work_data(work);
2692         return ret;
2693 }
2694
2695 /**
2696  * cancel_work_sync - cancel a work and wait for it to finish
2697  * @work: the work to cancel
2698  *
2699  * Cancel @work and wait for its execution to finish.  This function
2700  * can be used even if the work re-queues itself or migrates to
2701  * another workqueue.  On return from this function, @work is
2702  * guaranteed to be not pending or executing on any CPU.
2703  *
2704  * cancel_work_sync(&delayed_work->work) must not be used for
2705  * delayed_work's.  Use cancel_delayed_work_sync() instead.
2706  *
2707  * The caller must ensure that the workqueue on which @work was last
2708  * queued can't be destroyed before this function returns.
2709  *
2710  * RETURNS:
2711  * %true if @work was pending, %false otherwise.
2712  */
2713 bool cancel_work_sync(struct work_struct *work)
2714 {
2715         return __cancel_work_timer(work, NULL);
2716 }
2717 EXPORT_SYMBOL_GPL(cancel_work_sync);
2718
2719 /**
2720  * flush_delayed_work - wait for a dwork to finish executing the last queueing
2721  * @dwork: the delayed work to flush
2722  *
2723  * Delayed timer is cancelled and the pending work is queued for
2724  * immediate execution.  Like flush_work(), this function only
2725  * considers the last queueing instance of @dwork.
2726  *
2727  * RETURNS:
2728  * %true if flush_work() waited for the work to finish execution,
2729  * %false if it was already idle.
2730  */
2731 bool flush_delayed_work(struct delayed_work *dwork)
2732 {
2733         if (del_timer_sync(&dwork->timer))
2734                 __queue_work(raw_smp_processor_id(),
2735                              get_work_cwq(&dwork->work)->wq, &dwork->work);
2736         return flush_work(&dwork->work);
2737 }
2738 EXPORT_SYMBOL(flush_delayed_work);
2739
2740 /**
2741  * flush_delayed_work_sync - wait for a dwork to finish
2742  * @dwork: the delayed work to flush
2743  *
2744  * Delayed timer is cancelled and the pending work is queued for
2745  * execution immediately.  Other than timer handling, its behavior
2746  * is identical to flush_work_sync().
2747  *
2748  * RETURNS:
2749  * %true if flush_work_sync() waited for the work to finish execution,
2750  * %false if it was already idle.
2751  */
2752 bool flush_delayed_work_sync(struct delayed_work *dwork)
2753 {
2754         if (del_timer_sync(&dwork->timer))
2755                 __queue_work(raw_smp_processor_id(),
2756                              get_work_cwq(&dwork->work)->wq, &dwork->work);
2757         return flush_work_sync(&dwork->work);
2758 }
2759 EXPORT_SYMBOL(flush_delayed_work_sync);
2760
2761 /**
2762  * cancel_delayed_work_sync - cancel a delayed work and wait for it to finish
2763  * @dwork: the delayed work cancel
2764  *
2765  * This is cancel_work_sync() for delayed works.
2766  *
2767  * RETURNS:
2768  * %true if @dwork was pending, %false otherwise.
2769  */
2770 bool cancel_delayed_work_sync(struct delayed_work *dwork)
2771 {
2772         return __cancel_work_timer(&dwork->work, &dwork->timer);
2773 }
2774 EXPORT_SYMBOL(cancel_delayed_work_sync);
2775
2776 /**
2777  * schedule_work - put work task in global workqueue
2778  * @work: job to be done
2779  *
2780  * Returns zero if @work was already on the kernel-global workqueue and
2781  * non-zero otherwise.
2782  *
2783  * This puts a job in the kernel-global workqueue if it was not already
2784  * queued and leaves it in the same position on the kernel-global
2785  * workqueue otherwise.
2786  */
2787 int schedule_work(struct work_struct *work)
2788 {
2789         return queue_work(system_wq, work);
2790 }
2791 EXPORT_SYMBOL(schedule_work);
2792
2793 /*
2794  * schedule_work_on - put work task on a specific cpu
2795  * @cpu: cpu to put the work task on
2796  * @work: job to be done
2797  *
2798  * This puts a job on a specific cpu
2799  */
2800 int schedule_work_on(int cpu, struct work_struct *work)
2801 {
2802         return queue_work_on(cpu, system_wq, work);
2803 }
2804 EXPORT_SYMBOL(schedule_work_on);
2805
2806 /**
2807  * schedule_delayed_work - put work task in global workqueue after delay
2808  * @dwork: job to be done
2809  * @delay: number of jiffies to wait or 0 for immediate execution
2810  *
2811  * After waiting for a given time this puts a job in the kernel-global
2812  * workqueue.
2813  */
2814 int schedule_delayed_work(struct delayed_work *dwork,
2815                                         unsigned long delay)
2816 {
2817         return queue_delayed_work(system_wq, dwork, delay);
2818 }
2819 EXPORT_SYMBOL(schedule_delayed_work);
2820
2821 /**
2822  * schedule_delayed_work_on - queue work in global workqueue on CPU after delay
2823  * @cpu: cpu to use
2824  * @dwork: job to be done
2825  * @delay: number of jiffies to wait
2826  *
2827  * After waiting for a given time this puts a job in the kernel-global
2828  * workqueue on the specified CPU.
2829  */
2830 int schedule_delayed_work_on(int cpu,
2831                         struct delayed_work *dwork, unsigned long delay)
2832 {
2833         return queue_delayed_work_on(cpu, system_wq, dwork, delay);
2834 }
2835 EXPORT_SYMBOL(schedule_delayed_work_on);
2836
2837 /**
2838  * schedule_on_each_cpu - execute a function synchronously on each online CPU
2839  * @func: the function to call
2840  *
2841  * schedule_on_each_cpu() executes @func on each online CPU using the
2842  * system workqueue and blocks until all CPUs have completed.
2843  * schedule_on_each_cpu() is very slow.
2844  *
2845  * RETURNS:
2846  * 0 on success, -errno on failure.
2847  */
2848 int schedule_on_each_cpu(work_func_t func)
2849 {
2850         int cpu;
2851         struct work_struct __percpu *works;
2852
2853         works = alloc_percpu(struct work_struct);
2854         if (!works)
2855                 return -ENOMEM;
2856
2857         get_online_cpus();
2858
2859         for_each_online_cpu(cpu) {
2860                 struct work_struct *work = per_cpu_ptr(works, cpu);
2861
2862                 INIT_WORK(work, func);
2863                 schedule_work_on(cpu, work);
2864         }
2865
2866         for_each_online_cpu(cpu)
2867                 flush_work(per_cpu_ptr(works, cpu));
2868
2869         put_online_cpus();
2870         free_percpu(works);
2871         return 0;
2872 }
2873
2874 /**
2875  * flush_scheduled_work - ensure that any scheduled work has run to completion.
2876  *
2877  * Forces execution of the kernel-global workqueue and blocks until its
2878  * completion.
2879  *
2880  * Think twice before calling this function!  It's very easy to get into
2881  * trouble if you don't take great care.  Either of the following situations
2882  * will lead to deadlock:
2883  *
2884  *      One of the work items currently on the workqueue needs to acquire
2885  *      a lock held by your code or its caller.
2886  *
2887  *      Your code is running in the context of a work routine.
2888  *
2889  * They will be detected by lockdep when they occur, but the first might not
2890  * occur very often.  It depends on what work items are on the workqueue and
2891  * what locks they need, which you have no control over.
2892  *
2893  * In most situations flushing the entire workqueue is overkill; you merely
2894  * need to know that a particular work item isn't queued and isn't running.
2895  * In such cases you should use cancel_delayed_work_sync() or
2896  * cancel_work_sync() instead.
2897  */
2898 void flush_scheduled_work(void)
2899 {
2900         flush_workqueue(system_wq);
2901 }
2902 EXPORT_SYMBOL(flush_scheduled_work);
2903
2904 /**
2905  * execute_in_process_context - reliably execute the routine with user context
2906  * @fn:         the function to execute
2907  * @ew:         guaranteed storage for the execute work structure (must
2908  *              be available when the work executes)
2909  *
2910  * Executes the function immediately if process context is available,
2911  * otherwise schedules the function for delayed execution.
2912  *
2913  * Returns:     0 - function was executed
2914  *              1 - function was scheduled for execution
2915  */
2916 int execute_in_process_context(work_func_t fn, struct execute_work *ew)
2917 {
2918         if (!in_interrupt()) {
2919                 fn(&ew->work);
2920                 return 0;
2921         }
2922
2923         INIT_WORK(&ew->work, fn);
2924         schedule_work(&ew->work);
2925
2926         return 1;
2927 }
2928 EXPORT_SYMBOL_GPL(execute_in_process_context);
2929
2930 int keventd_up(void)
2931 {
2932         return system_wq != NULL;
2933 }
2934
2935 static int alloc_cwqs(struct workqueue_struct *wq)
2936 {
2937         /*
2938          * cwqs are forced aligned according to WORK_STRUCT_FLAG_BITS.
2939          * Make sure that the alignment isn't lower than that of
2940          * unsigned long long.
2941          */
2942         const size_t size = sizeof(struct cpu_workqueue_struct);
2943         const size_t align = max_t(size_t, 1 << WORK_STRUCT_FLAG_BITS,
2944                                    __alignof__(unsigned long long));
2945
2946         if (!(wq->flags & WQ_UNBOUND))
2947                 wq->cpu_wq.pcpu = __alloc_percpu(size, align);
2948         else {
2949                 void *ptr;
2950
2951                 /*
2952                  * Allocate enough room to align cwq and put an extra
2953                  * pointer at the end pointing back to the originally
2954                  * allocated pointer which will be used for free.
2955                  */
2956                 ptr = kzalloc(size + align + sizeof(void *), GFP_KERNEL);
2957                 if (ptr) {
2958                         wq->cpu_wq.single = PTR_ALIGN(ptr, align);
2959                         *(void **)(wq->cpu_wq.single + 1) = ptr;
2960                 }
2961         }
2962
2963         /* just in case, make sure it's actually aligned */
2964         BUG_ON(!IS_ALIGNED(wq->cpu_wq.v, align));
2965         return wq->cpu_wq.v ? 0 : -ENOMEM;
2966 }
2967
2968 static void free_cwqs(struct workqueue_struct *wq)
2969 {
2970         if (!(wq->flags & WQ_UNBOUND))
2971                 free_percpu(wq->cpu_wq.pcpu);
2972         else if (wq->cpu_wq.single) {
2973                 /* the pointer to free is stored right after the cwq */
2974                 kfree(*(void **)(wq->cpu_wq.single + 1));
2975         }
2976 }
2977
2978 static int wq_clamp_max_active(int max_active, unsigned int flags,
2979                                const char *name)
2980 {
2981         int lim = flags & WQ_UNBOUND ? WQ_UNBOUND_MAX_ACTIVE : WQ_MAX_ACTIVE;
2982
2983         if (max_active < 1 || max_active > lim)
2984                 printk(KERN_WARNING "workqueue: max_active %d requested for %s "
2985                        "is out of range, clamping between %d and %d\n",
2986                        max_active, name, 1, lim);
2987
2988         return clamp_val(max_active, 1, lim);
2989 }
2990
2991 struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
2992                                                unsigned int flags,
2993                                                int max_active,
2994                                                struct lock_class_key *key,
2995                                                const char *lock_name, ...)
2996 {
2997         va_list args, args1;
2998         struct workqueue_struct *wq;
2999         unsigned int cpu;
3000         size_t namelen;
3001
3002         /* determine namelen, allocate wq and format name */
3003         va_start(args, lock_name);
3004         va_copy(args1, args);
3005         namelen = vsnprintf(NULL, 0, fmt, args) + 1;
3006
3007         wq = kzalloc(sizeof(*wq) + namelen, GFP_KERNEL);
3008         if (!wq)
3009                 goto err;
3010
3011         vsnprintf(wq->name, namelen, fmt, args1);
3012         va_end(args);
3013         va_end(args1);
3014
3015         /*
3016          * Workqueues which may be used during memory reclaim should
3017          * have a rescuer to guarantee forward progress.
3018          */
3019         if (flags & WQ_MEM_RECLAIM)
3020                 flags |= WQ_RESCUER;
3021
3022         max_active = max_active ?: WQ_DFL_ACTIVE;
3023         max_active = wq_clamp_max_active(max_active, flags, wq->name);
3024
3025         /* init wq */
3026         wq->flags = flags;
3027         wq->saved_max_active = max_active;
3028         mutex_init(&wq->flush_mutex);
3029         atomic_set(&wq->nr_cwqs_to_flush, 0);
3030         INIT_LIST_HEAD(&wq->flusher_queue);
3031         INIT_LIST_HEAD(&wq->flusher_overflow);
3032
3033         lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
3034         INIT_LIST_HEAD(&wq->list);
3035
3036         if (alloc_cwqs(wq) < 0)
3037                 goto err;
3038
3039         for_each_cwq_cpu(cpu, wq) {
3040                 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
3041                 struct global_cwq *gcwq = get_gcwq(cpu);
3042
3043                 BUG_ON((unsigned long)cwq & WORK_STRUCT_FLAG_MASK);
3044                 cwq->pool = &gcwq->pool;
3045                 cwq->wq = wq;
3046                 cwq->flush_color = -1;
3047                 cwq->max_active = max_active;
3048                 INIT_LIST_HEAD(&cwq->delayed_works);
3049         }
3050
3051         if (flags & WQ_RESCUER) {
3052                 struct worker *rescuer;
3053
3054                 if (!alloc_mayday_mask(&wq->mayday_mask, GFP_KERNEL))
3055                         goto err;
3056
3057                 wq->rescuer = rescuer = alloc_worker();
3058                 if (!rescuer)
3059                         goto err;
3060
3061                 rescuer->task = kthread_create(rescuer_thread, wq, "%s",
3062                                                wq->name);
3063                 if (IS_ERR(rescuer->task))
3064                         goto err;
3065
3066                 rescuer->task->flags |= PF_THREAD_BOUND;
3067                 wake_up_process(rescuer->task);
3068         }
3069
3070         /*
3071          * workqueue_lock protects global freeze state and workqueues
3072          * list.  Grab it, set max_active accordingly and add the new
3073          * workqueue to workqueues list.
3074          */
3075         spin_lock(&workqueue_lock);
3076
3077         if (workqueue_freezing && wq->flags & WQ_FREEZABLE)
3078                 for_each_cwq_cpu(cpu, wq)
3079                         get_cwq(cpu, wq)->max_active = 0;
3080
3081         list_add(&wq->list, &workqueues);
3082
3083         spin_unlock(&workqueue_lock);
3084
3085         return wq;
3086 err:
3087         if (wq) {
3088                 free_cwqs(wq);
3089                 free_mayday_mask(wq->mayday_mask);
3090                 kfree(wq->rescuer);
3091                 kfree(wq);
3092         }
3093         return NULL;
3094 }
3095 EXPORT_SYMBOL_GPL(__alloc_workqueue_key);
3096
3097 /**
3098  * destroy_workqueue - safely terminate a workqueue
3099  * @wq: target workqueue
3100  *
3101  * Safely destroy a workqueue. All work currently pending will be done first.
3102  */
3103 void destroy_workqueue(struct workqueue_struct *wq)
3104 {
3105         unsigned int cpu;
3106
3107         /* drain it before proceeding with destruction */
3108         drain_workqueue(wq);
3109
3110         /*
3111          * wq list is used to freeze wq, remove from list after
3112          * flushing is complete in case freeze races us.
3113          */
3114         spin_lock(&workqueue_lock);
3115         list_del(&wq->list);
3116         spin_unlock(&workqueue_lock);
3117
3118         /* sanity check */
3119         for_each_cwq_cpu(cpu, wq) {
3120                 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
3121                 int i;
3122
3123                 for (i = 0; i < WORK_NR_COLORS; i++)
3124                         BUG_ON(cwq->nr_in_flight[i]);
3125                 BUG_ON(cwq->nr_active);
3126                 BUG_ON(!list_empty(&cwq->delayed_works));
3127         }
3128
3129         if (wq->flags & WQ_RESCUER) {
3130                 kthread_stop(wq->rescuer->task);
3131                 free_mayday_mask(wq->mayday_mask);
3132                 kfree(wq->rescuer);
3133         }
3134
3135         free_cwqs(wq);
3136         kfree(wq);
3137 }
3138 EXPORT_SYMBOL_GPL(destroy_workqueue);
3139
3140 /**
3141  * workqueue_set_max_active - adjust max_active of a workqueue
3142  * @wq: target workqueue
3143  * @max_active: new max_active value.
3144  *
3145  * Set max_active of @wq to @max_active.
3146  *
3147  * CONTEXT:
3148  * Don't call from IRQ context.
3149  */
3150 void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
3151 {
3152         unsigned int cpu;
3153
3154         max_active = wq_clamp_max_active(max_active, wq->flags, wq->name);
3155
3156         spin_lock(&workqueue_lock);
3157
3158         wq->saved_max_active = max_active;
3159
3160         for_each_cwq_cpu(cpu, wq) {
3161                 struct global_cwq *gcwq = get_gcwq(cpu);
3162
3163                 spin_lock_irq(&gcwq->lock);
3164
3165                 if (!(wq->flags & WQ_FREEZABLE) ||
3166                     !(gcwq->flags & GCWQ_FREEZING))
3167                         get_cwq(gcwq->cpu, wq)->max_active = max_active;
3168
3169                 spin_unlock_irq(&gcwq->lock);
3170         }
3171
3172         spin_unlock(&workqueue_lock);
3173 }
3174 EXPORT_SYMBOL_GPL(workqueue_set_max_active);
3175
3176 /**
3177  * workqueue_congested - test whether a workqueue is congested
3178  * @cpu: CPU in question
3179  * @wq: target workqueue
3180  *
3181  * Test whether @wq's cpu workqueue for @cpu is congested.  There is
3182  * no synchronization around this function and the test result is
3183  * unreliable and only useful as advisory hints or for debugging.
3184  *
3185  * RETURNS:
3186  * %true if congested, %false otherwise.
3187  */
3188 bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq)
3189 {
3190         struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
3191
3192         return !list_empty(&cwq->delayed_works);
3193 }
3194 EXPORT_SYMBOL_GPL(workqueue_congested);
3195
3196 /**
3197  * work_cpu - return the last known associated cpu for @work
3198  * @work: the work of interest
3199  *
3200  * RETURNS:
3201  * CPU number if @work was ever queued.  WORK_CPU_NONE otherwise.
3202  */
3203 unsigned int work_cpu(struct work_struct *work)
3204 {
3205         struct global_cwq *gcwq = get_work_gcwq(work);
3206
3207         return gcwq ? gcwq->cpu : WORK_CPU_NONE;
3208 }
3209 EXPORT_SYMBOL_GPL(work_cpu);
3210
3211 /**
3212  * work_busy - test whether a work is currently pending or running
3213  * @work: the work to be tested
3214  *
3215  * Test whether @work is currently pending or running.  There is no
3216  * synchronization around this function and the test result is
3217  * unreliable and only useful as advisory hints or for debugging.
3218  * Especially for reentrant wqs, the pending state might hide the
3219  * running state.
3220  *
3221  * RETURNS:
3222  * OR'd bitmask of WORK_BUSY_* bits.
3223  */
3224 unsigned int work_busy(struct work_struct *work)
3225 {
3226         struct global_cwq *gcwq = get_work_gcwq(work);
3227         unsigned long flags;
3228         unsigned int ret = 0;
3229
3230         if (!gcwq)
3231                 return false;
3232
3233         spin_lock_irqsave(&gcwq->lock, flags);
3234
3235         if (work_pending(work))
3236                 ret |= WORK_BUSY_PENDING;
3237         if (find_worker_executing_work(gcwq, work))
3238                 ret |= WORK_BUSY_RUNNING;
3239
3240         spin_unlock_irqrestore(&gcwq->lock, flags);
3241
3242         return ret;
3243 }
3244 EXPORT_SYMBOL_GPL(work_busy);
3245
3246 /*
3247  * CPU hotplug.
3248  *
3249  * There are two challenges in supporting CPU hotplug.  Firstly, there
3250  * are a lot of assumptions on strong associations among work, cwq and
3251  * gcwq which make migrating pending and scheduled works very
3252  * difficult to implement without impacting hot paths.  Secondly,
3253  * gcwqs serve mix of short, long and very long running works making
3254  * blocked draining impractical.
3255  *
3256  * This is solved by allowing a gcwq to be detached from CPU, running
3257  * it with unbound (rogue) workers and allowing it to be reattached
3258  * later if the cpu comes back online.  A separate thread is created
3259  * to govern a gcwq in such state and is called the trustee of the
3260  * gcwq.
3261  *
3262  * Trustee states and their descriptions.
3263  *
3264  * START        Command state used on startup.  On CPU_DOWN_PREPARE, a
3265  *              new trustee is started with this state.
3266  *
3267  * IN_CHARGE    Once started, trustee will enter this state after
3268  *              assuming the manager role and making all existing
3269  *              workers rogue.  DOWN_PREPARE waits for trustee to
3270  *              enter this state.  After reaching IN_CHARGE, trustee
3271  *              tries to execute the pending worklist until it's empty
3272  *              and the state is set to BUTCHER, or the state is set
3273  *              to RELEASE.
3274  *
3275  * BUTCHER      Command state which is set by the cpu callback after
3276  *              the cpu has went down.  Once this state is set trustee
3277  *              knows that there will be no new works on the worklist
3278  *              and once the worklist is empty it can proceed to
3279  *              killing idle workers.
3280  *
3281  * RELEASE      Command state which is set by the cpu callback if the
3282  *              cpu down has been canceled or it has come online
3283  *              again.  After recognizing this state, trustee stops
3284  *              trying to drain or butcher and clears ROGUE, rebinds
3285  *              all remaining workers back to the cpu and releases
3286  *              manager role.
3287  *
3288  * DONE         Trustee will enter this state after BUTCHER or RELEASE
3289  *              is complete.
3290  *
3291  *          trustee                 CPU                draining
3292  *         took over                down               complete
3293  * START -----------> IN_CHARGE -----------> BUTCHER -----------> DONE
3294  *                        |                     |                  ^
3295  *                        | CPU is back online  v   return workers |
3296  *                         ----------------> RELEASE --------------
3297  */
3298
3299 /**
3300  * trustee_wait_event_timeout - timed event wait for trustee
3301  * @cond: condition to wait for
3302  * @timeout: timeout in jiffies
3303  *
3304  * wait_event_timeout() for trustee to use.  Handles locking and
3305  * checks for RELEASE request.
3306  *
3307  * CONTEXT:
3308  * spin_lock_irq(gcwq->lock) which may be released and regrabbed
3309  * multiple times.  To be used by trustee.
3310  *
3311  * RETURNS:
3312  * Positive indicating left time if @cond is satisfied, 0 if timed
3313  * out, -1 if canceled.
3314  */
3315 #define trustee_wait_event_timeout(cond, timeout) ({                    \
3316         long __ret = (timeout);                                         \
3317         while (!((cond) || (gcwq->trustee_state == TRUSTEE_RELEASE)) && \
3318                __ret) {                                                 \
3319                 spin_unlock_irq(&gcwq->lock);                           \
3320                 __wait_event_timeout(gcwq->trustee_wait, (cond) ||      \
3321                         (gcwq->trustee_state == TRUSTEE_RELEASE),       \
3322                         __ret);                                         \
3323                 spin_lock_irq(&gcwq->lock);                             \
3324         }                                                               \
3325         gcwq->trustee_state == TRUSTEE_RELEASE ? -1 : (__ret);          \
3326 })
3327
3328 /**
3329  * trustee_wait_event - event wait for trustee
3330  * @cond: condition to wait for
3331  *
3332  * wait_event() for trustee to use.  Automatically handles locking and
3333  * checks for CANCEL request.
3334  *
3335  * CONTEXT:
3336  * spin_lock_irq(gcwq->lock) which may be released and regrabbed
3337  * multiple times.  To be used by trustee.
3338  *
3339  * RETURNS:
3340  * 0 if @cond is satisfied, -1 if canceled.
3341  */
3342 #define trustee_wait_event(cond) ({                                     \
3343         long __ret1;                                                    \
3344         __ret1 = trustee_wait_event_timeout(cond, MAX_SCHEDULE_TIMEOUT);\
3345         __ret1 < 0 ? -1 : 0;                                            \
3346 })
3347
3348 static int __cpuinit trustee_thread(void *__gcwq)
3349 {
3350         struct global_cwq *gcwq = __gcwq;
3351         struct worker *worker;
3352         struct work_struct *work;
3353         struct hlist_node *pos;
3354         long rc;
3355         int i;
3356
3357         BUG_ON(gcwq->cpu != smp_processor_id());
3358
3359         spin_lock_irq(&gcwq->lock);
3360         /*
3361          * Claim the manager position and make all workers rogue.
3362          * Trustee must be bound to the target cpu and can't be
3363          * cancelled.
3364          */
3365         BUG_ON(gcwq->cpu != smp_processor_id());
3366         rc = trustee_wait_event(!(gcwq->pool.flags & POOL_MANAGING_WORKERS));
3367         BUG_ON(rc < 0);
3368
3369         gcwq->pool.flags |= POOL_MANAGING_WORKERS;
3370
3371         list_for_each_entry(worker, &gcwq->pool.idle_list, entry)
3372                 worker->flags |= WORKER_ROGUE;
3373
3374         for_each_busy_worker(worker, i, pos, gcwq)
3375                 worker->flags |= WORKER_ROGUE;
3376
3377         /*
3378          * Call schedule() so that we cross rq->lock and thus can
3379          * guarantee sched callbacks see the rogue flag.  This is
3380          * necessary as scheduler callbacks may be invoked from other
3381          * cpus.
3382          */
3383         spin_unlock_irq(&gcwq->lock);
3384         schedule();
3385         spin_lock_irq(&gcwq->lock);
3386
3387         /*
3388          * Sched callbacks are disabled now.  Zap nr_running.  After
3389          * this, nr_running stays zero and need_more_worker() and
3390          * keep_working() are always true as long as the worklist is
3391          * not empty.
3392          */
3393         atomic_set(get_pool_nr_running(&gcwq->pool), 0);
3394
3395         spin_unlock_irq(&gcwq->lock);
3396         del_timer_sync(&gcwq->pool.idle_timer);
3397         spin_lock_irq(&gcwq->lock);
3398
3399         /*
3400          * We're now in charge.  Notify and proceed to drain.  We need
3401          * to keep the gcwq running during the whole CPU down
3402          * procedure as other cpu hotunplug callbacks may need to
3403          * flush currently running tasks.
3404          */
3405         gcwq->trustee_state = TRUSTEE_IN_CHARGE;
3406         wake_up_all(&gcwq->trustee_wait);
3407
3408         /*
3409          * The original cpu is in the process of dying and may go away
3410          * anytime now.  When that happens, we and all workers would
3411          * be migrated to other cpus.  Try draining any left work.  We
3412          * want to get it over with ASAP - spam rescuers, wake up as
3413          * many idlers as necessary and create new ones till the
3414          * worklist is empty.  Note that if the gcwq is frozen, there
3415          * may be frozen works in freezable cwqs.  Don't declare
3416          * completion while frozen.
3417          */
3418         while (gcwq->pool.nr_workers != gcwq->pool.nr_idle ||
3419                gcwq->flags & GCWQ_FREEZING ||
3420                gcwq->trustee_state == TRUSTEE_IN_CHARGE) {
3421                 int nr_works = 0;
3422
3423                 list_for_each_entry(work, &gcwq->pool.worklist, entry) {
3424                         send_mayday(work);
3425                         nr_works++;
3426                 }
3427
3428                 list_for_each_entry(worker, &gcwq->pool.idle_list, entry) {
3429                         if (!nr_works--)
3430                                 break;
3431                         wake_up_process(worker->task);
3432                 }
3433
3434                 if (need_to_create_worker(&gcwq->pool)) {
3435                         spin_unlock_irq(&gcwq->lock);
3436                         worker = create_worker(&gcwq->pool, false);
3437                         spin_lock_irq(&gcwq->lock);
3438                         if (worker) {
3439                                 worker->flags |= WORKER_ROGUE;
3440                                 start_worker(worker);
3441                         }
3442                 }
3443
3444                 /* give a breather */
3445                 if (trustee_wait_event_timeout(false, TRUSTEE_COOLDOWN) < 0)
3446                         break;
3447         }
3448
3449         /*
3450          * Either all works have been scheduled and cpu is down, or
3451          * cpu down has already been canceled.  Wait for and butcher
3452          * all workers till we're canceled.
3453          */
3454         do {
3455                 rc = trustee_wait_event(!list_empty(&gcwq->pool.idle_list));
3456                 while (!list_empty(&gcwq->pool.idle_list))
3457                         destroy_worker(list_first_entry(&gcwq->pool.idle_list,
3458                                                         struct worker, entry));
3459         } while (gcwq->pool.nr_workers && rc >= 0);
3460
3461         /*
3462          * At this point, either draining has completed and no worker
3463          * is left, or cpu down has been canceled or the cpu is being
3464          * brought back up.  There shouldn't be any idle one left.
3465          * Tell the remaining busy ones to rebind once it finishes the
3466          * currently scheduled works by scheduling the rebind_work.
3467          */
3468         WARN_ON(!list_empty(&gcwq->pool.idle_list));
3469
3470         for_each_busy_worker(worker, i, pos, gcwq) {
3471                 struct work_struct *rebind_work = &worker->rebind_work;
3472
3473                 /*
3474                  * Rebind_work may race with future cpu hotplug
3475                  * operations.  Use a separate flag to mark that
3476                  * rebinding is scheduled.
3477                  */
3478                 worker->flags |= WORKER_REBIND;
3479                 worker->flags &= ~WORKER_ROGUE;
3480
3481                 /* queue rebind_work, wq doesn't matter, use the default one */
3482                 if (test_and_set_bit(WORK_STRUCT_PENDING_BIT,
3483                                      work_data_bits(rebind_work)))
3484                         continue;
3485
3486                 debug_work_activate(rebind_work);
3487                 insert_work(get_cwq(gcwq->cpu, system_wq), rebind_work,
3488                             worker->scheduled.next,
3489                             work_color_to_flags(WORK_NO_COLOR));
3490         }
3491
3492         /* relinquish manager role */
3493         gcwq->pool.flags &= ~POOL_MANAGING_WORKERS;
3494
3495         /* notify completion */
3496         gcwq->trustee = NULL;
3497         gcwq->trustee_state = TRUSTEE_DONE;
3498         wake_up_all(&gcwq->trustee_wait);
3499         spin_unlock_irq(&gcwq->lock);
3500         return 0;
3501 }
3502
3503 /**
3504  * wait_trustee_state - wait for trustee to enter the specified state
3505  * @gcwq: gcwq the trustee of interest belongs to
3506  * @state: target state to wait for
3507  *
3508  * Wait for the trustee to reach @state.  DONE is already matched.
3509  *
3510  * CONTEXT:
3511  * spin_lock_irq(gcwq->lock) which may be released and regrabbed
3512  * multiple times.  To be used by cpu_callback.
3513  */
3514 static void __cpuinit wait_trustee_state(struct global_cwq *gcwq, int state)
3515 __releases(&gcwq->lock)
3516 __acquires(&gcwq->lock)
3517 {
3518         if (!(gcwq->trustee_state == state ||
3519               gcwq->trustee_state == TRUSTEE_DONE)) {
3520                 spin_unlock_irq(&gcwq->lock);
3521                 __wait_event(gcwq->trustee_wait,
3522                              gcwq->trustee_state == state ||
3523                              gcwq->trustee_state == TRUSTEE_DONE);
3524                 spin_lock_irq(&gcwq->lock);
3525         }
3526 }
3527
3528 static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
3529                                                 unsigned long action,
3530                                                 void *hcpu)
3531 {
3532         unsigned int cpu = (unsigned long)hcpu;
3533         struct global_cwq *gcwq = get_gcwq(cpu);
3534         struct task_struct *new_trustee = NULL;
3535         struct worker *uninitialized_var(new_worker);
3536         unsigned long flags;
3537
3538         action &= ~CPU_TASKS_FROZEN;
3539
3540         switch (action) {
3541         case CPU_DOWN_PREPARE:
3542                 new_trustee = kthread_create(trustee_thread, gcwq,
3543                                              "workqueue_trustee/%d\n", cpu);
3544                 if (IS_ERR(new_trustee))
3545                         return notifier_from_errno(PTR_ERR(new_trustee));
3546                 kthread_bind(new_trustee, cpu);
3547                 /* fall through */
3548         case CPU_UP_PREPARE:
3549                 BUG_ON(gcwq->pool.first_idle);
3550                 new_worker = create_worker(&gcwq->pool, false);
3551                 if (!new_worker) {
3552                         if (new_trustee)
3553                                 kthread_stop(new_trustee);
3554                         return NOTIFY_BAD;
3555                 }
3556         }
3557
3558         /* some are called w/ irq disabled, don't disturb irq status */
3559         spin_lock_irqsave(&gcwq->lock, flags);
3560
3561         switch (action) {
3562         case CPU_DOWN_PREPARE:
3563                 /* initialize trustee and tell it to acquire the gcwq */
3564                 BUG_ON(gcwq->trustee || gcwq->trustee_state != TRUSTEE_DONE);
3565                 gcwq->trustee = new_trustee;
3566                 gcwq->trustee_state = TRUSTEE_START;
3567                 wake_up_process(gcwq->trustee);
3568                 wait_trustee_state(gcwq, TRUSTEE_IN_CHARGE);
3569                 /* fall through */
3570         case CPU_UP_PREPARE:
3571                 BUG_ON(gcwq->pool.first_idle);
3572                 gcwq->pool.first_idle = new_worker;
3573                 break;
3574
3575         case CPU_DYING:
3576                 /*
3577                  * Before this, the trustee and all workers except for
3578                  * the ones which are still executing works from
3579                  * before the last CPU down must be on the cpu.  After
3580                  * this, they'll all be diasporas.
3581                  */
3582                 gcwq->flags |= GCWQ_DISASSOCIATED;
3583                 break;
3584
3585         case CPU_POST_DEAD:
3586                 gcwq->trustee_state = TRUSTEE_BUTCHER;
3587                 /* fall through */
3588         case CPU_UP_CANCELED:
3589                 destroy_worker(gcwq->pool.first_idle);
3590                 gcwq->pool.first_idle = NULL;
3591                 break;
3592
3593         case CPU_DOWN_FAILED:
3594         case CPU_ONLINE:
3595                 gcwq->flags &= ~GCWQ_DISASSOCIATED;
3596                 if (gcwq->trustee_state != TRUSTEE_DONE) {
3597                         gcwq->trustee_state = TRUSTEE_RELEASE;
3598                         wake_up_process(gcwq->trustee);
3599                         wait_trustee_state(gcwq, TRUSTEE_DONE);
3600                 }
3601
3602                 /*
3603                  * Trustee is done and there might be no worker left.
3604                  * Put the first_idle in and request a real manager to
3605                  * take a look.
3606                  */
3607                 spin_unlock_irq(&gcwq->lock);
3608                 kthread_bind(gcwq->pool.first_idle->task, cpu);
3609                 spin_lock_irq(&gcwq->lock);
3610                 gcwq->pool.flags |= POOL_MANAGE_WORKERS;
3611                 start_worker(gcwq->pool.first_idle);
3612                 gcwq->pool.first_idle = NULL;
3613                 break;
3614         }
3615
3616         spin_unlock_irqrestore(&gcwq->lock, flags);
3617
3618         return notifier_from_errno(0);
3619 }
3620
3621 #ifdef CONFIG_SMP
3622
3623 struct work_for_cpu {
3624         struct completion completion;
3625         long (*fn)(void *);
3626         void *arg;
3627         long ret;
3628 };
3629
3630 static int do_work_for_cpu(void *_wfc)
3631 {
3632         struct work_for_cpu *wfc = _wfc;
3633         wfc->ret = wfc->fn(wfc->arg);
3634         complete(&wfc->completion);
3635         return 0;
3636 }
3637
3638 /**
3639  * work_on_cpu - run a function in user context on a particular cpu
3640  * @cpu: the cpu to run on
3641  * @fn: the function to run
3642  * @arg: the function arg
3643  *
3644  * This will return the value @fn returns.
3645  * It is up to the caller to ensure that the cpu doesn't go offline.
3646  * The caller must not hold any locks which would prevent @fn from completing.
3647  */
3648 long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
3649 {
3650         struct task_struct *sub_thread;
3651         struct work_for_cpu wfc = {
3652                 .completion = COMPLETION_INITIALIZER_ONSTACK(wfc.completion),
3653                 .fn = fn,
3654                 .arg = arg,
3655         };
3656
3657         sub_thread = kthread_create(do_work_for_cpu, &wfc, "work_for_cpu");
3658         if (IS_ERR(sub_thread))
3659                 return PTR_ERR(sub_thread);
3660         kthread_bind(sub_thread, cpu);
3661         wake_up_process(sub_thread);
3662         wait_for_completion(&wfc.completion);
3663         return wfc.ret;
3664 }
3665 EXPORT_SYMBOL_GPL(work_on_cpu);
3666 #endif /* CONFIG_SMP */
3667
3668 #ifdef CONFIG_FREEZER
3669
3670 /**
3671  * freeze_workqueues_begin - begin freezing workqueues
3672  *
3673  * Start freezing workqueues.  After this function returns, all freezable
3674  * workqueues will queue new works to their frozen_works list instead of
3675  * gcwq->worklist.
3676  *
3677  * CONTEXT:
3678  * Grabs and releases workqueue_lock and gcwq->lock's.
3679  */
3680 void freeze_workqueues_begin(void)
3681 {
3682         unsigned int cpu;
3683
3684         spin_lock(&workqueue_lock);
3685
3686         BUG_ON(workqueue_freezing);
3687         workqueue_freezing = true;
3688
3689         for_each_gcwq_cpu(cpu) {
3690                 struct global_cwq *gcwq = get_gcwq(cpu);
3691                 struct workqueue_struct *wq;
3692
3693                 spin_lock_irq(&gcwq->lock);
3694
3695                 BUG_ON(gcwq->flags & GCWQ_FREEZING);
3696                 gcwq->flags |= GCWQ_FREEZING;
3697
3698                 list_for_each_entry(wq, &workqueues, list) {
3699                         struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
3700
3701                         if (cwq && wq->flags & WQ_FREEZABLE)
3702                                 cwq->max_active = 0;
3703                 }
3704
3705                 spin_unlock_irq(&gcwq->lock);
3706         }
3707
3708         spin_unlock(&workqueue_lock);
3709 }
3710
3711 /**
3712  * freeze_workqueues_busy - are freezable workqueues still busy?
3713  *
3714  * Check whether freezing is complete.  This function must be called
3715  * between freeze_workqueues_begin() and thaw_workqueues().
3716  *
3717  * CONTEXT:
3718  * Grabs and releases workqueue_lock.
3719  *
3720  * RETURNS:
3721  * %true if some freezable workqueues are still busy.  %false if freezing
3722  * is complete.
3723  */
3724 bool freeze_workqueues_busy(void)
3725 {
3726         unsigned int cpu;
3727         bool busy = false;
3728
3729         spin_lock(&workqueue_lock);
3730
3731         BUG_ON(!workqueue_freezing);
3732
3733         for_each_gcwq_cpu(cpu) {
3734                 struct workqueue_struct *wq;
3735                 /*
3736                  * nr_active is monotonically decreasing.  It's safe
3737                  * to peek without lock.
3738                  */
3739                 list_for_each_entry(wq, &workqueues, list) {
3740                         struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
3741
3742                         if (!cwq || !(wq->flags & WQ_FREEZABLE))
3743                                 continue;
3744
3745                         BUG_ON(cwq->nr_active < 0);
3746                         if (cwq->nr_active) {
3747                                 busy = true;
3748                                 goto out_unlock;
3749                         }
3750                 }
3751         }
3752 out_unlock:
3753         spin_unlock(&workqueue_lock);
3754         return busy;
3755 }
3756
3757 /**
3758  * thaw_workqueues - thaw workqueues
3759  *
3760  * Thaw workqueues.  Normal queueing is restored and all collected
3761  * frozen works are transferred to their respective gcwq worklists.
3762  *
3763  * CONTEXT:
3764  * Grabs and releases workqueue_lock and gcwq->lock's.
3765  */
3766 void thaw_workqueues(void)
3767 {
3768         unsigned int cpu;
3769
3770         spin_lock(&workqueue_lock);
3771
3772         if (!workqueue_freezing)
3773                 goto out_unlock;
3774
3775         for_each_gcwq_cpu(cpu) {
3776                 struct global_cwq *gcwq = get_gcwq(cpu);
3777                 struct workqueue_struct *wq;
3778
3779                 spin_lock_irq(&gcwq->lock);
3780
3781                 BUG_ON(!(gcwq->flags & GCWQ_FREEZING));
3782                 gcwq->flags &= ~GCWQ_FREEZING;
3783
3784                 list_for_each_entry(wq, &workqueues, list) {
3785                         struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
3786
3787                         if (!cwq || !(wq->flags & WQ_FREEZABLE))
3788                                 continue;
3789
3790                         /* restore max_active and repopulate worklist */
3791                         cwq->max_active = wq->saved_max_active;
3792
3793                         while (!list_empty(&cwq->delayed_works) &&
3794                                cwq->nr_active < cwq->max_active)
3795                                 cwq_activate_first_delayed(cwq);
3796                 }
3797
3798                 wake_up_worker(&gcwq->pool);
3799
3800                 spin_unlock_irq(&gcwq->lock);
3801         }
3802
3803         workqueue_freezing = false;
3804 out_unlock:
3805         spin_unlock(&workqueue_lock);
3806 }
3807 #endif /* CONFIG_FREEZER */
3808
3809 static int __init init_workqueues(void)
3810 {
3811         unsigned int cpu;
3812         int i;
3813
3814         cpu_notifier(workqueue_cpu_callback, CPU_PRI_WORKQUEUE);
3815
3816         /* initialize gcwqs */
3817         for_each_gcwq_cpu(cpu) {
3818                 struct global_cwq *gcwq = get_gcwq(cpu);
3819
3820                 spin_lock_init(&gcwq->lock);
3821                 gcwq->pool.gcwq = gcwq;
3822                 INIT_LIST_HEAD(&gcwq->pool.worklist);
3823                 gcwq->cpu = cpu;
3824                 gcwq->flags |= GCWQ_DISASSOCIATED;
3825
3826                 INIT_LIST_HEAD(&gcwq->pool.idle_list);
3827                 for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++)
3828                         INIT_HLIST_HEAD(&gcwq->busy_hash[i]);
3829
3830                 init_timer_deferrable(&gcwq->pool.idle_timer);
3831                 gcwq->pool.idle_timer.function = idle_worker_timeout;
3832                 gcwq->pool.idle_timer.data = (unsigned long)&gcwq->pool;
3833
3834                 setup_timer(&gcwq->pool.mayday_timer, gcwq_mayday_timeout,
3835                             (unsigned long)&gcwq->pool);
3836
3837                 ida_init(&gcwq->pool.worker_ida);
3838
3839                 gcwq->trustee_state = TRUSTEE_DONE;
3840                 init_waitqueue_head(&gcwq->trustee_wait);
3841         }
3842
3843         /* create the initial worker */
3844         for_each_online_gcwq_cpu(cpu) {
3845                 struct global_cwq *gcwq = get_gcwq(cpu);
3846                 struct worker *worker;
3847
3848                 if (cpu != WORK_CPU_UNBOUND)
3849                         gcwq->flags &= ~GCWQ_DISASSOCIATED;
3850                 worker = create_worker(&gcwq->pool, true);
3851                 BUG_ON(!worker);
3852                 spin_lock_irq(&gcwq->lock);
3853                 start_worker(worker);
3854                 spin_unlock_irq(&gcwq->lock);
3855         }
3856
3857         system_wq = alloc_workqueue("events", 0, 0);
3858         system_long_wq = alloc_workqueue("events_long", 0, 0);
3859         system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0);
3860         system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
3861                                             WQ_UNBOUND_MAX_ACTIVE);
3862         system_freezable_wq = alloc_workqueue("events_freezable",
3863                                               WQ_FREEZABLE, 0);
3864         system_nrt_freezable_wq = alloc_workqueue("events_nrt_freezable",
3865                         WQ_NON_REENTRANT | WQ_FREEZABLE, 0);
3866         BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq ||
3867                !system_unbound_wq || !system_freezable_wq ||
3868                 !system_nrt_freezable_wq);
3869         return 0;
3870 }
3871 early_initcall(init_workqueues);