1 // SPDX-License-Identifier: GPL-2.0-only
3 * kernel/workqueue.c - generic async execution with shared worker pool
10 * Kai Petzke <wpp@marie.physik.tu-berlin.de>
22 * pools for workqueues which are not bound to any specific CPU - the
25 * Please read Documentation/core-api/workqueue.rst for details.
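/*
 * Editor's illustrative sketch, not part of workqueue.c: minimal use of the
 * API implemented in this file. All "example_*" names are invented.
 */
#include <linux/printk.h>
#include <linux/workqueue.h>

static void example_fn(struct work_struct *work)
{
	pr_info("example work ran\n");
}

static DECLARE_WORK(example_work, example_fn);
static struct workqueue_struct *example_wq;

static int example_init(void)
{
	/* an unbound workqueue served by the shared unbound worker pools */
	example_wq = alloc_workqueue("example_wq", WQ_UNBOUND, 0);
	if (!example_wq)
		return -ENOMEM;
	queue_work(example_wq, &example_work);
	return 0;
}

static void example_exit(void)
{
	/* destroy_workqueue() drains remaining work items before freeing */
	destroy_workqueue(example_wq);
}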
79 * BH pool is per-CPU and always DISASSOCIATED.
108 UNBOUND_POOL_HASH_ORDER = 6, /* hashed by pool->attrs */
128 WORKER_ID_LEN = 10 + WQ_NAME_LEN, /* "kworker/R-" + WQ_NAME_LEN */
142 * I: Modifiable by initialization/destruction paths and read-only for
148 * L: pool->lock protected. Access with pool->lock held.
150 * LN: pool->lock and wq_node_nr_active->lock protected for writes. Either for
153 * K: Only modified by worker while holding pool->lock. Can be safely read by
154 * self, while holding pool->lock or from IRQ context if %current is the
165 * PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads.
167 * PWR: wq_pool_mutex and wq->mutex protected for writes. Either or
170 * WQ: wq->mutex protected.
172 * WR: wq->mutex protected for writes. RCU protected for reads.
174 * WO: wq->mutex protected for writes. Updated with WRITE_ONCE() and can be read
197 * but w/ pool->lock held. The readers grab pool->lock and are
234 * Per-pool_workqueue statistics. These can be monitored using
242 PWQ_STAT_CM_WAKEUP, /* concurrency-management worker wakeups */
251 * The per-pool workqueue. While queued, bits below WORK_PWQ_SHIFT
252 * of work_struct->data are used for flags and the remaining high bits
269 * When pwq->nr_active >= max_active, a new work item is queued to
270 * pwq->inactive_works instead of pool->worklist and marked with
274 * nr_active and all work items in pwq->inactive_works are marked with
276 * in pwq->inactive_works. Some of them are ready to run in
277 * pool->worklist or worker->scheduled. Those work items are only struct
279 * in nr_active. For non-barrier work item, it is marked with
280 * WORK_STRUCT_INACTIVE iff it is in pwq->inactive_works.
284 struct list_head pending_node; /* LN: node on wq_node_nr_active->pending_pwqs */
285 struct list_head pwqs_node; /* WR: node on wq->pwqs */
286 struct list_head mayday_node; /* MD: node on wq->maydays */
294 * grabbing wq->mutex.
312 * Unlike in a per-cpu workqueue where max_active limits its concurrency level
317 * The following struct is used to enforce per-node max_active. When a pwq wants
318 * to start executing a work item, it should increment ->nr using
319 * tryinc_node_nr_active(). If acquisition fails due to ->nr already being over
320 * ->max, the pwq is queued on ->pending_pwqs. As in-flight work items finish
321 * and decrement ->nr, node_activate_pending_pwq() activates the pending pwqs in
322 * round-robin order.
325 int max; /* per-node max_active */
326 atomic_t nr; /* per-node nr_active */
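/*
 * Editor's illustrative sketch, not part of workqueue.c: the lockless
 * bounded-increment pattern that tryinc_node_nr_active() applies to
 * wq_node_nr_active->nr. Names here are invented.
 */
#include <linux/atomic.h>

static bool tryinc_bounded(atomic_t *nr, int max)
{
	int old = atomic_read(nr);

	while (old < max) {
		/* on failure, atomic_try_cmpxchg() refreshes @old; retry */
		if (atomic_try_cmpxchg_relaxed(nr, &old, old + 1))
			return true;
	}
	return false;
}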
381 struct pool_workqueue __rcu * __percpu *cpu_pwq; /* I: per-cpu pwqs */
382 struct wq_node_nr_active *node_nr_active[]; /* I: per-node nr_active */
387 * See the comment above workqueue_attrs->affn_scope.
391 cpumask_var_t *pod_cpus; /* pod -> cpus */
392 int *pod_node; /* pod -> node */
393 int *cpu_pod; /* cpu -> pod */
412 * Per-cpu work items which run for longer than the following threshold are
414 * management to prevent them from noticeably delaying other per-cpu work items.
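/*
 * Editor's illustrative sketch, not part of workqueue.c: work items that are
 * known in advance to hog the CPU can opt out of concurrency management with
 * WQ_CPU_INTENSIVE instead of relying on the automatic detection described
 * above. "crunch_wq" is an invented name.
 */
#include <linux/workqueue.h>

static struct workqueue_struct *crunch_wq;

static int crunch_init(void)
{
	/* per-cpu queue whose work items don't delay other per-cpu works */
	crunch_wq = alloc_workqueue("crunch", WQ_CPU_INTENSIVE, 0);
	return crunch_wq ? 0 : -ENOMEM;
}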
442 static DEFINE_RAW_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
469 * following always forces round-robin CPU selection on unbound work items
485 /* the per-cpu worker pools */
490 /* PL: hash of all unbound pools keyed by pool->attrs */
502 * worker to avoid A-A deadlocks.
540 !lockdep_is_held(&wq->mutex) && \
542 "RCU, wq->mutex or wq_pool_mutex should be held")
555 * for_each_pool - iterate through all worker_pools in the system
572 * for_each_pool_worker - iterate through all workers of a worker_pool
582 list_for_each_entry((worker), &(pool)->workers, node) \
587 * for_each_pwq - iterate through all pool_workqueues of the specified workqueue
591 * This must be called either with wq->mutex held or RCU read locked.
599 list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node, \
600 lockdep_is_held(&(wq->mutex)))
608 return ((struct work_struct *) addr)->func; in work_debug_hint()
620 * - an active object is initialized
638 * - an active object is freed
689 destroy_timer_on_stack(&work->timer); in destroy_delayed_work_on_stack()
690 debug_object_free(&work->work, &work_debug_descr); in destroy_delayed_work_on_stack()
700 * worker_pool_assign_id - allocate ID and assign it to @pool
704 * successfully, -errno on failure.
715 pool->id = ret; in worker_pool_assign_id()
725 return per_cpu_ptr(wq->cpu_pwq, cpu); in unbound_pwq_slot()
727 return &wq->dfl_pwq; in unbound_pwq_slot()
735 lockdep_is_held(&wq->mutex)); in unbound_pwq()
739 * unbound_effective_cpumask - effective cpumask of an unbound workqueue
742 * @wq->unbound_attrs->cpumask contains the cpumask requested by the user which
748 return unbound_pwq(wq, -1)->pool->attrs->__pod_cpumask; in unbound_effective_cpumask()
759 ((1 << WORK_STRUCT_COLOR_BITS) - 1); in get_work_color()
769 return (pool->flags & POOL_BH) ? WORK_OFFQ_BH : 0; in pool_offq_flags()
778 * can be used to set the pwq, pool or clear work->data. These functions should
779 * only be called while the work is owned - ie. while the PENDING bit is set.
789 atomic_long_set(&work->data, data | work_static(work)); in set_work_data()
821 * work->current_func, which is executed afterwards. This possible in set_work_pool_and_clear_pending()
826 * ---------------------------- -------------------------------- in set_work_pool_and_clear_pending()
834 * 7 work->current_func() { in set_work_pool_and_clear_pending()
856 unsigned long data = atomic_long_read(&work->data); in get_work_pwq()
865 * get_work_pool - return the worker_pool a given work was associated with
881 unsigned long data = atomic_long_read(&work->data); in get_work_pool()
887 return work_struct_pwq(data)->pool; in get_work_pool()
898 return (v >> shift) & ((1U << bits) - 1); in shift_and_mask()
905 offqd->pool_id = shift_and_mask(data, WORK_OFFQ_POOL_SHIFT, in work_offqd_unpack()
907 offqd->disable = shift_and_mask(data, WORK_OFFQ_DISABLE_SHIFT, in work_offqd_unpack()
909 offqd->flags = data & WORK_OFFQ_FLAG_MASK; in work_offqd_unpack()
914 return ((unsigned long)offqd->disable << WORK_OFFQ_DISABLE_SHIFT) | in work_offqd_pack_flags()
915 ((unsigned long)offqd->flags); in work_offqd_pack_flags()
921 * they're being called with pool->lock held.
934 return !list_empty(&pool->worklist) && !pool->nr_running; in need_more_worker()
940 return pool->nr_idle; in may_start_working()
946 return !list_empty(&pool->worklist) && (pool->nr_running <= 1); in keep_working()
958 bool managing = pool->flags & POOL_MANAGER_ACTIVE; in too_many_workers()
959 int nr_idle = pool->nr_idle + managing; /* manager is considered idle */ in too_many_workers()
960 int nr_busy = pool->nr_workers - nr_idle; in too_many_workers()
962 return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy; in too_many_workers()
966 * worker_set_flags - set worker flags and adjust nr_running accordingly
970 * Set @flags in @worker->flags and adjust nr_running accordingly.
974 struct worker_pool *pool = worker->pool; in worker_set_flags()
976 lockdep_assert_held(&pool->lock); in worker_set_flags()
980 !(worker->flags & WORKER_NOT_RUNNING)) { in worker_set_flags()
981 pool->nr_running--; in worker_set_flags()
984 worker->flags |= flags; in worker_set_flags()
988 * worker_clr_flags - clear worker flags and adjust nr_running accordingly
992 * Clear @flags in @worker->flags and adjust nr_running accordingly.
996 struct worker_pool *pool = worker->pool; in worker_clr_flags()
997 unsigned int oflags = worker->flags; in worker_clr_flags()
999 lockdep_assert_held(&pool->lock); in worker_clr_flags()
1001 worker->flags &= ~flags; in worker_clr_flags()
1009 if (!(worker->flags & WORKER_NOT_RUNNING)) in worker_clr_flags()
1010 pool->nr_running++; in worker_clr_flags()
1013 /* Return the first idle worker. Called with pool->lock held. */
1016 if (unlikely(list_empty(&pool->idle_list))) in first_idle_worker()
1019 return list_first_entry(&pool->idle_list, struct worker, entry); in first_idle_worker()
1023 * worker_enter_idle - enter idle state
1030 * raw_spin_lock_irq(pool->lock).
1034 struct worker_pool *pool = worker->pool; in worker_enter_idle()
1036 if (WARN_ON_ONCE(worker->flags & WORKER_IDLE) || in worker_enter_idle()
1037 WARN_ON_ONCE(!list_empty(&worker->entry) && in worker_enter_idle()
1038 (worker->hentry.next || worker->hentry.pprev))) in worker_enter_idle()
1042 worker->flags |= WORKER_IDLE; in worker_enter_idle()
1043 pool->nr_idle++; in worker_enter_idle()
1044 worker->last_active = jiffies; in worker_enter_idle()
1047 list_add(&worker->entry, &pool->idle_list); in worker_enter_idle()
1049 if (too_many_workers(pool) && !timer_pending(&pool->idle_timer)) in worker_enter_idle()
1050 mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT); in worker_enter_idle()
1053 WARN_ON_ONCE(pool->nr_workers == pool->nr_idle && pool->nr_running); in worker_enter_idle()
1057 * worker_leave_idle - leave idle state
1063 * raw_spin_lock_irq(pool->lock).
1067 struct worker_pool *pool = worker->pool; in worker_leave_idle()
1069 if (WARN_ON_ONCE(!(worker->flags & WORKER_IDLE))) in worker_leave_idle()
1072 pool->nr_idle--; in worker_leave_idle()
1073 list_del_init(&worker->entry); in worker_leave_idle()
1077 * find_worker_executing_work - find worker which is executing a work
1082 * @pool->busy_hash which is keyed by the address of @work. For a worker
1103 * raw_spin_lock_irq(pool->lock).
1114 hash_for_each_possible(pool->busy_hash, worker, hentry, in find_worker_executing_work()
1116 if (worker->current_work == work && in find_worker_executing_work()
1117 worker->current_func == work->func) in find_worker_executing_work()
1124 * move_linked_works - move linked works to a list
1135 * raw_spin_lock_irq(pool->lock).
1147 list_move_tail(&work->entry, head); in move_linked_works()
1162 * assign_work - assign a work item and its linked work items to a worker
1180 struct worker_pool *pool = worker->pool; in assign_work()
1183 lockdep_assert_held(&pool->lock); in assign_work()
1195 move_linked_works(work, &collision->scheduled, nextp); in assign_work()
1199 move_linked_works(work, &worker->scheduled, nextp); in assign_work()
1205 int high = pool->attrs->nice == HIGHPRI_NICE_LEVEL ? 1 : 0; in bh_pool_irq_work()
1207 return &per_cpu(bh_pool_irq_works, pool->cpu)[high]; in bh_pool_irq_work()
1214 if (unlikely(pool->cpu != smp_processor_id() && in kick_bh_pool()
1215 !(pool->flags & POOL_BH_DRAINING))) { in kick_bh_pool()
1216 irq_work_queue_on(bh_pool_irq_work(pool), pool->cpu); in kick_bh_pool()
1220 if (pool->attrs->nice == HIGHPRI_NICE_LEVEL) in kick_bh_pool()
1227 * kick_pool - wake up an idle worker if necessary
1238 lockdep_assert_held(&pool->lock); in kick_pool()
1243 if (pool->flags & POOL_BH) { in kick_pool()
1248 p = worker->task; in kick_pool()
1259 * so, setting the wake_cpu won't do anything. As this is a best-effort in kick_pool()
1260 * optimization and the race window is narrow, let's leave as-is for in kick_pool()
1264 * If @pool has non-strict affinity, @worker might have ended up outside in kick_pool()
1267 if (!pool->attrs->affn_strict && in kick_pool()
1268 !cpumask_test_cpu(p->wake_cpu, pool->attrs->__pod_cpumask)) { in kick_pool()
1269 struct work_struct *work = list_first_entry(&pool->worklist, in kick_pool()
1271 int wake_cpu = cpumask_any_and_distribute(pool->attrs->__pod_cpumask, in kick_pool()
1274 p->wake_cpu = wake_cpu; in kick_pool()
1275 get_work_pwq(work)->stats[PWQ_STAT_REPATRIATED]++; in kick_pool()
1286 * Concurrency-managed per-cpu work items that hog CPU for longer than
1288 * which prevents them from stalling other concurrency-managed work items. If a
1316 if (ent->func == func) in wci_find_ent()
1335 cnt = atomic64_inc_return_relaxed(&ent->cnt); in wq_cpu_intensive_report()
1338 is_power_of_2(cnt + 1 - wq_cpu_intensive_warning_thresh)) in wq_cpu_intensive_report()
1340 ent->func, wq_cpu_intensive_thresh_us, in wq_cpu_intensive_report()
1341 atomic64_read(&ent->cnt)); in wq_cpu_intensive_report()
1348 * noise already. in wq_cpu_intensive_report()
1366 ent->func = func; in wq_cpu_intensive_report()
1367 atomic64_set(&ent->cnt, 0); in wq_cpu_intensive_report()
1368 hash_add_rcu(wci_hash, &ent->hash_node, (unsigned long)func); in wq_cpu_intensive_report()
1380 * wq_worker_running - a worker is running again
1389 if (!READ_ONCE(worker->sleeping)) in wq_worker_running()
1395 * and leave with an unexpected pool->nr_running == 1 on the newly unbound in wq_worker_running()
1399 if (!(worker->flags & WORKER_NOT_RUNNING)) in wq_worker_running()
1400 worker->pool->nr_running++; in wq_worker_running()
1404 * CPU intensive auto-detection cares about how long a work item hogged in wq_worker_running()
1407 worker->current_at = worker->task->se.sum_exec_runtime; in wq_worker_running()
1409 WRITE_ONCE(worker->sleeping, 0); in wq_worker_running()
1413 * wq_worker_sleeping - a worker is going to sleep
1429 if (worker->flags & WORKER_NOT_RUNNING) in wq_worker_sleeping()
1432 pool = worker->pool; in wq_worker_sleeping()
1435 if (READ_ONCE(worker->sleeping)) in wq_worker_sleeping()
1438 WRITE_ONCE(worker->sleeping, 1); in wq_worker_sleeping()
1439 raw_spin_lock_irq(&pool->lock); in wq_worker_sleeping()
1446 if (worker->flags & WORKER_NOT_RUNNING) { in wq_worker_sleeping()
1447 raw_spin_unlock_irq(&pool->lock); in wq_worker_sleeping()
1451 pool->nr_running--; in wq_worker_sleeping()
1453 worker->current_pwq->stats[PWQ_STAT_CM_WAKEUP]++; in wq_worker_sleeping()
1455 raw_spin_unlock_irq(&pool->lock); in wq_worker_sleeping()
1459 * wq_worker_tick - a scheduler tick occurred while a kworker is running
1468 struct pool_workqueue *pwq = worker->current_pwq; in wq_worker_tick()
1469 struct worker_pool *pool = worker->pool; in wq_worker_tick()
1474 pwq->stats[PWQ_STAT_CPU_TIME] += TICK_USEC; in wq_worker_tick()
1482 * CPU_INTENSIVE to avoid stalling other concurrency-managed work items. in wq_worker_tick()
1484 * @worker->sleeping being set means that @worker is in the process of in wq_worker_tick()
1486 * @pool->nr_running until it wakes up. As wq_worker_sleeping() also in wq_worker_tick()
1487 * decrements ->nr_running, setting CPU_INTENSIVE here can lead to in wq_worker_tick()
1491 if ((worker->flags & WORKER_NOT_RUNNING) || READ_ONCE(worker->sleeping) || in wq_worker_tick()
1492 worker->task->se.sum_exec_runtime - worker->current_at < in wq_worker_tick()
1496 raw_spin_lock(&pool->lock); in wq_worker_tick()
1499 wq_cpu_intensive_report(worker->current_func); in wq_worker_tick()
1500 pwq->stats[PWQ_STAT_CPU_INTENSIVE]++; in wq_worker_tick()
1503 pwq->stats[PWQ_STAT_CM_WAKEUP]++; in wq_worker_tick()
1505 raw_spin_unlock(&pool->lock); in wq_worker_tick()
1509 * wq_worker_last_func - retrieve worker's last work function
1516 * raw_spin_lock_irq(rq->lock)
1520 * dequeuing, to allow periodic aggregation to shut off when that
1523 * As this function doesn't involve any workqueue-related locking, it
1536 return worker->last_func; in wq_worker_last_func()
1540 * wq_node_nr_active - Determine wq_node_nr_active to use
1546 * - %NULL for per-cpu workqueues as they don't need to use shared nr_active.
1548 * - node_nr_active[nr_node_ids] if @node is %NUMA_NO_NODE.
1550 * - Otherwise, node_nr_active[@node].
1555 if (!(wq->flags & WQ_UNBOUND)) in wq_node_nr_active()
1561 return wq->node_nr_active[node]; in wq_node_nr_active()
1565 * wq_update_node_max_active - Update per-node max_actives to use
1567 * @off_cpu: CPU that's going down, -1 if a CPU is not going down
1569 * Update @wq->node_nr_active[]->max. @wq must be unbound. max_active is
1571 * cpus. The result is always between @wq->min_active and max_active.
1576 int min_active = READ_ONCE(wq->min_active); in wq_update_node_max_active()
1577 int max_active = READ_ONCE(wq->max_active); in wq_update_node_max_active()
1580 lockdep_assert_held(&wq->mutex); in wq_update_node_max_active()
1586 off_cpu = -1; in wq_update_node_max_active()
1590 total_cpus--; in wq_update_node_max_active()
1595 wq_node_nr_active(wq, node)->max = min_active; in wq_update_node_max_active()
1597 wq_node_nr_active(wq, NUMA_NO_NODE)->max = max_active; in wq_update_node_max_active()
1606 node_cpus--; in wq_update_node_max_active()
1608 wq_node_nr_active(wq, node)->max = in wq_update_node_max_active()
1613 wq_node_nr_active(wq, NUMA_NO_NODE)->max = max_active; in wq_update_node_max_active()
1617 * get_pwq - get an extra reference on the specified pool_workqueue
1621 * @pwq has positive refcnt and be holding the matching pool->lock.
1625 lockdep_assert_held(&pwq->pool->lock); in get_pwq()
1626 WARN_ON_ONCE(pwq->refcnt <= 0); in get_pwq()
1627 pwq->refcnt++; in get_pwq()
1631 * put_pwq - put a pool_workqueue reference
1635 * destruction. The caller should be holding the matching pool->lock.
1639 lockdep_assert_held(&pwq->pool->lock); in put_pwq()
1640 if (likely(--pwq->refcnt)) in put_pwq()
1643 * @pwq can't be released under pool->lock, bounce to a dedicated in put_pwq()
1644 * kthread_worker to avoid A-A deadlocks. in put_pwq()
1646 kthread_queue_work(pwq_release_worker, &pwq->release_work); in put_pwq()
1650 * put_pwq_unlocked - put_pwq() with surrounding pool lock/unlock
1662 raw_spin_lock_irq(&pwq->pool->lock); in put_pwq_unlocked()
1664 raw_spin_unlock_irq(&pwq->pool->lock); in put_pwq_unlocked()
1670 return !pwq->nr_active && list_empty(&pwq->inactive_works); in pwq_is_empty()
1680 if (list_empty(&pwq->pool->worklist)) in __pwq_activate_work()
1681 pwq->pool->watchdog_ts = jiffies; in __pwq_activate_work()
1682 move_linked_works(work, &pwq->pool->worklist, NULL); in __pwq_activate_work()
1688 int max = READ_ONCE(nna->max); in tryinc_node_nr_active()
1693 old = atomic_read(&nna->nr); in tryinc_node_nr_active()
1696 tmp = atomic_cmpxchg_relaxed(&nna->nr, old, old + 1); in tryinc_node_nr_active()
1703 * pwq_tryinc_nr_active - Try to increment nr_active for a pwq
1712 struct workqueue_struct *wq = pwq->wq; in pwq_tryinc_nr_active()
1713 struct worker_pool *pool = pwq->pool; in pwq_tryinc_nr_active()
1714 struct wq_node_nr_active *nna = wq_node_nr_active(wq, pool->node); in pwq_tryinc_nr_active()
1717 lockdep_assert_held(&pool->lock); in pwq_tryinc_nr_active()
1720 /* BH or per-cpu workqueue, pwq->nr_active is sufficient */ in pwq_tryinc_nr_active()
1721 obtained = pwq->nr_active < READ_ONCE(wq->max_active); in pwq_tryinc_nr_active()
1725 if (unlikely(pwq->plugged)) in pwq_tryinc_nr_active()
1729 * Unbound workqueue uses per-node shared nr_active $nna. If @pwq is in pwq_tryinc_nr_active()
1737 if (!list_empty(&pwq->pending_node) && likely(!fill)) in pwq_tryinc_nr_active()
1745 * Lockless acquisition failed. Lock, add ourself to $nna->pending_pwqs in pwq_tryinc_nr_active()
1748 * we see the decremented $nna->nr or they see non-empty in pwq_tryinc_nr_active()
1749 * $nna->pending_pwqs. in pwq_tryinc_nr_active()
1751 raw_spin_lock(&nna->lock); in pwq_tryinc_nr_active()
1753 if (list_empty(&pwq->pending_node)) in pwq_tryinc_nr_active()
1754 list_add_tail(&pwq->pending_node, &nna->pending_pwqs); in pwq_tryinc_nr_active()
1767 list_del_init(&pwq->pending_node); in pwq_tryinc_nr_active()
1770 raw_spin_unlock(&nna->lock); in pwq_tryinc_nr_active()
1773 pwq->nr_active++; in pwq_tryinc_nr_active()
1778 * pwq_activate_first_inactive - Activate the first inactive work item on a pwq
1791 list_first_entry_or_null(&pwq->inactive_works, in pwq_activate_first_inactive()
1803 * unplug_oldest_pwq - unplug the oldest pool_workqueue
1810 * dfl_pwq --------------+ [P] - plugged
1813 * pwqs -> A -> B [P] -> C [P] (newest)
1821 * pwq's are linked into wq->pwqs with the oldest first, so the first one in
1828 lockdep_assert_held(&wq->mutex); in unplug_oldest_pwq()
1831 pwq = list_first_entry_or_null(&wq->pwqs, struct pool_workqueue, in unplug_oldest_pwq()
1833 raw_spin_lock_irq(&pwq->pool->lock); in unplug_oldest_pwq()
1834 if (pwq->plugged) { in unplug_oldest_pwq()
1835 pwq->plugged = false; in unplug_oldest_pwq()
1837 kick_pool(pwq->pool); in unplug_oldest_pwq()
1839 raw_spin_unlock_irq(&pwq->pool->lock); in unplug_oldest_pwq()
1843 * node_activate_pending_pwq - Activate a pending pwq on a wq_node_nr_active
1847 * Activate a pwq in @nna->pending_pwqs. Called with @caller_pool locked.
1857 lockdep_assert_held(&caller_pool->lock); in node_activate_pending_pwq()
1859 raw_spin_lock(&nna->lock); in node_activate_pending_pwq()
1861 pwq = list_first_entry_or_null(&nna->pending_pwqs, in node_activate_pending_pwq()
1868 * @pwq->pool->lock. Let's trylock first. If unsuccessful, do the unlock in node_activate_pending_pwq()
1869 * / lock dance. For that, we also need to release @nna->lock as it's in node_activate_pending_pwq()
1872 if (pwq->pool != locked_pool) { in node_activate_pending_pwq()
1873 raw_spin_unlock(&locked_pool->lock); in node_activate_pending_pwq()
1874 locked_pool = pwq->pool; in node_activate_pending_pwq()
1875 if (!raw_spin_trylock(&locked_pool->lock)) { in node_activate_pending_pwq()
1876 raw_spin_unlock(&nna->lock); in node_activate_pending_pwq()
1877 raw_spin_lock(&locked_pool->lock); in node_activate_pending_pwq()
1878 raw_spin_lock(&nna->lock); in node_activate_pending_pwq()
1887 work = list_first_entry_or_null(&pwq->inactive_works, in node_activate_pending_pwq()
1890 list_del_init(&pwq->pending_node); in node_activate_pending_pwq()
1897 * pending_pwqs so that we round-robin through them. This means that in node_activate_pending_pwq()
1902 pwq->nr_active++; in node_activate_pending_pwq()
1905 if (list_empty(&pwq->inactive_works)) in node_activate_pending_pwq()
1906 list_del_init(&pwq->pending_node); in node_activate_pending_pwq()
1908 list_move_tail(&pwq->pending_node, &nna->pending_pwqs); in node_activate_pending_pwq()
1911 if (pwq->pool != caller_pool) in node_activate_pending_pwq()
1912 kick_pool(pwq->pool); in node_activate_pending_pwq()
1916 raw_spin_unlock(&nna->lock); in node_activate_pending_pwq()
1918 raw_spin_unlock(&locked_pool->lock); in node_activate_pending_pwq()
1919 raw_spin_lock(&caller_pool->lock); in node_activate_pending_pwq()
1924 * pwq_dec_nr_active - Retire an active count
1928 * For unbound workqueues, this function may temporarily drop @pwq->pool->lock.
1932 struct worker_pool *pool = pwq->pool; in pwq_dec_nr_active()
1933 struct wq_node_nr_active *nna = wq_node_nr_active(pwq->wq, pool->node); in pwq_dec_nr_active()
1935 lockdep_assert_held(&pool->lock); in pwq_dec_nr_active()
1938 * @pwq->nr_active should be decremented for both percpu and unbound in pwq_dec_nr_active()
1941 pwq->nr_active--; in pwq_dec_nr_active()
1956 * $nna->pending_pwqs. The following atomic_dec_return()'s implied in pwq_dec_nr_active()
1958 * guarantee that either we see non-empty pending_pwqs or they see in pwq_dec_nr_active()
1959 * decremented $nna->nr. in pwq_dec_nr_active()
1961 * $nna->max may change as CPUs come online/offline and @pwq->wq's in pwq_dec_nr_active()
1963 * larger than @pwq->wq->min_active which is above zero unless freezing. in pwq_dec_nr_active()
1966 if (atomic_dec_return(&nna->nr) >= READ_ONCE(nna->max)) in pwq_dec_nr_active()
1969 if (!list_empty(&nna->pending_pwqs)) in pwq_dec_nr_active()
1974 * pwq_dec_nr_in_flight - decrement pwq's nr_in_flight
1982 * For unbound workqueues, this function may temporarily drop @pwq->pool->lock
1983 * and thus should be called after all other state updates for the in-flight
1987 * raw_spin_lock_irq(pool->lock).
1996 pwq->nr_in_flight[color]--; in pwq_dec_nr_in_flight()
1999 if (likely(pwq->flush_color != color)) in pwq_dec_nr_in_flight()
2002 /* are there still in-flight works? */ in pwq_dec_nr_in_flight()
2003 if (pwq->nr_in_flight[color]) in pwq_dec_nr_in_flight()
2007 pwq->flush_color = -1; in pwq_dec_nr_in_flight()
2013 if (atomic_dec_and_test(&pwq->wq->nr_pwqs_to_flush)) in pwq_dec_nr_in_flight()
2014 complete(&pwq->wq->first_flusher->done); in pwq_dec_nr_in_flight()
2020 * try_to_grab_pending - steal work item from worklist and disable irq
2026 * stable state - idle, on timer or on worklist.
2033 * -EAGAIN if PENDING couldn't be grabbed at the moment, safe to busy-retry
2039 * disabled on entry. This, combined with delayed_work->timer being
2040 * irqsafe, ensures that we return -EAGAIN for finite short period of time.
2060 * dwork->timer is irqsafe. If del_timer() fails, it's in try_to_grab_pending()
2064 if (likely(del_timer(&dwork->timer))) in try_to_grab_pending()
2075 * steal it from ->worklist without clearing WORK_STRUCT_PENDING. in try_to_grab_pending()
2081 raw_spin_lock(&pool->lock); in try_to_grab_pending()
2083 * work->data is guaranteed to point to pwq only while the work in try_to_grab_pending()
2084 * item is queued on pwq->wq, and both updating work->data to point in try_to_grab_pending()
2086 * pwq->pool->lock. This in turn guarantees that, if work->data in try_to_grab_pending()
2091 if (pwq && pwq->pool == pool) { in try_to_grab_pending()
2098 * pwq->inactive_works since a queued barrier can't be in try_to_grab_pending()
2103 * on the inactive_works list, will confuse pwq->nr_active in try_to_grab_pending()
2111 move_linked_works(work, &pwq->pool->worklist, NULL); in try_to_grab_pending()
2113 list_del_init(&work->entry); in try_to_grab_pending()
2116 * work->data points to pwq iff queued. Let's point to pool. As in try_to_grab_pending()
2117 * this destroys work->data needed by the next step, stash it. in try_to_grab_pending()
2119 set_work_pool_and_keep_pending(work, pool->id, in try_to_grab_pending()
2125 raw_spin_unlock(&pool->lock); in try_to_grab_pending()
2129 raw_spin_unlock(&pool->lock); in try_to_grab_pending()
2133 return -EAGAIN; in try_to_grab_pending()
2137 * work_grab_pending - steal work item from worklist and disable irq
2142 * Grab PENDING bit of @work. @work can be in any stable state - idle, on timer
2146 * stored in *@irq_flags. The caller is responsible for re-enabling it using
2165 * insert_work - insert a work into a pool
2175 * raw_spin_lock_irq(pool->lock).
2187 list_add_tail(&work->entry, head); in insert_work()
2204 return worker && worker->current_pwq->wq == wq; in is_chained_work()
2210 * avoid perturbing sensitive tasks.
2220 pr_warn_once("workqueue: round-robin CPU selection forced, expect performance impact\n"); in wq_select_unbound_cpu()
2245 * steal the PENDING will busy-loop waiting for it to either get in __queue_work()
2256 if (unlikely(wq->flags & (__WQ_DESTROYING | __WQ_DRAINING) && in __queue_work()
2263 if (wq->flags & WQ_UNBOUND) in __queue_work()
2269 pwq = rcu_dereference(*per_cpu_ptr(wq->cpu_pwq, cpu)); in __queue_work()
2270 pool = pwq->pool; in __queue_work()
2275 * pool to guarantee non-reentrancy. in __queue_work()
2279 * non-reentrancy. See the comments above unplug_oldest_pwq(). in __queue_work()
2282 if (last_pool && last_pool != pool && !(wq->flags & __WQ_ORDERED)) { in __queue_work()
2285 raw_spin_lock(&last_pool->lock); in __queue_work()
2289 if (worker && worker->current_pwq->wq == wq) { in __queue_work()
2290 pwq = worker->current_pwq; in __queue_work()
2291 pool = pwq->pool; in __queue_work()
2295 raw_spin_unlock(&last_pool->lock); in __queue_work()
2296 raw_spin_lock(&pool->lock); in __queue_work()
2299 raw_spin_lock(&pool->lock); in __queue_work()
2307 * on it, so the retrying is guaranteed to make forward-progress. in __queue_work()
2309 if (unlikely(!pwq->refcnt)) { in __queue_work()
2310 if (wq->flags & WQ_UNBOUND) { in __queue_work()
2311 raw_spin_unlock(&pool->lock); in __queue_work()
2316 WARN_ONCE(true, "workqueue: per-cpu pwq for %s on cpu%d has 0 refcnt", in __queue_work()
2317 wq->name, cpu); in __queue_work()
2323 if (WARN_ON(!list_empty(&work->entry))) in __queue_work()
2326 pwq->nr_in_flight[pwq->work_color]++; in __queue_work()
2327 work_flags = work_color_to_flags(pwq->work_color); in __queue_work()
2334 if (list_empty(&pwq->inactive_works) && pwq_tryinc_nr_active(pwq, false)) { in __queue_work()
2335 if (list_empty(&pool->worklist)) in __queue_work()
2336 pool->watchdog_ts = jiffies; in __queue_work()
2339 insert_work(pwq, work, &pool->worklist, work_flags); in __queue_work()
2343 insert_work(pwq, work, &pwq->inactive_works, work_flags); in __queue_work()
2347 raw_spin_unlock(&pool->lock); in __queue_work()
2367 * queue_work_on - queue work on specific cpu
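/*
 * Editor's illustrative sketch, not part of workqueue.c: queueing a work item
 * on a specific CPU with queue_work_on(). The caller must keep that CPU from
 * going away; a cpus_read_lock() section is one way. Names are invented.
 */
#include <linux/cpu.h>
#include <linux/printk.h>
#include <linux/workqueue.h>

static void stats_fn(struct work_struct *work)
{
	pr_info("stats work on CPU %d\n", raw_smp_processor_id());
}

static DECLARE_WORK(stats_work, stats_fn);

static bool kick_stats_on(int cpu)
{
	bool queued = false;

	cpus_read_lock();
	if (cpu_online(cpu))
		queued = queue_work_on(cpu, system_wq, &stats_work);
	cpus_read_unlock();
	return queued;
}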
2400 * select_numa_node_cpu - Select a CPU based on NUMA node
2429 * queue_work_node - queue work on a "random" cpu for a given NUMA node
2459 * If this is used with a per-cpu workqueue then the logic in in queue_work_node()
2463 WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)); in queue_work_node()
2485 __queue_work(dwork->cpu, dwork->wq, &dwork->work); in delayed_work_timer_fn()
2492 struct timer_list *timer = &dwork->timer; in __queue_delayed_work()
2493 struct work_struct *work = &dwork->work; in __queue_delayed_work()
2496 WARN_ON_ONCE(timer->function != delayed_work_timer_fn); in __queue_delayed_work()
2498 WARN_ON_ONCE(!list_empty(&work->entry)); in __queue_delayed_work()
2501 * If @delay is 0, queue @dwork->work immediately. This is for in __queue_delayed_work()
2507 __queue_work(cpu, wq, &dwork->work); in __queue_delayed_work()
2511 dwork->wq = wq; in __queue_delayed_work()
2512 dwork->cpu = cpu; in __queue_delayed_work()
2513 timer->expires = jiffies + delay; in __queue_delayed_work()
2530 * queue_delayed_work_on - queue work on specific CPU after delay
2543 struct work_struct *work = &dwork->work; in queue_delayed_work_on()
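/*
 * Editor's illustrative sketch, not part of workqueue.c: arming a
 * self-re-arming delayed work. queue_delayed_work() used here is the
 * WORK_CPU_UNBOUND wrapper around queue_delayed_work_on(). Names are
 * invented.
 */
#include <linux/jiffies.h>
#include <linux/workqueue.h>

static void poll_fn(struct work_struct *work);
static DECLARE_DELAYED_WORK(poll_dwork, poll_fn);

static void poll_fn(struct work_struct *work)
{
	/* ... poll the device, then re-arm for 500ms later ... */
	queue_delayed_work(system_wq, &poll_dwork, msecs_to_jiffies(500));
}

static void poll_start(void)
{
	queue_delayed_work(system_wq, &poll_dwork, msecs_to_jiffies(500));
}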
2562 * mod_delayed_work_on - modify delay of or queue a delayed work on specific CPU
2585 ret = work_grab_pending(&dwork->work, WORK_CANCEL_DELAYED, &irq_flags); in mod_delayed_work_on()
2587 if (!clear_pending_if_disabled(&dwork->work)) in mod_delayed_work_on()
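/*
 * Editor's illustrative sketch, not part of workqueue.c: debouncing with
 * mod_delayed_work(), which (re)starts the timer whether or not the work was
 * already pending. Names are invented.
 */
#include <linux/jiffies.h>
#include <linux/workqueue.h>

static void writeback_fn(struct work_struct *work)
{
	/* ... write accumulated changes back ... */
}

static DECLARE_DELAYED_WORK(writeback_dwork, writeback_fn);

static void note_dirty(void)
{
	/* push the writeback out to one second after the latest change */
	mod_delayed_work(system_wq, &writeback_dwork, HZ);
}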
2601 __queue_work(WORK_CPU_UNBOUND, rwork->wq, &rwork->work); in rcu_work_rcufn()
2606 * queue_rcu_work - queue work after a RCU grace period
2617 struct work_struct *work = &rwork->work; in queue_rcu_work()
2625 rwork->wq = wq; in queue_rcu_work()
2626 call_rcu_hurry(&rwork->rcu, rcu_work_rcufn); in queue_rcu_work()
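/*
 * Editor's illustrative sketch, not part of workqueue.c: freeing an object
 * from process context once an RCU grace period has elapsed. "struct foo"
 * and its helpers are invented.
 */
#include <linux/slab.h>
#include <linux/workqueue.h>

struct foo {
	struct rcu_work rwork;
	/* ... payload accessed under RCU ... */
};

static void foo_free_workfn(struct work_struct *work)
{
	struct foo *f = container_of(to_rcu_work(work), struct foo, rwork);

	kfree(f);
}

static void foo_release(struct foo *f)
{
	INIT_RCU_WORK(&f->rwork, foo_free_workfn);
	/* foo_free_workfn() runs on system_wq after a grace period */
	queue_rcu_work(system_wq, &f->rwork);
}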
2640 INIT_LIST_HEAD(&worker->entry); in alloc_worker()
2641 INIT_LIST_HEAD(&worker->scheduled); in alloc_worker()
2642 INIT_LIST_HEAD(&worker->node); in alloc_worker()
2644 worker->flags = WORKER_PREP; in alloc_worker()
2651 if (pool->cpu < 0 && pool->attrs->affn_strict) in pool_allowed_cpus()
2652 return pool->attrs->__pod_cpumask; in pool_allowed_cpus()
2654 return pool->attrs->cpumask; in pool_allowed_cpus()
2658 * worker_attach_to_pool() - attach a worker to a pool
2663 * cpu-binding of @worker are kept coordinated with the pool across
2664 * cpu-[un]hotplugs.
2674 * details. BH workers are, while per-CPU, always DISASSOCIATED. in worker_attach_to_pool()
2676 if (pool->flags & POOL_DISASSOCIATED) { in worker_attach_to_pool()
2677 worker->flags |= WORKER_UNBOUND; in worker_attach_to_pool()
2679 WARN_ON_ONCE(pool->flags & POOL_BH); in worker_attach_to_pool()
2680 kthread_set_per_cpu(worker->task, pool->cpu); in worker_attach_to_pool()
2683 if (worker->rescue_wq) in worker_attach_to_pool()
2684 set_cpus_allowed_ptr(worker->task, pool_allowed_cpus(pool)); in worker_attach_to_pool()
2686 list_add_tail(&worker->node, &pool->workers); in worker_attach_to_pool()
2687 worker->pool = pool; in worker_attach_to_pool()
2696 kthread_set_per_cpu(worker->task, -1); in unbind_worker()
2698 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, wq_unbound_cpumask) < 0); in unbind_worker()
2700 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, cpu_possible_mask) < 0); in unbind_worker()
2709 list_del(&worker->node); in detach_worker()
2713 * worker_detach_from_pool() - detach a worker from its pool
2722 struct worker_pool *pool = worker->pool; in worker_detach_from_pool()
2725 WARN_ON_ONCE(pool->flags & POOL_BH); in worker_detach_from_pool()
2729 worker->pool = NULL; in worker_detach_from_pool()
2732 /* clear leftover flags without pool->lock after it is detached */ in worker_detach_from_pool()
2733 worker->flags &= ~(WORKER_UNBOUND | WORKER_REBOUND); in worker_detach_from_pool()
2739 if (worker->rescue_wq) in format_worker_id()
2740 return scnprintf(buf, size, "kworker/R-%s", in format_worker_id()
2741 worker->rescue_wq->name); in format_worker_id()
2744 if (pool->cpu >= 0) in format_worker_id()
2746 pool->cpu, worker->id, in format_worker_id()
2747 pool->attrs->nice < 0 ? "H" : ""); in format_worker_id()
2750 pool->id, worker->id); in format_worker_id()
2757 * create_worker - create a new workqueue worker
2774 id = ida_alloc(&pool->worker_ida, GFP_KERNEL); in create_worker()
2781 worker = alloc_worker(pool->node); in create_worker()
2787 worker->id = id; in create_worker()
2789 if (!(pool->flags & POOL_BH)) { in create_worker()
2793 worker->task = kthread_create_on_node(worker_thread, worker, in create_worker()
2794 pool->node, "%s", id_buf); in create_worker()
2795 if (IS_ERR(worker->task)) { in create_worker()
2796 if (PTR_ERR(worker->task) == -EINTR) { in create_worker()
2801 worker->task); in create_worker()
2806 set_user_nice(worker->task, pool->attrs->nice); in create_worker()
2807 kthread_bind_mask(worker->task, pool_allowed_cpus(pool)); in create_worker()
2814 raw_spin_lock_irq(&pool->lock); in create_worker()
2816 worker->pool->nr_workers++; in create_worker()
2824 if (worker->task) in create_worker()
2825 wake_up_process(worker->task); in create_worker()
2827 raw_spin_unlock_irq(&pool->lock); in create_worker()
2832 ida_free(&pool->worker_ida, id); in create_worker()
2850 list_del_init(&worker->entry); in reap_dying_workers()
2851 kthread_stop_put(worker->task); in reap_dying_workers()
2857 * set_worker_dying - Tag a worker for destruction
2859 * @list: transfer worker away from its pool->idle_list and into list
2865 * raw_spin_lock_irq(pool->lock).
2869 struct worker_pool *pool = worker->pool; in set_worker_dying()
2871 lockdep_assert_held(&pool->lock); in set_worker_dying()
2875 if (WARN_ON(worker->current_work) || in set_worker_dying()
2876 WARN_ON(!list_empty(&worker->scheduled)) || in set_worker_dying()
2877 WARN_ON(!(worker->flags & WORKER_IDLE))) in set_worker_dying()
2880 pool->nr_workers--; in set_worker_dying()
2881 pool->nr_idle--; in set_worker_dying()
2883 worker->flags |= WORKER_DIE; in set_worker_dying()
2885 list_move(&worker->entry, list); in set_worker_dying()
2888 get_task_struct(worker->task); in set_worker_dying()
2892 * idle_worker_timeout - check if some idle workers can now be deleted.
2899 * it expire and re-evaluate things from there.
2906 if (work_pending(&pool->idle_cull_work)) in idle_worker_timeout()
2909 raw_spin_lock_irq(&pool->lock); in idle_worker_timeout()
2916 worker = list_last_entry(&pool->idle_list, struct worker, entry); in idle_worker_timeout()
2917 expires = worker->last_active + IDLE_WORKER_TIMEOUT; in idle_worker_timeout()
2921 mod_timer(&pool->idle_timer, expires); in idle_worker_timeout()
2923 raw_spin_unlock_irq(&pool->lock); in idle_worker_timeout()
2926 queue_work(system_unbound_wq, &pool->idle_cull_work); in idle_worker_timeout()
2930 * idle_cull_fn - cull workers that have been idle for too long.
2946 * Grabbing wq_pool_attach_mutex here ensures an already-running worker in idle_cull_fn()
2947 * cannot proceed beyond set_pf_worker() in its self-destruct path. in idle_cull_fn()
2948 * This is required as a previously-preempted worker could run after in idle_cull_fn()
2952 raw_spin_lock_irq(&pool->lock); in idle_cull_fn()
2958 worker = list_last_entry(&pool->idle_list, struct worker, entry); in idle_cull_fn()
2959 expires = worker->last_active + IDLE_WORKER_TIMEOUT; in idle_cull_fn()
2962 mod_timer(&pool->idle_timer, expires); in idle_cull_fn()
2969 raw_spin_unlock_irq(&pool->lock); in idle_cull_fn()
2979 struct workqueue_struct *wq = pwq->wq; in send_mayday()
2983 if (!wq->rescuer) in send_mayday()
2987 if (list_empty(&pwq->mayday_node)) { in send_mayday()
2994 list_add_tail(&pwq->mayday_node, &wq->maydays); in send_mayday()
2995 wake_up_process(wq->rescuer->task); in send_mayday()
2996 pwq->stats[PWQ_STAT_MAYDAY]++; in send_mayday()
3005 raw_spin_lock_irq(&pool->lock); in pool_mayday_timeout()
3006 raw_spin_lock(&wq_mayday_lock); /* for wq->maydays */ in pool_mayday_timeout()
3015 list_for_each_entry(work, &pool->worklist, entry) in pool_mayday_timeout()
3020 raw_spin_unlock_irq(&pool->lock); in pool_mayday_timeout()
3022 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL); in pool_mayday_timeout()
3026 * maybe_create_worker - create a new worker if necessary
3039 * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
3044 __releases(&pool->lock) in maybe_create_worker()
3045 __acquires(&pool->lock) in maybe_create_worker()
3048 raw_spin_unlock_irq(&pool->lock); in maybe_create_worker()
3051 mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT); in maybe_create_worker()
3063 del_timer_sync(&pool->mayday_timer); in maybe_create_worker()
3064 raw_spin_lock_irq(&pool->lock); in maybe_create_worker()
3067 * created as @pool->lock was dropped and the new worker might have in maybe_create_worker()
3075 * manage_workers - manage worker pool
3087 * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
3098 struct worker_pool *pool = worker->pool; in manage_workers()
3100 if (pool->flags & POOL_MANAGER_ACTIVE) in manage_workers()
3103 pool->flags |= POOL_MANAGER_ACTIVE; in manage_workers()
3104 pool->manager = worker; in manage_workers()
3108 pool->manager = NULL; in manage_workers()
3109 pool->flags &= ~POOL_MANAGER_ACTIVE; in manage_workers()
3115 * process_one_work - process single work
3126 * raw_spin_lock_irq(pool->lock) which is released and regrabbed.
3129 __releases(&pool->lock) in process_one_work()
3130 __acquires(&pool->lock) in process_one_work()
3133 struct worker_pool *pool = worker->pool; in process_one_work()
3136 bool bh_draining = pool->flags & POOL_BH_DRAINING; in process_one_work()
3143 * work->lockdep_map, make a copy and use that here. in process_one_work()
3147 lockdep_copy_map(&lockdep_map, &work->lockdep_map); in process_one_work()
3150 WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) && in process_one_work()
3151 raw_smp_processor_id() != pool->cpu); in process_one_work()
3155 hash_add(pool->busy_hash, &worker->hentry, (unsigned long)work); in process_one_work()
3156 worker->current_work = work; in process_one_work()
3157 worker->current_func = work->func; in process_one_work()
3158 worker->current_pwq = pwq; in process_one_work()
3159 if (worker->task) in process_one_work()
3160 worker->current_at = worker->task->se.sum_exec_runtime; in process_one_work()
3162 worker->current_color = get_work_color(work_data); in process_one_work()
3168 strscpy(worker->desc, pwq->wq->name, WORKER_DESC_LEN); in process_one_work()
3170 list_del_init(&work->entry); in process_one_work()
3178 if (unlikely(pwq->wq->flags & WQ_CPU_INTENSIVE)) in process_one_work()
3182 * Kick @pool if necessary. It's always noop for per-cpu worker pools in process_one_work()
3191 * update to @work. Also, do this inside @pool->lock so that in process_one_work()
3195 set_work_pool_and_clear_pending(work, pool->id, pool_offq_flags(pool)); in process_one_work()
3197 pwq->stats[PWQ_STAT_STARTED]++; in process_one_work()
3198 raw_spin_unlock_irq(&pool->lock); in process_one_work()
3204 lock_map_acquire(pwq->wq->lockdep_map); in process_one_work()
3217 * Which would create W1->C->W1 dependencies, even though there is no in process_one_work()
3219 * read-recursive acquire on the work(queue) 'locks', but this will then in process_one_work()
3224 * flush_work() and complete() primitives (except for single-threaded in process_one_work()
3229 worker->current_func(work); in process_one_work()
3234 trace_workqueue_execute_end(work, worker->current_func); in process_one_work()
3235 pwq->stats[PWQ_STAT_COMPLETED]++; in process_one_work()
3238 lock_map_release(pwq->wq->lockdep_map); in process_one_work()
3240 if (unlikely((worker->task && in_atomic()) || in process_one_work()
3244 " preempt=0x%08x lock=%d->%d RCU=%d->%d workfn=%ps\n", in process_one_work()
3245 current->comm, task_pid_nr(current), preempt_count(), in process_one_work()
3248 worker->current_func); in process_one_work()
3261 if (worker->task) in process_one_work()
3264 raw_spin_lock_irq(&pool->lock); in process_one_work()
3274 worker->last_func = worker->current_func; in process_one_work()
3277 hash_del(&worker->hentry); in process_one_work()
3278 worker->current_work = NULL; in process_one_work()
3279 worker->current_func = NULL; in process_one_work()
3280 worker->current_pwq = NULL; in process_one_work()
3281 worker->current_color = INT_MAX; in process_one_work()
3288 * process_scheduled_works - process scheduled works
3296 * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
3304 while ((work = list_first_entry_or_null(&worker->scheduled, in process_scheduled_works()
3307 worker->pool->watchdog_ts = jiffies; in process_scheduled_works()
3318 current->flags |= PF_WQ_WORKER; in set_pf_worker()
3320 current->flags &= ~PF_WQ_WORKER; in set_pf_worker()
3325 * worker_thread - the worker thread function
3328 * The worker thread function. All workers belong to a worker_pool -
3329 * either a per-cpu one or dynamic unbound one. These workers process all
3339 struct worker_pool *pool = worker->pool; in worker_thread()
3344 raw_spin_lock_irq(&pool->lock); in worker_thread()
3347 if (unlikely(worker->flags & WORKER_DIE)) { in worker_thread()
3348 raw_spin_unlock_irq(&pool->lock); in worker_thread()
3351 * The worker is dead and PF_WQ_WORKER is cleared, worker->pool in worker_thread()
3354 worker->pool = NULL; in worker_thread()
3355 ida_free(&pool->worker_ida, worker->id); in worker_thread()
3370 * ->scheduled list can only be filled while a worker is in worker_thread()
3374 WARN_ON_ONCE(!list_empty(&worker->scheduled)); in worker_thread()
3387 list_first_entry(&pool->worklist, in worker_thread()
3397 * pool->lock is held and there's no work to process and no need to in worker_thread()
3399 * pool->lock or from local cpu, so setting the current state in worker_thread()
3400 * before releasing pool->lock is enough to prevent losing any in worker_thread()
3405 raw_spin_unlock_irq(&pool->lock); in worker_thread()
3411 * rescuer_thread - the rescuer thread function
3434 struct workqueue_struct *wq = rescuer->rescue_wq; in rescuer_thread()
3449 * shouldn't have any work pending, but @wq->maydays may still have in rescuer_thread()
3450 * pwq(s) queued. This can happen by non-rescuer workers consuming in rescuer_thread()
3452 * @wq->maydays processing before acting on should_stop so that the in rescuer_thread()
3460 while (!list_empty(&wq->maydays)) { in rescuer_thread()
3461 struct pool_workqueue *pwq = list_first_entry(&wq->maydays, in rescuer_thread()
3463 struct worker_pool *pool = pwq->pool; in rescuer_thread()
3467 list_del_init(&pwq->mayday_node); in rescuer_thread()
3473 raw_spin_lock_irq(&pool->lock); in rescuer_thread()
3479 WARN_ON_ONCE(!list_empty(&rescuer->scheduled)); in rescuer_thread()
3480 list_for_each_entry_safe(work, n, &pool->worklist, entry) { in rescuer_thread()
3483 pwq->stats[PWQ_STAT_RESCUED]++; in rescuer_thread()
3486 if (!list_empty(&rescuer->scheduled)) { in rescuer_thread()
3494 * that such back-to-back work items, which may be in rescuer_thread()
3498 if (pwq->nr_active && need_to_create_worker(pool)) { in rescuer_thread()
3504 if (wq->rescuer && list_empty(&pwq->mayday_node)) { in rescuer_thread()
3506 list_add_tail(&pwq->mayday_node, &wq->maydays); in rescuer_thread()
3524 raw_spin_unlock_irq(&pool->lock); in rescuer_thread()
3540 WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING)); in rescuer_thread()
3547 struct worker_pool *pool = worker->pool; in bh_worker()
3551 raw_spin_lock_irq(&pool->lock); in bh_worker()
3561 WARN_ON_ONCE(!list_empty(&worker->scheduled)); in bh_worker()
3566 list_first_entry(&pool->worklist, in bh_worker()
3572 --nr_restarts && time_before(jiffies, end)); in bh_worker()
3578 raw_spin_unlock_irq(&pool->lock); in bh_worker()
3589 * After full conversion, we'll add worker->softirq_action, directly use the
3597 bh_worker(list_first_entry(&pool->workers, struct worker, node)); in workqueue_softirq_action()
3610 struct worker_pool *pool = dead_work->pool; in drain_dead_softirq_workfn()
3620 raw_spin_lock_irq(&pool->lock); in drain_dead_softirq_workfn()
3621 pool->flags |= POOL_BH_DRAINING; in drain_dead_softirq_workfn()
3622 raw_spin_unlock_irq(&pool->lock); in drain_dead_softirq_workfn()
3624 bh_worker(list_first_entry(&pool->workers, struct worker, node)); in drain_dead_softirq_workfn()
3626 raw_spin_lock_irq(&pool->lock); in drain_dead_softirq_workfn()
3627 pool->flags &= ~POOL_BH_DRAINING; in drain_dead_softirq_workfn()
3629 raw_spin_unlock_irq(&pool->lock); in drain_dead_softirq_workfn()
3637 if (pool->attrs->nice == HIGHPRI_NICE_LEVEL) in drain_dead_softirq_workfn()
3642 complete(&dead_work->done); in drain_dead_softirq_workfn()
3669 if (pool->attrs->nice == HIGHPRI_NICE_LEVEL) in workqueue_softirq_dead()
3680 * check_flush_dependency - check for flush dependency sanity
3687 * %WQ_MEM_RECLAIM as that can break forward-progress guarantee leading to
3693 work_func_t target_func = target_work ? target_work->func : NULL; in check_flush_dependency()
3696 if (target_wq->flags & WQ_MEM_RECLAIM) in check_flush_dependency()
3701 WARN_ONCE(current->flags & PF_MEMALLOC, in check_flush_dependency()
3703 current->pid, current->comm, target_wq->name, target_func); in check_flush_dependency()
3704 WARN_ONCE(worker && ((worker->current_pwq->wq->flags & in check_flush_dependency()
3707 worker->current_pwq->wq->name, worker->current_func, in check_flush_dependency()
3708 target_wq->name, target_func); in check_flush_dependency()
3720 complete(&barr->done); in wq_barrier_func()
3724 * insert_wq_barrier - insert a barrier work
3741 * Note that when @worker is non-NULL, @target may be modified
3745 * raw_spin_lock_irq(pool->lock).
3757 * debugobject calls are safe here even with pool->lock locked in insert_wq_barrier()
3763 * spuriously triggering "inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} in insert_wq_barrier()
3766 INIT_WORK_ONSTACK_KEY(&barr->work, wq_barrier_func, in insert_wq_barrier()
3767 (pwq->wq->flags & WQ_BH) ? &bh_key : &thr_key); in insert_wq_barrier()
3768 __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work)); in insert_wq_barrier()
3770 init_completion_map(&barr->done, &target->lockdep_map); in insert_wq_barrier()
3772 barr->task = current; in insert_wq_barrier()
3782 head = worker->scheduled.next; in insert_wq_barrier()
3783 work_color = worker->current_color; in insert_wq_barrier()
3787 head = target->entry.next; in insert_wq_barrier()
3794 pwq->nr_in_flight[work_color]++; in insert_wq_barrier()
3797 insert_work(pwq, &barr->work, head, work_flags); in insert_wq_barrier()
3801 * flush_workqueue_prep_pwqs - prepare pwqs for workqueue flushing
3803 * @flush_color: new flush color, < 0 for no-op
3804 * @work_color: new work color, < 0 for no-op
3808 * If @flush_color is non-negative, flush_color on all pwqs should be
3809 * -1. If no pwq has in-flight commands at the specified color, all
3810 * pwq->flush_color's stay at -1 and %false is returned. If any pwq
3811 * has in flight commands, its pwq->flush_color is set to
3812 * @flush_color, @wq->nr_pwqs_to_flush is updated accordingly, pwq
3815 * The caller should have initialized @wq->first_flusher prior to
3816 * calling this function with non-negative @flush_color. If
3820 * If @work_color is non-negative, all pwqs should have the same
3825 * mutex_lock(wq->mutex).
3838 WARN_ON_ONCE(atomic_read(&wq->nr_pwqs_to_flush)); in flush_workqueue_prep_pwqs()
3839 atomic_set(&wq->nr_pwqs_to_flush, 1); in flush_workqueue_prep_pwqs()
3843 struct worker_pool *pool = pwq->pool; in flush_workqueue_prep_pwqs()
3845 raw_spin_lock_irq(&pool->lock); in flush_workqueue_prep_pwqs()
3848 WARN_ON_ONCE(pwq->flush_color != -1); in flush_workqueue_prep_pwqs()
3850 if (pwq->nr_in_flight[flush_color]) { in flush_workqueue_prep_pwqs()
3851 pwq->flush_color = flush_color; in flush_workqueue_prep_pwqs()
3852 atomic_inc(&wq->nr_pwqs_to_flush); in flush_workqueue_prep_pwqs()
3858 WARN_ON_ONCE(work_color != work_next_color(pwq->work_color)); in flush_workqueue_prep_pwqs()
3859 pwq->work_color = work_color; in flush_workqueue_prep_pwqs()
3862 raw_spin_unlock_irq(&pool->lock); in flush_workqueue_prep_pwqs()
3865 if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush)) in flush_workqueue_prep_pwqs()
3866 complete(&wq->first_flusher->done); in flush_workqueue_prep_pwqs()
3874 if (unlikely(!wq->lockdep_map)) in touch_wq_lockdep_map()
3877 if (wq->flags & WQ_BH) in touch_wq_lockdep_map()
3880 lock_map_acquire(wq->lockdep_map); in touch_wq_lockdep_map()
3881 lock_map_release(wq->lockdep_map); in touch_wq_lockdep_map()
3883 if (wq->flags & WQ_BH) in touch_wq_lockdep_map()
3892 if (wq->flags & WQ_BH) in touch_work_lockdep_map()
3895 lock_map_acquire(&work->lockdep_map); in touch_work_lockdep_map()
3896 lock_map_release(&work->lockdep_map); in touch_work_lockdep_map()
3898 if (wq->flags & WQ_BH) in touch_work_lockdep_map()
3904 * __flush_workqueue - ensure that any scheduled work has run to completion.
3914 .flush_color = -1, in __flush_workqueue()
3915 .done = COMPLETION_INITIALIZER_ONSTACK_MAP(this_flusher.done, (*wq->lockdep_map)), in __flush_workqueue()
3924 mutex_lock(&wq->mutex); in __flush_workqueue()
3927 * Start-to-wait phase in __flush_workqueue()
3929 next_color = work_next_color(wq->work_color); in __flush_workqueue()
3931 if (next_color != wq->flush_color) { in __flush_workqueue()
3937 WARN_ON_ONCE(!list_empty(&wq->flusher_overflow)); in __flush_workqueue()
3938 this_flusher.flush_color = wq->work_color; in __flush_workqueue()
3939 wq->work_color = next_color; in __flush_workqueue()
3941 if (!wq->first_flusher) { in __flush_workqueue()
3943 WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color); in __flush_workqueue()
3945 wq->first_flusher = &this_flusher; in __flush_workqueue()
3947 if (!flush_workqueue_prep_pwqs(wq, wq->flush_color, in __flush_workqueue()
3948 wq->work_color)) { in __flush_workqueue()
3950 wq->flush_color = next_color; in __flush_workqueue()
3951 wq->first_flusher = NULL; in __flush_workqueue()
3956 WARN_ON_ONCE(wq->flush_color == this_flusher.flush_color); in __flush_workqueue()
3957 list_add_tail(&this_flusher.list, &wq->flusher_queue); in __flush_workqueue()
3958 flush_workqueue_prep_pwqs(wq, -1, wq->work_color); in __flush_workqueue()
3966 list_add_tail(&this_flusher.list, &wq->flusher_overflow); in __flush_workqueue()
3971 mutex_unlock(&wq->mutex); in __flush_workqueue()
3976 * Wake-up-and-cascade phase in __flush_workqueue()
3979 * handling overflow. Non-first flushers can simply return. in __flush_workqueue()
3981 if (READ_ONCE(wq->first_flusher) != &this_flusher) in __flush_workqueue()
3984 mutex_lock(&wq->mutex); in __flush_workqueue()
3987 if (wq->first_flusher != &this_flusher) in __flush_workqueue()
3990 WRITE_ONCE(wq->first_flusher, NULL); in __flush_workqueue()
3993 WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color); in __flush_workqueue()
3999 list_for_each_entry_safe(next, tmp, &wq->flusher_queue, list) { in __flush_workqueue()
4000 if (next->flush_color != wq->flush_color) in __flush_workqueue()
4002 list_del_init(&next->list); in __flush_workqueue()
4003 complete(&next->done); in __flush_workqueue()
4006 WARN_ON_ONCE(!list_empty(&wq->flusher_overflow) && in __flush_workqueue()
4007 wq->flush_color != work_next_color(wq->work_color)); in __flush_workqueue()
4010 wq->flush_color = work_next_color(wq->flush_color); in __flush_workqueue()
4013 if (!list_empty(&wq->flusher_overflow)) { in __flush_workqueue()
4017 * flusher_queue. This is the start-to-wait in __flush_workqueue()
4020 list_for_each_entry(tmp, &wq->flusher_overflow, list) in __flush_workqueue()
4021 tmp->flush_color = wq->work_color; in __flush_workqueue()
4023 wq->work_color = work_next_color(wq->work_color); in __flush_workqueue()
4025 list_splice_tail_init(&wq->flusher_overflow, in __flush_workqueue()
4026 &wq->flusher_queue); in __flush_workqueue()
4027 flush_workqueue_prep_pwqs(wq, -1, wq->work_color); in __flush_workqueue()
4030 if (list_empty(&wq->flusher_queue)) { in __flush_workqueue()
4031 WARN_ON_ONCE(wq->flush_color != wq->work_color); in __flush_workqueue()
4039 WARN_ON_ONCE(wq->flush_color == wq->work_color); in __flush_workqueue()
4040 WARN_ON_ONCE(wq->flush_color != next->flush_color); in __flush_workqueue()
4042 list_del_init(&next->list); in __flush_workqueue()
4043 wq->first_flusher = next; in __flush_workqueue()
4045 if (flush_workqueue_prep_pwqs(wq, wq->flush_color, -1)) in __flush_workqueue()
4052 wq->first_flusher = NULL; in __flush_workqueue()
4056 mutex_unlock(&wq->mutex); in __flush_workqueue()
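/*
 * Editor's illustrative sketch, not part of workqueue.c: flush_workqueue(),
 * a wrapper around __flush_workqueue() above, waits for everything queued on
 * the workqueue so far. Names are invented.
 */
#include <linux/workqueue.h>

static struct workqueue_struct *example_wq;

static void example_quiesce(void)
{
	/* every work item queued on example_wq before this call has finished */
	flush_workqueue(example_wq);
}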
4061 * drain_workqueue - drain a workqueue
4078 * hotter than drain_workqueue() and already looks at @wq->flags. in drain_workqueue()
4081 mutex_lock(&wq->mutex); in drain_workqueue()
4082 if (!wq->nr_drainers++) in drain_workqueue()
4083 wq->flags |= __WQ_DRAINING; in drain_workqueue()
4084 mutex_unlock(&wq->mutex); in drain_workqueue()
4088 mutex_lock(&wq->mutex); in drain_workqueue()
4093 raw_spin_lock_irq(&pwq->pool->lock); in drain_workqueue()
4095 raw_spin_unlock_irq(&pwq->pool->lock); in drain_workqueue()
4103 wq->name, __func__, flush_cnt); in drain_workqueue()
4105 mutex_unlock(&wq->mutex); in drain_workqueue()
4109 if (!--wq->nr_drainers) in drain_workqueue()
4110 wq->flags &= ~__WQ_DRAINING; in drain_workqueue()
4111 mutex_unlock(&wq->mutex); in drain_workqueue()
4130 raw_spin_lock_irq(&pool->lock); in start_flush_work()
4134 if (unlikely(pwq->pool != pool)) in start_flush_work()
4140 pwq = worker->current_pwq; in start_flush_work()
4143 wq = pwq->wq; in start_flush_work()
4147 raw_spin_unlock_irq(&pool->lock); in start_flush_work()
4153 * single-threaded or rescuer equipped workqueue. in start_flush_work()
4160 if (!from_cancel && (wq->saved_max_active == 1 || wq->rescuer)) in start_flush_work()
4166 raw_spin_unlock_irq(&pool->lock); in start_flush_work()
4178 if (WARN_ON(!work->func)) in __flush_work()
4189 * BH context and thus can be busy-waited. in __flush_work()
4225 * flush_work - wait for a work to finish executing the last queueing instance
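/*
 * Editor's illustrative sketch, not part of workqueue.c: waiting for the last
 * queued instance of a work item before consuming its result. Names are
 * invented.
 */
#include <linux/workqueue.h>

static void compute_fn(struct work_struct *work);
static DECLARE_WORK(compute_work, compute_fn);
static int compute_result;

static void compute_fn(struct work_struct *work)
{
	compute_result = 42;		/* stand-in for the real computation */
}

static int compute_sync(void)
{
	queue_work(system_unbound_wq, &compute_work);
	flush_work(&compute_work);	/* sleeps until compute_fn() returns */
	return compute_result;
}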
4243 * flush_delayed_work - wait for a dwork to finish executing the last queueing
4257 if (del_timer_sync(&dwork->timer)) in flush_delayed_work()
4258 __queue_work(dwork->cpu, dwork->wq, &dwork->work); in flush_delayed_work()
4260 return flush_work(&dwork->work); in flush_delayed_work()
4265 * flush_rcu_work - wait for a rwork to finish executing the last queueing
4274 if (test_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&rwork->work))) { in flush_rcu_work()
4276 flush_work(&rwork->work); in flush_rcu_work()
4279 return flush_work(&rwork->work); in flush_rcu_work()
4286 const unsigned long max = (1lu << WORK_OFFQ_DISABLE_BITS) - 1; in work_offqd_disable()
4288 if (likely(offqd->disable < max)) in work_offqd_disable()
4289 offqd->disable++; in work_offqd_disable()
4296 if (likely(offqd->disable > 0)) in work_offqd_enable()
4297 offqd->disable--; in work_offqd_enable()
4355 * cancel_work_sync - cancel a work and wait for it to finish
4359 * even if the work re-queues itself or migrates to another workqueue. On return
4363 * cancel_work_sync(&delayed_work->work) must not be used for delayed_work's.
4366 * Must be called from a sleepable context if @work was last queued on a non-BH
4367 * workqueue. Can also be called from non-hardirq atomic contexts including BH
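/*
 * Editor's illustrative sketch, not part of workqueue.c: typical teardown
 * ordering with cancel_work_sync(). "mydrv_*" names are invented.
 */
#include <linux/workqueue.h>

static void mydrv_workfn(struct work_struct *work)
{
	/* ... touches driver state that mydrv_remove() tears down ... */
}

static DECLARE_WORK(mydrv_work, mydrv_workfn);

static void mydrv_remove(void)
{
	/*
	 * On return the work item is idle: it is not executing anywhere and
	 * any self-requeueing has been cancelled as well, so the state that
	 * mydrv_workfn() touches can now be freed.
	 */
	cancel_work_sync(&mydrv_work);
}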
4379 * cancel_delayed_work - cancel a delayed work
4389 * it returns %true and the work doesn't re-arm itself. Explicitly flush or
4396 return __cancel_work(&dwork->work, WORK_CANCEL_DELAYED); in cancel_delayed_work()
4401 * cancel_delayed_work_sync - cancel a delayed work and wait for it to finish
4411 return __cancel_work_sync(&dwork->work, WORK_CANCEL_DELAYED); in cancel_delayed_work_sync()
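/*
 * Editor's illustrative sketch, not part of workqueue.c: stopping a periodic,
 * self-re-arming poller. Names are invented.
 */
#include <linux/workqueue.h>

static void poller_fn(struct work_struct *work)
{
	/* ... poll, then normally re-arm itself ... */
}

static DECLARE_DELAYED_WORK(poller_dwork, poller_fn);

static void poller_stop(void)
{
	/*
	 * cancel_delayed_work() alone could race with a poller_fn() that is
	 * already running and about to re-arm; the _sync variant waits for
	 * it and handles the re-queueing case.
	 */
	cancel_delayed_work_sync(&poller_dwork);
}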
4416 * disable_work - Disable and cancel a work item
4420 * pending. As long as the disable count is non-zero, any attempt to queue @work
4434 * disable_work_sync - Disable, cancel and drain a work item
4440 * Must be called from a sleepable context if @work was last queued on a non-BH
4441 * workqueue. Can also be called from non-hardirq atomic contexts including BH
4453 * enable_work - Enable a work item
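/*
 * Editor's illustrative sketch, not part of workqueue.c: blocking a work item
 * around a reconfiguration with the disable/enable pair. Names are invented.
 */
#include <linux/workqueue.h>

static void event_workfn(struct work_struct *work)
{
	/* ... consumes configuration state ... */
}

static DECLARE_WORK(event_work, event_workfn);

static void reconfigure(void)
{
	/* cancel, wait for any running instance, and block further queueing */
	disable_work_sync(&event_work);
	/* ... safely update the state event_workfn() reads ... */
	enable_work(&event_work);	/* queueing succeeds again from here */
}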
4480 * disable_delayed_work - Disable and cancel a delayed work item
4487 return __cancel_work(&dwork->work, in disable_delayed_work()
4493 * disable_delayed_work_sync - Disable, cancel and drain a delayed work item
4500 return __cancel_work_sync(&dwork->work, in disable_delayed_work_sync()
4506 * enable_delayed_work - Enable a delayed work item
4513 return enable_work(&dwork->work); in enable_delayed_work()
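The disable/enable pair maintains a count, so calls nest; a sketch pairing them around a window in which queueing must not happen (names hypothetical):

#include <linux/workqueue.h>

static struct delayed_work my_poll_dwork;	/* hypothetical */

static void my_pause_polling(void)
{
	/* cancels a pending instance, waits for a running one and bumps the
	 * disable count; queue attempts are ignored until re-enabled */
	disable_delayed_work_sync(&my_poll_dwork);
}

static void my_resume_polling(void)
{
	/* drops the disable count; queueing works again once it reaches zero */
	enable_delayed_work(&my_poll_dwork);
}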
4518 * schedule_on_each_cpu - execute a function synchronously on each online CPU
4526 * 0 on success, -errno on failure.
4535 return -ENOMEM; in schedule_on_each_cpu()
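schedule_on_each_cpu() runs the callback synchronously on every online CPU from process context and only returns after all instances have finished; a sketch with a made-up callback:

#include <linux/workqueue.h>
#include <linux/printk.h>
#include <linux/smp.h>

static void my_percpu_sync_fn(struct work_struct *work)
{
	/* runs once on each online CPU, in that CPU's kworker */
	pr_debug("ran on cpu %d\n", raw_smp_processor_id());
}

static int my_sync_all_cpus(void)
{
	/* 0 on success, -ENOMEM if the per-cpu work items can't be allocated */
	return schedule_on_each_cpu(my_percpu_sync_fn);
}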
4555 * execute_in_process_context - reliably execute the routine with user context
4563 * Return: 0 - function was executed
4564 * 1 - function was scheduled for execution
4569 fn(&ew->work); in execute_in_process_context()
4573 INIT_WORK(&ew->work, fn); in execute_in_process_context()
4574 schedule_work(&ew->work); in execute_in_process_context()
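Caller-side sketch of execute_in_process_context(): the function runs inline when the caller may sleep and is otherwise deferred through the supplied execute_work. Names are hypothetical; the execute_work must outlive any deferred execution.

#include <linux/workqueue.h>

static struct execute_work my_ew;	/* hypothetical; static so it outlives the call */

static void my_user_context_fn(struct work_struct *work)
{
	/* guaranteed to run with process context; may sleep */
}

static void my_trigger(void)
{
	/* returns 0 if run inline, 1 if it was deferred to a worker */
	execute_in_process_context(my_user_context_fn, &my_ew);
}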
4581 * free_workqueue_attrs - free a workqueue_attrs
4589 free_cpumask_var(attrs->cpumask); in free_workqueue_attrs()
4590 free_cpumask_var(attrs->__pod_cpumask); in free_workqueue_attrs()
4596 * alloc_workqueue_attrs - allocate a workqueue_attrs
4610 if (!alloc_cpumask_var(&attrs->cpumask, GFP_KERNEL)) in alloc_workqueue_attrs()
4612 if (!alloc_cpumask_var(&attrs->__pod_cpumask, GFP_KERNEL)) in alloc_workqueue_attrs()
4615 cpumask_copy(attrs->cpumask, cpu_possible_mask); in alloc_workqueue_attrs()
4616 attrs->affn_scope = WQ_AFFN_DFL; in alloc_workqueue_attrs()
4626 to->nice = from->nice; in copy_workqueue_attrs()
4627 cpumask_copy(to->cpumask, from->cpumask); in copy_workqueue_attrs()
4628 cpumask_copy(to->__pod_cpumask, from->__pod_cpumask); in copy_workqueue_attrs()
4629 to->affn_strict = from->affn_strict; in copy_workqueue_attrs()
4632 * Unlike hash and equality test, copying shouldn't ignore wq-only in copy_workqueue_attrs()
4636 to->affn_scope = from->affn_scope; in copy_workqueue_attrs()
4637 to->ordered = from->ordered; in copy_workqueue_attrs()
4641 * Some attrs fields are workqueue-only. Clear them for worker_pool's. See the
4646 attrs->affn_scope = WQ_AFFN_NR_TYPES; in wqattrs_clear_for_pool()
4647 attrs->ordered = false; in wqattrs_clear_for_pool()
4648 if (attrs->affn_strict) in wqattrs_clear_for_pool()
4649 cpumask_copy(attrs->cpumask, cpu_possible_mask); in wqattrs_clear_for_pool()
4657 hash = jhash_1word(attrs->nice, hash); in wqattrs_hash()
4658 hash = jhash_1word(attrs->affn_strict, hash); in wqattrs_hash()
4659 hash = jhash(cpumask_bits(attrs->__pod_cpumask), in wqattrs_hash()
4661 if (!attrs->affn_strict) in wqattrs_hash()
4662 hash = jhash(cpumask_bits(attrs->cpumask), in wqattrs_hash()
4671 if (a->nice != b->nice) in wqattrs_equal()
4673 if (a->affn_strict != b->affn_strict) in wqattrs_equal()
4675 if (!cpumask_equal(a->__pod_cpumask, b->__pod_cpumask)) in wqattrs_equal()
4677 if (!a->affn_strict && !cpumask_equal(a->cpumask, b->cpumask)) in wqattrs_equal()
4688 * @attrs->cpumask doesn't overlap with @unbound_cpumask, we fall back to in wqattrs_actualize_cpumask()
4691 cpumask_and(attrs->cpumask, attrs->cpumask, unbound_cpumask); in wqattrs_actualize_cpumask()
4692 if (unlikely(cpumask_empty(attrs->cpumask))) in wqattrs_actualize_cpumask()
4693 cpumask_copy(attrs->cpumask, unbound_cpumask); in wqattrs_actualize_cpumask()
4706 if (attrs->affn_scope == WQ_AFFN_DFL) in wqattrs_pod_type()
4709 scope = attrs->affn_scope; in wqattrs_pod_type()
4713 if (!WARN_ON_ONCE(attrs->affn_scope == WQ_AFFN_NR_TYPES) && in wqattrs_pod_type()
4714 likely(pt->nr_pods)) in wqattrs_pod_type()
4722 BUG_ON(!pt->nr_pods); in wqattrs_pod_type()
4727 * init_worker_pool - initialize a newly zalloc'd worker_pool
4730 * Initialize a newly zalloc'd @pool. It also allocates @pool->attrs.
4732 * Return: 0 on success, -errno on failure. Even on failure, all fields
4738 raw_spin_lock_init(&pool->lock); in init_worker_pool()
4739 pool->id = -1; in init_worker_pool()
4740 pool->cpu = -1; in init_worker_pool()
4741 pool->node = NUMA_NO_NODE; in init_worker_pool()
4742 pool->flags |= POOL_DISASSOCIATED; in init_worker_pool()
4743 pool->watchdog_ts = jiffies; in init_worker_pool()
4744 INIT_LIST_HEAD(&pool->worklist); in init_worker_pool()
4745 INIT_LIST_HEAD(&pool->idle_list); in init_worker_pool()
4746 hash_init(pool->busy_hash); in init_worker_pool()
4748 timer_setup(&pool->idle_timer, idle_worker_timeout, TIMER_DEFERRABLE); in init_worker_pool()
4749 INIT_WORK(&pool->idle_cull_work, idle_cull_fn); in init_worker_pool()
4751 timer_setup(&pool->mayday_timer, pool_mayday_timeout, 0); in init_worker_pool()
4753 INIT_LIST_HEAD(&pool->workers); in init_worker_pool()
4755 ida_init(&pool->worker_ida); in init_worker_pool()
4756 INIT_HLIST_NODE(&pool->hash_node); in init_worker_pool()
4757 pool->refcnt = 1; in init_worker_pool()
4760 pool->attrs = alloc_workqueue_attrs(); in init_worker_pool()
4761 if (!pool->attrs) in init_worker_pool()
4762 return -ENOMEM; in init_worker_pool()
4764 wqattrs_clear_for_pool(pool->attrs); in init_worker_pool()
4774 lockdep_register_key(&wq->key); in wq_init_lockdep()
4775 lock_name = kasprintf(GFP_KERNEL, "%s%s", "(wq_completion)", wq->name); in wq_init_lockdep()
4777 lock_name = wq->name; in wq_init_lockdep()
4779 wq->lock_name = lock_name; in wq_init_lockdep()
4780 wq->lockdep_map = &wq->__lockdep_map; in wq_init_lockdep()
4781 lockdep_init_map(wq->lockdep_map, lock_name, &wq->key, 0); in wq_init_lockdep()
4786 if (wq->lockdep_map != &wq->__lockdep_map) in wq_unregister_lockdep()
4789 lockdep_unregister_key(&wq->key); in wq_unregister_lockdep()
4794 if (wq->lockdep_map != &wq->__lockdep_map) in wq_free_lockdep()
4797 if (wq->lock_name != wq->name) in wq_free_lockdep()
4798 kfree(wq->lock_name); in wq_free_lockdep()
4829 nna->max = WQ_DFL_MIN_ACTIVE; in init_node_nr_active()
4830 atomic_set(&nna->nr, 0); in init_node_nr_active()
4831 raw_spin_lock_init(&nna->lock); in init_node_nr_active()
4832 INIT_LIST_HEAD(&nna->pending_pwqs); in init_node_nr_active()
4863 return -ENOMEM; in alloc_node_nr_active()
4871 if (wq->flags & WQ_UNBOUND) in rcu_free_wq()
4872 free_node_nr_active(wq->node_nr_active); in rcu_free_wq()
4875 free_percpu(wq->cpu_pwq); in rcu_free_wq()
4876 free_workqueue_attrs(wq->unbound_attrs); in rcu_free_wq()
4884 ida_destroy(&pool->worker_ida); in rcu_free_pool()
4885 free_workqueue_attrs(pool->attrs); in rcu_free_pool()
4890 * put_unbound_pool - put a worker_pool
4907 if (--pool->refcnt) in put_unbound_pool()
4911 if (WARN_ON(!(pool->cpu < 0)) || in put_unbound_pool()
4912 WARN_ON(!list_empty(&pool->worklist))) in put_unbound_pool()
4916 if (pool->id >= 0) in put_unbound_pool()
4917 idr_remove(&worker_pool_idr, pool->id); in put_unbound_pool()
4918 hash_del(&pool->hash_node); in put_unbound_pool()
4927 * pwq->refcnt == pool->refcnt == 0 in put_unbound_pool()
4931 * drops pool->lock in put_unbound_pool()
4935 !(pool->flags & POOL_MANAGER_ACTIVE), in put_unbound_pool()
4939 raw_spin_lock_irq(&pool->lock); in put_unbound_pool()
4940 if (!(pool->flags & POOL_MANAGER_ACTIVE)) { in put_unbound_pool()
4941 pool->flags |= POOL_MANAGER_ACTIVE; in put_unbound_pool()
4944 raw_spin_unlock_irq(&pool->lock); in put_unbound_pool()
4950 WARN_ON(pool->nr_workers || pool->nr_idle); in put_unbound_pool()
4951 raw_spin_unlock_irq(&pool->lock); in put_unbound_pool()
4960 del_timer_sync(&pool->idle_timer); in put_unbound_pool()
4961 cancel_work_sync(&pool->idle_cull_work); in put_unbound_pool()
4962 del_timer_sync(&pool->mayday_timer); in put_unbound_pool()
4965 call_rcu(&pool->rcu, rcu_free_pool); in put_unbound_pool()
4969 * get_unbound_pool - get a worker_pool with the specified attributes
4993 if (wqattrs_equal(pool->attrs, attrs)) { in get_unbound_pool()
4994 pool->refcnt++; in get_unbound_pool()
5000 for (pod = 0; pod < pt->nr_pods; pod++) { in get_unbound_pool()
5001 if (cpumask_subset(attrs->__pod_cpumask, pt->pod_cpus[pod])) { in get_unbound_pool()
5002 node = pt->pod_node[pod]; in get_unbound_pool()
5012 pool->node = node; in get_unbound_pool()
5013 copy_workqueue_attrs(pool->attrs, attrs); in get_unbound_pool()
5014 wqattrs_clear_for_pool(pool->attrs); in get_unbound_pool()
5024 hash_add(unbound_pool_hash, &pool->hash_node, hash); in get_unbound_pool()
5041 struct workqueue_struct *wq = pwq->wq; in pwq_release_workfn()
5042 struct worker_pool *pool = pwq->pool; in pwq_release_workfn()
5049 if (!list_empty(&pwq->pwqs_node)) { in pwq_release_workfn()
5050 mutex_lock(&wq->mutex); in pwq_release_workfn()
5051 list_del_rcu(&pwq->pwqs_node); in pwq_release_workfn()
5052 is_last = list_empty(&wq->pwqs); in pwq_release_workfn()
5057 if (!is_last && (wq->flags & __WQ_ORDERED)) in pwq_release_workfn()
5060 mutex_unlock(&wq->mutex); in pwq_release_workfn()
5063 if (wq->flags & WQ_UNBOUND) { in pwq_release_workfn()
5069 if (!list_empty(&pwq->pending_node)) { in pwq_release_workfn()
5071 wq_node_nr_active(pwq->wq, pwq->pool->node); in pwq_release_workfn()
5073 raw_spin_lock_irq(&nna->lock); in pwq_release_workfn()
5074 list_del_init(&pwq->pending_node); in pwq_release_workfn()
5075 raw_spin_unlock_irq(&nna->lock); in pwq_release_workfn()
5086 call_rcu(&wq->rcu, rcu_free_wq); in pwq_release_workfn()
5098 pwq->pool = pool; in init_pwq()
5099 pwq->wq = wq; in init_pwq()
5100 pwq->flush_color = -1; in init_pwq()
5101 pwq->refcnt = 1; in init_pwq()
5102 INIT_LIST_HEAD(&pwq->inactive_works); in init_pwq()
5103 INIT_LIST_HEAD(&pwq->pending_node); in init_pwq()
5104 INIT_LIST_HEAD(&pwq->pwqs_node); in init_pwq()
5105 INIT_LIST_HEAD(&pwq->mayday_node); in init_pwq()
5106 kthread_init_work(&pwq->release_work, pwq_release_workfn); in init_pwq()
5112 struct workqueue_struct *wq = pwq->wq; in link_pwq()
5114 lockdep_assert_held(&wq->mutex); in link_pwq()
5117 if (!list_empty(&pwq->pwqs_node)) in link_pwq()
5121 pwq->work_color = wq->work_color; in link_pwq()
5124 list_add_tail_rcu(&pwq->pwqs_node, &wq->pwqs); in link_pwq()
5140 pwq = kmem_cache_alloc_node(pwq_cache, GFP_KERNEL, pool->node); in alloc_unbound_pwq()
5161 * wq_calc_pod_cpumask - calculate a wq_attrs' cpumask for a pod
5166 * The result is stored in @attrs->__pod_cpumask.
5168 * If pod affinity is not enabled, @attrs->cpumask is always used. If enabled
5170 * intersection of the possible CPUs of @pod and @attrs->cpumask.
5177 int pod = pt->cpu_pod[cpu]; in wq_calc_pod_cpumask()
5180 cpumask_and(attrs->__pod_cpumask, pt->pod_cpus[pod], attrs->cpumask); in wq_calc_pod_cpumask()
5182 if (!cpumask_intersects(attrs->__pod_cpumask, wq_online_cpumask)) { in wq_calc_pod_cpumask()
5183 cpumask_copy(attrs->__pod_cpumask, attrs->cpumask); in wq_calc_pod_cpumask()
5196 lockdep_assert_held(&wq->mutex); in install_unbound_pwq()
5222 put_pwq_unlocked(ctx->pwq_tbl[cpu]); in apply_wqattrs_cleanup()
5223 put_pwq_unlocked(ctx->dfl_pwq); in apply_wqattrs_cleanup()
5225 free_workqueue_attrs(ctx->attrs); in apply_wqattrs_cleanup()
5243 if (WARN_ON(attrs->affn_scope < 0 || in apply_wqattrs_prepare()
5244 attrs->affn_scope >= WQ_AFFN_NR_TYPES)) in apply_wqattrs_prepare()
5245 return ERR_PTR(-EINVAL); in apply_wqattrs_prepare()
5255 * the default pwq covering whole @attrs->cpumask. Always create in apply_wqattrs_prepare()
5260 cpumask_copy(new_attrs->__pod_cpumask, new_attrs->cpumask); in apply_wqattrs_prepare()
5261 ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs); in apply_wqattrs_prepare()
5262 if (!ctx->dfl_pwq) in apply_wqattrs_prepare()
5266 if (new_attrs->ordered) { in apply_wqattrs_prepare()
5267 ctx->dfl_pwq->refcnt++; in apply_wqattrs_prepare()
5268 ctx->pwq_tbl[cpu] = ctx->dfl_pwq; in apply_wqattrs_prepare()
5271 ctx->pwq_tbl[cpu] = alloc_unbound_pwq(wq, new_attrs); in apply_wqattrs_prepare()
5272 if (!ctx->pwq_tbl[cpu]) in apply_wqattrs_prepare()
5279 cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask); in apply_wqattrs_prepare()
5280 cpumask_copy(new_attrs->__pod_cpumask, new_attrs->cpumask); in apply_wqattrs_prepare()
5281 ctx->attrs = new_attrs; in apply_wqattrs_prepare()
5285 * (dfl_pwq). Set the plugged flag of ctx->dfl_pwq to suspend execution in apply_wqattrs_prepare()
5289 if ((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs)) in apply_wqattrs_prepare()
5290 ctx->dfl_pwq->plugged = true; in apply_wqattrs_prepare()
5292 ctx->wq = wq; in apply_wqattrs_prepare()
5298 return ERR_PTR(-ENOMEM); in apply_wqattrs_prepare()
5307 mutex_lock(&ctx->wq->mutex); in apply_wqattrs_commit()
5309 copy_workqueue_attrs(ctx->wq->unbound_attrs, ctx->attrs); in apply_wqattrs_commit()
5313 ctx->pwq_tbl[cpu] = install_unbound_pwq(ctx->wq, cpu, in apply_wqattrs_commit()
5314 ctx->pwq_tbl[cpu]); in apply_wqattrs_commit()
5315 ctx->dfl_pwq = install_unbound_pwq(ctx->wq, -1, ctx->dfl_pwq); in apply_wqattrs_commit()
5317 /* update node_nr_active->max */ in apply_wqattrs_commit()
5318 wq_update_node_max_active(ctx->wq, -1); in apply_wqattrs_commit()
5321 if (ctx->wq->rescuer) in apply_wqattrs_commit()
5322 set_cpus_allowed_ptr(ctx->wq->rescuer->task, in apply_wqattrs_commit()
5323 unbound_effective_cpumask(ctx->wq)); in apply_wqattrs_commit()
5325 mutex_unlock(&ctx->wq->mutex); in apply_wqattrs_commit()
5334 if (WARN_ON(!(wq->flags & WQ_UNBOUND))) in apply_workqueue_attrs_locked()
5335 return -EINVAL; in apply_workqueue_attrs_locked()
5349 * apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue
5354 * a separate pwq to each CPU pod with possible CPUs in @attrs->cpumask so that in apply_workqueue_attrs()
5356 * in-flight work items finish. Note that a work item which repeatedly requeues
5357 * itself back-to-back will stay on its current pwq.
5361 * Return: 0 on success and -errno on failure.
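A hedged sketch of the attrs round-trip for built-in kernel code, assuming an existing unbound workqueue: allocate, tweak, apply, free. Identifiers and the chosen values are illustrative only.

#include <linux/workqueue.h>
#include <linux/topology.h>

static int my_tune_wq(struct workqueue_struct *unbound_wq)
{
	struct workqueue_attrs *attrs;
	int ret;

	attrs = alloc_workqueue_attrs();
	if (!attrs)
		return -ENOMEM;

	attrs->nice = -5;				  /* higher-priority workers */
	cpumask_copy(attrs->cpumask, cpumask_of_node(0)); /* restrict to node 0 */

	/* rebuilds the pwqs; old ones drain as in-flight items finish */
	ret = apply_workqueue_attrs(unbound_wq, attrs);

	free_workqueue_attrs(attrs);
	return ret;
}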
5376 * unbound_wq_update_pwq - update a pwq slot for CPU hot[un]plug
5385 * back to @wq->dfl_pwq which may not be optimal but is always correct.
5390 * may execute on any CPU. This is similar to how per-cpu workqueues behave on
5401 if (!(wq->flags & WQ_UNBOUND) || wq->unbound_attrs->ordered) in unbound_wq_update_pwq()
5411 copy_workqueue_attrs(target_attrs, wq->unbound_attrs); in unbound_wq_update_pwq()
5416 if (wqattrs_equal(target_attrs, unbound_pwq(wq, cpu)->pool->attrs)) in unbound_wq_update_pwq()
5423 wq->name); in unbound_wq_update_pwq()
5428 mutex_lock(&wq->mutex); in unbound_wq_update_pwq()
5433 mutex_lock(&wq->mutex); in unbound_wq_update_pwq()
5434 pwq = unbound_pwq(wq, -1); in unbound_wq_update_pwq()
5435 raw_spin_lock_irq(&pwq->pool->lock); in unbound_wq_update_pwq()
5437 raw_spin_unlock_irq(&pwq->pool->lock); in unbound_wq_update_pwq()
5440 mutex_unlock(&wq->mutex); in unbound_wq_update_pwq()
5446 bool highpri = wq->flags & WQ_HIGHPRI; in alloc_and_link_pwqs()
5451 wq->cpu_pwq = alloc_percpu(struct pool_workqueue *); in alloc_and_link_pwqs()
5452 if (!wq->cpu_pwq) in alloc_and_link_pwqs()
5455 if (!(wq->flags & WQ_UNBOUND)) { in alloc_and_link_pwqs()
5458 if (wq->flags & WQ_BH) in alloc_and_link_pwqs()
5468 pwq_p = per_cpu_ptr(wq->cpu_pwq, cpu); in alloc_and_link_pwqs()
5471 pool->node); in alloc_and_link_pwqs()
5477 mutex_lock(&wq->mutex); in alloc_and_link_pwqs()
5479 mutex_unlock(&wq->mutex); in alloc_and_link_pwqs()
5484 if (wq->flags & __WQ_ORDERED) { in alloc_and_link_pwqs()
5489 dfl_pwq = rcu_access_pointer(wq->dfl_pwq); in alloc_and_link_pwqs()
5490 WARN(!ret && (wq->pwqs.next != &dfl_pwq->pwqs_node || in alloc_and_link_pwqs()
5491 wq->pwqs.prev != &dfl_pwq->pwqs_node), in alloc_and_link_pwqs()
5492 "ordering guarantee broken for workqueue %s\n", wq->name); in alloc_and_link_pwqs()
5500 if (wq->cpu_pwq) { in alloc_and_link_pwqs()
5502 struct pool_workqueue *pwq = *per_cpu_ptr(wq->cpu_pwq, cpu); in alloc_and_link_pwqs()
5507 free_percpu(wq->cpu_pwq); in alloc_and_link_pwqs()
5508 wq->cpu_pwq = NULL; in alloc_and_link_pwqs()
5510 return -ENOMEM; in alloc_and_link_pwqs()
5535 if (!(wq->flags & WQ_MEM_RECLAIM)) in init_rescuer()
5541 wq->name); in init_rescuer()
5542 return -ENOMEM; in init_rescuer()
5545 rescuer->rescue_wq = wq; in init_rescuer()
5548 rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", id_buf); in init_rescuer()
5549 if (IS_ERR(rescuer->task)) { in init_rescuer()
5550 ret = PTR_ERR(rescuer->task); in init_rescuer()
5552 wq->name, ERR_PTR(ret)); in init_rescuer()
5557 wq->rescuer = rescuer; in init_rescuer()
5558 if (wq->flags & WQ_UNBOUND) in init_rescuer()
5559 kthread_bind_mask(rescuer->task, unbound_effective_cpumask(wq)); in init_rescuer()
5561 kthread_bind_mask(rescuer->task, cpu_possible_mask); in init_rescuer()
5562 wake_up_process(rescuer->task); in init_rescuer()
5568 * wq_adjust_max_active - update a wq's max_active to the current setting
5571 * If @wq isn't freezing, set @wq->max_active to the saved_max_active and
5573 * @wq->max_active to zero.
5580 lockdep_assert_held(&wq->mutex); in wq_adjust_max_active()
5582 if ((wq->flags & WQ_FREEZABLE) && workqueue_freezing) { in wq_adjust_max_active()
5586 new_max = wq->saved_max_active; in wq_adjust_max_active()
5587 new_min = wq->saved_min_active; in wq_adjust_max_active()
5590 if (wq->max_active == new_max && wq->min_active == new_min) in wq_adjust_max_active()
5594 * Update @wq->max/min_active and then kick inactive work items if more in wq_adjust_max_active()
5599 WRITE_ONCE(wq->max_active, new_max); in wq_adjust_max_active()
5600 WRITE_ONCE(wq->min_active, new_min); in wq_adjust_max_active()
5602 if (wq->flags & WQ_UNBOUND) in wq_adjust_max_active()
5603 wq_update_node_max_active(wq, -1); in wq_adjust_max_active()
5609 * Round-robin through pwq's activating the first inactive work item in wq_adjust_max_active()
5620 raw_spin_lock_irqsave(&pwq->pool->lock, irq_flags); in wq_adjust_max_active()
5623 kick_pool(pwq->pool); in wq_adjust_max_active()
5625 raw_spin_unlock_irqrestore(&pwq->pool->lock, irq_flags); in wq_adjust_max_active()
5660 wq->unbound_attrs = alloc_workqueue_attrs(); in __alloc_workqueue()
5661 if (!wq->unbound_attrs) in __alloc_workqueue()
5665 name_len = vsnprintf(wq->name, sizeof(wq->name), fmt, args); in __alloc_workqueue()
5669 wq->name); in __alloc_workqueue()
5679 max_active = wq_clamp_max_active(max_active, flags, wq->name); in __alloc_workqueue()
5683 wq->flags = flags; in __alloc_workqueue()
5684 wq->max_active = max_active; in __alloc_workqueue()
5685 wq->min_active = min(max_active, WQ_DFL_MIN_ACTIVE); in __alloc_workqueue()
5686 wq->saved_max_active = wq->max_active; in __alloc_workqueue()
5687 wq->saved_min_active = wq->min_active; in __alloc_workqueue()
5688 mutex_init(&wq->mutex); in __alloc_workqueue()
5689 atomic_set(&wq->nr_pwqs_to_flush, 0); in __alloc_workqueue()
5690 INIT_LIST_HEAD(&wq->pwqs); in __alloc_workqueue()
5691 INIT_LIST_HEAD(&wq->flusher_queue); in __alloc_workqueue()
5692 INIT_LIST_HEAD(&wq->flusher_overflow); in __alloc_workqueue()
5693 INIT_LIST_HEAD(&wq->maydays); in __alloc_workqueue()
5695 INIT_LIST_HEAD(&wq->list); in __alloc_workqueue()
5698 if (alloc_node_nr_active(wq->node_nr_active) < 0) in __alloc_workqueue()
5711 mutex_lock(&wq->mutex); in __alloc_workqueue()
5713 mutex_unlock(&wq->mutex); in __alloc_workqueue()
5715 list_add_tail_rcu(&wq->list, &workqueues); in __alloc_workqueue()
5722 if ((wq->flags & WQ_SYSFS) && workqueue_sysfs_register(wq)) in __alloc_workqueue()
5730 * Failed alloc_and_link_pwqs() may leave pending pwq->release_work, in __alloc_workqueue()
5734 if (wq->flags & WQ_UNBOUND) { in __alloc_workqueue()
5736 free_node_nr_active(wq->node_nr_active); in __alloc_workqueue()
5739 free_workqueue_attrs(wq->unbound_attrs); in __alloc_workqueue()
5784 wq->lockdep_map = lockdep_map; in alloc_workqueue_lockdep_map()
5796 if (pwq->nr_in_flight[i]) in pwq_busy()
5799 if ((pwq != rcu_access_pointer(pwq->wq->dfl_pwq)) && (pwq->refcnt > 1)) in pwq_busy()
5808 * destroy_workqueue - safely terminate a workqueue
5825 mutex_lock(&wq->mutex); in destroy_workqueue()
5826 wq->flags |= __WQ_DESTROYING; in destroy_workqueue()
5827 mutex_unlock(&wq->mutex); in destroy_workqueue()
5833 if (wq->rescuer) { in destroy_workqueue()
5834 struct worker *rescuer = wq->rescuer; in destroy_workqueue()
5838 wq->rescuer = NULL; in destroy_workqueue()
5842 kthread_stop(rescuer->task); in destroy_workqueue()
5847 * Sanity checks - grab all the locks so that we wait for all in destroy_workqueue()
5848 * in-flight operations which may do put_pwq(). in destroy_workqueue()
5851 mutex_lock(&wq->mutex); in destroy_workqueue()
5853 raw_spin_lock_irq(&pwq->pool->lock); in destroy_workqueue()
5856 __func__, wq->name); in destroy_workqueue()
5858 raw_spin_unlock_irq(&pwq->pool->lock); in destroy_workqueue()
5859 mutex_unlock(&wq->mutex); in destroy_workqueue()
5864 raw_spin_unlock_irq(&pwq->pool->lock); in destroy_workqueue()
5866 mutex_unlock(&wq->mutex); in destroy_workqueue()
5872 list_del_rcu(&wq->list); in destroy_workqueue()
5877 * to put the base refs. @wq will be auto-destroyed from the last in destroy_workqueue()
5887 put_pwq_unlocked(unbound_pwq(wq, -1)); in destroy_workqueue()
5888 RCU_INIT_POINTER(*unbound_pwq_slot(wq, -1), NULL); in destroy_workqueue()
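The usual caller-side lifecycle around these paths (all names hypothetical): allocate a workqueue, queue work on it, then destroy it, which internally drains and runs the pwq sanity checks shown above.

#include <linux/workqueue.h>

static struct workqueue_struct *my_wq;	/* hypothetical */

static void my_fn(struct work_struct *work)
{
	/* do the deferred work; may sleep */
}
static DECLARE_WORK(my_work, my_fn);

static int my_init(void)
{
	/* rescuer-equipped so progress is guaranteed under memory pressure */
	my_wq = alloc_workqueue("my_wq", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
	if (!my_wq)
		return -ENOMEM;
	queue_work(my_wq, &my_work);
	return 0;
}

static void my_exit(void)
{
	/* drains remaining work items and frees the workqueue */
	destroy_workqueue(my_wq);
}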
5895 * workqueue_set_max_active - adjust max_active of a workqueue
5908 if (WARN_ON(wq->flags & WQ_BH)) in workqueue_set_max_active()
5911 if (WARN_ON(wq->flags & __WQ_ORDERED)) in workqueue_set_max_active()
5914 max_active = wq_clamp_max_active(max_active, wq->flags, wq->name); in workqueue_set_max_active()
5916 mutex_lock(&wq->mutex); in workqueue_set_max_active()
5918 wq->saved_max_active = max_active; in workqueue_set_max_active()
5919 if (wq->flags & WQ_UNBOUND) in workqueue_set_max_active()
5920 wq->saved_min_active = min(wq->saved_min_active, max_active); in workqueue_set_max_active()
5924 mutex_unlock(&wq->mutex); in workqueue_set_max_active()
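Adjusting concurrency at runtime, as a sketch; per the WARNs above this is not valid for BH or ordered workqueues, and the threshold value here is made up.

#include <linux/workqueue.h>

static void my_throttle(struct workqueue_struct *wq, bool busy)
{
	/* clamp concurrency down under load, restore the default otherwise */
	workqueue_set_max_active(wq, busy ? 4 : WQ_DFL_ACTIVE);
}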
5929 * workqueue_set_min_active - adjust min_active of an unbound workqueue
5944 /* min_active is only meaningful for non-ordered unbound workqueues */ in workqueue_set_min_active()
5945 if (WARN_ON((wq->flags & (WQ_BH | WQ_UNBOUND | __WQ_ORDERED)) != in workqueue_set_min_active()
5949 mutex_lock(&wq->mutex); in workqueue_set_min_active()
5950 wq->saved_min_active = clamp(min_active, 0, wq->saved_max_active); in workqueue_set_min_active()
5952 mutex_unlock(&wq->mutex); in workqueue_set_min_active()
5956 * current_work - retrieve %current task's work struct
5967 return worker ? worker->current_work : NULL; in current_work()
5972 * current_is_workqueue_rescuer - is %current workqueue rescuer?
5983 return worker && worker->rescue_wq; in current_is_workqueue_rescuer()
5987 * workqueue_congested - test whether a workqueue is congested
5997 * With the exception of ordered workqueues, all workqueues have per-cpu
6016 pwq = *per_cpu_ptr(wq->cpu_pwq, cpu); in workqueue_congested()
6017 ret = !list_empty(&pwq->inactive_works); in workqueue_congested()
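A sketch of using the congestion hint to shed best-effort work (workqueue and work item supplied by the hypothetical caller):

#include <linux/workqueue.h>

static bool my_try_queue_optional(struct workqueue_struct *wq,
				  struct work_struct *work)
{
	/* skip optional work if this CPU's pwq already has inactive items */
	if (workqueue_congested(WORK_CPU_UNBOUND, wq))
		return false;
	return queue_work(wq, work);
}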
6027 * work_busy - test whether a work is currently pending or running
6049 raw_spin_lock_irqsave(&pool->lock, irq_flags); in work_busy()
6052 raw_spin_unlock_irqrestore(&pool->lock, irq_flags); in work_busy()
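work_busy() returns a racy snapshot, so it is only suitable for reporting; a sketch that merely logs the state (names hypothetical):

#include <linux/workqueue.h>
#include <linux/printk.h>

static void my_report(struct work_struct *work)
{
	unsigned int busy = work_busy(work);

	pr_info("work %ps: %spending %srunning\n", work->func,
		(busy & WORK_BUSY_PENDING) ? "" : "not-",
		(busy & WORK_BUSY_RUNNING) ? "" : "not-");
}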
6061 * set_worker_desc - set description for the current work item
6062 * @fmt: printf-style format string
6077 vsnprintf(worker->desc, sizeof(worker->desc), fmt, args); in set_worker_desc()
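set_worker_desc() must be called from within the work function itself; a sketch with a hypothetical request structure:

#include <linux/workqueue.h>
#include <linux/kernel.h>

struct my_req {				/* hypothetical */
	struct work_struct work;
	int id;
};

static void my_req_fn(struct work_struct *work)
{
	struct my_req *req = container_of(work, struct my_req, work);

	/* shows up next to the kworker in sysrq-t and hung-worker dumps */
	set_worker_desc("my_req/%d", req->id);
	/* ... process the request ... */
}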
6084 * print_worker_info - print out worker information and description
6105 if (!(task->flags & PF_WQ_WORKER)) in print_worker_info()
6118 copy_from_kernel_nofault(&fn, &worker->current_func, sizeof(fn)); in print_worker_info()
6119 copy_from_kernel_nofault(&pwq, &worker->current_pwq, sizeof(pwq)); in print_worker_info()
6120 copy_from_kernel_nofault(&wq, &pwq->wq, sizeof(wq)); in print_worker_info()
6121 copy_from_kernel_nofault(name, wq->name, sizeof(name) - 1); in print_worker_info()
6122 copy_from_kernel_nofault(desc, worker->desc, sizeof(desc) - 1); in print_worker_info()
6134 pr_cont(" cpus=%*pbl", nr_cpumask_bits, pool->attrs->cpumask); in pr_cont_pool_info()
6135 if (pool->node != NUMA_NO_NODE) in pr_cont_pool_info()
6136 pr_cont(" node=%d", pool->node); in pr_cont_pool_info()
6137 pr_cont(" flags=0x%x", pool->flags); in pr_cont_pool_info()
6138 if (pool->flags & POOL_BH) in pr_cont_pool_info()
6140 pool->attrs->nice == HIGHPRI_NICE_LEVEL ? "-hi" : ""); in pr_cont_pool_info()
6142 pr_cont(" nice=%d", pool->attrs->nice); in pr_cont_pool_info()
6147 struct worker_pool *pool = worker->pool; in pr_cont_worker_id()
6149 if (pool->flags & WQ_BH) in pr_cont_worker_id()
6151 pool->attrs->nice == HIGHPRI_NICE_LEVEL ? "-hi" : ""); in pr_cont_worker_id()
6153 pr_cont("%d%s", task_pid_nr(worker->task), in pr_cont_worker_id()
6154 worker->rescue_wq ? "(RESCUER)" : ""); in pr_cont_worker_id()
6165 if (!pcwsp->ctr) in pr_cont_work_flush()
6167 if (func == pcwsp->func) { in pr_cont_work_flush()
6168 pcwsp->ctr++; in pr_cont_work_flush()
6171 if (pcwsp->ctr == 1) in pr_cont_work_flush()
6172 pr_cont("%s %ps", pcwsp->comma ? "," : "", pcwsp->func); in pr_cont_work_flush()
6174 pr_cont("%s %ld*%ps", pcwsp->comma ? "," : "", pcwsp->ctr, pcwsp->func); in pr_cont_work_flush()
6175 pcwsp->ctr = 0; in pr_cont_work_flush()
6177 if ((long)func == -1L) in pr_cont_work_flush()
6179 pcwsp->comma = comma; in pr_cont_work_flush()
6180 pcwsp->func = func; in pr_cont_work_flush()
6181 pcwsp->ctr = 1; in pr_cont_work_flush()
6186 if (work->func == wq_barrier_func) { in pr_cont_work()
6191 pr_cont_work_flush(comma, (work_func_t)-1, pcwsp); in pr_cont_work()
6193 task_pid_nr(barr->task)); in pr_cont_work()
6196 pr_cont_work_flush(comma, (work_func_t)-1, pcwsp); in pr_cont_work()
6197 pr_cont_work_flush(comma, work->func, pcwsp); in pr_cont_work()
6204 struct worker_pool *pool = pwq->pool; in show_pwq()
6210 pr_info(" pwq %d:", pool->id); in show_pwq()
6214 pwq->nr_active, pwq->refcnt, in show_pwq()
6215 !list_empty(&pwq->mayday_node) ? " MAYDAY" : ""); in show_pwq()
6217 hash_for_each(pool->busy_hash, bkt, worker, hentry) { in show_pwq()
6218 if (worker->current_pwq == pwq) { in show_pwq()
6226 pr_info(" in-flight:"); in show_pwq()
6227 hash_for_each(pool->busy_hash, bkt, worker, hentry) { in show_pwq()
6228 if (worker->current_pwq != pwq) in show_pwq()
6233 pr_cont(":%ps", worker->current_func); in show_pwq()
6234 list_for_each_entry(work, &worker->scheduled, entry) in show_pwq()
6236 pr_cont_work_flush(comma, (work_func_t)-1L, &pcws); in show_pwq()
6242 list_for_each_entry(work, &pool->worklist, entry) { in show_pwq()
6252 list_for_each_entry(work, &pool->worklist, entry) { in show_pwq()
6259 pr_cont_work_flush(comma, (work_func_t)-1L, &pcws); in show_pwq()
6263 if (!list_empty(&pwq->inactive_works)) { in show_pwq()
6267 list_for_each_entry(work, &pwq->inactive_works, entry) { in show_pwq()
6271 pr_cont_work_flush(comma, (work_func_t)-1L, &pcws); in show_pwq()
6277 * show_one_workqueue - dump state of specified workqueue
6295 pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags); in show_one_workqueue()
6298 raw_spin_lock_irqsave(&pwq->pool->lock, irq_flags); in show_one_workqueue()
6309 raw_spin_unlock_irqrestore(&pwq->pool->lock, irq_flags); in show_one_workqueue()
6312 * sysrq-t -> show_all_workqueues(). Avoid triggering in show_one_workqueue()
6321 * show_one_worker_pool - dump state of specified worker pool
6331 raw_spin_lock_irqsave(&pool->lock, irq_flags); in show_one_worker_pool()
6332 if (pool->nr_workers == pool->nr_idle) in show_one_worker_pool()
6336 if (!list_empty(&pool->worklist)) in show_one_worker_pool()
6337 hung = jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000; in show_one_worker_pool()
6345 pr_info("pool %d:", pool->id); in show_one_worker_pool()
6347 pr_cont(" hung=%lus workers=%d", hung, pool->nr_workers); in show_one_worker_pool()
6348 if (pool->manager) in show_one_worker_pool()
6350 task_pid_nr(pool->manager->task)); in show_one_worker_pool()
6351 list_for_each_entry(worker, &pool->idle_list, entry) { in show_one_worker_pool()
6359 raw_spin_unlock_irqrestore(&pool->lock, irq_flags); in show_one_worker_pool()
6362 * sysrq-t -> show_all_workqueues(). Avoid triggering in show_one_worker_pool()
6370 * show_all_workqueues - dump workqueue state
6394 * show_freezable_workqueues - dump freezable workqueue state
6408 if (!(wq->flags & WQ_FREEZABLE)) in show_freezable_workqueues()
6422 if (task->flags & PF_WQ_WORKER) { in wq_worker_comm()
6424 struct worker_pool *pool = worker->pool; in wq_worker_comm()
6430 raw_spin_lock_irq(&pool->lock); in wq_worker_comm()
6432 * ->desc tracks information (wq name or in wq_worker_comm()
6434 * current, prepend '+', otherwise '-'. in wq_worker_comm()
6436 if (worker->desc[0] != '\0') { in wq_worker_comm()
6437 if (worker->current_work) in wq_worker_comm()
6438 scnprintf(buf + off, size - off, "+%s", in wq_worker_comm()
6439 worker->desc); in wq_worker_comm()
6441 scnprintf(buf + off, size - off, "-%s", in wq_worker_comm()
6442 worker->desc); in wq_worker_comm()
6444 raw_spin_unlock_irq(&pool->lock); in wq_worker_comm()
6447 strscpy(buf, task->comm, size); in wq_worker_comm()
6477 raw_spin_lock_irq(&pool->lock); in unbind_workers()
6488 worker->flags |= WORKER_UNBOUND; in unbind_workers()
6490 pool->flags |= POOL_DISASSOCIATED; in unbind_workers()
6500 pool->nr_running = 0; in unbind_workers()
6509 raw_spin_unlock_irq(&pool->lock); in unbind_workers()
6519 * rebind_workers - rebind all workers of a pool to the associated CPU
6522 * @pool->cpu is coming online. Rebind all workers to the CPU.
6532 * be on the run-queue of the associated CPU before any local in rebind_workers()
6533 * wake-ups for concurrency management happen, restore CPU affinity in rebind_workers()
6538 kthread_set_per_cpu(worker->task, pool->cpu); in rebind_workers()
6539 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, in rebind_workers()
6543 raw_spin_lock_irq(&pool->lock); in rebind_workers()
6545 pool->flags &= ~POOL_DISASSOCIATED; in rebind_workers()
6548 unsigned int worker_flags = worker->flags; in rebind_workers()
6559 * WRITE_ONCE() is necessary because @worker->flags may be in rebind_workers()
6568 WRITE_ONCE(worker->flags, worker_flags); in rebind_workers()
6571 raw_spin_unlock_irq(&pool->lock); in rebind_workers()
6575 * restore_unbound_workers_cpumask - restore cpumask of unbound workers
6592 if (!cpumask_test_cpu(cpu, pool->attrs->cpumask)) in restore_unbound_workers_cpumask()
6595 cpumask_and(&cpumask, pool->attrs->cpumask, cpu_online_mask); in restore_unbound_workers_cpumask()
6599 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, &cpumask) < 0); in restore_unbound_workers_cpumask()
6607 if (pool->nr_workers) in workqueue_prepare_cpu()
6610 return -ENOMEM; in workqueue_prepare_cpu()
6627 if (pool->flags & POOL_BH) in workqueue_online_cpu()
6631 if (pool->cpu == cpu) in workqueue_online_cpu()
6633 else if (pool->cpu < 0) in workqueue_online_cpu()
6640 struct workqueue_attrs *attrs = wq->unbound_attrs; in workqueue_online_cpu()
6646 for_each_cpu(tcpu, pt->pod_cpus[pt->cpu_pod[cpu]]) in workqueue_online_cpu()
6649 mutex_lock(&wq->mutex); in workqueue_online_cpu()
6650 wq_update_node_max_active(wq, -1); in workqueue_online_cpu()
6651 mutex_unlock(&wq->mutex); in workqueue_online_cpu()
6663 /* unbinding per-cpu workers should happen on the local CPU */ in workqueue_offline_cpu()
6665 return -1; in workqueue_offline_cpu()
6675 struct workqueue_attrs *attrs = wq->unbound_attrs; in workqueue_offline_cpu()
6681 for_each_cpu(tcpu, pt->pod_cpus[pt->cpu_pod[cpu]]) in workqueue_offline_cpu()
6684 mutex_lock(&wq->mutex); in workqueue_offline_cpu()
6686 mutex_unlock(&wq->mutex); in workqueue_offline_cpu()
6705 wfc->ret = wfc->fn(wfc->arg); in work_for_cpu_fn()
6709 * work_on_cpu_key - run a function in thread context on a particular cpu
6734 * work_on_cpu_safe_key - run a function in thread context on a particular cpu
6748 long ret = -ENODEV; in work_on_cpu_safe_key()
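work_on_cpu()/work_on_cpu_safe() (SMP only) run a function in a kworker bound to the requested CPU and return its long result; the _safe variant also keeps the CPU online across the call. The callback below is a hypothetical placeholder.

#include <linux/workqueue.h>

static long my_percpu_query_fn(void *arg)
{
	/* executes in process context on the requested CPU */
	return 0;
}

static long my_query_on(int cpu)
{
	return work_on_cpu_safe(cpu, my_percpu_query_fn, NULL);
}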
6762 * freeze_workqueues_begin - begin freezing workqueues
6766 * pool->worklist.
6769 * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's.
6781 mutex_lock(&wq->mutex); in freeze_workqueues_begin()
6783 mutex_unlock(&wq->mutex); in freeze_workqueues_begin()
6790 * freeze_workqueues_busy - are freezable workqueues still busy?
6813 if (!(wq->flags & WQ_FREEZABLE)) in freeze_workqueues_busy()
6821 WARN_ON_ONCE(pwq->nr_active < 0); in freeze_workqueues_busy()
6822 if (pwq->nr_active) { in freeze_workqueues_busy()
6836 * thaw_workqueues - thaw workqueues
6842 * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's.
6857 mutex_lock(&wq->mutex); in thaw_workqueues()
6859 mutex_unlock(&wq->mutex); in thaw_workqueues()
6877 if (!(wq->flags & WQ_UNBOUND) || (wq->flags & __WQ_DESTROYING)) in workqueue_apply_unbound_cpumask()
6880 ctx = apply_wqattrs_prepare(wq, wq->unbound_attrs, unbound_cpumask); in workqueue_apply_unbound_cpumask()
6886 list_add_tail(&ctx->list, &ctxs); in workqueue_apply_unbound_cpumask()
6904 * workqueue_unbound_exclude_cpumask - Exclude given CPUs from unbound cpumask
6916 return -ENOMEM; in workqueue_unbound_exclude_cpumask()
6948 return -EINVAL; in parse_affn_scope()
6960 return -EINVAL; in wq_affn_dfl_set()
6996 * per_cpu RO bool : whether the workqueue is per-cpu or unbound
6997 * max_active RW int : maximum number of in-flight work items
7015 return wq_dev->wq; in dev_to_wq()
7023 return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)!(wq->flags & WQ_UNBOUND)); in per_cpu_show()
7032 return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active); in max_active_show()
7043 return -EINVAL; in max_active_store()
7063 mutex_lock(&wq->mutex); in wq_nice_show()
7064 written = scnprintf(buf, PAGE_SIZE, "%d\n", wq->unbound_attrs->nice); in wq_nice_show()
7065 mutex_unlock(&wq->mutex); in wq_nice_show()
7081 copy_workqueue_attrs(attrs, wq->unbound_attrs); in wq_sysfs_prep_attrs()
7090 int ret = -ENOMEM; in wq_nice_store()
7098 if (sscanf(buf, "%d", &attrs->nice) == 1 && in wq_nice_store()
7099 attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE) in wq_nice_store()
7102 ret = -EINVAL; in wq_nice_store()
7116 mutex_lock(&wq->mutex); in wq_cpumask_show()
7118 cpumask_pr_args(wq->unbound_attrs->cpumask)); in wq_cpumask_show()
7119 mutex_unlock(&wq->mutex); in wq_cpumask_show()
7129 int ret = -ENOMEM; in wq_cpumask_store()
7137 ret = cpumask_parse(buf, attrs->cpumask); in wq_cpumask_store()
7153 mutex_lock(&wq->mutex); in wq_affn_scope_show()
7154 if (wq->unbound_attrs->affn_scope == WQ_AFFN_DFL) in wq_affn_scope_show()
7160 wq_affn_names[wq->unbound_attrs->affn_scope]); in wq_affn_scope_show()
7161 mutex_unlock(&wq->mutex); in wq_affn_scope_show()
7172 int affn, ret = -ENOMEM; in wq_affn_scope_store()
7181 attrs->affn_scope = affn; in wq_affn_scope_store()
7195 wq->unbound_attrs->affn_strict); in wq_affinity_strict_show()
7204 int v, ret = -ENOMEM; in wq_affinity_strict_store()
7207 return -EINVAL; in wq_affinity_strict_store()
7212 attrs->affn_strict = (bool)v; in wq_affinity_strict_store()
7234 * workqueue_set_unbound_cpumask - Set the low-level unbound cpumask
7237 * The low-level workqueues cpumask is a global cpumask that limits
7241 * Return: 0 - Success
7242 * -EINVAL - Invalid @cpumask
7243 * -ENOMEM - Failed to allocate memory for attrs or pwqs.
7247 int ret = -EINVAL; in workqueue_set_unbound_cpumask()
7306 return -ENOMEM; in cpumask_store()
7339 * workqueue_sysfs_register - make a workqueue visible in sysfs
7351 * Return: 0 on success, -errno on failure.
7362 if (WARN_ON(wq->flags & __WQ_ORDERED)) in workqueue_sysfs_register()
7363 return -EINVAL; in workqueue_sysfs_register()
7365 wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL); in workqueue_sysfs_register()
7367 return -ENOMEM; in workqueue_sysfs_register()
7369 wq_dev->wq = wq; in workqueue_sysfs_register()
7370 wq_dev->dev.bus = &wq_subsys; in workqueue_sysfs_register()
7371 wq_dev->dev.release = wq_device_release; in workqueue_sysfs_register()
7372 dev_set_name(&wq_dev->dev, "%s", wq->name); in workqueue_sysfs_register()
7378 dev_set_uevent_suppress(&wq_dev->dev, true); in workqueue_sysfs_register()
7380 ret = device_register(&wq_dev->dev); in workqueue_sysfs_register()
7382 put_device(&wq_dev->dev); in workqueue_sysfs_register()
7383 wq->wq_dev = NULL; in workqueue_sysfs_register()
7387 if (wq->flags & WQ_UNBOUND) { in workqueue_sysfs_register()
7390 for (attr = wq_sysfs_unbound_attrs; attr->attr.name; attr++) { in workqueue_sysfs_register()
7391 ret = device_create_file(&wq_dev->dev, attr); in workqueue_sysfs_register()
7393 device_unregister(&wq_dev->dev); in workqueue_sysfs_register()
7394 wq->wq_dev = NULL; in workqueue_sysfs_register()
7400 dev_set_uevent_suppress(&wq_dev->dev, false); in workqueue_sysfs_register()
7401 kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD); in workqueue_sysfs_register()
7406 * workqueue_sysfs_unregister - undo workqueue_sysfs_register()
7413 struct wq_device *wq_dev = wq->wq_dev; in workqueue_sysfs_unregister()
7415 if (!wq->wq_dev) in workqueue_sysfs_unregister()
7418 wq->wq_dev = NULL; in workqueue_sysfs_unregister()
7419 device_unregister(&wq_dev->dev); in workqueue_sysfs_unregister()
7428 * Stall may be caused by various bugs - missing WQ_MEM_RECLAIM, illegal
7436 * forward progress is defined as the first item on ->worklist changing.
7455 * The only candidates are CPU-bound workers in the running state.
7465 raw_spin_lock_irqsave(&pool->lock, irq_flags); in show_cpu_pool_hog()
7467 hash_for_each(pool->busy_hash, bkt, worker, hentry) { in show_cpu_pool_hog()
7468 if (task_is_running(worker->task)) { in show_cpu_pool_hog()
7476 pr_info("pool %d:\n", pool->id); in show_cpu_pool_hog()
7477 sched_show_task(worker->task); in show_cpu_pool_hog()
7483 raw_spin_unlock_irqrestore(&pool->lock, irq_flags); in show_cpu_pool_hog()
7491 pr_info("Showing backtraces of running workers in stalled CPU-bound worker pools:\n"); in show_cpu_pools_hogs()
7496 if (pool->cpu_stall) in show_cpu_pools_hogs()
7540 pool->cpu_stall = false; in wq_watchdog_timer_fn()
7541 if (list_empty(&pool->worklist)) in wq_watchdog_timer_fn()
7551 if (pool->cpu >= 0) in wq_watchdog_timer_fn()
7552 touched = READ_ONCE(per_cpu(wq_watchdog_touched_cpu, pool->cpu)); in wq_watchdog_timer_fn()
7555 pool_ts = READ_ONCE(pool->watchdog_ts); in wq_watchdog_timer_fn()
7565 if (pool->cpu >= 0 && !(pool->flags & POOL_BH)) { in wq_watchdog_timer_fn()
7566 pool->cpu_stall = true; in wq_watchdog_timer_fn()
7569 pr_emerg("BUG: workqueue lockup - pool"); in wq_watchdog_timer_fn()
7572 jiffies_to_msecs(now - pool_ts) / 1000); in wq_watchdog_timer_fn()
7683 pool->cpu = cpu; in init_cpu_worker_pool()
7684 cpumask_copy(pool->attrs->cpumask, cpumask_of(cpu)); in init_cpu_worker_pool()
7685 cpumask_copy(pool->attrs->__pod_cpumask, cpumask_of(cpu)); in init_cpu_worker_pool()
7686 pool->attrs->nice = nice; in init_cpu_worker_pool()
7687 pool->attrs->affn_strict = true; in init_cpu_worker_pool()
7688 pool->node = cpu_to_node(cpu); in init_cpu_worker_pool()
7697 * workqueue_init_early - early init for workqueue subsystem
7699 * This is the first step of three-staged workqueue subsystem initialization and
7700 * invoked as soon as the bare basics - memory allocation, cpumasks and idr are
7743 pt->pod_cpus = kcalloc(1, sizeof(pt->pod_cpus[0]), GFP_KERNEL); in workqueue_init_early()
7744 pt->pod_node = kcalloc(1, sizeof(pt->pod_node[0]), GFP_KERNEL); in workqueue_init_early()
7745 pt->cpu_pod = kcalloc(nr_cpu_ids, sizeof(pt->cpu_pod[0]), GFP_KERNEL); in workqueue_init_early()
7746 BUG_ON(!pt->pod_cpus || !pt->pod_node || !pt->cpu_pod); in workqueue_init_early()
7748 BUG_ON(!zalloc_cpumask_var_node(&pt->pod_cpus[0], GFP_KERNEL, NUMA_NO_NODE)); in workqueue_init_early()
7750 pt->nr_pods = 1; in workqueue_init_early()
7751 cpumask_copy(pt->pod_cpus[0], cpu_possible_mask); in workqueue_init_early()
7752 pt->pod_node[0] = NUMA_NO_NODE; in workqueue_init_early()
7753 pt->cpu_pod[0] = 0; in workqueue_init_early()
7762 pool->flags |= POOL_BH; in workqueue_init_early()
7777 attrs->nice = std_nice[i]; in workqueue_init_early()
7785 attrs->nice = std_nice[i]; in workqueue_init_early()
7786 attrs->ordered = true; in workqueue_init_early()
7827 * most consider human-perceivable. However, the kernel also runs on a in wq_cpu_intensive_thresh_init()
7840 /* see init/calibrate.c for lpj -> BogoMIPS calculation */ in wq_cpu_intensive_thresh_init()
7852 * workqueue_init - bring workqueue subsystem fully online
7854 * This is the second step of three-staged workqueue subsystem initialization
7871 * Per-cpu pools created earlier could be missing node hint. Fix them in workqueue_init()
7876 pool->node = cpu_to_node(cpu); in workqueue_init()
7878 pool->node = cpu_to_node(cpu); in workqueue_init()
7884 wq->name); in workqueue_init()
7901 pool->flags &= ~POOL_DISASSOCIATED; in workqueue_init()
7914 * Initialize @pt by first initializing @pt->cpu_pod[] with pod IDs according to
7923 pt->nr_pods = 0; in init_pod_type()
7925 /* init @pt->cpu_pod[] according to @cpus_share_pod() */ in init_pod_type()
7926 pt->cpu_pod = kcalloc(nr_cpu_ids, sizeof(pt->cpu_pod[0]), GFP_KERNEL); in init_pod_type()
7927 BUG_ON(!pt->cpu_pod); in init_pod_type()
7932 pt->cpu_pod[cur] = pt->nr_pods++; in init_pod_type()
7936 pt->cpu_pod[cur] = pt->cpu_pod[pre]; in init_pod_type()
7942 /* init the rest to match @pt->cpu_pod[] */ in init_pod_type()
7943 pt->pod_cpus = kcalloc(pt->nr_pods, sizeof(pt->pod_cpus[0]), GFP_KERNEL); in init_pod_type()
7944 pt->pod_node = kcalloc(pt->nr_pods, sizeof(pt->pod_node[0]), GFP_KERNEL); in init_pod_type()
7945 BUG_ON(!pt->pod_cpus || !pt->pod_node); in init_pod_type()
7947 for (pod = 0; pod < pt->nr_pods; pod++) in init_pod_type()
7948 BUG_ON(!zalloc_cpumask_var(&pt->pod_cpus[pod], GFP_KERNEL)); in init_pod_type()
7951 cpumask_set_cpu(cpu, pt->pod_cpus[pt->cpu_pod[cpu]]); in init_pod_type()
7952 pt->pod_node[pt->cpu_pod[cpu]] = cpu_to_node(cpu); in init_pod_type()
7976 * workqueue_init_topology - initialize CPU pods for unbound workqueues
7978 * This is the third step of three-staged workqueue subsystem initialization and
7999 * and CPU combinations to apply per-pod sharing. in workqueue_init_topology()
8004 if (wq->flags & WQ_UNBOUND) { in workqueue_init_topology()
8005 mutex_lock(&wq->mutex); in workqueue_init_topology()
8006 wq_update_node_max_active(wq, -1); in workqueue_init_topology()
8007 mutex_unlock(&wq->mutex); in workqueue_init_topology()
8016 pr_warn("WARNING: Flushing system-wide workqueues will be prohibited in near future.\n"); in __warn_flushing_systemwide_wq()