Lines Matching +full:local +full:- +full:timers
1 // SPDX-License-Identifier: GPL-2.0-only
3 * Infrastructure for migratable timers
15 #include "tick-internal.h"
34 * GRP0:0 - GRP0:2 GRP0:3 - GRP0:5
37 * CPUS 0-7 8-15 16-23 24-31 32-39 40-47
45 * active CPUs in a group try to migrate expired timers from other CPUs,
53 * If it finds expired timers in one of the group queues it pulls them over
57 * CPUs which go idle arm their CPU local timer hardware for the next local
59 * next local timer or the CPU has no migratable timer pending then the
61 * timer expires before the next local timer then the CPU queues that timer
81 * then has to make sure, that it arms its own local hardware timer for
86 * ---------------
96 * --------------
102 * the per CPU tmigr_cpu->lock is held.
107 * When @timer_base->lock as well as tmigr related locks are required, the lock
108 * ordering is: first @timer_base->lock, afterwards tmigr related locks.
112 * ------------------------------------------------
144 * --> migrator = TMIGR_NONE migrator = CPU2
145 * --> active = active = CPU2
148 * --> idle idle active idle
161 * --> migrator = CPU1 migrator = CPU2
162 * --> active = CPU1 active = CPU2
165 * idle --> active active idle
173 * --> migrator = GRP0:1
174 * --> active = GRP0:0, GRP0:1
186 * --> migrator = GRP0:1
187 * --> active = GRP0:1
202 * expected value (compare-and-exchange).
210 * ----------------------------------------------------------
241 * migrator = TMIGR_NONE --> migrator = TMIGR_NONE
242 * active = --> active =
246 * idle idle --> idle idle
254 * --> migrator = TMIGR_NONE
255 * --> active =
264 * idle idle --> idle idle
277 * --> next_expiry = TIMER0 next_expiry = KTIME_MAX
291 * --> next_expiry = TIMER0
305 * -------------------------- ---------------------------
307 * cmpxchg(&GRP1:0->state);
309 * spin_lock(&GRP1:0->lock);
312 * spin_unlock(&GRP1:0->lock);
316 * spin_lock(&GRP1:0->lock)
318 * group_state = atomic_read(&GRP1:0->state)
321 * spin_unlock(&GRP1:0->lock) <3>
332 * -----------------------------------------------------------
334 * After expiring timers of a remote CPU, a walk through the hierarchy and
347 * --> timerqueue = evt-GRP0:0
354 * timerqueue = evt-CPU0, timerqueue =
355 * evt-CPU1
360 * 2. CPU2 starts to expire remote timers. It starts with LVL0 group
369 * --> timerqueue =
375 * --> groupevt.cpu = CPU0 groupevt.cpu =
376 * timerqueue = evt-CPU0, timerqueue =
377 * evt-CPU1
382 * 3. Some work has to be done after expiring the timers of CPU0. If we stop
388 * pending then it has to expire after CPU1's first timer because all timers
397 * --> timerqueue = evt-GRP0:0
403 * --> groupevt.cpu = CPU1 groupevt.cpu =
404 * --> timerqueue = evt-CPU1 timerqueue =
430 return !(tmc->tmgroup && tmc->online); in tmigr_is_not_available()
435 * group is not active - so no migrator is set.
441 s.state = atomic_read(&group->migr_state); in tmigr_check_migrator()
455 s.state = atomic_read(&group->migr_state); in tmigr_check_migrator_and_lonely()
471 s.state = atomic_read(&group->migr_state); in tmigr_check_lonely()
479 * struct tmigr_walk - data required for walking the hierarchy
488 * struct of CPU which expires remote timers. It is updated
503 * @check: is set if there is the need to handle remote timers;
528 struct tmigr_group *child = NULL, *group = tmc->tmgroup; in __walk_groups()
531 WARN_ON_ONCE(group->level >= tmigr_hierarchy_levels); in __walk_groups()
537 group = group->parent; in __walk_groups()
538 data->childmask = child->groupmask; in __walk_groups()
544 lockdep_assert_held(&tmc->lock); in walk_groups()
550 * Returns the next event of the timerqueue @group->events
552 * Removes timers with ignore flag and update next_expiry of the group. Values
560 lockdep_assert_held(&group->lock); in tmigr_next_groupevt()
562 WRITE_ONCE(group->next_expiry, KTIME_MAX); in tmigr_next_groupevt()
564 while ((node = timerqueue_getnext(&group->events))) { in tmigr_next_groupevt()
567 if (!evt->ignore) { in tmigr_next_groupevt()
568 WRITE_ONCE(group->next_expiry, evt->nextevt.expires); in tmigr_next_groupevt()
573 * Remove next timers with ignore flag, because the group lock in tmigr_next_groupevt()
576 if (!timerqueue_del(&group->events, node)) in tmigr_next_groupevt()
593 if (!evt || now < evt->nextevt.expires) in tmigr_next_expired_groupevt()
599 timerqueue_del(&group->events, &evt->nextevt); in tmigr_next_expired_groupevt()
614 return evt->nextevt.expires; in tmigr_next_groupevt_expires()
625 childmask = data->childmask; in tmigr_active_up()
631 curstate.state = atomic_read(&group->migr_state); in tmigr_active_up()
647 } while (!atomic_try_cmpxchg(&group->migr_state, &curstate.state, newstate.state)); in tmigr_active_up()
663 group->groupevt.ignore = true; in tmigr_active_up()
672 data.childmask = tmc->groupmask; in __tmigr_cpu_activate()
676 tmc->cpuevt.ignore = true; in __tmigr_cpu_activate()
677 WRITE_ONCE(tmc->wakeup, KTIME_MAX); in __tmigr_cpu_activate()
683 * tmigr_cpu_activate() - set this CPU active in timer migration hierarchy
694 if (WARN_ON_ONCE(!tmc->idle)) in tmigr_cpu_activate()
697 raw_spin_lock(&tmc->lock); in tmigr_cpu_activate()
698 tmc->idle = false; in tmigr_cpu_activate()
700 raw_spin_unlock(&tmc->lock); in tmigr_cpu_activate()
706 * @data->firstexp is set to expiry of first global event of the (top level of
722 bool remote = data->remote; in tmigr_update_events()
727 raw_spin_lock(&child->lock); in tmigr_update_events()
728 raw_spin_lock_nested(&group->lock, SINGLE_DEPTH_NESTING); in tmigr_update_events()
730 childstate.state = atomic_read(&child->migr_state); in tmigr_update_events()
731 groupstate.state = atomic_read(&group->migr_state); in tmigr_update_events()
739 nextexp = child->next_expiry; in tmigr_update_events()
740 evt = &child->groupevt; in tmigr_update_events()
742 evt->ignore = (nextexp == KTIME_MAX) ? true : false; in tmigr_update_events()
744 nextexp = data->nextexp; in tmigr_update_events()
746 first_childevt = evt = data->evt; in tmigr_update_events()
758 * - When entering this path by tmigr_new_timer(), @evt->ignore in tmigr_update_events()
760 * - tmigr_inactive_up() takes care of the propagation by in tmigr_update_events()
772 if (evt->ignore && !remote && group->parent) in tmigr_update_events()
775 raw_spin_lock(&group->lock); in tmigr_update_events()
778 groupstate.state = atomic_read(&group->migr_state); in tmigr_update_events()
785 if (timerqueue_node_queued(&evt->nextevt)) { in tmigr_update_events()
786 if ((evt->nextevt.expires == nextexp) && !evt->ignore) { in tmigr_update_events()
788 evt->cpu = first_childevt->cpu; in tmigr_update_events()
792 if (!timerqueue_del(&group->events, &evt->nextevt)) in tmigr_update_events()
793 WRITE_ONCE(group->next_expiry, KTIME_MAX); in tmigr_update_events()
796 if (evt->ignore) { in tmigr_update_events()
813 evt->nextevt.expires = nextexp; in tmigr_update_events()
814 evt->cpu = first_childevt->cpu; in tmigr_update_events()
816 if (timerqueue_add(&group->events, &evt->nextevt)) in tmigr_update_events()
817 WRITE_ONCE(group->next_expiry, nextexp); in tmigr_update_events()
821 if (!group->parent && (groupstate.migrator == TMIGR_NONE)) { in tmigr_update_events()
835 * global timers are handled in time. (This could be optimized in tmigr_update_events()
840 data->firstexp = tmigr_next_groupevt_expires(group); in tmigr_update_events()
847 raw_spin_unlock(&group->lock); in tmigr_update_events()
850 raw_spin_unlock(&child->lock); in tmigr_update_events()
865 * timers.
871 .evt = &tmc->cpuevt }; in tmigr_new_timer()
873 lockdep_assert_held(&tmc->lock); in tmigr_new_timer()
875 if (tmc->remote) in tmigr_new_timer()
880 tmc->cpuevt.ignore = false; in tmigr_new_timer()
898 raw_spin_lock_irq(&tmc->lock); in tmigr_handle_remote_cpu()
901 * If the remote CPU is offline then the timers have been migrated to in tmigr_handle_remote_cpu()
905 * expires the timers of the remote CPU. in tmigr_handle_remote_cpu()
908 * takes care of its timers. in tmigr_handle_remote_cpu()
911 * updated and there are no timers to expire right now. The CPU which in tmigr_handle_remote_cpu()
915 if (!tmc->online || tmc->remote || tmc->cpuevt.ignore || in tmigr_handle_remote_cpu()
916 now < tmc->cpuevt.nextevt.expires) { in tmigr_handle_remote_cpu()
917 raw_spin_unlock_irq(&tmc->lock); in tmigr_handle_remote_cpu()
923 tmc->remote = true; in tmigr_handle_remote_cpu()
924 WRITE_ONCE(tmc->wakeup, KTIME_MAX); in tmigr_handle_remote_cpu()
927 raw_spin_unlock_irq(&tmc->lock); in tmigr_handle_remote_cpu()
933 * Lock ordering needs to be preserved - timer_base locks before tmigr in tmigr_handle_remote_cpu()
935 * the top). During fetching the next timer interrupt, also tmc->lock in tmigr_handle_remote_cpu()
949 raw_spin_lock(&tmc->lock); in tmigr_handle_remote_cpu()
956 * When the CPU is no longer idle, the CPU takes care of the timers and in tmigr_handle_remote_cpu()
957 * also of the timers in the hierarchy. in tmigr_handle_remote_cpu()
962 if (!tmc->online || !tmc->idle) { in tmigr_handle_remote_cpu()
973 data.evt = &tmc->cpuevt; in tmigr_handle_remote_cpu()
984 tmc->remote = false; in tmigr_handle_remote_cpu()
985 raw_spin_unlock_irq(&tmc->lock); in tmigr_handle_remote_cpu()
997 jif = data->basej; in tmigr_handle_remote_up()
998 now = data->now; in tmigr_handle_remote_up()
1000 childmask = data->childmask; in tmigr_handle_remote_up()
1012 raw_spin_lock_irq(&group->lock); in tmigr_handle_remote_up()
1017 unsigned int remote_cpu = evt->cpu; in tmigr_handle_remote_up()
1019 raw_spin_unlock_irq(&group->lock); in tmigr_handle_remote_up()
1029 * (group->next_expiry was updated by tmigr_next_expired_groupevt(), in tmigr_handle_remote_up()
1032 data->firstexp = group->next_expiry; in tmigr_handle_remote_up()
1034 raw_spin_unlock_irq(&group->lock); in tmigr_handle_remote_up()
1040 * tmigr_handle_remote() - Handle global timers of remote idle CPUs
1052 data.childmask = tmc->groupmask; in tmigr_handle_remote()
1060 if (!tmigr_check_migrator(tmc->tmgroup, tmc->groupmask)) { in tmigr_handle_remote()
1063 * value so it won't chase timers that have already expired elsewhere. in tmigr_handle_remote()
1066 if (READ_ONCE(tmc->wakeup) == KTIME_MAX) in tmigr_handle_remote()
1073 * Update @tmc->wakeup only at the end and do not reset @tmc->wakeup to in tmigr_handle_remote()
1074 * KTIME_MAX. Even if tmc->lock is not held during the whole remote in tmigr_handle_remote()
1075 * handling, tmc->wakeup is fine to be stale as it is called in in tmigr_handle_remote()
1082 raw_spin_lock_irq(&tmc->lock); in tmigr_handle_remote()
1083 WRITE_ONCE(tmc->wakeup, data.firstexp); in tmigr_handle_remote()
1084 raw_spin_unlock_irq(&tmc->lock); in tmigr_handle_remote()
1093 childmask = data->childmask; in tmigr_requires_handle_remote_up()
1108 if (group->parent && !data->tmc_active) in tmigr_requires_handle_remote_up()
1118 data->firstexp = READ_ONCE(group->next_expiry); in tmigr_requires_handle_remote_up()
1119 if (data->now >= data->firstexp) { in tmigr_requires_handle_remote_up()
1120 data->check = true; in tmigr_requires_handle_remote_up()
1124 raw_spin_lock(&group->lock); in tmigr_requires_handle_remote_up()
1125 data->firstexp = group->next_expiry; in tmigr_requires_handle_remote_up()
1126 if (data->now >= group->next_expiry) { in tmigr_requires_handle_remote_up()
1127 data->check = true; in tmigr_requires_handle_remote_up()
1128 raw_spin_unlock(&group->lock); in tmigr_requires_handle_remote_up()
1131 raw_spin_unlock(&group->lock); in tmigr_requires_handle_remote_up()
1138 * tmigr_requires_handle_remote() - Check the need of remote timer handling
1153 data.childmask = tmc->groupmask; in tmigr_requires_handle_remote()
1155 data.tmc_active = !tmc->idle; in tmigr_requires_handle_remote()
1162 * Check is done lockless as interrupts are disabled and @tmc->idle is in tmigr_requires_handle_remote()
1163 * set only by the local CPU. in tmigr_requires_handle_remote()
1165 if (!tmc->idle) { in tmigr_requires_handle_remote()
1172 * When the CPU is idle, compare @tmc->wakeup with @data.now. The lock in tmigr_requires_handle_remote()
1178 if (data.now >= READ_ONCE(tmc->wakeup)) in tmigr_requires_handle_remote()
1181 raw_spin_lock(&tmc->lock); in tmigr_requires_handle_remote()
1182 if (data.now >= tmc->wakeup) in tmigr_requires_handle_remote()
1184 raw_spin_unlock(&tmc->lock); in tmigr_requires_handle_remote()
1191 * tmigr_cpu_new_timer() - enqueue next global timer into hierarchy (idle tmc)
1196 * and thereby the timer idle path is executed once more. @tmc->wakeup
1211 raw_spin_lock(&tmc->lock); in tmigr_cpu_new_timer()
1213 ret = READ_ONCE(tmc->wakeup); in tmigr_cpu_new_timer()
1215 if (nextexp != tmc->cpuevt.nextevt.expires || in tmigr_cpu_new_timer()
1216 tmc->cpuevt.ignore) { in tmigr_cpu_new_timer()
1219 * Make sure the reevaluation of timers in idle path in tmigr_cpu_new_timer()
1222 WRITE_ONCE(tmc->wakeup, ret); in tmigr_cpu_new_timer()
1226 raw_spin_unlock(&tmc->lock); in tmigr_cpu_new_timer()
1238 childmask = data->childmask; in tmigr_inactive_up()
1247 curstate.state = atomic_read_acquire(&group->migr_state); in tmigr_inactive_up()
1251 childstate.state = atomic_read(&child->migr_state); in tmigr_inactive_up()
1285 if (atomic_try_cmpxchg(&group->migr_state, &curstate.state, newstate.state)) { in tmigr_inactive_up()
1299 data->remote = false; in tmigr_inactive_up()
1311 .evt = &tmc->cpuevt, in __tmigr_cpu_deactivate()
1312 .childmask = tmc->groupmask }; in __tmigr_cpu_deactivate()
1316 * local timer expires before the global timer, no global timer is set in __tmigr_cpu_deactivate()
1320 tmc->cpuevt.ignore = false; in __tmigr_cpu_deactivate()
1327 * tmigr_cpu_deactivate() - Put current CPU into inactive state
1344 raw_spin_lock(&tmc->lock); in tmigr_cpu_deactivate()
1348 tmc->idle = true; in tmigr_cpu_deactivate()
1351 * Make sure the reevaluation of timers in idle path will not miss an in tmigr_cpu_deactivate()
1354 WRITE_ONCE(tmc->wakeup, ret); in tmigr_cpu_deactivate()
1357 raw_spin_unlock(&tmc->lock); in tmigr_cpu_deactivate()
1362 * tmigr_quick_check() - Quick forecast of next tmigr event when CPU wants to
1367 * * KTIME_MAX - when it is probable that nothing has to be done (not
1371 * * nextevt - when CPU is offline and has to handle timer on its own
1375 * * next_expiry - value of lowest expiry encountered while walking groups
1382 struct tmigr_group *group = tmc->tmgroup; in tmigr_quick_check()
1387 if (WARN_ON_ONCE(tmc->idle)) in tmigr_quick_check()
1390 if (!tmigr_check_migrator_and_lonely(tmc->tmgroup, tmc->groupmask)) in tmigr_quick_check()
1403 nextevt = min_t(u64, nextevt, READ_ONCE(group->next_expiry)); in tmigr_quick_check()
1404 if (!group->parent) in tmigr_quick_check()
1407 group = group->parent; in tmigr_quick_check()
1414 * tmigr_trigger_active() - trigger a CPU to become active again
1424 WARN_ON_ONCE(!tmc->online || tmc->idle); in tmigr_trigger_active()
1435 raw_spin_lock_irq(&tmc->lock); in tmigr_cpu_offline()
1436 tmc->online = false; in tmigr_cpu_offline()
1437 WRITE_ONCE(tmc->wakeup, KTIME_MAX); in tmigr_cpu_offline()
1440 * CPU has to handle the local events on its own, when on the way to
1445 raw_spin_unlock_irq(&tmc->lock); in tmigr_cpu_offline()
1460 if (WARN_ON_ONCE(!tmc->tmgroup)) in tmigr_cpu_online()
1461 return -EINVAL; in tmigr_cpu_online()
1463 raw_spin_lock_irq(&tmc->lock); in tmigr_cpu_online()
1465 tmc->idle = timer_base_is_idle(); in tmigr_cpu_online()
1466 if (!tmc->idle) in tmigr_cpu_online()
1468 tmc->online = true; in tmigr_cpu_online()
1469 raw_spin_unlock_irq(&tmc->lock); in tmigr_cpu_online()
1478 raw_spin_lock_init(&group->lock); in tmigr_init_group()
1480 group->level = lvl; in tmigr_init_group()
1481 group->numa_node = lvl < tmigr_crossnode_level ? node : NUMA_NO_NODE; in tmigr_init_group()
1483 group->num_children = 0; in tmigr_init_group()
1488 atomic_set(&group->migr_state, s.state); in tmigr_init_group()
1490 timerqueue_init_head(&group->events); in tmigr_init_group()
1491 timerqueue_init(&group->groupevt.nextevt); in tmigr_init_group()
1492 group->groupevt.nextevt.expires = KTIME_MAX; in tmigr_init_group()
1493 WRITE_ONCE(group->next_expiry, KTIME_MAX); in tmigr_init_group()
1494 group->groupevt.ignore = true; in tmigr_init_group()
1510 if (lvl < tmigr_crossnode_level && tmp->numa_node != node) in tmigr_get_group()
1514 if (tmp->num_children >= TMIGR_CHILDREN_PER_GROUP) in tmigr_get_group()
1534 return ERR_PTR(-ENOMEM); in tmigr_get_group()
1539 list_add(&group->list, &tmigr_level_list[lvl]); in tmigr_get_group()
1550 raw_spin_lock_irq(&child->lock); in tmigr_connect_child_parent()
1551 raw_spin_lock_nested(&parent->lock, SINGLE_DEPTH_NESTING); in tmigr_connect_child_parent()
1553 child->parent = parent; in tmigr_connect_child_parent()
1554 child->groupmask = BIT(parent->num_children++); in tmigr_connect_child_parent()
1556 raw_spin_unlock(&parent->lock); in tmigr_connect_child_parent()
1557 raw_spin_unlock_irq(&child->lock); in tmigr_connect_child_parent()
1588 data.childmask = child->groupmask; in tmigr_connect_child_parent()
1596 WARN_ON(!tmigr_active_up(parent, child, &data) && parent->parent); in tmigr_connect_child_parent()
1607 return -ENOMEM; in tmigr_setup_groups()
1627 if (group->parent || i == tmigr_hierarchy_levels || in tmigr_setup_groups()
1629 list_is_singular(&tmigr_level_list[i - 1]))) in tmigr_setup_groups()
1635 group = stack[--i]; in tmigr_setup_groups()
1638 list_del(&group->list); in tmigr_setup_groups()
1643 WARN_ON_ONCE(i != group->level); in tmigr_setup_groups()
1646 * Update tmc -> group / child -> group connection in tmigr_setup_groups()
1651 raw_spin_lock_irq(&group->lock); in tmigr_setup_groups()
1653 tmc->tmgroup = group; in tmigr_setup_groups()
1654 tmc->groupmask = BIT(group->num_children++); in tmigr_setup_groups()
1656 raw_spin_unlock_irq(&group->lock); in tmigr_setup_groups()
1663 child = stack[i - 1]; in tmigr_setup_groups()
1675 if (group->num_children == 1 && list_is_singular(lvllist)) { in tmigr_setup_groups()
1685 lvllist = &tmigr_level_list[top - 1]; in tmigr_setup_groups()
1687 if (child->parent) in tmigr_setup_groups()
1718 if (tmc->tmgroup) in tmigr_cpu_prepare()
1721 raw_spin_lock_init(&tmc->lock); in tmigr_cpu_prepare()
1722 timerqueue_init(&tmc->cpuevt.nextevt); in tmigr_cpu_prepare()
1723 tmc->cpuevt.nextevt.expires = KTIME_MAX; in tmigr_cpu_prepare()
1724 tmc->cpuevt.ignore = true; in tmigr_cpu_prepare()
1725 tmc->cpuevt.cpu = cpu; in tmigr_cpu_prepare()
1726 tmc->remote = false; in tmigr_cpu_prepare()
1727 WRITE_ONCE(tmc->wakeup, KTIME_MAX); in tmigr_cpu_prepare()
1733 if (tmc->groupmask == 0) in tmigr_cpu_prepare()
1734 return -EINVAL; in tmigr_cpu_prepare()
1744 int ret = -ENOMEM; in tmigr_init()