Lines Matching +full:cost +full:- +full:effective
7 * Copyright (C) 2004-2007 Silicon Graphics, Inc.
11 * sysfs is Copyright (c) 2001-3 Patrick Mochel
13 * 2003-10-10 Written by Simon Derr.
14 * 2003-10-22 Updates by Stephen Hemminger.
15 * 2004 May-July Rework by Paul Jackson.
24 #include "cgroup-internal.h"
25 #include "cpuset-internal.h"
50 * node binding, add this key to provide a quick low-cost judgment
68 * Exclusive CPUs distributed out to sub-partitions of top_cpuset
96 * 0 - member (not a partition root)
97 * 1 - partition root
98 * 2 - partition root without load balancing (isolated)
99 * -1 - invalid partition root
100 * -2 - invalid isolated partition root
102 * There are 2 types of partitions - local or remote. Local partitions are
116 #define PRS_INVALID_ROOT -1
117 #define PRS_INVALID_ISOLATED -2
137 cs->nr_deadline_tasks++; in inc_dl_tasks_cs()
144 cs->nr_deadline_tasks--; in dec_dl_tasks_cs()
149 return cs->partition_root_state > 0; in is_partition_valid()
154 return cs->partition_root_state < 0; in is_partition_invalid()
162 if (cs->partition_root_state > 0) in make_partition_invalid()
163 cs->partition_root_state = -cs->partition_root_state; in make_partition_invalid()
171 if (old_prs == cs->partition_root_state) in notify_partition_change()
173 cgroup_file_notify(&cs->partition_file); in notify_partition_change()
177 WRITE_ONCE(cs->prs_err, PERR_NONE); in notify_partition_change()
184 .relax_domain_level = -1,
189 * There are two global locks guarding cpuset structures - cpuset_mutex and
195 * paths that rely on priority inheritance (e.g. scheduler - on RT) for
212 * If a task is only holding callback_lock, then it has read-only
220 * small pieces of code, such as when reading out possibly multi-word
267 * decrease cs->attach_in_progress.
268 * wake_up cpuset_attach_wq if cs->attach_in_progress==0.
274 cs->attach_in_progress--; in dec_attach_in_progress_locked()
275 if (!cs->attach_in_progress) in dec_attach_in_progress_locked()
291 * requested and won't be changed by hotplug events. Only the effective
297 (cpuset_cgrp_subsys.root->flags & CGRP_ROOT_CPUSET_V2_MODE); in is_in_v2_mode()
301 * partition_is_populated - check if partition has tasks
307 * be non-NULL when this cpuset is going to become a partition itself.
315 if (cs->css.cgroup->nr_populated_csets) in partition_is_populated()
317 if (!excluded_child && !cs->nr_subparts) in partition_is_populated()
318 return cgroup_is_populated(cs->css.cgroup); in partition_is_populated()
326 if (cgroup_is_populated(child->css.cgroup)) { in partition_is_populated()
341 * One way or another, we guarantee to return some non-empty subset
358 while (!cpumask_intersects(cs->effective_cpus, pmask)) in guarantee_online_cpus()
361 cpumask_and(pmask, pmask, cs->effective_cpus); in guarantee_online_cpus()
371 * One way or another, we guarantee to return some non-empty subset
378 while (!nodes_intersects(cs->effective_mems, node_states[N_MEMORY])) in guarantee_online_mems()
380 nodes_and(*pmask, cs->effective_mems, node_states[N_MEMORY]); in guarantee_online_mems()
384 * alloc_cpumasks - allocate three cpumasks for cpuset
387 * Return: 0 if successful, -ENOMEM otherwise.
389 * Only one of the two input arguments should be non-NULL.
396 pmask1 = &cs->cpus_allowed; in alloc_cpumasks()
397 pmask2 = &cs->effective_cpus; in alloc_cpumasks()
398 pmask3 = &cs->effective_xcpus; in alloc_cpumasks()
399 pmask4 = &cs->exclusive_cpus; in alloc_cpumasks()
401 pmask1 = &tmp->new_cpus; in alloc_cpumasks()
402 pmask2 = &tmp->addmask; in alloc_cpumasks()
403 pmask3 = &tmp->delmask; in alloc_cpumasks()
408 return -ENOMEM; in alloc_cpumasks()
428 return -ENOMEM; in alloc_cpumasks()
432 * free_cpumasks - free cpumasks in a tmpmasks structure
439 free_cpumask_var(cs->cpus_allowed); in free_cpumasks()
440 free_cpumask_var(cs->effective_cpus); in free_cpumasks()
441 free_cpumask_var(cs->effective_xcpus); in free_cpumasks()
442 free_cpumask_var(cs->exclusive_cpus); in free_cpumasks()
445 free_cpumask_var(tmp->new_cpus); in free_cpumasks()
446 free_cpumask_var(tmp->addmask); in free_cpumasks()
447 free_cpumask_var(tmp->delmask); in free_cpumasks()
452 * alloc_trial_cpuset - allocate a trial cpuset
468 cpumask_copy(trial->cpus_allowed, cs->cpus_allowed); in alloc_trial_cpuset()
469 cpumask_copy(trial->effective_cpus, cs->effective_cpus); in alloc_trial_cpuset()
470 cpumask_copy(trial->effective_xcpus, cs->effective_xcpus); in alloc_trial_cpuset()
471 cpumask_copy(trial->exclusive_cpus, cs->exclusive_cpus); in alloc_trial_cpuset()
476 * free_cpuset - free the cpuset
488 return cpumask_empty(cs->exclusive_cpus) ? cs->cpus_allowed in user_xcpus()
489 : cs->exclusive_cpus; in user_xcpus()
494 return cpumask_empty(cs->cpus_allowed) && in xcpus_empty()
495 cpumask_empty(cs->exclusive_cpus); in xcpus_empty()
499 * cpusets_are_exclusive() - check if two cpusets are exclusive
514 * validate_change() - Used to validate that any proposed cpuset change
522 * 'cur' is the address of an actual, in-use cpuset. Operations
530 * Return 0 if valid, -errno if not.
553 * Cpusets with tasks - existing or newly being attached - can't in validate_change()
556 ret = -ENOSPC; in validate_change()
557 if ((cgroup_is_populated(cur->css.cgroup) || cur->attach_in_progress)) { in validate_change()
558 if (!cpumask_empty(cur->cpus_allowed) && in validate_change()
559 cpumask_empty(trial->cpus_allowed)) in validate_change()
561 if (!nodes_empty(cur->mems_allowed) && in validate_change()
562 nodes_empty(trial->mems_allowed)) in validate_change()
570 ret = -EBUSY; in validate_change()
572 !cpuset_cpumask_can_shrink(cur->cpus_allowed, in validate_change()
573 trial->cpus_allowed)) in validate_change()
580 ret = -EINVAL; in validate_change()
587 txset = !cpumask_empty(trial->exclusive_cpus); in validate_change()
588 cxset = !cpumask_empty(c->exclusive_cpus); in validate_change()
603 xcpus = trial->exclusive_cpus; in validate_change()
604 acpus = c->cpus_allowed; in validate_change()
606 xcpus = c->exclusive_cpus; in validate_change()
607 acpus = trial->cpus_allowed; in validate_change()
613 nodes_intersects(trial->mems_allowed, c->mems_allowed)) in validate_change()
626 * Do cpusets a, b have overlapping effective cpus_allowed masks?
630 return cpumask_intersects(a->effective_cpus, b->effective_cpus); in cpusets_overlap()
636 if (dattr->relax_domain_level < c->relax_domain_level) in update_domain_attr()
637 dattr->relax_domain_level = c->relax_domain_level; in update_domain_attr()
650 if (cpumask_empty(cp->cpus_allowed)) { in update_domain_attr_tree()
664 /* jump label reference count + the top-level cpuset */ in nr_cpusets()
672 * A 'partial partition' is a set of non-overlapping subsets whose
679 * See "What is sched_load_balance" in Documentation/admin-guide/cgroup-v1/cpusets.rst
690 * cp - cpuset pointer, used (together with pos_css) to perform a
691 * top-down scan of all cpusets. For our purposes, rebuilding
694 * csa - (for CpuSet Array) Array of pointers to all the cpusets
701 * doms - Conversion of 'csa' to an array of cpumasks, for passing to
711 * and merging them using a union-find algorithm.
721 struct cpuset *cp; /* top-down scan of cpusets */ in generate_sched_domains()
781 if (!cpumask_empty(cp->cpus_allowed) && in generate_sched_domains()
783 cpumask_intersects(cp->cpus_allowed, in generate_sched_domains()
788 !cpumask_empty(cp->effective_cpus)) in generate_sched_domains()
798 * non-empty effective_cpus will be saved into csn[]. in generate_sched_domains()
800 if ((cp->partition_root_state == PRS_ROOT) && in generate_sched_domains()
801 !cpumask_empty(cp->effective_cpus)) in generate_sched_domains()
808 if (!is_partition_valid(cp) && cpumask_empty(cp->exclusive_cpus)) in generate_sched_domains()
821 uf_node_init(&csa[i]->node); in generate_sched_domains()
832 uf_union(&csa[i]->node, &csa[j]->node); in generate_sched_domains()
839 if (uf_find(&csa[i]->node) == &csa[i]->node) in generate_sched_domains()
860 * to SD_ATTR_INIT. Also non-isolating partition root CPUs are a in generate_sched_domains()
865 cpumask_copy(doms[i], csa[i]->effective_cpus); in generate_sched_domains()
875 if (uf_find(&csa[j]->node) == &csa[i]->node) { in generate_sched_domains()
884 cpumask_or(dp, dp, csa[j]->effective_cpus); in generate_sched_domains()
915 if (cs->nr_deadline_tasks == 0) in dl_update_tasks_root_domain()
918 css_task_iter_start(&cs->css, 0, &it); in dl_update_tasks_root_domain()
945 if (cpumask_empty(cs->effective_cpus)) { in dl_rebuild_rd_accounting()
950 css_get(&cs->css); in dl_rebuild_rd_accounting()
957 css_put(&cs->css); in dl_rebuild_rd_accounting()
975 * If the flag 'sched_load_balance' of any cpuset with non-empty
977 * which has that flag enabled, or if any cpuset with a non-empty
999 * With no CPUs in any subpartitions, top_cpuset's effective CPUs in rebuild_sched_domains_locked()
1008 * With subpartition CPUs, however, the effective CPUs of a partition in rebuild_sched_domains_locked()
1019 if (!cpumask_subset(cs->effective_cpus, in rebuild_sched_domains_locked()
1055 * cpuset_update_tasks_cpumask - Update the cpumasks of tasks in the cpuset.
1060 * effective cpuset's. As this function is called with cpuset_mutex held,
1071 css_task_iter_start(&cs->css, 0, &it); in cpuset_update_tasks_cpumask()
1083 cpumask_and(new_cpus, possible_mask, cs->effective_cpus); in cpuset_update_tasks_cpumask()
1091 * compute_effective_cpumask - Compute the effective cpumask of the cpuset
1101 cpumask_and(new_cpus, cs->cpus_allowed, parent->effective_cpus); in compute_effective_cpumask()
1146 int new_prs = cs->partition_root_state; in update_partition_sd_lb()
1162 set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); in update_partition_sd_lb()
1164 clear_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); in update_partition_sd_lb()
1172 * tasks_nocpu_error - Return true if tasks will have no effective_cpus
1180 return (cpumask_subset(parent->effective_cpus, xcpus) && in tasks_nocpu_error()
1195 cs->nr_subparts = 0; in reset_partition_data()
1196 if (cpumask_empty(cs->exclusive_cpus)) { in reset_partition_data()
1197 cpumask_clear(cs->effective_xcpus); in reset_partition_data()
1199 clear_bit(CS_CPU_EXCLUSIVE, &cs->flags); in reset_partition_data()
1201 if (!cpumask_and(cs->effective_cpus, parent->effective_cpus, cs->cpus_allowed)) in reset_partition_data()
1202 cpumask_copy(cs->effective_cpus, parent->effective_cpus); in reset_partition_data()
1206 * partition_xcpus_newstate - Exclusive CPUs state change
1221 * partition_xcpus_add - Add new exclusive CPUs to partition
1243 isolcpus_updated = (new_prs != parent->partition_root_state); in partition_xcpus_add()
1245 partition_xcpus_newstate(parent->partition_root_state, new_prs, in partition_xcpus_add()
1248 cpumask_andnot(parent->effective_cpus, parent->effective_cpus, xcpus); in partition_xcpus_add()
1253 * partition_xcpus_del - Remove exclusive CPUs from partition
1274 isolcpus_updated = (old_prs != parent->partition_root_state); in partition_xcpus_del()
1276 partition_xcpus_newstate(old_prs, parent->partition_root_state, in partition_xcpus_del()
1280 cpumask_or(parent->effective_cpus, parent->effective_cpus, xcpus); in partition_xcpus_del()
1298 * cpuset_cpu_is_isolated - Check if the given CPU is isolated
1309 * compute_effective_exclusive_cpumask - compute effective exclusive CPUs
1311 * @xcpus: effective exclusive CPUs value to be set
1323 xcpus = cs->effective_xcpus; in compute_effective_exclusive_cpumask()
1325 return cpumask_and(xcpus, user_xcpus(cs), parent->effective_xcpus); in compute_effective_exclusive_cpumask()
1330 return !list_empty(&cs->remote_sibling); in is_remote_partition()
1339 * remote_partition_enable - Enable current cpuset as a remote partition root
1367 compute_effective_exclusive_cpumask(cs, tmp->new_cpus); in remote_partition_enable()
1368 if (cpumask_empty(tmp->new_cpus) || in remote_partition_enable()
1369 cpumask_intersects(tmp->new_cpus, subpartitions_cpus) || in remote_partition_enable()
1370 cpumask_subset(top_cpuset.effective_cpus, tmp->new_cpus)) in remote_partition_enable()
1374 isolcpus_updated = partition_xcpus_add(new_prs, NULL, tmp->new_cpus); in remote_partition_enable()
1375 list_add(&cs->remote_sibling, &remote_children); in remote_partition_enable()
1382 cpuset_update_tasks_cpumask(&top_cpuset, tmp->new_cpus); in remote_partition_enable()
1388 * remote_partition_disable - Remove current cpuset from remote partition list
1400 compute_effective_exclusive_cpumask(cs, tmp->new_cpus); in remote_partition_disable()
1402 WARN_ON_ONCE(!cpumask_subset(tmp->new_cpus, subpartitions_cpus)); in remote_partition_disable()
1405 list_del_init(&cs->remote_sibling); in remote_partition_disable()
1406 isolcpus_updated = partition_xcpus_del(cs->partition_root_state, in remote_partition_disable()
1407 NULL, tmp->new_cpus); in remote_partition_disable()
1408 cs->partition_root_state = -cs->partition_root_state; in remote_partition_disable()
1409 if (!cs->prs_err) in remote_partition_disable()
1410 cs->prs_err = PERR_INVCPUS; in remote_partition_disable()
1418 cpuset_update_tasks_cpumask(&top_cpuset, tmp->new_cpus); in remote_partition_disable()
1423 * remote_cpus_update - cpus_exclusive change of remote partition
1435 int prs = cs->partition_root_state; in remote_cpus_update()
1441 WARN_ON_ONCE(!cpumask_subset(cs->effective_xcpus, subpartitions_cpus)); in remote_cpus_update()
1446 adding = cpumask_andnot(tmp->addmask, newmask, cs->effective_xcpus); in remote_cpus_update()
1447 deleting = cpumask_andnot(tmp->delmask, cs->effective_xcpus, newmask); in remote_cpus_update()
1455 cpumask_intersects(tmp->addmask, subpartitions_cpus) || in remote_cpus_update()
1456 cpumask_subset(top_cpuset.effective_cpus, tmp->addmask))) in remote_cpus_update()
1461 isolcpus_updated += partition_xcpus_add(prs, NULL, tmp->addmask); in remote_cpus_update()
1463 isolcpus_updated += partition_xcpus_del(prs, NULL, tmp->delmask); in remote_cpus_update()
1470 cpuset_update_tasks_cpumask(&top_cpuset, tmp->new_cpus); in remote_cpus_update()
1479 * remote_partition_check - check if a child remote partition needs update
1495 * Compute the effective exclusive CPUs that will be deleted. in remote_partition_check()
1497 if (!cpumask_andnot(delmask, cs->effective_xcpus, newmask) || in remote_partition_check()
1511 if (cpumask_intersects(child->effective_cpus, delmask)) { in remote_partition_check()
1520 * prstate_housekeeping_conflict - check for partition & housekeeping conflicts
1540 * update_parent_effective_cpumask - update effective_cpus mask of parent cpuset
1547 * For partcmd_enable*, the cpuset is being transformed from a non-partition
1554 * root back to a non-partition root. Any CPUs in effective_xcpus will be
1597 old_prs = new_prs = cs->partition_root_state; in update_parent_effective_cpumask()
1608 adding = cpumask_and(tmp->addmask, in update_parent_effective_cpumask()
1609 xcpus, parent->effective_xcpus); in update_parent_effective_cpumask()
1611 new_prs = -old_prs; in update_parent_effective_cpumask()
1612 subparts_delta--; in update_parent_effective_cpumask()
1638 !cpumask_intersects(xcpus, parent->effective_xcpus)) in update_parent_effective_cpumask()
1651 cpumask_copy(tmp->delmask, xcpus); in update_parent_effective_cpumask()
1661 cpumask_and(tmp->addmask, xcpus, parent->effective_xcpus); in update_parent_effective_cpumask()
1663 subparts_delta--; in update_parent_effective_cpumask()
1683 * & parent->effective_xcpus in update_parent_effective_cpumask()
1685 * & parent->effective_xcpus in update_parent_effective_cpumask()
1688 * delmask = newmask & parent->effective_xcpus in update_parent_effective_cpumask()
1692 deleting = cpumask_and(tmp->delmask, in update_parent_effective_cpumask()
1693 newmask, parent->effective_xcpus); in update_parent_effective_cpumask()
1695 cpumask_andnot(tmp->addmask, xcpus, newmask); in update_parent_effective_cpumask()
1696 adding = cpumask_and(tmp->addmask, tmp->addmask, in update_parent_effective_cpumask()
1697 parent->effective_xcpus); in update_parent_effective_cpumask()
1699 cpumask_andnot(tmp->delmask, newmask, xcpus); in update_parent_effective_cpumask()
1700 deleting = cpumask_and(tmp->delmask, tmp->delmask, in update_parent_effective_cpumask()
1701 parent->effective_xcpus); in update_parent_effective_cpumask()
1708 !cpumask_intersects(tmp->addmask, cpu_active_mask))) { in update_parent_effective_cpumask()
1711 adding = cpumask_and(tmp->addmask, in update_parent_effective_cpumask()
1712 xcpus, parent->effective_xcpus); in update_parent_effective_cpumask()
1718 * delmask = effective_xcpus & parent->effective_cpus in update_parent_effective_cpumask()
1728 * its effective CPUs will have to be distributed out. in update_parent_effective_cpumask()
1734 adding = cpumask_and(tmp->addmask, in update_parent_effective_cpumask()
1735 xcpus, parent->effective_xcpus); in update_parent_effective_cpumask()
1737 cpumask_subset(xcpus, parent->effective_xcpus)) { in update_parent_effective_cpumask()
1757 deleting = cpumask_and(tmp->delmask, in update_parent_effective_cpumask()
1758 xcpus, parent->effective_cpus); in update_parent_effective_cpumask()
1766 WRITE_ONCE(cs->prs_err, part_error); in update_parent_effective_cpumask()
1773 switch (cs->partition_root_state) { in update_parent_effective_cpumask()
1777 new_prs = -old_prs; in update_parent_effective_cpumask()
1778 subparts_delta--; in update_parent_effective_cpumask()
1784 new_prs = -old_prs; in update_parent_effective_cpumask()
1816 cs->partition_root_state = new_prs; in update_parent_effective_cpumask()
1818 cs->nr_subparts = 0; in update_parent_effective_cpumask()
1826 tmp->addmask); in update_parent_effective_cpumask()
1829 tmp->delmask); in update_parent_effective_cpumask()
1832 parent->nr_subparts += subparts_delta; in update_parent_effective_cpumask()
1833 WARN_ON_ONCE(parent->nr_subparts < 0); in update_parent_effective_cpumask()
1842 cpuset_update_tasks_cpumask(parent, tmp->addmask); in update_parent_effective_cpumask()
1859 * compute_partition_effective_cpumask - compute effective_cpus for partition
1897 child->prs_err = 0; in compute_partition_effective_cpumask()
1898 if (!cpumask_subset(child->effective_xcpus, in compute_partition_effective_cpumask()
1899 cs->effective_xcpus)) in compute_partition_effective_cpumask()
1900 child->prs_err = PERR_INVCPUS; in compute_partition_effective_cpumask()
1902 cpumask_subset(new_ecpus, child->effective_xcpus)) in compute_partition_effective_cpumask()
1903 child->prs_err = PERR_NOCPUS; in compute_partition_effective_cpumask()
1905 if (child->prs_err) { in compute_partition_effective_cpumask()
1906 int old_prs = child->partition_root_state; in compute_partition_effective_cpumask()
1913 cs->nr_subparts--; in compute_partition_effective_cpumask()
1914 child->nr_subparts = 0; in compute_partition_effective_cpumask()
1920 child->effective_xcpus); in compute_partition_effective_cpumask()
1932 * update_cpumasks_hier - Update effective cpumasks and tasks in the subtree
1937 * When configured cpumask is changed, the effective cpumasks of this cpuset
1971 if (!cpumask_empty(cp->exclusive_cpus) && (cp != cs)) { in update_cpumasks_hier()
1977 old_prs = new_prs = cp->partition_root_state; in update_cpumasks_hier()
1980 compute_partition_effective_cpumask(cp, tmp->new_cpus); in update_cpumasks_hier()
1982 compute_effective_cpumask(tmp->new_cpus, cp, parent); in update_cpumasks_hier()
1989 if (is_partition_valid(cp) && cpumask_empty(tmp->new_cpus)) { in update_cpumasks_hier()
1995 * If it becomes empty, inherit the effective mask of the in update_cpumasks_hier()
2000 if (is_in_v2_mode() && !remote && cpumask_empty(tmp->new_cpus)) in update_cpumasks_hier()
2001 cpumask_copy(tmp->new_cpus, parent->effective_cpus); in update_cpumasks_hier()
2013 if (!cp->partition_root_state && !(flags & HIER_CHECKALL) && in update_cpumasks_hier()
2014 cpumask_equal(tmp->new_cpus, cp->effective_cpus) && in update_cpumasks_hier()
2029 switch (parent->partition_root_state) { in update_cpumasks_hier()
2042 new_prs = -cp->partition_root_state; in update_cpumasks_hier()
2043 WRITE_ONCE(cp->prs_err, in update_cpumasks_hier()
2050 if (!css_tryget_online(&cp->css)) in update_cpumasks_hier()
2060 new_prs = cp->partition_root_state; in update_cpumasks_hier()
2064 cpumask_copy(cp->effective_cpus, tmp->new_cpus); in update_cpumasks_hier()
2065 cp->partition_root_state = new_prs; in update_cpumasks_hier()
2070 if ((new_prs > 0) && cpumask_empty(cp->exclusive_cpus)) in update_cpumasks_hier()
2071 cpumask_and(cp->effective_xcpus, in update_cpumasks_hier()
2072 cp->cpus_allowed, parent->effective_xcpus); in update_cpumasks_hier()
2080 !cpumask_equal(cp->cpus_allowed, cp->effective_cpus)); in update_cpumasks_hier()
2082 cpuset_update_tasks_cpumask(cp, cp->effective_cpus); in update_cpumasks_hier()
2093 set_bit(CS_SCHED_LOAD_BALANCE, &cp->flags); in update_cpumasks_hier()
2095 clear_bit(CS_SCHED_LOAD_BALANCE, &cp->flags); in update_cpumasks_hier()
2099 * On legacy hierarchy, if the effective cpumask of any non- in update_cpumasks_hier()
2104 if (!cpumask_empty(cp->cpus_allowed) && in update_cpumasks_hier()
2111 css_put(&cp->css); in update_cpumasks_hier()
2121 * update_sibling_cpumasks - Update siblings cpumasks
2153 compute_effective_cpumask(tmp->new_cpus, sibling, in update_sibling_cpumasks()
2155 if (cpumask_equal(tmp->new_cpus, sibling->effective_cpus)) in update_sibling_cpumasks()
2158 if (!css_tryget_online(&sibling->css)) in update_sibling_cpumasks()
2164 css_put(&sibling->css); in update_sibling_cpumasks()
2170 * update_cpumask - update the cpus_allowed mask of a cpuset and all tasks in it
2183 int old_prs = cs->partition_root_state; in update_cpumask()
2185 /* top_cpuset.cpus_allowed tracks cpu_online_mask; it's read-only */ in update_cpumask()
2187 return -EACCES; in update_cpumask()
2196 cpumask_clear(trialcs->cpus_allowed); in update_cpumask()
2197 if (cpumask_empty(trialcs->exclusive_cpus)) in update_cpumask()
2198 cpumask_clear(trialcs->effective_xcpus); in update_cpumask()
2200 retval = cpulist_parse(buf, trialcs->cpus_allowed); in update_cpumask()
2204 if (!cpumask_subset(trialcs->cpus_allowed, in update_cpumask()
2206 return -EINVAL; in update_cpumask()
2211 * trialcs->effective_xcpus is used as a temporary cpumask in update_cpumask()
2214 if (!cpumask_empty(trialcs->exclusive_cpus) || is_partition_valid(cs)) in update_cpumask()
2219 if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed)) in update_cpumask()
2223 return -ENOMEM; in update_cpumask()
2227 cpumask_empty(trialcs->effective_xcpus)) { in update_cpumask()
2229 cs->prs_err = PERR_INVCPUS; in update_cpumask()
2230 } else if (prstate_housekeeping_conflict(old_prs, trialcs->effective_xcpus)) { in update_cpumask()
2232 cs->prs_err = PERR_HKEEPING; in update_cpumask()
2233 } else if (tasks_nocpu_error(parent, cs, trialcs->effective_xcpus)) { in update_cpumask()
2235 cs->prs_err = PERR_NOCPUS; in update_cpumask()
2243 if (!cpumask_equal(cs->effective_xcpus, trialcs->effective_xcpus)) in update_cpumask()
2248 if ((retval == -EINVAL) && cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) { in update_cpumask()
2253 * The -EINVAL error code indicates that partition sibling in update_cpumask()
2265 cpumask_intersects(xcpus, cp->effective_xcpus)) { in update_cpumask()
2280 struct cpumask *xcpus = trialcs->effective_xcpus; in update_cpumask()
2283 xcpus = trialcs->cpus_allowed; in update_cpumask()
2296 } else if (!cpumask_empty(cs->exclusive_cpus)) { in update_cpumask()
2298 * Use trialcs->effective_cpus as a temp cpumask in update_cpumask()
2300 remote_partition_check(cs, trialcs->effective_xcpus, in update_cpumask()
2301 trialcs->effective_cpus, &tmp); in update_cpumask()
2305 cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); in update_cpumask()
2306 cpumask_copy(cs->effective_xcpus, trialcs->effective_xcpus); in update_cpumask()
2315 if (cs->partition_root_state) in update_cpumask()
2323 * update_exclusive_cpumask - update the exclusive_cpus mask of a cpuset
2338 int old_prs = cs->partition_root_state; in update_exclusive_cpumask()
2341 cpumask_clear(trialcs->exclusive_cpus); in update_exclusive_cpumask()
2342 cpumask_clear(trialcs->effective_xcpus); in update_exclusive_cpumask()
2344 retval = cpulist_parse(buf, trialcs->exclusive_cpus); in update_exclusive_cpumask()
2350 if (cpumask_equal(cs->exclusive_cpus, trialcs->exclusive_cpus)) in update_exclusive_cpumask()
2360 if (!cpumask_equal(cs->effective_xcpus, trialcs->effective_xcpus)) in update_exclusive_cpumask()
2368 return -ENOMEM; in update_exclusive_cpumask()
2371 if (cpumask_empty(trialcs->effective_xcpus)) { in update_exclusive_cpumask()
2373 cs->prs_err = PERR_INVCPUS; in update_exclusive_cpumask()
2374 } else if (prstate_housekeeping_conflict(old_prs, trialcs->effective_xcpus)) { in update_exclusive_cpumask()
2376 cs->prs_err = PERR_HKEEPING; in update_exclusive_cpumask()
2377 } else if (tasks_nocpu_error(parent, cs, trialcs->effective_xcpus)) { in update_exclusive_cpumask()
2379 cs->prs_err = PERR_NOCPUS; in update_exclusive_cpumask()
2386 remote_cpus_update(cs, trialcs->effective_xcpus, in update_exclusive_cpumask()
2393 trialcs->effective_xcpus, &tmp); in update_exclusive_cpumask()
2395 } else if (!cpumask_empty(trialcs->exclusive_cpus)) { in update_exclusive_cpumask()
2397 * Use trialcs->effective_cpus as a temp cpumask in update_exclusive_cpumask()
2399 remote_partition_check(cs, trialcs->effective_xcpus, in update_exclusive_cpumask()
2400 trialcs->effective_cpus, &tmp); in update_exclusive_cpumask()
2403 cpumask_copy(cs->exclusive_cpus, trialcs->exclusive_cpus); in update_exclusive_cpumask()
2404 cpumask_copy(cs->effective_xcpus, trialcs->effective_xcpus); in update_exclusive_cpumask()
2418 if (cs->partition_root_state) in update_exclusive_cpumask()
2446 do_migrate_pages(mwork->mm, &mwork->from, &mwork->to, MPOL_MF_MOVE_ALL); in cpuset_migrate_mm_workfn()
2447 mmput(mwork->mm); in cpuset_migrate_mm_workfn()
2463 mwork->mm = mm; in cpuset_migrate_mm()
2464 mwork->from = *from; in cpuset_migrate_mm()
2465 mwork->to = *to; in cpuset_migrate_mm()
2466 INIT_WORK(&mwork->work, cpuset_migrate_mm_workfn); in cpuset_migrate_mm()
2467 queue_work(cpuset_migrate_mm_wq, &mwork->work); in cpuset_migrate_mm()
2479 * cpuset_change_task_nodemask - change task's mems_allowed and mempolicy
2483 * We use the mems_allowed_seq seqlock to safely update both tsk->mems_allowed
2494 write_seqcount_begin(&tsk->mems_allowed_seq); in cpuset_change_task_nodemask()
2496 nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); in cpuset_change_task_nodemask()
2498 tsk->mems_allowed = *newmems; in cpuset_change_task_nodemask()
2500 write_seqcount_end(&tsk->mems_allowed_seq); in cpuset_change_task_nodemask()
2509 * cpuset_update_tasks_nodemask - Update the nodemasks of tasks in the cpuset.
2513 * effective cpuset's. As this function is called with cpuset_mutex held,
2528 * take while holding tasklist_lock. Forks can happen - the in cpuset_update_tasks_nodemask()
2536 css_task_iter_start(&cs->css, 0, &it); in cpuset_update_tasks_nodemask()
2549 mpol_rebind_mm(mm, &cs->mems_allowed); in cpuset_update_tasks_nodemask()
2551 cpuset_migrate_mm(mm, &cs->old_mems_allowed, &newmems); in cpuset_update_tasks_nodemask()
2559 * cs->old_mems_allowed. in cpuset_update_tasks_nodemask()
2561 cs->old_mems_allowed = newmems; in cpuset_update_tasks_nodemask()
2568 * update_nodemasks_hier - Update effective nodemasks and tasks in the subtree
2572 * When configured nodemask is changed, the effective nodemasks of this cpuset
2588 nodes_and(*new_mems, cp->mems_allowed, parent->effective_mems); in update_nodemasks_hier()
2591 * If it becomes empty, inherit the effective mask of the in update_nodemasks_hier()
2595 *new_mems = parent->effective_mems; in update_nodemasks_hier()
2598 if (nodes_equal(*new_mems, cp->effective_mems)) { in update_nodemasks_hier()
2603 if (!css_tryget_online(&cp->css)) in update_nodemasks_hier()
2608 cp->effective_mems = *new_mems; in update_nodemasks_hier()
2612 !nodes_equal(cp->mems_allowed, cp->effective_mems)); in update_nodemasks_hier()
2617 css_put(&cp->css); in update_nodemasks_hier()
2632 * lock each such tasks mm->mmap_lock, scan its vma's and rebind
2642 * it's read-only in update_nodemask()
2645 retval = -EACCES; in update_nodemask()
2656 nodes_clear(trialcs->mems_allowed); in update_nodemask()
2658 retval = nodelist_parse(buf, trialcs->mems_allowed); in update_nodemask()
2662 if (!nodes_subset(trialcs->mems_allowed, in update_nodemask()
2664 retval = -EINVAL; in update_nodemask()
2669 if (nodes_equal(cs->mems_allowed, trialcs->mems_allowed)) { in update_nodemask()
2670 retval = 0; /* Too easy - nothing to do */ in update_nodemask()
2677 check_insane_mems_config(&trialcs->mems_allowed); in update_nodemask()
2680 cs->mems_allowed = trialcs->mems_allowed; in update_nodemask()
2683 /* use trialcs->mems_allowed as a temp variable */ in update_nodemask()
2684 update_nodemasks_hier(cs, &trialcs->mems_allowed); in update_nodemask()
2701 * cpuset_update_flag - read a 0 or a 1 in a file and update associated flag
2719 return -ENOMEM; in cpuset_update_flag()
2722 set_bit(bit, &trialcs->flags); in cpuset_update_flag()
2724 clear_bit(bit, &trialcs->flags); in cpuset_update_flag()
2737 cs->flags = trialcs->flags; in cpuset_update_flag()
2740 if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed && in cpuset_update_flag()
2752 * update_prstate - update partition_root_state
2761 int err = PERR_NONE, old_prs = cs->partition_root_state; in update_prstate()
2776 return -ENOMEM; in update_prstate()
2782 if ((new_prs > 0) && cpumask_empty(cs->exclusive_cpus)) { in update_prstate()
2784 cpumask_and(cs->effective_xcpus, in update_prstate()
2785 cs->cpus_allowed, parent->effective_xcpus); in update_prstate()
2841 new_prs = -new_prs; in update_prstate()
2846 cs->partition_root_state = new_prs; in update_prstate()
2847 WRITE_ONCE(cs->prs_err, err); in update_prstate()
2851 partition_xcpus_newstate(old_prs, new_prs, cs->effective_xcpus); in update_prstate()
2876 if (cpumask_empty(cs->effective_cpus) || in cpuset_can_attach_check()
2877 (!is_in_v2_mode() && nodes_empty(cs->mems_allowed))) in cpuset_can_attach_check()
2878 return -ENOSPC; in cpuset_can_attach_check()
2884 cs->nr_migrate_dl_tasks = 0; in reset_migrate_dl_data()
2885 cs->sum_migrate_dl_bw = 0; in reset_migrate_dl_data()
2909 cpus_updated = !cpumask_equal(cs->effective_cpus, oldcs->effective_cpus); in cpuset_can_attach()
2910 mems_updated = !nodes_equal(cs->effective_mems, oldcs->effective_mems); in cpuset_can_attach()
2930 cs->nr_migrate_dl_tasks++; in cpuset_can_attach()
2931 cs->sum_migrate_dl_bw += task->dl.dl_bw; in cpuset_can_attach()
2935 if (!cs->nr_migrate_dl_tasks) in cpuset_can_attach()
2938 if (!cpumask_intersects(oldcs->effective_cpus, cs->effective_cpus)) { in cpuset_can_attach()
2939 int cpu = cpumask_any_and(cpu_active_mask, cs->effective_cpus); in cpuset_can_attach()
2943 ret = -EINVAL; in cpuset_can_attach()
2947 ret = dl_bw_alloc(cpu, cs->sum_migrate_dl_bw); in cpuset_can_attach()
2959 cs->attach_in_progress++; in cpuset_can_attach()
2976 if (cs->nr_migrate_dl_tasks) { in cpuset_cancel_attach()
2977 int cpu = cpumask_any(cs->effective_cpus); in cpuset_cancel_attach()
2979 dl_bw_free(cpu, cs->sum_migrate_dl_bw); in cpuset_cancel_attach()
3027 cpus_updated = !cpumask_equal(cs->effective_cpus, in cpuset_attach()
3028 oldcs->effective_cpus); in cpuset_attach()
3029 mems_updated = !nodes_equal(cs->effective_mems, oldcs->effective_mems); in cpuset_attach()
3034 * in effective cpus and mems. In that case, we can optimize out in cpuset_attach()
3039 cpuset_attach_nodemask_to = cs->effective_mems; in cpuset_attach()
3054 cpuset_attach_nodemask_to = cs->effective_mems; in cpuset_attach()
3073 cpuset_migrate_mm(mm, &oldcs->old_mems_allowed, in cpuset_attach()
3081 cs->old_mems_allowed = cpuset_attach_nodemask_to; in cpuset_attach()
3083 if (cs->nr_migrate_dl_tasks) { in cpuset_attach()
3084 cs->nr_deadline_tasks += cs->nr_migrate_dl_tasks; in cpuset_attach()
3085 oldcs->nr_deadline_tasks -= cs->nr_migrate_dl_tasks; in cpuset_attach()
3102 int retval = -ENODEV; in cpuset_write_resmask()
3125 css_get(&cs->css); in cpuset_write_resmask()
3126 kernfs_break_active_protection(of->kn); in cpuset_write_resmask()
3135 retval = -ENOMEM; in cpuset_write_resmask()
3139 switch (of_cft(of)->private) { in cpuset_write_resmask()
3150 retval = -EINVAL; in cpuset_write_resmask()
3158 kernfs_unbreak_active_protection(of->kn); in cpuset_write_resmask()
3159 css_put(&cs->css); in cpuset_write_resmask()
3175 cpuset_filetype_t type = seq_cft(sf)->private; in cpuset_common_seq_show()
3182 seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_allowed)); in cpuset_common_seq_show()
3185 seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->mems_allowed)); in cpuset_common_seq_show()
3188 seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->effective_cpus)); in cpuset_common_seq_show()
3191 seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->effective_mems)); in cpuset_common_seq_show()
3194 seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->exclusive_cpus)); in cpuset_common_seq_show()
3197 seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->effective_xcpus)); in cpuset_common_seq_show()
3206 ret = -EINVAL; in cpuset_common_seq_show()
3218 switch (cs->partition_root_state) { in sched_partition_show()
3234 err = perr_strings[READ_ONCE(cs->prs_err)]; in sched_partition_show()
3249 int retval = -ENODEV; in sched_partition_write()
3260 return -EINVAL; in sched_partition_write()
3262 css_get(&cs->css); in sched_partition_write()
3272 css_put(&cs->css); in sched_partition_write()
3300 .name = "cpus.effective",
3306 .name = "mems.effective",
3330 .name = "cpus.exclusive.effective",
3355 * cpuset_css_alloc - Allocate a cpuset css
3358 * Return: cpuset css on success, -ENOMEM on failure.
3360 * Allocate and initialize a new cpuset css, for non-NULL @parent_css, return
3373 return ERR_PTR(-ENOMEM); in cpuset_css_alloc()
3377 return ERR_PTR(-ENOMEM); in cpuset_css_alloc()
3380 __set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); in cpuset_css_alloc()
3381 fmeter_init(&cs->fmeter); in cpuset_css_alloc()
3382 cs->relax_domain_level = -1; in cpuset_css_alloc()
3383 INIT_LIST_HEAD(&cs->remote_sibling); in cpuset_css_alloc()
3387 __set_bit(CS_MEMORY_MIGRATE, &cs->flags); in cpuset_css_alloc()
3389 return &cs->css; in cpuset_css_alloc()
3405 set_bit(CS_ONLINE, &cs->flags); in cpuset_css_online()
3407 set_bit(CS_SPREAD_PAGE, &cs->flags); in cpuset_css_online()
3409 set_bit(CS_SPREAD_SLAB, &cs->flags); in cpuset_css_online()
3415 clear_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); in cpuset_css_online()
3421 cpumask_copy(cs->effective_cpus, parent->effective_cpus); in cpuset_css_online()
3422 cs->effective_mems = parent->effective_mems; in cpuset_css_online()
3426 if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags)) in cpuset_css_online()
3432 * historical reasons - the flag may be specified during mount. in cpuset_css_online()
3435 * refuse to clone the configuration - thereby refusing the task to in cpuset_css_online()
3439 * changed to grant parent->cpus_allowed-sibling_cpus_exclusive in cpuset_css_online()
3452 cs->mems_allowed = parent->mems_allowed; in cpuset_css_online()
3453 cs->effective_mems = parent->mems_allowed; in cpuset_css_online()
3454 cpumask_copy(cs->cpus_allowed, parent->cpus_allowed); in cpuset_css_online()
3455 cpumask_copy(cs->effective_cpus, parent->cpus_allowed); in cpuset_css_online()
3489 clear_bit(CS_ONLINE, &cs->flags); in cpuset_css_offline()
3527 struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]); in cpuset_can_fork()
3558 cs->attach_in_progress++; in cpuset_can_fork()
3566 struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]); in cpuset_cancel_fork()
3598 set_cpus_allowed_ptr(task, current->cpus_ptr); in cpuset_fork()
3599 task->mems_allowed = current->mems_allowed; in cpuset_fork()
3634 * cpuset_init - initialize cpusets at system boot
3675 /* A partition root is allowed to have empty effective cpus */ in hotplug_update_tasks()
3677 cpumask_copy(new_cpus, parent_cs(cs)->effective_cpus); in hotplug_update_tasks()
3679 *new_mems = parent_cs(cs)->effective_mems; in hotplug_update_tasks()
3682 cpumask_copy(cs->effective_cpus, new_cpus); in hotplug_update_tasks()
3683 cs->effective_mems = *new_mems; in hotplug_update_tasks()
3698 * cpuset_hotplug_update_tasks - update tasks in a cpuset for hotunplug
3713 int partcmd = -1; in cpuset_hotplug_update_tasks()
3716 wait_event(cpuset_attach_wq, cs->attach_in_progress == 0); in cpuset_hotplug_update_tasks()
3724 if (cs->attach_in_progress) { in cpuset_hotplug_update_tasks()
3731 nodes_and(new_mems, cs->mems_allowed, parent->effective_mems); in cpuset_hotplug_update_tasks()
3733 if (!tmp || !cs->partition_root_state) in cpuset_hotplug_update_tasks()
3755 * 1) empty effective cpus but not valid empty partition. in cpuset_hotplug_update_tasks()
3778 cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus); in cpuset_hotplug_update_tasks()
3779 mems_updated = !nodes_equal(new_mems, cs->effective_mems); in cpuset_hotplug_update_tasks()
3798 * cpuset_handle_hotplug - handle CPU/memory hot{,un}plug for a cpuset
3806 * Non-root cpusets are only affected by offlining. If any CPUs or memory
3887 if (cs == &top_cpuset || !css_tryget_online(&cs->css)) in cpuset_handle_hotplug()
3894 css_put(&cs->css); in cpuset_handle_hotplug()
3931 * cpuset_init_smp - initialize cpus_allowed
3954 * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset.
3955 * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
3959 * attached to the specified @tsk. Guaranteed to return some non-empty
3997 * cpuset_cpus_allowed_fallback - final fallback before complete catastrophe.
4001 * tsk->cpus_allowed, we fall back to task_cs(tsk)->cpus_allowed. In legacy
4002 * mode however, this value is the same as task_cs(tsk)->effective_cpus,
4017 cs_mask = task_cs(tsk)->cpus_allowed; in cpuset_cpus_allowed_fallback()
4025 * We own tsk->cpus_allowed, nobody can change it under us. in cpuset_cpus_allowed_fallback()
4027 * But we used cs && cs->cpus_allowed lockless and thus can in cpuset_cpus_allowed_fallback()
4029 * the wrong tsk->cpus_allowed. However, both cases imply the in cpuset_cpus_allowed_fallback()
4030 * subsequent cpuset_change_cpumask()->set_cpus_allowed_ptr() in cpuset_cpus_allowed_fallback()
4034 * changes in tsk_cs()->cpus_allowed. Otherwise we can temporary in cpuset_cpus_allowed_fallback()
4046 nodes_setall(current->mems_allowed); in cpuset_init_current_mems_allowed()
4050 * cpuset_mems_allowed - return mems_allowed mask from a tasks cpuset.
4051 * @tsk: pointer to task_struct from which to obtain cpuset->mems_allowed.
4054 * attached to the specified @tsk. Guaranteed to return some non-empty
4074 * cpuset_nodemask_valid_mems_allowed - check nodemask vs. current mems_allowed
4077 * Are any of the nodes in the nodemask allowed in current->mems_allowed?
4081 return nodes_intersects(*nodemask, current->mems_allowed); in cpuset_nodemask_valid_mems_allowed()
4085 * nearest_hardwall_ancestor() - Returns the nearest mem_exclusive or
4098 * cpuset_node_allowed - Can we allocate on a memory node?
4131 * in_interrupt - any node ok (current task context irrelevant)
4132 * GFP_ATOMIC - any node ok
4133 * tsk_is_oom_victim - any node ok
4134 * GFP_KERNEL - any node in enclosing hardwalled cpuset ok
4135 * GFP_USER - only nodes in current tasks mems allowed ok.
4145 if (node_isset(node, current->mems_allowed)) in cpuset_node_allowed()
4156 if (current->flags & PF_EXITING) /* Let dying task have memory */ in cpuset_node_allowed()
4164 allowed = node_isset(node, cs->mems_allowed); in cpuset_node_allowed()
4172 * cpuset_spread_node() - On which node to begin search for a page
4188 * only set nodes in task->mems_allowed that are online. So it
4199 return *rotor = next_node_in(*rotor, current->mems_allowed); in cpuset_spread_node()
4203 * cpuset_mem_spread_node() - On which node to begin search for a file page
4207 if (current->cpuset_mem_spread_rotor == NUMA_NO_NODE) in cpuset_mem_spread_node()
4208 current->cpuset_mem_spread_rotor = in cpuset_mem_spread_node()
4209 node_random(&current->mems_allowed); in cpuset_mem_spread_node()
4211 return cpuset_spread_node(&current->cpuset_mem_spread_rotor); in cpuset_mem_spread_node()
4215 * cpuset_mems_allowed_intersects - Does @tsk1's mems_allowed intersect @tsk2's?
4228 return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed); in cpuset_mems_allowed_intersects()
4232 * cpuset_print_current_mems_allowed - prints current's cpuset and mems_allowed
4243 cgrp = task_cs(current)->css.cgroup; in cpuset_print_current_mems_allowed()
4247 nodemask_pr_args(&current->mems_allowed)); in cpuset_print_current_mems_allowed()
4255 * - Print tasks cpuset path into seq_file.
4256 * - Used for /proc/<pid>/cpuset.
4257 * - No need to task_lock(tsk) on this tsk->cpuset reference, as it
4258 * doesn't really matter if tsk->cpuset changes after we read it,
4269 retval = -ENOMEM; in proc_cpuset_show()
4277 retval = cgroup_path_ns_locked(css->cgroup, buf, PATH_MAX, in proc_cpuset_show()
4278 current->nsproxy->cgroup_ns); in proc_cpuset_show()
4282 if (retval == -E2BIG) in proc_cpuset_show()
4283 retval = -ENAMETOOLONG; in proc_cpuset_show()
4300 nodemask_pr_args(&task->mems_allowed)); in cpuset_task_status_allowed()
4302 nodemask_pr_args(&task->mems_allowed)); in cpuset_task_status_allowed()