Lines Matching +full:non +full:- +full:interleave
1 // SPDX-License-Identifier: GPL-2.0-only
15 * interleave Allocate memory interleaved over a set of nodes,
22 * weighted interleave
24 * a set of weights (per-node), with normal fallback if it
25 * fails. Otherwise operates the same as interleave.
26 * Example: nodeset(0,1) & weights (2,1) - 2 pages allocated
38 * but useful to set in a VMA when you have a non default
48 * The process policy is applied for most non interrupt memory allocations
122 #define MPOL_MF_WRLOCK (MPOL_MF_INTERNAL << 2) /* Write-lock walked vmas */
132 * run-time system-wide default policy => local allocation
142 * iw_table is the sysfs-set interleave weight table, a value of 0 denotes
143 * system-default value should be used. A NULL iw_table also denotes that
144 * system-default values should be used. Until the system-default table
145 * is implemented, the system-default is always 1.
168 * numa_nearest_node - Find nearest node by state
181 return -EINVAL; in numa_nearest_node()
201 struct mempolicy *pol = p->mempolicy; in get_task_policy()
211 if (pol->mode) in get_task_policy()
225 return pol->flags & MPOL_MODE_FLAGS; in mpol_store_user_nodemask()
239 return -EINVAL; in mpol_new_nodemask()
240 pol->nodes = *nodes; in mpol_new_nodemask()
247 return -EINVAL; in mpol_new_preferred()
249 nodes_clear(pol->nodes); in mpol_new_preferred()
250 node_set(first_node(*nodes), pol->nodes); in mpol_new_preferred()
272 if (!pol || pol->mode == MPOL_LOCAL) in mpol_set_nodemask()
276 nodes_and(nsc->mask1, in mpol_set_nodemask()
281 if (pol->flags & MPOL_F_RELATIVE_NODES) in mpol_set_nodemask()
282 mpol_relative_nodemask(&nsc->mask2, nodes, &nsc->mask1); in mpol_set_nodemask()
284 nodes_and(nsc->mask2, *nodes, nsc->mask1); in mpol_set_nodemask()
287 pol->w.user_nodemask = *nodes; in mpol_set_nodemask()
289 pol->w.cpuset_mems_allowed = cpuset_current_mems_allowed; in mpol_set_nodemask()
291 ret = mpol_ops[pol->mode].create(pol, &nsc->mask2); in mpol_set_nodemask()
306 return ERR_PTR(-EINVAL); in mpol_new()
314 * All other modes require a valid pointer to a non-empty nodemask. in mpol_new()
320 return ERR_PTR(-EINVAL); in mpol_new()
328 return ERR_PTR(-EINVAL); in mpol_new()
330 return ERR_PTR(-EINVAL); in mpol_new()
334 return ERR_PTR(-ENOMEM); in mpol_new()
335 atomic_set(&policy->refcnt, 1); in mpol_new()
336 policy->mode = mode; in mpol_new()
337 policy->flags = flags; in mpol_new()
338 policy->home_node = NUMA_NO_NODE; in mpol_new()
346 if (!atomic_dec_and_test(&pol->refcnt)) in __mpol_put()
359 if (pol->flags & MPOL_F_STATIC_NODES) in mpol_rebind_nodemask()
360 nodes_and(tmp, pol->w.user_nodemask, *nodes); in mpol_rebind_nodemask()
361 else if (pol->flags & MPOL_F_RELATIVE_NODES) in mpol_rebind_nodemask()
362 mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes); in mpol_rebind_nodemask()
364 nodes_remap(tmp, pol->nodes, pol->w.cpuset_mems_allowed, in mpol_rebind_nodemask()
366 pol->w.cpuset_mems_allowed = *nodes; in mpol_rebind_nodemask()
372 pol->nodes = tmp; in mpol_rebind_nodemask()
378 pol->w.cpuset_mems_allowed = *nodes; in mpol_rebind_preferred()
382 * mpol_rebind_policy - Migrate a policy to a different set of nodes
384 * Per-vma policies are protected by mmap_lock. Allocations using per-task
385 * policies are protected by task->mems_allowed_seq to prevent a premature
390 if (!pol || pol->mode == MPOL_LOCAL) in mpol_rebind_policy()
393 nodes_equal(pol->w.cpuset_mems_allowed, *newmask)) in mpol_rebind_policy()
396 mpol_ops[pol->mode].rebind(pol, newmask); in mpol_rebind_policy()
407 mpol_rebind_policy(tsk->mempolicy, new); in mpol_rebind_task()
413 * Call holding a reference to mm. Takes mm->mmap_lock during call.
423 mpol_rebind_policy(vma->vm_policy, new); in mpol_rebind_mm()
465 * STRICT without MOVE flags lets do_mbind() fail immediately with -EIO in strictly_unmovable()
489 * Check if the folio's nid is in qp->nmask.
491 * If MPOL_MF_INVERT is set in qp->flags, check if the nid is
492 * in the invert of qp->nmask.
498 unsigned long flags = qp->flags; in queue_folio_required()
500 return node_isset(nid, *qp->nmask) == !(flags & MPOL_MF_INVERT); in queue_folio_required()
506 struct queue_pages *qp = walk->private; in queue_folios_pmd()
509 qp->nr_failed++; in queue_folios_pmd()
514 walk->action = ACTION_CONTINUE; in queue_folios_pmd()
519 if (!(qp->flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) || in queue_folios_pmd()
520 !vma_migratable(walk->vma) || in queue_folios_pmd()
521 !migrate_folio_add(folio, qp->pagelist, qp->flags)) in queue_folios_pmd()
522 qp->nr_failed++; in queue_folios_pmd()
530 * 0 - continue walking to scan for more, even if an existing folio on the
532 * -EIO - only MPOL_MF_STRICT was specified, without MPOL_MF_MOVE or ..._ALL,
538 struct vm_area_struct *vma = walk->vma; in queue_folios_pte_range()
540 struct queue_pages *qp = walk->private; in queue_folios_pte_range()
541 unsigned long flags = qp->flags; in queue_folios_pte_range()
553 mapped_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); in queue_folios_pte_range()
555 walk->action = ACTION_AGAIN; in queue_folios_pte_range()
564 qp->nr_failed++; in queue_folios_pte_range()
581 * but may be mapped by many PTEs (and Copy-On-Write may in queue_folios_pte_range()
584 * there can be other cases of multi-mapped pages which in queue_folios_pte_range()
585 * this quick check does not help to filter out - and a in queue_folios_pte_range()
595 if (folio == qp->large) in queue_folios_pte_range()
597 qp->large = folio; in queue_folios_pte_range()
601 !migrate_folio_add(folio, qp->pagelist, flags)) { in queue_folios_pte_range()
602 qp->nr_failed++; in queue_folios_pte_range()
610 if (qp->nr_failed && strictly_unmovable(flags)) in queue_folios_pte_range()
611 return -EIO; in queue_folios_pte_range()
620 struct queue_pages *qp = walk->private; in queue_folios_hugetlb()
621 unsigned long flags = qp->flags; in queue_folios_hugetlb()
626 ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte); in queue_folios_hugetlb()
627 entry = huge_ptep_get(walk->mm, addr, pte); in queue_folios_hugetlb()
630 qp->nr_failed++; in queue_folios_hugetlb()
637 !vma_migratable(walk->vma)) { in queue_folios_hugetlb()
638 qp->nr_failed++; in queue_folios_hugetlb()
650 if (!isolate_hugetlb(folio, qp->pagelist)) in queue_folios_hugetlb()
651 qp->nr_failed++; in queue_folios_hugetlb()
654 if (qp->nr_failed && strictly_unmovable(flags)) in queue_folios_hugetlb()
655 return -EIO; in queue_folios_hugetlb()
676 tlb_gather_mmu(&tlb, vma->vm_mm); in change_prot_numa()
681 count_memcg_events_mm(vma->vm_mm, NUMA_PTE_UPDATES, nr_updated); in change_prot_numa()
693 struct vm_area_struct *next, *vma = walk->vma; in queue_pages_test_walk()
694 struct queue_pages *qp = walk->private; in queue_pages_test_walk()
695 unsigned long flags = qp->flags; in queue_pages_test_walk()
700 if (!qp->first) { in queue_pages_test_walk()
701 qp->first = vma; in queue_pages_test_walk()
703 (qp->start < vma->vm_start)) in queue_pages_test_walk()
705 return -EFAULT; in queue_pages_test_walk()
707 next = find_vma(vma->vm_mm, vma->vm_end); in queue_pages_test_walk()
709 ((vma->vm_end < qp->end) && in queue_pages_test_walk()
710 (!next || vma->vm_end < next->vm_start))) in queue_pages_test_walk()
712 return -EFAULT; in queue_pages_test_walk()
715 * Need check MPOL_MF_STRICT to return -EIO if possible in queue_pages_test_walk()
752 * 0 - all pages already on the right node, or successfully queued for moving
754 * >0 - this number of misplaced folios could not be queued for moving
756 * -EIO - a misplaced page found, when MPOL_MF_STRICT specified without MOVEs.
757 * -EFAULT - a hole in the memory range, when MPOL_MF_DISCONTIG_OK unspecified.
780 err = -EFAULT; in queue_pages_range()
802 if (vma->vm_ops && vma->vm_ops->set_policy) { in vma_replace_policy()
803 err = vma->vm_ops->set_policy(vma, new); in vma_replace_policy()
808 old = vma->vm_policy; in vma_replace_policy()
809 vma->vm_policy = new; /* protected by mmap_lock */ in vma_replace_policy()
825 vmend = min(end, vma->vm_end); in mbind_range()
826 if (start > vma->vm_start) { in mbind_range()
830 vmstart = vma->vm_start; in mbind_range()
833 if (mpol_equal(vma->vm_policy, new_pol)) { in mbind_range()
855 return -ENOMEM; in do_set_mempolicy()
871 old = current->mempolicy; in do_set_mempolicy()
872 current->mempolicy = new; in do_set_mempolicy()
873 if (new && (new->mode == MPOL_INTERLEAVE || in do_set_mempolicy()
874 new->mode == MPOL_WEIGHTED_INTERLEAVE)) { in do_set_mempolicy()
875 current->il_prev = MAX_NUMNODES-1; in do_set_mempolicy()
876 current->il_weight = 0; in do_set_mempolicy()
897 switch (pol->mode) { in get_policy_nodemask()
903 *nodes = pol->nodes; in get_policy_nodemask()
931 struct mm_struct *mm = current->mm; in do_get_mempolicy()
933 struct mempolicy *pol = current->mempolicy, *pol_refcount = NULL; in do_get_mempolicy()
937 return -EINVAL; in do_get_mempolicy()
941 return -EINVAL; in do_get_mempolicy()
960 return -EFAULT; in do_get_mempolicy()
964 return -EINVAL; in do_get_mempolicy()
984 } else if (pol == current->mempolicy && in do_get_mempolicy()
985 pol->mode == MPOL_INTERLEAVE) { in do_get_mempolicy()
986 *policy = next_node_in(current->il_prev, pol->nodes); in do_get_mempolicy()
987 } else if (pol == current->mempolicy && in do_get_mempolicy()
988 pol->mode == MPOL_WEIGHTED_INTERLEAVE) { in do_get_mempolicy()
989 if (current->il_weight) in do_get_mempolicy()
990 *policy = current->il_prev; in do_get_mempolicy()
992 *policy = next_node_in(current->il_prev, in do_get_mempolicy()
993 pol->nodes); in do_get_mempolicy()
995 err = -EINVAL; in do_get_mempolicy()
1000 pol->mode; in do_get_mempolicy()
1005 *policy |= (pol->flags & MPOL_MODE_FLAGS); in do_get_mempolicy()
1011 *nmask = pol->w.user_nodemask; in do_get_mempolicy()
1041 list_add_tail(&folio->lru, foliolist); in migrate_folio_add()
1047 * Non-movable folio may reach here. And, there may be in migrate_folio_add()
1048 * temporary off LRU folios or non-LRU movable folios. in migrate_folio_add()
1090 nr_failed = queue_pages_range(mm, vma->vm_start, mm->task_size, &nmask, in migrate_to_node()
1171 * Example: [2,3,4] -> [3,4,5] moves everything. in do_migrate_pages()
1172 * [0-7] - > [3,4,5] moves only 0,1,2,6,7. in do_migrate_pages()
1214 struct mempolicy *pol = mmpol->pol; in alloc_migration_target_by_mpol()
1215 pgoff_t ilx = mmpol->ilx; in alloc_migration_target_by_mpol()
1221 ilx += src->index >> order; in alloc_migration_target_by_mpol()
1252 return -ENOSYS; in do_migrate_pages()
1266 struct mm_struct *mm = current->mm; in do_mbind()
1277 return -EINVAL; in do_mbind()
1279 return -EPERM; in do_mbind()
1282 return -EINVAL; in do_mbind()
1291 return -EINVAL; in do_mbind()
1316 err = -ENOMEM; in do_mbind()
1357 if (new->mode == MPOL_INTERLEAVE || in do_mbind()
1358 new->mode == MPOL_WEIGHTED_INTERLEAVE) { in do_mbind()
1361 unsigned long addr = -EFAULT; in do_mbind()
1372 if (addr != -EFAULT) in do_mbind()
1376 if (addr != -EFAULT) { in do_mbind()
1382 mmpol.ilx -= folio->index >> order; in do_mbind()
1397 err = -EIO; in do_mbind()
1425 return -EFAULT; in get_bitmap()
1428 mask[nlongs - 1] &= (1UL << (maxnode % BITS_PER_LONG)) - 1; in get_bitmap()
1437 --maxnode; in get_nodes()
1442 return -EINVAL; in get_nodes()
1446 * if the non supported part is all zero, one word at a time, in get_nodes()
1453 if (get_bitmap(&t, &nmask[(maxnode - 1) / BITS_PER_LONG], bits)) in get_nodes()
1454 return -EFAULT; in get_nodes()
1456 if (maxnode - bits >= MAX_NUMNODES) { in get_nodes()
1457 maxnode -= bits; in get_nodes()
1460 t &= ~((1UL << (MAX_NUMNODES % BITS_PER_LONG)) - 1); in get_nodes()
1463 return -EINVAL; in get_nodes()
1473 unsigned long copy = ALIGN(maxnode-1, 64) / 8; in copy_nodes_to_user()
1482 return -EINVAL; in copy_nodes_to_user()
1483 if (clear_user((char __user *)mask + nbytes, copy - nbytes)) in copy_nodes_to_user()
1484 return -EFAULT; in copy_nodes_to_user()
1493 return copy_to_user(mask, nodes_addr(*nodes), copy) ? -EFAULT : 0; in copy_nodes_to_user()
1503 return -EINVAL; in sanitize_mpol_flags()
1505 return -EINVAL; in sanitize_mpol_flags()
1510 return -EINVAL; in sanitize_mpol_flags()
1539 struct mm_struct *mm = current->mm; in SYSCALL_DEFINE4()
1543 int err = -ENOENT; in SYSCALL_DEFINE4()
1548 return -EINVAL; in SYSCALL_DEFINE4()
1553 return -EINVAL; in SYSCALL_DEFINE4()
1560 return -EINVAL; in SYSCALL_DEFINE4()
1566 return -EINVAL; in SYSCALL_DEFINE4()
1582 if (old->mode != MPOL_BIND && old->mode != MPOL_PREFERRED_MANY) { in SYSCALL_DEFINE4()
1583 err = -EOPNOTSUPP; in SYSCALL_DEFINE4()
1593 new->home_node = home_node; in SYSCALL_DEFINE4()
1649 return -ENOMEM; in kernel_migrate_pages()
1651 old = &scratch->mask1; in kernel_migrate_pages()
1652 new = &scratch->mask2; in kernel_migrate_pages()
1667 err = -ESRCH; in kernel_migrate_pages()
1672 err = -EINVAL; in kernel_migrate_pages()
1680 err = -EPERM; in kernel_migrate_pages()
1688 err = -EPERM; in kernel_migrate_pages()
1705 err = -EINVAL; in kernel_migrate_pages()
1742 return -EINVAL; in kernel_get_mempolicy()
1752 return -EFAULT; in kernel_get_mempolicy()
1769 if (vma->vm_flags & (VM_IO | VM_PFNMAP)) in vma_migratable()
1788 if (vma->vm_file && in vma_migratable()
1789 gfp_zone(mapping_gfp_mask(vma->vm_file->f_mapping)) in vma_migratable()
1799 return (vma->vm_ops && vma->vm_ops->get_policy) ? in __get_vma_policy()
1800 vma->vm_ops->get_policy(vma, addr, ilx) : vma->vm_policy; in __get_vma_policy()
1808 * @ilx: interleave index (output), for use only when MPOL_INTERLEAVE or
1812 * Falls back to current->mempolicy or system default policy, as necessary.
1814 * count--added by the get_policy() vm_op, as appropriate--to protect against
1826 if (pol->mode == MPOL_INTERLEAVE || in get_vma_policy()
1827 pol->mode == MPOL_WEIGHTED_INTERLEAVE) { in get_vma_policy()
1828 *ilx += vma->vm_pgoff >> order; in get_vma_policy()
1829 *ilx += (addr - vma->vm_start) >> (PAGE_SHIFT + order); in get_vma_policy()
1838 if (vma->vm_ops && vma->vm_ops->get_policy) { in vma_policy_mof()
1842 pol = vma->vm_ops->get_policy(vma, vma->vm_start, &ilx); in vma_policy_mof()
1843 if (pol && (pol->flags & MPOL_F_MOF)) in vma_policy_mof()
1850 pol = vma->vm_policy; in vma_policy_mof()
1854 return pol->flags & MPOL_F_MOF; in vma_policy_mof()
1864 * if policy->nodes has movable memory only, in apply_policy_zone()
1867 * policy->nodes is intersect with node_states[N_MEMORY]. in apply_policy_zone()
1869 * policy->nodes has movable memory only. in apply_policy_zone()
1871 if (!nodes_intersects(policy->nodes, node_states[N_HIGH_MEMORY])) in apply_policy_zone()
1883 /* to prevent miscount use tsk->mems_allowed_seq to detect rebind */ in weighted_interleave_nodes()
1885 node = current->il_prev; in weighted_interleave_nodes()
1886 if (!current->il_weight || !node_isset(node, policy->nodes)) { in weighted_interleave_nodes()
1887 node = next_node_in(node, policy->nodes); in weighted_interleave_nodes()
1892 current->il_prev = node; in weighted_interleave_nodes()
1893 current->il_weight = get_il_weight(node); in weighted_interleave_nodes()
1895 current->il_weight--; in weighted_interleave_nodes()
1905 /* to prevent miscount, use tsk->mems_allowed_seq to detect rebind */ in interleave_nodes()
1908 nid = next_node_in(current->il_prev, policy->nodes); in interleave_nodes()
1912 current->il_prev = nid; in interleave_nodes()
1928 policy = current->mempolicy; in mempolicy_slab_node()
1932 switch (policy->mode) { in mempolicy_slab_node()
1934 return first_node(policy->nodes); in mempolicy_slab_node()
1953 zonelist = &NODE_DATA(node)->node_zonelists[ZONELIST_FALLBACK]; in mempolicy_slab_node()
1955 &policy->nodes); in mempolicy_slab_node()
1975 memcpy(mask, &pol->nodes, sizeof(nodemask_t)); in read_once_policy_nodemask()
2012 target -= weight; in weighted_interleave_nid()
2020 * Do static interleaving for interleave index @ilx. Returns the ilx'th
2021 * node in pol->nodes (starting from ilx=0), wrapping around if ilx
2050 switch (pol->mode) { in policy_nodemask()
2053 *nid = first_node(pol->nodes); in policy_nodemask()
2056 nodemask = &pol->nodes; in policy_nodemask()
2057 if (pol->home_node != NUMA_NO_NODE) in policy_nodemask()
2058 *nid = pol->home_node; in policy_nodemask()
2063 cpuset_nodemask_valid_mems_allowed(&pol->nodes)) in policy_nodemask()
2064 nodemask = &pol->nodes; in policy_nodemask()
2065 if (pol->home_node != NUMA_NO_NODE) in policy_nodemask()
2066 *nid = pol->home_node; in policy_nodemask()
2093 * @addr: address in @vma for shared policy lookup and interleave policy
2096 * @nodemask: pointer to nodemask pointer for 'bind' and 'prefer-many' policy
2100 * If the effective policy is 'bind' or 'prefer-many', returns a pointer
2110 *mpol = get_vma_policy(vma, addr, hstate_vma(vma)->order, &ilx); in huge_node()
2120 * for 'bind' or 'interleave' policy into the argument nodemask, or
2123 * of non-default mempolicy.
2135 if (!(mask && current->mempolicy)) in init_nodemask_of_mempolicy()
2139 mempolicy = current->mempolicy; in init_nodemask_of_mempolicy()
2140 switch (mempolicy->mode) { in init_nodemask_of_mempolicy()
2146 *mask = mempolicy->nodes; in init_nodemask_of_mempolicy()
2167 * including "interleave", as a tsk with "interleave" policy may have
2182 mempolicy = tsk->mempolicy; in mempolicy_in_oom_domain()
2183 if (mempolicy && mempolicy->mode == MPOL_BIND) in mempolicy_in_oom_domain()
2184 ret = nodes_intersects(mempolicy->nodes, *mask); in mempolicy_in_oom_domain()
2212 * alloc_pages_mpol - Allocate pages according to NUMA mempolicy.
2216 * @ilx: Index for interleave mempolicy (also distinguishes alloc_pages()).
2229 if (pol->mode == MPOL_PREFERRED_MANY) in alloc_pages_mpol_noprof()
2236 * For hugepage allocation and non-interleave policy which in alloc_pages_mpol_noprof()
2242 * If the policy is interleave or does not allow the current in alloc_pages_mpol_noprof()
2245 if (pol->mode != MPOL_INTERLEAVE && in alloc_pages_mpol_noprof()
2246 pol->mode != MPOL_WEIGHTED_INTERLEAVE && in alloc_pages_mpol_noprof()
2267 if (unlikely(pol->mode == MPOL_INTERLEAVE) && page) { in alloc_pages_mpol_noprof()
2288 * vma_alloc_folio - Allocate a folio for a VMA.
2310 if (vma->vm_flags & VM_DROPPABLE) in vma_alloc_folio_noprof()
2321 * alloc_pages - Allocate pages.
2326 * first page is naturally aligned (eg an order-3 allocation will be aligned
2339 * No reference counting needed for current->mempolicy in alloc_pages_noprof()
2367 nodes = nodes_weight(pol->nodes); in alloc_pages_bulk_array_interleave()
2369 delta = nr_pages - nodes * nr_pages_per_node; in alloc_pages_bulk_array_interleave()
2377 delta--; in alloc_pages_bulk_array_interleave()
2406 int resume_node = MAX_NUMNODES - 1; in alloc_pages_bulk_array_weighted_interleave()
2425 node = me->il_prev; in alloc_pages_bulk_array_weighted_interleave()
2426 weight = me->il_weight; in alloc_pages_bulk_array_weighted_interleave()
2433 /* if that's all the pages, no need to interleave */ in alloc_pages_bulk_array_weighted_interleave()
2435 me->il_weight -= rem_pages; in alloc_pages_bulk_array_weighted_interleave()
2439 rem_pages -= weight; in alloc_pages_bulk_array_weighted_interleave()
2442 me->il_weight = 0; in alloc_pages_bulk_array_weighted_interleave()
2465 * Track which node weighted interleave should resume from. in alloc_pages_bulk_array_weighted_interleave()
2481 delta -= weight; in alloc_pages_bulk_array_weighted_interleave()
2486 resume_weight = weight - delta; in alloc_pages_bulk_array_weighted_interleave()
2500 me->il_prev = resume_node; in alloc_pages_bulk_array_weighted_interleave()
2501 me->il_weight = resume_weight; in alloc_pages_bulk_array_weighted_interleave()
2516 nr_allocated = alloc_pages_bulk_noprof(preferred_gfp, nid, &pol->nodes, in alloc_pages_bulk_array_preferred_many()
2521 nr_pages - nr_allocated, NULL, in alloc_pages_bulk_array_preferred_many()
2542 if (pol->mode == MPOL_INTERLEAVE) in alloc_pages_bulk_array_mempolicy_noprof()
2546 if (pol->mode == MPOL_WEIGHTED_INTERLEAVE) in alloc_pages_bulk_array_mempolicy_noprof()
2550 if (pol->mode == MPOL_PREFERRED_MANY) in alloc_pages_bulk_array_mempolicy_noprof()
2562 struct mempolicy *pol = mpol_dup(src->vm_policy); in vma_dup_policy()
2566 dst->vm_policy = pol; in vma_dup_policy()
2571 * If mpol_dup() sees current->cpuset == cpuset_being_rebound, then it
2587 return ERR_PTR(-ENOMEM); in __mpol_dup()
2590 if (old == current->mempolicy) { in __mpol_dup()
2601 atomic_set(&new->refcnt, 1); in __mpol_dup()
2610 if (a->mode != b->mode) in __mpol_equal()
2612 if (a->flags != b->flags) in __mpol_equal()
2614 if (a->home_node != b->home_node) in __mpol_equal()
2617 if (!nodes_equal(a->w.user_nodemask, b->w.user_nodemask)) in __mpol_equal()
2620 switch (a->mode) { in __mpol_equal()
2626 return !!nodes_equal(a->nodes, b->nodes); in __mpol_equal()
2639 * The policies are kept in Red-Black tree linked from the inode.
2640 * They are protected by the sp->lock rwlock, which should be held
2645 * lookup first element intersecting start-end. Caller holds sp->lock for
2651 struct rb_node *n = sp->root.rb_node; in sp_lookup()
2656 if (start >= p->end) in sp_lookup()
2657 n = n->rb_right; in sp_lookup()
2658 else if (end <= p->start) in sp_lookup()
2659 n = n->rb_left; in sp_lookup()
2671 if (w->end <= start) in sp_lookup()
2679 * Insert a new shared policy into the list. Caller holds sp->lock for
2684 struct rb_node **p = &sp->root.rb_node; in sp_insert()
2691 if (new->start < nd->start) in sp_insert()
2692 p = &(*p)->rb_left; in sp_insert()
2693 else if (new->end > nd->end) in sp_insert()
2694 p = &(*p)->rb_right; in sp_insert()
2698 rb_link_node(&new->nd, parent, p); in sp_insert()
2699 rb_insert_color(&new->nd, &sp->root); in sp_insert()
2709 if (!sp->root.rb_node) in mpol_shared_policy_lookup()
2711 read_lock(&sp->lock); in mpol_shared_policy_lookup()
2714 mpol_get(sn->policy); in mpol_shared_policy_lookup()
2715 pol = sn->policy; in mpol_shared_policy_lookup()
2717 read_unlock(&sp->lock); in mpol_shared_policy_lookup()
2723 mpol_put(n->policy); in sp_free()
2728 * mpol_misplaced - check whether current folio node is valid in policy
2732 * @addr: virtual address in @vma for shared policy lookup and interleave policy
2748 struct vm_area_struct *vma = vmf->vma; in mpol_misplaced()
2758 lockdep_assert_held(vmf->ptl); in mpol_misplaced()
2760 if (!(pol->flags & MPOL_F_MOF)) in mpol_misplaced()
2763 switch (pol->mode) { in mpol_misplaced()
2773 if (node_isset(curnid, pol->nodes)) in mpol_misplaced()
2775 polnid = first_node(pol->nodes); in mpol_misplaced()
2793 if (pol->flags & MPOL_F_MORON) { in mpol_misplaced()
2798 if (node_isset(thisnid, pol->nodes)) in mpol_misplaced()
2808 if (node_isset(curnid, pol->nodes)) in mpol_misplaced()
2813 &pol->nodes); in mpol_misplaced()
2822 if (pol->flags & MPOL_F_MORON) { in mpol_misplaced()
2839 * Drop the (possibly final) reference to task->mempolicy. It needs to be
2840 * dropped after task->mempolicy is set to NULL so that any allocation done as
2849 pol = task->mempolicy; in mpol_put_task_policy()
2850 task->mempolicy = NULL; in mpol_put_task_policy()
2857 rb_erase(&n->nd, &sp->root); in sp_delete()
2864 node->start = start; in sp_node_init()
2865 node->end = end; in sp_node_init()
2866 node->policy = pol; in sp_node_init()
2884 newpol->flags |= MPOL_F_SHARED; in sp_alloc()
2900 write_lock(&sp->lock); in shared_policy_replace()
2903 while (n && n->start < end) { in shared_policy_replace()
2904 struct rb_node *next = rb_next(&n->nd); in shared_policy_replace()
2905 if (n->start >= start) { in shared_policy_replace()
2906 if (n->end <= end) in shared_policy_replace()
2909 n->start = end; in shared_policy_replace()
2912 if (n->end > end) { in shared_policy_replace()
2916 *mpol_new = *n->policy; in shared_policy_replace()
2917 atomic_set(&mpol_new->refcnt, 1); in shared_policy_replace()
2918 sp_node_init(n_new, end, n->end, mpol_new); in shared_policy_replace()
2919 n->end = start; in shared_policy_replace()
2925 n->end = start; in shared_policy_replace()
2933 write_unlock(&sp->lock); in shared_policy_replace()
2945 write_unlock(&sp->lock); in shared_policy_replace()
2946 ret = -ENOMEM; in shared_policy_replace()
2953 atomic_set(&mpol_new->refcnt, 1); in shared_policy_replace()
2958 * mpol_shared_policy_init - initialize shared policy for inode
2962 * Install non-NULL @mpol in inode's shared policy rb-tree.
2963 * On entry, the current task has a reference on a non-NULL @mpol.
2971 sp->root = RB_ROOT; /* empty tree == default mempolicy */ in mpol_shared_policy_init()
2972 rwlock_init(&sp->lock); in mpol_shared_policy_init()
2983 npol = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask); in mpol_shared_policy_init()
2988 ret = mpol_set_nodemask(npol, &mpol->w.user_nodemask, scratch); in mpol_shared_policy_init()
3014 new = sp_alloc(vma->vm_pgoff, vma->vm_pgoff + sz, pol); in mpol_set_shared_policy()
3016 return -ENOMEM; in mpol_set_shared_policy()
3018 err = shared_policy_replace(sp, vma->vm_pgoff, vma->vm_pgoff + sz, new); in mpol_set_shared_policy()
3030 if (!sp->root.rb_node) in mpol_free_shared_policy()
3032 write_lock(&sp->lock); in mpol_free_shared_policy()
3033 next = rb_first(&sp->root); in mpol_free_shared_policy()
3036 next = rb_next(&n->nd); in mpol_free_shared_policy()
3039 write_unlock(&sp->lock); in mpol_free_shared_policy()
3052 /* Parsed by setup_numabalancing. override == 1 enables, -1 disables */ in check_numabalancing_enable()
3073 numabalancing_override = -1; in setup_numabalancing()
3127 /* Interleave this node? */ in numa_policy_init()
3156 [MPOL_INTERLEAVE] = "interleave",
3157 [MPOL_WEIGHTED_INTERLEAVE] = "weighted interleave",
3164 * mpol_parse_str - parse string to mempolicy, for tmpfs mpol mount option.
3186 /* NUL-terminate mode or flags string */ in mpol_parse_str()
3270 new->nodes = nodes; in mpol_parse_str()
3272 nodes_clear(new->nodes); in mpol_parse_str()
3273 node_set(first_node(nodes), new->nodes); in mpol_parse_str()
3275 new->mode = MPOL_LOCAL; in mpol_parse_str()
3282 new->w.user_nodemask = nodes; in mpol_parse_str()
3289 *--nodelist = ':'; in mpol_parse_str()
3291 *--flags = '='; in mpol_parse_str()
3299 * mpol_to_str - format a mempolicy structure for printing
3306 * interleave", plus the longest flag flags, "relative|balancing", and to
3319 pol <= &preferred_node_policy[ARRAY_SIZE(preferred_node_policy) - 1])) { in mpol_to_str()
3320 mode = pol->mode; in mpol_to_str()
3321 flags = pol->flags; in mpol_to_str()
3333 nodes = pol->nodes; in mpol_to_str()
3344 p += snprintf(p, buffer + maxlen - p, "="); in mpol_to_str()
3350 p += snprintf(p, buffer + maxlen - p, "static"); in mpol_to_str()
3352 p += snprintf(p, buffer + maxlen - p, "relative"); in mpol_to_str()
3356 p += snprintf(p, buffer + maxlen - p, "|"); in mpol_to_str()
3357 p += snprintf(p, buffer + maxlen - p, "balancing"); in mpol_to_str()
3362 p += scnprintf(p, buffer + maxlen - p, ":%*pbl", in mpol_to_str()
3379 weight = get_il_weight(node_attr->nid); in node_show()
3395 return -EINVAL; in node_store()
3399 return -ENOMEM; in node_store()
3406 new[node_attr->nid] = weight; in node_store()
3421 sysfs_remove_file(parent, &node_attr->kobj_attr.attr); in sysfs_wi_node_release()
3422 kfree(node_attr->kobj_attr.attr.name); in sysfs_wi_node_release()
3447 return -ENOMEM; in add_weight_node()
3452 return -ENOMEM; in add_weight_node()
3455 sysfs_attr_init(&node_attr->kobj_attr.attr); in add_weight_node()
3456 node_attr->kobj_attr.attr.name = name; in add_weight_node()
3457 node_attr->kobj_attr.attr.mode = 0644; in add_weight_node()
3458 node_attr->kobj_attr.show = node_show; in add_weight_node()
3459 node_attr->kobj_attr.store = node_store; in add_weight_node()
3460 node_attr->nid = nid; in add_weight_node()
3462 if (sysfs_create_file(wi_kobj, &node_attr->kobj_attr.attr)) { in add_weight_node()
3463 kfree(node_attr->kobj_attr.attr.name); in add_weight_node()
3466 return -ENOMEM; in add_weight_node()
3480 return -ENOMEM; in add_weighted_interleave_group()
3527 err = -ENOMEM; in mempolicy_sysfs_init()
3534 err = -ENOMEM; in mempolicy_sysfs_init()