Lines Matching +full:delta +full:- +full:y +full:- +full:threshold

1 // SPDX-License-Identifier: GPL-2.0-only
3 * mm/percpu.c - percpu memory allocator
13 * a 1-to-1 mapping for units to possible cpus. These units are grouped
17 * ------------------- ------------------- ------------
19 * ------------------- ...... ------------------- .... ------------
23 * c1:u1, c1:u2, etc. On NUMA machines, the mapping may be non-linear
35 * linker. The reserved section, if non-zero, primarily manages static
40 * memcg-awareness. To make a percpu allocation memcg-aware the __GFP_ACCOUNT
41 * flag should be passed. All memcg-aware allocations are sharing one set
61 * - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate
65 * - use pcpu_setup_first_chunk() during percpu area initialization to
99 #include "percpu-internal.h"
103 * 1-31 bytes share the same slot.
113 /* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */
116 (void __percpu *)((unsigned long)(addr) - \
123 (unsigned long)pcpu_base_addr - \
149 static const int *pcpu_unit_map __ro_after_init; /* cpu -> unit */
150 const unsigned long *pcpu_unit_offsets __ro_after_init; /* cpu -> unit offset */
208 * pcpu_addr_in_chunk - check if the address is served from this chunk
222 start_addr = chunk->base_addr + chunk->start_offset; in pcpu_addr_in_chunk()
223 end_addr = chunk->base_addr + chunk->nr_pages * PAGE_SIZE - in pcpu_addr_in_chunk()
224 chunk->end_offset; in pcpu_addr_in_chunk()
232 return max(highbit - PCPU_SLOT_BASE_SHIFT + 2, 1); in __pcpu_size_to_slot()
244 const struct pcpu_block_md *chunk_md = &chunk->chunk_md; in pcpu_chunk_slot()
246 if (chunk->free_bytes < PCPU_MIN_ALLOC_SIZE || in pcpu_chunk_slot()
247 chunk_md->contig_hint == 0) in pcpu_chunk_slot()
250 return pcpu_size_to_slot(chunk_md->contig_hint * PCPU_MIN_ALLOC_SIZE); in pcpu_chunk_slot()
256 page->index = (unsigned long)pcpu; in pcpu_set_page_chunk()
262 return (struct pcpu_chunk *)page->index; in pcpu_get_page_chunk()
278 return (unsigned long)chunk->base_addr + in pcpu_chunk_addr()
288 return chunk->alloc_map + in pcpu_index_alloc_map()
299 return off & (PCPU_BITMAP_BLOCK_BITS - 1); in pcpu_off_to_block_off()
308 * pcpu_check_block_hint - check against the contig hint
320 int bit_off = ALIGN(block->contig_hint_start, align) - in pcpu_check_block_hint()
321 block->contig_hint_start; in pcpu_check_block_hint()
323 return bit_off + bits <= block->contig_hint; in pcpu_check_block_hint()
327 * pcpu_next_hint - determine which hint to use
346 if (block->scan_hint && in pcpu_next_hint()
347 block->contig_hint_start > block->scan_hint_start && in pcpu_next_hint()
348 alloc_bits > block->scan_hint) in pcpu_next_hint()
349 return block->scan_hint_start + block->scan_hint; in pcpu_next_hint()
351 return block->first_free; in pcpu_next_hint()
355 * pcpu_next_md_free_region - finds the next hint free area
361 * block->contig_hint and performs aggregation across blocks to find the
362 * next hint. It modifies bit_off and bits in-place to be consumed in the
373 for (block = chunk->md_blocks + i; i < pcpu_chunk_nr_blocks(chunk); in pcpu_next_md_free_region()
377 *bits += block->left_free; in pcpu_next_md_free_region()
378 if (block->left_free == PCPU_BITMAP_BLOCK_BITS) in pcpu_next_md_free_region()
391 *bits = block->contig_hint; in pcpu_next_md_free_region()
392 if (*bits && block->contig_hint_start >= block_off && in pcpu_next_md_free_region()
393 *bits + block->contig_hint_start < PCPU_BITMAP_BLOCK_BITS) { in pcpu_next_md_free_region()
395 block->contig_hint_start); in pcpu_next_md_free_region()
401 *bits = block->right_free; in pcpu_next_md_free_region()
402 *bit_off = (i + 1) * PCPU_BITMAP_BLOCK_BITS - block->right_free; in pcpu_next_md_free_region()
407 * pcpu_next_fit_region - finds fit areas for a given allocation request
416 * allocation. block->first_free is returned if the allocation request fits
428 for (block = chunk->md_blocks + i; i < pcpu_chunk_nr_blocks(chunk); in pcpu_next_fit_region()
432 *bits += block->left_free; in pcpu_next_fit_region()
435 if (block->left_free == PCPU_BITMAP_BLOCK_BITS) in pcpu_next_fit_region()
439 /* check block->contig_hint */ in pcpu_next_fit_region()
440 *bits = ALIGN(block->contig_hint_start, align) - in pcpu_next_fit_region()
441 block->contig_hint_start; in pcpu_next_fit_region()
446 if (block->contig_hint && in pcpu_next_fit_region()
447 block->contig_hint_start >= block_off && in pcpu_next_fit_region()
448 block->contig_hint >= *bits + alloc_bits) { in pcpu_next_fit_region()
451 *bits += alloc_bits + block->contig_hint_start - in pcpu_next_fit_region()
459 *bit_off = ALIGN(PCPU_BITMAP_BLOCK_BITS - block->right_free, in pcpu_next_fit_region()
461 *bits = PCPU_BITMAP_BLOCK_BITS - *bit_off; in pcpu_next_fit_region()
467 /* no valid offsets were found - fail condition */ in pcpu_next_fit_region()
492 * pcpu_mem_zalloc - allocate memory
516 * pcpu_mem_free - free memory
531 list_move(&chunk->list, &pcpu_chunk_lists[slot]); in __pcpu_chunk_move()
533 list_move_tail(&chunk->list, &pcpu_chunk_lists[slot]); in __pcpu_chunk_move()
543 * pcpu_chunk_relocate - put chunk in the appropriate chunk slot
559 /* leave isolated chunks in-place */ in pcpu_chunk_relocate()
560 if (chunk->isolated) in pcpu_chunk_relocate()
571 if (!chunk->isolated) { in pcpu_isolate_chunk()
572 chunk->isolated = true; in pcpu_isolate_chunk()
573 pcpu_nr_empty_pop_pages -= chunk->nr_empty_pop_pages; in pcpu_isolate_chunk()
575 list_move(&chunk->list, &pcpu_chunk_lists[pcpu_to_depopulate_slot]); in pcpu_isolate_chunk()
582 if (chunk->isolated) { in pcpu_reintegrate_chunk()
583 chunk->isolated = false; in pcpu_reintegrate_chunk()
584 pcpu_nr_empty_pop_pages += chunk->nr_empty_pop_pages; in pcpu_reintegrate_chunk()
585 pcpu_chunk_relocate(chunk, -1); in pcpu_reintegrate_chunk()
590 * pcpu_update_empty_pages - update empty page counters
600 chunk->nr_empty_pop_pages += nr; in pcpu_update_empty_pages()
601 if (chunk != pcpu_reserved_chunk && !chunk->isolated) in pcpu_update_empty_pages()
606 * pcpu_region_overlap - determines if two regions overlap
610 * @y: end of second region, exclusive
613 * allocated region [x, y).
615 static inline bool pcpu_region_overlap(int a, int b, int x, int y) in pcpu_region_overlap() argument
617 return (a < y) && (x < b); in pcpu_region_overlap()
621 * pcpu_block_update - updates a block given a free area
632 int contig = end - start; in pcpu_block_update()
634 block->first_free = min(block->first_free, start); in pcpu_block_update()
636 block->left_free = contig; in pcpu_block_update()
638 if (end == block->nr_bits) in pcpu_block_update()
639 block->right_free = contig; in pcpu_block_update()
641 if (contig > block->contig_hint) { in pcpu_block_update()
643 if (start > block->contig_hint_start) { in pcpu_block_update()
644 if (block->contig_hint > block->scan_hint) { in pcpu_block_update()
645 block->scan_hint_start = in pcpu_block_update()
646 block->contig_hint_start; in pcpu_block_update()
647 block->scan_hint = block->contig_hint; in pcpu_block_update()
648 } else if (start < block->scan_hint_start) { in pcpu_block_update()
654 block->scan_hint = 0; in pcpu_block_update()
657 block->scan_hint = 0; in pcpu_block_update()
659 block->contig_hint_start = start; in pcpu_block_update()
660 block->contig_hint = contig; in pcpu_block_update()
661 } else if (contig == block->contig_hint) { in pcpu_block_update()
662 if (block->contig_hint_start && in pcpu_block_update()
664 __ffs(start) > __ffs(block->contig_hint_start))) { in pcpu_block_update()
666 block->contig_hint_start = start; in pcpu_block_update()
667 if (start < block->scan_hint_start && in pcpu_block_update()
668 block->contig_hint > block->scan_hint) in pcpu_block_update()
669 block->scan_hint = 0; in pcpu_block_update()
670 } else if (start > block->scan_hint_start || in pcpu_block_update()
671 block->contig_hint > block->scan_hint) { in pcpu_block_update()
677 block->scan_hint_start = start; in pcpu_block_update()
678 block->scan_hint = contig; in pcpu_block_update()
686 if ((start < block->contig_hint_start && in pcpu_block_update()
687 (contig > block->scan_hint || in pcpu_block_update()
688 (contig == block->scan_hint && in pcpu_block_update()
689 start > block->scan_hint_start)))) { in pcpu_block_update()
690 block->scan_hint_start = start; in pcpu_block_update()
691 block->scan_hint = contig; in pcpu_block_update()
697 * pcpu_block_update_scan - update a block given a free area from a scan
724 block = chunk->md_blocks + s_index; in pcpu_block_update_scan()
734 * pcpu_chunk_refresh_hint - updates metadata about a chunk
747 struct pcpu_block_md *chunk_md = &chunk->chunk_md; in pcpu_chunk_refresh_hint()
751 if (!full_scan && chunk_md->scan_hint) { in pcpu_chunk_refresh_hint()
752 bit_off = chunk_md->scan_hint_start + chunk_md->scan_hint; in pcpu_chunk_refresh_hint()
753 chunk_md->contig_hint_start = chunk_md->scan_hint_start; in pcpu_chunk_refresh_hint()
754 chunk_md->contig_hint = chunk_md->scan_hint; in pcpu_chunk_refresh_hint()
755 chunk_md->scan_hint = 0; in pcpu_chunk_refresh_hint()
757 bit_off = chunk_md->first_free; in pcpu_chunk_refresh_hint()
758 chunk_md->contig_hint = 0; in pcpu_chunk_refresh_hint()
776 struct pcpu_block_md *block = chunk->md_blocks + index; in pcpu_block_refresh_hint()
781 if (block->scan_hint) { in pcpu_block_refresh_hint()
782 start = block->scan_hint_start + block->scan_hint; in pcpu_block_refresh_hint()
783 block->contig_hint_start = block->scan_hint_start; in pcpu_block_refresh_hint()
784 block->contig_hint = block->scan_hint; in pcpu_block_refresh_hint()
785 block->scan_hint = 0; in pcpu_block_refresh_hint()
787 start = block->first_free; in pcpu_block_refresh_hint()
788 block->contig_hint = 0; in pcpu_block_refresh_hint()
791 block->right_free = 0; in pcpu_block_refresh_hint()
799 * pcpu_block_update_hint_alloc - update hint on allocation path
811 struct pcpu_block_md *chunk_md = &chunk->chunk_md; in pcpu_block_update_hint_alloc()
824 e_index = pcpu_off_to_block_index(bit_off + bits - 1); in pcpu_block_update_hint_alloc()
826 e_off = pcpu_off_to_block_off(bit_off + bits - 1) + 1; in pcpu_block_update_hint_alloc()
828 s_block = chunk->md_blocks + s_index; in pcpu_block_update_hint_alloc()
829 e_block = chunk->md_blocks + e_index; in pcpu_block_update_hint_alloc()
834 if (s_block->contig_hint == PCPU_BITMAP_BLOCK_BITS) in pcpu_block_update_hint_alloc()
838 * block->first_free must be updated if the allocation takes its place. in pcpu_block_update_hint_alloc()
842 if (s_off == s_block->first_free) in pcpu_block_update_hint_alloc()
843 s_block->first_free = find_next_zero_bit( in pcpu_block_update_hint_alloc()
848 if (pcpu_region_overlap(s_block->scan_hint_start, in pcpu_block_update_hint_alloc()
849 s_block->scan_hint_start + s_block->scan_hint, in pcpu_block_update_hint_alloc()
852 s_block->scan_hint = 0; in pcpu_block_update_hint_alloc()
854 if (pcpu_region_overlap(s_block->contig_hint_start, in pcpu_block_update_hint_alloc()
855 s_block->contig_hint_start + in pcpu_block_update_hint_alloc()
856 s_block->contig_hint, in pcpu_block_update_hint_alloc()
859 /* block contig hint is broken - scan to fix it */ in pcpu_block_update_hint_alloc()
861 s_block->left_free = 0; in pcpu_block_update_hint_alloc()
865 s_block->left_free = min(s_block->left_free, s_off); in pcpu_block_update_hint_alloc()
867 s_block->right_free = min_t(int, s_block->right_free, in pcpu_block_update_hint_alloc()
868 PCPU_BITMAP_BLOCK_BITS - e_off); in pcpu_block_update_hint_alloc()
870 s_block->right_free = 0; in pcpu_block_update_hint_alloc()
877 if (e_block->contig_hint == PCPU_BITMAP_BLOCK_BITS) in pcpu_block_update_hint_alloc()
884 e_block->first_free = find_next_zero_bit( in pcpu_block_update_hint_alloc()
892 if (e_off > e_block->scan_hint_start) in pcpu_block_update_hint_alloc()
893 e_block->scan_hint = 0; in pcpu_block_update_hint_alloc()
895 e_block->left_free = 0; in pcpu_block_update_hint_alloc()
896 if (e_off > e_block->contig_hint_start) { in pcpu_block_update_hint_alloc()
897 /* contig hint is broken - scan to fix it */ in pcpu_block_update_hint_alloc()
900 e_block->right_free = in pcpu_block_update_hint_alloc()
901 min_t(int, e_block->right_free, in pcpu_block_update_hint_alloc()
902 PCPU_BITMAP_BLOCK_BITS - e_off); in pcpu_block_update_hint_alloc()
906 /* update in-between md_blocks */ in pcpu_block_update_hint_alloc()
907 nr_empty_pages += (e_index - s_index - 1); in pcpu_block_update_hint_alloc()
909 block->scan_hint = 0; in pcpu_block_update_hint_alloc()
910 block->contig_hint = 0; in pcpu_block_update_hint_alloc()
911 block->left_free = 0; in pcpu_block_update_hint_alloc()
912 block->right_free = 0; in pcpu_block_update_hint_alloc()
923 pcpu_update_empty_pages(chunk, -nr_empty_pages); in pcpu_block_update_hint_alloc()
925 if (pcpu_region_overlap(chunk_md->scan_hint_start, in pcpu_block_update_hint_alloc()
926 chunk_md->scan_hint_start + in pcpu_block_update_hint_alloc()
927 chunk_md->scan_hint, in pcpu_block_update_hint_alloc()
930 chunk_md->scan_hint = 0; in pcpu_block_update_hint_alloc()
937 if (pcpu_region_overlap(chunk_md->contig_hint_start, in pcpu_block_update_hint_alloc()
938 chunk_md->contig_hint_start + in pcpu_block_update_hint_alloc()
939 chunk_md->contig_hint, in pcpu_block_update_hint_alloc()
946 * pcpu_block_update_hint_free - updates the block hints on the free path
958 * over the block metadata to update chunk_md->contig_hint.
959 * chunk_md->contig_hint may be off by up to a page, but it will never be more
979 e_index = pcpu_off_to_block_index(bit_off + bits - 1); in pcpu_block_update_hint_free()
981 e_off = pcpu_off_to_block_off(bit_off + bits - 1) + 1; in pcpu_block_update_hint_free()
983 s_block = chunk->md_blocks + s_index; in pcpu_block_update_hint_free()
984 e_block = chunk->md_blocks + e_index; in pcpu_block_update_hint_free()
987 * Check if the freed area aligns with the block->contig_hint. in pcpu_block_update_hint_free()
997 if (s_off == s_block->contig_hint + s_block->contig_hint_start) { in pcpu_block_update_hint_free()
998 start = s_block->contig_hint_start; in pcpu_block_update_hint_free()
1012 if (e_off == e_block->contig_hint_start) in pcpu_block_update_hint_free()
1013 end = e_block->contig_hint_start + e_block->contig_hint; in pcpu_block_update_hint_free()
1032 nr_empty_pages += (e_index - s_index - 1); in pcpu_block_update_hint_free()
1034 block->first_free = 0; in pcpu_block_update_hint_free()
1035 block->scan_hint = 0; in pcpu_block_update_hint_free()
1036 block->contig_hint_start = 0; in pcpu_block_update_hint_free()
1037 block->contig_hint = PCPU_BITMAP_BLOCK_BITS; in pcpu_block_update_hint_free()
1038 block->left_free = PCPU_BITMAP_BLOCK_BITS; in pcpu_block_update_hint_free()
1039 block->right_free = PCPU_BITMAP_BLOCK_BITS; in pcpu_block_update_hint_free()
1052 if (((end - start) >= PCPU_BITMAP_BLOCK_BITS) || s_index != e_index) in pcpu_block_update_hint_free()
1055 pcpu_block_update(&chunk->chunk_md, in pcpu_block_update_hint_free()
1061 * pcpu_is_populated - determines if the region is populated
1081 start = find_next_zero_bit(chunk->populated, end, start); in pcpu_is_populated()
1085 end = find_next_bit(chunk->populated, end, start + 1); in pcpu_is_populated()
1092 * pcpu_find_block_fit - finds the block index to start searching
1108 * -1 if no offset is found.
1113 struct pcpu_block_md *chunk_md = &chunk->chunk_md; in pcpu_find_block_fit()
1122 return -1; in pcpu_find_block_fit()
1136 return -1; in pcpu_find_block_fit()
1142 * pcpu_find_zero_area - modified from bitmap_find_next_zero_area_off()
1182 area_bits = i - area_off; in pcpu_find_zero_area()
1198 * pcpu_alloc_area - allocates an area from a pcpu_chunk
1207 * @start will be block->first_free. This is an attempt to fill the
1214 * -1 if no matching area is found.
1219 struct pcpu_block_md *chunk_md = &chunk->chunk_md; in pcpu_alloc_area()
1220 size_t align_mask = (align) ? (align - 1) : 0; in pcpu_alloc_area()
1233 bit_off = pcpu_find_zero_area(chunk->alloc_map, end, start, alloc_bits, in pcpu_alloc_area()
1236 return -1; in pcpu_alloc_area()
1242 bitmap_set(chunk->alloc_map, bit_off, alloc_bits); in pcpu_alloc_area()
1245 set_bit(bit_off, chunk->bound_map); in pcpu_alloc_area()
1246 bitmap_clear(chunk->bound_map, bit_off + 1, alloc_bits - 1); in pcpu_alloc_area()
1247 set_bit(bit_off + alloc_bits, chunk->bound_map); in pcpu_alloc_area()
1249 chunk->free_bytes -= alloc_bits * PCPU_MIN_ALLOC_SIZE; in pcpu_alloc_area()
1252 if (bit_off == chunk_md->first_free) in pcpu_alloc_area()
1253 chunk_md->first_free = find_next_zero_bit( in pcpu_alloc_area()
1254 chunk->alloc_map, in pcpu_alloc_area()
1266 * pcpu_free_area - frees the corresponding offset
1278 struct pcpu_block_md *chunk_md = &chunk->chunk_md; in pcpu_free_area()
1289 end = find_next_bit(chunk->bound_map, pcpu_chunk_map_bits(chunk), in pcpu_free_area()
1291 bits = end - bit_off; in pcpu_free_area()
1292 bitmap_clear(chunk->alloc_map, bit_off, bits); in pcpu_free_area()
1297 chunk->free_bytes += freed; in pcpu_free_area()
1300 chunk_md->first_free = min(chunk_md->first_free, bit_off); in pcpu_free_area()
1311 block->scan_hint = 0; in pcpu_init_md_block()
1312 block->contig_hint = nr_bits; in pcpu_init_md_block()
1313 block->left_free = nr_bits; in pcpu_init_md_block()
1314 block->right_free = nr_bits; in pcpu_init_md_block()
1315 block->first_free = 0; in pcpu_init_md_block()
1316 block->nr_bits = nr_bits; in pcpu_init_md_block()
1324 pcpu_init_md_block(&chunk->chunk_md, pcpu_chunk_map_bits(chunk)); in pcpu_init_md_blocks()
1326 for (md_block = chunk->md_blocks; in pcpu_init_md_blocks()
1327 md_block != chunk->md_blocks + pcpu_chunk_nr_blocks(chunk); in pcpu_init_md_blocks()
1333 * pcpu_alloc_first_chunk - creates chunks that serve the first chunk
1356 start_offset = tmp_addr - aligned_addr; in pcpu_alloc_first_chunk()
1367 INIT_LIST_HEAD(&chunk->list); in pcpu_alloc_first_chunk()
1369 chunk->base_addr = (void *)aligned_addr; in pcpu_alloc_first_chunk()
1370 chunk->start_offset = start_offset; in pcpu_alloc_first_chunk()
1371 chunk->end_offset = region_size - chunk->start_offset - map_size; in pcpu_alloc_first_chunk()
1373 chunk->nr_pages = region_size >> PAGE_SHIFT; in pcpu_alloc_first_chunk()
1376 alloc_size = BITS_TO_LONGS(region_bits) * sizeof(chunk->alloc_map[0]); in pcpu_alloc_first_chunk()
1377 chunk->alloc_map = memblock_alloc(alloc_size, SMP_CACHE_BYTES); in pcpu_alloc_first_chunk()
1378 if (!chunk->alloc_map) in pcpu_alloc_first_chunk()
1383 BITS_TO_LONGS(region_bits + 1) * sizeof(chunk->bound_map[0]); in pcpu_alloc_first_chunk()
1384 chunk->bound_map = memblock_alloc(alloc_size, SMP_CACHE_BYTES); in pcpu_alloc_first_chunk()
1385 if (!chunk->bound_map) in pcpu_alloc_first_chunk()
1389 alloc_size = pcpu_chunk_nr_blocks(chunk) * sizeof(chunk->md_blocks[0]); in pcpu_alloc_first_chunk()
1390 chunk->md_blocks = memblock_alloc(alloc_size, SMP_CACHE_BYTES); in pcpu_alloc_first_chunk()
1391 if (!chunk->md_blocks) in pcpu_alloc_first_chunk()
1397 chunk->obj_exts = NULL; in pcpu_alloc_first_chunk()
1402 chunk->immutable = true; in pcpu_alloc_first_chunk()
1403 bitmap_fill(chunk->populated, chunk->nr_pages); in pcpu_alloc_first_chunk()
1404 chunk->nr_populated = chunk->nr_pages; in pcpu_alloc_first_chunk()
1405 chunk->nr_empty_pop_pages = chunk->nr_pages; in pcpu_alloc_first_chunk()
1407 chunk->free_bytes = map_size; in pcpu_alloc_first_chunk()
1409 if (chunk->start_offset) { in pcpu_alloc_first_chunk()
1411 offset_bits = chunk->start_offset / PCPU_MIN_ALLOC_SIZE; in pcpu_alloc_first_chunk()
1412 bitmap_set(chunk->alloc_map, 0, offset_bits); in pcpu_alloc_first_chunk()
1413 set_bit(0, chunk->bound_map); in pcpu_alloc_first_chunk()
1414 set_bit(offset_bits, chunk->bound_map); in pcpu_alloc_first_chunk()
1416 chunk->chunk_md.first_free = offset_bits; in pcpu_alloc_first_chunk()
1421 if (chunk->end_offset) { in pcpu_alloc_first_chunk()
1423 offset_bits = chunk->end_offset / PCPU_MIN_ALLOC_SIZE; in pcpu_alloc_first_chunk()
1424 bitmap_set(chunk->alloc_map, in pcpu_alloc_first_chunk()
1425 pcpu_chunk_map_bits(chunk) - offset_bits, in pcpu_alloc_first_chunk()
1428 chunk->bound_map); in pcpu_alloc_first_chunk()
1429 set_bit(region_bits, chunk->bound_map); in pcpu_alloc_first_chunk()
1432 - offset_bits, offset_bits); in pcpu_alloc_first_chunk()
1447 INIT_LIST_HEAD(&chunk->list); in pcpu_alloc_chunk()
1448 chunk->nr_pages = pcpu_unit_pages; in pcpu_alloc_chunk()
1451 chunk->alloc_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits) * in pcpu_alloc_chunk()
1452 sizeof(chunk->alloc_map[0]), gfp); in pcpu_alloc_chunk()
1453 if (!chunk->alloc_map) in pcpu_alloc_chunk()
1456 chunk->bound_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits + 1) * in pcpu_alloc_chunk()
1457 sizeof(chunk->bound_map[0]), gfp); in pcpu_alloc_chunk()
1458 if (!chunk->bound_map) in pcpu_alloc_chunk()
1461 chunk->md_blocks = pcpu_mem_zalloc(pcpu_chunk_nr_blocks(chunk) * in pcpu_alloc_chunk()
1462 sizeof(chunk->md_blocks[0]), gfp); in pcpu_alloc_chunk()
1463 if (!chunk->md_blocks) in pcpu_alloc_chunk()
1468 chunk->obj_exts = in pcpu_alloc_chunk()
1471 if (!chunk->obj_exts) in pcpu_alloc_chunk()
1479 chunk->free_bytes = chunk->nr_pages * PAGE_SIZE; in pcpu_alloc_chunk()
1485 pcpu_mem_free(chunk->md_blocks); in pcpu_alloc_chunk()
1488 pcpu_mem_free(chunk->bound_map); in pcpu_alloc_chunk()
1490 pcpu_mem_free(chunk->alloc_map); in pcpu_alloc_chunk()
1502 pcpu_mem_free(chunk->obj_exts); in pcpu_free_chunk()
1504 pcpu_mem_free(chunk->md_blocks); in pcpu_free_chunk()
1505 pcpu_mem_free(chunk->bound_map); in pcpu_free_chunk()
1506 pcpu_mem_free(chunk->alloc_map); in pcpu_free_chunk()
1511 * pcpu_chunk_populated - post-population bookkeeping
1523 int nr = page_end - page_start; in pcpu_chunk_populated()
1527 bitmap_set(chunk->populated, page_start, nr); in pcpu_chunk_populated()
1528 chunk->nr_populated += nr; in pcpu_chunk_populated()
1535 * pcpu_chunk_depopulated - post-depopulation bookkeeping
1547 int nr = page_end - page_start; in pcpu_chunk_depopulated()
1551 bitmap_clear(chunk->populated, page_start, nr); in pcpu_chunk_depopulated()
1552 chunk->nr_populated -= nr; in pcpu_chunk_depopulated()
1553 pcpu_nr_populated -= nr; in pcpu_chunk_depopulated()
1555 pcpu_update_empty_pages(chunk, -nr); in pcpu_chunk_depopulated()
1566 * pcpu_populate_chunk - populate the specified range of a chunk
1567 * pcpu_depopulate_chunk - depopulate the specified range of a chunk
1568 * pcpu_post_unmap_tlb_flush - flush tlb for the specified range of a chunk
1569 * pcpu_create_chunk - create a new chunk
1570 * pcpu_destroy_chunk - destroy a chunk, always preceded by full depop
1571 * pcpu_addr_to_page - translate address to physical address
1572 * pcpu_verify_alloc_info - check alloc_info is acceptable during init
1586 #include "percpu-km.c"
1588 #include "percpu-vm.c"
1592 * pcpu_chunk_addr_search - determine chunk containing specified address
1649 if (likely(chunk && chunk->obj_exts)) { in pcpu_memcg_post_alloc_hook()
1651 chunk->obj_exts[off >> PCPU_MIN_ALLOC_SHIFT].cgroup = objcg; in pcpu_memcg_post_alloc_hook()
1666 if (unlikely(!chunk->obj_exts)) in pcpu_memcg_free_hook()
1669 objcg = chunk->obj_exts[off >> PCPU_MIN_ALLOC_SHIFT].cgroup; in pcpu_memcg_free_hook()
1672 chunk->obj_exts[off >> PCPU_MIN_ALLOC_SHIFT].cgroup = NULL; in pcpu_memcg_free_hook()
1678 -pcpu_obj_full_size(size)); in pcpu_memcg_free_hook()
1706 if (mem_alloc_profiling_enabled() && likely(chunk->obj_exts)) { in pcpu_alloc_tag_alloc_hook()
1707 alloc_tag_add(&chunk->obj_exts[off >> PCPU_MIN_ALLOC_SHIFT].tag, in pcpu_alloc_tag_alloc_hook()
1708 current->alloc_tag, size); in pcpu_alloc_tag_alloc_hook()
1714 if (mem_alloc_profiling_enabled() && likely(chunk->obj_exts)) in pcpu_alloc_tag_free_hook()
1715 alloc_tag_sub(&chunk->obj_exts[off >> PCPU_MIN_ALLOC_SHIFT].tag, size); in pcpu_alloc_tag_free_hook()
1729 * pcpu_alloc - the percpu allocator
1768 * of up to PCPU_MIN_ALLOC_SIZE - 1 bytes. in pcpu_alloc_noprof()
1858 pcpu_chunk_relocate(chunk, -1); in pcpu_alloc_noprof()
1876 for_each_clear_bitrange_from(rs, re, chunk->populated, page_end) { in pcpu_alloc_noprof()
1877 WARN_ON(chunk->immutable); in pcpu_alloc_noprof()
1901 ptr = __addr_to_pcpu_ptr(chunk->base_addr + off); in pcpu_alloc_noprof()
1905 chunk->base_addr, off, ptr, in pcpu_alloc_noprof()
1924 if (!--warn_limit) in pcpu_alloc_noprof()
1943 * pcpu_balance_free - manage the amount of free chunks
1966 WARN_ON(chunk->immutable); in pcpu_balance_free()
1972 if (!empty_only || chunk->nr_empty_pop_pages == 0) in pcpu_balance_free()
1973 list_move(&chunk->list, &to_free); in pcpu_balance_free()
1983 for_each_set_bitrange(rs, re, chunk->populated, chunk->nr_pages) { in pcpu_balance_free()
1996 * pcpu_balance_populated - manage the amount of populated pages
2032 nr_to_pop = clamp(PCPU_EMPTY_POP_PAGES_HIGH - in pcpu_balance_populated()
2044 nr_unpop = chunk->nr_pages - chunk->nr_populated; in pcpu_balance_populated()
2053 for_each_clear_bitrange(rs, re, chunk->populated, chunk->nr_pages) { in pcpu_balance_populated()
2054 int nr = min_t(int, re - rs, nr_to_pop); in pcpu_balance_populated()
2061 nr_to_pop -= nr; in pcpu_balance_populated()
2079 pcpu_chunk_relocate(chunk, -1); in pcpu_balance_populated()
2086 * pcpu_reclaim_populated - scan over to_depopulate chunks and free empty pages
2092 * populated pages threshold, reintegrate the chunk if it has empty free pages.
2119 WARN_ON(chunk->immutable); in pcpu_reclaim_populated()
2125 freed_page_start = chunk->nr_pages; in pcpu_reclaim_populated()
2128 for (i = chunk->nr_pages - 1, end = -1; i >= 0; i--) { in pcpu_reclaim_populated()
2130 if (chunk->nr_empty_pop_pages == 0) in pcpu_reclaim_populated()
2145 block = chunk->md_blocks + i; in pcpu_reclaim_populated()
2146 if (block->contig_hint == PCPU_BITMAP_BLOCK_BITS && in pcpu_reclaim_populated()
2147 test_bit(i, chunk->populated)) { in pcpu_reclaim_populated()
2148 if (end == -1) in pcpu_reclaim_populated()
2152 i--; in pcpu_reclaim_populated()
2156 if (end == -1) in pcpu_reclaim_populated()
2169 end = -1; in pcpu_reclaim_populated()
2182 if (reintegrate || chunk->free_bytes == pcpu_unit_size) in pcpu_reclaim_populated()
2185 list_move_tail(&chunk->list, in pcpu_reclaim_populated()
2191 * pcpu_balance_workfn - manage the amount of free chunks and populated pages
2220 * free_percpu - free percpu area
2243 off = addr - chunk->base_addr; in free_percpu()
2257 if (!chunk->isolated && chunk->free_bytes == pcpu_unit_size) { in free_percpu()
2270 trace_percpu_free_percpu(chunk->base_addr, off, ptr); in free_percpu()
2282 const size_t static_size = __per_cpu_end - __per_cpu_start; in __is_kernel_percpu_address()
2292 *can_addr = (unsigned long) (va - start); in __is_kernel_percpu_address()
2305 * is_kernel_percpu_address - test whether address is from static percpu area
2308 * Test whether @addr belongs to in-kernel static percpu area. Module
2313 * %true if @addr is from in-kernel static percpu area, %false otherwise.
2321 * per_cpu_ptr_to_phys - convert translated percpu address to physical address
2388 * pcpu_alloc_alloc_info - allocate percpu allocation info
2411 __alignof__(ai->groups[0].cpu_map[0])); in pcpu_alloc_alloc_info()
2412 ai_size = base_size + nr_units * sizeof(ai->groups[0].cpu_map[0]); in pcpu_alloc_alloc_info()
2420 ai->groups[0].cpu_map = ptr; in pcpu_alloc_alloc_info()
2423 ai->groups[0].cpu_map[unit] = NR_CPUS; in pcpu_alloc_alloc_info()
2425 ai->nr_groups = nr_groups; in pcpu_alloc_alloc_info()
2426 ai->__ai_size = PFN_ALIGN(ai_size); in pcpu_alloc_alloc_info()
2432 * pcpu_free_alloc_info - free percpu allocation info
2439 memblock_free(ai, ai->__ai_size); in pcpu_free_alloc_info()
2443 * pcpu_dump_alloc_info - print out information about pcpu_alloc_info
2453 char empty_str[] = "--------"; in pcpu_dump_alloc_info()
2458 v = ai->nr_groups; in pcpu_dump_alloc_info()
2465 empty_str[min_t(int, cpu_width, sizeof(empty_str) - 1)] = '\0'; in pcpu_dump_alloc_info()
2467 upa = ai->alloc_size / ai->unit_size; in pcpu_dump_alloc_info()
2471 printk("%spcpu-alloc: s%zu r%zu d%zu u%zu alloc=%zu*%zu", in pcpu_dump_alloc_info()
2472 lvl, ai->static_size, ai->reserved_size, ai->dyn_size, in pcpu_dump_alloc_info()
2473 ai->unit_size, ai->alloc_size / ai->atom_size, ai->atom_size); in pcpu_dump_alloc_info()
2475 for (group = 0; group < ai->nr_groups; group++) { in pcpu_dump_alloc_info()
2476 const struct pcpu_group_info *gi = &ai->groups[group]; in pcpu_dump_alloc_info()
2479 BUG_ON(gi->nr_units % upa); in pcpu_dump_alloc_info()
2480 for (alloc_end += gi->nr_units / upa; in pcpu_dump_alloc_info()
2484 printk("%spcpu-alloc: ", lvl); in pcpu_dump_alloc_info()
2489 if (gi->cpu_map[unit] != NR_CPUS) in pcpu_dump_alloc_info()
2491 cpu_width, gi->cpu_map[unit]); in pcpu_dump_alloc_info()
2500 * pcpu_setup_first_chunk - initialize the first percpu chunk
2511 * @ai->static_size is the size of static percpu area.
2513 * @ai->reserved_size, if non-zero, specifies the amount of bytes to
2521 * @ai->dyn_size determines the number of bytes available for dynamic
2522 * allocation in the first chunk. The area between @ai->static_size +
2523 * @ai->reserved_size + @ai->dyn_size and @ai->unit_size is unused.
2525 * @ai->unit_size specifies unit size and must be aligned to PAGE_SIZE
2526 * and equal to or larger than @ai->static_size + @ai->reserved_size +
2527 * @ai->dyn_size.
2529 * @ai->atom_size is the allocation atom size and used as alignment
2532 * @ai->alloc_size is the allocation size and always multiple of
2533 * @ai->atom_size. This is larger than @ai->atom_size if
2534 * @ai->unit_size is larger than @ai->atom_size.
2536 * @ai->nr_groups and @ai->groups describe virtual memory layout of
2539 * groupings. If @ai->nr_groups is zero, a single group containing
2547 * chunk also contains a reserved region, it is served by two chunks -
2556 size_t size_sum = ai->static_size + ai->reserved_size + ai->dyn_size; in pcpu_setup_first_chunk()
2578 PCPU_SETUP_BUG_ON(ai->nr_groups <= 0); in pcpu_setup_first_chunk()
2580 PCPU_SETUP_BUG_ON(!ai->static_size); in pcpu_setup_first_chunk()
2585 PCPU_SETUP_BUG_ON(ai->unit_size < size_sum); in pcpu_setup_first_chunk()
2586 PCPU_SETUP_BUG_ON(offset_in_page(ai->unit_size)); in pcpu_setup_first_chunk()
2587 PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE); in pcpu_setup_first_chunk()
2588 PCPU_SETUP_BUG_ON(!IS_ALIGNED(ai->unit_size, PCPU_BITMAP_BLOCK_SIZE)); in pcpu_setup_first_chunk()
2589 PCPU_SETUP_BUG_ON(ai->dyn_size < PERCPU_DYNAMIC_EARLY_SIZE); in pcpu_setup_first_chunk()
2590 PCPU_SETUP_BUG_ON(!IS_ALIGNED(ai->reserved_size, PCPU_MIN_ALLOC_SIZE)); in pcpu_setup_first_chunk()
2596 alloc_size = ai->nr_groups * sizeof(group_offsets[0]); in pcpu_setup_first_chunk()
2602 alloc_size = ai->nr_groups * sizeof(group_sizes[0]); in pcpu_setup_first_chunk()
2626 for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) { in pcpu_setup_first_chunk()
2627 const struct pcpu_group_info *gi = &ai->groups[group]; in pcpu_setup_first_chunk()
2629 group_offsets[group] = gi->base_offset; in pcpu_setup_first_chunk()
2630 group_sizes[group] = gi->nr_units * ai->unit_size; in pcpu_setup_first_chunk()
2632 for (i = 0; i < gi->nr_units; i++) { in pcpu_setup_first_chunk()
2633 cpu = gi->cpu_map[i]; in pcpu_setup_first_chunk()
2642 unit_off[cpu] = gi->base_offset + i * ai->unit_size; in pcpu_setup_first_chunk()
2662 pcpu_nr_groups = ai->nr_groups; in pcpu_setup_first_chunk()
2669 pcpu_unit_pages = ai->unit_size >> PAGE_SHIFT; in pcpu_setup_first_chunk()
2671 pcpu_atom_size = ai->atom_size; in pcpu_setup_first_chunk()
2679 * sidelined_slot - isolated, depopulated chunks in pcpu_setup_first_chunk()
2680 * free_slot - fully free chunks in pcpu_setup_first_chunk()
2681 * to_depopulate_slot - isolated, chunks to depopulate in pcpu_setup_first_chunk()
2705 static_size = ALIGN(ai->static_size, PCPU_MIN_ALLOC_SIZE); in pcpu_setup_first_chunk()
2706 dyn_size = ai->dyn_size - (static_size - ai->static_size); in pcpu_setup_first_chunk()
2712 * - static - there is no backing chunk because these allocations can in pcpu_setup_first_chunk()
2714 * - reserved (pcpu_reserved_chunk) - exists primarily to serve in pcpu_setup_first_chunk()
2716 * - dynamic (pcpu_first_chunk) - serves the dynamic part of the first in pcpu_setup_first_chunk()
2720 if (ai->reserved_size) in pcpu_setup_first_chunk()
2722 ai->reserved_size); in pcpu_setup_first_chunk()
2723 tmp_addr = (unsigned long)base_addr + static_size + ai->reserved_size; in pcpu_setup_first_chunk()
2726 pcpu_nr_empty_pop_pages = pcpu_first_chunk->nr_empty_pop_pages; in pcpu_setup_first_chunk()
2727 pcpu_chunk_relocate(pcpu_first_chunk, -1); in pcpu_setup_first_chunk()
2752 return -EINVAL; in percpu_alloc_setup()
2789 * pcpu_build_alloc_info - build alloc_info considering distances between CPUs
2817 const size_t static_size = __per_cpu_end - __per_cpu_start; in pcpu_build_alloc_info()
2834 dyn_size = size_sum - static_size - reserved_size; in pcpu_build_alloc_info()
2848 upa--; in pcpu_build_alloc_info()
2880 for (upa = max_upa; upa; upa--) { in pcpu_build_alloc_info()
2889 wasted += this_allocs * upa - group_cnt[group]; in pcpu_build_alloc_info()
2894 * greater-than comparison ensures upa==1 always in pcpu_build_alloc_info()
2915 return ERR_PTR(-ENOMEM); in pcpu_build_alloc_info()
2916 cpu_map = ai->groups[0].cpu_map; in pcpu_build_alloc_info()
2919 ai->groups[group].cpu_map = cpu_map; in pcpu_build_alloc_info()
2923 ai->static_size = static_size; in pcpu_build_alloc_info()
2924 ai->reserved_size = reserved_size; in pcpu_build_alloc_info()
2925 ai->dyn_size = dyn_size; in pcpu_build_alloc_info()
2926 ai->unit_size = alloc_size / upa; in pcpu_build_alloc_info()
2927 ai->atom_size = atom_size; in pcpu_build_alloc_info()
2928 ai->alloc_size = alloc_size; in pcpu_build_alloc_info()
2931 struct pcpu_group_info *gi = &ai->groups[group]; in pcpu_build_alloc_info()
2935 * back-to-back. The caller should update this to in pcpu_build_alloc_info()
2938 gi->base_offset = unit * ai->unit_size; in pcpu_build_alloc_info()
2942 gi->cpu_map[gi->nr_units++] = cpu; in pcpu_build_alloc_info()
2943 gi->nr_units = roundup(gi->nr_units, upa); in pcpu_build_alloc_info()
2944 unit += gi->nr_units; in pcpu_build_alloc_info()
2964 pr_info("cpu %d has no node %d or node-local memory\n", in pcpu_fc_alloc()
2990 * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem
3001 * by calling pcpu_fc_alloc and used as-is without being mapped into
3007 * can result in very sparse cpu->unit mapping on NUMA machines thus
3018 * 0 on success, -errno on failure.
3037 size_sum = ai->static_size + ai->reserved_size + ai->dyn_size; in pcpu_embed_first_chunk()
3038 areas_size = PFN_ALIGN(ai->nr_groups * sizeof(void *)); in pcpu_embed_first_chunk()
3042 rc = -ENOMEM; in pcpu_embed_first_chunk()
3048 for (group = 0; group < ai->nr_groups; group++) { in pcpu_embed_first_chunk()
3049 struct pcpu_group_info *gi = &ai->groups[group]; in pcpu_embed_first_chunk()
3053 for (i = 0; i < gi->nr_units && cpu == NR_CPUS; i++) in pcpu_embed_first_chunk()
3054 cpu = gi->cpu_map[i]; in pcpu_embed_first_chunk()
3058 ptr = pcpu_fc_alloc(cpu, gi->nr_units * ai->unit_size, atom_size, cpu_to_nd_fn); in pcpu_embed_first_chunk()
3060 rc = -ENOMEM; in pcpu_embed_first_chunk()
3071 max_distance = areas[highest_group] - base; in pcpu_embed_first_chunk()
3072 max_distance += ai->unit_size * ai->groups[highest_group].nr_units; in pcpu_embed_first_chunk()
3080 rc = -EINVAL; in pcpu_embed_first_chunk()
3090 for (group = 0; group < ai->nr_groups; group++) { in pcpu_embed_first_chunk()
3091 struct pcpu_group_info *gi = &ai->groups[group]; in pcpu_embed_first_chunk()
3094 for (i = 0; i < gi->nr_units; i++, ptr += ai->unit_size) { in pcpu_embed_first_chunk()
3095 if (gi->cpu_map[i] == NR_CPUS) { in pcpu_embed_first_chunk()
3097 pcpu_fc_free(ptr, ai->unit_size); in pcpu_embed_first_chunk()
3101 memcpy(ptr, __per_cpu_load, ai->static_size); in pcpu_embed_first_chunk()
3102 pcpu_fc_free(ptr + size_sum, ai->unit_size - size_sum); in pcpu_embed_first_chunk()
3107 for (group = 0; group < ai->nr_groups; group++) { in pcpu_embed_first_chunk()
3108 ai->groups[group].base_offset = areas[group] - base; in pcpu_embed_first_chunk()
3112 PFN_DOWN(size_sum), ai->static_size, ai->reserved_size, in pcpu_embed_first_chunk()
3113 ai->dyn_size, ai->unit_size); in pcpu_embed_first_chunk()
3119 for (group = 0; group < ai->nr_groups; group++) in pcpu_embed_first_chunk()
3122 ai->groups[group].nr_units * ai->unit_size); in pcpu_embed_first_chunk()
3196 * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages
3200 * This is a helper to ease setting up page-remapped first percpu
3204 * page-by-page into vmalloc area.
3207 * 0 on success, -errno on failure.
3226 BUG_ON(ai->nr_groups != 1); in pcpu_page_first_chunk()
3227 upa = ai->alloc_size/ai->unit_size; in pcpu_page_first_chunk()
3229 if (WARN_ON(ai->groups[0].nr_units != nr_g0_units)) { in pcpu_page_first_chunk()
3231 return -EINVAL; in pcpu_page_first_chunk()
3234 unit_pages = ai->unit_size >> PAGE_SHIFT; in pcpu_page_first_chunk()
3247 unsigned int cpu = ai->groups[0].cpu_map[unit]; in pcpu_page_first_chunk()
3265 vm.size = num_possible_cpus() * ai->unit_size; in pcpu_page_first_chunk()
3270 (unsigned long)vm.addr + unit * ai->unit_size; in pcpu_page_first_chunk()
3281 flush_cache_vmap_early(unit_addr, unit_addr + ai->unit_size); in pcpu_page_first_chunk()
3284 memcpy((void *)unit_addr, __per_cpu_load, ai->static_size); in pcpu_page_first_chunk()
3289 unit_pages, psize_str, ai->static_size, in pcpu_page_first_chunk()
3290 ai->reserved_size, ai->dyn_size); in pcpu_page_first_chunk()
3296 while (--j >= 0) in pcpu_page_first_chunk()
3298 rc = -ENOMEM; in pcpu_page_first_chunk()
3311 * the original non-dynamic generic percpu area setup. This is
3314 * location. As an added bonus, in non-NUMA cases, embedding is
3315 * generally a good idea TLB-wise because percpu area can piggy back
3324 unsigned long delta; in setup_per_cpu_areas() local
3337 delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; in setup_per_cpu_areas()
3339 __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; in setup_per_cpu_areas()
3348 * UP always uses km-based percpu allocator with identity mapping.
3367 ai->dyn_size = unit_size; in setup_per_cpu_areas()
3368 ai->unit_size = unit_size; in setup_per_cpu_areas()
3369 ai->atom_size = unit_size; in setup_per_cpu_areas()
3370 ai->alloc_size = unit_size; in setup_per_cpu_areas()
3371 ai->groups[0].nr_units = 1; in setup_per_cpu_areas()
3372 ai->groups[0].cpu_map[0] = 0; in setup_per_cpu_areas()
3381 * pcpu_nr_pages - calculate total number of populated backing pages