Lines Matching: boot, page, step
1 // SPDX-License-Identifier: GPL-2.0-or-later
14 * PowerPC Hashed Page Table functions
20 #define pr_fmt(fmt) "hash-mmu: " fmt
41 #include <linux/elf-randomize.h>
48 #include <asm/page.h>
60 #include <asm/code-patching.h>
66 #include <asm/pte-walk.h>
67 #include <asm/asm-prototypes.h>
92 * Note: pte --> Linux PTE
93 * HPTE --> PowerPC Hashed Page Table Entry
132 * These are definitions of page sizes arrays to be used when none
143 .penc = {[MMU_PAGE_4K] = 0, [1 ... MMU_PAGE_COUNT - 1] = -1},
158 .penc = {[MMU_PAGE_4K] = 0, [1 ... MMU_PAGE_COUNT - 1] = -1},
165 .penc = {[0 ... MMU_PAGE_16M - 1] = -1, [MMU_PAGE_16M] = 0,
166 [MMU_PAGE_16M + 1 ... MMU_PAGE_COUNT - 1] = -1 },
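
The [1 ... MMU_PAGE_COUNT - 1] = -1 initializers above rely on GNU C range designators to mark every page-size slot except the one being defined as having no hash encoding. A minimal standalone sketch of the same idiom, with PSIZE_COUNT standing in for MMU_PAGE_COUNT (a made-up name for illustration):

#include <stdio.h>

#define PSIZE_COUNT 16          /* stand-in for MMU_PAGE_COUNT */

/* GNU C range designators: slot 0 gets 0, every other slot gets -1 */
static const int penc[PSIZE_COUNT] = {
	[0] = 0,
	[1 ... PSIZE_COUNT - 1] = -1,
};

int main(void)
{
	for (int i = 0; i < PSIZE_COUNT; i++)
		printf("penc[%d] = %d\n", i, penc[i]);
	return 0;
}
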
233 * * PRS=1, R=0, and RIC!=2 (The only process-scoped in tlbiel_all_isa300()
273 WARN(1, "%s called on pre-POWER7 CPU\n", __func__); in hash__tlbiel_all()
278 * - Under pHyp or KVM, the updatepp path will not set C, thus it *will*
281 * - The above is however not a problem, because we also don't do that
285 * - Under bare metal, we do have the race, so we need R and C set
286 * - We make sure R is always set and never lost
287 * - C is _PAGE_DIRTY, and *should* always be set for a writeable mapping
293 /* _PAGE_EXEC -> NOEXEC */ in htab_convert_pte_flags()
301 * or PPP=0b011 for read-only (including writeable but clean pages). in htab_convert_pte_flags()
313 VM_WARN_ONCE(!(pteflags & _PAGE_RWX), "no-access mapping request"); in htab_convert_pte_flags()
322 VM_WARN_ONCE(!(pteflags & _PAGE_RWX), "no-access mapping request"); in htab_convert_pte_flags()
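
A minimal sketch of the policy the comments above describe: the hash PTE reference bit (R) is set unconditionally so it can never be lost to the update race, the change bit (C) is only set when the Linux PTE is already dirty, and a clear _PAGE_EXEC becomes a no-execute bit. The flag values below are illustrative stand-ins, not the real _PAGE_* or HPTE_R_* definitions:

#include <stdio.h>

/* Illustrative stand-ins for the Linux PTE and hash PTE flag bits */
#define PTE_EXEC   0x1UL
#define PTE_DIRTY  0x2UL
#define HPTE_N     0x4UL   /* no-execute */
#define HPTE_C     0x80UL  /* changed */
#define HPTE_R     0x100UL /* referenced */

/* Sketch of the Linux-PTE -> hash-PTE flag conversion described above */
static unsigned long convert_pte_flags(unsigned long pteflags)
{
	unsigned long rflags = 0;

	if (!(pteflags & PTE_EXEC))	/* _PAGE_EXEC -> NOEXEC */
		rflags |= HPTE_N;

	rflags |= HPTE_R;		/* R is always set, never lost */
	if (pteflags & PTE_DIRTY)	/* C only for already-dirty pages */
		rflags |= HPTE_C;

	return rflags;
}

int main(void)
{
	printf("clean+exec : %#lx\n", convert_pte_flags(PTE_EXEC));
	printf("dirty      : %#lx\n", convert_pte_flags(PTE_DIRTY));
	return 0;
}
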
359 unsigned int step, shift; in htab_bolt_mapping() local
363 step = 1 << shift; in htab_bolt_mapping()
 367 	DBG("htab_bolt_mapping(%lx..%lx -> %lx (%lx,%d,%d))\n", in htab_bolt_mapping()
371 vaddr = ALIGN(vstart, step); in htab_bolt_mapping()
372 paddr = ALIGN(pstart, step); in htab_bolt_mapping()
373 vend = ALIGN_DOWN(vend, step); in htab_bolt_mapping()
375 for (; vaddr < vend; vaddr += step, paddr += step) { in htab_bolt_mapping()
386 return -1; in htab_bolt_mapping()
388 if (overlaps_kernel_text(vaddr, vaddr + step)) in htab_bolt_mapping()
402 overlaps_interrupt_vector_text(vaddr, vaddr + step)) in htab_bolt_mapping()
413 if (ret == -1) { in htab_bolt_mapping()
419 if (ret != -1) in htab_bolt_mapping()
423 if (ret == -1 && !secondary_hash) { in htab_bolt_mapping()
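
The alignment logic above bolts only the large pages fully contained in the requested range: the start is rounded up to the linear-map page size (step) and the end rounded down, then the range is walked one step at a time. A standalone sketch of that rounding, with ALIGN/ALIGN_DOWN spelled out by hand and made-up addresses:

#include <stdio.h>

#define ALIGN_UP(x, a)    (((x) + (a) - 1) & ~((a) - 1))
#define ALIGN_DOWN(x, a)  ((x) & ~((a) - 1))

int main(void)
{
	unsigned long shift = 24;               /* e.g. 16MB linear-map pages */
	unsigned long step  = 1UL << shift;
	unsigned long vstart = 0x01234000UL, vend = 0x0789a000UL;

	/* Only fully covered large pages are bolted */
	unsigned long vaddr = ALIGN_UP(vstart, step);
	unsigned long vlast = ALIGN_DOWN(vend, step);

	for (; vaddr < vlast; vaddr += step)
		printf("bolt %#lx..%#lx\n", vaddr, vaddr + step - 1);

	return 0;
}
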
445 unsigned int step, shift; in htab_remove_mapping() local
450 step = 1 << shift; in htab_remove_mapping()
453 return -ENODEV; in htab_remove_mapping()
456 vaddr = ALIGN_DOWN(vstart, step); in htab_remove_mapping()
 459 	for (; vaddr < vend; vaddr += step) { in htab_remove_mapping()
470 if (rc == -ENOENT) { in htab_remove_mapping()
471 ret = -ENOENT; in htab_remove_mapping()
502 * per-CPU array allocated if we enable stress_hpt.
536 prop = of_get_flat_dt_prop(node, "ibm,processor-segment-sizes", &size); in htab_dt_scan_seg_sizes()
539 for (; size >= 4; size -= 4, ++prop) { in htab_dt_scan_seg_sizes()
548 cur_cpu_spec->mmu_features |= MMU_FTR_1T_SEGMENT; in htab_dt_scan_seg_sizes()
552 cur_cpu_spec->mmu_features &= ~MMU_FTR_NO_SLBIE_B; in htab_dt_scan_seg_sizes()
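
A sketch of what that flat-device-tree scan does: "ibm,processor-segment-sizes" is a list of big-endian 32-bit cells holding supported segment shifts, and the scan looks for the 1TB value (0x28, i.e. 2^40 bytes) to enable MMU_FTR_1T_SEGMENT. Standalone sketch over a fake cell array (the property contents are made up):

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>   /* ntohl(): device-tree cells are big-endian */

int main(void)
{
	/* Fake "ibm,processor-segment-sizes": 256MB (0x1c) and 1TB (0x28) */
	uint32_t prop[] = { htonl(0x1c), htonl(0x28) };
	int size = sizeof(prop);
	int have_1t = 0;

	for (uint32_t *p = prop; size >= 4; size -= 4, p++)
		if (ntohl(*p) == 0x28)	/* 2^40 bytes == 1TB segments */
			have_1t = 1;

	printf("1T segments %s\n", have_1t ? "supported" : "not supported");
	return 0;
}
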
558 int idx = -1; in get_idx_from_shift()
592 prop = of_get_flat_dt_prop(node, "ibm,segment-page-sizes", &size); in htab_dt_scan_page_sizes()
596 pr_info("Page sizes from device-tree:\n"); in htab_dt_scan_page_sizes()
598 cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE); in htab_dt_scan_page_sizes()
606 size -= 3; prop += 3; in htab_dt_scan_page_sizes()
610 prop += lpnum * 2; size -= lpnum * 2; in htab_dt_scan_page_sizes()
615 cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE; in htab_dt_scan_page_sizes()
617 def->shift = base_shift; in htab_dt_scan_page_sizes()
619 def->avpnm = 0; in htab_dt_scan_page_sizes()
621 def->avpnm = (1 << (base_shift - 23)) - 1; in htab_dt_scan_page_sizes()
622 def->sllp = slbenc; in htab_dt_scan_page_sizes()
628 def->tlbiel = 1; in htab_dt_scan_page_sizes()
630 def->tlbiel = 0; in htab_dt_scan_page_sizes()
636 prop += 2; size -= 2; in htab_dt_scan_page_sizes()
637 lpnum--; in htab_dt_scan_page_sizes()
643 if (penc == -1) in htab_dt_scan_page_sizes()
647 def->penc[idx] = penc; in htab_dt_scan_page_sizes()
650 base_shift, shift, def->sllp, in htab_dt_scan_page_sizes()
651 def->avpnm, def->tlbiel, def->penc[idx]); in htab_dt_scan_page_sizes()
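
The prop/size strides visible above (consume 3 cells, then 2 cells per encoding, lpnum times) suggest the record layout of "ibm,segment-page-sizes": a base page shift, its SLB encoding, a pair count, then (actual page shift, penc) pairs. A standalone sketch parsing one fake record; the layout is inferred from those strides and the numbers in the example are made up:

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

int main(void)
{
	/*
	 * Fake "ibm,segment-page-sizes" record, laid out as the scan above
	 * consumes it: base shift, SLB encoding, number of (shift, penc)
	 * pairs, then the pairs themselves.
	 */
	uint32_t prop[] = {
		htonl(12), htonl(0), htonl(2),   /* base shift 12, slbenc 0, 2 pairs */
		htonl(12), htonl(0),             /* actual shift 12, penc 0 */
		htonl(16), htonl(7),             /* actual shift 16, penc 7 */
	};
	uint32_t *p = prop;

	uint32_t base_shift = ntohl(*p++);
	uint32_t slbenc     = ntohl(*p++);
	uint32_t lpnum      = ntohl(*p++);

	printf("base shift %u, slbenc %u\n", base_shift, slbenc);
	while (lpnum--) {
		uint32_t shift = ntohl(*p++);
		uint32_t penc  = ntohl(*p++);
		printf("  actual shift %u -> penc %u\n", shift, penc);
	}
	return 0;
}
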
 692 	printk(KERN_INFO "Huge page (16GB) memory: " in htab_dt_scan_hugepage_blocks()
708 mmu_psize_defs[bpsize].penc[apsize] = -1; in mmu_psize_set_default_penc()
 734 	/* set the invalid penc to -1 */ in htab_scan_page_sizes()
742 * Try to find the available page sizes in the device-tree in htab_scan_page_sizes()
747 * Nothing in the device-tree, but the CPU supports 16M pages, in htab_scan_page_sizes()
756 /* Reserve 16G huge page memory sections for huge pages */ in htab_scan_page_sizes()
765 * supported base/actual page size combinations. Each combination
767 * the LP field of the HPTE. For actual page sizes less than 1MB,
771 * In diagrammatic form, with r = RPN bits and z = page size bits:
772 * PTE LP actual page size
779 * The zzzz bits are implementation-specific but are chosen so that
780 * no encoding for a larger page size uses the same value in its
781 * low-order N bits as the encoding for the 2^(12+N) byte page size
791 continue; /* not a supported page size */ in init_hpte_page_sizes()
794 if (penc == -1 || !mmu_psize_defs[ap].shift) in init_hpte_page_sizes()
796 shift = mmu_psize_defs[ap].shift - LP_SHIFT; in init_hpte_page_sizes()
800 * For page sizes less than 1MB, this loop in init_hpte_page_sizes()
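
A sketch of the replication those comments describe: for actual page sizes below 1MB only the low (page shift - LP_SHIFT) bits of the LP field carry the encoding, so the lookup entry must be duplicated for every value of the remaining RPN ("rrrr") bits. The 8-bit LP field at bit 12 and the example values below are assumptions for illustration:

#include <stdio.h>

/* Assumed field geometry for illustration: 8-bit LP field at bit 12 */
#define LP_BITS   8
#define LP_SHIFT  12

static unsigned char lookup[1 << LP_BITS];

int main(void)
{
	unsigned int actual_shift = 16;			/* 64K actual page size */
	unsigned int penc = 0x1;			/* made-up encoding in the low bits */
	unsigned int shift = actual_shift - LP_SHIFT;	/* low 4 bits carry the penc */
	unsigned int val = 0xa5;			/* made-up table value */

	/*
	 * Replicate the entry for every value of the upper "rrrr" RPN bits:
	 * all LP values whose low `shift` bits equal penc map to this size.
	 */
	for (unsigned int lp = penc; lp < (1u << LP_BITS); lp += 1u << shift)
		lookup[lp] = val;

	for (unsigned int lp = 0; lp < (1u << LP_BITS); lp++)
		if (lookup[lp])
			printf("LP %#04x -> %#x\n", lp, lookup[lp]);
	return 0;
}
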
840 * (and firmware) support cache-inhibited large pages. in htab_init_page_sizes()
842 * hash_page knows to switch processes that use cache-inhibited in htab_init_page_sizes()
866 * and we have at least 1G of RAM at boot in htab_init_page_sizes()
875 printk(KERN_DEBUG "Page orders: linear mapping = %d, " in htab_init_page_sizes()
901 prop = of_get_flat_dt_prop(node, "ibm,pft-size", NULL); in htab_dt_scan_pftsize()
921 pteg_shift = memshift - (pshift + 1); in htab_shift_for_mem_size()
934 * retrieve it from the device-tree. If it's not there neither, we in htab_get_table_size()
964 target_hpt_shift < ppc64_pft_size - 1) in resize_hpt_for_hotplug()
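
Reading the arithmetic above: pteg_shift = memshift - (pshift + 1) gives one PTEG per two pages of memory. Assuming the usual 128-byte PTEGs and the 256KB architectural floor implied by the "- 18" size encodings used later for SDR1 and the partition table (both assumptions, not quoted from this listing), the hash table shift works out to memshift - pshift + 6. A small worked example:

#include <stdio.h>

/*
 * Sketch of the sizing rule suggested by the fragment above:
 * one PTEG (2^7 bytes) per two pages, clamped to a 2^18-byte minimum.
 * The +7 and the 18 floor are assumptions for illustration.
 */
static unsigned int htab_shift_for_mem(unsigned int memshift, unsigned int pshift)
{
	unsigned int pteg_shift = memshift - (pshift + 1);
	unsigned int htab_shift = pteg_shift + 7;

	return htab_shift < 18 ? 18 : htab_shift;
}

int main(void)
{
	/* 1GB of RAM with 4K pages -> 2^24 = 16MB hash table */
	printf("1GB/4K   -> 2^%u bytes\n", htab_shift_for_mem(30, 12));
	/* 64GB of RAM with 64K pages -> 2^26 = 64MB hash table */
	printf("64GB/64K -> 2^%u bytes\n", htab_shift_for_mem(36, 16));
	return 0;
}
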
977 return -1; in hash__create_section_mapping()
989 BUG_ON(rc2 && (rc2 != -ENOENT)); in hash__create_section_mapping()
999 if (resize_hpt_for_hotplug(memblock_phys_mem_size()) == -ENOSPC) in hash__remove_section_mapping()
1012 * PS field (VRMA page size) is not used for LPID 0, hence set to 0. in hash_init_partition_table()
1015 htab_size = __ilog2(htab_size) - 18; in hash_init_partition_table()
1045 DBG(" -> htab_initialize()\n"); in htab_initialize()
1078 htab_hash_mask = pteg_count - 1; in htab_initialize()
1123 _SDR1 = table + __ilog2(htab_size_bytes) - 18; in htab_initialize()
1149 size = end - base; in htab_initialize()
1185 DBG(" <- htab_initialize()\n"); in htab_initialize()
1195 /* Initialize page sizes */ in hash__early_init_devtree()
1208 * Where the slot number is between 0-15, and values of 8-15 indicate in hash__early_init_mmu()
1220 * initialize page table size in hash__early_init_mmu()
1245 vmemmap = (struct page *)H_VMEMMAP_START; in hash__early_init_mmu()
1266 * currently where the page size encoding is obtained. in hash__early_init_mmu()
1292 (PATB_SIZE_SHIFT - 12)); in hash__early_init_mmu_secondary()
1320 /* page is dirty */ in hash_page_do_lazy_icache()
1321 if (!test_bit(PG_dcache_clean, &folio->flags) && in hash_page_do_lazy_icache()
1325 set_bit(PG_dcache_clean, &folio->flags); in hash_page_do_lazy_icache()
1338 psizes = get_paca()->mm_ctx_low_slices_psize; in get_paca_psize()
1341 psizes = get_paca()->mm_ctx_high_slices_psize; in get_paca_psize()
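
The low/high slice psize arrays referenced above pack one page-size identifier per address-space slice, two 4-bit entries per byte. A hedged sketch of extracting one nibble; the even-slice-in-low-nibble packing is inferred from the slice code and the names here are made up:

#include <stdio.h>

/*
 * Two 4-bit page-size entries per byte: even slice in the low nibble,
 * odd slice in the high nibble (packing assumed for illustration).
 */
static unsigned int slice_psize(const unsigned char *psizes, unsigned long index)
{
	return (psizes[index >> 1] >> ((index & 1) * 4)) & 0xF;
}

int main(void)
{
	/* slices 0..3 set to psize ids 2, 5, 2, 4 */
	unsigned char psizes[] = { 0x52, 0x42 };

	for (unsigned long i = 0; i < 4; i++)
		printf("slice %lu -> psize %u\n", i, slice_psize(psizes, i));
	return 0;
}
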
1360 if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) { in demote_segment_4k()
1370 * This looks up a 2-bit protection code for a 4k subpage of a 64k page.
1373 * Result is 0: full permissions, _PAGE_RW: read-only,
1378 struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context); in subpage_protection()
1385 if (ea >= spt->maxaddr) in subpage_protection()
1388 /* addresses below 4GB use spt->low_prot */ in subpage_protection()
1389 sbpm = spt->low_prot; in subpage_protection()
1391 sbpm = spt->protptrs[ea >> SBP_L3_SHIFT]; in subpage_protection()
1395 sbpp = sbpm[(ea >> SBP_L2_SHIFT) & (SBP_L2_COUNT - 1)]; in subpage_protection()
1398 spp = sbpp[(ea >> PAGE_SHIFT) & (SBP_L1_COUNT - 1)]; in subpage_protection()
1400 /* extract 2-bit bitfield for this 4k subpage */ in subpage_protection()
1401 spp >>= 30 - 2 * ((ea >> 12) & 0xf); in subpage_protection()
1404 * 0 -> full permission in subpage_protection()
1405 * 1 -> Read only in subpage_protection()
1406 * 2 -> no access. in subpage_protection()
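
A standalone sketch of the 2-bit field extraction shown above: each 32-bit protection word covers the sixteen 4K subpages of one 64K page, with subpage 0 in the top two bits, so the shift is 30 - 2 * subpage_index:

#include <stdio.h>
#include <stdint.h>

/*
 * One 32-bit word holds sixteen 2-bit codes, subpage 0 in the top bits:
 * 0 = full permission, 1 = read only, 2 = no access (per the comment above).
 */
static unsigned int subpage_code(uint32_t spp, unsigned long ea)
{
	unsigned int sub = (ea >> 12) & 0xf;	/* which 4K subpage of the 64K page */

	return (spp >> (30 - 2 * sub)) & 0x3;
}

int main(void)
{
	/* Made-up protection word: subpage 0 -> 1 (RO), subpage 15 -> 2 (none) */
	uint32_t spp = (1u << 30) | 2u;

	printf("ea 0x10000000 -> code %u\n", subpage_code(spp, 0x10000000UL));
	printf("ea 0x1000f000 -> code %u\n", subpage_code(spp, 0x1000f000UL));
	return 0;
}
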
1427 ea, access, current->comm); in hash_failure_debug()
1440 } else if (get_paca()->vmalloc_sllp != in check_paca_psize()
1442 get_paca()->vmalloc_sllp = in check_paca_psize()
1450 * 0 - handled
1451 * 1 - normal page fault
1452 * -1 - critical hash insertion error
1453 * -2 - access not permitted by subpage protection mechanism
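
A hedged sketch of how a caller might dispatch on those return values; the helper below is invented for the demo, and only part of the real fault handler's behaviour is visible in this listing:

#include <stdio.h>

/* Stand-in for hash_page_mm(); cycles through the documented codes. */
static int fake_hash_page(unsigned long ea)
{
	return (int)(ea & 0x3) - 2;	/* -2, -1, 0, 1 for the demo */
}

int main(void)
{
	for (unsigned long ea = 0; ea < 4; ea++) {
		switch (fake_hash_page(ea)) {
		case 0:
			printf("ea %lu: handled, hash PTE inserted/updated\n", ea);
			break;
		case 1:
			printf("ea %lu: fall back to the normal page fault path\n", ea);
			break;
		case -1:
			printf("ea %lu: critical hash insertion error\n", ea);
			break;
		case -2:
			printf("ea %lu: denied by subpage protection\n", ea);
			break;
		}
	}
	return 0;
}
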
1482 vsid = get_user_vsid(&mm->context, ea, ssize); in hash_page_mm()
1505 DBG_LOW(" mm=%p, mm->pgdir=%p, vsid=%016lx\n", mm, mm->pgd, vsid); in hash_page_mm()
1514 pgdir = mm->pgd; in hash_page_mm()
1534 ea &= ~((1ul << mmu_psize_defs[psize].shift) - 1); in hash_page_mm()
1537 /* Get PTE and page size from page tables */ in hash_page_mm()
1563 * Pre-check access permissions (will be re-checked atomically in hash_page_mm()
1564 * in __hash_page_XX but this pre-check is a fast path in hash_page_mm()
1590 if (current->mm == mm) in hash_page_mm()
1597 DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep)); in hash_page_mm()
1599 DBG_LOW(" i-pte: %016lx %016lx\n", pte_val(*ptep), in hash_page_mm()
1611 * If this PTE is non-cacheable and we have restrictions on in hash_page_mm()
1620 * some driver did a non-cacheable mapping in hash_page_mm()
1626 "non-cacheable mapping\n"); in hash_page_mm()
1634 if (current->mm == mm) in hash_page_mm()
1646 rc = -2; in hash_page_mm()
1656 if (rc == -1) in hash_page_mm()
1660 DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep)); in hash_page_mm()
1662 DBG_LOW(" o-pte: %016lx %016lx\n", pte_val(*ptep), in hash_page_mm()
1665 DBG_LOW(" -> rc=%d\n", rc); in hash_page_mm()
1676 struct mm_struct *mm = current->mm; in hash_page()
1691 unsigned long ea = regs->dar; in DEFINE_INTERRUPT_HANDLER()
1692 unsigned long dsisr = regs->dsisr; in DEFINE_INTERRUPT_HANDLER()
1708 mm = current->mm; in DEFINE_INTERRUPT_HANDLER()
1734 if (IS_ENABLED(CONFIG_PPC_SUBPAGE_PROT) && err == -2) in DEFINE_INTERRUPT_HANDLER()
1753 if (unlikely(psize != mm_ctx_user_psize(&mm->context))) in should_hash_preload()
1779 DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx," in hash_preload()
1780 " trap=%lx\n", mm, mm->pgd, ea, access, trap); in hash_preload()
1783 pgdir = mm->pgd; in hash_preload()
1789 vsid = get_user_vsid(&mm->context, ea, ssize); in hash_preload()
1796 * care of it once we actually try to access the page. in hash_preload()
1798 * page size demotion here in hash_preload()
1826 if (mm_ctx_user_psize(&mm->context) == MMU_PAGE_64K) in hash_preload()
1837 if (rc == -1) in hash_preload()
1839 mm_ctx_user_psize(&mm->context), in hash_preload()
1840 mm_ctx_user_psize(&mm->context), in hash_preload()
1847 * This is called at the end of handling a user page fault, when the
1848 * fault has been handled by updating a PTE in the linux page tables.
1859 * called with either mm->page_table_lock held or ptl lock held in __update_mmu_cache()
1871 * double-faulting on execution of fresh text. We have to test in __update_mmu_cache()
1872 * for regs NULL since init will get here first thing at boot. in __update_mmu_cache()
1877 trap = current->thread.regs ? TRAP(current->thread.regs) : 0UL; in __update_mmu_cache()
1889 hash_preload(vma->vm_mm, ptep, address, is_exec, trap); in __update_mmu_cache()
1897 * page back to a block device w/PIO could pick up transactional data in tm_flush_hash_page()
1898 * (bad!) so we force an abort here. Before the sync the page will be in tm_flush_hash_page()
1899 * made read-only, which will flush_hash_page. BIG ISSUE here: if the in tm_flush_hash_page()
1900 * kernel uses a page from userspace without unmapping it first, it may in tm_flush_hash_page()
1903 if (local && cpu_has_feature(CPU_FTR_TM) && current->thread.regs && in tm_flush_hash_page()
1904 MSR_TM_ACTIVE(current->thread.regs->msr)) { in tm_flush_hash_page()
1944 * We use same base page size and actual psize, because we don't in flush_hash_page()
2022 flush_hash_page(batch->vpn[i], batch->pte[i], in flush_hash_range()
2023 batch->psize, batch->ssize, local); in flush_hash_range()
2042 if (unlikely(slot == -1)) { in hpte_insert_repeating()
2047 if (slot == -1) { in hpte_insert_repeating()
2069 if (last_group != -1UL) { in hpt_clear_stress()
2072 if (mmu_hash_ops.hpte_remove(last_group) == -1) in hpt_clear_stress()
2075 stress_hpt_struct[cpu].last_group[g] = -1; in hpt_clear_stress()
2085 last_group = stress_hpt_struct[cpu].last_group[stress_nr_groups() - 1]; in hpt_do_stress()
2089 if (last_group != -1UL) { in hpt_do_stress()
2097 if (mmu_hash_ops.hpte_remove(last_group) == -1) in hpt_do_stress()
2100 stress_hpt_struct[cpu].last_group[stress_nr_groups() - 1] = -1; in hpt_do_stress()
2112 * work for non-CI PTEs). in hpt_do_stress()
2118 (stress_nr_groups() - 1) * sizeof(unsigned long)); in hpt_do_stress()
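
The memmove above maintains a small per-CPU FIFO of the most recently used HPT groups: the oldest entry sits at the last slot (and is evicted when full), the remaining entries shift toward the end by one, and the newest group goes in slot 0. A standalone sketch of that ring update; the ring length and group numbers are made up:

#include <stdio.h>
#include <string.h>

#define NR_GROUPS 4	/* made-up ring length */

static unsigned long last_group[NR_GROUPS] = { -1UL, -1UL, -1UL, -1UL };

/* Push a newly used group onto the front of the FIFO; the oldest falls off. */
static void remember_group(unsigned long group)
{
	if (last_group[NR_GROUPS - 1] != -1UL) {
		/* in the real code the evicted group's HPTEs are removed here */
		printf("evict group %#lx\n", last_group[NR_GROUPS - 1]);
	}
	memmove(&last_group[1], &last_group[0],
		(NR_GROUPS - 1) * sizeof(unsigned long));
	last_group[0] = group;
}

int main(void)
{
	for (unsigned long g = 0x100; g < 0x100 + 6 * 8; g += 8)
		remember_group(g);

	for (int i = 0; i < NR_GROUPS; i++)
		printf("slot %d: %#lx\n", i, last_group[i]);
	return 0;
}
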
2178 int hash__kernel_map_pages(struct page *page, int numpages, int enable) in hash__kernel_map_pages() argument
2184 for (i = 0; i < numpages; i++, page++) { in hash__kernel_map_pages()
2185 vaddr = (unsigned long)page_address(page); in hash__kernel_map_pages()
2210 * non-virtualized 64-bit hash MMU systems don't have a limitation in hash__setup_initial_memory_limit()
2250 return -ENODEV; in hpt_order_set()
2288 return randomize_page(mm->brk, SZ_32M); in arch_randomize_brk()
2290 return randomize_page(max_t(unsigned long, mm->brk, SZ_1T), SZ_1G); in arch_randomize_brk()
2292 return randomize_page(mm->brk, SZ_1G); in arch_randomize_brk()