From: Marc Zyngier Date: Wed, 8 Feb 2023 20:54:27 +0000 (+0800) Subject: arm: cpu: Add optional CMOs by VA X-Git-Tag: v2025.01-rc5-pxa1908~1023^2~38^2~17 X-Git-Url: http://git.dujemihanovic.xyz/img/static/%7B%7B?a=commitdiff_plain;h=46dc54287031759c03c68902283d92076938305c;p=u-boot.git arm: cpu: Add optional CMOs by VA Exposing set/way cache maintenance to a virtual machine is unsafe, not least because the instructions are not permission-checked but also because they are not broadcast between CPUs. Consequently, KVM traps and emulates such maintenance in the host kernel using by-VA operations and looping over the stage-2 page-tables. However, when running under protected KVM, these instructions are not able to be emulated and will instead result in an exception being delivered to the guest. Introduce CONFIG_CMO_BY_VA_ONLY so that virtual platforms can select this option and perform by-VA cache maintenance instead of using the set/way instructions. Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Pierre-Clément Tosi [ Paul: pick from the Android tree. Fixup Pierre's commit. And fix some checkpatch warnings. Rebased to upstream. ] Signed-off-by: Ying-Chun Liu (PaulLiu) Cc: Tom Rini Link: https://android.googlesource.com/platform/external/u-boot/+/db5507f47f4f57f766d52f753ff2cc761afc213b Link: https://android.googlesource.com/platform/external/u-boot/+/2baf54e743380a1e4a6bc2dbdde020a2e783ff67 --- diff --git a/arch/arm/cpu/armv8/Kconfig b/arch/arm/cpu/armv8/Kconfig index 1305238c9d..7d5cf1594d 100644 --- a/arch/arm/cpu/armv8/Kconfig +++ b/arch/arm/cpu/armv8/Kconfig @@ -1,5 +1,9 @@ if ARM64 +config CMO_BY_VA_ONLY + bool "Force cache maintenance to be exclusively by VA" + depends on !SYS_DISABLE_DCACHE_OPS + config ARMV8_SPL_EXCEPTION_VECTORS bool "Install crash dump exception vectors" depends on SPL diff --git a/arch/arm/cpu/armv8/cache.S b/arch/arm/cpu/armv8/cache.S index d1cee23437..3fe935cf28 100644 --- a/arch/arm/cpu/armv8/cache.S +++ b/arch/arm/cpu/armv8/cache.S @@ -12,6 +12,7 @@ #include #include +#ifndef CONFIG_CMO_BY_VA_ONLY /* * void __asm_dcache_level(level) * @@ -116,6 +117,41 @@ ENTRY(__asm_invalidate_dcache_all) ENDPROC(__asm_invalidate_dcache_all) .popsection +.pushsection .text.__asm_flush_l3_dcache, "ax" +WEAK(__asm_flush_l3_dcache) + mov x0, #0 /* return status as success */ + ret +ENDPROC(__asm_flush_l3_dcache) +.popsection + +.pushsection .text.__asm_invalidate_l3_icache, "ax" +WEAK(__asm_invalidate_l3_icache) + mov x0, #0 /* return status as success */ + ret +ENDPROC(__asm_invalidate_l3_icache) +.popsection + +#else /* CONFIG_CMO_BY_VA */ + +/* + * Define these so that they actively clash with in implementation + * accidentally selecting CONFIG_CMO_BY_VA + */ + +.pushsection .text.__asm_invalidate_l3_icache, "ax" +ENTRY(__asm_invalidate_l3_icache) + mov x0, xzr + ret +ENDPROC(__asm_invalidate_l3_icache) +.popsection +.pushsection .text.__asm_flush_l3_dcache, "ax" +ENTRY(__asm_flush_l3_dcache) + mov x0, xzr + ret +ENDPROC(__asm_flush_l3_dcache) +.popsection +#endif /* CONFIG_CMO_BY_VA */ + /* * void __asm_flush_dcache_range(start, end) * @@ -189,20 +225,6 @@ WEAK(__asm_invalidate_l3_dcache) ENDPROC(__asm_invalidate_l3_dcache) .popsection -.pushsection .text.__asm_flush_l3_dcache, "ax" -WEAK(__asm_flush_l3_dcache) - mov x0, #0 /* return status as success */ - ret -ENDPROC(__asm_flush_l3_dcache) -.popsection - -.pushsection .text.__asm_invalidate_l3_icache, "ax" -WEAK(__asm_invalidate_l3_icache) - mov x0, #0 /* return status as success */ - ret -ENDPROC(__asm_invalidate_l3_icache) -.popsection - /* * void __asm_switch_ttbr(ulong new_ttbr) * diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c index 2a226fd063..f333ad8889 100644 --- a/arch/arm/cpu/armv8/cache_v8.c +++ b/arch/arm/cpu/armv8/cache_v8.c @@ -163,6 +163,83 @@ static u64 *find_pte(u64 addr, int level) return NULL; } +#ifdef CONFIG_CMO_BY_VA_ONLY +static void __cmo_on_leaves(void (*cmo_fn)(unsigned long, unsigned long), + u64 pte, int level, u64 base) +{ + u64 *ptep; + int i; + + ptep = (u64 *)(pte & GENMASK_ULL(47, PAGE_SHIFT)); + for (i = 0; i < PAGE_SIZE / sizeof(u64); i++) { + u64 end, va = base + i * BIT(level2shift(level)); + u64 type, attrs; + + pte = ptep[i]; + type = pte & PTE_TYPE_MASK; + attrs = pte & PMD_ATTRINDX_MASK; + debug("PTE %llx at level %d VA %llx\n", pte, level, va); + + /* Not valid? next! */ + if (!(type & PTE_TYPE_VALID)) + continue; + + /* Not a leaf? Recurse on the next level */ + if (!(type == PTE_TYPE_BLOCK || + (level == 3 && type == PTE_TYPE_PAGE))) { + __cmo_on_leaves(cmo_fn, pte, level + 1, va); + continue; + } + + /* + * From this point, this must be a leaf. + * + * Start excluding non memory mappings + */ + if (attrs != PTE_BLOCK_MEMTYPE(MT_NORMAL) && + attrs != PTE_BLOCK_MEMTYPE(MT_NORMAL_NC)) + continue; + + end = va + BIT(level2shift(level)) - 1; + + /* No intersection with RAM? */ + if (end < gd->ram_base || + va >= (gd->ram_base + gd->ram_size)) + continue; + + /* + * OK, we have a partial RAM mapping. However, this + * can cover *more* than the RAM. Yes, u-boot is + * *that* braindead. Compute the intersection we care + * about, and not a byte more. + */ + va = max(va, (u64)gd->ram_base); + end = min(end, gd->ram_base + gd->ram_size); + + debug("Flush PTE %llx at level %d: %llx-%llx\n", + pte, level, va, end); + cmo_fn(va, end); + } +} + +static void apply_cmo_to_mappings(void (*cmo_fn)(unsigned long, unsigned long)) +{ + u64 va_bits; + int sl = 0; + + if (!gd->arch.tlb_addr) + return; + + get_tcr(NULL, &va_bits); + if (va_bits < 39) + sl = 1; + + __cmo_on_leaves(cmo_fn, gd->arch.tlb_addr, sl, 0); +} +#else +static inline void apply_cmo_to_mappings(void *dummy) {} +#endif + /* Returns and creates a new full table (512 entries) */ static u64 *create_table(void) { @@ -447,8 +524,12 @@ __weak void mmu_setup(void) */ void invalidate_dcache_all(void) { +#ifndef CONFIG_CMO_BY_VA_ONLY __asm_invalidate_dcache_all(); __asm_invalidate_l3_dcache(); +#else + apply_cmo_to_mappings(invalidate_dcache_range); +#endif } /* @@ -458,6 +539,7 @@ void invalidate_dcache_all(void) */ inline void flush_dcache_all(void) { +#ifndef CONFIG_CMO_BY_VA_ONLY int ret; __asm_flush_dcache_all(); @@ -466,6 +548,9 @@ inline void flush_dcache_all(void) debug("flushing dcache returns 0x%x\n", ret); else debug("flushing dcache successfully.\n"); +#else + apply_cmo_to_mappings(flush_dcache_range); +#endif } #ifndef CONFIG_SYS_DISABLE_DCACHE_OPS @@ -520,9 +605,19 @@ void dcache_disable(void) if (!(sctlr & CR_C)) return; + if (IS_ENABLED(CONFIG_CMO_BY_VA_ONLY)) { + /* + * When invalidating by VA, do it *before* turning the MMU + * off, so that at least our stack is coherent. + */ + flush_dcache_all(); + } + set_sctlr(sctlr & ~(CR_C|CR_M)); - flush_dcache_all(); + if (!IS_ENABLED(CONFIG_CMO_BY_VA_ONLY)) + flush_dcache_all(); + __asm_invalidate_tlb_all(); } diff --git a/arch/arm/cpu/armv8/cpu.c b/arch/arm/cpu/armv8/cpu.c index db5d460eb4..3c7f36ad8d 100644 --- a/arch/arm/cpu/armv8/cpu.c +++ b/arch/arm/cpu/armv8/cpu.c @@ -48,18 +48,26 @@ int cleanup_before_linux(void) disable_interrupts(); - /* - * Turn off I-cache and invalidate it - */ - icache_disable(); - invalidate_icache_all(); + if (IS_ENABLED(CONFIG_CMO_BY_VA_ONLY)) { + /* + * Disable D-cache. + */ + dcache_disable(); + } else { + /* + * Turn off I-cache and invalidate it + */ + icache_disable(); + invalidate_icache_all(); - /* - * turn off D-cache - * dcache_disable() in turn flushes the d-cache and disables MMU - */ - dcache_disable(); - invalidate_dcache_all(); + /* + * turn off D-cache + * dcache_disable() in turn flushes the d-cache and disables + * MMU + */ + dcache_disable(); + invalidate_dcache_all(); + } return 0; }