system76-coreboot/src/arch/arm64/armv8/cpu.S
David Milosevic 41ba11229a arch/arm64: Add EL1/EL2/EL3 support for arm64
Currently, arch/arm64 requires coreboot to run at EL3 due
to EL3-only register accesses. This can be an issue when, for example,
one boots into TF-A first and drops into EL2 for coreboot afterwards.

This patch aims to make arch/arm64 more versatile by removing the
current EL3 constraint and allowing arm64 coreboot to run at EL1,
EL2 or EL3.

The strategy here is to add a Kconfig option (ARM64_CURRENT_EL) which
lets us specify coreboot's EL upon entry. Based on that, we access the
appropriate ELx registers. For example, when running coreboot at EL1,
we would not access vbar_el3 or vbar_el2, but vbar_el1 instead.
This way, we don't generate faults by accessing higher-EL registers.
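
To illustrate the idea, the EL dispatch can be done entirely in the
preprocessor. This is only a sketch of the approach, not the actual coreboot
header; the real CURRENT_EL() helper and the EL1/EL2/EL3 constants used by
cpu.S live in coreboot's arch/arm64 headers, which are not shown on this page:

/* Sketch only: select the ELx variant of a system register at build time,
 * based on the ARM64_CURRENT_EL Kconfig value. */
#if CONFIG_ARM64_CURRENT_EL == EL1
#define CURRENT_EL(reg) reg##_el1
#elif CONFIG_ARM64_CURRENT_EL == EL2
#define CURRENT_EL(reg) reg##_el2
#else
#define CURRENT_EL(reg) reg##_el3
#endif

With something like this in place, "mrs x0, CURRENT_EL(sctlr)" assembles to an
access of sctlr_el1, sctlr_el2 or sctlr_el3 depending on the configured entry EL.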

Currently only tested on the qemu-aarch64 target. Exceptions were
tested by enabling FATAL_ASSERTS.

Signed-off-by: David Milosevic <David.Milosevic@9elements.com>
Change-Id: Iae1c57f0846c8d0585384f7e54102a837e701e7e
Reviewed-on: https://review.coreboot.org/c/coreboot/+/74798
Reviewed-by: Werner Zeh <werner.zeh@siemens.com>
Reviewed-by: ron minnich <rminnich@gmail.com>
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Julius Werner <jwerner@chromium.org>
2024-04-22 07:35:36 +00:00


/* SPDX-License-Identifier: GPL-2.0-only */

/*
 * Optimized assembly for low-level CPU operations on ARM64 processors.
 */

#include <arch/asm.h>
#include <arch/cache.h>

.macro	dcache_apply_all crm
	dsb	sy
	mrs	x0, clidr_el1			// read CLIDR
	and	w3, w0, #0x07000000		// narrow to LoC
	lsr	w3, w3, #23			// left align LoC (low 4 bits)
	cbz	w3, 5f				// done

	mov	w10, #0				// w10 = 2 * cache level
	mov	w8, #1				// w8 = constant 0b1
1:	// next_level
	add	w2, w10, w10, lsr #1		// calculate 3 * cache level
	lsr	w1, w0, w2			// extract 3-bit cache type for this level
	and	w1, w1, #0x7			// w1 = cache type
	cmp	w1, #2				// is it data or i&d?
	b.lt	4f				// skip
	msr	csselr_el1, x10			// select current cache level
	isb					// sync change of csselr
	mrs	x1, ccsidr_el1			// w1 = read ccsidr
	and	w2, w1, #7			// w2 = log2(linelen_bytes) - 4
	add	w2, w2, #4			// w2 = log2(linelen_bytes)
	ubfx	w4, w1, #3, #10			// w4 = associativity - 1 (also
						// max way number)
	clz	w5, w4				// w5 = 32 - log2(ways)
						// (bit position of way in DC)
	lsl	w9, w4, w5			// w9 = max way number
						// (aligned for DC)
	lsl	w16, w8, w5			// w16 = amount to decrement (way
						// number per iteration)
2:	// next_way
	ubfx	w7, w1, #13, #15		// w7 = max set #, right aligned
	lsl	w7, w7, w2			// w7 = max set #, DC aligned
	lsl	w17, w8, w2			// w17 = amount to decrement (set
						// number per iteration)
3:	// next_set
	orr	w11, w10, w9			// w11 = combine way # & cache #
	orr	w11, w11, w7			// ... and set #
	dc	\crm, x11			// clean and/or invalidate line
	subs	w7, w7, w17			// decrement set number
	b.ge	3b				// next_set
	subs	x9, x9, x16			// decrement way number
	b.ge	2b				// next_way
4:	// skip
	add	w10, w10, #2			// increment 2 * cache level
	cmp	w3, w10				// Went beyond LoC?
	b.gt	1b				// next_level
5:	// done
	dsb	sy
	isb
	ret
.endm
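
/*
 * The crm argument selects the DC (data cache maintenance by set/way)
 * operation applied to each line: isw = invalidate, csw = clean (write back),
 * cisw = clean and invalidate.
 */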
ENTRY(dcache_invalidate_all)
	dcache_apply_all crm=isw
ENDPROC(dcache_invalidate_all)

ENTRY(dcache_clean_all)
	dcache_apply_all crm=csw
ENDPROC(dcache_clean_all)

ENTRY(dcache_clean_invalidate_all)
	dcache_apply_all crm=cisw
ENDPROC(dcache_clean_invalidate_all)

/* This must be implemented in assembly to ensure there are no accesses to
   memory (e.g. the stack) in between disabling and flushing the cache. */
ENTRY(mmu_disable)
	str	x30, [sp, #-0x8]
	mrs	x0, CURRENT_EL(sctlr)
	mov	x1, #~(SCTLR_C | SCTLR_M)
	and	x0, x0, x1
	msr	CURRENT_EL(sctlr), x0
	isb
	bl	dcache_clean_invalidate_all
	ldr	x30, [sp, #-0x8]
	ret
ENDPROC(mmu_disable)

/*
 * Bring an ARMv8 processor we just gained control of (e.g. from IROM) into a
 * known state regarding caches/SCTLR/SCR/PSTATE. Completely invalidates
 * icache/dcache, disables MMU and dcache (if active), and enables unaligned
 * accesses, icache. Seeds stack and initializes SP_EL0. Clobbers R22 and R23.
 */
ENTRY(arm64_init_cpu)
	/* Initialize PSTATE (mask all exceptions, select SP_EL0). */
	msr	SPSel, #0
	msr	DAIFSet, #0xf

	/* TODO: This is where we'd put non-boot CPUs into WFI if needed. */

	/* x22: SCTLR, return address: x23 (callee-saved by subroutine) */
	mov	x23, x30
	mrs	x22, CURRENT_EL(sctlr)

	/* Activate ICache already for speed during cache flush below. */
	orr	x22, x22, #SCTLR_I
	msr	CURRENT_EL(sctlr), x22
	isb

	/* Invalidate dcache */
	bl	dcache_invalidate_all

	/* Reinitialize SCTLR from scratch to known-good state.
	   This may disable MMU or DCache. */
	ldr	w22, =(SCTLR_RES1 | SCTLR_I | SCTLR_SA)
	msr	CURRENT_EL(sctlr), x22

#if CONFIG_ARM64_CURRENT_EL == EL3
	/* Initialize SCR to unmask all interrupts (so that if we get a spurious
	   IRQ/SError we'll see it when it happens, not hang in BL31). This will
	   only have an effect after we DAIFClr in exception_init(). */
	mov	x22, #SCR_RES1 | SCR_IRQ | SCR_FIQ | SCR_EA
	msr	scr_el3, x22
#endif

	/* Invalidate icache and TLB for good measure. TLBI ALLE3/ALLE2 are
	   only accessible from EL3/EL2, so pick the op matching the entry EL. */
	ic	iallu
#if CONFIG_ARM64_CURRENT_EL == EL1
	tlbi	vmalle1
#elif CONFIG_ARM64_CURRENT_EL == EL2
	tlbi	alle2
#else
	tlbi	alle3
#endif
	dsb	sy
	isb

	/* Initialize stack with sentinel value to later check overflow. */
	ldr	x2, =0xdeadbeefdeadbeef
	ldr	x0, =_stack
	ldr	x1, =_estack
1:
	stp	x2, x2, [x0], #16
	cmp	x0, x1
	bne	1b

	/* Leave a line of beef dead for easier visibility in stack dumps. */
	sub	sp, x0, #16
	ret	x23
ENDPROC(arm64_init_cpu)
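
For context, the entry points above are plain void(void) routines that
coreboot's C code reaches through its arch headers. A minimal usage sketch
follows; the prototypes are restated here for self-containment, and the
handoff scenario is illustrative rather than taken from coreboot:

/* Prototypes as implemented by cpu.S above; in coreboot they come from the
 * arch headers (the exact header is not shown on this page). */
void dcache_clean_invalidate_all(void);
void mmu_disable(void);

/* Illustrative only: hand control to code that expects translation off.
 * mmu_disable() clears SCTLR.M and SCTLR.C and then cleans+invalidates the
 * whole data cache from assembly, so no dirty lines are lost or left stale. */
static void handoff_with_mmu_off(void (*entry)(void))
{
	mmu_disable();
	entry();
}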