These issues were found and fixed by codespell, a useful tool for finding spelling errors. Signed-off-by: Martin Roth <martin@coreboot.org> Change-Id: Ieafbc93e49fcef198ac6e31fc8a3b708c395e08e Reviewed-on: https://review.coreboot.org/c/coreboot/+/58082 Reviewed-by: Felix Held <felix-coreboot@felixheld.de> Reviewed-by: Angel Pons <th3fanbus@gmail.com> Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
247 lines
7.6 KiB
ArmAsm
247 lines
7.6 KiB
ArmAsm
/* SPDX-License-Identifier: GPL-2.0-only */
|
|
/* Early initialization code for aarch64 (a.k.a. armv8) */
|
|
|
|
#include <arch/asm.h>
|
|
#include <soc/addressmap.h>
|
|
|
|
ENTRY(_start)
	.org 0
	/**
	 * According to the reference manual the first instruction is fetched from
	 * offset 0x100, but at offset 0 a branch instruction is always placed.
	 * Support two entry points for now.
	 * To save memory put the cavium specific init code between those two entry
	 * points.
	 *
	 * NOTE: the assembler cannot move the location counter backwards, so
	 * everything between ".org 0" and ".org 0x100" below (including the
	 * padding inserted by ".align 7") has to fit into 0x100 bytes.
	 *
	 * Register usage:
	 *   d30 - X0 as passed in by the previous image (read back in start)
	 *   d29 - address _start is actually running from (read back in start)
	 *   x0-x3, x22, q0, q1, flags - scratch
	 */
	ic	ialluis
	fmov	d30, x0			/* Save X0 in FPR for use later */
	/**
	 * The BDK stores X1 for later use, but it turns out that we don't need
	 * this "feature". The idea is to hide the devicetree somewhere in
	 * flash, that only the ROM will find it and point to it using X1.
	 */
	adr	x1, _start		/* x1 = _start location based on PC */
	fmov	d29, x1			/* Save PC in FPR for use later */

#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	/* Change the core to big endian mode for EL3 */
	mrs	x0, SCTLR_EL3
	mov	x1, 1<<25		/* Set SCTLR_EL3[ee]=1 */
	orr	x0, x0, x1
	msr	SCTLR_EL3, x0
	#define ENDIAN_CONVERT64(reg) rev reg, reg
	#define ENDIAN_CONVERT32(reg) rev reg, reg
	#define ENDIAN_CONVERT16(reg) rev16 reg, reg
#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	/* Nothing needed, default is little endian */
	#define ENDIAN_CONVERT64(reg)
	#define ENDIAN_CONVERT32(reg)
	#define ENDIAN_CONVERT16(reg)
#else
	#error Unknown endianness
#endif

	/* x0 = LMC0_PF_BAR0, assembled from its upper and lower 32 bits */
	mov	x0, (LMC0_PF_BAR0 >> 32)
	lsl	x0, x0, 32
	mov	x1, (LMC0_PF_BAR0 & 0xffffffff)
	orr	x0, x0, x1

	/* Test if DRAM PLL is running */
	ldr	x1, [x0, LMC0_DDR_PLL_CTL0]
	/*
	 * Bring the register value into CPU byte order before testing a bit,
	 * the same way _setup_car converts the values it writes to L2C
	 * registers. Expands to nothing on little endian builds.
	 */
	ENDIAN_CONVERT64(x1)

	tst	x1, 0x80

	/* Bit 7 set: skip the cache-as-RAM setup */
	b.ne	cache_setup_done

	bl	_setup_car

cache_setup_done:

	/* Check that we're running on the node we're linked for */
	mrs	x0, MPIDR_EL1
	ubfx	x0, x0, 16, 8		/* Bits 23:16 are the physical node ID */
	mov	x1, 0x0
	cmp	x0, x1

	b.ne	_wfi			/* Park every node but node 0 */

node_check_done:
	/* Offset from BOOTROM_OFFSET at which _start is expected to run */
	#define EXPECTED_START_OFFSET 0x020000

	/* Get code position: x0 = expected address of _start ... */
	mov	x0, BOOTROM_OFFSET
	mov	x1, EXPECTED_START_OFFSET
	add	x0, x0, x1

	/* ... and x1 = address _start is actually running from */
	adr	x1, _start

	/**
	 * Check if IROM has loaded the code to the expected address
	 * (BOOTROM_OFFSET + EXPECTED_START_OFFSET).
	 * In case the offset is wrong, try to relocate.
	 * Ideally the following code is never executed.
	 * FIXME: Add region overlap check.
	 */
	cmp	x0, x1
	b.eq	after_relocate

relocate:
	/* Get bootblock length */
	ldr	x2, =_program
	ldr	x3, =_eprogram
	sub	x2, x3, x2		/* x2 = _eprogram - _program (bytes to copy) */
	b	copy_code

	/* Start the copy loop on a 128 byte boundary */
	.align 7
copy_code:
	ldp	q0, q1, [x1], 32	/* Load 32 bytes from the current location */
	subs	w2, w2, 32		/* Subtract 32 from length, setting flags */
	stp	q0, q1, [x0], 32	/* Store 32 bytes to the expected location */
	b.gt	copy_code		/* Repeat if length is still positive */

	/*
	 * Make sure the copy has completed and that no stale instructions are
	 * fetched from the destination before branching into it.
	 */
	dsb	sy
	ic	ialluis			/* Clear the icache now that all code is correct */
	dsb	sy
	isb

	/* Load the actual location we're supposed to be at */
	adr	x0, after_relocate	/* Relative address */
	adr	x1, _start		/* Relative address */
	sub	x0, x0, x1		/* x0 = offset of after_relocate from _start */
	mov	x1, BOOTROM_OFFSET
	add	x0, x0, x1
	mov	x1, EXPECTED_START_OFFSET
	add	x0, x0, x1		/* Same base the check above compared against */
	br	x0			/* Branch to relocated code */

after_relocate:
	/* Allow unaligned memory access as long as MMU is disabled */
	mrs	x22, s3_0_c11_c0_4
	orr	x22, x22, # (1 << 37)	/* Set DCVA47 */
	msr	s3_0_c11_c0_4, x22

	bl	start

	/* Real entry point */
	.org 0x100
	b	_start
	#undef EXPECTED_START_OFFSET
ENDPROC(_start)
|
|
|
|
|
|
/**
 * _setup_car - set up the L2 cache so that the SRAM region can be used as RAM.
 *
 * Steps, in order:
 *   1. Park the core (branch to _wfi) if the MIDR_EL1 part number is >= 0xb0.
 *   2. Program the L2C region 0 start/end/attribute registers.
 *   3. Clear the way partition register for core 0.
 *   4. Lock every cache line between _sram and _esram.
 *   5. Read and write back every locked line to mark it dirty.
 *   6. Clear the L2C TAD0 interrupt flags raised along the way.
 *
 * In:      nothing
 * Out:     nothing, returns to the caller through x30
 * Clobbers: x0-x4, q0, q1, condition flags
 */
ENTRY(_setup_car)
	mrs	x0, MIDR_EL1
	ubfx	x0, x0, 4, 12		/* Bits 15:4 are the part number */
	cmp	x0, 0xb0
	b.ge	_wfi			/* Part is not handled by the setup below */

thunder1_cache_setup:
	/**
	 * Setup L2 cache to allow secure access to all of the address space
	 * thunder1 compatibility list:
	 *  - CN81XX
	 *  - CN83XX
	 *  - CN88XX
	 */
	#define REGIONX_START	0x1000
	#define REGIONX_END	0x1008
	#define REGIONX_ATTR	0x1010
	/* x0 = L2C_PF_BAR0, assembled from its upper and lower 32 bits */
	mov	x0, L2C_PF_BAR0 >> 32
	lsl	x0, x0, 32
	mov	x1, (L2C_PF_BAR0 & 0xffffffff)
	orr	x0, x0, x1
	str	xzr, [x0, REGIONX_START]	/* Start of zero */
	mov	x1, 0x3fffff00000		/* End of max address */
	ENDIAN_CONVERT64(x1)
	str	x1, [x0, REGIONX_END]
	mov	x1, 2				/* Secure only access */
	ENDIAN_CONVERT64(x1)
	str	x1, [x0, REGIONX_ATTR]
	/* Update way partition to allow core 0 to write to L2 */
	#define L2C_WPAR_PP0_OFFSET 0x40000
	mov	x1, L2C_WPAR_PP0_OFFSET
	str	xzr, [x0, x1]
	ldr	xzr, [x0, x1]		/* Read back to make sure done */
	#undef REGIONX_START
	#undef REGIONX_END
	#undef REGIONX_ATTR
	#undef L2C_WPAR_PP0_OFFSET

	/**
	 * At this point the whole CAR is readable and writeable, but if
	 * we touch too many cache-lines our code might get flushed out.
	 * We have to lock all cache-lines that are to be used as RAM, which are
	 * the ones marked as SRAM in memlayout.
	 */
	mrs	x0, CTR_EL0		/* Get cache-line size */
	/* [19:16] - Indicates Log2(number of words in cache line) */
	ubfx	x0, x0, 16, 4
	mov	x1, 4			/* Bytes in a word (32-bit) */
	lsl	x0, x1, x0		/* Number of Bytes in x0 */

	sub	x1, x0, 1
	mvn	x1, x1			/* Place mask in x1 */

	/* x3 = _sram rounded down, x4 = _esram rounded up to a cache line */
	ldr	x3, =_sram
	and	x3, x3, x1		/* Align addresses with cache-lines */
	ldr	x4, =_esram
	add	x4, x4, x0
	sub	x4, x4, 1
	and	x4, x4, x1		/* Align addresses with cache-lines */
	sub	x2, x4, x3		/* Store sram length in x2 */

lock_cache_lines:
	/* Implementation specific SYS operation, issued once per line at x3 */
	sys	#0, c11, c1, #4, x3
	add	x3, x3, x0		/* Increment address by cache-line bytes */
	subs	w2, w2, w0		/* Subtract cache-line bytes from length */
	b.gt	lock_cache_lines	/* Repeat if length is still positive */

	/**
	 * The locked region isn't considered dirty by L2. Do read/write of
	 * each cache line to force each to be dirty. This is needed across the
	 * whole line to make sure the L2 dirty bits are all up to date.
	 * NOTE: If we'd relocate we could memset the whole memory !
	 */
	/* x0 (line size) and x1 (mask) are still valid; redo the range */
	ldr	x3, =_sram
	and	x3, x3, x1		/* Align addresses with cache-lines */
	ldr	x4, =_esram
	add	x4, x4, x0
	sub	x4, x4, 1
	and	x4, x4, x1		/* Align addresses with cache-lines */
	sub	x2, x4, x3		/* Store sram length in x2 */
	mov	x4, x3			/* Write pointer follows the read pointer */
	b	dirty_cache_line

	/* Start the loop on a 128 byte boundary */
	.align 7
dirty_cache_line:
	ldp	q0, q1, [x3], 32	/* Load 32 bytes */
	subs	w2, w2, 32		/* Subtract 32 from length, setting flags */
	stp	q0, q1, [x4], 32	/* Store 32 bytes */
	b.gt	dirty_cache_line	/* Repeat if length is still positive */
	dmb	sy

clear_interrupts:
	/**
	 * As the memory controller isn't running, but we access the DRAM's
	 * address space, some interrupt flags had been set.
	 * Tidy up our mess now on (valid for CN81XX only).
	 */
	mov	x0, (L2C_TAD0_INT_W1C >> 32)
	lsl	x0, x0, 32
	mov	x1, (L2C_TAD0_INT_W1C & 0xffffffff)
	orr	x0, x0, x1

	ldr	x1, [x0]
	/*
	 * Work on the value in CPU byte order and convert it back before the
	 * store, like the other L2C register writes above. Both conversions
	 * expand to nothing on little endian builds.
	 */
	ENDIAN_CONVERT64(x1)
	orr	x1, x1, 0x1c00		/* Clear WRDISLMC, RDDISLMC, RDNXM */
	ENDIAN_CONVERT64(x1)
	str	x1, [x0]

	ret
ENDPROC(_setup_car)
|
|
|
|
/**
 * _wfi - park the calling core forever.
 *
 * Branched to when this core must not continue booting. WFI only suspends
 * execution until a wake-up event arrives, so loop back to it instead of
 * running off the end of this function.
 *
 * Never returns.
 */
ENTRY(_wfi)
	wfi
	b	_wfi
ENDPROC(_wfi)
|
|
|
|
/**
 * start - hand over from the early assembly code to the C bootblock.
 *
 * Calls arm64_init_cpu, then passes the two values _start stashed in FP
 * registers to bootblock_main:
 *   x0 = d30, the X0 value received from the previous image
 *   x1 = d29, the address _start was running from
 *
 * Not expected to return.
 */
ENTRY(start)
	bl	arm64_init_cpu

	fmov	x0, d30		/* The original X0, info from previous image */
	fmov	x1, d29		/* The original PC we were loaded at */

	/* Call C entry */
	bl	bootblock_main

	/*
	 * bootblock_main is not expected to return. Park the core if it does,
	 * rather than executing whatever follows this function.
	 */
1:
	wfi
	b	1b
ENDPROC(start)
|