/*
 * Copyright (c) 2016-2021, Arm Limited and Contributors. All rights reserved.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include <arch.h>
#include <asm_macros.S>
#include <assert_macros.S>
#include <common/bl_common.h>
#include <lib/xlat_tables/xlat_tables_defs.h>

	.globl	smc
	.globl	zeromem
	.globl	zero_normalmem
	.globl	memcpy4
	.globl	disable_mmu_icache_secure
	.globl	disable_mmu_secure
	.globl	fixup_gdt_reloc

#define PAGE_START_MASK		~(PAGE_SIZE_MASK)

func smc
	/*
	 * On AArch32, only r0-r3 are passed in registers; the remaining
	 * arguments (r4-r6) are passed on the stack, so load them into
	 * registers r4-r6 explicitly before issuing the SMC.
	 * Clobbers: r4-r6
	 */
	ldm	sp, {r4, r5, r6}
	smc	#0
endfunc smc

/* -----------------------------------------------------------------------
 * void zeromem(void *mem, unsigned int length)
 *
 * Initialise a region in normal memory to 0. This function complies with the
 * AAPCS and can be called from C code.
 *
 * -----------------------------------------------------------------------
 */
func zeromem
	/*
	 * Readable names for registers
	 *
	 * Registers r0, r1 and r2 are also set by zeromem which
	 * branches into the fallback path directly, so cursor, length and
	 * stop_address should not be retargeted to other registers.
	 */
	cursor       .req r0 /* Start address and then current address */
	length       .req r1 /* Length in bytes of the region to zero out */
	/*
	 * Reusing the r1 register as length is only used at the beginning of
	 * the function.
	 */
	stop_address .req r1 /* Address past the last zeroed byte */
	zeroreg1     .req r2 /* Source register filled with 0 */
	zeroreg2     .req r3 /* Source register filled with 0 */
	tmp	     .req r12 /* Temporary scratch register */

	mov	zeroreg1, #0

	/* stop_address is the address past the last byte to zero */
	add	stop_address, cursor, length

	/*
	 * Length cannot be used anymore as it shares the same register with
	 * stop_address.
	 */
	.unreq	length
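	/*
	 * From here the region is zeroed in up to three phases: single bytes
	 * until the cursor reaches an 8-byte boundary, then 8 bytes per
	 * iteration with stmia, then single bytes again for any remaining
	 * tail (or for the whole region if alignment cannot be reached).
	 */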
	/*
	 * If the start address is already aligned to 8 bytes, skip this loop.
	 */
	tst	cursor, #(8-1)
	beq	.Lzeromem_8bytes_aligned

	/* Calculate the next address aligned to 8 bytes */
	orr	tmp, cursor, #(8-1)
	adds	tmp, tmp, #1
	/* If it overflows, fall back to byte per byte zeroing */
	beq	.Lzeromem_1byte_aligned
	/* If the next aligned address is after the stop address, fall back */
	cmp	tmp, stop_address
	bhs	.Lzeromem_1byte_aligned

	/* zero byte per byte */
1:
	strb	zeroreg1, [cursor], #1
	cmp	cursor, tmp
	bne	1b

	/* zero 8 bytes at a time */
.Lzeromem_8bytes_aligned:

	/* Calculate the last 8-byte aligned address. */
	bic	tmp, stop_address, #(8-1)

	cmp	cursor, tmp
	bhs	2f

	mov	zeroreg2, #0
1:
	stmia	cursor!, {zeroreg1, zeroreg2}
	cmp	cursor, tmp
	blo	1b
2:

	/* zero byte per byte */
.Lzeromem_1byte_aligned:
	cmp	cursor, stop_address
	beq	2f
1:
	strb	zeroreg1, [cursor], #1
	cmp	cursor, stop_address
	bne	1b
2:
	bx	lr

	.unreq	cursor
	/*
	 * length is already unreq'ed to reuse the register for another
	 * variable.
	 */
	.unreq	stop_address
	.unreq	zeroreg1
	.unreq	zeroreg2
	.unreq	tmp
endfunc zeromem

/*
 * AArch32 does not have special ways of zeroing normal memory as AArch64 does
 * using the DC ZVA instruction, so we just alias zero_normalmem to zeromem.
 */
.equ	zero_normalmem, zeromem
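/*
 * Minimal call sketch for zeromem / zero_normalmem from assembly; the buffer
 * symbol and size below are placeholders, not part of this file:
 *
 *	ldr	r0, =scratch_buffer	@ base of the region to clear
 *	mov	r1, #64			@ length in bytes
 *	bl	zeromem
 *
 * zero_normalmem can be called the same way since it is an alias of zeromem
 * on AArch32.
 */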
/* --------------------------------------------------------------------------
 * void memcpy4(void *dest, const void *src, unsigned int length)
 *
 * Copy length bytes from memory area src to memory area dest.
 * The memory areas should not overlap.
 * Destination and source addresses must be 4-byte aligned.
 * --------------------------------------------------------------------------
 */
func memcpy4
#if ENABLE_ASSERTIONS
	orr	r3, r0, r1
	tst	r3, #0x3
	ASM_ASSERT(eq)
#endif
/* copy 4 bytes at a time */
m_loop4:
	cmp	r2, #4
	blo	m_loop1
	ldr	r3, [r1], #4
	str	r3, [r0], #4
	subs	r2, r2, #4
	bne	m_loop4
	bx	lr

/* copy byte per byte */
m_loop1:
	/* Guard against a zero (or exhausted) length before the byte loop */
	cmp	r2, #0
	beq	m_end
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	subs	r2, r2, #1
	bne	m_loop1
m_end:
	bx	lr
endfunc memcpy4

/* ---------------------------------------------------------------------------
 * Disable the MMU in Secure State
 * ---------------------------------------------------------------------------
 */

func disable_mmu_secure
	mov	r1, #(SCTLR_M_BIT | SCTLR_C_BIT)
do_disable_mmu:
#if ERRATA_A9_794073
	stcopr	r0, BPIALL
	dsb
#endif
	ldcopr	r0, SCTLR
	bic	r0, r0, r1
	stcopr	r0, SCTLR
	isb				// ensure MMU is off
	dsb	sy
	bx	lr
endfunc disable_mmu_secure


func disable_mmu_icache_secure
	ldr	r1, =(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)
	b	do_disable_mmu
endfunc disable_mmu_icache_secure
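/*
 * Neither MMU helper takes arguments or returns a value, but both clobber
 * r0 and r1, so assembly callers must not expect those registers to be
 * preserved. A minimal call sketch:
 *
 *	bl	disable_mmu_icache_secure	@ clears SCTLR.M, SCTLR.C and SCTLR.I
 */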
/* ---------------------------------------------------------------------------
 * Helper to fix up the Global Offset Table (GOT) and dynamic relocations
 * (.rel.dyn) at runtime.
 *
 * This function is meant to be used when the firmware is compiled with -fpie
 * and linked with -pie options. We rely on the linker script exporting
 * appropriate markers for the start and end of each section. For the GOT, we
 * expect __GOT_START__ and __GOT_END__. Similarly for .rel.dyn, we expect
 * __RELA_START__ and __RELA_END__.
 *
 * The function takes the limits of the memory to apply fixups to as
 * arguments (which are usually the limits of the relocatable BL image).
 * r0 - the start of the fixup region
 * r1 - the limit of the fixup region
 * These addresses have to be 4KB page aligned.
 * ---------------------------------------------------------------------------
 */

/* Relocation codes */
#define	R_ARM_RELATIVE	23

func fixup_gdt_reloc
	mov	r6, r0
	mov	r7, r1

#if ENABLE_ASSERTIONS
	/* Test if the limits are 4K aligned */
	orr	r0, r0, r1
	mov	r1, #(PAGE_SIZE_MASK)
	tst	r0, r1
	ASM_ASSERT(eq)
#endif
	/*
	 * Calculate the offset based on the return address in lr.
	 * Assume that this function is called within a page at the start of
	 * the fixup region.
	 */
	ldr	r1, =PAGE_START_MASK
	and	r2, lr, r1
	subs	r0, r2, r6	/* Diff(S) = Current Address - Compiled Address */
	beq	3f		/* Diff(S) = 0. No relocation needed */

	ldr	r1, =__GOT_START__
	add	r1, r1, r0
	ldr	r2, =__GOT_END__
	add	r2, r2, r0

	/*
	 * The GOT is an array of 32-bit addresses which must be fixed up as
	 * new_addr = old_addr + Diff(S).
	 * new_addr is the address the binary is currently executing from
	 * and old_addr is the address at compile time.
	 */
1:	ldr	r3, [r1]

	/* Skip adding offset if address is < lower limit */
	cmp	r3, r6
	blo	2f

	/* Skip adding offset if address is > upper limit */
	cmp	r3, r7
	bhi	2f
	add	r3, r3, r0
	str	r3, [r1]

2:	add	r1, r1, #4
	cmp	r1, r2
	blo	1b

	/* Starting dynamic relocations. Use ldr to get RELA_START and END */
3:	ldr	r1, =__RELA_START__
	add	r1, r1, r0
	ldr	r2, =__RELA_END__
	add	r2, r2, r0

	/*
	 * According to the ELF-32 specification, each relocation entry here
	 * is an 8-byte Elf32_Rel structure:
	 *	typedef struct {
	 *		Elf32_Addr r_offset;
	 *		Elf32_Word r_info;
	 *	} Elf32_Rel;
	 *
	 * r_offset is the address of the reference.
	 * r_info holds the symbol index and the type of the relocation (in
	 * this case code 23, which corresponds to R_ARM_RELATIVE).
	 */
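	/*
	 * Walk the table one 8-byte entry at a time: ignore R_ARM_NONE
	 * entries, then (with assertions enabled) check that the type is
	 * R_ARM_RELATIVE, and add Diff(S) to the word at Diff(S) + r_offset,
	 * provided that word points inside the fixup region [r6, r7].
	 */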
	/* Skip R_ARM_NONE entry with code 0 */
1:	ldr	r3, [r1, #4]
	ands	r3, r3, #0xff
	beq	2f

#if ENABLE_ASSERTIONS
	/* Assert that the relocation type is R_ARM_RELATIVE */
	cmp	r3, #R_ARM_RELATIVE
	ASM_ASSERT(eq)
#endif
	ldr	r3, [r1]	/* r_offset */
	add	r3, r0, r3	/* Diff(S) + r_offset */
	ldr	r4, [r3]

	/* Skip adding offset if address is < lower limit */
	cmp	r4, r6
	blo	2f

	/* Skip adding offset if address is > upper limit */
	cmp	r4, r7
	bhi	2f

	add	r4, r0, r4
	str	r4, [r3]

2:	add	r1, r1, #8
	cmp	r1, r2
	blo	1b
	bx	lr
endfunc fixup_gdt_reloc
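/*
 * Illustrative call sketch; the region symbols below are placeholders and
 * are not exported by this file. Both addresses must be 4KB aligned, and the
 * call must be made from within the first page of the fixup region, since
 * the load offset is derived from lr:
 *
 *	ldr	r0, =fixup_region_start
 *	ldr	r1, =fixup_region_limit
 *	bl	fixup_gdt_reloc
 *
 * Note that r2-r4, r6 and r7 are clobbered in addition to r0 and r1.
 */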