/*
 * Copyright (c) 2016-2021, ARM Limited and Contributors. All rights reserved.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include <arch.h>
#include <asm_macros.S>
#include <assert_macros.S>
#include <common/bl_common.h>
#include <lib/xlat_tables/xlat_tables_defs.h>

	.globl	smc
	.globl	zeromem
	.globl	zero_normalmem
	.globl	memcpy4
	.globl	disable_mmu_icache_secure
	.globl	disable_mmu_secure
	.globl	fixup_gdt_reloc

/* Mask that keeps the page-number bits of an address and clears the offset */
#define PAGE_START_MASK		~(PAGE_SIZE_MASK)

/* -----------------------------------------------------------------------
 * smc
 *
 * Issue `smc #0` with r0-r6 populated.
 *
 * For AArch32 only r0-r3 arrive in registers; the next three arguments are
 * expected at the top of the caller's stack and are loaded into r4-r6 here.
 * The stack pointer itself is not modified.
 *
 * Clobbers: r4-r6
 *
 * NOTE(review): there is no return instruction after the SMC in this
 * function - presumably the SMC is not expected to return here; confirm
 * against the callers.
 * -----------------------------------------------------------------------
 */
func smc
	ldm	sp, {r4, r5, r6}
	smc	#0
endfunc smc

/* -----------------------------------------------------------------------
 * void zeromem(void *mem, unsigned int length)
 *
 * Initialise a region in normal memory to 0. This function complies with the
 * AAPCS and can be called from C code.
 *
 * Strategy:
 *   1. zero byte by byte up to the next 8-byte aligned address,
 *   2. zero 8 bytes at a time up to the last 8-byte aligned address,
 *   3. zero the remaining tail byte by byte.
 * If the region is too short to contain an 8-byte aligned address, or if
 * computing the next aligned address wraps to 0, only step 3 is executed.
 *
 * Clobbers: r0-r3, r12, flags
 * -----------------------------------------------------------------------
 */
func zeromem
	/*
	 * Readable names for registers.
	 *
	 * cursor and length live in r0/r1, which is where the two arguments
	 * arrive.
	 *
	 * NOTE(review): an earlier version of this comment said that another
	 * entry point branches directly into the fallback path with r0-r2
	 * already set up; no such entry point is visible in this file.
	 * Confirm before retargeting these names to other registers.
	 */
	cursor       .req r0  /* Start address and then current address */
	length       .req r1  /* Length in bytes of the region to zero out */
	/*
	 * r1 is reused: length is only needed to compute stop_address at the
	 * very beginning of the function.
	 */
	stop_address .req r1  /* Address past the last zeroed byte */
	zeroreg1     .req r2  /* Source register filled with 0 */
	zeroreg2     .req r3  /* Source register filled with 0 */
	tmp          .req r12 /* Temporary scratch register */

	mov	zeroreg1, #0

	/* stop_address is the address past the last byte to zero */
	add	stop_address, cursor, length

	/*
	 * length cannot be used anymore as it shares its register with
	 * stop_address.
	 */
	.unreq	length

	/*
	 * If the start address is already aligned to 8 bytes, skip the
	 * leading byte loop.
	 */
	tst	cursor, #(8-1)
	beq	.Lzeromem_8bytes_aligned

	/* Calculate the next address aligned to 8 bytes */
	orr	tmp, cursor, #(8-1)
	adds	tmp, tmp, #1
	/* If it wrapped around to 0, fall back to byte per byte zeroing */
	beq	.Lzeromem_1byte_aligned
	/*
	 * If the next aligned address is at or after the stop address, fall
	 * back to byte per byte zeroing.
	 */
	cmp	tmp, stop_address
	bhs	.Lzeromem_1byte_aligned

	/* Zero byte per byte until cursor reaches the aligned address */
1:
	strb	zeroreg1, [cursor], #1
	cmp	cursor, tmp
	bne	1b

	/* Zero 8 bytes at a time */
.Lzeromem_8bytes_aligned:

	/* Calculate the last 8-byte aligned address at or below the stop. */
	bic	tmp, stop_address, #(8-1)

	/* Nothing to do here if fewer than 8 bytes remain */
	cmp	cursor, tmp
	bhs	2f

	mov	zeroreg2, #0
1:
	stmia	cursor!, {zeroreg1, zeroreg2}
	cmp	cursor, tmp
	blo	1b
2:

	/* Zero the remaining bytes one at a time */
.Lzeromem_1byte_aligned:
	cmp	cursor, stop_address
	beq	2f
1:
	strb	zeroreg1, [cursor], #1
	cmp	cursor, stop_address
	bne	1b
2:
	bx	lr

	.unreq	cursor
	/*
	 * length is already unreq'ed to reuse the register for another
	 * variable.
	 */
	.unreq	stop_address
	.unreq	zeroreg1
	.unreq	zeroreg2
	.unreq	tmp
endfunc zeromem

/*
 * AArch32 does not have special ways of zeroing normal memory as AArch64 does
 * using the DC ZVA instruction, so we just alias zero_normalmem to zeromem.
 */
.equ	zero_normalmem, zeromem

/* --------------------------------------------------------------------------
 * void memcpy4(void *dest, const void *src, unsigned int length)
 *
 * Copy length bytes from memory area src to memory area dest.
 * The memory areas should not overlap.
 * Destination and source addresses must be 4-byte aligned (asserted when
 * ENABLE_ASSERTIONS is set).
 * A length of 0 is valid and copies nothing.
 *
 * In:       r0 = dest, r1 = src, r2 = length
 * Clobbers: r0-r3, flags
 * --------------------------------------------------------------------------
 */
func memcpy4
#if ENABLE_ASSERTIONS
	orr	r3, r0, r1
	tst	r3, #0x3
	ASM_ASSERT(eq)
#endif
/* copy 4 bytes at a time while at least 4 bytes remain */
m_loop4:
	cmp	r2, #4
	blo	m_tail
	ldr	r3, [r1], #4
	str	r3, [r0], #4
	subs	r2, r2, #4
	bne	m_loop4
	bx	lr

/*
 * Fewer than 4 bytes remain. Return straight away if that remainder is 0:
 * entering the byte loop with r2 == 0 would copy one byte and then wrap the
 * counter around, copying ~4GB.
 */
m_tail:
	cmp	r2, #0
	beq	m_end

/* copy byte per byte; r2 is in the range 1..3 on entry */
m_loop1:
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	subs	r2, r2, #1
	bne	m_loop1
m_end:
	bx	lr
endfunc memcpy4

/* ---------------------------------------------------------------------------
 * Disable the MMU in Secure State
 *
 * disable_mmu_secure clears SCTLR.M and SCTLR.C.
 * disable_mmu_icache_secure clears SCTLR.M, SCTLR.C and SCTLR.I.
 *
 * Both share the do_disable_mmu tail, which expects the mask of SCTLR bits
 * to clear in r1.
 *
 * Clobbers: r0, r1
 * ---------------------------------------------------------------------------
 */

func disable_mmu_secure
	mov	r1, #(SCTLR_M_BIT | SCTLR_C_BIT)
do_disable_mmu:
#if ERRATA_A9_794073
	/* Erratum workaround: issue a BPIALL write before touching SCTLR */
	stcopr	r0, BPIALL
	dsb
#endif
	/* Read-modify-write SCTLR, clearing the bits selected by r1 */
	ldcopr	r0, SCTLR
	bic	r0, r0, r1
	stcopr	r0, SCTLR
	isb				// ensure MMU is off
	dsb	sy
	bx	lr
endfunc disable_mmu_secure


func disable_mmu_icache_secure
	ldr	r1, =(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)
	b	do_disable_mmu
endfunc disable_mmu_icache_secure

/* ---------------------------------------------------------------------------
 * Helper to fixup the Global Offset Table (GOT) and the dynamic relocations
 * at runtime.
 *
 * This function is meant to be used when the firmware is compiled with -fpie
 * and linked with -pie options. We rely on the linker script exporting
 * appropriate markers for start and end of the sections. For the GOT, we
 * expect __GOT_START__ and __GOT_END__. Similarly for the dynamic
 * relocations, we expect __RELA_START__ and __RELA_END__.
 *
 * The function takes the limits of the memory to apply fixups to as
 * arguments (which is usually the limits of the relocatable BL image).
 *   r0 - the start of the fixup region
 *   r1 - the limit of the fixup region
 * These addresses have to be 4KB page aligned (checked against
 * PAGE_SIZE_MASK when ENABLE_ASSERTIONS is set).
 *
 * In both passes a stored address is adjusted only if it lies within
 * [start, limit], both bounds included.
 *
 * Clobbers: r0-r4, r6, r7, flags. Note that r4, r6 and r7 are overwritten
 * without being saved.
 * ---------------------------------------------------------------------------
 */

/* Relocation codes */
#define R_ARM_RELATIVE	23

func fixup_gdt_reloc
	/* Keep the region limits: r6 = start, r7 = limit */
	mov	r6, r0
	mov	r7, r1

#if ENABLE_ASSERTIONS
	/* Test if the limits are 4K aligned */
	orr	r0, r0, r1
	mov	r1, #(PAGE_SIZE_MASK)
	tst	r0, r1
	ASM_ASSERT(eq)
#endif
	/*
	 * Calculate the offset based on return address in lr.
	 * Assume that this function is called within a page at the start of
	 * fixup region.
	 */
	ldr	r1, =PAGE_START_MASK
	and	r2, lr, r1
	subs	r0, r2, r6	/* Diff(S) = Current Address - Compiled Address */
	beq	3f		/* Diff(S) = 0. Skip the GOT pass */

	/* r1 = runtime address of the GOT start, r2 = of the GOT end */
	ldr	r1, =__GOT_START__
	add	r1, r1, r0
	ldr	r2, =__GOT_END__
	add	r2, r2, r0

	/*
	 * GOT is an array of 32-bit addresses which must be fixed up as
	 * new_addr = old_addr + Diff(S).
	 * The new_addr is the address the binary is currently executing from
	 * and old_addr is the address at compile time.
	 */
1:	ldr	r3, [r1]

	/* Skip adding offset if address is < lower limit */
	cmp	r3, r6
	blo	2f

	/* Skip adding offset if address is > upper limit */
	cmp	r3, r7
	bhi	2f
	add	r3, r3, r0
	str	r3, [r1]

2:	add	r1, r1, #4
	cmp	r1, r2
	blo	1b

	/* Starting dynamic relocations. Use ldr to get RELA_START and END */
3:	ldr	r1, =__RELA_START__
	add	r1, r1, r0
	ldr	r2, =__RELA_END__
	add	r2, r2, r0

	/*
	 * Each relocation entry processed below is 8 bytes long:
	 *	offset 0: r_offset - address of the reference
	 *	offset 4: r_info   - symbol index and type of relocation; the
	 *			     type is held in the low 8 bits (in this
	 *			     case code 23, i.e. R_ARM_RELATIVE).
	 *
	 * There is no explicit addend field: the value to adjust is read
	 * from the location designated by r_offset. This is the two-word
	 * entry layout that the ELF32 specification names Elf32_Rel, even
	 * though the linker markers are called __RELA_*.
	 */

	/* Skip R_ARM_NONE entry with code 0 */
1:	ldr	r3, [r1, #4]
	ands	r3, r3, #0xff
	beq	2f

#if ENABLE_ASSERTIONS
	/* Assert that the relocation type is R_ARM_RELATIVE */
	cmp	r3, #R_ARM_RELATIVE
	ASM_ASSERT(eq)
#endif
	ldr	r3, [r1]	/* r_offset */
	add	r3, r0, r3	/* Diff(S) + r_offset */
	ldr	r4, [r3]

	/* Skip adding offset if address is < lower limit */
	cmp	r4, r6
	blo	2f

	/*
	 * Skip adding offset if address is > upper limit. An address equal
	 * to the limit is still fixed up, exactly as in the GOT pass above,
	 * so that both passes treat the same pointer value identically.
	 */
	cmp	r4, r7
	bhi	2f

	add	r4, r0, r4
	str	r4, [r3]

2:	add	r1, r1, #8
	cmp	r1, r2
	blo	1b
	bx	lr
endfunc fixup_gdt_reloc