1/*
2 * Copyright (c) 2016-2021, ARM Limited and Contributors. All rights reserved.
3 *
4 * SPDX-License-Identifier: BSD-3-Clause
5 */
6
7#include <arch.h>
8#include <asm_macros.S>
9#include <assert_macros.S>
10#include <common/bl_common.h>
11#include <lib/xlat_tables/xlat_tables_defs.h>
12
13	.globl	smc
14	.globl	zeromem
15	.globl	zero_normalmem
16	.globl	memcpy4
17	.globl	disable_mmu_icache_secure
18	.globl	disable_mmu_secure
19	.globl	fixup_gdt_reloc
20
21#define PAGE_START_MASK		~(PAGE_SIZE_MASK)
22
func smc
	/*
	 * Only the first four arguments arrive in r0-r3; the remaining three
	 * were placed on the stack by the caller. Fetch them from the top of
	 * the stack into r4-r6 (sp itself is left unchanged) so that r0-r6 are
	 * all populated when the SMC is issued.
	 *
	 * Clobbers: r4-r6
	 *
	 * Note: no return instruction follows the SMC in this function.
	 */
	ldmia	sp, {r4-r6}
	smc	#0
endfunc smc
33
34/* -----------------------------------------------------------------------
35 * void zeromem(void *mem, unsigned int length)
36 *
 * Initialise a region in normal memory to 0. This function complies with the
 * AAPCS and can be called from C code.
39 *
40 * -----------------------------------------------------------------------
41 */
func zeromem
	/*
	 * Symbolic register names.
	 *
	 * In:       r0 = start address, r1 = length in bytes
	 * Clobbers: r0-r3, r12 and the condition flags
	 *
	 * The region is cleared in up to three phases:
	 *   1. single bytes until the address is 8-byte aligned,
	 *   2. 8 bytes per iteration while a whole double word fits,
	 *   3. single bytes for whatever is left.
	 */
	dst       .req r0  /* Start address, then address of next byte to zero */
	nbytes    .req r1  /* Size of the region in bytes */
	/*
	 * r1 is shared: the byte count is consumed by the very first add and
	 * the register holds the end address from then on.
	 */
	end_addr  .req r1  /* First address beyond the region */
	zero_lo   .req r2  /* Zero source for byte and double word stores */
	zero_hi   .req r3  /* Second zero source for double word stores */
	scratch   .req r12 /* Alignment boundary currently being targeted */

	mov	zero_lo, #0

	/* end_addr = dst + nbytes, i.e. one past the last byte to clear */
	add	end_addr, dst, nbytes

	/* The byte count no longer exists; retire its alias. */
	.unreq	nbytes

	/* Phase 1 is unnecessary when dst is already 8-byte aligned. */
	tst	dst, #0x7
	beq	.Lzeromem_dwords

	/* scratch = dst rounded up to the next multiple of 8 */
	orr	scratch, dst, #0x7
	adds	scratch, scratch, #1
	/* Rounding up wrapped to zero: use the byte-only path. */
	beq	.Lzeromem_tail
	/* The boundary is at or beyond the end: use the byte-only path. */
	cmp	scratch, end_addr
	bhs	.Lzeromem_tail

	/* Phase 1: byte stores up to the 8-byte boundary */
.Lzeromem_head_loop:
	strb	zero_lo, [dst], #1
	cmp	dst, scratch
	bne	.Lzeromem_head_loop

	/* Phase 2: double word stores */
.Lzeromem_dwords:

	/* scratch = end address rounded down to a multiple of 8 */
	bic	scratch, end_addr, #0x7

	/* Skip the phase when not even one whole double word fits. */
	cmp	dst, scratch
	bhs	.Lzeromem_tail

	mov	zero_hi, #0
.Lzeromem_dword_loop:
	stmia	dst!, {zero_lo, zero_hi}
	cmp	dst, scratch
	blo	.Lzeromem_dword_loop

	/* Phase 3: byte stores for the remainder (possibly none) */
.Lzeromem_tail:
	cmp	dst, end_addr
	beq	.Lzeromem_done
.Lzeromem_tail_loop:
	strb	zero_lo, [dst], #1
	cmp	dst, end_addr
	bne	.Lzeromem_tail_loop
.Lzeromem_done:
	bx	lr

	/* nbytes was already retired above when r1 became end_addr. */
	.unreq	dst
	.unreq	end_addr
	.unreq	zero_lo
	.unreq	zero_hi
	.unreq	scratch
endfunc zeromem
130
/*
 * Unlike AArch64, which can use the DC ZVA instruction, AArch32 has no
 * dedicated way of zeroing normal memory. zero_normalmem is therefore simply
 * another name for zeromem.
 */
.set	zero_normalmem, zeromem
136
137/* --------------------------------------------------------------------------
138 * void memcpy4(void *dest, const void *src, unsigned int length)
139 *
140 * Copy length bytes from memory area src to memory area dest.
141 * The memory areas should not overlap.
142 * Destination and source addresses must be 4-byte aligned.
143 * --------------------------------------------------------------------------
144 */
func memcpy4
	/*
	 * In:       r0 = dest, r1 = src, r2 = length in bytes
	 * Clobbers: r0-r3 and the condition flags
	 *
	 * Whole words are copied while at least 4 bytes remain; the final
	 * 1 to 3 bytes, if any, are copied one at a time.
	 */
#if ENABLE_ASSERTIONS
	/* Both pointers must have their two low bits clear */
	orr	r3, r0, r1
	tst	r3, #0x3
	ASM_ASSERT(eq)
#endif
	/*
	 * A zero length must return without touching memory. Without this
	 * check it would fall into the byte loop, which copies first and
	 * tests afterwards: the counter would wrap from 0 to 0xffffffff and
	 * the loop would keep copying.
	 */
	cmp	r2, #0
	beq	.Lmemcpy4_done

/* copy 4 bytes at a time */
m_loop4:
	cmp	r2, #4
	blo	m_loop1
	ldr	r3, [r1], #4
	str	r3, [r0], #4
	subs	r2, r2, #4
	bne	m_loop4
	bx	lr

/* copy byte per byte; r2 is between 1 and 3 on entry */
m_loop1:
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	subs	r2, r2, #1
	bne	m_loop1
.Lmemcpy4_done:
	bx	lr
endfunc memcpy4
169
170/* ---------------------------------------------------------------------------
171 * Disable the MMU in Secure State
172 * ---------------------------------------------------------------------------
173 */
174
func disable_mmu_secure
	/* r1 = set of SCTLR bits to clear: the M and C bits */
	mov	r1, #(SCTLR_C_BIT | SCTLR_M_BIT)

	/*
	 * Common tail, also branched to by disable_mmu_icache_secure.
	 * In:       r1 = mask of SCTLR bits to clear
	 * Clobbers: r0
	 */
do_disable_mmu:
#if ERRATA_A9_794073
	/*
	 * Workaround selected by ERRATA_A9_794073: write BPIALL (r0 is
	 * written as-is, it has not been initialised here) and wait for
	 * completion.
	 */
	stcopr	r0, BPIALL
	dsb	sy
#endif
	/* Read-modify-write SCTLR, clearing only the bits selected by r1 */
	ldcopr	r0, SCTLR
	bic	r0, r0, r1
	stcopr	r0, SCTLR
	isb				// ensure MMU is off
	dsb	sy
	bx	lr
endfunc disable_mmu_secure
189
190
func disable_mmu_icache_secure
	/*
	 * Same as disable_mmu_secure, except that the I bit is cleared in
	 * SCTLR as well. The mask is loaded with the ldr pseudo-instruction
	 * and handed in r1 to the shared tail inside disable_mmu_secure,
	 * which performs the update and returns to our caller.
	 */
	ldr	r1, =(SCTLR_I_BIT | SCTLR_C_BIT | SCTLR_M_BIT)
	b	do_disable_mmu
endfunc disable_mmu_icache_secure
195
196/* ---------------------------------------------------------------------------
 * Helper to fix up the Global Offset Table (GOT) and dynamic relocations
 * (.rel.dyn) at runtime.
199 *
200 * This function is meant to be used when the firmware is compiled with -fpie
201 * and linked with -pie options. We rely on the linker script exporting
 * appropriate markers for start and end of the section. For GOT, we
 * expect __GOT_START__ and __GOT_END__. Similarly for .rel.dyn, we expect
 * __RELA_START__ and __RELA_END__.
205 *
206 * The function takes the limits of the memory to apply fixups to as
207 * arguments (which is usually the limits of the relocable BL image).
208 *   r0 -  the start of the fixup region
209 *   r1 -  the limit of the fixup region
210 * These addresses have to be 4KB page aligned.
211 * ---------------------------------------------------------------------------
212 */
213
/* Relocation codes */
#define R_ARM_RELATIVE 	23

func fixup_gdt_reloc
	/*
	 * Register usage:
	 *   r0     - on entry the start of the fixup region, afterwards
	 *            Diff(S) = run-time address - compile-time address
	 *   r1, r2 - on entry r1 is the limit of the fixup region; afterwards
	 *            cursor and end of the table being walked
	 *   r3, r4 - scratch
	 *   r6, r7 - lower and upper limit of the fixup region
	 *
	 * Clobbers: r0-r4, r6, r7 and the condition flags. r4, r6 and r7 are
	 * overwritten without being saved, and the stack is not used.
	 */
	mov	r6, r0
	mov	r7, r1

#if ENABLE_ASSERTIONS
	/* Test if the limits are 4K aligned */
	orr	r0, r0, r1
	mov	r1, #(PAGE_SIZE_MASK)
	tst	r0, r1
	ASM_ASSERT(eq)
#endif
	/*
	 * Calculate the offset based on return address in lr.
	 * Assume that this function is called within a page at the start of
	 * fixup region.
	 */
	ldr	r1, =PAGE_START_MASK
	and	r2, lr, r1
	subs	r0, r2, r6	/* Diff(S) = Current Address - Compiled Address */
	/*
	 * Diff(S) = 0: the image runs where it was linked, so neither the
	 * GOT nor the dynamic relocations need any fixing. Return right away
	 * instead of rewriting every entry with an offset of zero.
	 */
	beq	.Lfixup_done

	ldr	r1, =__GOT_START__
	add	r1, r1, r0
	ldr	r2, =__GOT_END__
	add	r2, r2, r0

	/* An empty GOT has no entry to visit; do not touch what follows it */
	cmp	r1, r2
	bhs	.Lfixup_dyn_relocs

	/*
	 * GOT is an array of 32_bit addresses which must be fixed up as
	 * new_addr = old_addr + Diff(S).
	 * The new_addr is the address currently the binary is executing from
	 * and old_addr is the address at compile time.
	 */
.Lfixup_got_entry:
	ldr	r3, [r1]

	/* Skip adding offset if address is < lower limit */
	cmp	r3, r6
	blo	.Lfixup_got_next

	/*
	 * Skip adding offset if address is > upper limit.
	 * NOTE(review): an address equal to the upper limit is fixed up
	 * here, whereas the dynamic relocation loop below skips it (bhs).
	 * Confirm which of the two is intended.
	 */
	cmp	r3, r7
	bhi	.Lfixup_got_next
	add	r3, r3, r0
	str	r3, [r1]

.Lfixup_got_next:
	add	r1, r1, #4
	cmp	r1, r2
	blo	.Lfixup_got_entry

	/* Starting dynamic relocations. Use ldr to get RELA_START and END */
.Lfixup_dyn_relocs:
	ldr	r1, =__RELA_START__
	add	r1, r1, r0
	ldr	r2, =__RELA_END__
	add	r2, r2, r0

	/* An empty relocation table has no entry to visit */
	cmp	r1, r2
	bhs	.Lfixup_done

	/*
	 * Each entry walked below is 8 bytes long and carries no explicit
	 * addend, which is the ELF-32 Elf32_Rel layout:
	 *	typedef struct {
	 *		Elf32_Addr r_offset;
	 *		Elf32_Word r_info;
	 *	} Elf32_Rel;
	 *
	 * r_offset is address of reference
	 * r_info is symbol index and type of relocation; the type is held in
	 * its low byte (in this case code 23 which corresponds to
	 * R_ARM_RELATIVE).
	 *
	 * The value to relocate is read from, and written back to, the
	 * location r_offset refers to.
	 */
.Lfixup_rel_entry:
	/* Skip R_ARM_NONE entry with code 0 */
	ldr	r3, [r1, #4]
	ands	r3, r3, #0xff
	beq	.Lfixup_rel_next

#if ENABLE_ASSERTIONS
	/* Assert that the relocation type is R_ARM_RELATIVE */
	cmp	r3, #R_ARM_RELATIVE
	ASM_ASSERT(eq)
#endif
	ldr	r3, [r1]	/* r_offset */
	add	r3, r0, r3	/* Diff(S) + r_offset */
	ldr	r4, [r3]

	/* Skip adding offset if address is < lower limit */
	cmp	r4, r6
	blo	.Lfixup_rel_next

	/* Skip adding offset if address is >= upper limit */
	cmp	r4, r7
	bhs	.Lfixup_rel_next

	add	r4, r0, r4
	str	r4, [r3]

.Lfixup_rel_next:
	add	r1, r1, #8
	cmp	r1, r2
	blo	.Lfixup_rel_entry

.Lfixup_done:
	bx	lr
endfunc fixup_gdt_reloc
316