/*
 * Copyright (c) 2013-2023, Arm Limited and Contributors. All rights reserved.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include <arch.h>
#include <asm_macros.S>
#include <assert_macros.S>
#include <common/bl_common.h>
#include <lib/xlat_tables/xlat_tables_defs.h>

	.globl	smc

	.globl	zero_normalmem
	.globl	zeromem
	.globl	memcpy16
	.globl	gpt_tlbi_by_pa_ll

	.globl	disable_mmu_el1
	.globl	disable_mmu_el3
	.globl	disable_mmu_icache_el1
	.globl	disable_mmu_icache_el3
	.globl	fixup_gdt_reloc
#if SUPPORT_VFP
	.globl	enable_vfp
#endif

func smc
	smc	#0
endfunc smc

/* -----------------------------------------------------------------------
 * void zero_normalmem(void *mem, unsigned int length);
 *
 * Initialise a region in normal memory to 0. This function complies with the
 * AAPCS and can be called from C code.
 *
 * NOTE: The MMU must be enabled when using this function, as it can only
 *       operate on normal memory. It is intended to be called mainly from C
 *       code, where the MMU is usually enabled.
 * -----------------------------------------------------------------------
 */
.equ	zero_normalmem, zeromem_dczva

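/* -----------------------------------------------------------------------
 * Illustrative only (not part of the build): a minimal C sketch of a typical
 * zero_normalmem call, made once the MMU and data cache are enabled. The
 * buffer and caller names are hypothetical.
 *
 *	extern void zero_normalmem(void *mem, unsigned int length);
 *
 *	static unsigned char scratch_buf[256];	// normal (cacheable) memory
 *
 *	void clear_scratch(void)
 *	{
 *		// The MMU must already be on: the implementation uses DC ZVA.
 *		zero_normalmem(scratch_buf, sizeof(scratch_buf));
 *	}
 * -----------------------------------------------------------------------
 */
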
/* -----------------------------------------------------------------------
 * void zeromem(void *mem, unsigned int length);
 *
 * Initialise a region of device memory to 0. This function complies with the
 * AAPCS and can be called from C code.
 *
 * NOTE: When data caches and MMU are enabled, zero_normalmem can usually be
 *       used instead for faster zeroing.
 *
 * -----------------------------------------------------------------------
 */
func zeromem
	/* x2 is the address past the last zeroed address */
	add	x2, x0, x1
	/*
	 * Use the fallback path that does not use the DC ZVA instruction and
	 * therefore does not require the MMU to be enabled.
	 */
	b	.Lzeromem_dczva_fallback_entry
endfunc zeromem

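/* -----------------------------------------------------------------------
 * Illustrative only (not part of the build): a minimal C sketch of when
 * zeromem is the right choice, i.e. before the MMU and data cache are
 * enabled or for device memory, since this path never issues DC ZVA. The
 * function and parameter names are hypothetical.
 *
 *	extern void zeromem(void *mem, unsigned int length);
 *
 *	void early_clear(void *region_base, unsigned int region_size)
 *	{
 *		zeromem(region_base, region_size);	// safe with the MMU off
 *	}
 * -----------------------------------------------------------------------
 */
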
/* -----------------------------------------------------------------------
 * void zeromem_dczva(void *mem, unsigned int length);
 *
 * Fill a region of normal memory of size "length" bytes with null bytes.
 * The MMU must be enabled and the memory must be of normal type. This is
 * because this function internally uses the DC ZVA instruction, which
 * generates an Alignment fault if used on any type of Device memory (see
 * section D3.4.9 of the ARMv8 ARM, issue k). When the MMU is disabled, all
 * memory behaves like Device-nGnRnE memory (see section D4.2.8), hence the
 * requirement for the MMU to be enabled.
 *
 * NOTE: The code assumes that the block size defined in the DCZID_EL0
 *       register is at least 16 bytes.
 * -----------------------------------------------------------------------
 */
func zeromem_dczva

	/*
	 * The function consists of a series of loops that zero memory one byte
	 * at a time, 16 bytes at a time, or using the DC ZVA instruction to
	 * zero aligned blocks of bytes, whose size is assumed to be more than
	 * 16. In the case where the DC ZVA instruction cannot be used or the
	 * first 16-byte loop would overflow, there is a fallback path that
	 * does not use DC ZVA.
	 * Note: The fallback path is also used by the zeromem function, which
	 *       branches to it directly.
	 *
	 *              +---------+   zeromem_dczva
	 *              |  entry  |
	 *              +----+----+
	 *                   |
	 *                   v
	 *              +---------+
	 *              | checks  |>o-------+ (If any check fails, fallback)
	 *              +----+----+         |
	 *                   |              |---------------+
	 *                   v              | Fallback path |
	 *            +------+------+       |---------------+
	 *            | 1 byte loop |       |
	 *            +------+------+ .Lzeromem_dczva_initial_1byte_aligned_end
	 *                   |              |
	 *                   v              |
	 *           +-------+-------+      |
	 *           | 16 bytes loop |      |
	 *           +-------+-------+      |
	 *                   |              |
	 *                   v              |
	 *            +------+------+ .Lzeromem_dczva_blocksize_aligned
	 *            | DC ZVA loop |       |
	 *            +------+------+       |
	 *       +--------+  |              |
	 *       |        |  |              |
	 *       |        v  v              |
	 *       |   +-------+-------+ .Lzeromem_dczva_final_16bytes_aligned
	 *       |   | 16 bytes loop |      |
	 *       |   +-------+-------+      |
	 *       |           |              |
	 *       |           v              |
	 *       |    +------+------+ .Lzeromem_dczva_final_1byte_aligned
	 *       |    | 1 byte loop |       |
	 *       |    +-------------+       |
	 *       |           |              |
	 *       |           v              |
	 *       |       +---+--+           |
	 *       |       | exit |           |
	 *       |       +------+           |
	 *       |                          |
	 *       |           +--------------+    +------------------+ zeromem
	 *       |           |  +----------------| zeromem function |
	 *       |           |  |                +------------------+
	 *       |           v  v
	 *       |    +-------------+ .Lzeromem_dczva_fallback_entry
	 *       |    | 1 byte loop |
	 *       |    +------+------+
	 *       |           |
	 *       +-----------+
	 */

	/*
	 * Readable names for registers
	 *
	 * Registers x0, x1 and x2 are also set by zeromem which
	 * branches into the fallback path directly, so cursor, length and
	 * stop_address should not be retargeted to other registers.
	 */
	cursor       .req x0 /* Start address and then current address */
	length       .req x1 /* Length in bytes of the region to zero out */
	/* Reusing x1 as length is never used after block_mask is set */
	block_mask   .req x1 /* Bitmask of the block size read in DCZID_EL0 */
	stop_address .req x2 /* Address past the last zeroed byte */
	block_size   .req x3 /* Size of a block in bytes as read in DCZID_EL0 */
	tmp1         .req x4
	tmp2         .req x5

#if ENABLE_ASSERTIONS
	/*
	 * Check for M bit (MMU enabled) of the current SCTLR_EL(1|3)
	 * register value and panic if the MMU is disabled.
	 */
#if defined(IMAGE_BL1) || defined(IMAGE_BL31) || (defined(IMAGE_BL2) && \
	BL2_RUNS_AT_EL3)
	mrs	tmp1, sctlr_el3
#else
	mrs	tmp1, sctlr_el1
#endif

	tst	tmp1, #SCTLR_M_BIT
	ASM_ASSERT(ne)
#endif /* ENABLE_ASSERTIONS */

	/* stop_address is the address past the last to zero */
	add	stop_address, cursor, length

	/*
	 * Read dczid_el0; its 4 lowest bits encode log2(<block size in words>)
	 * (see the encoding of the dczid_el0 register).
	 */
	mrs	block_size, dczid_el0

	/*
	 * Select the 4 lowest bits and convert the extracted log2(<block size
	 * in words>) to <block size in bytes>
	 */
	ubfx	block_size, block_size, #0, #4
	mov	tmp2, #(1 << 2)
	lsl	block_size, tmp2, block_size

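	/*
	 * Illustrative only (not part of the build): a C sketch of the block
	 * size derivation performed just above. DCZID_EL0[3:0] holds log2 of
	 * the block size in words, so the size in bytes is 4 << BS. The
	 * helper name is hypothetical.
	 *
	 *	static inline unsigned long dczva_block_size_bytes(void)
	 *	{
	 *		unsigned long dczid;
	 *
	 *		__asm__ volatile("mrs %0, dczid_el0" : "=r"(dczid));
	 *		return 4UL << (dczid & 0xfUL);	// words -> bytes
	 *	}
	 */
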
#if ENABLE_ASSERTIONS
	/*
	 * Assumes block size is at least 16 bytes to avoid manual realignment
	 * of the cursor at the end of the DCZVA loop.
	 */
	cmp	block_size, #16
	ASM_ASSERT(hs)
#endif
	/*
	 * It is not worth doing all the setup for a region smaller than a
	 * block, and this check protects against zeroing a whole block when
	 * the area to zero is smaller than one. Also, as the block size is
	 * assumed to be at least 16 bytes, this protects the initial aligning
	 * loops from trying to zero 16 bytes when length is less than 16.
	 */
	cmp	length, block_size
	b.lo	.Lzeromem_dczva_fallback_entry

	/*
	 * Calculate the bitmask of the block alignment. It will never
	 * underflow as the block size is between 4 bytes and 2kB.
	 * block_mask = block_size - 1
	 */
	sub	block_mask, block_size, #1

	/*
	 * length alias should not be used after this point unless it is
	 * defined as a register other than block_mask's.
	 */
	 .unreq length

	/*
	 * If the start address is already aligned to zero block size, go
	 * straight to the cache zeroing loop. This is safe because at this
	 * point, the length cannot be smaller than a block size.
	 */
	tst	cursor, block_mask
	b.eq	.Lzeromem_dczva_blocksize_aligned

	/*
	 * Calculate the first block-size-aligned address. It is assumed that
	 * the zero block size is at least 16 bytes. This address is the last
	 * address of this initial loop.
	 */
	orr	tmp1, cursor, block_mask
	add	tmp1, tmp1, #1

	/*
	 * If the addition overflows, skip the cache zeroing loops. This is
	 * quite unlikely however.
	 */
	cbz	tmp1, .Lzeromem_dczva_fallback_entry

	/*
	 * If the first block-size-aligned address is past the last address,
	 * fallback to the simpler code.
	 */
	cmp	tmp1, stop_address
	b.hi	.Lzeromem_dczva_fallback_entry

	/*
	 * If the start address is already aligned to 16 bytes, skip this loop.
	 * It is safe to do this because tmp1 (the stop address of the initial
	 * 16 bytes loop) will never be greater than the final stop address.
	 */
	tst	cursor, #0xf
	b.eq	.Lzeromem_dczva_initial_1byte_aligned_end

	/* Calculate the next address aligned to 16 bytes */
	orr	tmp2, cursor, #0xf
	add	tmp2, tmp2, #1
	/* If it overflows, fallback to the simple path (unlikely) */
	cbz	tmp2, .Lzeromem_dczva_fallback_entry
	/*
	 * Next aligned address cannot be after the stop address because the
	 * length cannot be smaller than 16 at this point.
	 */

	/* First loop: zero byte per byte */
1:
	strb	wzr, [cursor], #1
	cmp	cursor, tmp2
	b.ne	1b
.Lzeromem_dczva_initial_1byte_aligned_end:

	/*
	 * Second loop: we need to zero 16 bytes at a time from cursor to tmp1
	 * before being able to use the code that deals with block-size-aligned
	 * addresses.
	 */
	cmp	cursor, tmp1
	b.hs	2f
1:
	stp	xzr, xzr, [cursor], #16
	cmp	cursor, tmp1
	b.lo	1b
2:

	/*
	 * Third loop: zero a block at a time using DC ZVA cache block zeroing
	 * instruction.
	 */
.Lzeromem_dczva_blocksize_aligned:
	/*
	 * Calculate the last block-size-aligned address. If the result equals
	 * the start address, the loop will exit immediately.
	 */
	bic	tmp1, stop_address, block_mask

	cmp	cursor, tmp1
	b.hs	2f
1:
	/* Zero the block containing the cursor */
	dc	zva, cursor
	/* Increment the cursor by the size of a block */
	add	cursor, cursor, block_size
	cmp	cursor, tmp1
	b.lo	1b
2:

	/*
	 * Fourth loop: zero 16 bytes at a time, then byte per byte for the
	 * remaining area
	 */
.Lzeromem_dczva_final_16bytes_aligned:
	/*
	 * Calculate the last 16-byte aligned address. It is assumed that the
	 * block size will never be smaller than 16 bytes, so that the current
	 * cursor is aligned to at least a 16-byte boundary.
	 */
	bic	tmp1, stop_address, #15

	cmp	cursor, tmp1
	b.hs	2f
1:
	stp	xzr, xzr, [cursor], #16
	cmp	cursor, tmp1
	b.lo	1b
2:

	/* Fifth and final loop: zero byte per byte */
.Lzeromem_dczva_final_1byte_aligned:
	cmp	cursor, stop_address
	b.eq	2f
1:
	strb	wzr, [cursor], #1
	cmp	cursor, stop_address
	b.ne	1b
2:
	ret

	/* Fallback for unaligned start addresses */
.Lzeromem_dczva_fallback_entry:
	/*
	 * If the start address is already aligned to 16 bytes, skip this loop.
	 */
	tst	cursor, #0xf
	b.eq	.Lzeromem_dczva_final_16bytes_aligned

	/* Calculate the next address aligned to 16 bytes */
	orr	tmp1, cursor, #15
	add	tmp1, tmp1, #1
	/* If it overflows, fallback to byte per byte zeroing */
	cbz	tmp1, .Lzeromem_dczva_final_1byte_aligned
	/* If the next aligned address is after the stop address, fall back */
	cmp	tmp1, stop_address
	b.hs	.Lzeromem_dczva_final_1byte_aligned

	/* Fallback entry loop: zero byte per byte */
1:
	strb	wzr, [cursor], #1
	cmp	cursor, tmp1
	b.ne	1b

	b	.Lzeromem_dczva_final_16bytes_aligned

	.unreq	cursor
	/*
	 * length is already unreq'ed to reuse the register for another
	 * variable.
	 */
	.unreq	stop_address
	.unreq	block_size
	.unreq	block_mask
	.unreq	tmp1
	.unreq	tmp2
endfunc zeromem_dczva

/* --------------------------------------------------------------------------
 * void memcpy16(void *dest, const void *src, unsigned int length)
 *
 * Copy length bytes from memory area src to memory area dest.
 * The memory areas should not overlap.
 * Destination and source addresses must be 16-byte aligned.
 * --------------------------------------------------------------------------
 */
func memcpy16
#if ENABLE_ASSERTIONS
	orr	x3, x0, x1
	tst	x3, #0xf
	ASM_ASSERT(eq)
#endif
/* copy 16 bytes at a time */
m_loop16:
	cmp	x2, #16
	b.lo	m_loop1
	ldp	x3, x4, [x1], #16
	stp	x3, x4, [x0], #16
	sub	x2, x2, #16
	b	m_loop16
/* copy byte per byte */
m_loop1:
	cbz	x2, m_end
	ldrb	w3, [x1], #1
	strb	w3, [x0], #1
	subs	x2, x2, #1
	b.ne	m_loop1
m_end:
	ret
endfunc memcpy16

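/* --------------------------------------------------------------------------
 * Illustrative only (not part of the build): a minimal C sketch of a valid
 * memcpy16 call. Both buffers are 16-byte aligned and non-overlapping, as
 * required above; the buffer names are hypothetical.
 *
 *	extern void memcpy16(void *dest, const void *src, unsigned int length);
 *
 *	static unsigned char src_buf[64] __attribute__((aligned(16)));
 *	static unsigned char dst_buf[64] __attribute__((aligned(16)));
 *
 *	void copy_example(void)
 *	{
 *		memcpy16(dst_buf, src_buf, sizeof(src_buf));
 *	}
 * --------------------------------------------------------------------------
 */
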
/* ---------------------------------------------------------------------------
 * Disable the MMU at EL3
 * ---------------------------------------------------------------------------
 */

func disable_mmu_el3
	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT)
do_disable_mmu_el3:
	mrs	x0, sctlr_el3
	bic	x0, x0, x1
	msr	sctlr_el3, x0
	isb	/* ensure MMU is off */
	dsb	sy
	ret
endfunc disable_mmu_el3


func disable_mmu_icache_el3
	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)
	b	do_disable_mmu_el3
endfunc disable_mmu_icache_el3

/* ---------------------------------------------------------------------------
 * Disable the MMU at EL1
 * ---------------------------------------------------------------------------
 */

func disable_mmu_el1
	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT)
do_disable_mmu_el1:
	mrs	x0, sctlr_el1
	bic	x0, x0, x1
	msr	sctlr_el1, x0
	isb	/* ensure MMU is off */
	dsb	sy
	ret
endfunc disable_mmu_el1


func disable_mmu_icache_el1
	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)
	b	do_disable_mmu_el1
endfunc disable_mmu_icache_el1

/* ---------------------------------------------------------------------------
 * Enable the use of VFP at EL3
 * ---------------------------------------------------------------------------
 */
#if SUPPORT_VFP
func enable_vfp
	mrs	x0, cpacr_el1
	orr	x0, x0, #CPACR_VFP_BITS
	msr	cpacr_el1, x0
	mrs	x0, cptr_el3
	mov	x1, #AARCH64_CPTR_TFP
	bic	x0, x0, x1
	msr	cptr_el3, x0
	isb
	ret
endfunc enable_vfp
#endif

/* ---------------------------------------------------------------------------
 * Helper to fix up the Global Offset Table (GOT) and dynamic relocations
 * (.rela.dyn) at runtime.
 *
 * This function is meant to be used when the firmware is compiled with -fpie
 * and linked with the -pie option. We rely on the linker script exporting
 * appropriate markers for the start and end of each section. For the GOT, we
 * expect __GOT_START__ and __GOT_END__. Similarly for .rela.dyn, we expect
 * __RELA_START__ and __RELA_END__.
 *
 * The function takes the limits of the memory to apply fixups to as
 * arguments (which are usually the limits of the relocatable BL image):
 *   x0 -  the start of the fixup region
 *   x1 -  the limit of the fixup region
 * These addresses have to be 4KB page aligned.
 * ---------------------------------------------------------------------------
 */

/* Relocation codes */
#define	R_AARCH64_NONE		0
#define	R_AARCH64_RELATIVE	1027
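
/* ---------------------------------------------------------------------------
 * Illustrative only (not part of the build): a C sketch of the GOT pass
 * performed by fixup_gdt_reloc below, assuming "diff" is the difference
 * between the run-time and link-time base addresses and start/limit bound
 * the image. The helper name is hypothetical.
 *
 *	#include <stdint.h>
 *
 *	extern uint64_t __GOT_START__[], __GOT_END__[];
 *
 *	static void fixup_got(uint64_t diff, uint64_t start, uint64_t limit)
 *	{
 *		for (uint64_t *entry = __GOT_START__; entry < __GOT_END__; entry++) {
 *			// Only entries that point inside the image are patched.
 *			if (*entry >= start && *entry <= limit)
 *				*entry += diff;
 *		}
 *	}
 * ---------------------------------------------------------------------------
 */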

func fixup_gdt_reloc
	mov	x6, x0
	mov	x7, x1

#if ENABLE_ASSERTIONS
	/* Test if the limits are 4KB aligned */
	orr	x0, x0, x1
	tst	x0, #(PAGE_SIZE_MASK)
	ASM_ASSERT(eq)
#endif
	/*
	 * Calculate the offset based on the return address in x30.
	 * Assume that this function is called from within the first page of
	 * the fixup region.
	 */
	and	x2, x30, #~(PAGE_SIZE_MASK)
	subs	x0, x2, x6	/* Diff(S) = Current Address - Compiled Address */
	b.eq	3f		/* Diff(S) = 0. No relocation needed */

	adrp	x1, __GOT_START__
	add	x1, x1, :lo12:__GOT_START__
	adrp	x2, __GOT_END__
	add	x2, x2, :lo12:__GOT_END__

	/*
	 * The GOT is an array of 64-bit addresses which must be fixed up as
	 * new_addr = old_addr + Diff(S).
	 * new_addr is the address the binary is currently executing from,
	 * while old_addr is the address at compile time.
	 */
1:	ldr	x3, [x1]

	/* Skip adding offset if address is < lower limit */
	cmp	x3, x6
	b.lo	2f

	/* Skip adding offset if address is > upper limit */
	cmp	x3, x7
	b.hi	2f
	add	x3, x3, x0
	str	x3, [x1]

2:	add	x1, x1, #8
	cmp	x1, x2
	b.lo	1b

	/* Starting dynamic relocations. Use adrp/adr to get RELA_START and END */
3:	adrp	x1, __RELA_START__
	add	x1, x1, :lo12:__RELA_START__
	adrp	x2, __RELA_END__
	add	x2, x2, :lo12:__RELA_END__

	/*
	 * According to the ELF-64 specification, the RELA data structure is
	 * as follows:
	 *	typedef struct {
	 *		Elf64_Addr r_offset;
	 *		Elf64_Xword r_info;
	 *		Elf64_Sxword r_addend;
	 *	} Elf64_Rela;
	 *
	 * r_offset is the address of the reference.
	 * r_info is the symbol index and the type of relocation (in this case
	 * code 1027, which corresponds to R_AARCH64_RELATIVE).
	 * r_addend is the constant part of the expression.
	 *
	 * The size of the Elf64_Rela structure is 24 bytes.
	 */

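	/*
	 * Illustrative only (not part of the build): a C sketch of the
	 * .rela.dyn pass implemented below, assuming "diff" is the run-time
	 * minus link-time offset and start/limit bound the image, as in the
	 * GOT sketch earlier in this file.
	 *
	 *	#include <stdint.h>
	 *
	 *	typedef struct {
	 *		uint64_t r_offset;
	 *		uint64_t r_info;
	 *		int64_t  r_addend;
	 *	} elf64_rela_t;
	 *
	 *	extern elf64_rela_t __RELA_START__[], __RELA_END__[];
	 *
	 *	static void fixup_rela(uint64_t diff, uint64_t start, uint64_t limit)
	 *	{
	 *		for (elf64_rela_t *r = __RELA_START__; r < __RELA_END__; r++) {
	 *			uint64_t addend = (uint64_t)r->r_addend;
	 *
	 *			if (r->r_info == 0)		// R_AARCH64_NONE: skip
	 *				continue;
	 *			// Only addends that point inside the image are patched.
	 *			if (addend >= start && addend <= limit)
	 *				*(uint64_t *)(r->r_offset + diff) = diff + addend;
	 *		}
	 *	}
	 */
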
	/* Skip R_AARCH64_NONE entry with code 0 */
1:	ldr	x3, [x1, #8]
	cbz	x3, 2f

#if ENABLE_ASSERTIONS
	/* Assert that the relocation type is R_AARCH64_RELATIVE */
	cmp	x3, #R_AARCH64_RELATIVE
	ASM_ASSERT(eq)
#endif
	ldr	x3, [x1]	/* r_offset */
	add	x3, x0, x3
	ldr	x4, [x1, #16]	/* r_addend */

	/* Skip adding offset if r_addend is < lower limit */
	cmp	x4, x6
	b.lo	2f

	/* Skip adding offset if r_addend entry is > upper limit */
	cmp	x4, x7
	b.hi	2f

	add	x4, x0, x4	/* Diff(S) + r_addend */
	str	x4, [x3]

2:	add	x1, x1, #24
	cmp	x1, x2
	b.lo	1b
	ret
endfunc fixup_gdt_reloc

/*
 * TODO: Currently only a size of 4KB is supported;
 * add support for other sizes as well.
 */
func gpt_tlbi_by_pa_ll
#if ENABLE_ASSERTIONS
	cmp	x1, #PAGE_SIZE_4KB
	ASM_ASSERT(eq)
	tst	x0, #(PAGE_SIZE_MASK)
	ASM_ASSERT(eq)
#endif
	lsr	x0, x0, #FOUR_KB_SHIFT	/* 4KB size encoding is zero */
	sys	#6, c8, c4, #7, x0	/* TLBI RPALOS, <Xt> */
	dsb	sy
	ret
endfunc gpt_tlbi_by_pa_ll
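
/*
 * Illustrative only (not part of the build): a minimal C sketch of a call to
 * gpt_tlbi_by_pa_ll for a 4KB-aligned physical address, the only size the
 * TODO above notes as supported. The C prototype and caller name shown here
 * are assumptions for illustration.
 *
 *	#include <stdint.h>
 *	#include <stddef.h>
 *
 *	extern void gpt_tlbi_by_pa_ll(uint64_t pa, size_t size);
 *
 *	void invalidate_gpt_range(uint64_t pa_4k_aligned)
 *	{
 *		// pa must be 4KB aligned and size must be 4KB (0x1000),
 *		// as enforced by the assertions in the function above.
 *		gpt_tlbi_by_pa_ll(pa_4k_aligned, 0x1000);
 *	}
 */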