1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /*
3  *  linux/arch/arm/boot/compressed/head.S
4  *
5  *  Copyright (C) 1996-2002 Russell King
6  *  Copyright (C) 2004 Hyok S. Choi (MPU support)
7  */
8 #include <linux/linkage.h>
9 #include <asm/assembler.h>
10 #include <asm/v7m.h>
11 
12 #include "efi-header.S"
13 
14 #ifdef __ARMEB__
15 #define OF_DT_MAGIC 0xd00dfeed
16 #else
17 #define OF_DT_MAGIC 0xedfe0dd0
18 #endif
19 
20  AR_CLASS(	.arch	armv7-a	)
21  M_CLASS(	.arch	armv7-m	)
22 
23 /*
24  * Debugging stuff
25  *
26  * Note that these macros must not contain any code which is not
27  * 100% relocatable.  Any attempt to do so will result in a crash.
28  * Please select one of the following when turning on debugging.
29  */
30 #ifdef DEBUG
31 
32 #if defined(CONFIG_DEBUG_ICEDCC)
33 
34 #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
35 		.macro	loadsp, rb, tmp1, tmp2
36 		.endm
37 		.macro	writeb, ch, rb, tmp
38 		mcr	p14, 0, \ch, c0, c5, 0
39 		.endm
40 #elif defined(CONFIG_CPU_XSCALE)
41 		.macro	loadsp, rb, tmp1, tmp2
42 		.endm
43 		.macro	writeb, ch, rb, tmp
44 		mcr	p14, 0, \ch, c8, c0, 0
45 		.endm
46 #else
47 		.macro	loadsp, rb, tmp1, tmp2
48 		.endm
49 		.macro	writeb, ch, rb, tmp
50 		mcr	p14, 0, \ch, c1, c0, 0
51 		.endm
52 #endif
53 
54 #else
55 
56 #include CONFIG_DEBUG_LL_INCLUDE
57 
58 		.macro	writeb,	ch, rb, tmp
59 #ifdef CONFIG_DEBUG_UART_FLOW_CONTROL
60 		waituartcts \tmp, \rb
61 #endif
62 		waituarttxrdy \tmp, \rb
63 		senduart \ch, \rb
64 		busyuart \tmp, \rb
65 		.endm
66 
67 #if defined(CONFIG_ARCH_SA1100)
68 		.macro	loadsp, rb, tmp1, tmp2
69 		mov	\rb, #0x80000000	@ physical base address
70 		add	\rb, \rb, #0x00010000	@ Ser1
71 		.endm
72 #else
73 		.macro	loadsp,	rb, tmp1, tmp2
74 		addruart \rb, \tmp1, \tmp2
75 		.endm
76 #endif
77 #endif
78 #endif
79 
80 		.macro	kputc,val
81 		mov	r0, \val
82 		bl	putc
83 		.endm
84 
85 		.macro	kphex,val,len
86 		mov	r0, \val
87 		mov	r1, #\len
88 		bl	phex
89 		.endm
90 
91 		/*
92 		 * Debug kernel copy by printing the memory addresses involved
93 		 */
94 		.macro dbgkc, begin, end, cbegin, cend
95 #ifdef DEBUG
96 		kputc   #'C'
97 		kputc   #':'
98 		kputc   #'0'
99 		kputc   #'x'
100 		kphex   \begin, 8	/* Start of compressed kernel */
101 		kputc	#'-'
102 		kputc	#'0'
103 		kputc	#'x'
104 		kphex	\end, 8		/* End of compressed kernel */
105 		kputc	#'-'
106 		kputc	#'>'
107 		kputc   #'0'
108 		kputc   #'x'
109 		kphex   \cbegin, 8	/* Start of kernel copy */
110 		kputc	#'-'
111 		kputc	#'0'
112 		kputc	#'x'
113 		kphex	\cend, 8	/* End of kernel copy */
114 		kputc	#'\n'
115 #endif
116 		.endm
117 
118 		/*
119 		 * Debug print of the final appended DTB location
120 		 */
121 		.macro dbgadtb, begin, size
122 #ifdef DEBUG
123 		kputc   #'D'
124 		kputc   #'T'
125 		kputc   #'B'
126 		kputc   #':'
127 		kputc   #'0'
128 		kputc   #'x'
129 		kphex   \begin, 8	/* Start of appended DTB */
130 		kputc	#' '
131 		kputc	#'('
132 		kputc	#'0'
133 		kputc	#'x'
134 		kphex	\size, 8	/* Size of appended DTB */
135 		kputc	#')'
136 		kputc	#'\n'
137 #endif
138 		.endm
139 
140 		.macro	enable_cp15_barriers, reg
141 		mrc	p15, 0, \reg, c1, c0, 0	@ read SCTLR
142 		tst	\reg, #(1 << 5)		@ CP15BEN bit set?
143 		bne	.L_\@
144 		orr	\reg, \reg, #(1 << 5)	@ CP15 barrier instructions
145 		mcr	p15, 0, \reg, c1, c0, 0	@ write SCTLR
146  ARM(		.inst   0xf57ff06f		@ v7+ isb	)
147  THUMB(		isb						)
148 .L_\@:
149 		.endm
150 
151 		/*
152 		 * The kernel build system appends the size of the
153 		 * decompressed kernel at the end of the compressed data
154 		 * in little-endian form.
155 		 */
156 		.macro	get_inflated_image_size, res:req, tmp1:req, tmp2:req
157 		adr	\res, .Linflated_image_size_offset
158 		ldr	\tmp1, [\res]
159 		add	\tmp1, \tmp1, \res	@ address of inflated image size
160 
161 		ldrb	\res, [\tmp1]		@ get_unaligned_le32
162 		ldrb	\tmp2, [\tmp1, #1]
163 		orr	\res, \res, \tmp2, lsl #8
164 		ldrb	\tmp2, [\tmp1, #2]
165 		ldrb	\tmp1, [\tmp1, #3]
166 		orr	\res, \res, \tmp2, lsl #16
167 		orr	\res, \res, \tmp1, lsl #24
168 		.endm
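		/*
		 * Illustrative note: the ldrb/orr sequence above is a byte-wise
		 * get_unaligned_le32() of the word at input_data_end - 4 (the
		 * last four bytes of the compressed payload), so the appended
		 * size word may sit at any alignment.
		 */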
169 
170 		.macro	be32tocpu, val, tmp
171 #ifndef __ARMEB__
172 		/* convert to little endian */
173 		rev_l	\val, \tmp
174 #endif
175 		.endm
176 
177 		.section ".start", "ax"
178 /*
179  * sort out different calling conventions
180  */
181 		.align
182 		/*
183 		 * Always enter in ARM state for CPUs that support the ARM ISA.
184 		 * As of today (2014) that's exactly the members of the A and R
185 		 * classes.
186 		 */
187  AR_CLASS(	.arm	)
188 start:
189 		.type	start,#function
190 		/*
191 		 * These 7 nops along with the 1 nop immediately below for
192 		 * !THUMB2 form 8 nops that make the compressed kernel bootable
193 		 * on legacy ARM systems that assumed the kernel was in a.out
194 		 * binary format. The boot loaders on these systems would
195 		 * jump 32 bytes into the image to skip the a.out header.
196 		 * With these 8 nops filling exactly 32 bytes, things still
197 		 * work as expected on these legacy systems. Thumb2 mode keeps
198 		 * 7 of the nops, as it turns out that some boot loaders
199 		 * were patching the initial instructions of the kernel, i.e.
200 		 * had started to exploit this "patch area".
201 		 */
202 		__initial_nops
203 		.rept	5
204 		__nop
205 		.endr
206 #ifndef CONFIG_THUMB2_KERNEL
207 		__nop
208 #else
209  AR_CLASS(	sub	pc, pc, #3	)	@ A/R: switch to Thumb2 mode
210   M_CLASS(	nop.w			)	@ M: already in Thumb2 mode
211 		.thumb
212 #endif
213 		W(b)	1f
214 
215 		.word	_magic_sig	@ Magic numbers to help the loader
216 		.word	_magic_start	@ absolute load/run zImage address
217 		.word	_magic_end	@ zImage end address
218 		.word	0x04030201	@ endianness flag
219 		.word	0x45454545	@ another magic number to indicate
220 		.word	_magic_table	@ additional data table
221 
222 		__EFI_HEADER
223 1:
224  ARM_BE8(	setend	be		)	@ go BE8 if compiled for BE8
225  AR_CLASS(	mrs	r9, cpsr	)
226 #ifdef CONFIG_ARM_VIRT_EXT
227 		bl	__hyp_stub_install	@ get into SVC mode, reversibly
228 #endif
229 		mov	r7, r1			@ save architecture ID
230 		mov	r8, r2			@ save atags pointer
231 
232 #ifndef CONFIG_CPU_V7M
233 		/*
234 		 * Booting from Angel - need to enter SVC mode and disable
235 		 * FIQs/IRQs (numeric definitions from angel arm.h source).
236 		 * We only do this if we were in user mode on entry.
237 		 */
238 		mrs	r2, cpsr		@ get current mode
239 		tst	r2, #3			@ not user?
240 		bne	not_angel
241 		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
242  ARM(		swi	0x123456	)	@ angel_SWI_ARM
243  THUMB(		svc	0xab		)	@ angel_SWI_THUMB
244 not_angel:
245 		safe_svcmode_maskall r0
246 		msr	spsr_cxsf, r9		@ Save the CPU boot mode in
247 						@ SPSR
248 #endif
249 		/*
250 		 * Note that some cache flushing and other stuff may
251 		 * be needed here - is there an Angel SWI call for this?
252 		 */
253 
254 		/*
255 		 * Some architecture-specific code can be inserted
256 		 * by the linker here, but it should preserve r7, r8, and r9.
257 		 */
258 
259 		.text
260 
261 #ifdef CONFIG_AUTO_ZRELADDR
262 		/*
263 		 * Find the start of physical memory.  As we are executing
264 		 * without the MMU on, we are in the physical address space.
265 		 * We just need to get rid of any offset by aligning the
266 		 * address.
267 		 *
268 		 * This alignment is a balance between the requirements of
269 		 * different platforms - we have chosen 128MB to allow
270 		 * platforms which align the start of their physical memory
271 		 * to 128MB to use this feature, while allowing the zImage
272 		 * to be placed within the first 128MB of memory on other
273 		 * platforms.  Increasing the alignment means we place
274 		 * stricter alignment requirements on the start of physical
275 		 * memory, but relaxing it means that we break people who
276 		 * are already placing their zImage in (e.g.) the top 64MB
277 		 * of this range.
278 		 */
279 		mov	r0, pc
280 		and	r0, r0, #0xf8000000
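		/*
		 * Illustrative example: 0xf8000000 == ~(0x08000000 - 1), so
		 * this rounds the current PC down to a 128MB boundary, e.g. a
		 * PC of 0x48200000 yields r0 = 0x48000000.
		 */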
281 #ifdef CONFIG_USE_OF
282 		adr	r1, LC1
283 #ifdef CONFIG_ARM_APPENDED_DTB
284 		/*
285 		 * Look for an appended DTB.  If found, we cannot use it to
286 		 * validate the calculated start of physical memory, as its
287 		 * memory nodes may need to be augmented by ATAGS stored at
288 		 * an offset from the same start of physical memory.
289 		 */
290 		ldr	r2, [r1, #4]	@ get &_edata
291 		add	r2, r2, r1	@ relocate it
292 		ldr	r2, [r2]	@ get DTB signature
293 		ldr	r3, =OF_DT_MAGIC
294 		cmp	r2, r3		@ do we have a DTB there?
295 		beq	1f		@ if yes, skip validation
296 #endif /* CONFIG_ARM_APPENDED_DTB */
297 
298 		/*
299 		 * Make sure we have some stack before calling C code.
300 		 * No GOT fixup has occurred yet, but none of the code we're
301 		 * about to call uses any global variables.
302 		 */
303 		ldr	sp, [r1]	@ get stack location
304 		add	sp, sp, r1	@ apply relocation
305 
306 		/* Validate calculated start against passed DTB */
307 		mov	r1, r8
308 		bl	fdt_check_mem_start
309 1:
310 #endif /* CONFIG_USE_OF */
311 		/* Determine final kernel image address. */
312 		add	r4, r0, #TEXT_OFFSET
313 #else
314 		ldr	r4, =zreladdr
315 #endif
316 
317 		/*
318 		 * Set up a page table only if it won't overwrite ourselves.
319 		 * That means r4 < pc || r4 - 16k page directory > &_end.
320 		 * Given that r4 > &_end is most infrequent, we add a rough
321 		 * additional 1MB of room for a possible appended DTB.
322 		 */
323 		mov	r0, pc
324 		cmp	r0, r4
325 		ldrcc	r0, .Lheadroom
326 		addcc	r0, r0, pc
327 		cmpcc	r4, r0
328 		orrcc	r4, r4, #1		@ remember we skipped cache_on
329 		blcs	cache_on
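		/*
		 * A rough C sketch of the check above (addresses unsigned):
		 *
		 *	if (pc < r4 && r4 < pc + headroom)
		 *		r4 |= 1;		// too close: defer cache_on
		 *	else
		 *		cache_on();
		 *
		 * where headroom (.Lheadroom) is _end - restart plus 16k for
		 * the page directory and 1MB for a possible appended DTB.
		 */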
330 
331 restart:	adr	r0, LC1
332 		ldr	sp, [r0]
333 		ldr	r6, [r0, #4]
334 		add	sp, sp, r0
335 		add	r6, r6, r0
336 
337 		get_inflated_image_size	r9, r10, lr
338 
339 #ifndef CONFIG_ZBOOT_ROM
340 		/* malloc space is above the relocated stack (64k max) */
341 		add	r10, sp, #MALLOC_SIZE
342 #else
343 		/*
344 		 * With ZBOOT_ROM the bss/stack is non-relocatable,
345 		 * but someone could still run this code from RAM,
346 		 * in which case our reference is _edata.
347 		 */
348 		mov	r10, r6
349 #endif
350 
351 		mov	r5, #0			@ init dtb size to 0
352 #ifdef CONFIG_ARM_APPENDED_DTB
353 /*
354  *   r4  = final kernel address (possibly with LSB set)
355  *   r5  = appended dtb size (still unknown)
356  *   r6  = _edata
357  *   r7  = architecture ID
358  *   r8  = atags/device tree pointer
359  *   r9  = size of decompressed image
360  *   r10 = end of this image, including bss/stack/malloc space if non-XIP
361  *   sp  = stack pointer
362  *
363  * if there are device trees (dtb) appended to zImage, advance r10 so that the
364  * dtb data will get relocated along with the kernel if necessary.
365  */
366 
367 		ldr	lr, [r6, #0]
368 		ldr	r1, =OF_DT_MAGIC
369 		cmp	lr, r1
370 		bne	dtb_check_done		@ not found
371 
372 #ifdef CONFIG_ARM_ATAG_DTB_COMPAT
373 		/*
374 		 * OK... Let's do some funky business here.
375 		 * If we do have a DTB appended to zImage, and we do have
376 		 * an ATAG list around, we want the latter to be translated
377 		 * and folded into the former here. No GOT fixup has occurred
378 		 * yet, but none of the code we're about to call uses any
379 		 * global variables.
380 		 */
381 
382 		/* Get the initial DTB size */
383 		ldr	r5, [r6, #4]
384 		be32tocpu r5, r1
385 		dbgadtb	r6, r5
386 		/* 50% DTB growth should be good enough */
387 		add	r5, r5, r5, lsr #1
388 		/* preserve 64-bit alignment */
389 		add	r5, r5, #7
390 		bic	r5, r5, #7
391 		/* clamp to 32KB min and 1MB max */
392 		cmp	r5, #(1 << 15)
393 		movlo	r5, #(1 << 15)
394 		cmp	r5, #(1 << 20)
395 		movhi	r5, #(1 << 20)
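		/*
		 * In rough C terms, the work space computed above is:
		 *	dtb_space = clamp(ALIGN(size + size / 2, 8),
		 *			  1 << 15, 1 << 20);
		 */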
396 		/* temporarily relocate the stack past the DTB work space */
397 		add	sp, sp, r5
398 
399 		mov	r0, r8
400 		mov	r1, r6
401 		mov	r2, r5
402 		bl	atags_to_fdt
403 
404 		/*
405 		 * If the returned value is 1, there is no ATAG at the location
406 		 * pointed to by r8.  Try the typical 0x100 offset from the start
407 		 * of RAM and hope for the best.
408 		 */
409 		cmp	r0, #1
410 		sub	r0, r4, #TEXT_OFFSET
411 		bic	r0, r0, #1
412 		add	r0, r0, #0x100
413 		mov	r1, r6
414 		mov	r2, r5
415 		bleq	atags_to_fdt
416 
417 		sub	sp, sp, r5
418 #endif
419 
420 		mov	r8, r6			@ use the appended device tree
421 
422 		/*
423 		 * Make sure that the DTB doesn't end up in the final
424 		 * kernel's .bss area. To do so, we adjust the decompressed
425 		 * kernel size to compensate if that .bss size is larger
426 		 * than the relocated code.
427 		 */
428 		ldr	r5, =_kernel_bss_size
429 		adr	r1, wont_overwrite
430 		sub	r1, r6, r1
431 		subs	r1, r5, r1
432 		addhi	r9, r9, r1
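		/*
		 * i.e. roughly: if (bss_size > _edata - wont_overwrite)
		 *	image_size += bss_size - (_edata - wont_overwrite);
		 * so the appended DTB stays clear of the kernel's .bss.
		 */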
433 
434 		/* Get the current DTB size */
435 		ldr	r5, [r6, #4]
436 		be32tocpu r5, r1
437 
438 		/* preserve 64-bit alignment */
439 		add	r5, r5, #7
440 		bic	r5, r5, #7
441 
442 		/* relocate some pointers past the appended dtb */
443 		add	r6, r6, r5
444 		add	r10, r10, r5
445 		add	sp, sp, r5
446 dtb_check_done:
447 #endif
448 
449 /*
450  * Check to see if we will overwrite ourselves.
451  *   r4  = final kernel address (possibly with LSB set)
452  *   r9  = size of decompressed image
453  *   r10 = end of this image, including bss/stack/malloc space if non-XIP
454  * We basically want:
455  *   r4 - 16k page directory >= r10 -> OK
456  *   r4 + image length <= address of wont_overwrite -> OK
457  * Note: the possible LSB in r4 is harmless here.
458  */
459 		add	r10, r10, #16384
460 		cmp	r4, r10
461 		bhs	wont_overwrite
462 		add	r10, r4, r9
463 		adr	r9, wont_overwrite
464 		cmp	r10, r9
465 		bls	wont_overwrite
466 
467 /*
468  * Relocate ourselves past the end of the decompressed kernel.
469  *   r6  = _edata
470  *   r10 = end of the decompressed kernel
471  * Because we always copy ahead, we need to do it from the end and go
472  * backward in case the source and destination overlap.
473  */
474 		/*
475 		 * Bump to the next 256-byte boundary with the size of
476 		 * the relocation code added. This avoids overwriting
477 		 * ourselves when the offset is small.
478 		 */
479 		add	r10, r10, #((reloc_code_end - restart + 256) & ~255)
480 		bic	r10, r10, #255
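		/*
		 * Worked example (hypothetical size): if reloc_code_end - restart
		 * were 0x6a0, the add would advance r10 by
		 * (0x6a0 + 256) & ~255 = 0x700 before the bic rounds r10 itself
		 * down to a 256-byte boundary.
		 */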
481 
482 		/* Get start of code we want to copy and align it down. */
483 		adr	r5, restart
484 		bic	r5, r5, #31
485 
486 /* Relocate the hyp vector base if necessary */
487 #ifdef CONFIG_ARM_VIRT_EXT
488 		mrs	r0, spsr
489 		and	r0, r0, #MODE_MASK
490 		cmp	r0, #HYP_MODE
491 		bne	1f
492 
493 		/*
494 		 * Compute the address of the hyp vectors after relocation.
495 		 * Call __hyp_set_vectors with the new address so that we
496 		 * can HVC again after the copy.
497 		 */
498 		adr_l	r0, __hyp_stub_vectors
499 		sub	r0, r0, r5
500 		add	r0, r0, r10
501 		bl	__hyp_set_vectors
502 1:
503 #endif
504 
505 		sub	r9, r6, r5		@ size to copy
506 		add	r9, r9, #31		@ rounded up to a multiple
507 		bic	r9, r9, #31		@ ... of 32 bytes
508 		add	r6, r9, r5
509 		add	r9, r9, r10
510 
511 #ifdef DEBUG
512 		sub     r10, r6, r5
513 		sub     r10, r9, r10
514 		/*
515 		 * We are about to copy the kernel to a new memory area.
516 		 * The boundaries of the new memory area can be found in
517 		 * r10 and r9, whilst r5 and r6 contain the boundaries
518 		 * of the memory we are going to copy.
519 		 * Calling dbgkc will help with the printing of this
520 		 * information.
521 		 */
522 		dbgkc	r5, r6, r10, r9
523 #endif
524 
525 1:		ldmdb	r6!, {r0 - r3, r10 - r12, lr}
526 		cmp	r6, r5
527 		stmdb	r9!, {r0 - r3, r10 - r12, lr}
528 		bhi	1b
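		/*
		 * The loop above is effectively a descending memmove() in
		 * 32-byte chunks: copy [r5, r6) so that it ends at r9, working
		 * from the top down so an overlapping destination above the
		 * source stays safe.
		 */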
529 
530 		/* Preserve offset to relocated code. */
531 		sub	r6, r9, r6
532 
533 		mov	r0, r9			@ start of relocated zImage
534 		add	r1, sp, r6		@ end of relocated zImage
535 		bl	cache_clean_flush
536 
537 		badr	r0, restart
538 		add	r0, r0, r6
539 		mov	pc, r0
540 
541 wont_overwrite:
542 		adr	r0, LC0
543 		ldmia	r0, {r1, r2, r3, r11, r12}
544 		sub	r0, r0, r1		@ calculate the delta offset
545 
546 /*
547  * If delta is zero, we are running at the address we were linked at.
548  *   r0  = delta
549  *   r2  = BSS start
550  *   r3  = BSS end
551  *   r4  = kernel execution address (possibly with LSB set)
552  *   r5  = appended dtb size (0 if not present)
553  *   r7  = architecture ID
554  *   r8  = atags pointer
555  *   r11 = GOT start
556  *   r12 = GOT end
557  *   sp  = stack pointer
558  */
559 		orrs	r1, r0, r5
560 		beq	not_relocated
561 
562 		add	r11, r11, r0
563 		add	r12, r12, r0
564 
565 #ifndef CONFIG_ZBOOT_ROM
566 		/*
567 		 * If we're running fully PIC (i.e. CONFIG_ZBOOT_ROM=n),
568 		 * we need to fix up pointers into the BSS region.
569 		 * Note that the stack pointer has already been fixed up.
570 		 */
571 		add	r2, r2, r0
572 		add	r3, r3, r0
573 
574 		/*
575 		 * Relocate all entries in the GOT table.
576 		 * Bump bss entries to _edata + dtb size
577 		 */
578 1:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
579 		add	r1, r1, r0		@ This fixes up C references
580 		cmp	r1, r2			@ if entry >= bss_start &&
581 		cmphs	r3, r1			@       bss_end > entry
582 		addhi	r1, r1, r5		@    entry += dtb size
583 		str	r1, [r11], #4		@ next entry
584 		cmp	r11, r12
585 		blo	1b
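		/*
		 * Roughly, for each GOT slot p in [_got_start, _got_end):
		 *	*p += delta;				// PIC fixup
		 *	if (*p >= bss_start && *p < bss_end)
		 *		*p += dtb_size;			// step over appended DTB
		 */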
586 
587 		/* bump our bss pointers too */
588 		add	r2, r2, r5
589 		add	r3, r3, r5
590 
591 #else
592 
593 		/*
594 		 * Relocate entries in the GOT table.  We only relocate
595 		 * the entries that are outside the (relocated) BSS region.
596 		 */
597 1:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
598 		cmp	r1, r2			@ entry < bss_start ||
599 		cmphs	r3, r1			@ _end < entry
600 		addlo	r1, r1, r0		@ table.  This fixes up the
601 		str	r1, [r11], #4		@ C references.
602 		cmp	r11, r12
603 		blo	1b
604 #endif
605 
606 not_relocated:	mov	r0, #0
607 1:		str	r0, [r2], #4		@ clear bss
608 		str	r0, [r2], #4
609 		str	r0, [r2], #4
610 		str	r0, [r2], #4
611 		cmp	r2, r3
612 		blo	1b
613 
614 		/*
615 		 * Did we skip the cache setup earlier?
616 		 * That is indicated by the LSB in r4.
617 		 * Do it now if so.
618 		 */
619 		tst	r4, #1
620 		bic	r4, r4, #1
621 		blne	cache_on
622 
623 /*
624  * The C runtime environment should now be set up sufficiently.
625  * Set up some pointers, and start decompressing.
626  *   r4  = kernel execution address
627  *   r7  = architecture ID
628  *   r8  = atags pointer
629  */
630 		mov	r0, r4
631 		mov	r1, sp			@ malloc space above stack
632 		add	r2, sp, #MALLOC_SIZE	@ 64k max
633 		mov	r3, r7
634 		bl	decompress_kernel
635 
636 		get_inflated_image_size	r1, r2, r3
637 
638 		mov	r0, r4			@ start of inflated image
639 		add	r1, r1, r0		@ end of inflated image
640 		bl	cache_clean_flush
641 		bl	cache_off
642 
643 #ifdef CONFIG_ARM_VIRT_EXT
644 		mrs	r0, spsr		@ Get saved CPU boot mode
645 		and	r0, r0, #MODE_MASK
646 		cmp	r0, #HYP_MODE		@ if not booted in HYP mode...
647 		bne	__enter_kernel		@ boot kernel directly
648 
649 		adr_l	r0, __hyp_reentry_vectors
650 		bl	__hyp_set_vectors
651 		__HVC(0)			@ otherwise bounce to hyp mode
652 
653 		b	.			@ should never be reached
654 #else
655 		b	__enter_kernel
656 #endif
657 
658 		.align	2
659 		.type	LC0, #object
660 LC0:		.word	LC0			@ r1
661 		.word	__bss_start		@ r2
662 		.word	_end			@ r3
663 		.word	_got_start		@ r11
664 		.word	_got_end		@ ip
665 		.size	LC0, . - LC0
666 
667 		.type	LC1, #object
668 LC1:		.word	.L_user_stack_end - LC1	@ sp
669 		.word	_edata - LC1		@ r6
670 		.size	LC1, . - LC1
671 
672 .Lheadroom:
673 		.word	_end - restart + 16384 + 1024*1024
674 
675 .Linflated_image_size_offset:
676 		.long	(input_data_end - 4) - .
677 
678 #ifdef CONFIG_ARCH_RPC
679 		.globl	params
680 params:		ldr	r0, =0x10000100		@ params_phys for RPC
681 		mov	pc, lr
682 		.ltorg
683 		.align
684 #endif
685 
686 /*
687  * dcache_line_size - get the minimum D-cache line size from the CTR register
688  * on ARMv7.
689  */
690 		.macro	dcache_line_size, reg, tmp
691 #ifdef CONFIG_CPU_V7M
692 		movw	\tmp, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_CTR
693 		movt	\tmp, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_CTR
694 		ldr	\tmp, [\tmp]
695 #else
696 		mrc	p15, 0, \tmp, c0, c0, 1		@ read ctr
697 #endif
698 		lsr	\tmp, \tmp, #16
699 		and	\tmp, \tmp, #0xf		@ cache line size encoding
700 		mov	\reg, #4			@ bytes per word
701 		mov	\reg, \reg, lsl \tmp		@ actual cache line size
702 		.endm
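		/*
		 * Example: a CTR DminLine field of 4 (i.e. 16 words) gives
		 * 4 << 4 = 64-byte cache lines.
		 */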
703 
704 /*
705  * Turn on the cache.  We need to set up some page tables so that we
706  * can have both the I and D caches on.
707  *
708  * We place the page tables 16k down from the kernel execution address,
709  * and we hope that nothing else is using it.  If it is in use, we
710  * will go pop!
711  *
712  * On entry,
713  *  r4 = kernel execution address
714  *  r7 = architecture number
715  *  r8 = atags pointer
716  * On exit,
717  *  r0, r1, r2, r3, r9, r10, r12 corrupted
718  * This routine must preserve:
719  *  r4, r7, r8
720  */
721 		.align	5
722 cache_on:	mov	r3, #8			@ cache_on function
723 		b	call_cache_fn
724 
725 /*
726  * Initialize the highest priority protection region, PR7,
727  * to cover the whole 32-bit address space, cacheable and bufferable.
728  */
729 __armv4_mpu_cache_on:
730 		mov	r0, #0x3f		@ 4G, the whole
731 		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
732 		mcr 	p15, 0, r0, c6, c7, 1
733 
734 		mov	r0, #0x80		@ PR7
735 		mcr	p15, 0, r0, c2, c0, 0	@ D-cache on
736 		mcr	p15, 0, r0, c2, c0, 1	@ I-cache on
737 		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
738 
739 		mov	r0, #0xc000
740 		mcr	p15, 0, r0, c5, c0, 1	@ I-access permission
741 		mcr	p15, 0, r0, c5, c0, 0	@ D-access permission
742 
743 		mov	r0, #0
744 		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
745 		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
746 		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
747 		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
748 						@ ...I .... ..D. WC.M
749 		orr	r0, r0, #0x002d		@ .... .... ..1. 11.1
750 		orr	r0, r0, #0x1000		@ ...1 .... .... ....
751 
752 		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
753 
754 		mov	r0, #0
755 		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
756 		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
757 		mov	pc, lr
758 
759 __armv3_mpu_cache_on:
760 		mov	r0, #0x3f		@ 4G, the whole
761 		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
762 
763 		mov	r0, #0x80		@ PR7
764 		mcr	p15, 0, r0, c2, c0, 0	@ cache on
765 		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
766 
767 		mov	r0, #0xc000
768 		mcr	p15, 0, r0, c5, c0, 0	@ access permission
769 
770 		mov	r0, #0
771 		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
772 		/*
773 		 * ?? ARMv3 MMU does not allow reading the control register,
774 		 * does this really work on ARMv3 MPU?
775 		 */
776 		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
777 						@ .... .... .... WC.M
778 		orr	r0, r0, #0x000d		@ .... .... .... 11.1
779 		/* ?? this overwrites the value constructed above? */
780 		mov	r0, #0
781 		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
782 
783 		/* ?? invalidate for the second time? */
784 		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
785 		mov	pc, lr
786 
787 #ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
788 #define CB_BITS 0x08
789 #else
790 #define CB_BITS 0x0c
791 #endif
792 
793 __setup_mmu:	sub	r3, r4, #16384		@ Page directory size
794 		bic	r3, r3, #0xff		@ Align the pointer
795 		bic	r3, r3, #0x3f00
796 /*
797  * Initialise the page tables, turning on the cacheable and bufferable
798  * bits for the RAM area only.
799  */
800 		mov	r0, r3
801 		mov	r9, r0, lsr #18
802 		mov	r9, r9, lsl #18		@ start of RAM
803 		add	r10, r9, #0x10000000	@ a reasonable RAM size
804 		mov	r1, #0x12		@ XN|U + section mapping
805 		orr	r1, r1, #3 << 10	@ AP=11
806 		add	r2, r3, #16384
807 1:		cmp	r1, r9			@ if virt > start of RAM
808 		cmphs	r10, r1			@   && end of RAM > virt
809 		bic	r1, r1, #0x1c		@ clear XN|U + C + B
810 		orrlo	r1, r1, #0x10		@ Set XN|U for non-RAM
811 		orrhs	r1, r1, r6		@ set RAM section settings
812 		str	r1, [r0], #4		@ 1:1 mapping
813 		add	r1, r1, #1048576
814 		teq	r0, r2
815 		bne	1b
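/*
 * A rough C sketch of the flat 1:1 section map built above (illustrative
 * names only):
 *
 *	for (addr = 0; addr < 4G; addr += 1M) {
 *		desc = addr | (3 << 10) | 0x12;	// AP=11, section mapping
 *		desc &= ~0x1c;			// drop XN|U, C, B
 *		if (addr >= ram_start && addr < ram_start + 256M)
 *			desc |= r6;		// RAM: C/B bits from caller
 *		else
 *			desc |= 0x10;		// non-RAM: XN|U, uncached
 *		pgdir[addr >> 20] = desc;
 *	}
 *
 * where ram_start is guessed as the page directory address rounded down
 * to 256kB.
 */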
816 /*
817  * If we are ever running from Flash, then we surely want the cache
818  * to be enabled for our execution instance as well...  We map 2MB of it
819  * so there is no mapping overlap problem for a compressed kernel of up to 1 MB.
820  * If we are executing from RAM, this merely duplicates the mapping above.
821  */
822 		orr	r1, r6, #0x04		@ ensure B is set for this
823 		orr	r1, r1, #3 << 10
824 		mov	r2, pc
825 		mov	r2, r2, lsr #20
826 		orr	r1, r1, r2, lsl #20
827 		add	r0, r3, r2, lsl #2
828 		str	r1, [r0], #4
829 		add	r1, r1, #1048576
830 		str	r1, [r0]
831 		mov	pc, lr
832 ENDPROC(__setup_mmu)
833 
834 @ Enable unaligned access on v6, to allow better code generation
835 @ for the decompressor C code:
836 __armv6_mmu_cache_on:
837 		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
838 		bic	r0, r0, #2		@ A (no unaligned access fault)
839 		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
840 		mcr	p15, 0, r0, c1, c0, 0	@ write SCTLR
841 		b	__armv4_mmu_cache_on
842 
843 __arm926ejs_mmu_cache_on:
844 #ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
845 		mov	r0, #4			@ put dcache in WT mode
846 		mcr	p15, 7, r0, c15, c0, 0
847 #endif
848 
849 __armv4_mmu_cache_on:
850 		mov	r12, lr
851 #ifdef CONFIG_MMU
852 		mov	r6, #CB_BITS | 0x12	@ U
853 		bl	__setup_mmu
854 		mov	r0, #0
855 		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
856 		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
857 		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
858 		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
859 		orr	r0, r0, #0x0030
860  ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
861 		bl	__common_mmu_cache_on
862 		mov	r0, #0
863 		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
864 #endif
865 		mov	pc, r12
866 
867 __armv7_mmu_cache_on:
868 		enable_cp15_barriers	r11
869 		mov	r12, lr
870 #ifdef CONFIG_MMU
871 		mrc	p15, 0, r11, c0, c1, 4	@ read ID_MMFR0
872 		tst	r11, #0xf		@ VMSA
873 		movne	r6, #CB_BITS | 0x02	@ !XN
874 		blne	__setup_mmu
875 		mov	r0, #0
876 		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
877 		tst	r11, #0xf		@ VMSA
878 		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
879 #endif
880 		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
881 		bic	r0, r0, #1 << 28	@ clear SCTLR.TRE
882 		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
883 		orr	r0, r0, #0x003c		@ write buffer
884 		bic	r0, r0, #2		@ A (no unaligned access fault)
885 		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
886 						@ (needed for ARM1176)
887 #ifdef CONFIG_MMU
888  ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
889 		mrcne   p15, 0, r6, c2, c0, 2   @ read ttb control reg
890 		orrne	r0, r0, #1		@ MMU enabled
891 		movne	r1, #0xfffffffd		@ domain 0 = client
892 		bic     r6, r6, #1 << 31        @ 32-bit translation system
893 		bic     r6, r6, #(7 << 0) | (1 << 4)	@ use only ttbr0
894 		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
895 		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
896 		mcrne   p15, 0, r6, c2, c0, 2   @ load ttb control
897 #endif
898 		mcr	p15, 0, r0, c7, c5, 4	@ ISB
899 		mcr	p15, 0, r0, c1, c0, 0	@ load control register
900 		mrc	p15, 0, r0, c1, c0, 0	@ and read it back
901 		mov	r0, #0
902 		mcr	p15, 0, r0, c7, c5, 4	@ ISB
903 		mov	pc, r12
904 
905 __fa526_cache_on:
906 		mov	r12, lr
907 		mov	r6, #CB_BITS | 0x12	@ U
908 		bl	__setup_mmu
909 		mov	r0, #0
910 		mcr	p15, 0, r0, c7, c7, 0	@ Invalidate whole cache
911 		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
912 		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
913 		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
914 		orr	r0, r0, #0x1000		@ I-cache enable
915 		bl	__common_mmu_cache_on
916 		mov	r0, #0
917 		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
918 		mov	pc, r12
919 
920 __common_mmu_cache_on:
921 #ifndef CONFIG_THUMB2_KERNEL
922 #ifndef DEBUG
923 		orr	r0, r0, #0x000d		@ Write buffer, mmu
924 #endif
925 		mov	r1, #-1
926 		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
927 		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
928 		b	1f
929 		.align	5			@ cache line aligned
930 1:		mcr	p15, 0, r0, c1, c0, 0	@ load control register
931 		mrc	p15, 0, r0, c1, c0, 0	@ and read it back to
932 		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
933 #endif
934 
935 #define PROC_ENTRY_SIZE (4*5)
936 
937 /*
938  * Here follow the relocatable cache support functions for the
939  * various processors.  This is a generic hook for locating an
940  * entry and jumping to an instruction at the specified offset
941  * from the start of the block.  Please note this is all position
942  * independent code.
943  *
944  *  r1  = corrupted
945  *  r2  = corrupted
946  *  r3  = block offset
947  *  r9  = corrupted
948  *  r12 = corrupted
949  */
950 
951 call_cache_fn:	adr	r12, proc_types
952 #ifdef CONFIG_CPU_CP15
953 		mrc	p15, 0, r9, c0, c0	@ get processor ID
954 #elif defined(CONFIG_CPU_V7M)
955 		/*
956 		 * On v7-M the processor ID is located in the V7M_SCB_CPUID
957 		 * register, but as cache handling is IMPLEMENTATION DEFINED on
958 		 * v7-M (if existent at all) we just return early here.
959 		 * If V7M_SCB_CPUID were used the cpu ID functions (i.e.
960 		 * __armv7_mmu_cache_{on,off,flush}) would be selected which
961 		 * use cp15 registers that are not implemented on v7-M.
962 		 */
963 		bx	lr
964 #else
965 		ldr	r9, =CONFIG_PROCESSOR_ID
966 #endif
967 1:		ldr	r1, [r12, #0]		@ get value
968 		ldr	r2, [r12, #4]		@ get mask
969 		eor	r1, r1, r9		@ (real ^ match)
970 		tst	r1, r2			@       & mask
971  ARM(		addeq	pc, r12, r3		) @ call cache function
972  THUMB(		addeq	r12, r3			)
973  THUMB(		moveq	pc, r12			) @ call cache function
974 		add	r12, r12, #PROC_ENTRY_SIZE
975 		b	1b
976 
977 /*
978  * Table for cache operations.  This is basically:
979  *   - CPU ID match
980  *   - CPU ID mask
981  *   - 'cache on' method instruction
982  *   - 'cache off' method instruction
983  *   - 'cache flush' method instruction
984  *
985  * We match an entry using: ((real_id ^ match) & mask) == 0
986  *
987  * Writethrough caches generally only need 'on' and 'off'
988  * methods.  Writeback caches _must_ have the flush method
989  * defined.
990  */
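/*
 * Layout note: each entry is PROC_ENTRY_SIZE (4*5 = 20) bytes: two data
 * words (match, mask) followed by three 4-byte instruction slots, which
 * is why the branches must be width-forced with W(b) in Thumb2 builds.
 */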
991 		.align	2
992 		.type	proc_types,#object
993 proc_types:
994 		.word	0x41000000		@ old ARM ID
995 		.word	0xff00f000
996 		mov	pc, lr
997  THUMB(		nop				)
998 		mov	pc, lr
999  THUMB(		nop				)
1000 		mov	pc, lr
1001  THUMB(		nop				)
1002 
1003 		.word	0x41007000		@ ARM7/710
1004 		.word	0xfff8fe00
1005 		mov	pc, lr
1006  THUMB(		nop				)
1007 		mov	pc, lr
1008  THUMB(		nop				)
1009 		mov	pc, lr
1010  THUMB(		nop				)
1011 
1012 		.word	0x41807200		@ ARM720T (writethrough)
1013 		.word	0xffffff00
1014 		W(b)	__armv4_mmu_cache_on
1015 		W(b)	__armv4_mmu_cache_off
1016 		mov	pc, lr
1017  THUMB(		nop				)
1018 
1019 		.word	0x41007400		@ ARM74x
1020 		.word	0xff00ff00
1021 		W(b)	__armv3_mpu_cache_on
1022 		W(b)	__armv3_mpu_cache_off
1023 		W(b)	__armv3_mpu_cache_flush
1024 
1025 		.word	0x41009400		@ ARM94x
1026 		.word	0xff00ff00
1027 		W(b)	__armv4_mpu_cache_on
1028 		W(b)	__armv4_mpu_cache_off
1029 		W(b)	__armv4_mpu_cache_flush
1030 
1031 		.word	0x41069260		@ ARM926EJ-S (v5TEJ)
1032 		.word	0xff0ffff0
1033 		W(b)	__arm926ejs_mmu_cache_on
1034 		W(b)	__armv4_mmu_cache_off
1035 		W(b)	__armv5tej_mmu_cache_flush
1036 
1037 		.word	0x00007000		@ ARM7 IDs
1038 		.word	0x0000f000
1039 		mov	pc, lr
1040  THUMB(		nop				)
1041 		mov	pc, lr
1042  THUMB(		nop				)
1043 		mov	pc, lr
1044  THUMB(		nop				)
1045 
1046 		@ Everything from here on will be the new ID system.
1047 
1048 		.word	0x4401a100		@ sa110 / sa1100
1049 		.word	0xffffffe0
1050 		W(b)	__armv4_mmu_cache_on
1051 		W(b)	__armv4_mmu_cache_off
1052 		W(b)	__armv4_mmu_cache_flush
1053 
1054 		.word	0x6901b110		@ sa1110
1055 		.word	0xfffffff0
1056 		W(b)	__armv4_mmu_cache_on
1057 		W(b)	__armv4_mmu_cache_off
1058 		W(b)	__armv4_mmu_cache_flush
1059 
1060 		.word	0x56056900
1061 		.word	0xffffff00		@ PXA9xx
1062 		W(b)	__armv4_mmu_cache_on
1063 		W(b)	__armv4_mmu_cache_off
1064 		W(b)	__armv4_mmu_cache_flush
1065 
1066 		.word	0x56158000		@ PXA168
1067 		.word	0xfffff000
1068 		W(b)	__armv4_mmu_cache_on
1069 		W(b)	__armv4_mmu_cache_off
1070 		W(b)	__armv5tej_mmu_cache_flush
1071 
1072 		.word	0x56050000		@ Feroceon
1073 		.word	0xff0f0000
1074 		W(b)	__armv4_mmu_cache_on
1075 		W(b)	__armv4_mmu_cache_off
1076 		W(b)	__armv5tej_mmu_cache_flush
1077 
1078 #ifdef CONFIG_CPU_FEROCEON_OLD_ID
1079 		/* this conflicts with the standard ARMv5TE entry */
1080 		.long	0x41009260		@ Old Feroceon
1081 		.long	0xff00fff0
1082 		b	__armv4_mmu_cache_on
1083 		b	__armv4_mmu_cache_off
1084 		b	__armv5tej_mmu_cache_flush
1085 #endif
1086 
1087 		.word	0x66015261		@ FA526
1088 		.word	0xff01fff1
1089 		W(b)	__fa526_cache_on
1090 		W(b)	__armv4_mmu_cache_off
1091 		W(b)	__fa526_cache_flush
1092 
1093 		@ These match on the architecture ID
1094 
1095 		.word	0x00020000		@ ARMv4T
1096 		.word	0x000f0000
1097 		W(b)	__armv4_mmu_cache_on
1098 		W(b)	__armv4_mmu_cache_off
1099 		W(b)	__armv4_mmu_cache_flush
1100 
1101 		.word	0x00050000		@ ARMv5TE
1102 		.word	0x000f0000
1103 		W(b)	__armv4_mmu_cache_on
1104 		W(b)	__armv4_mmu_cache_off
1105 		W(b)	__armv4_mmu_cache_flush
1106 
1107 		.word	0x00060000		@ ARMv5TEJ
1108 		.word	0x000f0000
1109 		W(b)	__armv4_mmu_cache_on
1110 		W(b)	__armv4_mmu_cache_off
1111 		W(b)	__armv5tej_mmu_cache_flush
1112 
1113 		.word	0x0007b000		@ ARMv6
1114 		.word	0x000ff000
1115 		W(b)	__armv6_mmu_cache_on
1116 		W(b)	__armv4_mmu_cache_off
1117 		W(b)	__armv6_mmu_cache_flush
1118 
1119 		.word	0x000f0000		@ new CPU Id
1120 		.word	0x000f0000
1121 		W(b)	__armv7_mmu_cache_on
1122 		W(b)	__armv7_mmu_cache_off
1123 		W(b)	__armv7_mmu_cache_flush
1124 
1125 		.word	0			@ unrecognised type
1126 		.word	0
1127 		mov	pc, lr
1128  THUMB(		nop				)
1129 		mov	pc, lr
1130  THUMB(		nop				)
1131 		mov	pc, lr
1132  THUMB(		nop				)
1133 
1134 		.size	proc_types, . - proc_types
1135 
1136 		/*
1137 		 * If you get a "non-constant expression in ".if" statement"
1138 		 * error from the assembler on this line, check that you have
1139 		 * not accidentally written a "b" instruction where you should
1140 		 * have written W(b).
1141 		 */
1142 		.if (. - proc_types) % PROC_ENTRY_SIZE != 0
1143 		.error "The size of one or more proc_types entries is wrong."
1144 		.endif
1145 
1146 /*
1147  * Turn off the Cache and MMU.  ARMv3 does not support
1148  * reading the control register, but ARMv4 does.
1149  *
1150  * On exit,
1151  *  r0, r1, r2, r3, r9, r12 corrupted
1152  * This routine must preserve:
1153  *  r4, r7, r8
1154  */
1155 		.align	5
1156 cache_off:	mov	r3, #12			@ cache_off function
1157 		b	call_cache_fn
1158 
1159 __armv4_mpu_cache_off:
1160 		mrc	p15, 0, r0, c1, c0
1161 		bic	r0, r0, #0x000d
1162 		mcr	p15, 0, r0, c1, c0	@ turn MPU and cache off
1163 		mov	r0, #0
1164 		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
1165 		mcr	p15, 0, r0, c7, c6, 0	@ flush D-Cache
1166 		mcr	p15, 0, r0, c7, c5, 0	@ flush I-Cache
1167 		mov	pc, lr
1168 
1169 __armv3_mpu_cache_off:
1170 		mrc	p15, 0, r0, c1, c0
1171 		bic	r0, r0, #0x000d
1172 		mcr	p15, 0, r0, c1, c0, 0	@ turn MPU and cache off
1173 		mov	r0, #0
1174 		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
1175 		mov	pc, lr
1176 
1177 __armv4_mmu_cache_off:
1178 #ifdef CONFIG_MMU
1179 		mrc	p15, 0, r0, c1, c0
1180 		bic	r0, r0, #0x000d
1181 		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
1182 		mov	r0, #0
1183 		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
1184 		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
1185 #endif
1186 		mov	pc, lr
1187 
1188 __armv7_mmu_cache_off:
1189 		mrc	p15, 0, r0, c1, c0
1190 #ifdef CONFIG_MMU
1191 		bic	r0, r0, #0x0005
1192 #else
1193 		bic	r0, r0, #0x0004
1194 #endif
1195 		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
1196 		mov	r0, #0
1197 #ifdef CONFIG_MMU
1198 		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB
1199 #endif
1200 		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
1201 		mcr	p15, 0, r0, c7, c10, 4	@ DSB
1202 		mcr	p15, 0, r0, c7, c5, 4	@ ISB
1203 		mov	pc, lr
1204 
1205 /*
1206  * Clean and flush the cache to maintain consistency.
1207  *
1208  * On entry,
1209  *  r0 = start address
1210  *  r1 = end address (exclusive)
1211  * On exit,
1212  *  r1, r2, r3, r9, r10, r11, r12 corrupted
1213  * This routine must preserve:
1214  *  r4, r6, r7, r8
1215  */
1216 		.align	5
1217 cache_clean_flush:
1218 		mov	r3, #16
1219 		mov	r11, r1
1220 		b	call_cache_fn
1221 
1222 __armv4_mpu_cache_flush:
1223 		tst	r4, #1
1224 		movne	pc, lr
1225 		mov	r2, #1
1226 		mov	r3, #0
1227 		mcr	p15, 0, ip, c7, c6, 0	@ invalidate D cache
1228 		mov	r1, #7 << 5		@ 8 segments
1229 1:		orr	r3, r1, #63 << 26	@ 64 entries
1230 2:		mcr	p15, 0, r3, c7, c14, 2	@ clean & invalidate D index
1231 		subs	r3, r3, #1 << 26
1232 		bcs	2b			@ entries 63 to 0
1233 		subs 	r1, r1, #1 << 5
1234 		bcs	1b			@ segments 7 to 0
1235 
1236 		teq	r2, #0
1237 		mcrne	p15, 0, ip, c7, c5, 0	@ invalidate I cache
1238 		mcr	p15, 0, ip, c7, c10, 4	@ drain WB
1239 		mov	pc, lr
1240 
1241 __fa526_cache_flush:
1242 		tst	r4, #1
1243 		movne	pc, lr
1244 		mov	r1, #0
1245 		mcr	p15, 0, r1, c7, c14, 0	@ clean and invalidate D cache
1246 		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
1247 		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1248 		mov	pc, lr
1249 
1250 __armv6_mmu_cache_flush:
1251 		mov	r1, #0
1252 		tst	r4, #1
1253 		mcreq	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
1254 		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
1255 		mcreq	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
1256 		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1257 		mov	pc, lr
1258 
1259 __armv7_mmu_cache_flush:
1260 		enable_cp15_barriers	r10
1261 		tst	r4, #1
1262 		bne	iflush
1263 		mrc	p15, 0, r10, c0, c1, 5	@ read ID_MMFR1
1264 		tst	r10, #0xf << 16		@ hierarchical cache (ARMv7)
1265 		mov	r10, #0
1266 		beq	hierarchical
1267 		mcr	p15, 0, r10, c7, c14, 0	@ clean+invalidate D
1268 		b	iflush
1269 hierarchical:
1270 		dcache_line_size r1, r2		@ r1 := dcache min line size
1271 		sub	r2, r1, #1		@ r2 := line size mask
1272 		bic	r0, r0, r2		@ round down start to line size
1273 		sub	r11, r11, #1		@ end address is exclusive
1274 		bic	r11, r11, r2		@ round down end to line size
1275 0:		cmp	r0, r11			@ finished?
1276 		bgt	iflush
1277 		mcr	p15, 0, r0, c7, c14, 1	@ Dcache clean/invalidate by VA
1278 		add	r0, r0, r1
1279 		b	0b
1280 iflush:
1281 		mcr	p15, 0, r10, c7, c10, 4	@ DSB
1282 		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I+BTB
1283 		mcr	p15, 0, r10, c7, c10, 4	@ DSB
1284 		mcr	p15, 0, r10, c7, c5, 4	@ ISB
1285 		mov	pc, lr
1286 
1287 __armv5tej_mmu_cache_flush:
1288 		tst	r4, #1
1289 		movne	pc, lr
1290 1:		mrc	p15, 0, APSR_nzcv, c7, c14, 3	@ test,clean,invalidate D cache
1291 		bne	1b
1292 		mcr	p15, 0, r0, c7, c5, 0	@ flush I cache
1293 		mcr	p15, 0, r0, c7, c10, 4	@ drain WB
1294 		mov	pc, lr
1295 
1296 __armv4_mmu_cache_flush:
1297 		tst	r4, #1
1298 		movne	pc, lr
1299 		mov	r2, #64*1024		@ default: 32K dcache size (*2)
1300 		mov	r11, #32		@ default: 32 byte line size
1301 		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
1302 		teq	r3, r9			@ cache ID register present?
1303 		beq	no_cache_id
1304 		mov	r1, r3, lsr #18
1305 		and	r1, r1, #7
1306 		mov	r2, #1024
1307 		mov	r2, r2, lsl r1		@ base dcache size *2
1308 		tst	r3, #1 << 14		@ test M bit
1309 		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
1310 		mov	r3, r3, lsr #12
1311 		and	r3, r3, #3
1312 		mov	r11, #8
1313 		mov	r11, r11, lsl r3	@ cache line size in bytes
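		/*
		 * i.e. roughly: r2 (twice the D-cache size) becomes
		 * 1024 << ((ctype >> 18) & 7), plus half again if bit 14 (M)
		 * is set, and r11 (line size in bytes) becomes
		 * 8 << ((ctype >> 12) & 3); the defaults above are used when
		 * no distinct cache type register is present.
		 */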
1314 no_cache_id:
1315 		mov	r1, pc
1316 		bic	r1, r1, #63		@ align to longest cache line
1317 		add	r2, r1, r2
1318 1:
1319  ARM(		ldr	r3, [r1], r11		) @ s/w flush D cache
1320  THUMB(		ldr     r3, [r1]		) @ s/w flush D cache
1321  THUMB(		add     r1, r1, r11		)
1322 		teq	r1, r2
1323 		bne	1b
1324 
1325 		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
1326 		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
1327 		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1328 		mov	pc, lr
1329 
1330 __armv3_mmu_cache_flush:
1331 __armv3_mpu_cache_flush:
1332 		tst	r4, #1
1333 		movne	pc, lr
1334 		mov	r1, #0
1335 		mcr	p15, 0, r1, c7, c0, 0	@ invalidate whole cache v3
1336 		mov	pc, lr
1337 
1338 /*
1339  * Various debugging routines for printing hex characters and
1340  * memory, which again must be relocatable.
1341  */
1342 #ifdef DEBUG
1343 		.align	2
1344 		.type	phexbuf,#object
1345 phexbuf:	.space	12
1346 		.size	phexbuf, . - phexbuf
1347 
1348 @ phex corrupts {r0, r1, r2, r3}
1349 phex:		adr	r3, phexbuf
1350 		mov	r2, #0
1351 		strb	r2, [r3, r1]
1352 1:		subs	r1, r1, #1
1353 		movmi	r0, r3
1354 		bmi	puts
1355 		and	r2, r0, #15
1356 		mov	r0, r0, lsr #4
1357 		cmp	r2, #10
1358 		addge	r2, r2, #7
1359 		add	r2, r2, #'0'
1360 		strb	r2, [r3, r1]
1361 		b	1b
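@ Note: phex builds the r1-digit hex representation of r0 backwards into
@ phexbuf (least significant nibble stored first, NUL-terminated), then
@ tail-calls puts to print it.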
1362 
1363 @ puts corrupts {r0, r1, r2, r3}
1364 puts:		loadsp	r3, r2, r1
1365 1:		ldrb	r2, [r0], #1
1366 		teq	r2, #0
1367 		moveq	pc, lr
1368 2:		writeb	r2, r3, r1
1369 		mov	r1, #0x00020000
1370 3:		subs	r1, r1, #1
1371 		bne	3b
1372 		teq	r2, #'\n'
1373 		moveq	r2, #'\r'
1374 		beq	2b
1375 		teq	r0, #0
1376 		bne	1b
1377 		mov	pc, lr
1378 @ putc corrupts {r0, r1, r2, r3}
1379 putc:
1380 		mov	r2, r0
1381 		loadsp	r3, r1, r0
1382 		mov	r0, #0
1383 		b	2b
1384 
1385 @ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
1386 memdump:	mov	r12, r0
1387 		mov	r10, lr
1388 		mov	r11, #0
1389 2:		mov	r0, r11, lsl #2
1390 		add	r0, r0, r12
1391 		mov	r1, #8
1392 		bl	phex
1393 		mov	r0, #':'
1394 		bl	putc
1395 1:		mov	r0, #' '
1396 		bl	putc
1397 		ldr	r0, [r12, r11, lsl #2]
1398 		mov	r1, #8
1399 		bl	phex
1400 		and	r0, r11, #7
1401 		teq	r0, #3
1402 		moveq	r0, #' '
1403 		bleq	putc
1404 		and	r0, r11, #7
1405 		add	r11, r11, #1
1406 		teq	r0, #7
1407 		bne	1b
1408 		mov	r0, #'\n'
1409 		bl	putc
1410 		cmp	r11, #64
1411 		blt	2b
1412 		mov	pc, r10
1413 #endif
1414 
1415 		.ltorg
1416 
1417 #ifdef CONFIG_ARM_VIRT_EXT
1418 .align 5
1419 __hyp_reentry_vectors:
1420 		W(b)	.			@ reset
1421 		W(b)	.			@ undef
1422 #ifdef CONFIG_EFI_STUB
1423 		W(b)	__enter_kernel_from_hyp	@ hvc from HYP
1424 #else
1425 		W(b)	.			@ svc
1426 #endif
1427 		W(b)	.			@ pabort
1428 		W(b)	.			@ dabort
1429 		W(b)	__enter_kernel		@ hyp
1430 		W(b)	.			@ irq
1431 		W(b)	.			@ fiq
1432 #endif /* CONFIG_ARM_VIRT_EXT */
1433 
1434 __enter_kernel:
1435 		mov	r0, #0			@ must be 0
1436 		mov	r1, r7			@ restore architecture number
1437 		mov	r2, r8			@ restore atags pointer
1438  ARM(		mov	pc, r4		)	@ call kernel
1439  M_CLASS(	add	r4, r4, #1	)	@ enter in Thumb mode for M class
1440  THUMB(		bx	r4		)	@ entry point is always ARM for A/R classes
1441 
1442 reloc_code_end:
1443 
1444 #ifdef CONFIG_EFI_STUB
1445 __enter_kernel_from_hyp:
1446 		mrc	p15, 4, r0, c1, c0, 0	@ read HSCTLR
1447 		bic	r0, r0, #0x5		@ disable MMU and caches
1448 		mcr	p15, 4, r0, c1, c0, 0	@ write HSCTLR
1449 		isb
1450 		b	__enter_kernel
1451 
1452 ENTRY(efi_enter_kernel)
1453 		mov	r4, r0			@ preserve image base
1454 		mov	r8, r1			@ preserve DT pointer
1455 
1456 		adr_l	r0, call_cache_fn
1457 		adr	r1, 0f			@ clean the region of code we
1458 		bl	cache_clean_flush	@ may run with the MMU off
1459 
1460 #ifdef CONFIG_ARM_VIRT_EXT
1461 		@
1462 		@ The EFI spec does not support booting on ARM in HYP mode,
1463 		@ since it mandates that the MMU and caches are on, with all
1464 		@ 32-bit addressable DRAM mapped 1:1 using short descriptors.
1465 		@
1466 		@ While the EDK2 reference implementation adheres to this,
1467 		@ U-Boot might decide to enter the EFI stub in HYP mode
1468 		@ anyway, with the MMU and caches either on or off.
1469 		@
1470 		mrs	r0, cpsr		@ get the current mode
1471 		msr	spsr_cxsf, r0		@ record boot mode
1472 		and	r0, r0, #MODE_MASK	@ are we running in HYP mode?
1473 		cmp	r0, #HYP_MODE
1474 		bne	.Lefi_svc
1475 
1476 		mrc	p15, 4, r1, c1, c0, 0	@ read HSCTLR
1477 		tst	r1, #0x1		@ MMU enabled at HYP?
1478 		beq	1f
1479 
1480 		@
1481 		@ When running in HYP mode with the caches on, we're better
1482 		@ off just carrying on using the cached 1:1 mapping that the
1483 		@ firmware provided. Set up the HYP vectors so HVC instructions
1484 		@ issued from HYP mode take us to the correct handler code. We
1485 		@ will disable the MMU before jumping to the kernel proper.
1486 		@
1487  ARM(		bic	r1, r1, #(1 << 30)	) @ clear HSCTLR.TE
1488  THUMB(		orr	r1, r1, #(1 << 30)	) @ set HSCTLR.TE
1489 		mcr	p15, 4, r1, c1, c0, 0
1490 		adr	r0, __hyp_reentry_vectors
1491 		mcr	p15, 4, r0, c12, c0, 0	@ set HYP vector base (HVBAR)
1492 		isb
1493 		b	.Lefi_hyp
1494 
1495 		@
1496 		@ When running in HYP mode with the caches off, we need to drop
1497 		@ into SVC mode now, and let the decompressor set up its cached
1498 		@ 1:1 mapping as usual.
1499 		@
1500 1:		mov	r9, r4			@ preserve image base
1501 		bl	__hyp_stub_install	@ install HYP stub vectors
1502 		safe_svcmode_maskall	r1	@ drop to SVC mode
1503 		msr	spsr_cxsf, r0		@ record boot mode
1504 		orr	r4, r9, #1		@ restore image base and set LSB
1505 		b	.Lefi_hyp
1506 .Lefi_svc:
1507 #endif
1508 		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
1509 		tst	r0, #0x1		@ MMU enabled?
1510 		orreq	r4, r4, #1		@ set LSB if not
1511 
1512 .Lefi_hyp:
1513 		mov	r0, r8			@ DT start
1514 		add	r1, r8, r2		@ DT end
1515 		bl	cache_clean_flush
1516 
1517 		adr	r0, 0f			@ switch to our stack
1518 		ldr	sp, [r0]
1519 		add	sp, sp, r0
1520 
1521 		mov	r5, #0			@ appended DTB size
1522 		mov	r7, #0xFFFFFFFF		@ machine ID
1523 		b	wont_overwrite
1524 ENDPROC(efi_enter_kernel)
1525 0:		.long	.L_user_stack_end - .
1526 #endif
1527 
1528 		.align
1529 		.section ".stack", "aw", %nobits
1530 .L_user_stack:	.space	4096
1531 .L_user_stack_end:
1532