1/* SPDX-License-Identifier: GPL-2.0-only */
2/*
3 *  linux/arch/arm/boot/compressed/head.S
4 *
5 *  Copyright (C) 1996-2002 Russell King
6 *  Copyright (C) 2004 Hyok S. Choi (MPU support)
7 */
8#include <linux/linkage.h>
9#include <asm/assembler.h>
10#include <asm/v7m.h>
11
12#include "efi-header.S"
13
14 AR_CLASS(	.arch	armv7-a	)
15 M_CLASS(	.arch	armv7-m	)
16
17/*
18 * Debugging stuff
19 *
20 * Note that these macros must not contain any code which is not
21 * 100% relocatable.  Any attempt to do so will result in a crash.
22 * Please select one of the following when turning on debugging.
23 */
24#ifdef DEBUG
25
26#if defined(CONFIG_DEBUG_ICEDCC)
27
28#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
29		.macro	loadsp, rb, tmp1, tmp2
30		.endm
31		.macro	writeb, ch, rb, tmp
32		mcr	p14, 0, \ch, c0, c5, 0
33		.endm
34#elif defined(CONFIG_CPU_XSCALE)
35		.macro	loadsp, rb, tmp1, tmp2
36		.endm
37		.macro	writeb, ch, rb, tmp
38		mcr	p14, 0, \ch, c8, c0, 0
39		.endm
40#else
41		.macro	loadsp, rb, tmp1, tmp2
42		.endm
43		.macro	writeb, ch, rb, tmp
44		mcr	p14, 0, \ch, c1, c0, 0
45		.endm
46#endif
47
48#else
49
50#include CONFIG_DEBUG_LL_INCLUDE
51
52		.macro	writeb,	ch, rb, tmp
53#ifdef CONFIG_DEBUG_UART_FLOW_CONTROL
54		waituartcts \tmp, \rb
55#endif
56		waituarttxrdy \tmp, \rb
57		senduart \ch, \rb
58		busyuart \tmp, \rb
59		.endm
60
61#if defined(CONFIG_ARCH_SA1100)
62		.macro	loadsp, rb, tmp1, tmp2
63		mov	\rb, #0x80000000	@ physical base address
64#ifdef CONFIG_DEBUG_LL_SER3
65		add	\rb, \rb, #0x00050000	@ Ser3
66#else
67		add	\rb, \rb, #0x00010000	@ Ser1
68#endif
69		.endm
70#else
71		.macro	loadsp,	rb, tmp1, tmp2
72		addruart \rb, \tmp1, \tmp2
73		.endm
74#endif
75#endif
76#endif
77
78		.macro	kputc,val
79		mov	r0, \val
80		bl	putc
81		.endm
82
83		.macro	kphex,val,len
84		mov	r0, \val
85		mov	r1, #\len
86		bl	phex
87		.endm
88
89		/*
90		 * Debug kernel copy by printing the memory addresses involved
91		 */
92		.macro dbgkc, begin, end, cbegin, cend
93#ifdef DEBUG
94		kputc   #'C'
95		kputc   #':'
96		kputc   #'0'
97		kputc   #'x'
98		kphex   \begin, 8	/* Start of compressed kernel */
99		kputc	#'-'
100		kputc	#'0'
101		kputc	#'x'
102		kphex	\end, 8		/* End of compressed kernel */
103		kputc	#'-'
104		kputc	#'>'
105		kputc   #'0'
106		kputc   #'x'
107		kphex   \cbegin, 8	/* Start of kernel copy */
108		kputc	#'-'
109		kputc	#'0'
110		kputc	#'x'
111		kphex	\cend, 8	/* End of kernel copy */
112		kputc	#'\n'
113#endif
114		.endm
115
116		/*
117		 * Debug print of the final appended DTB location
118		 */
119		.macro dbgadtb, begin, end
120#ifdef DEBUG
121		kputc   #'D'
122		kputc   #'T'
123		kputc   #'B'
124		kputc   #':'
125		kputc   #'0'
126		kputc   #'x'
127		kphex   \begin, 8	/* Start of appended DTB */
128		kputc	#' '
129		kputc	#'('
130		kputc	#'0'
131		kputc	#'x'
132		kphex	\end, 8		/* End of appended DTB */
133		kputc	#')'
134		kputc	#'\n'
135#endif
136		.endm
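		/*
		 * Illustration only: with DEBUG enabled, the two macros above
		 * emit lines of the form
		 *	C:0x<begin>-0x<end>->0x<cbegin>-0x<cend>
		 *	DTB:0x<begin> (0x<end>)
		 * where each field is the 8-digit hex value printed by kphex.
		 */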
137
138		.macro	enable_cp15_barriers, reg
139		mrc	p15, 0, \reg, c1, c0, 0	@ read SCTLR
140		tst	\reg, #(1 << 5)		@ CP15BEN bit set?
141		bne	.L_\@
142		orr	\reg, \reg, #(1 << 5)	@ CP15 barrier instructions
143		mcr	p15, 0, \reg, c1, c0, 0	@ write SCTLR
144 ARM(		.inst   0xf57ff06f		@ v7+ isb	)
145 THUMB(		isb						)
146.L_\@:
147		.endm
148
149		/*
150		 * The kernel build system appends the size of the
151		 * decompressed kernel at the end of the compressed data
152		 * in little-endian form.
153		 */
154		.macro	get_inflated_image_size, res:req, tmp1:req, tmp2:req
155		adr	\res, .Linflated_image_size_offset
156		ldr	\tmp1, [\res]
157		add	\tmp1, \tmp1, \res	@ address of inflated image size
158
159		ldrb	\res, [\tmp1]		@ get_unaligned_le32
160		ldrb	\tmp2, [\tmp1, #1]
161		orr	\res, \res, \tmp2, lsl #8
162		ldrb	\tmp2, [\tmp1, #2]
163		ldrb	\tmp1, [\tmp1, #3]
164		orr	\res, \res, \tmp2, lsl #16
165		orr	\res, \res, \tmp1, lsl #24
166		.endm
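		/*
		 * Equivalent C, for illustration only: the ldrb/orr sequence
		 * above open-codes get_unaligned_le32() on the four bytes at
		 * input_data_end - 4, i.e. roughly
		 *
		 *	const u8 *p = input_data_end - 4;
		 *	u32 size = p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24);
		 */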
167
168		.section ".start", "ax"
169/*
170 * sort out different calling conventions
171 */
172		.align
173		/*
174		 * Always enter in ARM state for CPUs that support the ARM ISA.
175		 * As of today (2014) that's exactly the members of the A and R
176		 * classes.
177		 */
178 AR_CLASS(	.arm	)
179start:
180		.type	start,#function
181		/*
182		 * These 7 nops along with the 1 nop immediately below for
183		 * !THUMB2 form 8 nops that make the compressed kernel bootable
184		 * on legacy ARM systems that assumed the kernel was in a.out
185		 * binary format. The boot loaders on these systems would
186		 * jump 32 bytes into the image to skip the a.out header.
187		 * With these 8 nops filling exactly 32 bytes, things still
188		 * work as expected on these legacy systems. Thumb2 mode keeps
189		 * 7 of the nops, as it turns out that some boot loaders
190		 * were patching the initial instructions of the kernel, i.e.
191		 * they had started to exploit this "patch area".
192		 */
193		.rept	7
194		__nop
195		.endr
196#ifndef CONFIG_THUMB2_KERNEL
197		__nop
198#else
199 AR_CLASS(	sub	pc, pc, #3	)	@ A/R: switch to Thumb2 mode
200  M_CLASS(	nop.w			)	@ M: already in Thumb2 mode
201		.thumb
202#endif
203		W(b)	1f
204
205		.word	_magic_sig	@ Magic numbers to help the loader
206		.word	_magic_start	@ absolute load/run zImage address
207		.word	_magic_end	@ zImage end address
208		.word	0x04030201	@ endianness flag
209		.word	0x45454545	@ another magic number to indicate
210		.word	_magic_table	@ additional data table
211
212		__EFI_HEADER
2131:
214 ARM_BE8(	setend	be		)	@ go BE8 if compiled for BE8
215 AR_CLASS(	mrs	r9, cpsr	)
216#ifdef CONFIG_ARM_VIRT_EXT
217		bl	__hyp_stub_install	@ get into SVC mode, reversibly
218#endif
219		mov	r7, r1			@ save architecture ID
220		mov	r8, r2			@ save atags pointer
221
222#ifndef CONFIG_CPU_V7M
223		/*
224		 * Booting from Angel - need to enter SVC mode and disable
225		 * FIQs/IRQs (numeric definitions from angel arm.h source).
226		 * We only do this if we were in user mode on entry.
227		 */
228		mrs	r2, cpsr		@ get current mode
229		tst	r2, #3			@ not user?
230		bne	not_angel
231		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
232 ARM(		swi	0x123456	)	@ angel_SWI_ARM
233 THUMB(		svc	0xab		)	@ angel_SWI_THUMB
234not_angel:
235		safe_svcmode_maskall r0
236		msr	spsr_cxsf, r9		@ Save the CPU boot mode in
237						@ SPSR
238#endif
239		/*
240		 * Note that some cache flushing and other stuff may
241		 * be needed here - is there an Angel SWI call for this?
242		 */
243
244		/*
245		 * some architecture specific code can be inserted
246		 * by the linker here, but it should preserve r7, r8, and r9.
247		 */
248
249		.text
250
251#ifdef CONFIG_AUTO_ZRELADDR
252		/*
253		 * Find the start of physical memory.  As we are executing
254		 * without the MMU on, we are in the physical address space.
255		 * We just need to get rid of any offset by aligning the
256		 * address.
257		 *
258		 * This alignment is a balance between the requirements of
259		 * different platforms - we have chosen 128MB to allow
260		 * platforms which align the start of their physical memory
261		 * to 128MB to use this feature, while allowing the zImage
262		 * to be placed within the first 128MB of memory on other
263		 * platforms.  Increasing the alignment means we place
264		 * stricter alignment requirements on the start of physical
265		 * memory, but relaxing it means that we break people who
266		 * are already placing their zImage in (e.g.) the top 64MB
267		 * of this range.
268		 */
269		mov	r4, pc
270		and	r4, r4, #0xf8000000
271		/* Determine final kernel image address. */
272		add	r4, r4, #TEXT_OFFSET
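		/*
		 * Worked example with a hypothetical address: if this code is
		 * executing at 0x60f04000, masking with 0xf8000000 gives a
		 * presumed start of physical memory of 0x60000000, and the
		 * kernel will be placed at 0x60000000 + TEXT_OFFSET.
		 */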
273#else
274		ldr	r4, =zreladdr
275#endif
276
277		/*
278		 * Set up a page table only if it won't overwrite ourselves.
279		 * That means r4 < pc || r4 - 16k page directory > &_end.
280		 * Given that r4 > &_end is rather infrequent, we add a rough
281		 * additional 1MB of room for a possible appended DTB.
282		 */
283		mov	r0, pc
284		cmp	r0, r4
285		ldrcc	r0, .Lheadroom
286		addcc	r0, r0, pc
287		cmpcc	r4, r0
288		orrcc	r4, r4, #1		@ remember we skipped cache_on
289		blcs	cache_on
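		/*
		 * C sketch of the test above, illustration only (headroom is
		 * the value stored at .Lheadroom, i.e. the image size plus the
		 * 16k page directory plus ~1MB of DTB room):
		 *
		 *	if (pc >= r4 || r4 >= pc + headroom)
		 *		cache_on();
		 *	else
		 *		r4 |= 1;	and remember we skipped cache_on
		 */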
290
291restart:	adr	r0, LC1
292		ldr	sp, [r0]
293		ldr	r6, [r0, #4]
294		add	sp, sp, r0
295		add	r6, r6, r0
296
297		get_inflated_image_size	r9, r10, lr
298
299#ifndef CONFIG_ZBOOT_ROM
300		/* malloc space is above the relocated stack (64k max) */
301		add	r10, sp, #MALLOC_SIZE
302#else
303		/*
304		 * With ZBOOT_ROM the bss/stack is non-relocatable,
305		 * but someone could still run this code from RAM,
306		 * in which case our reference is _edata.
307		 */
308		mov	r10, r6
309#endif
310
311		mov	r5, #0			@ init dtb size to 0
312#ifdef CONFIG_ARM_APPENDED_DTB
313/*
314 *   r4  = final kernel address (possibly with LSB set)
315 *   r5  = appended dtb size (still unknown)
316 *   r6  = _edata
317 *   r7  = architecture ID
318 *   r8  = atags/device tree pointer
319 *   r9  = size of decompressed image
320 *   r10 = end of this image, including bss/stack/malloc space if non-XIP
321 *   sp  = stack pointer
322 *
323 * if there are device trees (dtb) appended to zImage, advance r10 so that the
324 * dtb data will get relocated along with the kernel if necessary.
325 */
326
327		ldr	lr, [r6, #0]
328#ifndef __ARMEB__
329		ldr	r1, =0xedfe0dd0		@ sig is 0xd00dfeed big endian
330#else
331		ldr	r1, =0xd00dfeed
332#endif
333		cmp	lr, r1
334		bne	dtb_check_done		@ not found
335
336#ifdef CONFIG_ARM_ATAG_DTB_COMPAT
337		/*
338		 * OK... Let's do some funky business here.
339		 * If we do have a DTB appended to zImage, and we do have
340		 * an ATAG list around, we want the latter to be translated
341		 * and folded into the former here. No GOT fixup has occurred
342		 * yet, but none of the code we're about to call uses any
343		 * global variables.
344		 */
345
346		/* Get the initial DTB size */
347		ldr	r5, [r6, #4]
348#ifndef __ARMEB__
349		/* convert to little endian */
350		eor	r1, r5, r5, ror #16
351		bic	r1, r1, #0x00ff0000
352		mov	r5, r5, ror #8
353		eor	r5, r5, r1, lsr #8
354#endif
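		@ Illustration: the eor/bic/ror sequence above is the classic
		@ ARM byte-swap idiom, i.e. r5 = __builtin_bswap32(r5) in C
		@ (the FDT header stores its size fields as big-endian).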
355		dbgadtb	r6, r5
356		/* 50% DTB growth should be good enough */
357		add	r5, r5, r5, lsr #1
358		/* preserve 64-bit alignment */
359		add	r5, r5, #7
360		bic	r5, r5, #7
361		/* clamp to 32KB min and 1MB max */
362		cmp	r5, #(1 << 15)
363		movlo	r5, #(1 << 15)
364		cmp	r5, #(1 << 20)
365		movhi	r5, #(1 << 20)
366		/* temporarily relocate the stack past the DTB work space */
367		add	sp, sp, r5
368
369		mov	r0, r8
370		mov	r1, r6
371		mov	r2, r5
372		bl	atags_to_fdt
373
374		/*
375		 * If the returned value is 1, there is no ATAG at the location
376		 * pointed to by r8.  Try the typical 0x100 offset from start
377		 * of RAM and hope for the best.
378		 */
379		cmp	r0, #1
380		sub	r0, r4, #TEXT_OFFSET
381		bic	r0, r0, #1
382		add	r0, r0, #0x100
383		mov	r1, r6
384		mov	r2, r5
385		bleq	atags_to_fdt
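		@ C sketch of the call and fallback above (illustrative):
		@	if (atags_to_fdt(atags, fdt, size) == 1)
		@		atags_to_fdt(ram_start + 0x100, fdt, size);
		@ where ram_start is r4, LSB cleared, minus TEXT_OFFSET.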
386
387		sub	sp, sp, r5
388#endif
389
390		mov	r8, r6			@ use the appended device tree
391
392		/*
393		 * Make sure that the DTB doesn't end up in the final
394		 * kernel's .bss area. To do so, we adjust the decompressed
395		 * kernel size to compensate if that .bss size is larger
396		 * than the relocated code.
397		 */
398		ldr	r5, =_kernel_bss_size
399		adr	r1, wont_overwrite
400		sub	r1, r6, r1
401		subs	r1, r5, r1
402		addhi	r9, r9, r1
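		@ i.e., as a C sketch:
		@	excess = _kernel_bss_size - (_edata - wont_overwrite);
		@	if (excess > 0)
		@		inflated_size += excess;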
403
404		/* Get the current DTB size */
405		ldr	r5, [r6, #4]
406#ifndef __ARMEB__
407		/* convert r5 (dtb size) to little endian */
408		eor	r1, r5, r5, ror #16
409		bic	r1, r1, #0x00ff0000
410		mov	r5, r5, ror #8
411		eor	r5, r5, r1, lsr #8
412#endif
413
414		/* preserve 64-bit alignment */
415		add	r5, r5, #7
416		bic	r5, r5, #7
417
418		/* relocate some pointers past the appended dtb */
419		add	r6, r6, r5
420		add	r10, r10, r5
421		add	sp, sp, r5
422dtb_check_done:
423#endif
424
425/*
426 * Check to see if we will overwrite ourselves.
427 *   r4  = final kernel address (possibly with LSB set)
428 *   r9  = size of decompressed image
429 *   r10 = end of this image, including bss/stack/malloc space if non-XIP
430 * We basically want:
431 *   r4 - 16k page directory >= r10 -> OK
432 *   r4 + image length <= address of wont_overwrite -> OK
433 * Note: the possible LSB in r4 is harmless here.
434 */
435		add	r10, r10, #16384
436		cmp	r4, r10
437		bhs	wont_overwrite
438		add	r10, r4, r9
439		adr	r9, wont_overwrite
440		cmp	r10, r9
441		bls	wont_overwrite
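/*
 * C sketch of the two comparisons above, using the entry values of the
 * registers listed in the comment (illustration only):
 *
 *	if (r4 >= r10 + 16384 || r4 + r9 <= (u32)&wont_overwrite)
 *		goto wont_overwrite;
 */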
442
443/*
444 * Relocate ourselves past the end of the decompressed kernel.
445 *   r6  = _edata
446 *   r10 = end of the decompressed kernel
447 * Because we always copy ahead, we need to do it from the end and go
448 * backward in case the source and destination overlap.
449 */
450		/*
451		 * Bump to the next 256-byte boundary with the size of
452		 * the relocation code added. This avoids overwriting
453		 * ourselves when the offset is small.
454		 */
455		add	r10, r10, #((reloc_code_end - restart + 256) & ~255)
456		bic	r10, r10, #255
457
458		/* Get start of code we want to copy and align it down. */
459		adr	r5, restart
460		bic	r5, r5, #31
461
462/* Relocate the hyp vector base if necessary */
463#ifdef CONFIG_ARM_VIRT_EXT
464		mrs	r0, spsr
465		and	r0, r0, #MODE_MASK
466		cmp	r0, #HYP_MODE
467		bne	1f
468
469		/*
470		 * Compute the address of the hyp vectors after relocation.
471		 * This requires some arithmetic since we cannot directly
472		 * reference __hyp_stub_vectors in a PC-relative way.
473		 * Call __hyp_set_vectors with the new address so that we
474		 * can HVC again after the copy.
475		 */
4760:		adr	r0, 0b
477		movw	r1, #:lower16:__hyp_stub_vectors - 0b
478		movt	r1, #:upper16:__hyp_stub_vectors - 0b
479		add	r0, r0, r1
480		sub	r0, r0, r5
481		add	r0, r0, r10
482		bl	__hyp_set_vectors
4831:
484#endif
485
486		sub	r9, r6, r5		@ size to copy
487		add	r9, r9, #31		@ rounded up to a multiple
488		bic	r9, r9, #31		@ ... of 32 bytes
489		add	r6, r9, r5
490		add	r9, r9, r10
491
492#ifdef DEBUG
493		sub     r10, r6, r5
494		sub     r10, r9, r10
495		/*
496		 * We are about to copy the kernel to a new memory area.
497		 * The boundaries of the new memory area can be found in
498		 * r10 and r9, whilst r5 and r6 contain the boundaries
499		 * of the memory we are going to copy.
500		 * Calling dbgkc will help with the printing of this
501		 * information.
502		 */
503		dbgkc	r5, r6, r10, r9
504#endif
505
5061:		ldmdb	r6!, {r0 - r3, r10 - r12, lr}
507		cmp	r6, r5
508		stmdb	r9!, {r0 - r3, r10 - r12, lr}
509		bhi	1b
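		/*
		 * The loop above is a backwards memmove() done 32 bytes at a
		 * time; as a C sketch with u32 pointers (names illustrative):
		 *
		 *	do { src -= 8; dst -= 8; memcpy(dst, src, 32); }
		 *	while (src > copy_start);
		 */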
510
511		/* Preserve offset to relocated code. */
512		sub	r6, r9, r6
513
514		mov	r0, r9			@ start of relocated zImage
515		add	r1, sp, r6		@ end of relocated zImage
516		bl	cache_clean_flush
517
518		badr	r0, restart
519		add	r0, r0, r6
520		mov	pc, r0
521
522wont_overwrite:
523		adr	r0, LC0
524		ldmia	r0, {r1, r2, r3, r11, r12}
525		sub	r0, r0, r1		@ calculate the delta offset
526
527/*
528 * If delta is zero, we are running at the address we were linked at.
529 *   r0  = delta
530 *   r2  = BSS start
531 *   r3  = BSS end
532 *   r4  = kernel execution address (possibly with LSB set)
533 *   r5  = appended dtb size (0 if not present)
534 *   r7  = architecture ID
535 *   r8  = atags pointer
536 *   r11 = GOT start
537 *   r12 = GOT end
538 *   sp  = stack pointer
539 */
540		orrs	r1, r0, r5
541		beq	not_relocated
542
543		add	r11, r11, r0
544		add	r12, r12, r0
545
546#ifndef CONFIG_ZBOOT_ROM
547		/*
548		 * If we're running fully PIC (i.e. CONFIG_ZBOOT_ROM is not set),
549		 * we need to fix up pointers into the BSS region.
550		 * Note that the stack pointer has already been fixed up.
551		 */
552		add	r2, r2, r0
553		add	r3, r3, r0
554
555		/*
556		 * Relocate all entries in the GOT table.
557		 * Bump bss entries to _edata + dtb size
558		 */
5591:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
560		add	r1, r1, r0		@ This fixes up C references
561		cmp	r1, r2			@ if entry >= bss_start &&
562		cmphs	r3, r1			@       bss_end > entry
563		addhi	r1, r1, r5		@    entry += dtb size
564		str	r1, [r11], #4		@ next entry
565		cmp	r11, r12
566		blo	1b
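		/*
		 * C sketch of the GOT fixup above (illustration only):
		 *
		 *	for (p = got_start; p < got_end; p++) {
		 *		*p += delta;
		 *		if (*p >= bss_start && *p < bss_end)
		 *			*p += dtb_size;
		 *	}
		 */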
567
568		/* bump our bss pointers too */
569		add	r2, r2, r5
570		add	r3, r3, r5
571
572#else
573
574		/*
575		 * Relocate entries in the GOT table.  We only relocate
576		 * the entries that are outside the (relocated) BSS region.
577		 */
5781:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
579		cmp	r1, r2			@ entry < bss_start ||
580		cmphs	r3, r1			@ _end < entry
581		addlo	r1, r1, r0		@ table.  This fixes up the
582		str	r1, [r11], #4		@ C references.
583		cmp	r11, r12
584		blo	1b
585#endif
586
587not_relocated:	mov	r0, #0
5881:		str	r0, [r2], #4		@ clear bss
589		str	r0, [r2], #4
590		str	r0, [r2], #4
591		str	r0, [r2], #4
592		cmp	r2, r3
593		blo	1b
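		@ i.e. roughly memset(__bss_start, 0, _end - __bss_start),
		@ unrolled to four words per iteration.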
594
595		/*
596		 * Did we skip the cache setup earlier?
597		 * That is indicated by the LSB in r4.
598		 * Do it now if so.
599		 */
600		tst	r4, #1
601		bic	r4, r4, #1
602		blne	cache_on
603
604/*
605 * The C runtime environment should now be set up sufficiently.
606 * Set up some pointers, and start decompressing.
607 *   r4  = kernel execution address
608 *   r7  = architecture ID
609 *   r8  = atags pointer
610 */
611		mov	r0, r4
612		mov	r1, sp			@ malloc space above stack
613		add	r2, sp, #MALLOC_SIZE	@ 64k max
614		mov	r3, r7
615		bl	decompress_kernel
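		@ decompress_kernel() is implemented in C (misc.c); its
		@ prototype is roughly:
		@	void decompress_kernel(unsigned long output_start,
		@			       unsigned long free_mem_ptr,
		@			       unsigned long free_mem_end,
		@			       int arch_id);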
616
617		get_inflated_image_size	r1, r2, r3
618
619		mov	r0, r4			@ start of inflated image
620		add	r1, r1, r0		@ end of inflated image
621		bl	cache_clean_flush
622		bl	cache_off
623
624#ifdef CONFIG_ARM_VIRT_EXT
625		mrs	r0, spsr		@ Get saved CPU boot mode
626		and	r0, r0, #MODE_MASK
627		cmp	r0, #HYP_MODE		@ if not booted in HYP mode...
628		bne	__enter_kernel		@ boot kernel directly
629
630		adr	r12, .L__hyp_reentry_vectors_offset
631		ldr	r0, [r12]
632		add	r0, r0, r12
633
634		bl	__hyp_set_vectors
635		__HVC(0)			@ otherwise bounce to hyp mode
636
637		b	.			@ should never be reached
638
639		.align	2
640.L__hyp_reentry_vectors_offset:	.long	__hyp_reentry_vectors - .
641#else
642		b	__enter_kernel
643#endif
644
645		.align	2
646		.type	LC0, #object
647LC0:		.word	LC0			@ r1
648		.word	__bss_start		@ r2
649		.word	_end			@ r3
650		.word	_got_start		@ r11
651		.word	_got_end		@ ip
652		.size	LC0, . - LC0
653
654		.type	LC1, #object
655LC1:		.word	.L_user_stack_end - LC1	@ sp
656		.word	_edata - LC1		@ r6
657		.size	LC1, . - LC1
658
659.Lheadroom:
660		.word	_end - restart + 16384 + 1024*1024
661
662.Linflated_image_size_offset:
663		.long	(input_data_end - 4) - .
664
665#ifdef CONFIG_ARCH_RPC
666		.globl	params
667params:		ldr	r0, =0x10000100		@ params_phys for RPC
668		mov	pc, lr
669		.ltorg
670		.align
671#endif
672
673/*
674 * dcache_line_size - get the minimum D-cache line size from the CTR register
675 * on ARMv7.
676 */
677		.macro	dcache_line_size, reg, tmp
678#ifdef CONFIG_CPU_V7M
679		movw	\tmp, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_CTR
680		movt	\tmp, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_CTR
681		ldr	\tmp, [\tmp]
682#else
683		mrc	p15, 0, \tmp, c0, c0, 1		@ read ctr
684#endif
685		lsr	\tmp, \tmp, #16
686		and	\tmp, \tmp, #0xf		@ cache line size encoding
687		mov	\reg, #4			@ bytes per word
688		mov	\reg, \reg, lsl \tmp		@ actual cache line size
689		.endm
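		@ Equivalent C (illustrative): line = 4 << ((ctr >> 16) & 0xf),
		@ i.e. the CTR DminLine field converted from words to bytes.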
690
691/*
692 * Turn on the cache.  We need to set up some page tables so that we
693 * can have both the I and D caches on.
694 *
695 * We place the page tables 16k down from the kernel execution address,
696 * and we hope that nothing else is using it.  If it is in use, we
697 * will go pop!
698 *
699 * On entry,
700 *  r4 = kernel execution address
701 *  r7 = architecture number
702 *  r8 = atags pointer
703 * On exit,
704 *  r0, r1, r2, r3, r9, r10, r12 corrupted
705 * This routine must preserve:
706 *  r4, r7, r8
707 */
708		.align	5
709cache_on:	mov	r3, #8			@ cache_on function
710		b	call_cache_fn
711
712/*
713 * Initialize the highest priority protection region, PR7
714 * to cover the whole 32-bit address space as cacheable and bufferable.
715 */
716__armv4_mpu_cache_on:
717		mov	r0, #0x3f		@ 4G, the whole
718		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
719		mcr 	p15, 0, r0, c6, c7, 1
720
721		mov	r0, #0x80		@ PR7
722		mcr	p15, 0, r0, c2, c0, 0	@ D-cache on
723		mcr	p15, 0, r0, c2, c0, 1	@ I-cache on
724		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
725
726		mov	r0, #0xc000
727		mcr	p15, 0, r0, c5, c0, 1	@ I-access permission
728		mcr	p15, 0, r0, c5, c0, 0	@ D-access permission
729
730		mov	r0, #0
731		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
732		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
733		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
734		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
735						@ ...I .... ..D. WC.M
736		orr	r0, r0, #0x002d		@ .... .... ..1. 11.1
737		orr	r0, r0, #0x1000		@ ...1 .... .... ....
738
739		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
740
741		mov	r0, #0
742		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
743		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
744		mov	pc, lr
745
746__armv3_mpu_cache_on:
747		mov	r0, #0x3f		@ 4G, the whole
748		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
749
750		mov	r0, #0x80		@ PR7
751		mcr	p15, 0, r0, c2, c0, 0	@ cache on
752		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
753
754		mov	r0, #0xc000
755		mcr	p15, 0, r0, c5, c0, 0	@ access permission
756
757		mov	r0, #0
758		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
759		/*
760		 * ?? ARMv3 MMU does not allow reading the control register,
761		 * does this really work on ARMv3 MPU?
762		 */
763		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
764						@ .... .... .... WC.M
765		orr	r0, r0, #0x000d		@ .... .... .... 11.1
766		/* ?? this overwrites the value constructed above? */
767		mov	r0, #0
768		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
769
770		/* ?? invalidate for the second time? */
771		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
772		mov	pc, lr
773
774#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
775#define CB_BITS 0x08
776#else
777#define CB_BITS 0x0c
778#endif
779
780__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
781		bic	r3, r3, #0xff		@ Align the pointer
782		bic	r3, r3, #0x3f00
783/*
784 * Initialise the page tables, turning on the cacheable and bufferable
785 * bits for the RAM area only.
786 */
787		mov	r0, r3
788		mov	r9, r0, lsr #18
789		mov	r9, r9, lsl #18		@ start of RAM
790		add	r10, r9, #0x10000000	@ a reasonable RAM size
791		mov	r1, #0x12		@ XN|U + section mapping
792		orr	r1, r1, #3 << 10	@ AP=11
793		add	r2, r3, #16384
7941:		cmp	r1, r9			@ if virt > start of RAM
795		cmphs	r10, r1			@   && end of RAM > virt
796		bic	r1, r1, #0x1c		@ clear XN|U + C + B
797		orrlo	r1, r1, #0x10		@ Set XN|U for non-RAM
798		orrhs	r1, r1, r6		@ set RAM section settings
799		str	r1, [r0], #4		@ 1:1 mapping
800		add	r1, r1, #1048576
801		teq	r0, r2
802		bne	1b
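/*
 * C sketch of the identity-mapping loop above (illustration only, names
 * hypothetical):
 *
 *	for (addr = 0; pgd != pgd_end; addr += SZ_1M)
 *		*pgd++ = addr | AP_RW_SECTION
 *			 | (in_ram(addr) ? cb_bits_from_r6 : XN);
 *
 * where in_ram() covers [start_of_RAM, start_of_RAM + 256MB).
 */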
803/*
804 * If we are ever running from Flash, then we surely want the cache
805 * to be enabled for our execution instance as well.  We map 2MB of it
806 * so there is no map overlap problem for up to a 1 MB compressed kernel.
807 * If the execution is in RAM then we would only be duplicating the above.
808 */
809		orr	r1, r6, #0x04		@ ensure B is set for this
810		orr	r1, r1, #3 << 10
811		mov	r2, pc
812		mov	r2, r2, lsr #20
813		orr	r1, r1, r2, lsl #20
814		add	r0, r3, r2, lsl #2
815		str	r1, [r0], #4
816		add	r1, r1, #1048576
817		str	r1, [r0]
818		mov	pc, lr
819ENDPROC(__setup_mmu)
820
821@ Enable unaligned access on v6, to allow better code generation
822@ for the decompressor C code:
823__armv6_mmu_cache_on:
824		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
825		bic	r0, r0, #2		@ A (no unaligned access fault)
826		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
827		mcr	p15, 0, r0, c1, c0, 0	@ write SCTLR
828		b	__armv4_mmu_cache_on
829
830__arm926ejs_mmu_cache_on:
831#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
832		mov	r0, #4			@ put dcache in WT mode
833		mcr	p15, 7, r0, c15, c0, 0
834#endif
835
836__armv4_mmu_cache_on:
837		mov	r12, lr
838#ifdef CONFIG_MMU
839		mov	r6, #CB_BITS | 0x12	@ U
840		bl	__setup_mmu
841		mov	r0, #0
842		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
843		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
844		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
845		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
846		orr	r0, r0, #0x0030
847 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
848		bl	__common_mmu_cache_on
849		mov	r0, #0
850		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
851#endif
852		mov	pc, r12
853
854__armv7_mmu_cache_on:
855		enable_cp15_barriers	r11
856		mov	r12, lr
857#ifdef CONFIG_MMU
858		mrc	p15, 0, r11, c0, c1, 4	@ read ID_MMFR0
859		tst	r11, #0xf		@ VMSA
860		movne	r6, #CB_BITS | 0x02	@ !XN
861		blne	__setup_mmu
862		mov	r0, #0
863		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
864		tst	r11, #0xf		@ VMSA
865		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
866#endif
867		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
868		bic	r0, r0, #1 << 28	@ clear SCTLR.TRE
869		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
870		orr	r0, r0, #0x003c		@ write buffer
871		bic	r0, r0, #2		@ A (no unaligned access fault)
872		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
873						@ (needed for ARM1176)
874#ifdef CONFIG_MMU
875 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
876		mrcne   p15, 0, r6, c2, c0, 2   @ read ttb control reg
877		orrne	r0, r0, #1		@ MMU enabled
878		movne	r1, #0xfffffffd		@ domain 0 = client
879		bic     r6, r6, #1 << 31        @ 32-bit translation system
880		bic     r6, r6, #(7 << 0) | (1 << 4)	@ use only ttbr0
881		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
882		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
883		mcrne   p15, 0, r6, c2, c0, 2   @ load ttb control
884#endif
885		mcr	p15, 0, r0, c7, c5, 4	@ ISB
886		mcr	p15, 0, r0, c1, c0, 0	@ load control register
887		mrc	p15, 0, r0, c1, c0, 0	@ and read it back
888		mov	r0, #0
889		mcr	p15, 0, r0, c7, c5, 4	@ ISB
890		mov	pc, r12
891
892__fa526_cache_on:
893		mov	r12, lr
894		mov	r6, #CB_BITS | 0x12	@ U
895		bl	__setup_mmu
896		mov	r0, #0
897		mcr	p15, 0, r0, c7, c7, 0	@ Invalidate whole cache
898		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
899		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
900		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
901		orr	r0, r0, #0x1000		@ I-cache enable
902		bl	__common_mmu_cache_on
903		mov	r0, #0
904		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
905		mov	pc, r12
906
907__common_mmu_cache_on:
908#ifndef CONFIG_THUMB2_KERNEL
909#ifndef DEBUG
910		orr	r0, r0, #0x000d		@ Write buffer, mmu
911#endif
912		mov	r1, #-1
913		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
914		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
915		b	1f
916		.align	5			@ cache line aligned
9171:		mcr	p15, 0, r0, c1, c0, 0	@ load control register
918		mrc	p15, 0, r0, c1, c0, 0	@ and read it back to
919		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
920#endif
921
922#define PROC_ENTRY_SIZE (4*5)
923
924/*
925 * Here follow the relocatable cache support functions for the
926 * various processors.  This is a generic hook for locating an
927 * entry and jumping to an instruction at the specified offset
928 * from the start of the block.  Please note this is all position
929 * independent code.
930 *
931 *  r1  = corrupted
932 *  r2  = corrupted
933 *  r3  = block offset
934 *  r9  = corrupted
935 *  r12 = corrupted
936 */
937
938call_cache_fn:	adr	r12, proc_types
939#ifdef CONFIG_CPU_CP15
940		mrc	p15, 0, r9, c0, c0	@ get processor ID
941#elif defined(CONFIG_CPU_V7M)
942		/*
943		 * On v7-M the processor id is located in the V7M_SCB_CPUID
944		 * register, but as cache handling is IMPLEMENTATION DEFINED on
945		 * v7-M (if existent at all) we just return early here.
946		 * If V7M_SCB_CPUID were used, the cpu ID functions (i.e.
947		 * __armv7_mmu_cache_{on,off,flush}) would be selected which
948		 * use cp15 registers that are not implemented on v7-M.
949		 */
950		bx	lr
951#else
952		ldr	r9, =CONFIG_PROCESSOR_ID
953#endif
9541:		ldr	r1, [r12, #0]		@ get value
955		ldr	r2, [r12, #4]		@ get mask
956		eor	r1, r1, r9		@ (real ^ match)
957		tst	r1, r2			@       & mask
958 ARM(		addeq	pc, r12, r3		) @ call cache function
959 THUMB(		addeq	r12, r3			)
960 THUMB(		moveq	pc, r12			) @ call cache function
961		add	r12, r12, #PROC_ENTRY_SIZE
962		b	1b
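/*
 * C sketch of the dispatch above (illustration only): each proc_types
 * entry is 20 bytes (match, mask, then the on/off/flush slots), and
 *
 *	for (p = proc_types; ; p += PROC_ENTRY_SIZE)
 *		if (((cpuid ^ p->match) & p->mask) == 0)
 *			jump to p + r3;		with r3 = 8, 12 or 16
 */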
963
964/*
965 * Table for cache operations.  This is basically:
966 *   - CPU ID match
967 *   - CPU ID mask
968 *   - 'cache on' method instruction
969 *   - 'cache off' method instruction
970 *   - 'cache flush' method instruction
971 *
972 * We match an entry using: ((real_id ^ match) & mask) == 0
973 *
974 * Writethrough caches generally only need 'on' and 'off'
975 * methods.  Writeback caches _must_ have the flush method
976 * defined.
977 */
978		.align	2
979		.type	proc_types,#object
980proc_types:
981		.word	0x41000000		@ old ARM ID
982		.word	0xff00f000
983		mov	pc, lr
984 THUMB(		nop				)
985		mov	pc, lr
986 THUMB(		nop				)
987		mov	pc, lr
988 THUMB(		nop				)
989
990		.word	0x41007000		@ ARM7/710
991		.word	0xfff8fe00
992		mov	pc, lr
993 THUMB(		nop				)
994		mov	pc, lr
995 THUMB(		nop				)
996		mov	pc, lr
997 THUMB(		nop				)
998
999		.word	0x41807200		@ ARM720T (writethrough)
1000		.word	0xffffff00
1001		W(b)	__armv4_mmu_cache_on
1002		W(b)	__armv4_mmu_cache_off
1003		mov	pc, lr
1004 THUMB(		nop				)
1005
1006		.word	0x41007400		@ ARM74x
1007		.word	0xff00ff00
1008		W(b)	__armv3_mpu_cache_on
1009		W(b)	__armv3_mpu_cache_off
1010		W(b)	__armv3_mpu_cache_flush
1011
1012		.word	0x41009400		@ ARM94x
1013		.word	0xff00ff00
1014		W(b)	__armv4_mpu_cache_on
1015		W(b)	__armv4_mpu_cache_off
1016		W(b)	__armv4_mpu_cache_flush
1017
1018		.word	0x41069260		@ ARM926EJ-S (v5TEJ)
1019		.word	0xff0ffff0
1020		W(b)	__arm926ejs_mmu_cache_on
1021		W(b)	__armv4_mmu_cache_off
1022		W(b)	__armv5tej_mmu_cache_flush
1023
1024		.word	0x00007000		@ ARM7 IDs
1025		.word	0x0000f000
1026		mov	pc, lr
1027 THUMB(		nop				)
1028		mov	pc, lr
1029 THUMB(		nop				)
1030		mov	pc, lr
1031 THUMB(		nop				)
1032
1033		@ Everything from here on will be the new ID system.
1034
1035		.word	0x4401a100		@ sa110 / sa1100
1036		.word	0xffffffe0
1037		W(b)	__armv4_mmu_cache_on
1038		W(b)	__armv4_mmu_cache_off
1039		W(b)	__armv4_mmu_cache_flush
1040
1041		.word	0x6901b110		@ sa1110
1042		.word	0xfffffff0
1043		W(b)	__armv4_mmu_cache_on
1044		W(b)	__armv4_mmu_cache_off
1045		W(b)	__armv4_mmu_cache_flush
1046
1047		.word	0x56056900
1048		.word	0xffffff00		@ PXA9xx
1049		W(b)	__armv4_mmu_cache_on
1050		W(b)	__armv4_mmu_cache_off
1051		W(b)	__armv4_mmu_cache_flush
1052
1053		.word	0x56158000		@ PXA168
1054		.word	0xfffff000
1055		W(b)	__armv4_mmu_cache_on
1056		W(b)	__armv4_mmu_cache_off
1057		W(b)	__armv5tej_mmu_cache_flush
1058
1059		.word	0x56050000		@ Feroceon
1060		.word	0xff0f0000
1061		W(b)	__armv4_mmu_cache_on
1062		W(b)	__armv4_mmu_cache_off
1063		W(b)	__armv5tej_mmu_cache_flush
1064
1065#ifdef CONFIG_CPU_FEROCEON_OLD_ID
1066		/* this conflicts with the standard ARMv5TE entry */
1067		.long	0x41009260		@ Old Feroceon
1068		.long	0xff00fff0
1069		b	__armv4_mmu_cache_on
1070		b	__armv4_mmu_cache_off
1071		b	__armv5tej_mmu_cache_flush
1072#endif
1073
1074		.word	0x66015261		@ FA526
1075		.word	0xff01fff1
1076		W(b)	__fa526_cache_on
1077		W(b)	__armv4_mmu_cache_off
1078		W(b)	__fa526_cache_flush
1079
1080		@ These match on the architecture ID
1081
1082		.word	0x00020000		@ ARMv4T
1083		.word	0x000f0000
1084		W(b)	__armv4_mmu_cache_on
1085		W(b)	__armv4_mmu_cache_off
1086		W(b)	__armv4_mmu_cache_flush
1087
1088		.word	0x00050000		@ ARMv5TE
1089		.word	0x000f0000
1090		W(b)	__armv4_mmu_cache_on
1091		W(b)	__armv4_mmu_cache_off
1092		W(b)	__armv4_mmu_cache_flush
1093
1094		.word	0x00060000		@ ARMv5TEJ
1095		.word	0x000f0000
1096		W(b)	__armv4_mmu_cache_on
1097		W(b)	__armv4_mmu_cache_off
1098		W(b)	__armv5tej_mmu_cache_flush
1099
1100		.word	0x0007b000		@ ARMv6
1101		.word	0x000ff000
1102		W(b)	__armv6_mmu_cache_on
1103		W(b)	__armv4_mmu_cache_off
1104		W(b)	__armv6_mmu_cache_flush
1105
1106		.word	0x000f0000		@ new CPU Id
1107		.word	0x000f0000
1108		W(b)	__armv7_mmu_cache_on
1109		W(b)	__armv7_mmu_cache_off
1110		W(b)	__armv7_mmu_cache_flush
1111
1112		.word	0			@ unrecognised type
1113		.word	0
1114		mov	pc, lr
1115 THUMB(		nop				)
1116		mov	pc, lr
1117 THUMB(		nop				)
1118		mov	pc, lr
1119 THUMB(		nop				)
1120
1121		.size	proc_types, . - proc_types
1122
1123		/*
1124		 * If you get a "non-constant expression in ".if" statement"
1125		 * error from the assembler on this line, check that you have
1126		 * not accidentally written a "b" instruction where you should
1127		 * have written W(b).
1128		 */
1129		.if (. - proc_types) % PROC_ENTRY_SIZE != 0
1130		.error "The size of one or more proc_types entries is wrong."
1131		.endif
1132
1133/*
1134 * Turn off the Cache and MMU.  ARMv3 does not support
1135 * reading the control register, but ARMv4 does.
1136 *
1137 * On exit,
1138 *  r0, r1, r2, r3, r9, r12 corrupted
1139 * This routine must preserve:
1140 *  r4, r7, r8
1141 */
1142		.align	5
1143cache_off:	mov	r3, #12			@ cache_off function
1144		b	call_cache_fn
1145
1146__armv4_mpu_cache_off:
1147		mrc	p15, 0, r0, c1, c0
1148		bic	r0, r0, #0x000d
1149		mcr	p15, 0, r0, c1, c0	@ turn MPU and cache off
1150		mov	r0, #0
1151		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
1152		mcr	p15, 0, r0, c7, c6, 0	@ flush D-Cache
1153		mcr	p15, 0, r0, c7, c5, 0	@ flush I-Cache
1154		mov	pc, lr
1155
1156__armv3_mpu_cache_off:
1157		mrc	p15, 0, r0, c1, c0
1158		bic	r0, r0, #0x000d
1159		mcr	p15, 0, r0, c1, c0, 0	@ turn MPU and cache off
1160		mov	r0, #0
1161		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
1162		mov	pc, lr
1163
1164__armv4_mmu_cache_off:
1165#ifdef CONFIG_MMU
1166		mrc	p15, 0, r0, c1, c0
1167		bic	r0, r0, #0x000d
1168		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
1169		mov	r0, #0
1170		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
1171		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
1172#endif
1173		mov	pc, lr
1174
1175__armv7_mmu_cache_off:
1176		mrc	p15, 0, r0, c1, c0
1177#ifdef CONFIG_MMU
1178		bic	r0, r0, #0x000d
1179#else
1180		bic	r0, r0, #0x000c
1181#endif
1182		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
1183		mov	r0, #0
1184#ifdef CONFIG_MMU
1185		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB
1186#endif
1187		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
1188		mcr	p15, 0, r0, c7, c10, 4	@ DSB
1189		mcr	p15, 0, r0, c7, c5, 4	@ ISB
1190		mov	pc, lr
1191
1192/*
1193 * Clean and flush the cache to maintain consistency.
1194 *
1195 * On entry,
1196 *  r0 = start address
1197 *  r1 = end address (exclusive)
1198 * On exit,
1199 *  r1, r2, r3, r9, r10, r11, r12 corrupted
1200 * This routine must preserve:
1201 *  r4, r6, r7, r8
1202 */
1203		.align	5
1204cache_clean_flush:
1205		mov	r3, #16
1206		mov	r11, r1
1207		b	call_cache_fn
1208
1209__armv4_mpu_cache_flush:
1210		tst	r4, #1
1211		movne	pc, lr
1212		mov	r2, #1
1213		mov	r3, #0
1214		mcr	p15, 0, ip, c7, c6, 0	@ invalidate D cache
1215		mov	r1, #7 << 5		@ 8 segments
12161:		orr	r3, r1, #63 << 26	@ 64 entries
12172:		mcr	p15, 0, r3, c7, c14, 2	@ clean & invalidate D index
1218		subs	r3, r3, #1 << 26
1219		bcs	2b			@ entries 63 to 0
1220		subs 	r1, r1, #1 << 5
1221		bcs	1b			@ segments 7 to 0
1222
1223		teq	r2, #0
1224		mcrne	p15, 0, ip, c7, c5, 0	@ invalidate I cache
1225		mcr	p15, 0, ip, c7, c10, 4	@ drain WB
1226		mov	pc, lr
1227
1228__fa526_cache_flush:
1229		tst	r4, #1
1230		movne	pc, lr
1231		mov	r1, #0
1232		mcr	p15, 0, r1, c7, c14, 0	@ clean and invalidate D cache
1233		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
1234		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1235		mov	pc, lr
1236
1237__armv6_mmu_cache_flush:
1238		mov	r1, #0
1239		tst	r4, #1
1240		mcreq	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
1241		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
1242		mcreq	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
1243		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1244		mov	pc, lr
1245
1246__armv7_mmu_cache_flush:
1247		enable_cp15_barriers	r10
1248		tst	r4, #1
1249		bne	iflush
1250		mrc	p15, 0, r10, c0, c1, 5	@ read ID_MMFR1
1251		tst	r10, #0xf << 16		@ hierarchical cache (ARMv7)
1252		mov	r10, #0
1253		beq	hierarchical
1254		mcr	p15, 0, r10, c7, c14, 0	@ clean+invalidate D
1255		b	iflush
1256hierarchical:
1257		dcache_line_size r1, r2		@ r1 := dcache min line size
1258		sub	r2, r1, #1		@ r2 := line size mask
1259		bic	r0, r0, r2		@ round down start to line size
1260		sub	r11, r11, #1		@ end address is exclusive
1261		bic	r11, r11, r2		@ round down end to line size
12620:		cmp	r0, r11			@ finished?
1263		bgt	iflush
1264		mcr	p15, 0, r0, c7, c14, 1	@ Dcache clean/invalidate by VA
1265		add	r0, r0, r1
1266		b	0b
1267iflush:
1268		mcr	p15, 0, r10, c7, c10, 4	@ DSB
1269		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I+BTB
1270		mcr	p15, 0, r10, c7, c10, 4	@ DSB
1271		mcr	p15, 0, r10, c7, c5, 4	@ ISB
1272		mov	pc, lr
1273
1274__armv5tej_mmu_cache_flush:
1275		tst	r4, #1
1276		movne	pc, lr
12771:		mrc	p15, 0, APSR_nzcv, c7, c14, 3	@ test,clean,invalidate D cache
1278		bne	1b
1279		mcr	p15, 0, r0, c7, c5, 0	@ flush I cache
1280		mcr	p15, 0, r0, c7, c10, 4	@ drain WB
1281		mov	pc, lr
1282
1283__armv4_mmu_cache_flush:
1284		tst	r4, #1
1285		movne	pc, lr
1286		mov	r2, #64*1024		@ default: 32K dcache size (*2)
1287		mov	r11, #32		@ default: 32 byte line size
1288		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
1289		teq	r3, r9			@ cache ID register present?
1290		beq	no_cache_id
1291		mov	r1, r3, lsr #18
1292		and	r1, r1, #7
1293		mov	r2, #1024
1294		mov	r2, r2, lsl r1		@ base dcache size *2
1295		tst	r3, #1 << 14		@ test M bit
1296		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
1297		mov	r3, r3, lsr #12
1298		and	r3, r3, #3
1299		mov	r11, #8
1300		mov	r11, r11, lsl r3	@ cache line size in bytes
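		@ C sketch of the geometry decode above (illustrative):
		@	size2 = 1024 << ((ctype >> 18) & 7);	twice the D-cache size
		@	if (ctype & (1 << 14))			M bit
		@		size2 += size2 / 2;
		@	line  = 8 << ((ctype >> 12) & 3);	line length in bytes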
1301no_cache_id:
1302		mov	r1, pc
1303		bic	r1, r1, #63		@ align to longest cache line
1304		add	r2, r1, r2
13051:
1306 ARM(		ldr	r3, [r1], r11		) @ s/w flush D cache
1307 THUMB(		ldr     r3, [r1]		) @ s/w flush D cache
1308 THUMB(		add     r1, r1, r11		)
1309		teq	r1, r2
1310		bne	1b
1311
1312		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
1313		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
1314		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1315		mov	pc, lr
1316
1317__armv3_mmu_cache_flush:
1318__armv3_mpu_cache_flush:
1319		tst	r4, #1
1320		movne	pc, lr
1321		mov	r1, #0
1322		mcr	p15, 0, r1, c7, c0, 0	@ invalidate whole cache v3
1323		mov	pc, lr
1324
1325/*
1326 * Various debugging routines for printing hex characters and
1327 * memory, which again must be relocatable.
1328 */
1329#ifdef DEBUG
1330		.align	2
1331		.type	phexbuf,#object
1332phexbuf:	.space	12
1333		.size	phexbuf, . - phexbuf
1334
1335@ phex corrupts {r0, r1, r2, r3}
1336phex:		adr	r3, phexbuf
1337		mov	r2, #0
1338		strb	r2, [r3, r1]
13391:		subs	r1, r1, #1
1340		movmi	r0, r3
1341		bmi	puts
1342		and	r2, r0, #15
1343		mov	r0, r0, lsr #4
1344		cmp	r2, #10
1345		addge	r2, r2, #7
1346		add	r2, r2, #'0'
1347		strb	r2, [r3, r1]
1348		b	1b
1349
1350@ puts corrupts {r0, r1, r2, r3}
1351puts:		loadsp	r3, r2, r1
13521:		ldrb	r2, [r0], #1
1353		teq	r2, #0
1354		moveq	pc, lr
13552:		writeb	r2, r3, r1
1356		mov	r1, #0x00020000
13573:		subs	r1, r1, #1
1358		bne	3b
1359		teq	r2, #'\n'
1360		moveq	r2, #'\r'
1361		beq	2b
1362		teq	r0, #0
1363		bne	1b
1364		mov	pc, lr
1365@ putc corrupts {r0, r1, r2, r3}
1366putc:
1367		mov	r2, r0
1368		loadsp	r3, r1, r0
1369		mov	r0, #0
1370		b	2b
1371
1372@ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
1373memdump:	mov	r12, r0
1374		mov	r10, lr
1375		mov	r11, #0
13762:		mov	r0, r11, lsl #2
1377		add	r0, r0, r12
1378		mov	r1, #8
1379		bl	phex
1380		mov	r0, #':'
1381		bl	putc
13821:		mov	r0, #' '
1383		bl	putc
1384		ldr	r0, [r12, r11, lsl #2]
1385		mov	r1, #8
1386		bl	phex
1387		and	r0, r11, #7
1388		teq	r0, #3
1389		moveq	r0, #' '
1390		bleq	putc
1391		and	r0, r11, #7
1392		add	r11, r11, #1
1393		teq	r0, #7
1394		bne	1b
1395		mov	r0, #'\n'
1396		bl	putc
1397		cmp	r11, #64
1398		blt	2b
1399		mov	pc, r10
1400#endif
1401
1402		.ltorg
1403
1404#ifdef CONFIG_ARM_VIRT_EXT
1405.align 5
1406__hyp_reentry_vectors:
1407		W(b)	.			@ reset
1408		W(b)	.			@ undef
1409#ifdef CONFIG_EFI_STUB
1410		W(b)	__enter_kernel_from_hyp	@ hvc from HYP
1411#else
1412		W(b)	.			@ svc
1413#endif
1414		W(b)	.			@ pabort
1415		W(b)	.			@ dabort
1416		W(b)	__enter_kernel		@ hyp
1417		W(b)	.			@ irq
1418		W(b)	.			@ fiq
1419#endif /* CONFIG_ARM_VIRT_EXT */
1420
1421__enter_kernel:
1422		mov	r0, #0			@ must be 0
1423		mov	r1, r7			@ restore architecture number
1424		mov	r2, r8			@ restore atags pointer
1425 ARM(		mov	pc, r4		)	@ call kernel
1426 M_CLASS(	add	r4, r4, #1	)	@ enter in Thumb mode for M class
1427 THUMB(		bx	r4		)	@ entry point is always ARM for A/R classes
1428
1429reloc_code_end:
1430
1431#ifdef CONFIG_EFI_STUB
1432__enter_kernel_from_hyp:
1433		mrc	p15, 4, r0, c1, c0, 0	@ read HSCTLR
1434		bic	r0, r0, #0x5		@ disable MMU and caches
1435		mcr	p15, 4, r0, c1, c0, 0	@ write HSCTLR
1436		isb
1437		b	__enter_kernel
1438
1439ENTRY(efi_enter_kernel)
1440		mov	r4, r0			@ preserve image base
1441		mov	r8, r1			@ preserve DT pointer
1442
1443		adr_l	r0, call_cache_fn
1444		adr	r1, 0f			@ clean the region of code we
1445		bl	cache_clean_flush	@ may run with the MMU off
1446
1447#ifdef CONFIG_ARM_VIRT_EXT
1448		@
1449		@ The EFI spec does not support booting on ARM in HYP mode,
1450		@ since it mandates that the MMU and caches are on, with all
1451		@ 32-bit addressable DRAM mapped 1:1 using short descriptors.
1452		@
1453		@ While the EDK2 reference implementation adheres to this,
1454		@ U-Boot might decide to enter the EFI stub in HYP mode
1455		@ anyway, with the MMU and caches either on or off.
1456		@
1457		mrs	r0, cpsr		@ get the current mode
1458		msr	spsr_cxsf, r0		@ record boot mode
1459		and	r0, r0, #MODE_MASK	@ are we running in HYP mode?
1460		cmp	r0, #HYP_MODE
1461		bne	.Lefi_svc
1462
1463		mrc	p15, 4, r1, c1, c0, 0	@ read HSCTLR
1464		tst	r1, #0x1		@ MMU enabled at HYP?
1465		beq	1f
1466
1467		@
1468		@ When running in HYP mode with the caches on, we're better
1469		@ off just carrying on using the cached 1:1 mapping that the
1470		@ firmware provided. Set up the HYP vectors so HVC instructions
1471		@ issued from HYP mode take us to the correct handler code. We
1472		@ will disable the MMU before jumping to the kernel proper.
1473		@
1474		adr	r0, __hyp_reentry_vectors
1475		mcr	p15, 4, r0, c12, c0, 0	@ set HYP vector base (HVBAR)
1476		isb
1477		b	.Lefi_hyp
1478
1479		@
1480		@ When running in HYP mode with the caches off, we need to drop
1481		@ into SVC mode now, and let the decompressor set up its cached
1482		@ 1:1 mapping as usual.
1483		@
14841:		mov	r9, r4			@ preserve image base
1485		bl	__hyp_stub_install	@ install HYP stub vectors
1486		safe_svcmode_maskall	r1	@ drop to SVC mode
1487		msr	spsr_cxsf, r0		@ record boot mode
1488		orr	r4, r9, #1		@ restore image base and set LSB
1489		b	.Lefi_hyp
1490.Lefi_svc:
1491#endif
1492		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
1493		tst	r0, #0x1		@ MMU enabled?
1494		orreq	r4, r4, #1		@ set LSB if not
1495
1496.Lefi_hyp:
1497		mov	r0, r8			@ DT start
1498		add	r1, r8, r2		@ DT end
1499		bl	cache_clean_flush
1500
1501		adr	r0, 0f			@ switch to our stack
1502		ldr	sp, [r0]
1503		add	sp, sp, r0
1504
1505		mov	r5, #0			@ appended DTB size
1506		mov	r7, #0xFFFFFFFF		@ machine ID
1507		b	wont_overwrite
1508ENDPROC(efi_enter_kernel)
15090:		.long	.L_user_stack_end - .
1510#endif
1511
1512		.align
1513		.section ".stack", "aw", %nobits
1514.L_user_stack:	.space	4096
1515.L_user_stack_end:
1516