xref: /openbmc/linux/arch/arm/boot/compressed/head.S (revision 0f4b20ef)
1/* SPDX-License-Identifier: GPL-2.0-only */
2/*
3 *  linux/arch/arm/boot/compressed/head.S
4 *
5 *  Copyright (C) 1996-2002 Russell King
6 *  Copyright (C) 2004 Hyok S. Choi (MPU support)
7 */
8#include <linux/linkage.h>
9#include <asm/assembler.h>
10#include <asm/v7m.h>
11
12#include "efi-header.S"
13
/*
 * OF_DT_MAGIC is the flattened device tree signature as it reads when the
 * first word of a DTB is fetched with a plain 32-bit load: 0xd00dfeed on a
 * big-endian build (__ARMEB__), the byte-swapped 0xedfe0dd0 otherwise.
 * The appended-DTB probes further down compare a raw ldr result against it.
 */
14#ifdef __ARMEB__
15#define OF_DT_MAGIC 0xd00dfeed
16#else
17#define OF_DT_MAGIC 0xedfe0dd0
18#endif
19
/*
 * Tell the assembler which architecture to accept.  AR_CLASS() and
 * M_CLASS() are defined in the included headers; presumably only one of
 * them emits its argument for a given build (A/R-class vs M-class).
 */
20 AR_CLASS(	.arch	armv7-a	)
21 M_CLASS(	.arch	armv7-m	)
22
23/*
24 * Debugging stuff
25 *
26 * Note that these macros must not contain any code which is not
27 * 100% relocatable.  Any attempt to do so will result in a crash.
28 * Please select one of the following when turning on debugging.
29 */
30#ifdef DEBUG
/*
 * Low-level debug output, only assembled when DEBUG is defined.  Every
 * variant below supplies the same two macros:
 *
 *   loadsp rb, tmp1, tmp2 - put the address of the output port in rb
 *                           (empty for the ICEDCC variants, whose writeb
 *                           does not use rb at all)
 *   writeb ch, rb, tmp    - output the byte held in ch
 *
 * The ICEDCC variants write the character to a coprocessor 14 register;
 * which register is used depends on the configured CPU family.
 */
31
32#if defined(CONFIG_DEBUG_ICEDCC)
33
34#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
35		.macro	loadsp, rb, tmp1, tmp2
36		.endm
37		.macro	writeb, ch, rb, tmp
38		mcr	p14, 0, \ch, c0, c5, 0
39		.endm
40#elif defined(CONFIG_CPU_XSCALE)
41		.macro	loadsp, rb, tmp1, tmp2
42		.endm
43		.macro	writeb, ch, rb, tmp
44		mcr	p14, 0, \ch, c8, c0, 0
45		.endm
46#else
47		.macro	loadsp, rb, tmp1, tmp2
48		.endm
49		.macro	writeb, ch, rb, tmp
50		mcr	p14, 0, \ch, c1, c0, 0
51		.endm
52#endif
53
54#else
55
/*
 * UART variant.  waituartcts, waituarttxrdy, senduart, busyuart and
 * addruart are not defined in this file; they are expected to come from
 * the platform header named by CONFIG_DEBUG_LL_INCLUDE.
 */
56#include CONFIG_DEBUG_LL_INCLUDE
57
58		.macro	writeb,	ch, rb, tmp
59#ifdef CONFIG_DEBUG_UART_FLOW_CONTROL
60		waituartcts \tmp, \rb
61#endif
62		waituarttxrdy \tmp, \rb
63		senduart \ch, \rb
64		busyuart \tmp, \rb
65		.endm
66
67#if defined(CONFIG_ARCH_SA1100)
		/* SA1100: the port address is built from constants, tmp1/tmp2 unused */
68		.macro	loadsp, rb, tmp1, tmp2
69		mov	\rb, #0x80000000	@ physical base address
70#ifdef CONFIG_DEBUG_LL_SER3
71		add	\rb, \rb, #0x00050000	@ Ser3
72#else
73		add	\rb, \rb, #0x00010000	@ Ser1
74#endif
75		.endm
76#else
		/* Everything else: let the platform addruart macro compute it */
77		.macro	loadsp,	rb, tmp1, tmp2
78		addruart \rb, \tmp1, \tmp2
79		.endm
80#endif
81#endif
82#endif
83
/*
 * kputc val - print one character.
 *
 * Moves val (a register or an immediate such as #'C') into r0 and calls
 * putc with bl, so r0 and lr are overwritten.  putc is defined outside
 * this part of the file; whatever else it clobbers is not visible here.
 */
84		.macro	kputc,val
85		mov	r0, \val
86		bl	putc
87		.endm
88
/*
 * kphex val, len - print a value in hex.
 *
 * Moves val into r0 and the constant len into r1 (len is used as an
 * immediate, presumably the number of hex digits - the callers in this
 * file pass 8), then calls phex with bl.  r0, r1 and lr are overwritten;
 * phex is defined outside this part of the file.
 */
89		.macro	kphex,val,len
90		mov	r0, \val
91		mov	r1, #\len
92		bl	phex
93		.endm
94
95		/*
96		 * Debug kernel copy by printing the memory addresses involved
97		 */
98		.macro dbgkc, begin, end, cbegin, cend
99#ifdef DEBUG
100		kputc   #'C'
101		kputc   #':'
102		kputc   #'0'
103		kputc   #'x'
104		kphex   \begin, 8	/* Start of compressed kernel */
105		kputc	#'-'
106		kputc	#'0'
107		kputc	#'x'
108		kphex	\end, 8		/* End of compressed kernel */
109		kputc	#'-'
110		kputc	#'>'
111		kputc   #'0'
112		kputc   #'x'
113		kphex   \cbegin, 8	/* Start of kernel copy */
114		kputc	#'-'
115		kputc	#'0'
116		kputc	#'x'
117		kphex	\cend, 8	/* End of kernel copy */
118		kputc	#'\n'
119#endif
120		.endm
121
122		/*
123		 * Debug print of the final appended DTB location
124		 */
125		.macro dbgadtb, begin, size
126#ifdef DEBUG
127		kputc   #'D'
128		kputc   #'T'
129		kputc   #'B'
130		kputc   #':'
131		kputc   #'0'
132		kputc   #'x'
133		kphex   \begin, 8	/* Start of appended DTB */
134		kputc	#' '
135		kputc	#'('
136		kputc	#'0'
137		kputc	#'x'
138		kphex	\size, 8	/* Size of appended DTB */
139		kputc	#')'
140		kputc	#'\n'
141#endif
142		.endm
143
/*
 * enable_cp15_barriers reg
 *
 * Make sure SCTLR bit 5 (CP15BEN) is set so that the CP15 barrier
 * operations used later are usable.  If the bit is already set nothing
 * is written; otherwise SCTLR is updated and an ISB is executed (emitted
 * as a raw opcode in ARM state, as the isb mnemonic in Thumb).
 *
 * Clobbers: reg (left holding the SCTLR value) and the condition flags.
 * The local label is made unique per expansion with the macro counter.
 */
143		.macro	enable_cp15_barriers, reg
144		mrc	p15, 0, \reg, c1, c0, 0	@ read SCTLR
145		tst	\reg, #(1 << 5)		@ CP15BEN bit set?
146		bne	.L_\@
147		orr	\reg, \reg, #(1 << 5)	@ CP15 barrier instructions
148		mcr	p15, 0, \reg, c1, c0, 0	@ write SCTLR
149 ARM(		.inst   0xf57ff06f		@ v7+ isb	)
150 THUMB(		isb						)
151.L_\@:
152		.endm
154
155		/*
156		 * The kernel build system appends the size of the
157		 * decompressed kernel at the end of the compressed data
158		 * in little-endian form.
159		 */
160		.macro	get_inflated_image_size, res:req, tmp1:req, tmp2:req
161		adr	\res, .Linflated_image_size_offset
162		ldr	\tmp1, [\res]
163		add	\tmp1, \tmp1, \res	@ address of inflated image size
164
165		ldrb	\res, [\tmp1]		@ get_unaligned_le32
166		ldrb	\tmp2, [\tmp1, #1]
167		orr	\res, \res, \tmp2, lsl #8
168		ldrb	\tmp2, [\tmp1, #2]
169		ldrb	\tmp1, [\tmp1, #3]
170		orr	\res, \res, \tmp2, lsl #16
171		orr	\res, \res, \tmp1, lsl #24
172		.endm
173
/*
 * be32tocpu val, tmp
 *
 * Convert a big-endian 32-bit value held in val to CPU byte order, in
 * place.  On a big-endian build (__ARMEB__) this expands to nothing.
 * Otherwise it expands to rev_l, which is defined outside this file
 * (presumably a byte reversal that may use tmp as scratch - confirm in
 * the header that provides it).
 */
173		.macro	be32tocpu, val, tmp
174#ifndef __ARMEB__
175		/* convert to little endian */
176		rev_l	\val, \tmp
177#endif
178		.endm
180
180		.section ".start", "ax"
181/*
182 * sort out different calling conventions
 *
 * Entry point of the decompressor.  As the code below uses them, r1
 * holds the architecture ID and r2 the atags pointer on entry; they are
 * parked in r7 and r8.  On A/R-class builds the CPSR seen at entry is
 * captured in r9 and later stored in SPSR so the boot mode can be
 * recovered.  Execution continues into the .text section selected at
 * the end of this block.
183 */
184		.align
185		/*
186		 * Always enter in ARM state for CPUs that support the ARM ISA.
187		 * As of today (2014) that's exactly the members of the A and R
188		 * classes.
189		 */
190 AR_CLASS(	.arm	)
191start:
192		.type	start,#function
193		/*
194		 * These 7 nops along with the 1 nop immediately below for
195		 * !THUMB2 form 8 nops that make the compressed kernel bootable
196		 * on legacy ARM systems that were assuming the kernel in a.out
197		 * binary format. The boot loaders on these systems would
198		 * jump 32 bytes into the image to skip the a.out header.
199		 * With these 8 nops filling exactly 32 bytes, things still
200		 * work as expected on these legacy systems. Thumb2 mode keeps
201		 * 7 of the nops as it turns out that some boot loaders
202		 * were patching the initial instructions of the kernel, i.e.
203		 * had started to exploit this "patch area".
		 *
		 * Only five of the seven are spelled out with .rept below;
		 * __initial_nops is defined elsewhere and presumably
		 * supplies the other two.
204		 */
205		__initial_nops
206		.rept	5
207		__nop
208		.endr
209#ifndef CONFIG_THUMB2_KERNEL
210		__nop
211#else
212 AR_CLASS(	sub	pc, pc, #3	)	@ A/R: switch to Thumb2 mode
213  M_CLASS(	nop.w			)	@ M: already in Thumb2 mode
214		.thumb
215#endif
216		W(b)	1f
217
		/*
		 * Fixed-layout header words for boot loaders.  The _magic_*
		 * symbols are not defined in this file (presumably supplied
		 * by the linker script).
		 */
218		.word	_magic_sig	@ Magic numbers to help the loader
219		.word	_magic_start	@ absolute load/run zImage address
220		.word	_magic_end	@ zImage end address
221		.word	0x04030201	@ endianness flag
222		.word	0x45454545	@ another magic number to indicate
223		.word	_magic_table	@ additional data table
224
225		__EFI_HEADER
2261:
227 ARM_BE8(	setend	be		)	@ go BE8 if compiled for BE8
228 AR_CLASS(	mrs	r9, cpsr	)
229#ifdef CONFIG_ARM_VIRT_EXT
230		bl	__hyp_stub_install	@ get into SVC mode, reversibly
231#endif
232		mov	r7, r1			@ save architecture ID
233		mov	r8, r2			@ save atags pointer
234
235#ifndef CONFIG_CPU_V7M
236		/*
237		 * Booting from Angel - need to enter SVC mode and disable
238		 * FIQs/IRQs (numeric definitions from angel arm.h source).
239		 * We only do this if we were in user mode on entry.
240		 */
241		mrs	r2, cpsr		@ get current mode
242		tst	r2, #3			@ not user?
243		bne	not_angel
244		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
245 ARM(		swi	0x123456	)	@ angel_SWI_ARM
246 THUMB(		svc	0xab		)	@ angel_SWI_THUMB
247not_angel:
248		safe_svcmode_maskall r0
249		msr	spsr_cxsf, r9		@ Save the CPU boot mode in
250						@ SPSR
251#endif
252		/*
253		 * Note that some cache flushing and other stuff may
254		 * be needed here - is there an Angel SWI call for this?
255		 */
256
257		/*
258		 * some architecture specific code can be inserted
259		 * by the linker here, but it should preserve r7, r8, and r9.
260		 */
261
262		.text
264
/*
 * Work out r4, the address the decompressed kernel will run at.
 * With CONFIG_AUTO_ZRELADDR it is derived from the current pc (optionally
 * validated against the DTB passed in r8); otherwise it is the zreladdr
 * symbol.  Expects r8 = atags/DTB pointer; uses r0-r3 and, on the
 * validation path, sets up sp.
 */
264#ifdef CONFIG_AUTO_ZRELADDR
265		/*
266		 * Find the start of physical memory.  As we are executing
267		 * without the MMU on, we are in the physical address space.
268		 * We just need to get rid of any offset by aligning the
269		 * address.
270		 *
271		 * This alignment is a balance between the requirements of
272		 * different platforms - we have chosen 128MB to allow
273		 * platforms which align the start of their physical memory
274		 * to 128MB to use this feature, while allowing the zImage
275		 * to be placed within the first 128MB of memory on other
276		 * platforms.  Increasing the alignment means we place
277		 * stricter alignment requirements on the start of physical
278		 * memory, but relaxing it means that we break people who
279		 * are already placing their zImage in (eg) the top 64MB
280		 * of this range.
281		 */
282		mov	r0, pc
283		and	r0, r0, #0xf8000000	@ round down to 128MB
284#ifdef CONFIG_USE_OF
285		adr	r1, LC1
286#ifdef CONFIG_ARM_APPENDED_DTB
287		/*
288		 * Look for an appended DTB.  If found, we cannot use it to
289		 * validate the calculated start of physical memory, as its
290		 * memory nodes may need to be augmented by ATAGS stored at
291		 * an offset from the same start of physical memory.
292		 */
293		ldr	r2, [r1, #4]	@ get &_edata
294		add	r2, r2, r1	@ relocate it
295		ldr	r2, [r2]	@ get DTB signature
296		ldr	r3, =OF_DT_MAGIC
297		cmp	r2, r3		@ do we have a DTB there?
298		beq	1f		@ if yes, skip validation
299#endif /* CONFIG_ARM_APPENDED_DTB */
300
301		/*
302		 * Make sure we have some stack before calling C code.
303		 * No GOT fixup has occurred yet, but none of the code we're
304		 * about to call uses any global variables.
305		 */
306		ldr	sp, [r1]	@ get stack location
307		add	sp, sp, r1	@ apply relocation
308
309		/* Validate calculated start against passed DTB */
		/* r0 = calculated start, r1 = pointer from r8; result used from r0 */
310		mov	r1, r8
311		bl	fdt_check_mem_start
3121:
313#endif /* CONFIG_USE_OF */
314		/* Determine final kernel image address. */
315		add	r4, r0, #TEXT_OFFSET
316#else
317		ldr	r4, =zreladdr
318#endif
320
321		/*
322		 * Set up a page table only if it won't overwrite ourself.
323		 * That means r4 < pc || r4 - 16k page directory > &_end.
324		 * Given that r4 > &_end is very infrequent, we add a rough
325		 * additional 1MB of room for a possible appended DTB.
		 *
		 * When the kernel address lies above pc but below
		 * pc + .Lheadroom, cache_on is postponed and bit 0 of r4
		 * is set as a reminder; the deferred call is made after
		 * the BSS has been cleared.  In every other case cache_on
		 * is called right here.
326		 */
327		mov	r0, pc
328		cmp	r0, r4			@ pc below kernel address?
329		ldrcc	r0, .Lheadroom		@ yes: size of image + margin
330		addcc	r0, r0, pc		@      turned into an address
331		cmpcc	r4, r0			@      kernel address below it?
332		orrcc	r4, r4, #1		@ remember we skipped cache_on
333		blcs	cache_on
334
/*
 * restart: (re)establish the working registers.  This label is also the
 * point execution resumes at, in the moved copy, after the decompressor
 * has relocated itself.  LC1 holds offsets relative to itself, so adding
 * its run-time address yields position-independent pointers:
 *   sp  = top of the stack, r6 = _edata, r9 = inflated image size,
 *   r10 = end of this image, r5 = appended DTB size (0 until known).
 */
335restart:	adr	r0, LC1
336		ldr	sp, [r0]		@ .L_user_stack_end - LC1
337		ldr	r6, [r0, #4]		@ _edata - LC1
338		add	sp, sp, r0
339		add	r6, r6, r0
340
341		get_inflated_image_size	r9, r10, lr
342
343#ifndef CONFIG_ZBOOT_ROM
344		/* malloc space is above the relocated stack (64k max) */
345		add	r10, sp, #MALLOC_SIZE
346#else
347		/*
348		 * With ZBOOT_ROM the bss/stack is non relocatable,
349		 * but someone could still run this code from RAM,
350		 * in which case our reference is _edata.
351		 */
352		mov	r10, r6
353#endif
354
355		mov	r5, #0			@ init dtb size to 0
356#ifdef CONFIG_ARM_APPENDED_DTB
357/*
358 *   r4  = final kernel address (possibly with LSB set)
359 *   r5  = appended dtb size (still unknown)
360 *   r6  = _edata
361 *   r7  = architecture ID
362 *   r8  = atags/device tree pointer
363 *   r9  = size of decompressed image
364 *   r10 = end of this image, including  bss/stack/malloc space if non XIP
365 *   sp  = stack pointer
366 *
367 * if there are device trees (dtb) appended to zImage, advance r10 so that the
368 * dtb data will get relocated along with the kernel if necessary.
369 */
370
		/* an appended DTB starts right at _edata; probe its first word */
371		ldr	lr, [r6, #0]
372		ldr	r1, =OF_DT_MAGIC
373		cmp	lr, r1
374		bne	dtb_check_done		@ not found
375
376#ifdef CONFIG_ARM_ATAG_DTB_COMPAT
377		/*
378		 * OK... Let's do some funky business here.
379		 * If we do have a DTB appended to zImage, and we do have
380		 * an ATAG list around, we want the latter to be translated
381		 * and folded into the former here. No GOT fixup has occurred
382		 * yet, but none of the code we're about to call uses any
383		 * global variable.
384		*/
385
386		/* Get the initial DTB size */
		/* (second header word, stored big-endian) */
387		ldr	r5, [r6, #4]
388		be32tocpu r5, r1
389		dbgadtb	r6, r5
390		/* 50% DTB growth should be good enough */
391		add	r5, r5, r5, lsr #1
392		/* preserve 64-bit alignment */
393		add	r5, r5, #7
394		bic	r5, r5, #7
395		/* clamp to 32KB min and 1MB max */
396		cmp	r5, #(1 << 15)
397		movlo	r5, #(1 << 15)
398		cmp	r5, #(1 << 20)
399		movhi	r5, #(1 << 20)
400		/* temporarily relocate the stack past the DTB work space */
401		add	sp, sp, r5
402
		/* atags_to_fdt(r0 = pointer from r8, r1 = DTB, r2 = work space size) */
403		mov	r0, r8
404		mov	r1, r6
405		mov	r2, r5
406		bl	atags_to_fdt
407
408		/*
409		 * If returned value is 1, there is no ATAG at the location
410		 * pointed by r8.  Try the typical 0x100 offset from start
411		 * of RAM and hope for the best.
		 *
		 * None of the instructions between the cmp and the bleq
		 * below set the flags, so the second call is made only
		 * when the first one returned 1.
412		 */
413		cmp	r0, #1
414		sub	r0, r4, #TEXT_OFFSET
415		bic	r0, r0, #1
416		add	r0, r0, #0x100
417		mov	r1, r6
418		mov	r2, r5
419		bleq	atags_to_fdt
420
		/* undo the temporary stack move */
421		sub	sp, sp, r5
422#endif
423
424		mov	r8, r6			@ use the appended device tree
425
426		/*
427		 * Make sure that the DTB doesn't end up in the final
428		 * kernel's .bss area. To do so, we adjust the decompressed
429		 * kernel size to compensate if that .bss size is larger
430		 * than the relocated code.
431		 */
432		ldr	r5, =_kernel_bss_size
433		adr	r1, wont_overwrite
434		sub	r1, r6, r1		@ _edata - wont_overwrite
435		subs	r1, r5, r1		@ bss size minus that span
436		addhi	r9, r9, r1		@ positive: grow image size
437
438		/* Get the current DTB size */
439		ldr	r5, [r6, #4]
440		be32tocpu r5, r1
441
442		/* preserve 64-bit alignment */
443		add	r5, r5, #7
444		bic	r5, r5, #7
445
446		/* relocate some pointers past the appended dtb */
447		add	r6, r6, r5
448		add	r10, r10, r5
449		add	sp, sp, r5
450dtb_check_done:
451#endif
452
453/*
454 * Check to see if we will overwrite ourselves.
455 *   r4  = final kernel address (possibly with LSB set)
456 *   r9  = size of decompressed image
457 *   r10 = end of this image, including  bss/stack/malloc space if non XIP
458 * We basically want:
459 *   r4 - 16k page directory >= r10 -> OK
460 *   r4 + image length <= address of wont_overwrite -> OK
461 * Note: the possible LSB in r4 is harmless here.
462 */
463		add	r10, r10, #16384
464		cmp	r4, r10
465		bhs	wont_overwrite
466		add	r10, r4, r9
467		adr	r9, wont_overwrite
468		cmp	r10, r9
469		bls	wont_overwrite
470
471/*
472 * Relocate ourselves past the end of the decompressed kernel.
473 *   r6  = _edata
474 *   r10 = end of the decompressed kernel
475 * Because we always copy ahead, we need to do it from the end and go
476 * backward in case the source and destination overlap.
477 */
478		/*
479		 * Bump to the next 256-byte boundary with the size of
480		 * the relocation code added. This avoids overwriting
481		 * ourself when the offset is small.
482		 */
483		add	r10, r10, #((reloc_code_end - restart + 256) & ~255)
484		bic	r10, r10, #255
485
486		/* Get start of code we want to copy and align it down. */
487		adr	r5, restart
488		bic	r5, r5, #31
489
490/* Relocate the hyp vector base if necessary */
491#ifdef CONFIG_ARM_VIRT_EXT
492		mrs	r0, spsr
493		and	r0, r0, #MODE_MASK
494		cmp	r0, #HYP_MODE
495		bne	1f
496
497		/*
498		 * Compute the address of the hyp vectors after relocation.
499		 * Call __hyp_set_vectors with the new address so that we
500		 * can HVC again after the copy.
501		 */
502		adr_l	r0, __hyp_stub_vectors
503		sub	r0, r0, r5
504		add	r0, r0, r10
505		bl	__hyp_set_vectors
5061:
507#endif
508
509		sub	r9, r6, r5		@ size to copy
510		add	r9, r9, #31		@ rounded up to a multiple
511		bic	r9, r9, #31		@ ... of 32 bytes
512		add	r6, r9, r5
513		add	r9, r9, r10
514
515#ifdef DEBUG
516		sub     r10, r6, r5
517		sub     r10, r9, r10
518		/*
519		 * We are about to copy the kernel to a new memory area.
520		 * The boundaries of the new memory area can be found in
521		 * r10 and r9, whilst r5 and r6 contain the boundaries
522		 * of the memory we are going to copy.
523		 * Calling dbgkc will help with the printing of this
524		 * information.
525		 */
526		dbgkc	r5, r6, r10, r9
527#endif
528
5291:		ldmdb	r6!, {r0 - r3, r10 - r12, lr}
530		cmp	r6, r5
531		stmdb	r9!, {r0 - r3, r10 - r12, lr}
532		bhi	1b
533
534		/* Preserve offset to relocated code. */
535		sub	r6, r9, r6
536
537		mov	r0, r9			@ start of relocated zImage
538		add	r1, sp, r6		@ end of relocated zImage
539		bl	cache_clean_flush
540
541		badr	r0, restart
542		add	r0, r0, r6
543		mov	pc, r0
544
/*
 * wont_overwrite: the decompressed kernel will not land on top of this
 * image, so fix up the C environment in place.  LC0 holds link-time
 * addresses, the first being LC0 itself; subtracting it from the
 * run-time address of LC0 gives the load delta in r0.
 */
545wont_overwrite:
546		adr	r0, LC0
547		ldmia	r0, {r1, r2, r3, r11, r12}
548		sub	r0, r0, r1		@ calculate the delta offset
549
550/*
551 * If delta is zero, we are running at the address we were linked at.
552 *   r0  = delta
553 *   r2  = BSS start
554 *   r3  = BSS end
555 *   r4  = kernel execution address (possibly with LSB set)
556 *   r5  = appended dtb size (0 if not present)
557 *   r7  = architecture ID
558 *   r8  = atags pointer
559 *   r11 = GOT start
560 *   r12 = GOT end
561 *   sp  = stack pointer
562 */
		/* nothing to fix up when both the delta and the DTB size are 0 */
563		orrs	r1, r0, r5
564		beq	not_relocated
565
566		add	r11, r11, r0
567		add	r12, r12, r0
568
569#ifndef CONFIG_ZBOOT_ROM
570		/*
571		 * If we're running fully PIC === CONFIG_ZBOOT_ROM = n,
572		 * we need to fix up pointers into the BSS region.
573		 * Note that the stack pointer has already been fixed up.
574		 */
575		add	r2, r2, r0
576		add	r3, r3, r0
577
578		/*
579		 * Relocate all entries in the GOT table.
580		 * Bump bss entries to _edata + dtb size
581		 */
5821:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
583		add	r1, r1, r0		@ This fixes up C references
584		cmp	r1, r2			@ if entry >= bss_start &&
585		cmphs	r3, r1			@       bss_end > entry
586		addhi	r1, r1, r5		@    entry += dtb size
587		str	r1, [r11], #4		@ next entry
588		cmp	r11, r12
589		blo	1b
590
591		/* bump our bss pointers too */
592		add	r2, r2, r5
593		add	r3, r3, r5
594
595#else
596
597		/*
598		 * Relocate entries in the GOT table.  We only relocate
599		 * the entries that are outside the (relocated) BSS region.
600		 */
6011:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
602		cmp	r1, r2			@ entry < bss_start ||
603		cmphs	r3, r1			@ _end < entry
604		addlo	r1, r1, r0		@ table.  This fixes up the
605		str	r1, [r11], #4		@ C references.
606		cmp	r11, r12
607		blo	1b
608#endif
609
/*
 * not_relocated: clear the BSS, turn the cache on if that was postponed,
 * decompress the kernel and hand over to it.
 *   r2 = BSS start, r3 = BSS end
 * The clear loop stores 16 bytes per pass before comparing, so it can
 * write up to 12 bytes beyond r3 when the BSS size is not a multiple
 * of 16.
 */
610not_relocated:	mov	r0, #0
6111:		str	r0, [r2], #4		@ clear bss
612		str	r0, [r2], #4
613		str	r0, [r2], #4
614		str	r0, [r2], #4
615		cmp	r2, r3
616		blo	1b
617
618		/*
619		 * Did we skip the cache setup earlier?
620		 * That is indicated by the LSB in r4.
621		 * Do it now if so.
		 * (bic leaves the flags from tst intact for the blne.)
622		 */
623		tst	r4, #1
624		bic	r4, r4, #1
625		blne	cache_on
626
627/*
628 * The C runtime environment should now be setup sufficiently.
629 * Set up some pointers, and start decompressing.
630 *   r4  = kernel execution address
631 *   r7  = architecture ID
632 *   r8  = atags pointer
633 */
		/* decompress_kernel(r0 = output, r1..r2 = malloc area, r3 = arch ID) */
634		mov	r0, r4
635		mov	r1, sp			@ malloc space above stack
636		add	r2, sp, #MALLOC_SIZE	@ 64k max
637		mov	r3, r7
638		bl	decompress_kernel
639
640		get_inflated_image_size	r1, r2, r3
641
642		mov	r0, r4			@ start of inflated image
643		add	r1, r1, r0		@ end of inflated image
644		bl	cache_clean_flush
645		bl	cache_off
646
647#ifdef CONFIG_ARM_VIRT_EXT
648		mrs	r0, spsr		@ Get saved CPU boot mode
649		and	r0, r0, #MODE_MASK
650		cmp	r0, #HYP_MODE		@ if not booted in HYP mode...
651		bne	__enter_kernel		@ boot kernel directly
652
653		adr_l	r0, __hyp_reentry_vectors
654		bl	__hyp_set_vectors
655		__HVC(0)			@ otherwise bounce to hyp mode
656
657		b	.			@ should never be reached
658#else
659		b	__enter_kernel
660#endif
661
/*
 * Literal data used by the code above.
 *
 * LC0 holds link-time absolute addresses and is loaded with a single
 * ldmia at wont_overwrite; the trailing comments name the register each
 * word lands in.  Its first word is the address of LC0 itself, which
 * lets the code derive the load delta.
 *
 * LC1, .Lheadroom and .Linflated_image_size_offset hold differences
 * between symbols (or between a symbol and the current location), so
 * they do not depend on the load address.
 */
662		.align	2
663		.type	LC0, #object
664LC0:		.word	LC0			@ r1
665		.word	__bss_start		@ r2
666		.word	_end			@ r3
667		.word	_got_start		@ r11
668		.word	_got_end		@ ip
669		.size	LC0, . - LC0
670
671		.type	LC1, #object
672LC1:		.word	.L_user_stack_end - LC1	@ sp
673		.word	_edata - LC1		@ r6
674		.size	LC1, . - LC1
675
		/* image size from restart to _end + 16k page directory + 1MB slack */
676.Lheadroom:
677		.word	_end - restart + 16384 + 1024*1024
678
		/* distance from this word to the last 4 bytes of the input data */
679.Linflated_image_size_offset:
680		.long	(input_data_end - 4) - .
681
681
682#ifdef CONFIG_ARCH_RPC
		/*
		 * params: global helper that returns the constant 0x10000100
		 * in r0.  The .ltorg places the literal pool holding that
		 * constant right after the routine.
		 */
683		.globl	params
684params:		ldr	r0, =0x10000100		@ params_phys for RPC
685		mov	pc, lr
686		.ltorg
687		.align
688#endif
689
690/*
691 * dcache_line_size - get the minimum D-cache line size from the CTR register
692 * on ARMv7.
 *
 * dcache_line_size reg, tmp
 *
 * The cache type register is read from the memory-mapped SCB on v7-M
 * builds and from CP15 otherwise.  Bits [19:16] are extracted as n and
 * reg is set to 4 << n, the line size in bytes.  tmp is clobbered.
693 */
694		.macro	dcache_line_size, reg, tmp
695#ifdef CONFIG_CPU_V7M
696		movw	\tmp, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_CTR
697		movt	\tmp, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_CTR
698		ldr	\tmp, [\tmp]
699#else
700		mrc	p15, 0, \tmp, c0, c0, 1		@ read ctr
701#endif
702		lsr	\tmp, \tmp, #16
703		and	\tmp, \tmp, #0xf		@ cache line size encoding
704		mov	\reg, #4			@ bytes per word
705		mov	\reg, \reg, lsl \tmp		@ actual cache line size
706		.endm
707
708/*
709 * Turn on the cache.  We need to setup some page tables so that we
710 * can have both the I and D caches on.
711 *
712 * We place the page tables 16k down from the kernel execution address,
713 * and we hope that nothing else is using it.  If we're using it, we
714 * will go pop!
715 *
716 * On entry,
717 *  r4 = kernel execution address
718 *  r7 = architecture number
719 *  r8 = atags pointer
720 * On exit,
721 *  r0, r1, r2, r3, r9, r10, r12 corrupted
722 * This routine must preserve:
723 *  r4, r7, r8
 *
 * The routine itself only selects byte offset 8 within a proc_types
 * entry (the 'cache on' slot that follows the two ID words) and lets
 * call_cache_fn dispatch to the method for the running CPU.  lr is left
 * untouched, so the selected method returns straight to our caller.
724 */
725		.align	5
726cache_on:	mov	r3, #8			@ cache_on function
727		b	call_cache_fn
728
729/*
730 * Initialize the highest priority protection region, PR7
731 * to cover all 32bit address and cacheable and bufferable.
 *
 * 'cache on' method for the ARM94x proc_types entry.  Only r0 is used;
 * returns with mov pc, lr.
732 */
733__armv4_mpu_cache_on:
734		mov	r0, #0x3f		@ 4G, the whole
735		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
736		mcr 	p15, 0, r0, c6, c7, 1
737
738		mov	r0, #0x80		@ PR7
739		mcr	p15, 0, r0, c2, c0, 0	@ D-cache on
740		mcr	p15, 0, r0, c2, c0, 1	@ I-cache on
741		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
742
743		mov	r0, #0xc000
744		mcr	p15, 0, r0, c5, c0, 1	@ I-access permission
745		mcr	p15, 0, r0, c5, c0, 0	@ D-access permission
746
747		mov	r0, #0
748		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
749		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
750		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
751		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
752						@ ...I .... ..D. WC.M
753		orr	r0, r0, #0x002d		@ .... .... ..1. 11.1
754		orr	r0, r0, #0x1000		@ ...1 .... .... ....
755
756		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
757
758		mov	r0, #0
759		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
760		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
761		mov	pc, lr
762
/*
 * 'cache on' method for the ARM74x proc_types entry.  Sets up protection
 * region 7 like the v4 MPU variant, with a single unified cache.  Only
 * r0 is used; returns with mov pc, lr.
 *
 * NOTE(review): as written, the control register is finally loaded with
 * 0, not with the value that has the W, C and M bits ORed in - the
 * in-line "??" remarks below already question this.  Left untouched
 * because the intended behaviour cannot be confirmed from this file.
 */
763__armv3_mpu_cache_on:
764		mov	r0, #0x3f		@ 4G, the whole
765		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
766
767		mov	r0, #0x80		@ PR7
768		mcr	p15, 0, r0, c2, c0, 0	@ cache on
769		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
770
771		mov	r0, #0xc000
772		mcr	p15, 0, r0, c5, c0, 0	@ access permission
773
774		mov	r0, #0
775		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
776		/*
777		 * ?? ARMv3 MMU does not allow reading the control register,
778		 * does this really work on ARMv3 MPU?
779		 */
780		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
781						@ .... .... .... WC.M
782		orr	r0, r0, #0x000d		@ .... .... .... 11.1
783		/* ?? this overwrites the value constructed above? */
784		mov	r0, #0
785		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
786
787		/* ?? invalidate for the second time? */
788		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
789		mov	pc, lr
790
/*
 * CB_BITS: cacheable/bufferable bits ORed into the section descriptor
 * flags that callers pass to __setup_mmu in r6 (C only for a
 * write-through D-cache configuration, C and B otherwise).
 */
791#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
792#define CB_BITS 0x08
793#else
794#define CB_BITS 0x0c
795#endif
796
/*
 * __setup_mmu - build a flat 1:1 section mapping of the whole 4GB space.
 *
 * On entry:
 *   r4 = kernel execution address
 *   r6 = descriptor flags to use for sections considered RAM
 * On exit:
 *   r3 = page directory address (r4 - 16k, with the low 14 bits cleared)
 *   r0, r1, r2, r9, r10 corrupted
 *
 * The directory has 4096 word entries, one per 1MB section.  "RAM" is
 * taken to be the 256MB starting at the page directory address rounded
 * down to 256KB.  Returns with mov pc, lr.
 */
797__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
798		bic	r3, r3, #0xff		@ Align the pointer
799		bic	r3, r3, #0x3f00
800/*
801 * Initialise the page tables, turning on the cacheable and bufferable
802 * bits for the RAM area only.
803 */
804		mov	r0, r3
805		mov	r9, r0, lsr #18
806		mov	r9, r9, lsl #18		@ start of RAM
807		add	r10, r9, #0x10000000	@ a reasonable RAM size
808		mov	r1, #0x12		@ XN|U + section mapping
809		orr	r1, r1, #3 << 10	@ AP=11
810		add	r2, r3, #16384
8111:		cmp	r1, r9			@ if virt > start of RAM
812		cmphs	r10, r1			@   && end of RAM > virt
813		bic	r1, r1, #0x1c		@ clear XN|U + C + B
814		orrlo	r1, r1, #0x10		@ Set XN|U for non-RAM
815		orrhs	r1, r1, r6		@ set RAM section settings
816		str	r1, [r0], #4		@ 1:1 mapping
817		add	r1, r1, #1048576
818		teq	r0, r2
819		bne	1b
820/*
821 * If ever we are running from Flash, then we surely want the cache
822 * to be enabled also for our execution instance...  We map 2MB of it
823 * so there is no map overlap problem for up to 1 MB compressed kernel.
824 * If the execution is in RAM then we would only be duplicating the above.
825 */
826		orr	r1, r6, #0x04		@ ensure B is set for this
827		orr	r1, r1, #3 << 10
828		mov	r2, pc
829		mov	r2, r2, lsr #20		@ section index of current pc
830		orr	r1, r1, r2, lsl #20
831		add	r0, r3, r2, lsl #2	@ address of its descriptor
832		str	r1, [r0], #4
833		add	r1, r1, #1048576
834		str	r1, [r0]
835		mov	pc, lr
836ENDPROC(__setup_mmu)
837
838@ Enable unaligned access on v6, to allow better code generation
839@ for the decompressor C code:
@ SCTLR.A is cleared and SCTLR.U is set, then the common ARMv4 MMU
@ 'cache on' path is taken.  lr is not modified here, so that path
@ returns to the original caller.
840__armv6_mmu_cache_on:
841		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
842		bic	r0, r0, #2		@ A (no unaligned access fault)
843		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
844		mcr	p15, 0, r0, c1, c0, 0	@ write SCTLR
845		b	__armv4_mmu_cache_on
846
/*
 * __arm926ejs_mmu_cache_on: optional ARM926EJ-S prologue (selects
 * write-through D-cache mode when so configured) that falls through
 * into __armv4_mmu_cache_on.
 *
 * __armv4_mmu_cache_on: 'cache on' method for MMU based ARMv4/v5 cores.
 * Expects r4 = kernel execution address (consumed by __setup_mmu).
 * lr is parked in r12 because the nested bl calls overwrite it; the
 * return is through r12.  Without CONFIG_MMU the routine just returns.
 */
847__arm926ejs_mmu_cache_on:
848#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
849		mov	r0, #4			@ put dcache in WT mode
850		mcr	p15, 7, r0, c15, c0, 0
851#endif
852
853__armv4_mmu_cache_on:
854		mov	r12, lr
855#ifdef CONFIG_MMU
856		mov	r6, #CB_BITS | 0x12	@ U
857		bl	__setup_mmu
858		mov	r0, #0
859		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
860		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
861		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
862		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
863		orr	r0, r0, #0x0030
864 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
865		bl	__common_mmu_cache_on
866		mov	r0, #0
867		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
868#endif
869		mov	pc, r12
870
/*
 * __armv7_mmu_cache_on - 'cache on' method for the "new CPU Id" entry.
 *
 * Expects r4 = kernel execution address.  lr is parked in r12 and the
 * return is through r12.  With CONFIG_MMU, ID_MMFR0 is read into r11
 * and the page tables and MMU are only set up when its low four bits
 * (the VMSA field) are non-zero.
 *
 * The conditional (ne) instructions in the second CONFIG_MMU section
 * rely on the flags set by the last tst of r11: none of the bic/orr/mrc
 * instructions in between update the flags.  The two bic operations on
 * r6 run unconditionally, but r6 is only written back by an mcrne.
 */
871__armv7_mmu_cache_on:
872		enable_cp15_barriers	r11
873		mov	r12, lr
874#ifdef CONFIG_MMU
875		mrc	p15, 0, r11, c0, c1, 4	@ read ID_MMFR0
876		tst	r11, #0xf		@ VMSA
877		movne	r6, #CB_BITS | 0x02	@ !XN
878		blne	__setup_mmu
879		mov	r0, #0
880		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
881		tst	r11, #0xf		@ VMSA
882		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
883#endif
884		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
885		bic	r0, r0, #1 << 28	@ clear SCTLR.TRE
886		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
887		orr	r0, r0, #0x003c		@ write buffer
888		bic	r0, r0, #2		@ A (no unaligned access fault)
889		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
890						@ (needed for ARM1176)
891#ifdef CONFIG_MMU
892 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
893		mrcne   p15, 0, r6, c2, c0, 2   @ read ttb control reg
894		orrne	r0, r0, #1		@ MMU enabled
895		movne	r1, #0xfffffffd		@ domain 0 = client
896		bic     r6, r6, #1 << 31        @ 32-bit translation system
897		bic     r6, r6, #(7 << 0) | (1 << 4)	@ use only ttbr0
898		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
899		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
900		mcrne   p15, 0, r6, c2, c0, 2   @ load ttb control
901#endif
902		mcr	p15, 0, r0, c7, c5, 4	@ ISB
903		mcr	p15, 0, r0, c1, c0, 0	@ load control register
904		mrc	p15, 0, r0, c1, c0, 0	@ and read it back
905		mov	r0, #0
906		mcr	p15, 0, r0, c7, c5, 4	@ ISB
907		mov	pc, r12
908
/*
 * __fa526_cache_on - 'cache on' method for the FA526 proc_types entry.
 *
 * Expects r4 = kernel execution address (consumed by __setup_mmu).
 * lr is parked in r12 because the nested bl calls overwrite it; the
 * return is through r12.
 */
909__fa526_cache_on:
910		mov	r12, lr
911		mov	r6, #CB_BITS | 0x12	@ U
912		bl	__setup_mmu
913		mov	r0, #0
914		mcr	p15, 0, r0, c7, c7, 0	@ Invalidate whole cache
915		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
916		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
917		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
918		orr	r0, r0, #0x1000		@ I-cache enable
919		bl	__common_mmu_cache_on
920		mov	r0, #0
921		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
922		mov	pc, r12
923
/*
 * __common_mmu_cache_on - load the translation table base, the domain
 * access control register and the control register.
 *
 * On entry:
 *   r0 = control register value prepared by the caller
 *   r3 = page table pointer (as left by __setup_mmu)
 *   lr = return address
 * r1 is corrupted.  Unless DEBUG is defined, the write buffer, D-cache
 * and MMU bits (0x000d) are ORed into r0 first.
 *
 * The control register write sits at the start of a 32-byte aligned
 * block.  The final sub adds nothing to lr (r0, lsr #32 is 0); it only
 * makes the return depend on the value read back.
 *
 * NOTE(review): with CONFIG_THUMB2_KERNEL nothing is assembled after
 * the label, so a call would fall through into the code that follows.
 * Presumably the callers are never reached on Thumb-2 builds - confirm.
 */
924__common_mmu_cache_on:
925#ifndef CONFIG_THUMB2_KERNEL
926#ifndef DEBUG
927		orr	r0, r0, #0x000d		@ Write buffer, mmu
928#endif
929		mov	r1, #-1
930		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
931		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
932		b	1f
933		.align	5			@ cache line aligned
9341:		mcr	p15, 0, r0, c1, c0, 0	@ load control register
935		mrc	p15, 0, r0, c1, c0, 0	@ and read it back to
936		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
937#endif
938
/*
 * Size in bytes of one proc_types entry: two ID words followed by
 * three 4-byte method slots.
 */
939#define PROC_ENTRY_SIZE (4*5)
940
941/*
942 * Here follow the relocatable cache support functions for the
943 * various processors.  This is a generic hook for locating an
944 * entry and jumping to an instruction at the specified offset
945 * from the start of the block.  Please note this is all position
946 * independent code.
947 *
948 *  r1  = corrupted
949 *  r2  = corrupted
950 *  r3  = block offset
951 *  r9  = corrupted
952 *  r12 = corrupted
 *
 * The table is walked entry by entry until ((id ^ match) & mask) == 0.
 * The loop has no explicit end test: it terminates because the final
 * proc_types entry has match 0 and mask 0 and therefore always matches.
 * The method is entered with a jump, not a call, so lr still holds the
 * return address of whoever called cache_on/cache_off.
953 */
954
955call_cache_fn:	adr	r12, proc_types
956#ifdef CONFIG_CPU_CP15
957		mrc	p15, 0, r9, c0, c0	@ get processor ID
958#elif defined(CONFIG_CPU_V7M)
959		/*
960		 * On v7-M the processor id is located in the V7M_SCB_CPUID
961		 * register, but as cache handling is IMPLEMENTATION DEFINED on
962		 * v7-M (if existent at all) we just return early here.
963		 * If V7M_SCB_CPUID were used the cpu ID functions (i.e.
964		 * __armv7_mmu_cache_{on,off,flush}) would be selected which
965		 * use cp15 registers that are not implemented on v7-M.
966		 */
967		bx	lr
968#else
969		ldr	r9, =CONFIG_PROCESSOR_ID
970#endif
9711:		ldr	r1, [r12, #0]		@ get value
972		ldr	r2, [r12, #4]		@ get mask
973		eor	r1, r1, r9		@ (real ^ match)
974		tst	r1, r2			@       & mask
975 ARM(		addeq	pc, r12, r3		) @ call cache function
976 THUMB(		addeq	r12, r3			)
977 THUMB(		moveq	pc, r12			) @ call cache function
978		add	r12, r12, #PROC_ENTRY_SIZE
979		b	1b
980
981/*
982 * Table for cache operations.  This is basically:
983 *   - CPU ID match
984 *   - CPU ID mask
985 *   - 'cache on' method instruction
986 *   - 'cache off' method instruction
987 *   - 'cache flush' method instruction
988 *
989 * We match an entry using: ((real_id ^ match) & mask) == 0
990 *
991 * Writethrough caches generally only need 'on' and 'off'
992 * methods.  Writeback caches _must_ have the flush method
993 * defined.
 *
 * Every entry must occupy exactly PROC_ENTRY_SIZE bytes: two words plus
 * three 4-byte method slots.  cache_on selects byte offset 8 and
 * cache_off byte offset 12 within the matching entry.  A slot is either
 * a wide branch, W(b), or "mov pc, lr" (padded with a nop in Thumb) for
 * "nothing to do".  Entries are tried in order, so specific IDs come
 * before the architecture-level matches, and the final all-zero entry
 * matches anything.
994 */
995		.align	2
996		.type	proc_types,#object
997proc_types:
998		.word	0x41000000		@ old ARM ID
999		.word	0xff00f000
1000		mov	pc, lr
1001 THUMB(		nop				)
1002		mov	pc, lr
1003 THUMB(		nop				)
1004		mov	pc, lr
1005 THUMB(		nop				)
1006
1007		.word	0x41007000		@ ARM7/710
1008		.word	0xfff8fe00
1009		mov	pc, lr
1010 THUMB(		nop				)
1011		mov	pc, lr
1012 THUMB(		nop				)
1013		mov	pc, lr
1014 THUMB(		nop				)
1015
1016		.word	0x41807200		@ ARM720T (writethrough)
1017		.word	0xffffff00
1018		W(b)	__armv4_mmu_cache_on
1019		W(b)	__armv4_mmu_cache_off
1020		mov	pc, lr
1021 THUMB(		nop				)
1022
1023		.word	0x41007400		@ ARM74x
1024		.word	0xff00ff00
1025		W(b)	__armv3_mpu_cache_on
1026		W(b)	__armv3_mpu_cache_off
1027		W(b)	__armv3_mpu_cache_flush
1028
1029		.word	0x41009400		@ ARM94x
1030		.word	0xff00ff00
1031		W(b)	__armv4_mpu_cache_on
1032		W(b)	__armv4_mpu_cache_off
1033		W(b)	__armv4_mpu_cache_flush
1034
1035		.word	0x41069260		@ ARM926EJ-S (v5TEJ)
1036		.word	0xff0ffff0
1037		W(b)	__arm926ejs_mmu_cache_on
1038		W(b)	__armv4_mmu_cache_off
1039		W(b)	__armv5tej_mmu_cache_flush
1040
1041		.word	0x00007000		@ ARM7 IDs
1042		.word	0x0000f000
1043		mov	pc, lr
1044 THUMB(		nop				)
1045		mov	pc, lr
1046 THUMB(		nop				)
1047		mov	pc, lr
1048 THUMB(		nop				)
1049
1050		@ Everything from here on will be the new ID system.
1051
1052		.word	0x4401a100		@ sa110 / sa1100
1053		.word	0xffffffe0
1054		W(b)	__armv4_mmu_cache_on
1055		W(b)	__armv4_mmu_cache_off
1056		W(b)	__armv4_mmu_cache_flush
1057
1058		.word	0x6901b110		@ sa1110
1059		.word	0xfffffff0
1060		W(b)	__armv4_mmu_cache_on
1061		W(b)	__armv4_mmu_cache_off
1062		W(b)	__armv4_mmu_cache_flush
1063
1064		.word	0x56056900		@ PXA9xx
1065		.word	0xffffff00
1066		W(b)	__armv4_mmu_cache_on
1067		W(b)	__armv4_mmu_cache_off
1068		W(b)	__armv4_mmu_cache_flush
1069
1070		.word	0x56158000		@ PXA168
1071		.word	0xfffff000
1072		W(b)	__armv4_mmu_cache_on
1073		W(b)	__armv4_mmu_cache_off
1074		W(b)	__armv5tej_mmu_cache_flush
1075
1076		.word	0x56050000		@ Feroceon
1077		.word	0xff0f0000
1078		W(b)	__armv4_mmu_cache_on
1079		W(b)	__armv4_mmu_cache_off
1080		W(b)	__armv5tej_mmu_cache_flush
1081
1082#ifdef CONFIG_CPU_FEROCEON_OLD_ID
1083		/* this conflicts with the standard ARMv5TE entry */
		/*
		 * W(b), not a bare b, as in every other entry: a narrow
		 * branch encoding would shrink the entry below
		 * PROC_ENTRY_SIZE.
		 */
1084		.word	0x41009260		@ Old Feroceon
1085		.word	0xff00fff0
1086		W(b)	__armv4_mmu_cache_on
1087		W(b)	__armv4_mmu_cache_off
1088		W(b)	__armv5tej_mmu_cache_flush
1089#endif
1090
1091		.word	0x66015261		@ FA526
1092		.word	0xff01fff1
1093		W(b)	__fa526_cache_on
1094		W(b)	__armv4_mmu_cache_off
1095		W(b)	__fa526_cache_flush
1096
1097		@ These match on the architecture ID
1098
1099		.word	0x00020000		@ ARMv4T
1100		.word	0x000f0000
1101		W(b)	__armv4_mmu_cache_on
1102		W(b)	__armv4_mmu_cache_off
1103		W(b)	__armv4_mmu_cache_flush
1104
1105		.word	0x00050000		@ ARMv5TE
1106		.word	0x000f0000
1107		W(b)	__armv4_mmu_cache_on
1108		W(b)	__armv4_mmu_cache_off
1109		W(b)	__armv4_mmu_cache_flush
1110
1111		.word	0x00060000		@ ARMv5TEJ
1112		.word	0x000f0000
1113		W(b)	__armv4_mmu_cache_on
1114		W(b)	__armv4_mmu_cache_off
1115		W(b)	__armv5tej_mmu_cache_flush
1116
1117		.word	0x0007b000		@ ARMv6
1118		.word	0x000ff000
1119		W(b)	__armv6_mmu_cache_on
1120		W(b)	__armv4_mmu_cache_off
1121		W(b)	__armv6_mmu_cache_flush
1122
1123		.word	0x000f0000		@ new CPU Id
1124		.word	0x000f0000
1125		W(b)	__armv7_mmu_cache_on
1126		W(b)	__armv7_mmu_cache_off
1127		W(b)	__armv7_mmu_cache_flush
1128
1129		.word	0			@ unrecognised type
1130		.word	0
1131		mov	pc, lr
1132 THUMB(		nop				)
1133		mov	pc, lr
1134 THUMB(		nop				)
1135		mov	pc, lr
1136 THUMB(		nop				)
1137
1138		.size	proc_types, . - proc_types
1139
1140		/*
1141		 * If you get a "non-constant expression in ".if" statement"
1142		 * error from the assembler on this line, check that you have
1143		 * not accidentally written a "b" instruction where you should
1144		 * have written W(b).
1145		 */
1146		.if (. - proc_types) % PROC_ENTRY_SIZE != 0
1147		.error "The size of one or more proc_types entries is wrong."
1148		.endif
1149
/*
 * cache_off: switch the cache and the MMU off.
 *
 * The control register can be read back on ARMv4, whereas ARMv3
 * offers no such read access.
 *
 * Only selects the per-CPU handler: r3 is loaded with 12, the offset
 * of the cache_off branch inside a proc_types entry, and control is
 * handed to call_cache_fn.
 *
 * On exit,
 *  r0, r1, r2, r3, r9, r12 corrupted
 * This routine must preserve:
 *  r4, r7, r8
 */
		.align	5
cache_off:
		mov	r3, #12			@ select the cache_off handler
		b	call_cache_fn
1162
@ ARMv4 MPU variant of cache_off: clears control register bits 0, 2
@ and 3, then drains the write buffer and flushes both caches.
@ Corrupts r0.
__armv4_mpu_cache_off:
		mrc	p15, 0, r0, c1, c0, 0	@ r0 = control register
		bic	r0, r0, #0xd		@ drop bits 0, 2 and 3
		mcr	p15, 0, r0, c1, c0, 0	@ MPU and cache are now off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain the write buffer
		mcr	p15, 0, r0, c7, c6, 0	@ flush the D-cache
		mcr	p15, 0, r0, c7, c5, 0	@ flush the I-cache
		mov	pc, lr
1172
@ ARMv3 MPU variant of cache_off: clears control register bits 0, 2
@ and 3, then invalidates the whole cache.  Corrupts r0.
__armv3_mpu_cache_off:
		mrc	p15, 0, r0, c1, c0, 0	@ r0 = control register
		bic	r0, r0, #0xd		@ drop bits 0, 2 and 3
		mcr	p15, 0, r0, c1, c0, 0	@ MPU and cache are now off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr
1180
@ ARMv4 MMU variant of cache_off.  Without CONFIG_MMU this is a plain
@ return; otherwise control register bits 0, 2 and 3 are cleared and
@ the whole cache and TLB are invalidated.  Corrupts r0.
__armv4_mmu_cache_off:
#ifdef CONFIG_MMU
		mrc	p15, 0, r0, c1, c0, 0	@ r0 = control register
		bic	r0, r0, #0xd		@ drop bits 0, 2 and 3
		mcr	p15, 0, r0, c1, c0, 0	@ MMU and cache are now off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c7, 0	@ invalidate whole cache v4
		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB v4
#endif
		mov	pc, lr
1191
@ ARMv7 variant of cache_off.  With CONFIG_MMU control register bits 0
@ and 2 are cleared and the TLB is invalidated; without it only bit 2
@ is cleared.  Finishes with BTC invalidate, DSB and ISB (CP15 forms).
@ Corrupts r0.
__armv7_mmu_cache_off:
		mrc	p15, 0, r0, c1, c0, 0	@ r0 = control register
#ifdef CONFIG_MMU
		bic	r0, r0, #0x5		@ drop bits 0 and 2
#else
		bic	r0, r0, #0x4		@ drop bit 2 only
#endif
		mcr	p15, 0, r0, c1, c0, 0	@ turn MMU and cache off
		mov	r0, #0
#ifdef CONFIG_MMU
		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB
#endif
		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
		mcr	p15, 0, r0, c7, c10, 4	@ DSB
		mcr	p15, 0, r0, c7, c5, 4	@ ISB
		mov	pc, lr
1208
/*
 * cache_clean_flush: clean and flush the cache so that memory and
 * cache contents stay consistent.
 *
 * Only selects the per-CPU handler: the end address is copied to r11,
 * r3 is loaded with 16 (the offset of the flush branch inside a
 * proc_types entry) and control is handed to call_cache_fn.
 *
 * On entry,
 *  r0 = start address
 *  r1 = end address (exclusive)
 * On exit,
 *  r1, r2, r3, r9, r10, r11, r12 corrupted
 *  (__armv7_mmu_cache_flush additionally advances r0)
 * This routine must preserve:
 *  r4, r6, r7, r8
 */
		.align	5
cache_clean_flush:
		mov	r11, r1			@ handlers take the end in r11
		mov	r3, #16			@ select the flush handler
		b	call_cache_fn
1225
@ ARMv4 MPU flush handler.  Returns at once when bit 0 of r4 is set.
@ Otherwise walks segments 7..0 and, within each, entries 63..0 with a
@ clean & invalidate by index, then invalidates the I cache and drains
@ the write buffer.  Corrupts r1, r2, r3.
@ NOTE(review): the D cache is invalidated before the clean loop runs;
@ kept exactly as found - confirm this is intended.
__armv4_mpu_cache_flush:
		tst	r4, #1			@ bit 0 of r4 set?
		movne	pc, lr			@ then skip the flush
		mov	r3, #0
		mov	r2, #1
		mcr	p15, 0, ip, c7, c6, 0	@ invalidate D cache
		mov	r1, #7 << 5		@ start at segment 7 (8 segments)
1:		orr	r3, r1, #63 << 26	@ start at entry 63 (64 entries)
2:		mcr	p15, 0, r3, c7, c14, 2	@ clean & invalidate D index
		subs	r3, r3, #1 << 26
		bcs	2b			@ entries 63 to 0
		subs	r1, r1, #1 << 5
		bcs	1b			@ segments 7 to 0

		teq	r2, #0
		mcrne	p15, 0, ip, c7, c5, 0	@ invalidate I cache
		mcr	p15, 0, ip, c7, c10, 4	@ drain WB
		mov	pc, lr
1244
@ FA526 flush handler.  Returns at once when bit 0 of r4 is set,
@ otherwise cleans and invalidates the D cache, flushes the I cache
@ and drains the write buffer.  Corrupts r1.
__fa526_cache_flush:
		tst	r4, #1			@ bit 0 of r4 set?
		movne	pc, lr			@ then skip the flush
		mov	r1, #0
		mcr	p15, 0, r1, c7, c14, 0	@ clean and invalidate D cache
		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr
1253
@ ARMv6 flush handler.  The I+BTB invalidate and the write buffer
@ drain are always issued; the two clean+invalidate operations are
@ issued only while bit 0 of r4 is clear.  Corrupts r1.
__armv6_mmu_cache_flush:
		tst	r4, #1			@ EQ while bit 0 of r4 is clear
		mov	r1, #0			@ (leaves the flags alone)
		mcreq	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
		mcreq	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr
1262
@ ARMv7 flush handler.
@
@ On entry,
@  r0  = start address
@  r11 = end address (exclusive)
@  r4  = bit 0 set means the data cache maintenance is skipped
@ Corrupts r0, r1, r2, r10, r11.
@
@ With bit 0 of r4 set only the iflush tail runs.  Otherwise ID_MMFR1
@ bits [19:16] select between one clean+invalidate of the whole D
@ cache (field non-zero) and a clean/invalidate by VA over the lines
@ covering [r0, r11) (field zero).
@
@ Addresses are unsigned quantities, so the by-VA loop uses HI/LO
@ rather than GT: a signed test would skip the whole loop for a range
@ that straddles 0x80000000.  The end test is made before r0 is
@ advanced so that a range whose last line is the topmost line of the
@ address space terminates instead of wrapping around.
__armv7_mmu_cache_flush:
		enable_cp15_barriers	r10
		tst	r4, #1
		bne	iflush
		mrc	p15, 0, r10, c0, c1, 5	@ read ID_MMFR1
		tst	r10, #0xf << 16		@ hierarchical cache (ARMv7)
		mov	r10, #0
		beq	hierarchical
		mcr	p15, 0, r10, c7, c14, 0	@ clean+invalidate D
		b	iflush
hierarchical:
		dcache_line_size r1, r2		@ r1 := dcache min line size
		sub	r2, r1, #1		@ r2 := line size mask
		bic	r0, r0, r2		@ round down start to line size
		sub	r11, r11, #1		@ end address is exclusive
		bic	r11, r11, r2		@ round down end to line size
		cmp	r0, r11			@ empty range?
		bhi	iflush			@ unsigned: start beyond last line
0:		mcr	p15, 0, r0, c7, c14, 1	@ Dcache clean/invalidate by VA
		cmp	r0, r11			@ was that the last line?
		add	r0, r0, r1		@ next line (flags untouched)
		blo	0b			@ unsigned: more lines to go
iflush:
		mcr	p15, 0, r10, c7, c10, 4	@ DSB
		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I+BTB
		mcr	p15, 0, r10, c7, c10, 4	@ DSB
		mcr	p15, 0, r10, c7, c5, 4	@ ISB
		mov	pc, lr
1290
@ ARMv5TEJ flush handler.  Returns at once when bit 0 of r4 is set.
@ Otherwise repeats the test/clean/invalidate operation, which reports
@ through the condition flags, until the loop condition clears, then
@ flushes the I cache and drains the write buffer.
__armv5tej_mmu_cache_flush:
		tst	r4, #1			@ bit 0 of r4 set?
		movne	pc, lr			@ then skip the flush
1:
		mrc	p15, 0, APSR_nzcv, c7, c14, 3	@ test,clean,invalidate D cache
		bne	1b			@ go round again while NE
		mcr	p15, 0, r0, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r0, c7, c10, 4	@ drain WB
		mov	pc, lr
1299
@ ARMv4 MMU flush handler (software flush).  Returns at once when
@ bit 0 of r4 is set.
@
@ Works out twice the D cache size (r2) and the line size (r11), taken
@ from the cache type register unless that reads back equal to r9, in
@ which case 2 x 32K and 32 bytes are assumed.  It then loads one word
@ per line from a window of that size starting at the 64-byte aligned
@ pc, and finally flushes the I and D caches and drains the write
@ buffer.  Corrupts r1, r2, r3, r11.
@ NOTE(review): r9 is presumably the CPU ID left by call_cache_fn -
@ confirm against that routine.
__armv4_mmu_cache_flush:
		tst	r4, #1			@ bit 0 of r4 set?
		movne	pc, lr			@ then skip the flush
		mov	r11, #32		@ default: 32 byte line size
		mov	r2, #64*1024		@ default: 32K dcache size (*2)
		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
		teq	r3, r9			@ cache ID register present?
		beq	no_cache_id
		mov	r2, #1024
		mov	r1, r3, lsr #18
		and	r1, r1, #7		@ r1 := 3-bit size field
		mov	r2, r2, lsl r1		@ base dcache size *2
		tst	r3, #1 << 14		@ test M bit
		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
		mov	r11, #8
		mov	r3, r3, lsr #12
		and	r3, r3, #3		@ r3 := 2-bit line length field
		mov	r11, r11, lsl r3	@ cache line size in bytes
no_cache_id:
		mov	r1, pc
		bic	r1, r1, #63		@ align to longest cache line
		add	r2, r1, r2		@ r2 := end of the read window
1:
 ARM(		ldr	r3, [r1], r11		) @ s/w flush D cache
 THUMB(		ldr     r3, [r1]		) @ s/w flush D cache
 THUMB(		add     r1, r1, r11		)
		teq	r1, r2			@ window fully read?
		bne	1b

		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr
1333
@ ARMv3 flush handler, shared by the MMU and MPU flavours.  Returns at
@ once when bit 0 of r4 is set, otherwise invalidates the whole cache.
@ Corrupts r1.
__armv3_mmu_cache_flush:
__armv3_mpu_cache_flush:
		tst	r4, #1			@ bit 0 of r4 set?
		movne	pc, lr			@ then skip the flush
		mov	r1, #0
		mcr	p15, 0, r1, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr
1341
/*
 * Debug helpers that print hex numbers, strings and memory.  Like the
 * rest of this file they have to stay relocatable.
 */
#ifdef DEBUG
		.align	2
		.type	phexbuf,#object
phexbuf:
		.space	12			@ scratch string used by phex
		.size	phexbuf, . - phexbuf

@ phex: print the low r1 hex digits of r0.
@  r0 = value, r1 = number of digits
@ The digits are written into phexbuf from the last position backwards,
@ behind a NUL stored at offset r1, and the string is handed to puts.
@ phex corrupts {r0, r1, r2, r3}
phex:
		adr	r3, phexbuf
		mov	r2, #0
		strb	r2, [r3, r1]		@ terminate the string
1:
		subs	r1, r1, #1		@ step back one digit position
		movmi	r0, r3			@ none left: r0 := string and
		bmi	puts			@ let puts print it and return
		and	r2, r0, #15		@ lowest nibble of the value
		mov	r0, r0, lsr #4
		add	r2, r2, #'0'
		cmp	r2, #'9'		@ past the decimal digits?
		addgt	r2, r2, #7		@ then move on to 'A'..'F'
		strb	r2, [r3, r1]
		b	1b
1366
@ puts: print the NUL-terminated string at r0.
@ Every character goes through writeb, followed by a 0x20000-iteration
@ delay loop; a '\n' is followed by an extra '\r'.
@ puts corrupts {r0, r1, r2, r3}
puts:
		loadsp	r3, r2, r1
1:
		ldrb	r2, [r0], #1		@ next character
		teq	r2, #0
		moveq	pc, lr			@ NUL: all done
2:
		writeb	r2, r3, r1
		mov	r1, #1 << 17		@ delay counter
3:
		subs	r1, r1, #1
		bne	3b
		teq	r2, #'\n'		@ newline just sent?
		moveq	r2, #'\r'		@ then send a '\r' as well
		beq	2b
		teq	r0, #0			@ r0 == 0: single character from putc
		moveq	pc, lr
		b	1b
@ putc: print the single character in r0.
@ Shares the output path of puts (label 2 above) with r0 forced to 0,
@ so that the routine returns once the character has been sent.
@ putc corrupts {r0, r1, r2, r3}
putc:
		mov	r2, r0			@ writeb takes the character in r2
		loadsp	r3, r1, r0
		mov	r0, #0			@ no string to carry on with
		b	2b
1388
@ memdump: hex dump of the 64 words starting at r0.
@ Eight words per line, each line prefixed with its address and a
@ colon, with an extra space after the fourth word.
@  r10 = return address, r11 = word index, r12 = base address
@ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
memdump:
		mov	r11, #0			@ word index
		mov	r10, lr			@ bl below overwrites lr
		mov	r12, r0			@ base address
2:
		add	r0, r12, r11, lsl #2	@ address of this line
		mov	r1, #8
		bl	phex
		mov	r0, #':'
		bl	putc
1:
		mov	r0, #' '
		bl	putc
		ldr	r0, [r12, r11, lsl #2]	@ word to print
		mov	r1, #8
		bl	phex
		and	r0, r11, #7
		teq	r0, #3			@ fourth word of the line?
		moveq	r0, #' '
		bleq	putc			@ then add a gap
		and	r0, r11, #7
		add	r11, r11, #1
		teq	r0, #7			@ eighth word of the line?
		bne	1b
		mov	r0, #'\n'
		bl	putc
		cmp	r11, #64
		blt	2b
		mov	pc, r10
#endif
1418
1419		.ltorg
1420
#ifdef CONFIG_ARM_VIRT_EXT
@ Eight-slot vector table installed as the HYP vector base by
@ efi_enter_kernel.  Only two slots lead anywhere: the "hyp" slot
@ enters the kernel and, with CONFIG_EFI_STUB, the slot reached by an
@ HVC issued from HYP mode goes to __enter_kernel_from_hyp.  All other
@ slots branch to themselves.
		.align	5
__hyp_reentry_vectors:
		W(b)	.			@ reset: spin
		W(b)	.			@ undef: spin
#ifdef CONFIG_EFI_STUB
		W(b)	__enter_kernel_from_hyp	@ hvc from HYP
#else
		W(b)	.			@ svc: spin
#endif
		W(b)	.			@ pabort: spin
		W(b)	.			@ dabort: spin
		W(b)	__enter_kernel		@ hyp
		W(b)	.			@ irq: spin
		W(b)	.			@ fiq: spin
#endif /* CONFIG_ARM_VIRT_EXT */
1437
@ __enter_kernel: jump to the kernel entry point held in r4.
@ Sets up r0 = 0, r1 = architecture number (from r7) and r2 = atags
@ pointer (from r8) beforehand.  Does not return.
__enter_kernel:
		mov	r2, r8			@ restore atags pointer
		mov	r1, r7			@ restore architecture number
		mov	r0, #0			@ must be 0
 ARM(		mov	pc, r4		)	@ call kernel
 M_CLASS(	add	r4, r4, #1	)	@ enter in Thumb mode for M class
 THUMB(		bx	r4		)	@ entry point is always ARM for A/R classes
1445
1446reloc_code_end:
1447
#ifdef CONFIG_EFI_STUB
@ __enter_kernel_from_hyp: target of the "hvc from HYP" slot of
@ __hyp_reentry_vectors.  Clears HSCTLR bits 0 and 2 (MMU and caches),
@ synchronises with an ISB and carries on in __enter_kernel.
@ Corrupts r0.
__enter_kernel_from_hyp:
		mrc	p15, 4, r0, c1, c0, 0	@ r0 = HSCTLR
		bic	r0, r0, #(1 << 0) | (1 << 2)	@ disable MMU and caches
		mcr	p15, 4, r0, c1, c0, 0	@ HSCTLR = r0
		isb
		b	__enter_kernel
1455
@
@ efi_enter_kernel: entry into the decompressor from the EFI stub.
@
@ On entry,
@  r0 = image base
@  r1 = DT pointer
@  r2 = DT size (added to the DT pointer to form the DT end)
@
@ Cleans the code that may run with the MMU off, records the boot mode,
@ marks in bit 0 of r4 whether the MMU was found disabled, cleans the
@ DT, switches to the stack in the .stack section and branches to
@ wont_overwrite with
@  r4 = image base (bit 0 set if the MMU is off)
@  r5 = 0 (appended DTB size)
@  r7 = 0xFFFFFFFF (machine ID)
@  r8 = DT pointer
@
@ The DT size is parked in r6 because cache_clean_flush corrupts r2
@ but preserves r6; using r2 after the first flush would yield a bogus
@ DT end address.  On the path that drops from HYP to SVC, bit 0 of r4
@ is set, so the flush handlers skip the data cache maintenance and
@ the DT range is not consumed there.
@ NOTE(review): assumes wont_overwrite expects nothing in r6 - confirm.
@
ENTRY(efi_enter_kernel)
		mov	r4, r0			@ preserve image base
		mov	r8, r1			@ preserve DT pointer
		mov	r6, r2			@ preserve DT size

		adr_l	r0, call_cache_fn
		adr	r1, 0f			@ clean the region of code we
		bl	cache_clean_flush	@ may run with the MMU off

#ifdef CONFIG_ARM_VIRT_EXT
		@
		@ The EFI spec does not support booting on ARM in HYP mode,
		@ since it mandates that the MMU and caches are on, with all
		@ 32-bit addressable DRAM mapped 1:1 using short descriptors.
		@
		@ While the EDK2 reference implementation adheres to this,
		@ U-Boot might decide to enter the EFI stub in HYP mode
		@ anyway, with the MMU and caches either on or off.
		@
		mrs	r0, cpsr		@ get the current mode
		msr	spsr_cxsf, r0		@ record boot mode
		and	r0, r0, #MODE_MASK	@ are we running in HYP mode?
		cmp	r0, #HYP_MODE
		bne	.Lefi_svc

		mrc	p15, 4, r1, c1, c0, 0	@ read HSCTLR
		tst	r1, #0x1		@ MMU enabled at HYP?
		beq	1f

		@
		@ When running in HYP mode with the caches on, we're better
		@ off just carrying on using the cached 1:1 mapping that the
		@ firmware provided. Set up the HYP vectors so HVC instructions
		@ issued from HYP mode take us to the correct handler code. We
		@ will disable the MMU before jumping to the kernel proper.
		@
 ARM(		bic	r1, r1, #(1 << 30)	) @ clear HSCTLR.TE
 THUMB(		orr	r1, r1, #(1 << 30)	) @ set HSCTLR.TE
		mcr	p15, 4, r1, c1, c0, 0
		adr	r0, __hyp_reentry_vectors
		mcr	p15, 4, r0, c12, c0, 0	@ set HYP vector base (HVBAR)
		isb
		b	.Lefi_hyp

		@
		@ When running in HYP mode with the caches off, we need to drop
		@ into SVC mode now, and let the decompressor set up its cached
		@ 1:1 mapping as usual.
		@
1:		mov	r9, r4			@ preserve image base
		bl	__hyp_stub_install	@ install HYP stub vectors
		safe_svcmode_maskall	r1	@ drop to SVC mode
		msr	spsr_cxsf, r0		@ record boot mode
		orr	r4, r9, #1		@ restore image base and set LSB
		b	.Lefi_hyp
.Lefi_svc:
#endif
		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
		tst	r0, #0x1		@ MMU enabled?
		orreq	r4, r4, #1		@ set LSB if not

.Lefi_hyp:
		mov	r0, r8			@ DT start
		add	r1, r8, r6		@ DT end (size saved on entry)
		bl	cache_clean_flush

		adr	r0, 0f			@ switch to our stack
		ldr	sp, [r0]		@ offset from 0f to the stack end
		add	sp, sp, r0

		mov	r5, #0			@ appended DTB size
		mov	r7, #0xFFFFFFFF		@ machine ID
		b	wont_overwrite
ENDPROC(efi_enter_kernel)
0:		.long	.L_user_stack_end - .
#endif
1531
1532		.align
1533		.section ".stack", "aw", %nobits
1534.L_user_stack:	.space	4096
1535.L_user_stack_end:
1536