xref: /openbmc/linux/arch/arm/boot/compressed/head.S (revision e2028c8e)
1/* SPDX-License-Identifier: GPL-2.0-only */
2/*
3 *  linux/arch/arm/boot/compressed/head.S
4 *
5 *  Copyright (C) 1996-2002 Russell King
6 *  Copyright (C) 2004 Hyok S. Choi (MPU support)
7 */
8#include <linux/linkage.h>
9#include <asm/assembler.h>
10#include <asm/v7m.h>
11
12#include "efi-header.S"
13
14 AR_CLASS(	.arch	armv7-a	)
15 M_CLASS(	.arch	armv7-m	)
16
17/*
18 * Debugging stuff
19 *
20 * Note that these macros must not contain any code which is not
21 * 100% relocatable.  Any attempt to do so will result in a crash.
22 * Please select one of the following when turning on debugging.
23 */
24#ifdef DEBUG
25
26#if defined(CONFIG_DEBUG_ICEDCC)
27
28#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
29		.macro	loadsp, rb, tmp1, tmp2
30		.endm
31		.macro	writeb, ch, rb
32		mcr	p14, 0, \ch, c0, c5, 0
33		.endm
34#elif defined(CONFIG_CPU_XSCALE)
35		.macro	loadsp, rb, tmp1, tmp2
36		.endm
37		.macro	writeb, ch, rb
38		mcr	p14, 0, \ch, c8, c0, 0
39		.endm
40#else
41		.macro	loadsp, rb, tmp1, tmp2
42		.endm
43		.macro	writeb, ch, rb
44		mcr	p14, 0, \ch, c1, c0, 0
45		.endm
46#endif
47
48#else
49
50#include CONFIG_DEBUG_LL_INCLUDE
51
52		.macro	writeb,	ch, rb
53		senduart \ch, \rb
54		.endm
55
56#if defined(CONFIG_ARCH_SA1100)
57		.macro	loadsp, rb, tmp1, tmp2
58		mov	\rb, #0x80000000	@ physical base address
59#ifdef CONFIG_DEBUG_LL_SER3
60		add	\rb, \rb, #0x00050000	@ Ser3
61#else
62		add	\rb, \rb, #0x00010000	@ Ser1
63#endif
64		.endm
65#else
66		.macro	loadsp,	rb, tmp1, tmp2
67		addruart \rb, \tmp1, \tmp2
68		.endm
69#endif
70#endif
71#endif
72
73		.macro	kputc,val
74		mov	r0, \val
75		bl	putc
76		.endm
77
78		.macro	kphex,val,len
79		mov	r0, \val
80		mov	r1, #\len
81		bl	phex
82		.endm
83
84		.macro	debug_reloc_start
85#ifdef DEBUG
86		kputc	#'\n'
87		kphex	r6, 8		/* processor id */
88		kputc	#':'
89		kphex	r7, 8		/* architecture id */
90#ifdef CONFIG_CPU_CP15
91		kputc	#':'
92		mrc	p15, 0, r0, c1, c0
93		kphex	r0, 8		/* control reg */
94#endif
95		kputc	#'\n'
96		kphex	r5, 8		/* decompressed kernel start */
97		kputc	#'-'
98		kphex	r9, 8		/* decompressed kernel end  */
99		kputc	#'>'
100		kphex	r4, 8		/* kernel execution address */
101		kputc	#'\n'
102#endif
103		.endm
104
105		.macro	debug_reloc_end
106#ifdef DEBUG
107		kphex	r5, 8		/* end of kernel */
108		kputc	#'\n'
109		mov	r0, r4
110		bl	memdump		/* dump 256 bytes at start of kernel */
111#endif
112		.endm
113
114		/*
115		 * Debug kernel copy by printing the memory addresses involved
116		 */
117		.macro dbgkc, begin, end, cbegin, cend
118#ifdef DEBUG
119		kputc   #'\n'
120		kputc   #'C'
121		kputc   #':'
122		kputc   #'0'
123		kputc   #'x'
124		kphex   \begin, 8	/* Start of compressed kernel */
125		kputc	#'-'
126		kputc	#'0'
127		kputc	#'x'
128		kphex	\end, 8		/* End of compressed kernel */
129		kputc	#'-'
130		kputc	#'>'
131		kputc   #'0'
132		kputc   #'x'
133		kphex   \cbegin, 8	/* Start of kernel copy */
134		kputc	#'-'
135		kputc	#'0'
136		kputc	#'x'
137		kphex	\cend, 8	/* End of kernel copy */
138		kputc	#'\n'
139		kputc	#'\r'
140#endif
141		.endm
142
143		.macro	enable_cp15_barriers, reg
144		mrc	p15, 0, \reg, c1, c0, 0	@ read SCTLR
145		tst	\reg, #(1 << 5)		@ CP15BEN bit set?
146		bne	.L_\@
147		orr	\reg, \reg, #(1 << 5)	@ CP15 barrier instructions
148		mcr	p15, 0, \reg, c1, c0, 0	@ write SCTLR
149 ARM(		.inst   0xf57ff06f		@ v7+ isb	)
150 THUMB(		isb						)
151.L_\@:
152		.endm
153
154		/*
155		 * The kernel build system appends the size of the
156		 * decompressed kernel at the end of the compressed data
157		 * in little-endian form.
158		 */
159		.macro	get_inflated_image_size, res:req, tmp1:req, tmp2:req
160		adr	\res, .Linflated_image_size_offset
161		ldr	\tmp1, [\res]
162		add	\tmp1, \tmp1, \res	@ address of inflated image size
163
164		ldrb	\res, [\tmp1]		@ get_unaligned_le32
165		ldrb	\tmp2, [\tmp1, #1]
166		orr	\res, \res, \tmp2, lsl #8
167		ldrb	\tmp2, [\tmp1, #2]
168		ldrb	\tmp1, [\tmp1, #3]
169		orr	\res, \res, \tmp2, lsl #16
170		orr	\res, \res, \tmp1, lsl #24
171		.endm
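/*
 * For illustration only: a rough C sketch of what the ldrb/orr sequence
 * above computes - an unaligned little-endian 32-bit load.  The helper
 * name and standalone form are hypothetical, not part of this file or
 * its build.
 *
 *	#include <stdint.h>
 *
 *	static uint32_t sketch_get_unaligned_le32(const uint8_t *p)
 *	{
 *		// assemble byte by byte so that neither the alignment of p
 *		// nor the CPU endianness matters
 *		return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
 *		       ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
 *	}
 */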
172
173		.section ".start", "ax"
174/*
175 * sort out different calling conventions
176 */
177		.align
178		/*
179		 * Always enter in ARM state for CPUs that support the ARM ISA.
180		 * As of today (2014) that's exactly the members of the A and R
181		 * classes.
182		 */
183 AR_CLASS(	.arm	)
184start:
185		.type	start,#function
186		/*
187		 * These 7 nops along with the 1 nop immediately below for
188		 * !THUMB2 form 8 nops that make the compressed kernel bootable
189		 * on legacy ARM systems that assumed the kernel was in a.out
190		 * binary format. The boot loaders on these systems would
191		 * jump 32 bytes into the image to skip the a.out header.
192		 * With these 8 nops filling exactly 32 bytes, things still
193		 * work as expected on these legacy systems. Thumb2 mode keeps
194		 * 7 of the nops, as it turns out that some boot loaders
195		 * were patching the initial instructions of the kernel, i.e.
196		 * had started to exploit this "patch area".
197		 */
198		.rept	7
199		__nop
200		.endr
201#ifndef CONFIG_THUMB2_KERNEL
202		__nop
203#else
204 AR_CLASS(	sub	pc, pc, #3	)	@ A/R: switch to Thumb2 mode
205  M_CLASS(	nop.w			)	@ M: already in Thumb2 mode
206		.thumb
207#endif
208		W(b)	1f
209
210		.word	_magic_sig	@ Magic numbers to help the loader
211		.word	_magic_start	@ absolute load/run zImage address
212		.word	_magic_end	@ zImage end address
213		.word	0x04030201	@ endianness flag
214		.word	0x45454545	@ another magic number to indicate
215		.word	_magic_table	@ additional data table
216
217		__EFI_HEADER
2181:
219 ARM_BE8(	setend	be		)	@ go BE8 if compiled for BE8
220 AR_CLASS(	mrs	r9, cpsr	)
221#ifdef CONFIG_ARM_VIRT_EXT
222		bl	__hyp_stub_install	@ get into SVC mode, reversibly
223#endif
224		mov	r7, r1			@ save architecture ID
225		mov	r8, r2			@ save atags pointer
226
227#ifndef CONFIG_CPU_V7M
228		/*
229		 * Booting from Angel - need to enter SVC mode and disable
230		 * FIQs/IRQs (numeric definitions from angel arm.h source).
231		 * We only do this if we were in user mode on entry.
232		 */
233		mrs	r2, cpsr		@ get current mode
234		tst	r2, #3			@ not user?
235		bne	not_angel
236		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
237 ARM(		swi	0x123456	)	@ angel_SWI_ARM
238 THUMB(		svc	0xab		)	@ angel_SWI_THUMB
239not_angel:
240		safe_svcmode_maskall r0
241		msr	spsr_cxsf, r9		@ Save the CPU boot mode in
242						@ SPSR
243#endif
244		/*
245		 * Note that some cache flushing and other stuff may
246		 * be needed here - is there an Angel SWI call for this?
247		 */
248
249		/*
250		 * some architecture specific code can be inserted
251		 * by the linker here, but it should preserve r7, r8, and r9.
252		 */
253
254		.text
255
256#ifdef CONFIG_AUTO_ZRELADDR
257		/*
258		 * Find the start of physical memory.  As we are executing
259		 * without the MMU on, we are in the physical address space.
260		 * We just need to get rid of any offset by aligning the
261		 * address.
262		 *
263		 * This alignment is a balance between the requirements of
264		 * different platforms - we have chosen 128MB to allow
265		 * platforms which align the start of their physical memory
266		 * to 128MB to use this feature, while allowing the zImage
267		 * to be placed within the first 128MB of memory on other
268		 * platforms.  Increasing the alignment means we place
269		 * stricter alignment requirements on the start of physical
270		 * memory, but relaxing it means that we break people who
271		 * are already placing their zImage in (eg) the top 64MB
272		 * of this range.
273		 */
274		mov	r4, pc
275		and	r4, r4, #0xf8000000
276		/* Determine final kernel image address. */
277		add	r4, r4, #TEXT_OFFSET
278#else
279		ldr	r4, =zreladdr
280#endif
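/*
 * A minimal C sketch of the CONFIG_AUTO_ZRELADDR case above, for
 * illustration only (function and parameter names are hypothetical):
 *
 *	#include <stdint.h>
 *
 *	static uint32_t sketch_auto_zreladdr(uint32_t pc, uint32_t text_offset)
 *	{
 *		// round the current PC down to a 128MB boundary to guess
 *		// the start of physical memory...
 *		uint32_t ram_start = pc & 0xf8000000u;
 *		// ...and place the kernel TEXT_OFFSET bytes above it
 *		return ram_start + text_offset;
 *	}
 */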
281
282		/*
283		 * Set up a page table only if it won't overwrite us.
284		 * That means r4 < pc || r4 - 16k page directory > &_end.
285		 * Given that r4 > &_end is most infrequent, we add a rough
286		 * additional 1MB of room for a possible appended DTB.
287		 */
288		mov	r0, pc
289		cmp	r0, r4
290		ldrcc	r0, .Lheadroom
291		addcc	r0, r0, pc
292		cmpcc	r4, r0
293		orrcc	r4, r4, #1		@ remember we skipped cache_on
294		blcs	cache_on
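/*
 * The conditional chain above, roughly in C, for illustration only
 * (names are hypothetical; headroom is the .Lheadroom value, i.e.
 * _end - restart + 16384 + 1024*1024):
 *
 *	#include <stdint.h>
 *
 *	static int sketch_defer_cache_on(uint32_t pc, uint32_t r4,
 *					 uint32_t headroom)
 *	{
 *		if (pc >= r4)
 *			return 0;	// page directory lies below us: cache_on now
 *		if (r4 >= pc + headroom)
 *			return 0;	// far enough above us: cache_on now
 *		return 1;		// might overwrite this image: set LSB, defer
 *	}
 */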
295
296restart:	adr	r0, LC1
297		ldr	sp, [r0]
298		ldr	r6, [r0, #4]
299		add	sp, sp, r0
300		add	r6, r6, r0
301
302		get_inflated_image_size	r9, r10, lr
303
304#ifndef CONFIG_ZBOOT_ROM
305		/* malloc space is above the relocated stack (64k max) */
306		add	r10, sp, #0x10000
307#else
308		/*
309		 * With ZBOOT_ROM the bss/stack is non-relocatable,
310		 * but someone could still run this code from RAM,
311		 * in which case our reference is _edata.
312		 */
313		mov	r10, r6
314#endif
315
316		mov	r5, #0			@ init dtb size to 0
317#ifdef CONFIG_ARM_APPENDED_DTB
318/*
319 *   r4  = final kernel address (possibly with LSB set)
320 *   r5  = appended dtb size (still unknown)
321 *   r6  = _edata
322 *   r7  = architecture ID
323 *   r8  = atags/device tree pointer
324 *   r9  = size of decompressed image
325 *   r10 = end of this image, including bss/stack/malloc space if non-XIP
326 *   sp  = stack pointer
327 *
328 * if there are device trees (dtb) appended to zImage, advance r10 so that the
329 * dtb data will get relocated along with the kernel if necessary.
330 */
331
332		ldr	lr, [r6, #0]
333#ifndef __ARMEB__
334		ldr	r1, =0xedfe0dd0		@ sig is 0xd00dfeed big endian
335#else
336		ldr	r1, =0xd00dfeed
337#endif
338		cmp	lr, r1
339		bne	dtb_check_done		@ not found
340
341#ifdef CONFIG_ARM_ATAG_DTB_COMPAT
342		/*
343		 * OK... Let's do some funky business here.
344		 * If we do have a DTB appended to zImage, and we do have
345		 * an ATAG list around, we want the latter to be translated
346		 * and folded into the former here. No GOT fixup has occurred
347		 * yet, but none of the code we're about to call uses any
348		 * global variable.
349		 */
350
351		/* Get the initial DTB size */
352		ldr	r5, [r6, #4]
353#ifndef __ARMEB__
354		/* convert to little endian */
355		eor	r1, r5, r5, ror #16
356		bic	r1, r1, #0x00ff0000
357		mov	r5, r5, ror #8
358		eor	r5, r5, r1, lsr #8
359#endif
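/*
 * The eor/bic/ror sequence above is the classic pre-ARMv6 32-bit byte
 * swap (the DTB header fields are big-endian).  A C sketch, for
 * illustration only (the helper name is hypothetical):
 *
 *	#include <stdint.h>
 *
 *	static uint32_t sketch_swab32(uint32_t x)
 *	{
 *		uint32_t t = x ^ ((x >> 16) | (x << 16));	// x ^ ror(x, 16)
 *		t &= ~0x00ff0000u;			// drop byte 2 of the correction term
 *		x = (x >> 8) | (x << 24);		// ror(x, 8)
 *		return x ^ (t >> 8);			// bytes of x reversed
 *	}
 */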
360		/* 50% DTB growth should be good enough */
361		add	r5, r5, r5, lsr #1
362		/* preserve 64-bit alignment */
363		add	r5, r5, #7
364		bic	r5, r5, #7
365		/* clamp to 32KB min and 1MB max */
366		cmp	r5, #(1 << 15)
367		movlo	r5, #(1 << 15)
368		cmp	r5, #(1 << 20)
369		movhi	r5, #(1 << 20)
370		/* temporarily relocate the stack past the DTB work space */
371		add	sp, sp, r5
372
373		mov	r0, r8
374		mov	r1, r6
375		mov	r2, r5
376		bl	atags_to_fdt
377
378		/*
379		 * If the returned value is 1, there is no ATAG at the location
380		 * pointed to by r8.  Try the typical 0x100 offset from the start
381		 * of RAM and hope for the best.
382		 */
383		cmp	r0, #1
384		sub	r0, r4, #TEXT_OFFSET
385		bic	r0, r0, #1
386		add	r0, r0, #0x100
387		mov	r1, r6
388		mov	r2, r5
389		bleq	atags_to_fdt
390
391		sub	sp, sp, r5
392#endif
393
394		mov	r8, r6			@ use the appended device tree
395
396		/*
397		 * Make sure that the DTB doesn't end up in the final
398		 * kernel's .bss area. To do so, we adjust the decompressed
399		 * kernel size to compensate if that .bss size is larger
400		 * than the relocated code.
401		 */
402		ldr	r5, =_kernel_bss_size
403		adr	r1, wont_overwrite
404		sub	r1, r6, r1
405		subs	r1, r5, r1
406		addhi	r9, r9, r1
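/*
 * Roughly, in C, for illustration only (the function and its
 * parameter names are hypothetical):
 *
 *	#include <stdint.h>
 *
 *	static uint32_t sketch_adjust_for_bss(uint32_t inflated_size,
 *					      uint32_t kernel_bss_size,
 *					      uint32_t edata,
 *					      uint32_t wont_overwrite_addr)
 *	{
 *		// part of this image that follows the wont_overwrite label
 *		uint32_t tail = edata - wont_overwrite_addr;
 *
 *		// if the kernel's .bss extends past that, grow the recorded
 *		// inflated size (r9) so the DTB is moved clear of the .bss
 *		if (kernel_bss_size > tail)
 *			inflated_size += kernel_bss_size - tail;
 *		return inflated_size;
 *	}
 */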
407
408		/* Get the current DTB size */
409		ldr	r5, [r6, #4]
410#ifndef __ARMEB__
411		/* convert r5 (dtb size) to little endian */
412		eor	r1, r5, r5, ror #16
413		bic	r1, r1, #0x00ff0000
414		mov	r5, r5, ror #8
415		eor	r5, r5, r1, lsr #8
416#endif
417
418		/* preserve 64-bit alignment */
419		add	r5, r5, #7
420		bic	r5, r5, #7
421
422		/* relocate some pointers past the appended dtb */
423		add	r6, r6, r5
424		add	r10, r10, r5
425		add	sp, sp, r5
426dtb_check_done:
427#endif
428
429/*
430 * Check to see if we will overwrite ourselves.
431 *   r4  = final kernel address (possibly with LSB set)
432 *   r9  = size of decompressed image
433 *   r10 = end of this image, including bss/stack/malloc space if non-XIP
434 * We basically want:
435 *   r4 - 16k page directory >= r10 -> OK
436 *   r4 + image length <= address of wont_overwrite -> OK
437 * Note: the possible LSB in r4 is harmless here.
438 */
439		add	r10, r10, #16384
440		cmp	r4, r10
441		bhs	wont_overwrite
442		add	r10, r4, r9
443		adr	r9, wont_overwrite
444		cmp	r10, r9
445		bls	wont_overwrite
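/*
 * In C terms, the test above amounts to the following (illustrative
 * only; names are hypothetical):
 *
 *	#include <stdint.h>
 *
 *	static int sketch_wont_overwrite(uint32_t r4, uint32_t image_end,
 *					 uint32_t inflated_size,
 *					 uint32_t wont_overwrite_addr)
 *	{
 *		// safe if the 16k page directory below r4 still clears the
 *		// end of this image, or if the decompressed kernel ends at
 *		// or before wont_overwrite
 *		return r4 >= image_end + 16384 ||
 *		       r4 + inflated_size <= wont_overwrite_addr;
 *	}
 */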
446
447/*
448 * Relocate ourselves past the end of the decompressed kernel.
449 *   r6  = _edata
450 *   r10 = end of the decompressed kernel
451 * Because we always copy ahead, we need to do it from the end and go
452 * backward in case the source and destination overlap.
453 */
454		/*
455		 * Bump to the next 256-byte boundary with the size of
456		 * the relocation code added. This avoids overwriting
457		 * ourselves when the offset is small.
458		 */
459		add	r10, r10, #((reloc_code_end - restart + 256) & ~255)
460		bic	r10, r10, #255
461
462		/* Get start of code we want to copy and align it down. */
463		adr	r5, restart
464		bic	r5, r5, #31
465
466/* Relocate the hyp vector base if necessary */
467#ifdef CONFIG_ARM_VIRT_EXT
468		mrs	r0, spsr
469		and	r0, r0, #MODE_MASK
470		cmp	r0, #HYP_MODE
471		bne	1f
472
473		/*
474		 * Compute the address of the hyp vectors after relocation.
475		 * This requires some arithmetic since we cannot directly
476		 * reference __hyp_stub_vectors in a PC-relative way.
477		 * Call __hyp_set_vectors with the new address so that we
478		 * can HVC again after the copy.
479		 */
4800:		adr	r0, 0b
481		movw	r1, #:lower16:__hyp_stub_vectors - 0b
482		movt	r1, #:upper16:__hyp_stub_vectors - 0b
483		add	r0, r0, r1
484		sub	r0, r0, r5
485		add	r0, r0, r10
486		bl	__hyp_set_vectors
4871:
488#endif
489
490		sub	r9, r6, r5		@ size to copy
491		add	r9, r9, #31		@ rounded up to a multiple
492		bic	r9, r9, #31		@ ... of 32 bytes
493		add	r6, r9, r5
494		add	r9, r9, r10
495
496#ifdef DEBUG
497		sub     r10, r6, r5
498		sub     r10, r9, r10
499		/*
500		 * We are about to copy the kernel to a new memory area.
501		 * The boundaries of the new memory area can be found in
502		 * r10 and r9, whilst r5 and r6 contain the boundaries
503		 * of the memory we are going to copy.
504		 * Calling dbgkc will help with the printing of this
505		 * information.
506		 */
507		dbgkc	r5, r6, r10, r9
508#endif
509
5101:		ldmdb	r6!, {r0 - r3, r10 - r12, lr}
511		cmp	r6, r5
512		stmdb	r9!, {r0 - r3, r10 - r12, lr}
513		bhi	1b
514
515		/* Preserve offset to relocated code. */
516		sub	r6, r9, r6
517
518		mov	r0, r9			@ start of relocated zImage
519		add	r1, sp, r6		@ end of relocated zImage
520		bl	cache_clean_flush
521
522		badr	r0, restart
523		add	r0, r0, r6
524		mov	pc, r0
525
526wont_overwrite:
527		adr	r0, LC0
528		ldmia	r0, {r1, r2, r3, r11, r12}
529		sub	r0, r0, r1		@ calculate the delta offset
530
531/*
532 * If delta is zero, we are running at the address we were linked at.
533 *   r0  = delta
534 *   r2  = BSS start
535 *   r3  = BSS end
536 *   r4  = kernel execution address (possibly with LSB set)
537 *   r5  = appended dtb size (0 if not present)
538 *   r7  = architecture ID
539 *   r8  = atags pointer
540 *   r11 = GOT start
541 *   r12 = GOT end
542 *   sp  = stack pointer
543 */
544		orrs	r1, r0, r5
545		beq	not_relocated
546
547		add	r11, r11, r0
548		add	r12, r12, r0
549
550#ifndef CONFIG_ZBOOT_ROM
551		/*
552		 * If we're running fully PIC (i.e. CONFIG_ZBOOT_ROM=n),
553		 * we need to fix up pointers into the BSS region.
554		 * Note that the stack pointer has already been fixed up.
555		 */
556		add	r2, r2, r0
557		add	r3, r3, r0
558
559		/*
560		 * Relocate all entries in the GOT table.
561		 * Bump bss entries to _edata + dtb size
562		 */
5631:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
564		add	r1, r1, r0		@ This fixes up C references
565		cmp	r1, r2			@ if entry >= bss_start &&
566		cmphs	r3, r1			@       bss_end > entry
567		addhi	r1, r1, r5		@    entry += dtb size
568		str	r1, [r11], #4		@ next entry
569		cmp	r11, r12
570		blo	1b
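/*
 * A C sketch of the GOT fixup loop above (the CONFIG_ZBOOT_ROM=n case),
 * for illustration only; the function, its parameters and the pointer
 * types are hypothetical.  bss_start/bss_end are the already relocated
 * __bss_start/_end values held in r2/r3.
 *
 *	#include <stdint.h>
 *
 *	static void sketch_relocate_got(uint32_t *got, uint32_t *got_end,
 *					uint32_t delta, uint32_t dtb_size,
 *					uint32_t bss_start, uint32_t bss_end)
 *	{
 *		for (; got < got_end; got++) {
 *			uint32_t entry = *got + delta;	// apply the load offset
 *			if (entry >= bss_start && entry < bss_end)
 *				entry += dtb_size;	// push BSS refs past the DTB
 *			*got = entry;
 *		}
 *	}
 */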
571
572		/* bump our bss pointers too */
573		add	r2, r2, r5
574		add	r3, r3, r5
575
576#else
577
578		/*
579		 * Relocate entries in the GOT table.  We only relocate
580		 * the entries that are outside the (relocated) BSS region.
581		 */
5821:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
583		cmp	r1, r2			@ entry < bss_start ||
584		cmphs	r3, r1			@ _end < entry
585		addlo	r1, r1, r0		@ table.  This fixes up the
586		str	r1, [r11], #4		@ C references.
587		cmp	r11, r12
588		blo	1b
589#endif
590
591not_relocated:	mov	r0, #0
5921:		str	r0, [r2], #4		@ clear bss
593		str	r0, [r2], #4
594		str	r0, [r2], #4
595		str	r0, [r2], #4
596		cmp	r2, r3
597		blo	1b
598
599		/*
600		 * Did we skip the cache setup earlier?
601		 * That is indicated by the LSB in r4.
602		 * Do it now if so.
603		 */
604		tst	r4, #1
605		bic	r4, r4, #1
606		blne	cache_on
607
608/*
609 * The C runtime environment should now be set up sufficiently.
610 * Set up some pointers, and start decompressing.
611 *   r4  = kernel execution address
612 *   r7  = architecture ID
613 *   r8  = atags pointer
614 */
615		mov	r0, r4
616		mov	r1, sp			@ malloc space above stack
617		add	r2, sp, #0x10000	@ 64k max
618		mov	r3, r7
619		bl	decompress_kernel
620
621		get_inflated_image_size	r1, r2, r3
622
623		mov	r0, r4			@ start of inflated image
624		add	r1, r1, r0		@ end of inflated image
625		bl	cache_clean_flush
626		bl	cache_off
627
628#ifdef CONFIG_ARM_VIRT_EXT
629		mrs	r0, spsr		@ Get saved CPU boot mode
630		and	r0, r0, #MODE_MASK
631		cmp	r0, #HYP_MODE		@ if not booted in HYP mode...
632		bne	__enter_kernel		@ boot kernel directly
633
634		adr	r12, .L__hyp_reentry_vectors_offset
635		ldr	r0, [r12]
636		add	r0, r0, r12
637
638		bl	__hyp_set_vectors
639		__HVC(0)			@ otherwise bounce to hyp mode
640
641		b	.			@ should never be reached
642
643		.align	2
644.L__hyp_reentry_vectors_offset:	.long	__hyp_reentry_vectors - .
645#else
646		b	__enter_kernel
647#endif
648
649		.align	2
650		.type	LC0, #object
651LC0:		.word	LC0			@ r1
652		.word	__bss_start		@ r2
653		.word	_end			@ r3
654		.word	_got_start		@ r11
655		.word	_got_end		@ ip
656		.size	LC0, . - LC0
657
658		.type	LC1, #object
659LC1:		.word	.L_user_stack_end - LC1	@ sp
660		.word	_edata - LC1		@ r6
661		.size	LC1, . - LC1
662
663.Lheadroom:
664		.word	_end - restart + 16384 + 1024*1024
665
666.Linflated_image_size_offset:
667		.long	(input_data_end - 4) - .
668
669#ifdef CONFIG_ARCH_RPC
670		.globl	params
671params:		ldr	r0, =0x10000100		@ params_phys for RPC
672		mov	pc, lr
673		.ltorg
674		.align
675#endif
676
677/*
678 * dcache_line_size - get the minimum D-cache line size from the CTR register
679 * on ARMv7.
680 */
681		.macro	dcache_line_size, reg, tmp
682#ifdef CONFIG_CPU_V7M
683		movw	\tmp, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_CTR
684		movt	\tmp, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_CTR
685		ldr	\tmp, [\tmp]
686#else
687		mrc	p15, 0, \tmp, c0, c0, 1		@ read ctr
688#endif
689		lsr	\tmp, \tmp, #16
690		and	\tmp, \tmp, #0xf		@ cache line size encoding
691		mov	\reg, #4			@ bytes per word
692		mov	\reg, \reg, lsl \tmp		@ actual cache line size
693		.endm
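/*
 * For illustration, the computation above in C (the ctr argument is the
 * raw cache type register value read either from CP15 or from the v7-M
 * SCB; the helper name is hypothetical):
 *
 *	#include <stdint.h>
 *
 *	static uint32_t sketch_dcache_line_size(uint32_t ctr)
 *	{
 *		uint32_t dminline = (ctr >> 16) & 0xf;	// log2 of the line size in words
 *		return 4u << dminline;			// convert to bytes
 *	}
 */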
694
695/*
696 * Turn on the cache.  We need to set up some page tables so that we
697 * can have both the I and D caches on.
698 *
699 * We place the page tables 16k down from the kernel execution address,
700 * and we hope that nothing else is using it.  If we're using it, we
701 * will go pop!
702 *
703 * On entry,
704 *  r4 = kernel execution address
705 *  r7 = architecture number
706 *  r8 = atags pointer
707 * On exit,
708 *  r0, r1, r2, r3, r9, r10, r12 corrupted
709 * This routine must preserve:
710 *  r4, r7, r8
711 */
712		.align	5
713cache_on:	mov	r3, #8			@ cache_on function
714		b	call_cache_fn
715
716/*
717 * Initialize the highest priority protection region, PR7,
718 * to cover the whole 32-bit address space as cacheable and bufferable.
719 */
720__armv4_mpu_cache_on:
721		mov	r0, #0x3f		@ 4G, the whole
722		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
723		mcr 	p15, 0, r0, c6, c7, 1
724
725		mov	r0, #0x80		@ PR7
726		mcr	p15, 0, r0, c2, c0, 0	@ D-cache on
727		mcr	p15, 0, r0, c2, c0, 1	@ I-cache on
728		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
729
730		mov	r0, #0xc000
731		mcr	p15, 0, r0, c5, c0, 1	@ I-access permission
732		mcr	p15, 0, r0, c5, c0, 0	@ D-access permission
733
734		mov	r0, #0
735		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
736		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
737		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
738		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
739						@ ...I .... ..D. WC.M
740		orr	r0, r0, #0x002d		@ .... .... ..1. 11.1
741		orr	r0, r0, #0x1000		@ ...1 .... .... ....
742
743		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
744
745		mov	r0, #0
746		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
747		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
748		mov	pc, lr
749
750__armv3_mpu_cache_on:
751		mov	r0, #0x3f		@ 4G, the whole
752		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
753
754		mov	r0, #0x80		@ PR7
755		mcr	p15, 0, r0, c2, c0, 0	@ cache on
756		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
757
758		mov	r0, #0xc000
759		mcr	p15, 0, r0, c5, c0, 0	@ access permission
760
761		mov	r0, #0
762		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
763		/*
764		 * ?? ARMv3 MMU does not allow reading the control register,
765		 * does this really work on ARMv3 MPU?
766		 */
767		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
768						@ .... .... .... WC.M
769		orr	r0, r0, #0x000d		@ .... .... .... 11.1
770		/* ?? this overwrites the value constructed above? */
771		mov	r0, #0
772		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
773
774		/* ?? invalidate for the second time? */
775		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
776		mov	pc, lr
777
778#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
779#define CB_BITS 0x08
780#else
781#define CB_BITS 0x0c
782#endif
783
784__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
785		bic	r3, r3, #0xff		@ Align the pointer
786		bic	r3, r3, #0x3f00
787/*
788 * Initialise the page tables, turning on the cacheable and bufferable
789 * bits for the RAM area only.
790 */
791		mov	r0, r3
792		mov	r9, r0, lsr #18
793		mov	r9, r9, lsl #18		@ start of RAM
794		add	r10, r9, #0x10000000	@ a reasonable RAM size
795		mov	r1, #0x12		@ XN|U + section mapping
796		orr	r1, r1, #3 << 10	@ AP=11
797		add	r2, r3, #16384
7981:		cmp	r1, r9			@ if virt > start of RAM
799		cmphs	r10, r1			@   && end of RAM > virt
800		bic	r1, r1, #0x1c		@ clear XN|U + C + B
801		orrlo	r1, r1, #0x10		@ Set XN|U for non-RAM
802		orrhs	r1, r1, r6		@ set RAM section settings
803		str	r1, [r0], #4		@ 1:1 mapping
804		add	r1, r1, #1048576
805		teq	r0, r2
806		bne	1b
807/*
808 * If ever we are running from Flash, then we surely want the cache
809 * to be enabled for our execution instance as well...  We map 2MB of it
810 * so there is no map overlap problem for a compressed kernel of up to 1 MB.
811 * If the execution is in RAM then we would only be duplicating the above.
812 */
813		orr	r1, r6, #0x04		@ ensure B is set for this
814		orr	r1, r1, #3 << 10
815		mov	r2, pc
816		mov	r2, r2, lsr #20
817		orr	r1, r1, r2, lsl #20
818		add	r0, r3, r2, lsl #2
819		str	r1, [r0], #4
820		add	r1, r1, #1048576
821		str	r1, [r0]
822		mov	pc, lr
823ENDPROC(__setup_mmu)
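/*
 * A loose C sketch of the 1:1 section mapping that __setup_mmu builds,
 * for illustration only (names are hypothetical; cb_bits stands for the
 * r6 value passed in by the caller, ram_start for the aligned address
 * derived from the page directory).  The two extra sections covering
 * the current PC are omitted here.
 *
 *	#include <stdint.h>
 *
 *	static void sketch_setup_mmu(uint32_t *pgd, uint32_t ram_start,
 *				     uint32_t cb_bits)
 *	{
 *		for (uint32_t i = 0; i < 4096; i++) {
 *			uint32_t base = i << 20;		 // 1MB section, mapped 1:1
 *			uint32_t desc = base | (3 << 10) | 0x12; // AP=11, section
 *			desc &= ~0x1cu;				 // clear XN|U + C + B
 *			if (base >= ram_start &&
 *			    base <= ram_start + 0x10000000)
 *				desc |= cb_bits;		 // cacheable, bufferable RAM
 *			else
 *				desc |= 0x10;			 // XN|U only for non-RAM
 *			pgd[i] = desc;
 *		}
 *	}
 */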
824
825@ Enable unaligned access on v6, to allow better code generation
826@ for the decompressor C code:
827__armv6_mmu_cache_on:
828		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
829		bic	r0, r0, #2		@ A (no unaligned access fault)
830		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
831		mcr	p15, 0, r0, c1, c0, 0	@ write SCTLR
832		b	__armv4_mmu_cache_on
833
834__arm926ejs_mmu_cache_on:
835#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
836		mov	r0, #4			@ put dcache in WT mode
837		mcr	p15, 7, r0, c15, c0, 0
838#endif
839
840__armv4_mmu_cache_on:
841		mov	r12, lr
842#ifdef CONFIG_MMU
843		mov	r6, #CB_BITS | 0x12	@ U
844		bl	__setup_mmu
845		mov	r0, #0
846		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
847		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
848		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
849		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
850		orr	r0, r0, #0x0030
851 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
852		bl	__common_mmu_cache_on
853		mov	r0, #0
854		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
855#endif
856		mov	pc, r12
857
858__armv7_mmu_cache_on:
859		enable_cp15_barriers	r11
860		mov	r12, lr
861#ifdef CONFIG_MMU
862		mrc	p15, 0, r11, c0, c1, 4	@ read ID_MMFR0
863		tst	r11, #0xf		@ VMSA
864		movne	r6, #CB_BITS | 0x02	@ !XN
865		blne	__setup_mmu
866		mov	r0, #0
867		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
868		tst	r11, #0xf		@ VMSA
869		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
870#endif
871		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
872		bic	r0, r0, #1 << 28	@ clear SCTLR.TRE
873		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
874		orr	r0, r0, #0x003c		@ write buffer
875		bic	r0, r0, #2		@ A (no unaligned access fault)
876		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
877						@ (needed for ARM1176)
878#ifdef CONFIG_MMU
879 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
880		mrcne   p15, 0, r6, c2, c0, 2   @ read ttb control reg
881		orrne	r0, r0, #1		@ MMU enabled
882		movne	r1, #0xfffffffd		@ domain 0 = client
883		bic     r6, r6, #1 << 31        @ 32-bit translation system
884		bic     r6, r6, #(7 << 0) | (1 << 4)	@ use only ttbr0
885		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
886		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
887		mcrne   p15, 0, r6, c2, c0, 2   @ load ttb control
888#endif
889		mcr	p15, 0, r0, c7, c5, 4	@ ISB
890		mcr	p15, 0, r0, c1, c0, 0	@ load control register
891		mrc	p15, 0, r0, c1, c0, 0	@ and read it back
892		mov	r0, #0
893		mcr	p15, 0, r0, c7, c5, 4	@ ISB
894		mov	pc, r12
895
896__fa526_cache_on:
897		mov	r12, lr
898		mov	r6, #CB_BITS | 0x12	@ U
899		bl	__setup_mmu
900		mov	r0, #0
901		mcr	p15, 0, r0, c7, c7, 0	@ Invalidate whole cache
902		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
903		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
904		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
905		orr	r0, r0, #0x1000		@ I-cache enable
906		bl	__common_mmu_cache_on
907		mov	r0, #0
908		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
909		mov	pc, r12
910
911__common_mmu_cache_on:
912#ifndef CONFIG_THUMB2_KERNEL
913#ifndef DEBUG
914		orr	r0, r0, #0x000d		@ Write buffer, mmu
915#endif
916		mov	r1, #-1
917		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
918		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
919		b	1f
920		.align	5			@ cache line aligned
9211:		mcr	p15, 0, r0, c1, c0, 0	@ load control register
922		mrc	p15, 0, r0, c1, c0, 0	@ and read it back to
923		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
924#endif
925
926#define PROC_ENTRY_SIZE (4*5)
927
928/*
929 * Here follow the relocatable cache support functions for the
930 * various processors.  This is a generic hook for locating an
931 * entry and jumping to an instruction at the specified offset
932 * from the start of the block.  Please note this is all position
933 * independent code.
934 *
935 *  r1  = corrupted
936 *  r2  = corrupted
937 *  r3  = block offset
938 *  r9  = corrupted
939 *  r12 = corrupted
940 */
941
942call_cache_fn:	adr	r12, proc_types
943#ifdef CONFIG_CPU_CP15
944		mrc	p15, 0, r9, c0, c0	@ get processor ID
945#elif defined(CONFIG_CPU_V7M)
946		/*
947		 * On v7-M the processor id is located in the V7M_SCB_CPUID
948		 * register, but as cache handling is IMPLEMENTATION DEFINED on
949		 * v7-M (if existent at all), we just return early here.
950		 * If V7M_SCB_CPUID were used the cpu ID functions (i.e.
951		 * __armv7_mmu_cache_{on,off,flush}) would be selected which
952		 * use cp15 registers that are not implemented on v7-M.
953		 */
954		bx	lr
955#else
956		ldr	r9, =CONFIG_PROCESSOR_ID
957#endif
9581:		ldr	r1, [r12, #0]		@ get value
959		ldr	r2, [r12, #4]		@ get mask
960		eor	r1, r1, r9		@ (real ^ match)
961		tst	r1, r2			@       & mask
962 ARM(		addeq	pc, r12, r3		) @ call cache function
963 THUMB(		addeq	r12, r3			)
964 THUMB(		moveq	pc, r12			) @ call cache function
965		add	r12, r12, #PROC_ENTRY_SIZE
966		b	1b
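/*
 * A C sketch of the lookup that the loop above performs over the
 * proc_types table below, for illustration only (the struct and
 * function names are hypothetical; the layout mirrors one
 * PROC_ENTRY_SIZE sized entry):
 *
 *	#include <stdint.h>
 *
 *	struct sketch_proc_type {
 *		uint32_t match;
 *		uint32_t mask;
 *		uint32_t insns[3];	// 'on', 'off' and 'flush' branch slots
 *	};
 *
 *	static const struct sketch_proc_type *
 *	sketch_find_proc_type(const struct sketch_proc_type *t, uint32_t id)
 *	{
 *		// the table ends with a match/mask pair of 0/0, which
 *		// matches any ID, so the scan always terminates
 *		while (((id ^ t->match) & t->mask) != 0)
 *			t++;
 *		return t;
 *	}
 */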
967
968/*
969 * Table for cache operations.  This is basically:
970 *   - CPU ID match
971 *   - CPU ID mask
972 *   - 'cache on' method instruction
973 *   - 'cache off' method instruction
974 *   - 'cache flush' method instruction
975 *
976 * We match an entry using: ((real_id ^ match) & mask) == 0
977 *
978 * Writethrough caches generally only need 'on' and 'off'
979 * methods.  Writeback caches _must_ have the flush method
980 * defined.
981 */
982		.align	2
983		.type	proc_types,#object
984proc_types:
985		.word	0x41000000		@ old ARM ID
986		.word	0xff00f000
987		mov	pc, lr
988 THUMB(		nop				)
989		mov	pc, lr
990 THUMB(		nop				)
991		mov	pc, lr
992 THUMB(		nop				)
993
994		.word	0x41007000		@ ARM7/710
995		.word	0xfff8fe00
996		mov	pc, lr
997 THUMB(		nop				)
998		mov	pc, lr
999 THUMB(		nop				)
1000		mov	pc, lr
1001 THUMB(		nop				)
1002
1003		.word	0x41807200		@ ARM720T (writethrough)
1004		.word	0xffffff00
1005		W(b)	__armv4_mmu_cache_on
1006		W(b)	__armv4_mmu_cache_off
1007		mov	pc, lr
1008 THUMB(		nop				)
1009
1010		.word	0x41007400		@ ARM74x
1011		.word	0xff00ff00
1012		W(b)	__armv3_mpu_cache_on
1013		W(b)	__armv3_mpu_cache_off
1014		W(b)	__armv3_mpu_cache_flush
1015
1016		.word	0x41009400		@ ARM94x
1017		.word	0xff00ff00
1018		W(b)	__armv4_mpu_cache_on
1019		W(b)	__armv4_mpu_cache_off
1020		W(b)	__armv4_mpu_cache_flush
1021
1022		.word	0x41069260		@ ARM926EJ-S (v5TEJ)
1023		.word	0xff0ffff0
1024		W(b)	__arm926ejs_mmu_cache_on
1025		W(b)	__armv4_mmu_cache_off
1026		W(b)	__armv5tej_mmu_cache_flush
1027
1028		.word	0x00007000		@ ARM7 IDs
1029		.word	0x0000f000
1030		mov	pc, lr
1031 THUMB(		nop				)
1032		mov	pc, lr
1033 THUMB(		nop				)
1034		mov	pc, lr
1035 THUMB(		nop				)
1036
1037		@ Everything from here on will be the new ID system.
1038
1039		.word	0x4401a100		@ sa110 / sa1100
1040		.word	0xffffffe0
1041		W(b)	__armv4_mmu_cache_on
1042		W(b)	__armv4_mmu_cache_off
1043		W(b)	__armv4_mmu_cache_flush
1044
1045		.word	0x6901b110		@ sa1110
1046		.word	0xfffffff0
1047		W(b)	__armv4_mmu_cache_on
1048		W(b)	__armv4_mmu_cache_off
1049		W(b)	__armv4_mmu_cache_flush
1050
1051		.word	0x56056900
1052		.word	0xffffff00		@ PXA9xx
1053		W(b)	__armv4_mmu_cache_on
1054		W(b)	__armv4_mmu_cache_off
1055		W(b)	__armv4_mmu_cache_flush
1056
1057		.word	0x56158000		@ PXA168
1058		.word	0xfffff000
1059		W(b)	__armv4_mmu_cache_on
1060		W(b)	__armv4_mmu_cache_off
1061		W(b)	__armv5tej_mmu_cache_flush
1062
1063		.word	0x56050000		@ Feroceon
1064		.word	0xff0f0000
1065		W(b)	__armv4_mmu_cache_on
1066		W(b)	__armv4_mmu_cache_off
1067		W(b)	__armv5tej_mmu_cache_flush
1068
1069#ifdef CONFIG_CPU_FEROCEON_OLD_ID
1070		/* this conflicts with the standard ARMv5TE entry */
1071		.long	0x41009260		@ Old Feroceon
1072		.long	0xff00fff0
1073		b	__armv4_mmu_cache_on
1074		b	__armv4_mmu_cache_off
1075		b	__armv5tej_mmu_cache_flush
1076#endif
1077
1078		.word	0x66015261		@ FA526
1079		.word	0xff01fff1
1080		W(b)	__fa526_cache_on
1081		W(b)	__armv4_mmu_cache_off
1082		W(b)	__fa526_cache_flush
1083
1084		@ These match on the architecture ID
1085
1086		.word	0x00020000		@ ARMv4T
1087		.word	0x000f0000
1088		W(b)	__armv4_mmu_cache_on
1089		W(b)	__armv4_mmu_cache_off
1090		W(b)	__armv4_mmu_cache_flush
1091
1092		.word	0x00050000		@ ARMv5TE
1093		.word	0x000f0000
1094		W(b)	__armv4_mmu_cache_on
1095		W(b)	__armv4_mmu_cache_off
1096		W(b)	__armv4_mmu_cache_flush
1097
1098		.word	0x00060000		@ ARMv5TEJ
1099		.word	0x000f0000
1100		W(b)	__armv4_mmu_cache_on
1101		W(b)	__armv4_mmu_cache_off
1102		W(b)	__armv5tej_mmu_cache_flush
1103
1104		.word	0x0007b000		@ ARMv6
1105		.word	0x000ff000
1106		W(b)	__armv6_mmu_cache_on
1107		W(b)	__armv4_mmu_cache_off
1108		W(b)	__armv6_mmu_cache_flush
1109
1110		.word	0x000f0000		@ new CPU Id
1111		.word	0x000f0000
1112		W(b)	__armv7_mmu_cache_on
1113		W(b)	__armv7_mmu_cache_off
1114		W(b)	__armv7_mmu_cache_flush
1115
1116		.word	0			@ unrecognised type
1117		.word	0
1118		mov	pc, lr
1119 THUMB(		nop				)
1120		mov	pc, lr
1121 THUMB(		nop				)
1122		mov	pc, lr
1123 THUMB(		nop				)
1124
1125		.size	proc_types, . - proc_types
1126
1127		/*
1128		 * If you get a "non-constant expression in ".if" statement"
1129		 * error from the assembler on this line, check that you have
1130		 * not accidentally written a "b" instruction where you should
1131		 * have written W(b).
1132		 */
1133		.if (. - proc_types) % PROC_ENTRY_SIZE != 0
1134		.error "The size of one or more proc_types entries is wrong."
1135		.endif
1136
1137/*
1138 * Turn off the Cache and MMU.  ARMv3 does not support
1139 * reading the control register, but ARMv4 does.
1140 *
1141 * On exit,
1142 *  r0, r1, r2, r3, r9, r12 corrupted
1143 * This routine must preserve:
1144 *  r4, r7, r8
1145 */
1146		.align	5
1147cache_off:	mov	r3, #12			@ cache_off function
1148		b	call_cache_fn
1149
1150__armv4_mpu_cache_off:
1151		mrc	p15, 0, r0, c1, c0
1152		bic	r0, r0, #0x000d
1153		mcr	p15, 0, r0, c1, c0	@ turn MPU and cache off
1154		mov	r0, #0
1155		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
1156		mcr	p15, 0, r0, c7, c6, 0	@ flush D-Cache
1157		mcr	p15, 0, r0, c7, c5, 0	@ flush I-Cache
1158		mov	pc, lr
1159
1160__armv3_mpu_cache_off:
1161		mrc	p15, 0, r0, c1, c0
1162		bic	r0, r0, #0x000d
1163		mcr	p15, 0, r0, c1, c0, 0	@ turn MPU and cache off
1164		mov	r0, #0
1165		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
1166		mov	pc, lr
1167
1168__armv4_mmu_cache_off:
1169#ifdef CONFIG_MMU
1170		mrc	p15, 0, r0, c1, c0
1171		bic	r0, r0, #0x000d
1172		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
1173		mov	r0, #0
1174		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
1175		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
1176#endif
1177		mov	pc, lr
1178
1179__armv7_mmu_cache_off:
1180		mrc	p15, 0, r0, c1, c0
1181#ifdef CONFIG_MMU
1182		bic	r0, r0, #0x000d
1183#else
1184		bic	r0, r0, #0x000c
1185#endif
1186		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
1187		mov	r0, #0
1188#ifdef CONFIG_MMU
1189		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB
1190#endif
1191		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
1192		mcr	p15, 0, r0, c7, c10, 4	@ DSB
1193		mcr	p15, 0, r0, c7, c5, 4	@ ISB
1194		mov	pc, lr
1195
1196/*
1197 * Clean and flush the cache to maintain consistency.
1198 *
1199 * On entry,
1200 *  r0 = start address
1201 *  r1 = end address (exclusive)
1202 * On exit,
1203 *  r1, r2, r3, r9, r10, r11, r12 corrupted
1204 * This routine must preserve:
1205 *  r4, r6, r7, r8
1206 */
1207		.align	5
1208cache_clean_flush:
1209		mov	r3, #16
1210		mov	r11, r1
1211		b	call_cache_fn
1212
1213__armv4_mpu_cache_flush:
1214		tst	r4, #1
1215		movne	pc, lr
1216		mov	r2, #1
1217		mov	r3, #0
1218		mcr	p15, 0, ip, c7, c6, 0	@ invalidate D cache
1219		mov	r1, #7 << 5		@ 8 segments
12201:		orr	r3, r1, #63 << 26	@ 64 entries
12212:		mcr	p15, 0, r3, c7, c14, 2	@ clean & invalidate D index
1222		subs	r3, r3, #1 << 26
1223		bcs	2b			@ entries 63 to 0
1224		subs 	r1, r1, #1 << 5
1225		bcs	1b			@ segments 7 to 0
1226
1227		teq	r2, #0
1228		mcrne	p15, 0, ip, c7, c5, 0	@ invalidate I cache
1229		mcr	p15, 0, ip, c7, c10, 4	@ drain WB
1230		mov	pc, lr
1231
1232__fa526_cache_flush:
1233		tst	r4, #1
1234		movne	pc, lr
1235		mov	r1, #0
1236		mcr	p15, 0, r1, c7, c14, 0	@ clean and invalidate D cache
1237		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
1238		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1239		mov	pc, lr
1240
1241__armv6_mmu_cache_flush:
1242		mov	r1, #0
1243		tst	r4, #1
1244		mcreq	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
1245		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
1246		mcreq	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
1247		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1248		mov	pc, lr
1249
1250__armv7_mmu_cache_flush:
1251		enable_cp15_barriers	r10
1252		tst	r4, #1
1253		bne	iflush
1254		mrc	p15, 0, r10, c0, c1, 5	@ read ID_MMFR1
1255		tst	r10, #0xf << 16		@ hierarchical cache (ARMv7)
1256		mov	r10, #0
1257		beq	hierarchical
1258		mcr	p15, 0, r10, c7, c14, 0	@ clean+invalidate D
1259		b	iflush
1260hierarchical:
1261		dcache_line_size r1, r2		@ r1 := dcache min line size
1262		sub	r2, r1, #1		@ r2 := line size mask
1263		bic	r0, r0, r2		@ round down start to line size
1264		sub	r11, r11, #1		@ end address is exclusive
1265		bic	r11, r11, r2		@ round down end to line size
12660:		cmp	r0, r11			@ finished?
1267		bgt	iflush
1268		mcr	p15, 0, r0, c7, c14, 1	@ Dcache clean/invalidate by VA
1269		add	r0, r0, r1
1270		b	0b
1271iflush:
1272		mcr	p15, 0, r10, c7, c10, 4	@ DSB
1273		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I+BTB
1274		mcr	p15, 0, r10, c7, c10, 4	@ DSB
1275		mcr	p15, 0, r10, c7, c5, 4	@ ISB
1276		mov	pc, lr
1277
1278__armv5tej_mmu_cache_flush:
1279		tst	r4, #1
1280		movne	pc, lr
12811:		mrc	p15, 0, APSR_nzcv, c7, c14, 3	@ test,clean,invalidate D cache
1282		bne	1b
1283		mcr	p15, 0, r0, c7, c5, 0	@ flush I cache
1284		mcr	p15, 0, r0, c7, c10, 4	@ drain WB
1285		mov	pc, lr
1286
1287__armv4_mmu_cache_flush:
1288		tst	r4, #1
1289		movne	pc, lr
1290		mov	r2, #64*1024		@ default: 32K dcache size (*2)
1291		mov	r11, #32		@ default: 32 byte line size
1292		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
1293		teq	r3, r9			@ cache ID register present?
1294		beq	no_cache_id
1295		mov	r1, r3, lsr #18
1296		and	r1, r1, #7
1297		mov	r2, #1024
1298		mov	r2, r2, lsl r1		@ base dcache size *2
1299		tst	r3, #1 << 14		@ test M bit
1300		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
1301		mov	r3, r3, lsr #12
1302		and	r3, r3, #3
1303		mov	r11, #8
1304		mov	r11, r11, lsl r3	@ cache line size in bytes
1305no_cache_id:
1306		mov	r1, pc
1307		bic	r1, r1, #63		@ align to longest cache line
1308		add	r2, r1, r2
13091:
1310 ARM(		ldr	r3, [r1], r11		) @ s/w flush D cache
1311 THUMB(		ldr     r3, [r1]		) @ s/w flush D cache
1312 THUMB(		add     r1, r1, r11		)
1313		teq	r1, r2
1314		bne	1b
1315
1316		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
1317		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
1318		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1319		mov	pc, lr
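/*
 * A C sketch of the cache geometry decode above, for illustration only
 * (the helper name is hypothetical; the bit positions are the ones the
 * code extracts from the ARMv4 cache type register, and the fallback
 * defaults used when no cache type register is present are omitted):
 *
 *	#include <stdint.h>
 *
 *	static void sketch_decode_cache_type(uint32_t ctr,
 *					     uint32_t *sweep_bytes,
 *					     uint32_t *line_bytes)
 *	{
 *		uint32_t dsize = (ctr >> 18) & 7;	// D-cache size field
 *		uint32_t dlen  = (ctr >> 12) & 3;	// D-cache line length field
 *
 *		*sweep_bytes = 1024u << dsize;		// twice the D-cache size
 *		if (ctr & (1 << 14))			// M bit: +50%
 *			*sweep_bytes += *sweep_bytes >> 1;
 *		*line_bytes = 8u << dlen;
 *	}
 */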
1320
1321__armv3_mmu_cache_flush:
1322__armv3_mpu_cache_flush:
1323		tst	r4, #1
1324		movne	pc, lr
1325		mov	r1, #0
1326		mcr	p15, 0, r1, c7, c0, 0	@ invalidate whole cache v3
1327		mov	pc, lr
1328
1329/*
1330 * Various debugging routines for printing hex characters and
1331 * memory, which again must be relocatable.
1332 */
1333#ifdef DEBUG
1334		.align	2
1335		.type	phexbuf,#object
1336phexbuf:	.space	12
1337		.size	phexbuf, . - phexbuf
1338
1339@ phex corrupts {r0, r1, r2, r3}
1340phex:		adr	r3, phexbuf
1341		mov	r2, #0
1342		strb	r2, [r3, r1]
13431:		subs	r1, r1, #1
1344		movmi	r0, r3
1345		bmi	puts
1346		and	r2, r0, #15
1347		mov	r0, r0, lsr #4
1348		cmp	r2, #10
1349		addge	r2, r2, #7
1350		add	r2, r2, #'0'
1351		strb	r2, [r3, r1]
1352		b	1b
1353
1354@ puts corrupts {r0, r1, r2, r3}
1355puts:		loadsp	r3, r2, r1
13561:		ldrb	r2, [r0], #1
1357		teq	r2, #0
1358		moveq	pc, lr
13592:		writeb	r2, r3
1360		mov	r1, #0x00020000
13613:		subs	r1, r1, #1
1362		bne	3b
1363		teq	r2, #'\n'
1364		moveq	r2, #'\r'
1365		beq	2b
1366		teq	r0, #0
1367		bne	1b
1368		mov	pc, lr
1369@ putc corrupts {r0, r1, r2, r3}
1370putc:
1371		mov	r2, r0
1372		loadsp	r3, r1, r0
1373		mov	r0, #0
1374		b	2b
1375
1376@ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
1377memdump:	mov	r12, r0
1378		mov	r10, lr
1379		mov	r11, #0
13802:		mov	r0, r11, lsl #2
1381		add	r0, r0, r12
1382		mov	r1, #8
1383		bl	phex
1384		mov	r0, #':'
1385		bl	putc
13861:		mov	r0, #' '
1387		bl	putc
1388		ldr	r0, [r12, r11, lsl #2]
1389		mov	r1, #8
1390		bl	phex
1391		and	r0, r11, #7
1392		teq	r0, #3
1393		moveq	r0, #' '
1394		bleq	putc
1395		and	r0, r11, #7
1396		add	r11, r11, #1
1397		teq	r0, #7
1398		bne	1b
1399		mov	r0, #'\n'
1400		bl	putc
1401		cmp	r11, #64
1402		blt	2b
1403		mov	pc, r10
1404#endif
1405
1406		.ltorg
1407
1408#ifdef CONFIG_ARM_VIRT_EXT
1409.align 5
1410__hyp_reentry_vectors:
1411		W(b)	.			@ reset
1412		W(b)	.			@ undef
1413#ifdef CONFIG_EFI_STUB
1414		W(b)	__enter_kernel_from_hyp	@ hvc from HYP
1415#else
1416		W(b)	.			@ svc
1417#endif
1418		W(b)	.			@ pabort
1419		W(b)	.			@ dabort
1420		W(b)	__enter_kernel		@ hyp
1421		W(b)	.			@ irq
1422		W(b)	.			@ fiq
1423#endif /* CONFIG_ARM_VIRT_EXT */
1424
1425__enter_kernel:
1426		mov	r0, #0			@ must be 0
1427		mov	r1, r7			@ restore architecture number
1428		mov	r2, r8			@ restore atags pointer
1429 ARM(		mov	pc, r4		)	@ call kernel
1430 M_CLASS(	add	r4, r4, #1	)	@ enter in Thumb mode for M class
1431 THUMB(		bx	r4		)	@ entry point is always ARM for A/R classes
1432
1433reloc_code_end:
1434
1435#ifdef CONFIG_EFI_STUB
1436__enter_kernel_from_hyp:
1437		mrc	p15, 4, r0, c1, c0, 0	@ read HSCTLR
1438		bic	r0, r0, #0x5		@ disable MMU and caches
1439		mcr	p15, 4, r0, c1, c0, 0	@ write HSCTLR
1440		isb
1441		b	__enter_kernel
1442
1443ENTRY(efi_enter_kernel)
1444		mov	r4, r0			@ preserve image base
1445		mov	r8, r1			@ preserve DT pointer
1446
1447 ARM(		adrl	r0, call_cache_fn	)
1448 THUMB(		adr	r0, call_cache_fn	)
1449		adr	r1, 0f			@ clean the region of code we
1450		bl	cache_clean_flush	@ may run with the MMU off
1451
1452#ifdef CONFIG_ARM_VIRT_EXT
1453		@
1454		@ The EFI spec does not support booting on ARM in HYP mode,
1455		@ since it mandates that the MMU and caches are on, with all
1456		@ 32-bit addressable DRAM mapped 1:1 using short descriptors.
1457		@
1458		@ While the EDK2 reference implementation adheres to this,
1459		@ U-Boot might decide to enter the EFI stub in HYP mode
1460		@ anyway, with the MMU and caches either on or off.
1461		@
1462		mrs	r0, cpsr		@ get the current mode
1463		msr	spsr_cxsf, r0		@ record boot mode
1464		and	r0, r0, #MODE_MASK	@ are we running in HYP mode?
1465		cmp	r0, #HYP_MODE
1466		bne	.Lefi_svc
1467
1468		mrc	p15, 4, r1, c1, c0, 0	@ read HSCTLR
1469		tst	r1, #0x1		@ MMU enabled at HYP?
1470		beq	1f
1471
1472		@
1473		@ When running in HYP mode with the caches on, we're better
1474		@ off just carrying on using the cached 1:1 mapping that the
1475		@ firmware provided. Set up the HYP vectors so HVC instructions
1476		@ issued from HYP mode take us to the correct handler code. We
1477		@ will disable the MMU before jumping to the kernel proper.
1478		@
1479		adr	r0, __hyp_reentry_vectors
1480		mcr	p15, 4, r0, c12, c0, 0	@ set HYP vector base (HVBAR)
1481		isb
1482		b	.Lefi_hyp
1483
1484		@
1485		@ When running in HYP mode with the caches off, we need to drop
1486		@ into SVC mode now, and let the decompressor set up its cached
1487		@ 1:1 mapping as usual.
1488		@
14891:		mov	r9, r4			@ preserve image base
1490		bl	__hyp_stub_install	@ install HYP stub vectors
1491		safe_svcmode_maskall	r1	@ drop to SVC mode
1492		msr	spsr_cxsf, r0		@ record boot mode
1493		orr	r4, r9, #1		@ restore image base and set LSB
1494		b	.Lefi_hyp
1495.Lefi_svc:
1496#endif
1497		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
1498		tst	r0, #0x1		@ MMU enabled?
1499		orreq	r4, r4, #1		@ set LSB if not
1500
1501.Lefi_hyp:
1502		mov	r0, r8			@ DT start
1503		add	r1, r8, r2		@ DT end
1504		bl	cache_clean_flush
1505
1506		adr	r0, 0f			@ switch to our stack
1507		ldr	sp, [r0]
1508		add	sp, sp, r0
1509
1510		mov	r5, #0			@ appended DTB size
1511		mov	r7, #0xFFFFFFFF		@ machine ID
1512		b	wont_overwrite
1513ENDPROC(efi_enter_kernel)
15140:		.long	.L_user_stack_end - .
1515#endif
1516
1517		.align
1518		.section ".stack", "aw", %nobits
1519.L_user_stack:	.space	4096
1520.L_user_stack_end:
1521