xref: /openbmc/linux/arch/arm/boot/compressed/head.S (revision 9d4fa1a1)
1/* SPDX-License-Identifier: GPL-2.0-only */
2/*
3 *  linux/arch/arm/boot/compressed/head.S
4 *
5 *  Copyright (C) 1996-2002 Russell King
6 *  Copyright (C) 2004 Hyok S. Choi (MPU support)
7 */
8#include <linux/linkage.h>
9#include <asm/assembler.h>
10#include <asm/v7m.h>
11
12#include "efi-header.S"
13
14 AR_CLASS(	.arch	armv7-a	)
15 M_CLASS(	.arch	armv7-m	)
16
17/*
18 * Debugging stuff
19 *
20 * Note that these macros must not contain any code which is not
21 * 100% relocatable.  Any attempt to do so will result in a crash.
22 * Please select one of the following when turning on debugging.
23 */
24#ifdef DEBUG
25
26#if defined(CONFIG_DEBUG_ICEDCC)
27
28#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
29		.macro	loadsp, rb, tmp1, tmp2
30		.endm
31		.macro	writeb, ch, rb
32		mcr	p14, 0, \ch, c0, c5, 0
33		.endm
34#elif defined(CONFIG_CPU_XSCALE)
35		.macro	loadsp, rb, tmp1, tmp2
36		.endm
37		.macro	writeb, ch, rb
38		mcr	p14, 0, \ch, c8, c0, 0
39		.endm
40#else
41		.macro	loadsp, rb, tmp1, tmp2
42		.endm
43		.macro	writeb, ch, rb
44		mcr	p14, 0, \ch, c1, c0, 0
45		.endm
46#endif
47
48#else
49
50#include CONFIG_DEBUG_LL_INCLUDE
51
52		.macro	writeb,	ch, rb
53		senduart \ch, \rb
54		.endm
55
56#if defined(CONFIG_ARCH_SA1100)
57		.macro	loadsp, rb, tmp1, tmp2
58		mov	\rb, #0x80000000	@ physical base address
59#ifdef CONFIG_DEBUG_LL_SER3
60		add	\rb, \rb, #0x00050000	@ Ser3
61#else
62		add	\rb, \rb, #0x00010000	@ Ser1
63#endif
64		.endm
65#else
66		.macro	loadsp,	rb, tmp1, tmp2
67		addruart \rb, \tmp1, \tmp2
68		.endm
69#endif
70#endif
71#endif
72
73		.macro	kputc,val
74		mov	r0, \val
75		bl	putc
76		.endm
77
78		.macro	kphex,val,len
79		mov	r0, \val
80		mov	r1, #\len
81		bl	phex
82		.endm
83
84		.macro	debug_reloc_start
85#ifdef DEBUG
86		kputc	#'\n'
87		kphex	r6, 8		/* processor id */
88		kputc	#':'
89		kphex	r7, 8		/* architecture id */
90#ifdef CONFIG_CPU_CP15
91		kputc	#':'
92		mrc	p15, 0, r0, c1, c0
93		kphex	r0, 8		/* control reg */
94#endif
95		kputc	#'\n'
96		kphex	r5, 8		/* decompressed kernel start */
97		kputc	#'-'
98		kphex	r9, 8		/* decompressed kernel end  */
99		kputc	#'>'
100		kphex	r4, 8		/* kernel execution address */
101		kputc	#'\n'
102#endif
103		.endm
104
105		.macro	debug_reloc_end
106#ifdef DEBUG
107		kphex	r5, 8		/* end of kernel */
108		kputc	#'\n'
109		mov	r0, r4
110		bl	memdump		/* dump 256 bytes at start of kernel */
111#endif
112		.endm
113
114		/*
115		 * Debug kernel copy by printing the memory addresses involved
116		 */
117		.macro dbgkc, begin, end, cbegin, cend
118#ifdef DEBUG
119		kputc   #'\n'
120		kputc   #'C'
121		kputc   #':'
122		kputc   #'0'
123		kputc   #'x'
124		kphex   \begin, 8	/* Start of compressed kernel */
125		kputc	#'-'
126		kputc	#'0'
127		kputc	#'x'
128		kphex	\end, 8		/* End of compressed kernel */
129		kputc	#'-'
130		kputc	#'>'
131		kputc   #'0'
132		kputc   #'x'
133		kphex   \cbegin, 8	/* Start of kernel copy */
134		kputc	#'-'
135		kputc	#'0'
136		kputc	#'x'
137		kphex	\cend, 8	/* End of kernel copy */
138		kputc	#'\n'
139		kputc	#'\r'
140#endif
141		.endm
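		/*
		 * Illustrative only: with DEBUG enabled, the macro above emits
		 * a line of the form
		 *   C:0x<begin>-0x<end>->0x<cbegin>-0x<cend>
		 * i.e. the source range of the compressed kernel followed by
		 * the range it is being copied to.
		 */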
142
143		.macro	enable_cp15_barriers, reg
144		mrc	p15, 0, \reg, c1, c0, 0	@ read SCTLR
145		tst	\reg, #(1 << 5)		@ CP15BEN bit set?
146		bne	.L_\@
147		orr	\reg, \reg, #(1 << 5)	@ CP15 barrier instructions
148		mcr	p15, 0, \reg, c1, c0, 0	@ write SCTLR
149 ARM(		.inst   0xf57ff06f		@ v7+ isb	)
150 THUMB(		isb						)
151.L_\@:
152		.endm
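		/*
		 * Note: SCTLR bit 5 (CP15BEN) gates the legacy CP15 barrier
		 * encodings (e.g. the mcr p15, 0, rX, c7, c10, 4 DSB form used
		 * elsewhere in this file).  It may have been cleared by
		 * firmware or a hypervisor, so it is re-enabled here before
		 * those encodings are relied upon.
		 */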
153
154		/*
155		 * The kernel build system appends the size of the
156		 * decompressed kernel at the end of the compressed data
157		 * in little-endian form.
158		 */
159		.macro	get_inflated_image_size, res:req, tmp1:req, tmp2:req
160		adr	\res, .Linflated_image_size_offset
161		ldr	\tmp1, [\res]
162		add	\tmp1, \tmp1, \res	@ address of inflated image size
163
164		ldrb	\res, [\tmp1]		@ get_unaligned_le32
165		ldrb	\tmp2, [\tmp1, #1]
166		orr	\res, \res, \tmp2, lsl #8
167		ldrb	\tmp2, [\tmp1, #2]
168		ldrb	\tmp1, [\tmp1, #3]
169		orr	\res, \res, \tmp2, lsl #16
170		orr	\res, \res, \tmp1, lsl #24
171		.endm
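		/*
		 * Illustrative C equivalent of the macro above (not part of
		 * the build):
		 *   u32 size = get_unaligned_le32(input_data_end - 4);
		 * The byte-wise loads keep this safe on CPUs that fault on
		 * unaligned word accesses.
		 */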
172
173		.section ".start", "ax"
174/*
175 * sort out different calling conventions
176 */
177		.align
178		/*
179		 * Always enter in ARM state for CPUs that support the ARM ISA.
180		 * As of today (2014) that's exactly the members of the A and R
181		 * classes.
182		 */
183 AR_CLASS(	.arm	)
184start:
185		.type	start,#function
186		/*
187		 * These 7 nops, along with the 1 nop immediately below for
188		 * !THUMB2, form 8 nops that make the compressed kernel bootable
189		 * on legacy ARM systems that assumed the kernel to be in a.out
190		 * binary format. The boot loaders on these systems would
191		 * jump 32 bytes into the image to skip the a.out header.
192		 * With these 8 nops filling exactly 32 bytes, things still
193		 * work as expected on these legacy systems. Thumb2 mode keeps
194		 * 7 of the nops as it turns out that some boot loaders
195		 * were patching the initial instructions of the kernel, i.e.
196		 * had started to exploit this "patch area".
197		 */
198		.rept	7
199		__nop
200		.endr
201#ifndef CONFIG_THUMB2_KERNEL
202		__nop
203#else
204 AR_CLASS(	sub	pc, pc, #3	)	@ A/R: switch to Thumb2 mode
205  M_CLASS(	nop.w			)	@ M: already in Thumb2 mode
206		.thumb
207#endif
208		W(b)	1f
209
210		.word	_magic_sig	@ Magic numbers to help the loader
211		.word	_magic_start	@ absolute load/run zImage address
212		.word	_magic_end	@ zImage end address
213		.word	0x04030201	@ endianness flag
214		.word	0x45454545	@ another magic number to indicate
215		.word	_magic_table	@ additional data table
216
217		__EFI_HEADER
2181:
219 ARM_BE8(	setend	be		)	@ go BE8 if compiled for BE8
220 AR_CLASS(	mrs	r9, cpsr	)
221#ifdef CONFIG_ARM_VIRT_EXT
222		bl	__hyp_stub_install	@ get into SVC mode, reversibly
223#endif
224		mov	r7, r1			@ save architecture ID
225		mov	r8, r2			@ save atags pointer
226
227#ifndef CONFIG_CPU_V7M
228		/*
229		 * Booting from Angel - need to enter SVC mode and disable
230		 * FIQs/IRQs (numeric definitions from angel arm.h source).
231		 * We only do this if we were in user mode on entry.
232		 */
233		mrs	r2, cpsr		@ get current mode
234		tst	r2, #3			@ not user?
235		bne	not_angel
236		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
237 ARM(		swi	0x123456	)	@ angel_SWI_ARM
238 THUMB(		svc	0xab		)	@ angel_SWI_THUMB
239not_angel:
240		safe_svcmode_maskall r0
241		msr	spsr_cxsf, r9		@ Save the CPU boot mode in
242						@ SPSR
243#endif
244		/*
245		 * Note that some cache flushing and other stuff may
246		 * be needed here - is there an Angel SWI call for this?
247		 */
248
249		/*
250		 * Some architecture-specific code can be inserted
251		 * by the linker here, but it should preserve r7, r8, and r9.
252		 */
253
254		.text
255
256#ifdef CONFIG_AUTO_ZRELADDR
257		/*
258		 * Find the start of physical memory.  As we are executing
259		 * without the MMU on, we are in the physical address space.
260		 * We just need to get rid of any offset by aligning the
261		 * address.
262		 *
263		 * This alignment is a balance between the requirements of
264		 * different platforms - we have chosen 128MB to allow
265		 * platforms which align the start of their physical memory
266		 * to 128MB to use this feature, while allowing the zImage
267		 * to be placed within the first 128MB of memory on other
268		 * platforms.  Increasing the alignment means we place
269		 * stricter alignment requirements on the start of physical
270		 * memory, but relaxing it means that we break people who
271		 * are already placing their zImage in (eg) the top 64MB
272		 * of this range.
273		 */
274		mov	r4, pc
275		and	r4, r4, #0xf8000000
276		/* Determine final kernel image address. */
277		add	r4, r4, #TEXT_OFFSET
278#else
279		ldr	r4, =zreladdr
280#endif
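		/*
		 * Worked example for the AUTO_ZRELADDR case above (illustrative
		 * only): if we are executing at pc = 0x60a08000, the 0xf8000000
		 * mask leaves 0x60000000 (the 128MB-aligned start of RAM), so
		 * r4 becomes 0x60000000 + TEXT_OFFSET.
		 */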
281
282		/*
283		 * Set up a page table only if it won't overwrite ourselves.
284		 * That means r4 < pc || r4 - 16k page directory > &_end.
285		 * Given that r4 > &_end is most infrequent, we add a rough
286		 * additional 1MB of room for a possible appended DTB.
287		 */
288		mov	r0, pc
289		cmp	r0, r4
290		ldrcc	r0, LC0+28
291		addcc	r0, r0, pc
292		cmpcc	r4, r0
293		orrcc	r4, r4, #1		@ remember we skipped cache_on
294		blcs	cache_on
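		/*
		 * In C terms (illustrative sketch of the test above):
		 *   if (pc < r4 && r4 < pc + (_end - restart + 16K + 1M))
		 *           r4 |= 1;        @ too close, defer cache_on
		 *   else
		 *           cache_on();
		 * The 16K covers the page directory placed below r4, the 1MB
		 * a possible appended DTB, as described in the comment above.
		 */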
295
296restart:	adr	r0, LC0
297		ldmia	r0, {r1, r2, r3, r6, r11, r12}
298		ldr	sp, [r0, #24]
299
300		/*
301		 * We might be running at a different address.  We need
302		 * to fix up various pointers.
303		 */
304		sub	r0, r0, r1		@ calculate the delta offset
305		add	r6, r6, r0		@ _edata
306
307		get_inflated_image_size	r9, r10, lr
308
309#ifndef CONFIG_ZBOOT_ROM
310		/* malloc space is above the relocated stack (64k max) */
311		add	sp, sp, r0
312		add	r10, sp, #0x10000
313#else
314		/*
315		 * With ZBOOT_ROM the bss/stack is non-relocatable,
316		 * but someone could still run this code from RAM,
317		 * in which case our reference is _edata.
318		 */
319		mov	r10, r6
320#endif
321
322		mov	r5, #0			@ init dtb size to 0
323#ifdef CONFIG_ARM_APPENDED_DTB
324/*
325 *   r0  = delta
326 *   r2  = BSS start
327 *   r3  = BSS end
328 *   r4  = final kernel address (possibly with LSB set)
329 *   r5  = appended dtb size (still unknown)
330 *   r6  = _edata
331 *   r7  = architecture ID
332 *   r8  = atags/device tree pointer
333 *   r9  = size of decompressed image
334 *   r10 = end of this image, including  bss/stack/malloc space if non XIP
335 *   r11 = GOT start
336 *   r12 = GOT end
337 *   sp  = stack pointer
338 *
339 * if there are device trees (dtb) appended to zImage, advance r10 so that the
340 * dtb data will get relocated along with the kernel if necessary.
341 */
342
343		ldr	lr, [r6, #0]
344#ifndef __ARMEB__
345		ldr	r1, =0xedfe0dd0		@ sig is 0xd00dfeed big endian
346#else
347		ldr	r1, =0xd00dfeed
348#endif
349		cmp	lr, r1
350		bne	dtb_check_done		@ not found
351
352#ifdef CONFIG_ARM_ATAG_DTB_COMPAT
353		/*
354		 * OK... Let's do some funky business here.
355		 * If we do have a DTB appended to zImage, and we do have
356		 * an ATAG list around, we want the latter to be translated
357		 * and folded into the former here. No GOT fixup has occurred
358		 * yet, but none of the code we're about to call uses any
359		 * global variables.
360		 */
361
362		/* Get the initial DTB size */
363		ldr	r5, [r6, #4]
364#ifndef __ARMEB__
365		/* convert to little endian */
366		eor	r1, r5, r5, ror #16
367		bic	r1, r1, #0x00ff0000
368		mov	r5, r5, ror #8
369		eor	r5, r5, r1, lsr #8
370#endif
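		/*
		 * The eor/bic/ror sequence above is the classic 4-instruction
		 * 32-bit byte swap (the DTB header is big-endian), presumably
		 * used because this code must also run on pre-v6 CPUs that
		 * lack "rev".  Illustrative C: r5 = __builtin_bswap32(r5);
		 */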
371		/* 50% DTB growth should be good enough */
372		add	r5, r5, r5, lsr #1
373		/* preserve 64-bit alignment */
374		add	r5, r5, #7
375		bic	r5, r5, #7
376		/* clamp to 32KB min and 1MB max */
377		cmp	r5, #(1 << 15)
378		movlo	r5, #(1 << 15)
379		cmp	r5, #(1 << 20)
380		movhi	r5, #(1 << 20)
381		/* temporarily relocate the stack past the DTB work space */
382		add	sp, sp, r5
383
384		stmfd	sp!, {r0-r3, ip, lr}
385		mov	r0, r8
386		mov	r1, r6
387		mov	r2, r5
388		bl	atags_to_fdt
389
390		/*
391		 * If the returned value is 1, there is no ATAG at the location
392		 * pointed to by r8.  Try the typical 0x100 offset from start
393		 * of RAM and hope for the best.
394		 */
395		cmp	r0, #1
396		sub	r0, r4, #TEXT_OFFSET
397		bic	r0, r0, #1
398		add	r0, r0, #0x100
399		mov	r1, r6
400		mov	r2, r5
401		bleq	atags_to_fdt
402
403		ldmfd	sp!, {r0-r3, ip, lr}
404		sub	sp, sp, r5
405#endif
406
407		mov	r8, r6			@ use the appended device tree
408
409		/*
410		 * Make sure that the DTB doesn't end up in the final
411		 * kernel's .bss area. To do so, we adjust the decompressed
412		 * kernel size to compensate if that .bss size is larger
413		 * than the relocated code.
414		 */
415		ldr	r5, =_kernel_bss_size
416		adr	r1, wont_overwrite
417		sub	r1, r6, r1
418		subs	r1, r5, r1
419		addhi	r9, r9, r1
420
421		/* Get the current DTB size */
422		ldr	r5, [r6, #4]
423#ifndef __ARMEB__
424		/* convert r5 (dtb size) to little endian */
425		eor	r1, r5, r5, ror #16
426		bic	r1, r1, #0x00ff0000
427		mov	r5, r5, ror #8
428		eor	r5, r5, r1, lsr #8
429#endif
430
431		/* preserve 64-bit alignment */
432		add	r5, r5, #7
433		bic	r5, r5, #7
434
435		/* relocate some pointers past the appended dtb */
436		add	r6, r6, r5
437		add	r10, r10, r5
438		add	sp, sp, r5
439dtb_check_done:
440#endif
441
442/*
443 * Check to see if we will overwrite ourselves.
444 *   r4  = final kernel address (possibly with LSB set)
445 *   r9  = size of decompressed image
446 *   r10 = end of this image, including  bss/stack/malloc space if non XIP
447 * We basically want:
448 *   r4 - 16k page directory >= r10 -> OK
449 *   r4 + image length <= address of wont_overwrite -> OK
450 * Note: the possible LSB in r4 is harmless here.
451 */
452		add	r10, r10, #16384
453		cmp	r4, r10
454		bhs	wont_overwrite
455		add	r10, r4, r9
456		adr	r9, wont_overwrite
457		cmp	r10, r9
458		bls	wont_overwrite
459
460/*
461 * Relocate ourselves past the end of the decompressed kernel.
462 *   r6  = _edata
463 *   r10 = end of the decompressed kernel
464 * Because we always copy ahead, we need to do it from the end and go
465 * backward in case the source and destination overlap.
466 */
467		/*
468		 * Bump to the next 256-byte boundary with the size of
469		 * the relocation code added. This avoids overwriting
470		 * ourselves when the offset is small.
471		 */
472		add	r10, r10, #((reloc_code_end - restart + 256) & ~255)
473		bic	r10, r10, #255
474
475		/* Get start of code we want to copy and align it down. */
476		adr	r5, restart
477		bic	r5, r5, #31
478
479/* Relocate the hyp vector base if necessary */
480#ifdef CONFIG_ARM_VIRT_EXT
481		mrs	r0, spsr
482		and	r0, r0, #MODE_MASK
483		cmp	r0, #HYP_MODE
484		bne	1f
485
486		/*
487		 * Compute the address of the hyp vectors after relocation.
488		 * This requires some arithmetic since we cannot directly
489		 * reference __hyp_stub_vectors in a PC-relative way.
490		 * Call __hyp_set_vectors with the new address so that we
491		 * can HVC again after the copy.
492		 */
4930:		adr	r0, 0b
494		movw	r1, #:lower16:__hyp_stub_vectors - 0b
495		movt	r1, #:upper16:__hyp_stub_vectors - 0b
496		add	r0, r0, r1
497		sub	r0, r0, r5
498		add	r0, r0, r10
499		bl	__hyp_set_vectors
5001:
501#endif
502
503		sub	r9, r6, r5		@ size to copy
504		add	r9, r9, #31		@ rounded up to a multiple
505		bic	r9, r9, #31		@ ... of 32 bytes
506		add	r6, r9, r5
507		add	r9, r9, r10
508
509#ifdef DEBUG
510		sub     r10, r6, r5
511		sub     r10, r9, r10
512		/*
513		 * We are about to copy the kernel to a new memory area.
514		 * The boundaries of the new memory area can be found in
515		 * r10 and r9, whilst r5 and r6 contain the boundaries
516		 * of the memory we are going to copy.
517		 * Calling dbgkc will help with the printing of this
518		 * information.
519		 */
520		dbgkc	r5, r6, r10, r9
521#endif
522
5231:		ldmdb	r6!, {r0 - r3, r10 - r12, lr}
524		cmp	r6, r5
525		stmdb	r9!, {r0 - r3, r10 - r12, lr}
526		bhi	1b
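		/*
		 * The loop above copies 32 bytes (8 registers) per iteration,
		 * walking backwards from _edata so that the overlapping,
		 * higher-addressed destination is handled correctly.
		 */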
527
528		/* Preserve offset to relocated code. */
529		sub	r6, r9, r6
530
531		mov	r0, r9			@ start of relocated zImage
532		add	r1, sp, r6		@ end of relocated zImage
533		bl	cache_clean_flush
534
535		badr	r0, restart
536		add	r0, r0, r6
537		mov	pc, r0
538
539wont_overwrite:
540/*
541 * If delta is zero, we are running at the address we were linked at.
542 *   r0  = delta
543 *   r2  = BSS start
544 *   r3  = BSS end
545 *   r4  = kernel execution address (possibly with LSB set)
546 *   r5  = appended dtb size (0 if not present)
547 *   r7  = architecture ID
548 *   r8  = atags pointer
549 *   r11 = GOT start
550 *   r12 = GOT end
551 *   sp  = stack pointer
552 */
553		orrs	r1, r0, r5
554		beq	not_relocated
555
556		add	r11, r11, r0
557		add	r12, r12, r0
558
559#ifndef CONFIG_ZBOOT_ROM
560		/*
561		 * If we're running fully PIC (i.e. CONFIG_ZBOOT_ROM=n),
562		 * we need to fix up pointers into the BSS region.
563		 * Note that the stack pointer has already been fixed up.
564		 */
565		add	r2, r2, r0
566		add	r3, r3, r0
567
568		/*
569		 * Relocate all entries in the GOT table.
570		 * Bump bss entries to _edata + dtb size
571		 */
5721:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
573		add	r1, r1, r0		@ This fixes up C references
574		cmp	r1, r2			@ if entry >= bss_start &&
575		cmphs	r3, r1			@       bss_end > entry
576		addhi	r1, r1, r5		@    entry += dtb size
577		str	r1, [r11], #4		@ next entry
578		cmp	r11, r12
579		blo	1b
580
581		/* bump our bss pointers too */
582		add	r2, r2, r5
583		add	r3, r3, r5
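		/*
		 * Illustrative C for the GOT fixup loop above:
		 *   for (p = got_start; p < got_end; p++) {
		 *           *p += delta;
		 *           if (*p >= bss_start && *p < bss_end)
		 *                   *p += dtb_size;
		 *   }
		 * Entries pointing into .bss are additionally moved past the
		 * appended DTB, matching the bss pointer bump just above.
		 */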
584
585#else
586
587		/*
588		 * Relocate entries in the GOT table.  We only relocate
589		 * the entries that are outside the (relocated) BSS region.
590		 */
5911:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
592		cmp	r1, r2			@ entry < bss_start ||
593		cmphs	r3, r1			@ _end < entry
594		addlo	r1, r1, r0		@ table.  This fixes up the
595		str	r1, [r11], #4		@ C references.
596		cmp	r11, r12
597		blo	1b
598#endif
599
600not_relocated:	mov	r0, #0
6011:		str	r0, [r2], #4		@ clear bss
602		str	r0, [r2], #4
603		str	r0, [r2], #4
604		str	r0, [r2], #4
605		cmp	r2, r3
606		blo	1b
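		/*
		 * The loop above clears .bss 16 bytes at a time; when the
		 * .bss size is not a multiple of 16 it may store up to 12
		 * bytes past bss_end, which is tolerated here.
		 */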
607
608		/*
609		 * Did we skip the cache setup earlier?
610		 * That is indicated by the LSB in r4.
611		 * Do it now if so.
612		 */
613		tst	r4, #1
614		bic	r4, r4, #1
615		blne	cache_on
616
617/*
618 * The C runtime environment should now be set up sufficiently.
619 * Set up some pointers, and start decompressing.
620 *   r4  = kernel execution address
621 *   r7  = architecture ID
622 *   r8  = atags pointer
623 */
624		mov	r0, r4
625		mov	r1, sp			@ malloc space above stack
626		add	r2, sp, #0x10000	@ 64k max
627		mov	r3, r7
628		bl	decompress_kernel
629
630		get_inflated_image_size	r1, r2, r3
631
632		mov	r0, r4			@ start of inflated image
633		add	r1, r1, r0		@ end of inflated image
634		bl	cache_clean_flush
635		bl	cache_off
636
637#ifdef CONFIG_ARM_VIRT_EXT
638		mrs	r0, spsr		@ Get saved CPU boot mode
639		and	r0, r0, #MODE_MASK
640		cmp	r0, #HYP_MODE		@ if not booted in HYP mode...
641		bne	__enter_kernel		@ boot kernel directly
642
643		adr	r12, .L__hyp_reentry_vectors_offset
644		ldr	r0, [r12]
645		add	r0, r0, r12
646
647		bl	__hyp_set_vectors
648		__HVC(0)			@ otherwise bounce to hyp mode
649
650		b	.			@ should never be reached
651
652		.align	2
653.L__hyp_reentry_vectors_offset:	.long	__hyp_reentry_vectors - .
654#else
655		b	__enter_kernel
656#endif
657
658		.align	2
659		.type	LC0, #object
660LC0:		.word	LC0			@ r1
661		.word	__bss_start		@ r2
662		.word	_end			@ r3
663		.word	_edata			@ r6
664		.word	_got_start		@ r11
665		.word	_got_end		@ ip
666		.word	.L_user_stack_end	@ sp
667		.word	_end - restart + 16384 + 1024*1024
668		.size	LC0, . - LC0
669
670.Linflated_image_size_offset:
671		.long	(input_data_end - 4) - .
672
673#ifdef CONFIG_ARCH_RPC
674		.globl	params
675params:		ldr	r0, =0x10000100		@ params_phys for RPC
676		mov	pc, lr
677		.ltorg
678		.align
679#endif
680
681/*
682 * dcache_line_size - get the minimum D-cache line size from the CTR register
683 * on ARMv7.
684 */
685		.macro	dcache_line_size, reg, tmp
686#ifdef CONFIG_CPU_V7M
687		movw	\tmp, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_CTR
688		movt	\tmp, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_CTR
689		ldr	\tmp, [\tmp]
690#else
691		mrc	p15, 0, \tmp, c0, c0, 1		@ read ctr
692#endif
693		lsr	\tmp, \tmp, #16
694		and	\tmp, \tmp, #0xf		@ cache line size encoding
695		mov	\reg, #4			@ bytes per word
696		mov	\reg, \reg, lsl \tmp		@ actual cache line size
697		.endm
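		/*
		 * Illustrative: CTR bits [19:16] hold log2(words per line) for
		 * the smallest D-cache line, so the macro above computes
		 *   line_size_bytes = 4 << ((ctr >> 16) & 0xf);
		 */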
698
699/*
700 * Turn on the cache.  We need to set up some page tables so that we
701 * can have both the I and D caches on.
702 *
703 * We place the page tables 16k down from the kernel execution address,
704 * and we hope that nothing else is using it.  If we're using it, we
705 * will go pop!
706 *
707 * On entry,
708 *  r4 = kernel execution address
709 *  r7 = architecture number
710 *  r8 = atags pointer
711 * On exit,
712 *  r0, r1, r2, r3, r9, r10, r12 corrupted
713 * This routine must preserve:
714 *  r4, r7, r8
715 */
716		.align	5
717cache_on:	mov	r3, #8			@ cache_on function
718		b	call_cache_fn
719
720/*
721 * Initialize the highest priority protection region, PR7
722 * to cover the whole 32-bit address range as cacheable and bufferable.
723 */
724__armv4_mpu_cache_on:
725		mov	r0, #0x3f		@ 4G, the whole
726		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
727		mcr 	p15, 0, r0, c6, c7, 1
728
729		mov	r0, #0x80		@ PR7
730		mcr	p15, 0, r0, c2, c0, 0	@ D-cache on
731		mcr	p15, 0, r0, c2, c0, 1	@ I-cache on
732		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
733
734		mov	r0, #0xc000
735		mcr	p15, 0, r0, c5, c0, 1	@ I-access permission
736		mcr	p15, 0, r0, c5, c0, 0	@ D-access permission
737
738		mov	r0, #0
739		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
740		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
741		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
742		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
743						@ ...I .... ..D. WC.M
744		orr	r0, r0, #0x002d		@ .... .... ..1. 11.1
745		orr	r0, r0, #0x1000		@ ...1 .... .... ....
746
747		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
748
749		mov	r0, #0
750		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
751		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
752		mov	pc, lr
753
754__armv3_mpu_cache_on:
755		mov	r0, #0x3f		@ 4G, the whole
756		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
757
758		mov	r0, #0x80		@ PR7
759		mcr	p15, 0, r0, c2, c0, 0	@ cache on
760		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
761
762		mov	r0, #0xc000
763		mcr	p15, 0, r0, c5, c0, 0	@ access permission
764
765		mov	r0, #0
766		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
767		/*
768		 * ?? ARMv3 MMU does not allow reading the control register,
769		 * does this really work on ARMv3 MPU?
770		 */
771		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
772						@ .... .... .... WC.M
773		orr	r0, r0, #0x000d		@ .... .... .... 11.1
774		/* ?? this overwrites the value constructed above? */
775		mov	r0, #0
776		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
777
778		/* ?? invalidate for the second time? */
779		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
780		mov	pc, lr
781
782#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
783#define CB_BITS 0x08
784#else
785#define CB_BITS 0x0c
786#endif
787
788__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
789		bic	r3, r3, #0xff		@ Align the pointer
790		bic	r3, r3, #0x3f00
791/*
792 * Initialise the page tables, turning on the cacheable and bufferable
793 * bits for the RAM area only.
794 */
795		mov	r0, r3
796		mov	r9, r0, lsr #18
797		mov	r9, r9, lsl #18		@ start of RAM
798		add	r10, r9, #0x10000000	@ a reasonable RAM size
799		mov	r1, #0x12		@ XN|U + section mapping
800		orr	r1, r1, #3 << 10	@ AP=11
801		add	r2, r3, #16384
8021:		cmp	r1, r9			@ if virt > start of RAM
803		cmphs	r10, r1			@   && end of RAM > virt
804		bic	r1, r1, #0x1c		@ clear XN|U + C + B
805		orrlo	r1, r1, #0x10		@ Set XN|U for non-RAM
806		orrhs	r1, r1, r6		@ set RAM section settings
807		str	r1, [r0], #4		@ 1:1 mapping
808		add	r1, r1, #1048576
809		teq	r0, r2
810		bne	1b
811/*
812 * If we are ever running from Flash, then we surely want the cache
813 * to be enabled for our execution instance as well.  We map 2MB of it
814 * so there is no map overlap problem for up to a 1 MB compressed kernel.
815 * If we are executing from RAM then this would only duplicate the above.
816 */
817		orr	r1, r6, #0x04		@ ensure B is set for this
818		orr	r1, r1, #3 << 10
819		mov	r2, pc
820		mov	r2, r2, lsr #20
821		orr	r1, r1, r2, lsl #20
822		add	r0, r3, r2, lsl #2
823		str	r1, [r0], #4
824		add	r1, r1, #1048576
825		str	r1, [r0]
826		mov	pc, lr
827ENDPROC(__setup_mmu)
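/*
 * Note on the table built above (illustrative summary): the 16KB
 * directory holds 4096 one-MB section entries forming a flat 1:1 map
 * of the 4GB space.  Only the 256MB window starting at the
 * (256KB-aligned) page-directory address, assumed to be RAM, is marked
 * cacheable and bufferable; everything else is mapped uncached with XN.
 */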
828
829@ Enable unaligned access on v6, to allow better code generation
830@ for the decompressor C code:
831__armv6_mmu_cache_on:
832		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
833		bic	r0, r0, #2		@ A (no unaligned access fault)
834		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
835		mcr	p15, 0, r0, c1, c0, 0	@ write SCTLR
836		b	__armv4_mmu_cache_on
837
838__arm926ejs_mmu_cache_on:
839#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
840		mov	r0, #4			@ put dcache in WT mode
841		mcr	p15, 7, r0, c15, c0, 0
842#endif
843
844__armv4_mmu_cache_on:
845		mov	r12, lr
846#ifdef CONFIG_MMU
847		mov	r6, #CB_BITS | 0x12	@ U
848		bl	__setup_mmu
849		mov	r0, #0
850		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
851		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
852		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
853		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
854		orr	r0, r0, #0x0030
855 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
856		bl	__common_mmu_cache_on
857		mov	r0, #0
858		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
859#endif
860		mov	pc, r12
861
862__armv7_mmu_cache_on:
863		enable_cp15_barriers	r11
864		mov	r12, lr
865#ifdef CONFIG_MMU
866		mrc	p15, 0, r11, c0, c1, 4	@ read ID_MMFR0
867		tst	r11, #0xf		@ VMSA
868		movne	r6, #CB_BITS | 0x02	@ !XN
869		blne	__setup_mmu
870		mov	r0, #0
871		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
872		tst	r11, #0xf		@ VMSA
873		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
874#endif
875		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
876		bic	r0, r0, #1 << 28	@ clear SCTLR.TRE
877		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
878		orr	r0, r0, #0x003c		@ write buffer
879		bic	r0, r0, #2		@ A (no unaligned access fault)
880		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
881						@ (needed for ARM1176)
882#ifdef CONFIG_MMU
883 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
884		mrcne   p15, 0, r6, c2, c0, 2   @ read ttb control reg
885		orrne	r0, r0, #1		@ MMU enabled
886		movne	r1, #0xfffffffd		@ domain 0 = client
887		bic     r6, r6, #1 << 31        @ 32-bit translation system
888		bic     r6, r6, #(7 << 0) | (1 << 4)	@ use only ttbr0
889		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
890		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
891		mcrne   p15, 0, r6, c2, c0, 2   @ load ttb control
892#endif
893		mcr	p15, 0, r0, c7, c5, 4	@ ISB
894		mcr	p15, 0, r0, c1, c0, 0	@ load control register
895		mrc	p15, 0, r0, c1, c0, 0	@ and read it back
896		mov	r0, #0
897		mcr	p15, 0, r0, c7, c5, 4	@ ISB
898		mov	pc, r12
899
900__fa526_cache_on:
901		mov	r12, lr
902		mov	r6, #CB_BITS | 0x12	@ U
903		bl	__setup_mmu
904		mov	r0, #0
905		mcr	p15, 0, r0, c7, c7, 0	@ Invalidate whole cache
906		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
907		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
908		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
909		orr	r0, r0, #0x1000		@ I-cache enable
910		bl	__common_mmu_cache_on
911		mov	r0, #0
912		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
913		mov	pc, r12
914
915__common_mmu_cache_on:
916#ifndef CONFIG_THUMB2_KERNEL
917#ifndef DEBUG
918		orr	r0, r0, #0x000d		@ Write buffer, mmu
919#endif
920		mov	r1, #-1
921		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
922		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
923		b	1f
924		.align	5			@ cache line aligned
9251:		mcr	p15, 0, r0, c1, c0, 0	@ load control register
926		mrc	p15, 0, r0, c1, c0, 0	@ and read it back to
927		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
928#endif
929
930#define PROC_ENTRY_SIZE (4*5)
931
932/*
933 * Here follow the relocatable cache support functions for the
934 * various processors.  This is a generic hook for locating an
935 * entry and jumping to an instruction at the specified offset
936 * from the start of the block.  Please note this is all position
937 * independent code.
938 *
939 *  r1  = corrupted
940 *  r2  = corrupted
941 *  r3  = block offset
942 *  r9  = corrupted
943 *  r12 = corrupted
944 */
945
946call_cache_fn:	adr	r12, proc_types
947#ifdef CONFIG_CPU_CP15
948		mrc	p15, 0, r9, c0, c0	@ get processor ID
949#elif defined(CONFIG_CPU_V7M)
950		/*
951		 * On v7-M the processor id is located in the V7M_SCB_CPUID
952		 * register, but as cache handling is IMPLEMENTATION DEFINED on
953		 * v7-M (if existent at all) we just return early here.
954		 * If V7M_SCB_CPUID were used, the cpu ID functions (i.e.
955		 * __armv7_mmu_cache_{on,off,flush}) would be selected, which
956		 * use cp15 registers that are not implemented on v7-M.
957		 */
958		bx	lr
959#else
960		ldr	r9, =CONFIG_PROCESSOR_ID
961#endif
9621:		ldr	r1, [r12, #0]		@ get value
963		ldr	r2, [r12, #4]		@ get mask
964		eor	r1, r1, r9		@ (real ^ match)
965		tst	r1, r2			@       & mask
966 ARM(		addeq	pc, r12, r3		) @ call cache function
967 THUMB(		addeq	r12, r3			)
968 THUMB(		moveq	pc, r12			) @ call cache function
969		add	r12, r12, #PROC_ENTRY_SIZE
970		b	1b
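		/*
		 * Roughly, in C (illustrative):
		 *   for (entry = proc_types; ; entry += PROC_ENTRY_SIZE)
		 *           if (((cpuid ^ entry->match) & entry->mask) == 0)
		 *                   jump to entry + block_offset;
		 * The final match/mask pair of 0/0 matches any CPU, so the
		 * search always terminates.
		 */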
971
972/*
973 * Table for cache operations.  This is basically:
974 *   - CPU ID match
975 *   - CPU ID mask
976 *   - 'cache on' method instruction
977 *   - 'cache off' method instruction
978 *   - 'cache flush' method instruction
979 *
980 * We match an entry using: ((real_id ^ match) & mask) == 0
981 *
982 * Writethrough caches generally only need 'on' and 'off'
983 * methods.  Writeback caches _must_ have the flush method
984 * defined.
985 */
986		.align	2
987		.type	proc_types,#object
988proc_types:
989		.word	0x41000000		@ old ARM ID
990		.word	0xff00f000
991		mov	pc, lr
992 THUMB(		nop				)
993		mov	pc, lr
994 THUMB(		nop				)
995		mov	pc, lr
996 THUMB(		nop				)
997
998		.word	0x41007000		@ ARM7/710
999		.word	0xfff8fe00
1000		mov	pc, lr
1001 THUMB(		nop				)
1002		mov	pc, lr
1003 THUMB(		nop				)
1004		mov	pc, lr
1005 THUMB(		nop				)
1006
1007		.word	0x41807200		@ ARM720T (writethrough)
1008		.word	0xffffff00
1009		W(b)	__armv4_mmu_cache_on
1010		W(b)	__armv4_mmu_cache_off
1011		mov	pc, lr
1012 THUMB(		nop				)
1013
1014		.word	0x41007400		@ ARM74x
1015		.word	0xff00ff00
1016		W(b)	__armv3_mpu_cache_on
1017		W(b)	__armv3_mpu_cache_off
1018		W(b)	__armv3_mpu_cache_flush
1019
1020		.word	0x41009400		@ ARM94x
1021		.word	0xff00ff00
1022		W(b)	__armv4_mpu_cache_on
1023		W(b)	__armv4_mpu_cache_off
1024		W(b)	__armv4_mpu_cache_flush
1025
1026		.word	0x41069260		@ ARM926EJ-S (v5TEJ)
1027		.word	0xff0ffff0
1028		W(b)	__arm926ejs_mmu_cache_on
1029		W(b)	__armv4_mmu_cache_off
1030		W(b)	__armv5tej_mmu_cache_flush
1031
1032		.word	0x00007000		@ ARM7 IDs
1033		.word	0x0000f000
1034		mov	pc, lr
1035 THUMB(		nop				)
1036		mov	pc, lr
1037 THUMB(		nop				)
1038		mov	pc, lr
1039 THUMB(		nop				)
1040
1041		@ Everything from here on will be the new ID system.
1042
1043		.word	0x4401a100		@ sa110 / sa1100
1044		.word	0xffffffe0
1045		W(b)	__armv4_mmu_cache_on
1046		W(b)	__armv4_mmu_cache_off
1047		W(b)	__armv4_mmu_cache_flush
1048
1049		.word	0x6901b110		@ sa1110
1050		.word	0xfffffff0
1051		W(b)	__armv4_mmu_cache_on
1052		W(b)	__armv4_mmu_cache_off
1053		W(b)	__armv4_mmu_cache_flush
1054
1055		.word	0x56056900
1056		.word	0xffffff00		@ PXA9xx
1057		W(b)	__armv4_mmu_cache_on
1058		W(b)	__armv4_mmu_cache_off
1059		W(b)	__armv4_mmu_cache_flush
1060
1061		.word	0x56158000		@ PXA168
1062		.word	0xfffff000
1063		W(b)	__armv4_mmu_cache_on
1064		W(b)	__armv4_mmu_cache_off
1065		W(b)	__armv5tej_mmu_cache_flush
1066
1067		.word	0x56050000		@ Feroceon
1068		.word	0xff0f0000
1069		W(b)	__armv4_mmu_cache_on
1070		W(b)	__armv4_mmu_cache_off
1071		W(b)	__armv5tej_mmu_cache_flush
1072
1073#ifdef CONFIG_CPU_FEROCEON_OLD_ID
1074		/* this conflicts with the standard ARMv5TE entry */
1075		.long	0x41009260		@ Old Feroceon
1076		.long	0xff00fff0
1077		b	__armv4_mmu_cache_on
1078		b	__armv4_mmu_cache_off
1079		b	__armv5tej_mmu_cache_flush
1080#endif
1081
1082		.word	0x66015261		@ FA526
1083		.word	0xff01fff1
1084		W(b)	__fa526_cache_on
1085		W(b)	__armv4_mmu_cache_off
1086		W(b)	__fa526_cache_flush
1087
1088		@ These match on the architecture ID
1089
1090		.word	0x00020000		@ ARMv4T
1091		.word	0x000f0000
1092		W(b)	__armv4_mmu_cache_on
1093		W(b)	__armv4_mmu_cache_off
1094		W(b)	__armv4_mmu_cache_flush
1095
1096		.word	0x00050000		@ ARMv5TE
1097		.word	0x000f0000
1098		W(b)	__armv4_mmu_cache_on
1099		W(b)	__armv4_mmu_cache_off
1100		W(b)	__armv4_mmu_cache_flush
1101
1102		.word	0x00060000		@ ARMv5TEJ
1103		.word	0x000f0000
1104		W(b)	__armv4_mmu_cache_on
1105		W(b)	__armv4_mmu_cache_off
1106		W(b)	__armv5tej_mmu_cache_flush
1107
1108		.word	0x0007b000		@ ARMv6
1109		.word	0x000ff000
1110		W(b)	__armv6_mmu_cache_on
1111		W(b)	__armv4_mmu_cache_off
1112		W(b)	__armv6_mmu_cache_flush
1113
1114		.word	0x000f0000		@ new CPU Id
1115		.word	0x000f0000
1116		W(b)	__armv7_mmu_cache_on
1117		W(b)	__armv7_mmu_cache_off
1118		W(b)	__armv7_mmu_cache_flush
1119
1120		.word	0			@ unrecognised type
1121		.word	0
1122		mov	pc, lr
1123 THUMB(		nop				)
1124		mov	pc, lr
1125 THUMB(		nop				)
1126		mov	pc, lr
1127 THUMB(		nop				)
1128
1129		.size	proc_types, . - proc_types
1130
1131		/*
1132		 * If you get a "non-constant expression in ".if" statement"
1133		 * error from the assembler on this line, check that you have
1134		 * not accidentally written a "b" instruction where you should
1135		 * have written W(b).
1136		 */
1137		.if (. - proc_types) % PROC_ENTRY_SIZE != 0
1138		.error "The size of one or more proc_types entries is wrong."
1139		.endif
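		/*
		 * Rationale: in Thumb-2 a plain "b" may assemble to a 16-bit
		 * instruction, which would change an entry's size and break
		 * the fixed PROC_ENTRY_SIZE stride used by call_cache_fn;
		 * W(b) forces the 32-bit wide encoding.
		 */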
1140
1141/*
1142 * Turn off the Cache and MMU.  ARMv3 does not support
1143 * reading the control register, but ARMv4 does.
1144 *
1145 * On exit,
1146 *  r0, r1, r2, r3, r9, r12 corrupted
1147 * This routine must preserve:
1148 *  r4, r7, r8
1149 */
1150		.align	5
1151cache_off:	mov	r3, #12			@ cache_off function
1152		b	call_cache_fn
1153
1154__armv4_mpu_cache_off:
1155		mrc	p15, 0, r0, c1, c0
1156		bic	r0, r0, #0x000d
1157		mcr	p15, 0, r0, c1, c0	@ turn MPU and cache off
1158		mov	r0, #0
1159		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
1160		mcr	p15, 0, r0, c7, c6, 0	@ flush D-Cache
1161		mcr	p15, 0, r0, c7, c5, 0	@ flush I-Cache
1162		mov	pc, lr
1163
1164__armv3_mpu_cache_off:
1165		mrc	p15, 0, r0, c1, c0
1166		bic	r0, r0, #0x000d
1167		mcr	p15, 0, r0, c1, c0, 0	@ turn MPU and cache off
1168		mov	r0, #0
1169		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
1170		mov	pc, lr
1171
1172__armv4_mmu_cache_off:
1173#ifdef CONFIG_MMU
1174		mrc	p15, 0, r0, c1, c0
1175		bic	r0, r0, #0x000d
1176		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
1177		mov	r0, #0
1178		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
1179		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
1180#endif
1181		mov	pc, lr
1182
1183__armv7_mmu_cache_off:
1184		mrc	p15, 0, r0, c1, c0
1185#ifdef CONFIG_MMU
1186		bic	r0, r0, #0x000d
1187#else
1188		bic	r0, r0, #0x000c
1189#endif
1190		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
1191		mov	r0, #0
1192#ifdef CONFIG_MMU
1193		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB
1194#endif
1195		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
1196		mcr	p15, 0, r0, c7, c10, 4	@ DSB
1197		mcr	p15, 0, r0, c7, c5, 4	@ ISB
1198		mov	pc, lr
1199
1200/*
1201 * Clean and flush the cache to maintain consistency.
1202 *
1203 * On entry,
1204 *  r0 = start address
1205 *  r1 = end address (exclusive)
1206 * On exit,
1207 *  r1, r2, r3, r9, r10, r11, r12 corrupted
1208 * This routine must preserve:
1209 *  r4, r6, r7, r8
1210 */
1211		.align	5
1212cache_clean_flush:
1213		mov	r3, #16
1214		mov	r11, r1
1215		b	call_cache_fn
1216
1217__armv4_mpu_cache_flush:
1218		tst	r4, #1
1219		movne	pc, lr
1220		mov	r2, #1
1221		mov	r3, #0
1222		mcr	p15, 0, ip, c7, c6, 0	@ invalidate D cache
1223		mov	r1, #7 << 5		@ 8 segments
12241:		orr	r3, r1, #63 << 26	@ 64 entries
12252:		mcr	p15, 0, r3, c7, c14, 2	@ clean & invalidate D index
1226		subs	r3, r3, #1 << 26
1227		bcs	2b			@ entries 63 to 0
1228		subs 	r1, r1, #1 << 5
1229		bcs	1b			@ segments 7 to 0
1230
1231		teq	r2, #0
1232		mcrne	p15, 0, ip, c7, c5, 0	@ invalidate I cache
1233		mcr	p15, 0, ip, c7, c10, 4	@ drain WB
1234		mov	pc, lr
1235
1236__fa526_cache_flush:
1237		tst	r4, #1
1238		movne	pc, lr
1239		mov	r1, #0
1240		mcr	p15, 0, r1, c7, c14, 0	@ clean and invalidate D cache
1241		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
1242		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1243		mov	pc, lr
1244
1245__armv6_mmu_cache_flush:
1246		mov	r1, #0
1247		tst	r4, #1
1248		mcreq	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
1249		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
1250		mcreq	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
1251		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1252		mov	pc, lr
1253
1254__armv7_mmu_cache_flush:
1255		enable_cp15_barriers	r10
1256		tst	r4, #1
1257		bne	iflush
1258		mrc	p15, 0, r10, c0, c1, 5	@ read ID_MMFR1
1259		tst	r10, #0xf << 16		@ hierarchical cache (ARMv7)
1260		mov	r10, #0
1261		beq	hierarchical
1262		mcr	p15, 0, r10, c7, c14, 0	@ clean+invalidate D
1263		b	iflush
1264hierarchical:
1265		dcache_line_size r1, r2		@ r1 := dcache min line size
1266		sub	r2, r1, #1		@ r2 := line size mask
1267		bic	r0, r0, r2		@ round down start to line size
1268		sub	r11, r11, #1		@ end address is exclusive
1269		bic	r11, r11, r2		@ round down end to line size
12700:		cmp	r0, r11			@ finished?
1271		bgt	iflush
1272		mcr	p15, 0, r0, c7, c14, 1	@ Dcache clean/invalidate by VA
1273		add	r0, r0, r1
1274		b	0b
1275iflush:
1276		mcr	p15, 0, r10, c7, c10, 4	@ DSB
1277		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I+BTB
1278		mcr	p15, 0, r10, c7, c10, 4	@ DSB
1279		mcr	p15, 0, r10, c7, c5, 4	@ ISB
1280		mov	pc, lr
1281
1282__armv5tej_mmu_cache_flush:
1283		tst	r4, #1
1284		movne	pc, lr
12851:		mrc	p15, 0, APSR_nzcv, c7, c14, 3	@ test,clean,invalidate D cache
1286		bne	1b
1287		mcr	p15, 0, r0, c7, c5, 0	@ flush I cache
1288		mcr	p15, 0, r0, c7, c10, 4	@ drain WB
1289		mov	pc, lr
1290
1291__armv4_mmu_cache_flush:
1292		tst	r4, #1
1293		movne	pc, lr
1294		mov	r2, #64*1024		@ default: 32K dcache size (*2)
1295		mov	r11, #32		@ default: 32 byte line size
1296		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
1297		teq	r3, r9			@ cache ID register present?
1298		beq	no_cache_id
1299		mov	r1, r3, lsr #18
1300		and	r1, r1, #7
1301		mov	r2, #1024
1302		mov	r2, r2, lsl r1		@ base dcache size *2
1303		tst	r3, #1 << 14		@ test M bit
1304		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
1305		mov	r3, r3, lsr #12
1306		and	r3, r3, #3
1307		mov	r11, #8
1308		mov	r11, r11, lsl r3	@ cache line size in bytes
1309no_cache_id:
1310		mov	r1, pc
1311		bic	r1, r1, #63		@ align to longest cache line
1312		add	r2, r1, r2
13131:
1314 ARM(		ldr	r3, [r1], r11		) @ s/w flush D cache
1315 THUMB(		ldr     r3, [r1]		) @ s/w flush D cache
1316 THUMB(		add     r1, r1, r11		)
1317		teq	r1, r2
1318		bne	1b
1319
1320		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
1321		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
1322		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1323		mov	pc, lr
1324
1325__armv3_mmu_cache_flush:
1326__armv3_mpu_cache_flush:
1327		tst	r4, #1
1328		movne	pc, lr
1329		mov	r1, #0
1330		mcr	p15, 0, r1, c7, c0, 0	@ invalidate whole cache v3
1331		mov	pc, lr
1332
1333/*
1334 * Various debugging routines for printing hex characters and
1335 * memory, which again must be relocatable.
1336 */
1337#ifdef DEBUG
1338		.align	2
1339		.type	phexbuf,#object
1340phexbuf:	.space	12
1341		.size	phexbuf, . - phexbuf
1342
1343@ phex corrupts {r0, r1, r2, r3}
1344phex:		adr	r3, phexbuf
1345		mov	r2, #0
1346		strb	r2, [r3, r1]
13471:		subs	r1, r1, #1
1348		movmi	r0, r3
1349		bmi	puts
1350		and	r2, r0, #15
1351		mov	r0, r0, lsr #4
1352		cmp	r2, #10
1353		addge	r2, r2, #7
1354		add	r2, r2, #'0'
1355		strb	r2, [r3, r1]
1356		b	1b
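@ Illustrative: phex converts r0 into an r1-digit uppercase hex string,
@ filling phexbuf from the least significant digit, then branches to
@ puts to print it.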
1357
1358@ puts corrupts {r0, r1, r2, r3}
1359puts:		loadsp	r3, r2, r1
13601:		ldrb	r2, [r0], #1
1361		teq	r2, #0
1362		moveq	pc, lr
13632:		writeb	r2, r3
1364		mov	r1, #0x00020000
13653:		subs	r1, r1, #1
1366		bne	3b
1367		teq	r2, #'\n'
1368		moveq	r2, #'\r'
1369		beq	2b
1370		teq	r0, #0
1371		bne	1b
1372		mov	pc, lr
1373@ putc corrupts {r0, r1, r2, r3}
1374putc:
1375		mov	r2, r0
1376		loadsp	r3, r1, r0
1377		mov	r0, #0
1378		b	2b
1379
1380@ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
1381memdump:	mov	r12, r0
1382		mov	r10, lr
1383		mov	r11, #0
13842:		mov	r0, r11, lsl #2
1385		add	r0, r0, r12
1386		mov	r1, #8
1387		bl	phex
1388		mov	r0, #':'
1389		bl	putc
13901:		mov	r0, #' '
1391		bl	putc
1392		ldr	r0, [r12, r11, lsl #2]
1393		mov	r1, #8
1394		bl	phex
1395		and	r0, r11, #7
1396		teq	r0, #3
1397		moveq	r0, #' '
1398		bleq	putc
1399		and	r0, r11, #7
1400		add	r11, r11, #1
1401		teq	r0, #7
1402		bne	1b
1403		mov	r0, #'\n'
1404		bl	putc
1405		cmp	r11, #64
1406		blt	2b
1407		mov	pc, r10
1408#endif
1409
1410		.ltorg
1411
1412#ifdef CONFIG_ARM_VIRT_EXT
1413.align 5
1414__hyp_reentry_vectors:
1415		W(b)	.			@ reset
1416		W(b)	.			@ undef
1417		W(b)	.			@ svc
1418		W(b)	.			@ pabort
1419		W(b)	.			@ dabort
1420		W(b)	__enter_kernel		@ hyp
1421		W(b)	.			@ irq
1422		W(b)	.			@ fiq
1423#endif /* CONFIG_ARM_VIRT_EXT */
1424
1425__enter_kernel:
1426		mov	r0, #0			@ must be 0
1427		mov	r1, r7			@ restore architecture number
1428		mov	r2, r8			@ restore atags pointer
1429 ARM(		mov	pc, r4		)	@ call kernel
1430 M_CLASS(	add	r4, r4, #1	)	@ enter in Thumb mode for M class
1431 THUMB(		bx	r4		)	@ entry point is always ARM for A/R classes
1432
1433reloc_code_end:
1434
1435#ifdef CONFIG_EFI_STUB
1436ENTRY(efi_enter_kernel)
1437		mov	r7, r0				@ preserve image base
1438		mov	r4, r1				@ preserve DT pointer
1439
1440		mov	r0, r4				@ DT start
1441		add	r1, r4, r2			@ DT end
1442		bl	cache_clean_flush
1443
1444		mov	r0, r7				@ relocated zImage
1445		ldr	r1, =_edata			@ size of zImage
1446		add	r1, r1, r0			@ end of zImage
1447		bl	cache_clean_flush
1448
1449		@ The PE/COFF loader might not have cleaned the code we are
1450		@ running beyond the PoU, and so calling cache_off below from
1451		@ inside the PE/COFF loader allocated region is unsafe unless
1452		@ we explicitly clean it to the PoC.
1453 ARM(		adrl	r0, call_cache_fn	)
1454 THUMB(		adr	r0, call_cache_fn	)	@ region of code we will
1455		adr	r1, 0f				@ run with MMU off
1456		bl	cache_clean_flush
1457		bl	cache_off
1458
1459		@ Set parameters for booting zImage according to boot protocol
1460		@ put FDT address in r2, it was returned by efi_entry()
1461		@ r1 is the machine type, and r0 needs to be 0
1462		mov	r0, #0
1463		mov	r1, #0xFFFFFFFF
1464		mov	r2, r4
1465		add	r7, r7, #(__efi_start - start)
1466		mov	pc, r7				@ no mode switch
1467ENDPROC(efi_enter_kernel)
14680:
1469#endif
1470
1471		.align
1472		.section ".stack", "aw", %nobits
1473.L_user_stack:	.space	4096
1474.L_user_stack_end:
1475