1/*
2 *  linux/arch/arm/boot/compressed/head.S
3 *
4 *  Copyright (C) 1996-2002 Russell King
5 *  Copyright (C) 2004 Hyok S. Choi (MPU support)
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/linkage.h>
12#include <asm/assembler.h>
13#include <asm/v7m.h>
14
15#include "efi-header.S"
16
17 AR_CLASS(	.arch	armv7-a	)
18 M_CLASS(	.arch	armv7-m	)
19
20/*
21 * Debugging stuff
22 *
23 * Note that these macros must not contain any code which is not
24 * 100% relocatable.  Any attempt to do so will result in a crash.
25 * Please select one of the following when turning on debugging.
26 */
27#ifdef DEBUG
28
29#if defined(CONFIG_DEBUG_ICEDCC)
30
31#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
32		.macro	loadsp, rb, tmp
33		.endm
34		.macro	writeb, ch, rb
35		mcr	p14, 0, \ch, c0, c5, 0
36		.endm
37#elif defined(CONFIG_CPU_XSCALE)
38		.macro	loadsp, rb, tmp
39		.endm
40		.macro	writeb, ch, rb
41		mcr	p14, 0, \ch, c8, c0, 0
42		.endm
43#else
44		.macro	loadsp, rb, tmp
45		.endm
46		.macro	writeb, ch, rb
47		mcr	p14, 0, \ch, c1, c0, 0
48		.endm
49#endif
50
51#else
52
53#include CONFIG_DEBUG_LL_INCLUDE
54
55		.macro	writeb,	ch, rb
56		senduart \ch, \rb
57		.endm
58
59#if defined(CONFIG_ARCH_SA1100)
60		.macro	loadsp, rb, tmp
61		mov	\rb, #0x80000000	@ physical base address
62#ifdef CONFIG_DEBUG_LL_SER3
63		add	\rb, \rb, #0x00050000	@ Ser3
64#else
65		add	\rb, \rb, #0x00010000	@ Ser1
66#endif
67		.endm
68#else
69		.macro	loadsp,	rb, tmp
70		addruart \rb, \tmp
71		.endm
72#endif
73#endif
74#endif
75
76		.macro	kputc,val
77		mov	r0, \val
78		bl	putc
79		.endm
80
81		.macro	kphex,val,len
82		mov	r0, \val
83		mov	r1, #\len
84		bl	phex
85		.endm
86
87		.macro	debug_reloc_start
88#ifdef DEBUG
89		kputc	#'\n'
90		kphex	r6, 8		/* processor id */
91		kputc	#':'
92		kphex	r7, 8		/* architecture id */
93#ifdef CONFIG_CPU_CP15
94		kputc	#':'
95		mrc	p15, 0, r0, c1, c0
96		kphex	r0, 8		/* control reg */
97#endif
98		kputc	#'\n'
99		kphex	r5, 8		/* decompressed kernel start */
100		kputc	#'-'
101		kphex	r9, 8		/* decompressed kernel end  */
102		kputc	#'>'
103		kphex	r4, 8		/* kernel execution address */
104		kputc	#'\n'
105#endif
106		.endm
107
108		.macro	debug_reloc_end
109#ifdef DEBUG
110		kphex	r5, 8		/* end of kernel */
111		kputc	#'\n'
112		mov	r0, r4
113		bl	memdump		/* dump 256 bytes at start of kernel */
114#endif
115		.endm
116
117		.section ".start", #alloc, #execinstr
118/*
119 * sort out different calling conventions
120 */
121		.align
122		/*
123		 * Always enter in ARM state for CPUs that support the ARM ISA.
124		 * As of today (2014) that's exactly the members of the A and R
125		 * classes.
126		 */
127 AR_CLASS(	.arm	)
128start:
129		.type	start,#function
130		.rept	7
131		__nop
132		.endr
133   ARM(		mov	r0, r0		)
134   ARM(		b	1f		)
135 THUMB(		badr	r12, 1f		)
136 THUMB(		bx	r12		)
137
138		.word	_magic_sig	@ Magic numbers to help the loader
139		.word	_magic_start	@ absolute load/run zImage address
140		.word	_magic_end	@ zImage end address
141		.word	0x04030201	@ endianness flag
142
143 THUMB(		.thumb			)
1441:		__EFI_HEADER
145
146 ARM_BE8(	setend	be		)	@ go BE8 if compiled for BE8
147 AR_CLASS(	mrs	r9, cpsr	)
148#ifdef CONFIG_ARM_VIRT_EXT
149		bl	__hyp_stub_install	@ get into SVC mode, reversibly
150#endif
151		mov	r7, r1			@ save architecture ID
152		mov	r8, r2			@ save atags pointer
153
154#ifndef CONFIG_CPU_V7M
155		/*
156		 * Booting from Angel - need to enter SVC mode and disable
157		 * FIQs/IRQs (numeric definitions from angel arm.h source).
158		 * We only do this if we were in user mode on entry.
159		 */
160		mrs	r2, cpsr		@ get current mode
161		tst	r2, #3			@ not user?
162		bne	not_angel
163		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
164 ARM(		swi	0x123456	)	@ angel_SWI_ARM
165 THUMB(		svc	0xab		)	@ angel_SWI_THUMB
166not_angel:
167		safe_svcmode_maskall r0
168		msr	spsr_cxsf, r9		@ Save the CPU boot mode in
169						@ SPSR
170#endif
171		/*
172		 * Note that some cache flushing and other stuff may
173		 * be needed here - is there an Angel SWI call for this?
174		 */
175
176		/*
177		 * some architecture specific code can be inserted
178		 * by the linker here, but it should preserve r7, r8, and r9.
179		 */
180
181		.text
182
183#ifdef CONFIG_AUTO_ZRELADDR
184		/*
185		 * Find the start of physical memory.  As we are executing
186		 * without the MMU on, we are in the physical address space.
187		 * We just need to get rid of any offset by aligning the
188		 * address.
189		 *
190		 * This alignment is a balance between the requirements of
191		 * different platforms - we have chosen 128MB to allow
192		 * platforms which align the start of their physical memory
193		 * to 128MB to use this feature, while allowing the zImage
194		 * to be placed within the first 128MB of memory on other
195		 * platforms.  Increasing the alignment means we place
196		 * stricter alignment requirements on the start of physical
197		 * memory, but relaxing it means that we break people who
198		 * are already placing their zImage in (eg) the top 64MB
199		 * of this range.
200		 */
201		mov	r4, pc
202		and	r4, r4, #0xf8000000
203		/* Determine final kernel image address. */
204		add	r4, r4, #TEXT_OFFSET
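		/*
		 * Worked example with hypothetical values: if this code is
		 * executing at 0x80a00000, masking with 0xf8000000 leaves
		 * 0x80000000 as the presumed start of RAM, and with the
		 * common TEXT_OFFSET of 0x00008000 the final kernel image
		 * address in r4 becomes 0x80008000.
		 */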
205#else
206		ldr	r4, =zreladdr
207#endif
208
209		/*
210		 * Set up a page table only if it won't overwrite us.
211		 * That means r4 < pc || r4 - 16k page directory > &_end.
212		 * Given that r4 > &_end is most infrequent, we add a rough
213		 * additional 1MB of room for a possible appended DTB.
214		 */
215		mov	r0, pc
216		cmp	r0, r4
217		ldrcc	r0, LC0+32
218		addcc	r0, r0, pc
219		cmpcc	r4, r0
220		orrcc	r4, r4, #1		@ remember we skipped cache_on
221		blcs	cache_on
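		/*
		 * The conditional chain above is, roughly, the following
		 * pseudocode (LC0+32 holds the worst-case size literal
		 * "_end - restart + 16384 + 1024*1024" defined later in LC0):
		 *
		 *   if (pc >= r4 || r4 >= pc + (_end - restart) + 16K + 1M)
		 *           cache_on();     // safe to build the page table now
		 *   else
		 *           r4 |= 1;        // too close: defer cache_on
		 */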
222
223restart:	adr	r0, LC0
224		ldmia	r0, {r1, r2, r3, r6, r10, r11, r12}
225		ldr	sp, [r0, #28]
226
227		/*
228		 * We might be running at a different address.  We need
229		 * to fix up various pointers.
230		 */
231		sub	r0, r0, r1		@ calculate the delta offset
232		add	r6, r6, r0		@ _edata
233		add	r10, r10, r0		@ inflated kernel size location
234
235		/*
236		 * The kernel build system appends the size of the
237		 * decompressed kernel at the end of the compressed data
238		 * in little-endian form.
239		 */
240		ldrb	r9, [r10, #0]
241		ldrb	lr, [r10, #1]
242		orr	r9, r9, lr, lsl #8
243		ldrb	lr, [r10, #2]
244		ldrb	r10, [r10, #3]
245		orr	r9, r9, lr, lsl #16
246		orr	r9, r9, r10, lsl #24
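		/*
		 * Equivalent C sketch: loading byte by byte keeps this
		 * independent of CPU endianness and of the alignment of the
		 * appended size word:
		 *
		 *   u8 *p = (u8 *)r10;
		 *   r9 = p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24);
		 */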
247
248#ifndef CONFIG_ZBOOT_ROM
249		/* malloc space is above the relocated stack (64k max) */
250		add	sp, sp, r0
251		add	r10, sp, #0x10000
252#else
253		/*
254		 * With ZBOOT_ROM the bss/stack is non-relocatable,
255		 * but someone could still run this code from RAM,
256		 * in which case our reference is _edata.
257		 */
258		mov	r10, r6
259#endif
260
261		mov	r5, #0			@ init dtb size to 0
262#ifdef CONFIG_ARM_APPENDED_DTB
263/*
264 *   r0  = delta
265 *   r2  = BSS start
266 *   r3  = BSS end
267 *   r4  = final kernel address (possibly with LSB set)
268 *   r5  = appended dtb size (still unknown)
269 *   r6  = _edata
270 *   r7  = architecture ID
271 *   r8  = atags/device tree pointer
272 *   r9  = size of decompressed image
273 *   r10 = end of this image, including bss/stack/malloc space if non-XIP
274 *   r11 = GOT start
275 *   r12 = GOT end
276 *   sp  = stack pointer
277 *
278 * if there are device trees (dtb) appended to zImage, advance r10 so that the
279 * dtb data will get relocated along with the kernel if necessary.
280 */
281
282		ldr	lr, [r6, #0]
283#ifndef __ARMEB__
284		ldr	r1, =0xedfe0dd0		@ sig is 0xd00dfeed big endian
285#else
286		ldr	r1, =0xd00dfeed
287#endif
288		cmp	lr, r1
289		bne	dtb_check_done		@ not found
290
291#ifdef CONFIG_ARM_ATAG_DTB_COMPAT
292		/*
293		 * OK... Let's do some funky business here.
294		 * If we do have a DTB appended to zImage, and we do have
295		 * an ATAG list around, we want the latter to be translated
296		 * and folded into the former here. No GOT fixup has occurred
297		 * yet, but none of the code we're about to call uses any
298		 * global variable.
299		*/
300
301		/* Get the initial DTB size */
302		ldr	r5, [r6, #4]
303#ifndef __ARMEB__
304		/* convert to little endian */
305		eor	r1, r5, r5, ror #16
306		bic	r1, r1, #0x00ff0000
307		mov	r5, r5, ror #8
308		eor	r5, r5, r1, lsr #8
309#endif
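		/*
		 * On little-endian builds the four instructions above are
		 * the classic pre-ARMv6 byte-swap idiom (equivalent to REV):
		 * 0xaabbccdd -> 0xddccbbaa, turning the big-endian FDT
		 * totalsize field into a native-endian value in r5.
		 */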
310		/* 50% DTB growth should be good enough */
311		add	r5, r5, r5, lsr #1
312		/* preserve 64-bit alignment */
313		add	r5, r5, #7
314		bic	r5, r5, #7
315		/* clamp to 32KB min and 1MB max */
316		cmp	r5, #(1 << 15)
317		movlo	r5, #(1 << 15)
318		cmp	r5, #(1 << 20)
319		movhi	r5, #(1 << 20)
320		/* temporarily relocate the stack past the DTB work space */
321		add	sp, sp, r5
322
323		stmfd	sp!, {r0-r3, ip, lr}
324		mov	r0, r8
325		mov	r1, r6
326		mov	r2, r5
327		bl	atags_to_fdt
328
329		/*
330		 * If the returned value is 1, there is no ATAG at the location
331		 * pointed to by r8.  Try the typical 0x100 offset from start
332		 * of RAM and hope for the best.
333		 */
334		cmp	r0, #1
335		sub	r0, r4, #TEXT_OFFSET
336		bic	r0, r0, #1
337		add	r0, r0, #0x100
338		mov	r1, r6
339		mov	r2, r5
340		bleq	atags_to_fdt
341
342		ldmfd	sp!, {r0-r3, ip, lr}
343		sub	sp, sp, r5
344#endif
345
346		mov	r8, r6			@ use the appended device tree
347
348		/*
349		 * Make sure that the DTB doesn't end up in the final
350		 * kernel's .bss area. To do so, we adjust the decompressed
351		 * kernel size to compensate if that .bss size is larger
352		 * than the relocated code.
353		 */
354		ldr	r5, =_kernel_bss_size
355		adr	r1, wont_overwrite
356		sub	r1, r6, r1
357		subs	r1, r5, r1
358		addhi	r9, r9, r1
359
360		/* Get the current DTB size */
361		ldr	r5, [r6, #4]
362#ifndef __ARMEB__
363		/* convert r5 (dtb size) to little endian */
364		eor	r1, r5, r5, ror #16
365		bic	r1, r1, #0x00ff0000
366		mov	r5, r5, ror #8
367		eor	r5, r5, r1, lsr #8
368#endif
369
370		/* preserve 64-bit alignment */
371		add	r5, r5, #7
372		bic	r5, r5, #7
373
374		/* relocate some pointers past the appended dtb */
375		add	r6, r6, r5
376		add	r10, r10, r5
377		add	sp, sp, r5
378dtb_check_done:
379#endif
380
381/*
382 * Check to see if we will overwrite ourselves.
383 *   r4  = final kernel address (possibly with LSB set)
384 *   r9  = size of decompressed image
385 *   r10 = end of this image, including bss/stack/malloc space if non-XIP
386 * We basically want:
387 *   r4 - 16k page directory >= r10 -> OK
388 *   r4 + image length <= address of wont_overwrite -> OK
389 * Note: the possible LSB in r4 is harmless here.
390 */
391		add	r10, r10, #16384
392		cmp	r4, r10
393		bhs	wont_overwrite
394		add	r10, r4, r9
395		adr	r9, wont_overwrite
396		cmp	r10, r9
397		bls	wont_overwrite
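		/*
		 * In C-like pseudocode, the check above is roughly:
		 *
		 *   if (r4 >= end_of_this_image + 16K)   // page dir below r4 clears us
		 *           goto wont_overwrite;
		 *   if (r4 + decompressed_size <= &wont_overwrite)
		 *           goto wont_overwrite;
		 *   // otherwise relocate ourselves past the decompressed kernel
		 */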
398
399/*
400 * Relocate ourselves past the end of the decompressed kernel.
401 *   r6  = _edata
402 *   r10 = end of the decompressed kernel
403 * Because we always copy ahead, we need to do it from the end and go
404 * backward in case the source and destination overlap.
405 */
406		/*
407		 * Bump to the next 256-byte boundary with the size of
408		 * the relocation code added. This avoids overwriting
409		 * ourselves when the offset is small.
410		 */
411		add	r10, r10, #((reloc_code_end - restart + 256) & ~255)
412		bic	r10, r10, #255
413
414		/* Get start of code we want to copy and align it down. */
415		adr	r5, restart
416		bic	r5, r5, #31
417
418/* Relocate the hyp vector base if necessary */
419#ifdef CONFIG_ARM_VIRT_EXT
420		mrs	r0, spsr
421		and	r0, r0, #MODE_MASK
422		cmp	r0, #HYP_MODE
423		bne	1f
424
425		/*
426		 * Compute the address of the hyp vectors after relocation.
427		 * This requires some arithmetic since we cannot directly
428		 * reference __hyp_stub_vectors in a PC-relative way.
429		 * Call __hyp_set_vectors with the new address so that we
430		 * can HVC again after the copy.
431		 */
4320:		adr	r0, 0b
433		movw	r1, #:lower16:__hyp_stub_vectors - 0b
434		movt	r1, #:upper16:__hyp_stub_vectors - 0b
435		add	r0, r0, r1
436		sub	r0, r0, r5
437		add	r0, r0, r10
438		bl	__hyp_set_vectors
4391:
440#endif
441
442		sub	r9, r6, r5		@ size to copy
443		add	r9, r9, #31		@ rounded up to a multiple
444		bic	r9, r9, #31		@ ... of 32 bytes
445		add	r6, r9, r5
446		add	r9, r9, r10
447
4481:		ldmdb	r6!, {r0 - r3, r10 - r12, lr}
449		cmp	r6, r5
450		stmdb	r9!, {r0 - r3, r10 - r12, lr}
451		bhi	1b
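		/*
		 * The loop above moves 32 bytes (eight registers) per
		 * iteration and walks downwards from the end of the image
		 * (ldmdb/stmdb), so the copy is safe even though source and
		 * destination may overlap: the destination always lies above
		 * the source.
		 */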
452
453		/* Preserve offset to relocated code. */
454		sub	r6, r9, r6
455
456#ifndef CONFIG_ZBOOT_ROM
457		/* cache_clean_flush may use the stack, so relocate it */
458		add	sp, sp, r6
459#endif
460
461		bl	cache_clean_flush
462
463		badr	r0, restart
464		add	r0, r0, r6
465		mov	pc, r0
466
467wont_overwrite:
468/*
469 * If delta is zero, we are running at the address we were linked at.
470 *   r0  = delta
471 *   r2  = BSS start
472 *   r3  = BSS end
473 *   r4  = kernel execution address (possibly with LSB set)
474 *   r5  = appended dtb size (0 if not present)
475 *   r7  = architecture ID
476 *   r8  = atags pointer
477 *   r11 = GOT start
478 *   r12 = GOT end
479 *   sp  = stack pointer
480 */
481		orrs	r1, r0, r5
482		beq	not_relocated
483
484		add	r11, r11, r0
485		add	r12, r12, r0
486
487#ifndef CONFIG_ZBOOT_ROM
488		/*
489		 * If we're running fully PIC (i.e. CONFIG_ZBOOT_ROM=n),
490		 * we need to fix up pointers into the BSS region.
491		 * Note that the stack pointer has already been fixed up.
492		 */
493		add	r2, r2, r0
494		add	r3, r3, r0
495
496		/*
497		 * Relocate all entries in the GOT table.
498		 * Bump bss entries to _edata + dtb size
499		 */
5001:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
501		add	r1, r1, r0		@ This fixes up C references
502		cmp	r1, r2			@ if entry >= bss_start &&
503		cmphs	r3, r1			@       bss_end > entry
504		addhi	r1, r1, r5		@    entry += dtb size
505		str	r1, [r11], #4		@ next entry
506		cmp	r11, r12
507		blo	1b
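		/*
		 * Roughly equivalent C for the loop above (sketch):
		 *
		 *   for (u32 *p = got_start; p < got_end; p++) {
		 *           *p += delta;                    // r0
		 *           if (*p >= bss_start && *p < bss_end)
		 *                   *p += dtb_size;         // r5
		 *   }
		 */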
508
509		/* bump our bss pointers too */
510		add	r2, r2, r5
511		add	r3, r3, r5
512
513#else
514
515		/*
516		 * Relocate entries in the GOT table.  We only relocate
517		 * the entries that are outside the (relocated) BSS region.
518		 */
5191:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
520		cmp	r1, r2			@ entry < bss_start ||
521		cmphs	r3, r1			@ _end < entry
522		addlo	r1, r1, r0		@ table.  This fixes up the
523		str	r1, [r11], #4		@ C references.
524		cmp	r11, r12
525		blo	1b
526#endif
527
528not_relocated:	mov	r0, #0
5291:		str	r0, [r2], #4		@ clear bss
530		str	r0, [r2], #4
531		str	r0, [r2], #4
532		str	r0, [r2], #4
533		cmp	r2, r3
534		blo	1b
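		/*
		 * The loop above zeroes four words (16 bytes) per iteration
		 * and only re-checks the end pointer afterwards, so it may
		 * store up to 12 bytes past _end; presumably the stack
		 * section placed right after .bss in the linker script
		 * absorbs that overshoot.
		 */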
535
536		/*
537		 * Did we skip the cache setup earlier?
538		 * That is indicated by the LSB in r4.
539		 * Do it now if so.
540		 */
541		tst	r4, #1
542		bic	r4, r4, #1
543		blne	cache_on
544
545/*
546 * The C runtime environment should now be set up sufficiently.
547 * Set up some pointers, and start decompressing.
548 *   r4  = kernel execution address
549 *   r7  = architecture ID
550 *   r8  = atags pointer
551 */
552		mov	r0, r4
553		mov	r1, sp			@ malloc space above stack
554		add	r2, sp, #0x10000	@ 64k max
555		mov	r3, r7
556		bl	decompress_kernel
557		bl	cache_clean_flush
558		bl	cache_off
559		mov	r1, r7			@ restore architecture number
560		mov	r2, r8			@ restore atags pointer
561
562#ifdef CONFIG_ARM_VIRT_EXT
563		mrs	r0, spsr		@ Get saved CPU boot mode
564		and	r0, r0, #MODE_MASK
565		cmp	r0, #HYP_MODE		@ if not booted in HYP mode...
566		bne	__enter_kernel		@ boot kernel directly
567
568		adr	r12, .L__hyp_reentry_vectors_offset
569		ldr	r0, [r12]
570		add	r0, r0, r12
571
572		bl	__hyp_set_vectors
573		__HVC(0)			@ otherwise bounce to hyp mode
574
575		b	.			@ should never be reached
576
577		.align	2
578.L__hyp_reentry_vectors_offset:	.long	__hyp_reentry_vectors - .
579#else
580		b	__enter_kernel
581#endif
582
583		.align	2
584		.type	LC0, #object
585LC0:		.word	LC0			@ r1
586		.word	__bss_start		@ r2
587		.word	_end			@ r3
588		.word	_edata			@ r6
589		.word	input_data_end - 4	@ r10 (inflated size location)
590		.word	_got_start		@ r11
591		.word	_got_end		@ ip
592		.word	.L_user_stack_end	@ sp
593		.word	_end - restart + 16384 + 1024*1024
594		.size	LC0, . - LC0
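/*
 * Note: the last LC0 word above (offset 32) is the worst-case footprint
 * used by the "ldrcc r0, LC0+32" check before restart: the whole image
 * plus 16K for a page directory and 1MB of headroom for a possible
 * appended DTB.
 */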
595
596#ifdef CONFIG_ARCH_RPC
597		.globl	params
598params:		ldr	r0, =0x10000100		@ params_phys for RPC
599		mov	pc, lr
600		.ltorg
601		.align
602#endif
603
604/*
605 * Turn on the cache.  We need to set up some page tables so that we
606 * can have both the I and D caches on.
607 *
608 * We place the page tables 16k down from the kernel execution address,
609 * and we hope that nothing else is using that memory.  If it is in
610 * use, we will go pop!
611 *
612 * On entry,
613 *  r4 = kernel execution address
614 *  r7 = architecture number
615 *  r8 = atags pointer
616 * On exit,
617 *  r0, r1, r2, r3, r9, r10, r12 corrupted
618 * This routine must preserve:
619 *  r4, r7, r8
620 */
621		.align	5
622cache_on:	mov	r3, #8			@ cache_on function
623		b	call_cache_fn
624
625/*
626 * Initialize the highest priority protection region, PR7,
627 * to cover the entire 32-bit address space as cacheable and bufferable.
628 */
629__armv4_mpu_cache_on:
630		mov	r0, #0x3f		@ 4G, the whole
631		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
632		mcr 	p15, 0, r0, c6, c7, 1
633
634		mov	r0, #0x80		@ PR7
635		mcr	p15, 0, r0, c2, c0, 0	@ D-cache on
636		mcr	p15, 0, r0, c2, c0, 1	@ I-cache on
637		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
638
639		mov	r0, #0xc000
640		mcr	p15, 0, r0, c5, c0, 1	@ I-access permission
641		mcr	p15, 0, r0, c5, c0, 0	@ D-access permission
642
643		mov	r0, #0
644		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
645		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
646		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
647		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
648						@ ...I .... ..D. WC.M
649		orr	r0, r0, #0x002d		@ .... .... ..1. 11.1
650		orr	r0, r0, #0x1000		@ ...1 .... .... ....
651
652		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
653
654		mov	r0, #0
655		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
656		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
657		mov	pc, lr
658
659__armv3_mpu_cache_on:
660		mov	r0, #0x3f		@ 4G, the whole
661		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
662
663		mov	r0, #0x80		@ PR7
664		mcr	p15, 0, r0, c2, c0, 0	@ cache on
665		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
666
667		mov	r0, #0xc000
668		mcr	p15, 0, r0, c5, c0, 0	@ access permission
669
670		mov	r0, #0
671		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
672		/*
673		 * ?? ARMv3 MMU does not allow reading the control register,
674		 * does this really work on ARMv3 MPU?
675		 */
676		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
677						@ .... .... .... WC.M
678		orr	r0, r0, #0x000d		@ .... .... .... 11.1
679		/* ?? this overwrites the value constructed above? */
680		mov	r0, #0
681		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
682
683		/* ?? invalidate for the second time? */
684		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
685		mov	pc, lr
686
687#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
688#define CB_BITS 0x08
689#else
690#define CB_BITS 0x0c
691#endif
692
693__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
694		bic	r3, r3, #0xff		@ Align the pointer
695		bic	r3, r3, #0x3f00
696/*
697 * Initialise the page tables, turning on the cacheable and bufferable
698 * bits for the RAM area only.
699 */
700		mov	r0, r3
701		mov	r9, r0, lsr #18
702		mov	r9, r9, lsl #18		@ start of RAM
703		add	r10, r9, #0x10000000	@ a reasonable RAM size
704		mov	r1, #0x12		@ XN|U + section mapping
705		orr	r1, r1, #3 << 10	@ AP=11
706		add	r2, r3, #16384
7071:		cmp	r1, r9			@ if virt > start of RAM
708		cmphs	r10, r1			@   && end of RAM > virt
709		bic	r1, r1, #0x1c		@ clear XN|U + C + B
710		orrlo	r1, r1, #0x10		@ Set XN|U for non-RAM
711		orrhs	r1, r1, r6		@ set RAM section settings
712		str	r1, [r0], #4		@ 1:1 mapping
713		add	r1, r1, #1048576
714		teq	r0, r2
715		bne	1b
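		/*
		 * The loop above writes all 4096 section entries (16K / 4
		 * bytes, 1MB each) as a flat 1:1 mapping of the 4GB address
		 * space: sections inside the assumed 256MB of RAM starting
		 * at the 256KB-aligned page-directory address get the
		 * cacheable/bufferable settings from r6, everything else is
		 * mapped with XN set and caching disabled.
		 */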
716/*
717 * If we are ever running from Flash, then we surely want the cache
718 * to be enabled for our execution instance as well.  We map 2MB of it
719 * so there is no mapping overlap problem for a compressed kernel of up to 1MB.
720 * If we are executing from RAM, this merely duplicates the mapping above.
721 */
722		orr	r1, r6, #0x04		@ ensure B is set for this
723		orr	r1, r1, #3 << 10
724		mov	r2, pc
725		mov	r2, r2, lsr #20
726		orr	r1, r1, r2, lsl #20
727		add	r0, r3, r2, lsl #2
728		str	r1, [r0], #4
729		add	r1, r1, #1048576
730		str	r1, [r0]
731		mov	pc, lr
732ENDPROC(__setup_mmu)
733
734@ Enable unaligned access on v6, to allow better code generation
735@ for the decompressor C code:
736__armv6_mmu_cache_on:
737		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
738		bic	r0, r0, #2		@ A (no unaligned access fault)
739		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
740		mcr	p15, 0, r0, c1, c0, 0	@ write SCTLR
741		b	__armv4_mmu_cache_on
742
743__arm926ejs_mmu_cache_on:
744#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
745		mov	r0, #4			@ put dcache in WT mode
746		mcr	p15, 7, r0, c15, c0, 0
747#endif
748
749__armv4_mmu_cache_on:
750		mov	r12, lr
751#ifdef CONFIG_MMU
752		mov	r6, #CB_BITS | 0x12	@ U
753		bl	__setup_mmu
754		mov	r0, #0
755		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
756		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
757		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
758		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
759		orr	r0, r0, #0x0030
760 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
761		bl	__common_mmu_cache_on
762		mov	r0, #0
763		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
764#endif
765		mov	pc, r12
766
767__armv7_mmu_cache_on:
768		mov	r12, lr
769#ifdef CONFIG_MMU
770		mrc	p15, 0, r11, c0, c1, 4	@ read ID_MMFR0
771		tst	r11, #0xf		@ VMSA
772		movne	r6, #CB_BITS | 0x02	@ !XN
773		blne	__setup_mmu
774		mov	r0, #0
775		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
776		tst	r11, #0xf		@ VMSA
777		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
778#endif
779		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
780		bic	r0, r0, #1 << 28	@ clear SCTLR.TRE
781		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
782		orr	r0, r0, #0x003c		@ write buffer
783		bic	r0, r0, #2		@ A (no unaligned access fault)
784		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
785						@ (needed for ARM1176)
786#ifdef CONFIG_MMU
787 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
788		mrcne   p15, 0, r6, c2, c0, 2   @ read ttb control reg
789		orrne	r0, r0, #1		@ MMU enabled
790		movne	r1, #0xfffffffd		@ domain 0 = client
791		bic     r6, r6, #1 << 31        @ 32-bit translation system
792		bic     r6, r6, #(7 << 0) | (1 << 4)	@ use only ttbr0
793		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
794		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
795		mcrne   p15, 0, r6, c2, c0, 2   @ load ttb control
796#endif
797		mcr	p15, 0, r0, c7, c5, 4	@ ISB
798		mcr	p15, 0, r0, c1, c0, 0	@ load control register
799		mrc	p15, 0, r0, c1, c0, 0	@ and read it back
800		mov	r0, #0
801		mcr	p15, 0, r0, c7, c5, 4	@ ISB
802		mov	pc, r12
803
804__fa526_cache_on:
805		mov	r12, lr
806		mov	r6, #CB_BITS | 0x12	@ U
807		bl	__setup_mmu
808		mov	r0, #0
809		mcr	p15, 0, r0, c7, c7, 0	@ Invalidate whole cache
810		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
811		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
812		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
813		orr	r0, r0, #0x1000		@ I-cache enable
814		bl	__common_mmu_cache_on
815		mov	r0, #0
816		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
817		mov	pc, r12
818
819__common_mmu_cache_on:
820#ifndef CONFIG_THUMB2_KERNEL
821#ifndef DEBUG
822		orr	r0, r0, #0x000d		@ Write buffer, mmu
823#endif
824		mov	r1, #-1
825		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
826		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
827		b	1f
828		.align	5			@ cache line aligned
8291:		mcr	p15, 0, r0, c1, c0, 0	@ load control register
830		mrc	p15, 0, r0, c1, c0, 0	@ and read it back to
831		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
832#endif
833
834#define PROC_ENTRY_SIZE (4*5)
835
836/*
837 * Here follow the relocatable cache support functions for the
838 * various processors.  This is a generic hook for locating an
839 * entry and jumping to an instruction at the specified offset
840 * from the start of the block.  Please note this is all position
841 * independent code.
842 *
843 *  r1  = corrupted
844 *  r2  = corrupted
845 *  r3  = block offset
846 *  r9  = corrupted
847 *  r12 = corrupted
848 */
849
850call_cache_fn:	adr	r12, proc_types
851#ifdef CONFIG_CPU_CP15
852		mrc	p15, 0, r9, c0, c0	@ get processor ID
853#elif defined(CONFIG_CPU_V7M)
854		/*
855		 * On v7-M the processor id is located in the V7M_SCB_CPUID
856		 * register, but as cache handling is IMPLEMENTATION DEFINED on
857		 * v7-M (if existent at all) we just return early here.
858		 * If V7M_SCB_CPUID were used, the CPU ID functions (i.e.
859		 * __armv7_mmu_cache_{on,off,flush}) would be selected, which
860		 * use cp15 registers that are not implemented on v7-M.
861		 */
862		bx	lr
863#else
864		ldr	r9, =CONFIG_PROCESSOR_ID
865#endif
8661:		ldr	r1, [r12, #0]		@ get value
867		ldr	r2, [r12, #4]		@ get mask
868		eor	r1, r1, r9		@ (real ^ match)
869		tst	r1, r2			@       & mask
870 ARM(		addeq	pc, r12, r3		) @ call cache function
871 THUMB(		addeq	r12, r3			)
872 THUMB(		moveq	pc, r12			) @ call cache function
873		add	r12, r12, #PROC_ENTRY_SIZE
874		b	1b
875
876/*
877 * Table for cache operations.  This is basically:
878 *   - CPU ID match
879 *   - CPU ID mask
880 *   - 'cache on' method instruction
881 *   - 'cache off' method instruction
882 *   - 'cache flush' method instruction
883 *
884 * We match an entry using: ((real_id ^ match) & mask) == 0
885 *
886 * Writethrough caches generally only need 'on' and 'off'
887 * methods.  Writeback caches _must_ have the flush method
888 * defined.
889 */
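/*
 * Worked example: a Cortex-A9 reports a MIDR such as 0x413fc090.  None
 * of the explicit CPU ID entries below match it, but the
 * architecture-based "new CPU Id" entry does, since
 * (0x413fc090 ^ 0x000f0000) & 0x000f0000 == 0, selecting the ARMv7
 * MMU cache functions.
 */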
890		.align	2
891		.type	proc_types,#object
892proc_types:
893		.word	0x41000000		@ old ARM ID
894		.word	0xff00f000
895		mov	pc, lr
896 THUMB(		nop				)
897		mov	pc, lr
898 THUMB(		nop				)
899		mov	pc, lr
900 THUMB(		nop				)
901
902		.word	0x41007000		@ ARM7/710
903		.word	0xfff8fe00
904		mov	pc, lr
905 THUMB(		nop				)
906		mov	pc, lr
907 THUMB(		nop				)
908		mov	pc, lr
909 THUMB(		nop				)
910
911		.word	0x41807200		@ ARM720T (writethrough)
912		.word	0xffffff00
913		W(b)	__armv4_mmu_cache_on
914		W(b)	__armv4_mmu_cache_off
915		mov	pc, lr
916 THUMB(		nop				)
917
918		.word	0x41007400		@ ARM74x
919		.word	0xff00ff00
920		W(b)	__armv3_mpu_cache_on
921		W(b)	__armv3_mpu_cache_off
922		W(b)	__armv3_mpu_cache_flush
923
924		.word	0x41009400		@ ARM94x
925		.word	0xff00ff00
926		W(b)	__armv4_mpu_cache_on
927		W(b)	__armv4_mpu_cache_off
928		W(b)	__armv4_mpu_cache_flush
929
930		.word	0x41069260		@ ARM926EJ-S (v5TEJ)
931		.word	0xff0ffff0
932		W(b)	__arm926ejs_mmu_cache_on
933		W(b)	__armv4_mmu_cache_off
934		W(b)	__armv5tej_mmu_cache_flush
935
936		.word	0x00007000		@ ARM7 IDs
937		.word	0x0000f000
938		mov	pc, lr
939 THUMB(		nop				)
940		mov	pc, lr
941 THUMB(		nop				)
942		mov	pc, lr
943 THUMB(		nop				)
944
945		@ Everything from here on will be the new ID system.
946
947		.word	0x4401a100		@ sa110 / sa1100
948		.word	0xffffffe0
949		W(b)	__armv4_mmu_cache_on
950		W(b)	__armv4_mmu_cache_off
951		W(b)	__armv4_mmu_cache_flush
952
953		.word	0x6901b110		@ sa1110
954		.word	0xfffffff0
955		W(b)	__armv4_mmu_cache_on
956		W(b)	__armv4_mmu_cache_off
957		W(b)	__armv4_mmu_cache_flush
958
959		.word	0x56056900
960		.word	0xffffff00		@ PXA9xx
961		W(b)	__armv4_mmu_cache_on
962		W(b)	__armv4_mmu_cache_off
963		W(b)	__armv4_mmu_cache_flush
964
965		.word	0x56158000		@ PXA168
966		.word	0xfffff000
967		W(b)	__armv4_mmu_cache_on
968		W(b)	__armv4_mmu_cache_off
969		W(b)	__armv5tej_mmu_cache_flush
970
971		.word	0x56050000		@ Feroceon
972		.word	0xff0f0000
973		W(b)	__armv4_mmu_cache_on
974		W(b)	__armv4_mmu_cache_off
975		W(b)	__armv5tej_mmu_cache_flush
976
977#ifdef CONFIG_CPU_FEROCEON_OLD_ID
978		/* this conflicts with the standard ARMv5TE entry */
979		.long	0x41009260		@ Old Feroceon
980		.long	0xff00fff0
981		b	__armv4_mmu_cache_on
982		b	__armv4_mmu_cache_off
983		b	__armv5tej_mmu_cache_flush
984#endif
985
986		.word	0x66015261		@ FA526
987		.word	0xff01fff1
988		W(b)	__fa526_cache_on
989		W(b)	__armv4_mmu_cache_off
990		W(b)	__fa526_cache_flush
991
992		@ These match on the architecture ID
993
994		.word	0x00020000		@ ARMv4T
995		.word	0x000f0000
996		W(b)	__armv4_mmu_cache_on
997		W(b)	__armv4_mmu_cache_off
998		W(b)	__armv4_mmu_cache_flush
999
1000		.word	0x00050000		@ ARMv5TE
1001		.word	0x000f0000
1002		W(b)	__armv4_mmu_cache_on
1003		W(b)	__armv4_mmu_cache_off
1004		W(b)	__armv4_mmu_cache_flush
1005
1006		.word	0x00060000		@ ARMv5TEJ
1007		.word	0x000f0000
1008		W(b)	__armv4_mmu_cache_on
1009		W(b)	__armv4_mmu_cache_off
1010		W(b)	__armv5tej_mmu_cache_flush
1011
1012		.word	0x0007b000		@ ARMv6
1013		.word	0x000ff000
1014		W(b)	__armv6_mmu_cache_on
1015		W(b)	__armv4_mmu_cache_off
1016		W(b)	__armv6_mmu_cache_flush
1017
1018		.word	0x000f0000		@ new CPU Id
1019		.word	0x000f0000
1020		W(b)	__armv7_mmu_cache_on
1021		W(b)	__armv7_mmu_cache_off
1022		W(b)	__armv7_mmu_cache_flush
1023
1024		.word	0			@ unrecognised type
1025		.word	0
1026		mov	pc, lr
1027 THUMB(		nop				)
1028		mov	pc, lr
1029 THUMB(		nop				)
1030		mov	pc, lr
1031 THUMB(		nop				)
1032
1033		.size	proc_types, . - proc_types
1034
1035		/*
1036		 * If you get a "non-constant expression in ".if" statement"
1037		 * error from the assembler on this line, check that you have
1038		 * not accidentally written a "b" instruction where you should
1039		 * have written W(b).
1040		 */
1041		.if (. - proc_types) % PROC_ENTRY_SIZE != 0
1042		.error "The size of one or more proc_types entries is wrong."
1043		.endif
1044
1045/*
1046 * Turn off the Cache and MMU.  ARMv3 does not support
1047 * reading the control register, but ARMv4 does.
1048 *
1049 * On exit,
1050 *  r0, r1, r2, r3, r9, r12 corrupted
1051 * This routine must preserve:
1052 *  r4, r7, r8
1053 */
1054		.align	5
1055cache_off:	mov	r3, #12			@ cache_off function
1056		b	call_cache_fn
1057
1058__armv4_mpu_cache_off:
1059		mrc	p15, 0, r0, c1, c0
1060		bic	r0, r0, #0x000d
1061		mcr	p15, 0, r0, c1, c0	@ turn MPU and cache off
1062		mov	r0, #0
1063		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
1064		mcr	p15, 0, r0, c7, c6, 0	@ flush D-Cache
1065		mcr	p15, 0, r0, c7, c5, 0	@ flush I-Cache
1066		mov	pc, lr
1067
1068__armv3_mpu_cache_off:
1069		mrc	p15, 0, r0, c1, c0
1070		bic	r0, r0, #0x000d
1071		mcr	p15, 0, r0, c1, c0, 0	@ turn MPU and cache off
1072		mov	r0, #0
1073		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
1074		mov	pc, lr
1075
1076__armv4_mmu_cache_off:
1077#ifdef CONFIG_MMU
1078		mrc	p15, 0, r0, c1, c0
1079		bic	r0, r0, #0x000d
1080		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
1081		mov	r0, #0
1082		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
1083		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
1084#endif
1085		mov	pc, lr
1086
1087__armv7_mmu_cache_off:
1088		mrc	p15, 0, r0, c1, c0
1089#ifdef CONFIG_MMU
1090		bic	r0, r0, #0x000d
1091#else
1092		bic	r0, r0, #0x000c
1093#endif
1094		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
1095		mov	r12, lr
1096		bl	__armv7_mmu_cache_flush
1097		mov	r0, #0
1098#ifdef CONFIG_MMU
1099		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB
1100#endif
1101		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
1102		mcr	p15, 0, r0, c7, c10, 4	@ DSB
1103		mcr	p15, 0, r0, c7, c5, 4	@ ISB
1104		mov	pc, r12
1105
1106/*
1107 * Clean and flush the cache to maintain consistency.
1108 *
1109 * On exit,
1110 *  r1, r2, r3, r9, r10, r11, r12 corrupted
1111 * This routine must preserve:
1112 *  r4, r6, r7, r8
1113 */
1114		.align	5
1115cache_clean_flush:
1116		mov	r3, #16
1117		b	call_cache_fn
1118
1119__armv4_mpu_cache_flush:
1120		tst	r4, #1
1121		movne	pc, lr
1122		mov	r2, #1
1123		mov	r3, #0
1124		mcr	p15, 0, ip, c7, c6, 0	@ invalidate D cache
1125		mov	r1, #7 << 5		@ 8 segments
11261:		orr	r3, r1, #63 << 26	@ 64 entries
11272:		mcr	p15, 0, r3, c7, c14, 2	@ clean & invalidate D index
1128		subs	r3, r3, #1 << 26
1129		bcs	2b			@ entries 63 to 0
1130		subs 	r1, r1, #1 << 5
1131		bcs	1b			@ segments 7 to 0
1132
1133		teq	r2, #0
1134		mcrne	p15, 0, ip, c7, c5, 0	@ invalidate I cache
1135		mcr	p15, 0, ip, c7, c10, 4	@ drain WB
1136		mov	pc, lr
1137
1138__fa526_cache_flush:
1139		tst	r4, #1
1140		movne	pc, lr
1141		mov	r1, #0
1142		mcr	p15, 0, r1, c7, c14, 0	@ clean and invalidate D cache
1143		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
1144		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1145		mov	pc, lr
1146
1147__armv6_mmu_cache_flush:
1148		mov	r1, #0
1149		tst	r4, #1
1150		mcreq	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
1151		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
1152		mcreq	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
1153		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1154		mov	pc, lr
1155
1156__armv7_mmu_cache_flush:
1157		tst	r4, #1
1158		bne	iflush
1159		mrc	p15, 0, r10, c0, c1, 5	@ read ID_MMFR1
1160		tst	r10, #0xf << 16		@ hierarchical cache (ARMv7)
1161		mov	r10, #0
1162		beq	hierarchical
1163		mcr	p15, 0, r10, c7, c14, 0	@ clean+invalidate D
1164		b	iflush
1165hierarchical:
1166		mcr	p15, 0, r10, c7, c10, 5	@ DMB
1167		stmfd	sp!, {r0-r7, r9-r11}
1168		mrc	p15, 1, r0, c0, c0, 1	@ read clidr
1169		ands	r3, r0, #0x7000000	@ extract loc from clidr
1170		mov	r3, r3, lsr #23		@ LoC * 2, to compare with the level counter
1171		beq	finished		@ if loc is 0, then no need to clean
1172		mov	r10, #0			@ start clean at cache level 0
1173loop1:
1174		add	r2, r10, r10, lsr #1	@ work out 3x current cache level
1175		mov	r1, r0, lsr r2		@ extract cache type bits from clidr
1176		and	r1, r1, #7		@ mask off the bits for the current cache only
1177		cmp	r1, #2			@ see what cache we have at this level
1178		blt	skip			@ skip if no cache, or just i-cache
1179		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
1180		mcr	p15, 0, r10, c7, c5, 4	@ isb to sync the new cssr&csidr
1181		mrc	p15, 1, r1, c0, c0, 0	@ read the new csidr
1182		and	r2, r1, #7		@ extract the length of the cache lines
1183		add	r2, r2, #4		@ add 4 (line length offset)
1184		ldr	r4, =0x3ff
1185		ands	r4, r4, r1, lsr #3	@ find maximum way index (associativity - 1)
1186		clz	r5, r4			@ find bit position of way size increment
1187		ldr	r7, =0x7fff
1188		ands	r7, r7, r1, lsr #13	@ extract maximum set index (number of sets - 1)
1189loop2:
1190		mov	r9, r4			@ create working copy of max way size
1191loop3:
1192 ARM(		orr	r11, r10, r9, lsl r5	) @ factor way and cache number into r11
1193 ARM(		orr	r11, r11, r7, lsl r2	) @ factor index number into r11
1194 THUMB(		lsl	r6, r9, r5		)
1195 THUMB(		orr	r11, r10, r6		) @ factor way and cache number into r11
1196 THUMB(		lsl	r6, r7, r2		)
1197 THUMB(		orr	r11, r11, r6		) @ factor index number into r11
1198		mcr	p15, 0, r11, c7, c14, 2	@ clean & invalidate by set/way
1199		subs	r9, r9, #1		@ decrement the way
1200		bge	loop3
1201		subs	r7, r7, #1		@ decrement the index
1202		bge	loop2
1203skip:
1204		add	r10, r10, #2		@ increment cache number
1205		cmp	r3, r10
1206		bgt	loop1
1207finished:
1208		ldmfd	sp!, {r0-r7, r9-r11}
1209		mov	r10, #0			@ switch back to cache level 0
1210		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
1211iflush:
1212		mcr	p15, 0, r10, c7, c10, 4	@ DSB
1213		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I+BTB
1214		mcr	p15, 0, r10, c7, c10, 4	@ DSB
1215		mcr	p15, 0, r10, c7, c5, 4	@ ISB
1216		mov	pc, lr
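/*
 * The hierarchical path above is the standard ARMv7 clean+invalidate
 * by set/way walk: CLIDR gives the Level of Coherency, CCSIDR gives
 * line length, number of ways and number of sets for each data or
 * unified level, and each DCCISW operand is assembled as
 *   (way << way_shift) | (set << line_shift) | (level << 1)
 * where way_shift comes from the clz of the maximum way index and
 * line_shift = log2(line length in bytes).
 */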
1217
1218__armv5tej_mmu_cache_flush:
1219		tst	r4, #1
1220		movne	pc, lr
12211:		mrc	p15, 0, r15, c7, c14, 3	@ test,clean,invalidate D cache
1222		bne	1b
1223		mcr	p15, 0, r0, c7, c5, 0	@ flush I cache
1224		mcr	p15, 0, r0, c7, c10, 4	@ drain WB
1225		mov	pc, lr
1226
1227__armv4_mmu_cache_flush:
1228		tst	r4, #1
1229		movne	pc, lr
1230		mov	r2, #64*1024		@ default: 32K dcache size (*2)
1231		mov	r11, #32		@ default: 32 byte line size
1232		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
1233		teq	r3, r9			@ cache ID register present?
1234		beq	no_cache_id
1235		mov	r1, r3, lsr #18
1236		and	r1, r1, #7
1237		mov	r2, #1024
1238		mov	r2, r2, lsl r1		@ base dcache size *2
1239		tst	r3, #1 << 14		@ test M bit
1240		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
1241		mov	r3, r3, lsr #12
1242		and	r3, r3, #3
1243		mov	r11, #8
1244		mov	r11, r11, lsl r3	@ cache line size in bytes
1245no_cache_id:
1246		mov	r1, pc
1247		bic	r1, r1, #63		@ align to longest cache line
1248		add	r2, r1, r2
12491:
1250 ARM(		ldr	r3, [r1], r11		) @ s/w flush D cache
1251 THUMB(		ldr     r3, [r1]		) @ s/w flush D cache
1252 THUMB(		add     r1, r1, r11		)
1253		teq	r1, r2
1254		bne	1b
1255
1256		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
1257		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
1258		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1259		mov	pc, lr
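/*
 * The routine above sizes the D-cache from the ARMv4 cache type
 * register (size bits at [20:18], the M multiplier bit 14 and the
 * line length bits [13:12], as decoded here), falling back to 32K with
 * 32-byte lines when the register is not implemented, and then flushes
 * it by reading through a window of twice the cache size so that every
 * dirty line is evicted and written back, before invalidating I/D and
 * draining the write buffer.
 */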
1260
1261__armv3_mmu_cache_flush:
1262__armv3_mpu_cache_flush:
1263		tst	r4, #1
1264		movne	pc, lr
1265		mov	r1, #0
1266		mcr	p15, 0, r1, c7, c0, 0	@ invalidate whole cache v3
1267		mov	pc, lr
1268
1269/*
1270 * Various debugging routines for printing hex characters and
1271 * memory, which again must be relocatable.
1272 */
1273#ifdef DEBUG
1274		.align	2
1275		.type	phexbuf,#object
1276phexbuf:	.space	12
1277		.size	phexbuf, . - phexbuf
1278
1279@ phex corrupts {r0, r1, r2, r3}
1280phex:		adr	r3, phexbuf
1281		mov	r2, #0
1282		strb	r2, [r3, r1]
12831:		subs	r1, r1, #1
1284		movmi	r0, r3
1285		bmi	puts
1286		and	r2, r0, #15
1287		mov	r0, r0, lsr #4
1288		cmp	r2, #10
1289		addge	r2, r2, #7
1290		add	r2, r2, #'0'
1291		strb	r2, [r3, r1]
1292		b	1b
1293
1294@ puts corrupts {r0, r1, r2, r3}
1295puts:		loadsp	r3, r1
12961:		ldrb	r2, [r0], #1
1297		teq	r2, #0
1298		moveq	pc, lr
12992:		writeb	r2, r3
1300		mov	r1, #0x00020000
13013:		subs	r1, r1, #1
1302		bne	3b
1303		teq	r2, #'\n'
1304		moveq	r2, #'\r'
1305		beq	2b
1306		teq	r0, #0
1307		bne	1b
1308		mov	pc, lr
1309@ putc corrupts {r0, r1, r2, r3}
1310putc:
1311		mov	r2, r0
1312		mov	r0, #0
1313		loadsp	r3, r1
1314		b	2b
1315
1316@ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
1317memdump:	mov	r12, r0
1318		mov	r10, lr
1319		mov	r11, #0
13202:		mov	r0, r11, lsl #2
1321		add	r0, r0, r12
1322		mov	r1, #8
1323		bl	phex
1324		mov	r0, #':'
1325		bl	putc
13261:		mov	r0, #' '
1327		bl	putc
1328		ldr	r0, [r12, r11, lsl #2]
1329		mov	r1, #8
1330		bl	phex
1331		and	r0, r11, #7
1332		teq	r0, #3
1333		moveq	r0, #' '
1334		bleq	putc
1335		and	r0, r11, #7
1336		add	r11, r11, #1
1337		teq	r0, #7
1338		bne	1b
1339		mov	r0, #'\n'
1340		bl	putc
1341		cmp	r11, #64
1342		blt	2b
1343		mov	pc, r10
1344#endif
1345
1346		.ltorg
1347
1348#ifdef CONFIG_ARM_VIRT_EXT
1349.align 5
1350__hyp_reentry_vectors:
1351		W(b)	.			@ reset
1352		W(b)	.			@ undef
1353		W(b)	.			@ svc
1354		W(b)	.			@ pabort
1355		W(b)	.			@ dabort
1356		W(b)	__enter_kernel		@ hyp
1357		W(b)	.			@ irq
1358		W(b)	.			@ fiq
1359#endif /* CONFIG_ARM_VIRT_EXT */
1360
1361__enter_kernel:
1362		mov	r0, #0			@ must be 0
1363 ARM(		mov	pc, r4		)	@ call kernel
1364 M_CLASS(	add	r4, r4, #1	)	@ enter in Thumb mode for M class
1365 THUMB(		bx	r4		)	@ entry point is always ARM for A/R classes
1366
1367reloc_code_end:
1368
1369#ifdef CONFIG_EFI_STUB
1370		.align	2
1371_start:		.long	start - .
1372
1373ENTRY(efi_stub_entry)
1374		@ allocate space on stack for passing current zImage address
1375		@ and for the EFI stub to return the new entry point of the
1376		@ zImage, as the EFI stub may copy the kernel. The pointer address
1377		@ is passed in r2. r0 and r1 are passed through from the
1378		@ EFI firmware to efi_entry
1379		adr	ip, _start
1380		ldr	r3, [ip]
1381		add	r3, r3, ip
1382		stmfd	sp!, {r3, lr}
1383		mov	r2, sp			@ pass zImage address in r2
1384		bl	efi_entry
1385
1386		@ Check for error return from EFI stub. r0 has FDT address
1387		@ or error code.
1388		cmn	r0, #1
1389		beq	efi_load_fail
1390
1391		@ Preserve return value of efi_entry() in r4
1392		mov	r4, r0
1393		bl	cache_clean_flush
1394		bl	cache_off
1395
1396		@ Set parameters for booting zImage according to boot protocol
1397		@ put FDT address in r2, it was returned by efi_entry()
1398		@ r1 is the machine type, and r0 needs to be 0
1399		mov	r0, #0
1400		mov	r1, #0xFFFFFFFF
1401		mov	r2, r4
1402
1403		@ Branch to (possibly) relocated zImage that is in [sp]
1404		ldr	lr, [sp]
1405		ldr	ip, =start_offset
1406		add	lr, lr, ip
1407		mov	pc, lr				@ no mode switch
1408
1409efi_load_fail:
1410		@ Return EFI_LOAD_ERROR to EFI firmware on error.
1411		ldr	r0, =0x80000001
1412		ldmfd	sp!, {ip, pc}
1413ENDPROC(efi_stub_entry)
1414#endif
1415
1416		.align
1417		.section ".stack", "aw", %nobits
1418.L_user_stack:	.space	4096
1419.L_user_stack_end:
1420