xref: /openbmc/linux/arch/arm/boot/compressed/head.S (revision 98ddec80)
1/*
2 *  linux/arch/arm/boot/compressed/head.S
3 *
4 *  Copyright (C) 1996-2002 Russell King
5 *  Copyright (C) 2004 Hyok S. Choi (MPU support)
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/linkage.h>
12#include <asm/assembler.h>
13#include <asm/v7m.h>
14
15#include "efi-header.S"
16
17 AR_CLASS(	.arch	armv7-a	)
18 M_CLASS(	.arch	armv7-m	)
19
20/*
21 * Debugging stuff
22 *
23 * Note that these macros must not contain any code which is not
24 * 100% relocatable.  Any attempt to do so will result in a crash.
25 * Please select one of the following when turning on debugging.
26 */
27#ifdef DEBUG
28
29#if defined(CONFIG_DEBUG_ICEDCC)
30
31#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
32		.macro	loadsp, rb, tmp1, tmp2
33		.endm
34		.macro	writeb, ch, rb
35		mcr	p14, 0, \ch, c0, c5, 0
36		.endm
37#elif defined(CONFIG_CPU_XSCALE)
38		.macro	loadsp, rb, tmp1, tmp2
39		.endm
40		.macro	writeb, ch, rb
41		mcr	p14, 0, \ch, c8, c0, 0
42		.endm
43#else
44		.macro	loadsp, rb, tmp1, tmp2
45		.endm
46		.macro	writeb, ch, rb
47		mcr	p14, 0, \ch, c1, c0, 0
48		.endm
49#endif
50
51#else
52
53#include CONFIG_DEBUG_LL_INCLUDE
54
55		.macro	writeb,	ch, rb
56		senduart \ch, \rb
57		.endm
58
59#if defined(CONFIG_ARCH_SA1100)
60		.macro	loadsp, rb, tmp1, tmp2
61		mov	\rb, #0x80000000	@ physical base address
62#ifdef CONFIG_DEBUG_LL_SER3
63		add	\rb, \rb, #0x00050000	@ Ser3
64#else
65		add	\rb, \rb, #0x00010000	@ Ser1
66#endif
67		.endm
68#else
69		.macro	loadsp,	rb, tmp1, tmp2
70		addruart \rb, \tmp1, \tmp2
71		.endm
72#endif
73#endif
74#endif
75
76		.macro	kputc,val
77		mov	r0, \val
78		bl	putc
79		.endm
80
81		.macro	kphex,val,len
82		mov	r0, \val
83		mov	r1, #\len
84		bl	phex
85		.endm
86
87		.macro	debug_reloc_start
88#ifdef DEBUG
89		kputc	#'\n'
90		kphex	r6, 8		/* processor id */
91		kputc	#':'
92		kphex	r7, 8		/* architecture id */
93#ifdef CONFIG_CPU_CP15
94		kputc	#':'
95		mrc	p15, 0, r0, c1, c0
96		kphex	r0, 8		/* control reg */
97#endif
98		kputc	#'\n'
99		kphex	r5, 8		/* decompressed kernel start */
100		kputc	#'-'
101		kphex	r9, 8		/* decompressed kernel end  */
102		kputc	#'>'
103		kphex	r4, 8		/* kernel execution address */
104		kputc	#'\n'
105#endif
106		.endm
107
108		.macro	debug_reloc_end
109#ifdef DEBUG
110		kphex	r5, 8		/* end of kernel */
111		kputc	#'\n'
112		mov	r0, r4
113		bl	memdump		/* dump 256 bytes at start of kernel */
114#endif
115		.endm
116
117		.section ".start", #alloc, #execinstr
118/*
119 * sort out different calling conventions
120 */
121		.align
122		/*
123		 * Always enter in ARM state for CPUs that support the ARM ISA.
124		 * As of today (2014) that's exactly the members of the A and R
125		 * classes.
126		 */
127 AR_CLASS(	.arm	)
128start:
129		.type	start,#function
130		.rept	7
131		__nop
132		.endr
133#ifndef CONFIG_THUMB2_KERNEL
134		mov	r0, r0
135#else
136 AR_CLASS(	sub	pc, pc, #3	)	@ A/R: switch to Thumb2 mode
137  M_CLASS(	nop.w			)	@ M: already in Thumb2 mode
138		.thumb
139#endif
140		W(b)	1f
141
142		.word	_magic_sig	@ Magic numbers to help the loader
143		.word	_magic_start	@ absolute load/run zImage address
144		.word	_magic_end	@ zImage end address
145		.word	0x04030201	@ endianness flag
146		.word	0x45454545	@ another magic number to indicate
147		.word	_magic_table	@ additional data table
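		/*
		 * Illustrative note: these words sit at a fixed offset from
		 * the start of the image (right after the seven NOPs, the
		 * mode-switch instruction and the branch above), so a boot
		 * loader can read the linked start/end addresses, the
		 * endianness word and the extension table pointer without
		 * parsing anything else in the zImage.
		 */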
148
149		__EFI_HEADER
1501:
151 ARM_BE8(	setend	be		)	@ go BE8 if compiled for BE8
152 AR_CLASS(	mrs	r9, cpsr	)
153#ifdef CONFIG_ARM_VIRT_EXT
154		bl	__hyp_stub_install	@ get into SVC mode, reversibly
155#endif
156		mov	r7, r1			@ save architecture ID
157		mov	r8, r2			@ save atags pointer
158
159#ifndef CONFIG_CPU_V7M
160		/*
161		 * Booting from Angel - need to enter SVC mode and disable
162		 * FIQs/IRQs (numeric definitions from angel arm.h source).
163		 * We only do this if we were in user mode on entry.
164		 */
165		mrs	r2, cpsr		@ get current mode
166		tst	r2, #3			@ not user?
167		bne	not_angel
168		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
169 ARM(		swi	0x123456	)	@ angel_SWI_ARM
170 THUMB(		svc	0xab		)	@ angel_SWI_THUMB
171not_angel:
172		safe_svcmode_maskall r0
173		msr	spsr_cxsf, r9		@ Save the CPU boot mode in
174						@ SPSR
175#endif
176		/*
177		 * Note that some cache flushing and other stuff may
178		 * be needed here - is there an Angel SWI call for this?
179		 */
180
181		/*
182		 * some architecture specific code can be inserted
183		 * by the linker here, but it should preserve r7, r8, and r9.
184		 */
185
186		.text
187
188#ifdef CONFIG_AUTO_ZRELADDR
189		/*
190		 * Find the start of physical memory.  As we are executing
191		 * without the MMU on, we are in the physical address space.
192		 * We just need to get rid of any offset by aligning the
193		 * address.
194		 *
195		 * This alignment is a balance between the requirements of
196		 * different platforms - we have chosen 128MB to allow
197		 * platforms which align the start of their physical memory
198		 * to 128MB to use this feature, while allowing the zImage
199		 * to be placed within the first 128MB of memory on other
200		 * platforms.  Increasing the alignment means we place
201		 * stricter alignment requirements on the start of physical
202		 * memory, but relaxing it means that we break people who
203		 * are already placing their zImage in (eg) the top 64MB
204		 * of this range.
205		 */
206		mov	r4, pc
207		and	r4, r4, #0xf8000000
208		/* Determine final kernel image address. */
209		add	r4, r4, #TEXT_OFFSET
210#else
211		ldr	r4, =zreladdr
212#endif
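		/*
		 * Worked example (illustration only): running at 0x4a008000,
		 * "and r4, r4, #0xf8000000" guesses a RAM base of 0x48000000,
		 * and adding TEXT_OFFSET (commonly 0x8000) gives a final
		 * kernel address of 0x48008000.  Roughly, in C:
		 *
		 *	unsigned long auto_zreladdr(unsigned long pc)
		 *	{
		 *		return (pc & 0xf8000000UL) + TEXT_OFFSET;
		 *	}
		 */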
213
214		/*
215		 * Set up a page table only if it won't overwrite us.
216		 * That means r4 < pc || r4 - 16k page directory > &_end.
217		 * Given that r4 > &_end is quite unlikely, we add a rough
218		 * additional 1MB of room for a possible appended DTB.
219		 */
220		mov	r0, pc
221		cmp	r0, r4
222		ldrcc	r0, LC0+32
223		addcc	r0, r0, pc
224		cmpcc	r4, r0
225		orrcc	r4, r4, #1		@ remember we skipped cache_on
226		blcs	cache_on
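		/*
		 * In other words (rough C sketch, not built): the early
		 * cache_on is skipped, and the fact recorded in bit 0 of r4,
		 * only when the page directory at r4 - 16K could land on top
		 * of this image:
		 *
		 *	int defer_cache_on(unsigned long pc, unsigned long r4,
		 *			   unsigned long room)
		 *	{
		 *		return pc < r4 && r4 < pc + room;
		 *	}
		 *
		 * where "room" is the LC0+32 word, i.e.
		 * _end - restart + 16K + 1M.
		 */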
227
228restart:	adr	r0, LC0
229		ldmia	r0, {r1, r2, r3, r6, r10, r11, r12}
230		ldr	sp, [r0, #28]
231
232		/*
233		 * We might be running at a different address.  We need
234		 * to fix up various pointers.
235		 */
236		sub	r0, r0, r1		@ calculate the delta offset
237		add	r6, r6, r0		@ _edata
238		add	r10, r10, r0		@ inflated kernel size location
239
240		/*
241		 * The kernel build system appends the size of the
242		 * decompressed kernel at the end of the compressed data
243		 * in little-endian form.
244		 */
245		ldrb	r9, [r10, #0]
246		ldrb	lr, [r10, #1]
247		orr	r9, r9, lr, lsl #8
248		ldrb	lr, [r10, #2]
249		ldrb	r10, [r10, #3]
250		orr	r9, r9, lr, lsl #16
251		orr	r9, r9, r10, lsl #24
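		/*
		 * The four ldrb/orr steps above are simply an endian-neutral
		 * 32-bit load of that size word; roughly, in C:
		 *
		 *	unsigned int inflated_size(const unsigned char *p)
		 *	{
		 *		return p[0] | p[1] << 8 | p[2] << 16 |
		 *		       (unsigned int)p[3] << 24;
		 *	}
		 *
		 * where p corresponds to r10 and the result to r9.
		 */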
252
253#ifndef CONFIG_ZBOOT_ROM
254		/* malloc space is above the relocated stack (64k max) */
255		add	sp, sp, r0
256		add	r10, sp, #0x10000
257#else
258		/*
259		 * With ZBOOT_ROM the bss/stack is non-relocatable,
260		 * but someone could still run this code from RAM,
261		 * in which case our reference is _edata.
262		 */
263		mov	r10, r6
264#endif
265
266		mov	r5, #0			@ init dtb size to 0
267#ifdef CONFIG_ARM_APPENDED_DTB
268/*
269 *   r0  = delta
270 *   r2  = BSS start
271 *   r3  = BSS end
272 *   r4  = final kernel address (possibly with LSB set)
273 *   r5  = appended dtb size (still unknown)
274 *   r6  = _edata
275 *   r7  = architecture ID
276 *   r8  = atags/device tree pointer
277 *   r9  = size of decompressed image
278 *   r10 = end of this image, including  bss/stack/malloc space if non XIP
279 *   r11 = GOT start
280 *   r12 = GOT end
281 *   sp  = stack pointer
282 *
283 * if there are device trees (dtb) appended to zImage, advance r10 so that the
284 * dtb data will get relocated along with the kernel if necessary.
285 */
286
287		ldr	lr, [r6, #0]
288#ifndef __ARMEB__
289		ldr	r1, =0xedfe0dd0		@ sig is 0xd00dfeed big endian
290#else
291		ldr	r1, =0xd00dfeed
292#endif
293		cmp	lr, r1
294		bne	dtb_check_done		@ not found
295
296#ifdef CONFIG_ARM_ATAG_DTB_COMPAT
297		/*
298		 * OK... Let's do some funky business here.
299		 * If we do have a DTB appended to zImage, and we do have
300		 * an ATAG list around, we want the latter to be translated
301		 * and folded into the former here. No GOT fixup has occurred
302		 * yet, but none of the code we're about to call uses any
303		 * global variable.
304		*/
305
306		/* Get the initial DTB size */
307		ldr	r5, [r6, #4]
308#ifndef __ARMEB__
309		/* convert to little endian */
310		eor	r1, r5, r5, ror #16
311		bic	r1, r1, #0x00ff0000
312		mov	r5, r5, ror #8
313		eor	r5, r5, r1, lsr #8
314#endif
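		/*
		 * On little-endian builds the four instructions above are the
		 * classic pre-ARMv6 byte swap (the DTB header is big-endian);
		 * roughly, in C:
		 *
		 *	unsigned int swab32(unsigned int x)
		 *	{
		 *		unsigned int t = x ^ ((x >> 16) | (x << 16));
		 *
		 *		t &= ~0x00ff0000U;
		 *		x = (x >> 8) | (x << 24);
		 *		return x ^ (t >> 8);
		 *	}
		 */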
315		/* 50% DTB growth should be good enough */
316		add	r5, r5, r5, lsr #1
317		/* preserve 64-bit alignment */
318		add	r5, r5, #7
319		bic	r5, r5, #7
320		/* clamp to 32KB min and 1MB max */
321		cmp	r5, #(1 << 15)
322		movlo	r5, #(1 << 15)
323		cmp	r5, #(1 << 20)
324		movhi	r5, #(1 << 20)
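		/*
		 * Summarising the sizing above (rough C sketch, not built):
		 *
		 *	dtb_size += dtb_size >> 1;		@ 50% growth
		 *	dtb_size = (dtb_size + 7) & ~7UL;	@ 8-byte align
		 *	if (dtb_size < (1 << 15))
		 *		dtb_size = 1 << 15;		@ >= 32KB
		 *	if (dtb_size > (1 << 20))
		 *		dtb_size = 1 << 20;		@ <= 1MB
		 */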
325		/* temporarily relocate the stack past the DTB work space */
326		add	sp, sp, r5
327
328		stmfd	sp!, {r0-r3, ip, lr}
329		mov	r0, r8
330		mov	r1, r6
331		mov	r2, r5
332		bl	atags_to_fdt
333
334		/*
335		 * If the returned value is 1, there is no ATAG at the location
336		 * pointed to by r8.  Try the typical 0x100 offset from the start
337		 * of RAM and hope for the best.
338		 */
339		cmp	r0, #1
340		sub	r0, r4, #TEXT_OFFSET
341		bic	r0, r0, #1
342		add	r0, r0, #0x100
343		mov	r1, r6
344		mov	r2, r5
345		bleq	atags_to_fdt
346
347		ldmfd	sp!, {r0-r3, ip, lr}
348		sub	sp, sp, r5
349#endif
350
351		mov	r8, r6			@ use the appended device tree
352
353		/*
354		 * Make sure that the DTB doesn't end up in the final
355		 * kernel's .bss area. To do so, we adjust the decompressed
356		 * kernel size to compensate if that .bss size is larger
357		 * than the relocated code.
358		 */
359		ldr	r5, =_kernel_bss_size
360		adr	r1, wont_overwrite
361		sub	r1, r6, r1
362		subs	r1, r5, r1
363		addhi	r9, r9, r1
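		/*
		 * Equivalently (illustration only):
		 *
		 *	if (kernel_bss_size > _edata - &wont_overwrite)
		 *		image_size += kernel_bss_size -
		 *			      (_edata - &wont_overwrite);
		 *
		 * so the later overlap checks treat the kernel's .bss as part
		 * of the decompressed image and keep the DTB clear of it.
		 */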
364
365		/* Get the current DTB size */
366		ldr	r5, [r6, #4]
367#ifndef __ARMEB__
368		/* convert r5 (dtb size) to little endian */
369		eor	r1, r5, r5, ror #16
370		bic	r1, r1, #0x00ff0000
371		mov	r5, r5, ror #8
372		eor	r5, r5, r1, lsr #8
373#endif
374
375		/* preserve 64-bit alignment */
376		add	r5, r5, #7
377		bic	r5, r5, #7
378
379		/* relocate some pointers past the appended dtb */
380		add	r6, r6, r5
381		add	r10, r10, r5
382		add	sp, sp, r5
383dtb_check_done:
384#endif
385
386/*
387 * Check to see if we will overwrite ourselves.
388 *   r4  = final kernel address (possibly with LSB set)
389 *   r9  = size of decompressed image
390 *   r10 = end of this image, including  bss/stack/malloc space if non XIP
391 * We basically want:
392 *   r4 - 16k page directory >= r10 -> OK
393 *   r4 + image length <= address of wont_overwrite -> OK
394 * Note: the possible LSB in r4 is harmless here.
395 */
396		add	r10, r10, #16384
397		cmp	r4, r10
398		bhs	wont_overwrite
399		add	r10, r4, r9
400		adr	r9, wont_overwrite
401		cmp	r10, r9
402		bls	wont_overwrite
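		/*
		 * Equivalently (rough C sketch, not built):
		 *
		 *	if (r4 >= r10 + 16384)
		 *		goto wont_overwrite;	@ kernel lands above us
		 *	if (r4 + r9 <= (unsigned long)&wont_overwrite)
		 *		goto wont_overwrite;	@ kernel ends below us
		 *
		 * otherwise fall through and move ourselves out of the way
		 * first.
		 */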
403
404/*
405 * Relocate ourselves past the end of the decompressed kernel.
406 *   r6  = _edata
407 *   r10 = end of the decompressed kernel
408 * Because we always copy ahead, we need to do it from the end and go
409 * backward in case the source and destination overlap.
410 */
411		/*
412		 * Bump to the next 256-byte boundary with the size of
413		 * the relocation code added. This avoids overwriting
414		 * ourself when the offset is small.
415		 * ourselves when the offset is small.
416		add	r10, r10, #((reloc_code_end - restart + 256) & ~255)
417		bic	r10, r10, #255
418
419		/* Get start of code we want to copy and align it down. */
420		adr	r5, restart
421		bic	r5, r5, #31
422
423/* Relocate the hyp vector base if necessary */
424#ifdef CONFIG_ARM_VIRT_EXT
425		mrs	r0, spsr
426		and	r0, r0, #MODE_MASK
427		cmp	r0, #HYP_MODE
428		bne	1f
429
430		/*
431		 * Compute the address of the hyp vectors after relocation.
432		 * This requires some arithmetic since we cannot directly
433		 * reference __hyp_stub_vectors in a PC-relative way.
434		 * Call __hyp_set_vectors with the new address so that we
435		 * can HVC again after the copy.
436		 */
4370:		adr	r0, 0b
438		movw	r1, #:lower16:__hyp_stub_vectors - 0b
439		movt	r1, #:upper16:__hyp_stub_vectors - 0b
440		add	r0, r0, r1
441		sub	r0, r0, r5
442		add	r0, r0, r10
443		bl	__hyp_set_vectors
4441:
445#endif
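		/*
		 * I.e. the relocated vectors end up at
		 * __hyp_stub_vectors - copy_source (r5) + copy_destination
		 * (r10), mirroring the block copy performed just below.
		 */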
446
447		sub	r9, r6, r5		@ size to copy
448		add	r9, r9, #31		@ rounded up to a multiple
449		bic	r9, r9, #31		@ ... of 32 bytes
450		add	r6, r9, r5
451		add	r9, r9, r10
452
4531:		ldmdb	r6!, {r0 - r3, r10 - r12, lr}
454		cmp	r6, r5
455		stmdb	r9!, {r0 - r3, r10 - r12, lr}
456		bhi	1b
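		/*
		 * Roughly, in C (illustration only; eight words per step):
		 *
		 *	while (src_end > src_start) {
		 *		src_end -= 8;
		 *		dst_end -= 8;
		 *		for (i = 0; i < 8; i++)
		 *			dst_end[i] = src_end[i];
		 *	}
		 *
		 * with src_end/src_start/dst_end held in r6/r5/r9, all
		 * pointers to 32-bit words.
		 */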
457
458		/* Preserve offset to relocated code. */
459		sub	r6, r9, r6
460
461#ifndef CONFIG_ZBOOT_ROM
462		/* cache_clean_flush may use the stack, so relocate it */
463		add	sp, sp, r6
464#endif
465
466		bl	cache_clean_flush
467
468		badr	r0, restart
469		add	r0, r0, r6
470		mov	pc, r0
471
472wont_overwrite:
473/*
474 * If delta is zero, we are running at the address we were linked at.
475 *   r0  = delta
476 *   r2  = BSS start
477 *   r3  = BSS end
478 *   r4  = kernel execution address (possibly with LSB set)
479 *   r5  = appended dtb size (0 if not present)
480 *   r7  = architecture ID
481 *   r8  = atags pointer
482 *   r11 = GOT start
483 *   r12 = GOT end
484 *   sp  = stack pointer
485 */
486		orrs	r1, r0, r5
487		beq	not_relocated
488
489		add	r11, r11, r0
490		add	r12, r12, r0
491
492#ifndef CONFIG_ZBOOT_ROM
493		/*
494		 * If we're running fully PIC (i.e. CONFIG_ZBOOT_ROM=n),
495		 * we need to fix up pointers into the BSS region.
496		 * Note that the stack pointer has already been fixed up.
497		 */
498		add	r2, r2, r0
499		add	r3, r3, r0
500
501		/*
502		 * Relocate all entries in the GOT table.
503		 * Bump bss entries to _edata + dtb size
504		 */
5051:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
506		add	r1, r1, r0		@ This fixes up C references
507		cmp	r1, r2			@ if entry >= bss_start &&
508		cmphs	r3, r1			@       bss_end > entry
509		addhi	r1, r1, r5		@    entry += dtb size
510		str	r1, [r11], #4		@ next entry
511		cmp	r11, r12
512		blo	1b
513
514		/* bump our bss pointers too */
515		add	r2, r2, r5
516		add	r3, r3, r5
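		/*
		 * Summarising the loop above (rough C sketch, not built):
		 *
		 *	for (p = got_start; p < got_end; p++) {
		 *		*p += delta;
		 *		if (*p >= bss_start && *p < bss_end)
		 *			*p += dtb_size;
		 *	}
		 *
		 * where bss_start/bss_end are the already relocated values
		 * in r2/r3, delta is r0 and dtb_size is r5.
		 */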
517
518#else
519
520		/*
521		 * Relocate entries in the GOT table.  We only relocate
522		 * the entries that are outside the (relocated) BSS region.
523		 */
5241:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
525		cmp	r1, r2			@ entry < bss_start ||
526		cmphs	r3, r1			@ _end < entry
527		addlo	r1, r1, r0		@ table.  This fixes up the
528		str	r1, [r11], #4		@ C references.
529		cmp	r11, r12
530		blo	1b
531#endif
532
533not_relocated:	mov	r0, #0
5341:		str	r0, [r2], #4		@ clear bss
535		str	r0, [r2], #4
536		str	r0, [r2], #4
537		str	r0, [r2], #4
538		cmp	r2, r3
539		blo	1b
540
541		/*
542		 * Did we skip the cache setup earlier?
543		 * That is indicated by the LSB in r4.
544		 * Do it now if so.
545		 */
546		tst	r4, #1
547		bic	r4, r4, #1
548		blne	cache_on
549
550/*
551 * The C runtime environment should now be set up sufficiently.
552 * Set up some pointers, and start decompressing.
553 *   r4  = kernel execution address
554 *   r7  = architecture ID
555 *   r8  = atags pointer
556 */
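		/*
		 * The registers set up below follow the AAPCS; in effect this
		 * is a call into the C decompressor along the lines of
		 * (illustrative prototype, see misc.c):
		 *
		 *	decompress_kernel(output_start,		@ r0 = r4
		 *			  free_mem_start,	@ r1 = sp
		 *			  free_mem_end,		@ r2 = sp + 64K
		 *			  arch_id);		@ r3 = r7
		 *
		 * which inflates the compressed payload directly to its final
		 * address.
		 */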
557		mov	r0, r4
558		mov	r1, sp			@ malloc space above stack
559		add	r2, sp, #0x10000	@ 64k max
560		mov	r3, r7
561		bl	decompress_kernel
562		bl	cache_clean_flush
563		bl	cache_off
564
565#ifdef CONFIG_ARM_VIRT_EXT
566		mrs	r0, spsr		@ Get saved CPU boot mode
567		and	r0, r0, #MODE_MASK
568		cmp	r0, #HYP_MODE		@ if not booted in HYP mode...
569		bne	__enter_kernel		@ boot kernel directly
570
571		adr	r12, .L__hyp_reentry_vectors_offset
572		ldr	r0, [r12]
573		add	r0, r0, r12
574
575		bl	__hyp_set_vectors
576		__HVC(0)			@ otherwise bounce to hyp mode
577
578		b	.			@ should never be reached
579
580		.align	2
581.L__hyp_reentry_vectors_offset:	.long	__hyp_reentry_vectors - .
582#else
583		b	__enter_kernel
584#endif
585
586		.align	2
587		.type	LC0, #object
588LC0:		.word	LC0			@ r1
589		.word	__bss_start		@ r2
590		.word	_end			@ r3
591		.word	_edata			@ r6
592		.word	input_data_end - 4	@ r10 (inflated size location)
593		.word	_got_start		@ r11
594		.word	_got_end		@ ip
595		.word	.L_user_stack_end	@ sp
596		.word	_end - restart + 16384 + 1024*1024
597		.size	LC0, . - LC0
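		/*
		 * Note: the first LC0 word is the link-time address of LC0
		 * itself; restart compares it with the address the table is
		 * actually read from ("adr r0, LC0") to obtain the load
		 * offset by which every other link-time address is corrected.
		 */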
598
599#ifdef CONFIG_ARCH_RPC
600		.globl	params
601params:		ldr	r0, =0x10000100		@ params_phys for RPC
602		mov	pc, lr
603		.ltorg
604		.align
605#endif
606
607/*
608 * Turn on the cache.  We need to set up some page tables so that we
609 * can have both the I and D caches on.
610 *
611 * We place the page tables 16k down from the kernel execution address,
612 * and we hope that nothing else is using it.  If something is, we
613 * will go pop!
614 *
615 * On entry,
616 *  r4 = kernel execution address
617 *  r7 = architecture number
618 *  r8 = atags pointer
619 * On exit,
620 *  r0, r1, r2, r3, r9, r10, r12 corrupted
621 * This routine must preserve:
622 *  r4, r7, r8
623 */
624		.align	5
625cache_on:	mov	r3, #8			@ cache_on function
626		b	call_cache_fn
627
628/*
629 * Initialize the highest priority protection region, PR7
630 * to cover the whole 32-bit address range as cacheable and bufferable.
631 */
632__armv4_mpu_cache_on:
633		mov	r0, #0x3f		@ 4G, the whole
634		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
635		mcr 	p15, 0, r0, c6, c7, 1
636
637		mov	r0, #0x80		@ PR7
638		mcr	p15, 0, r0, c2, c0, 0	@ D-cache on
639		mcr	p15, 0, r0, c2, c0, 1	@ I-cache on
640		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
641
642		mov	r0, #0xc000
643		mcr	p15, 0, r0, c5, c0, 1	@ I-access permission
644		mcr	p15, 0, r0, c5, c0, 0	@ D-access permission
645
646		mov	r0, #0
647		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
648		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
649		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
650		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
651						@ ...I .... ..D. WC.M
652		orr	r0, r0, #0x002d		@ .... .... ..1. 11.1
653		orr	r0, r0, #0x1000		@ ...1 .... .... ....
654
655		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
656
657		mov	r0, #0
658		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
659		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
660		mov	pc, lr
661
662__armv3_mpu_cache_on:
663		mov	r0, #0x3f		@ 4G, the whole
664		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
665
666		mov	r0, #0x80		@ PR7
667		mcr	p15, 0, r0, c2, c0, 0	@ cache on
668		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
669
670		mov	r0, #0xc000
671		mcr	p15, 0, r0, c5, c0, 0	@ access permission
672
673		mov	r0, #0
674		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
675		/*
676		 * ?? ARMv3 MMU does not allow reading the control register,
677		 * does this really work on ARMv3 MPU?
678		 */
679		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
680						@ .... .... .... WC.M
681		orr	r0, r0, #0x000d		@ .... .... .... 11.1
682		/* ?? this overwrites the value constructed above? */
683		mov	r0, #0
684		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
685
686		/* ?? invalidate for the second time? */
687		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
688		mov	pc, lr
689
690#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
691#define CB_BITS 0x08
692#else
693#define CB_BITS 0x0c
694#endif
695
696__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
697		bic	r3, r3, #0xff		@ Align the pointer
698		bic	r3, r3, #0x3f00
699/*
700 * Initialise the page tables, turning on the cacheable and bufferable
701 * bits for the RAM area only.
702 */
703		mov	r0, r3
704		mov	r9, r0, lsr #18
705		mov	r9, r9, lsl #18		@ start of RAM
706		add	r10, r9, #0x10000000	@ a reasonable RAM size
707		mov	r1, #0x12		@ XN|U + section mapping
708		orr	r1, r1, #3 << 10	@ AP=11
709		add	r2, r3, #16384
7101:		cmp	r1, r9			@ if virt > start of RAM
711		cmphs	r10, r1			@   && end of RAM > virt
712		bic	r1, r1, #0x1c		@ clear XN|U + C + B
713		orrlo	r1, r1, #0x10		@ Set XN|U for non-RAM
714		orrhs	r1, r1, r6		@ set RAM section settings
715		str	r1, [r0], #4		@ 1:1 mapping
716		add	r1, r1, #1048576
717		teq	r0, r2
718		bne	1b
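/*
 * Each word written above is a short-descriptor section entry:
 * bits [1:0] = 10 (section), bit 2 = B, bit 3 = C, bit 4 = XN on v6+
 * ("must be one" on earlier cores), bits [11:10] = AP.  With AP = 11
 * everywhere and C/B set only inside the 256MB RAM window, RAM is
 * mapped cacheable/bufferable and everything else stays uncached.
 */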
719/*
720 * If ever we are running from Flash, then we surely want the cache
721 * to be enabled also for our execution instance...  We map 2MB of it
722 * so there is no map overlap problem for up to 1 MB compressed kernel.
723 * If the execution is in RAM then we would only be duplicating the above.
724 */
725		orr	r1, r6, #0x04		@ ensure B is set for this
726		orr	r1, r1, #3 << 10
727		mov	r2, pc
728		mov	r2, r2, lsr #20
729		orr	r1, r1, r2, lsl #20
730		add	r0, r3, r2, lsl #2
731		str	r1, [r0], #4
732		add	r1, r1, #1048576
733		str	r1, [r0]
734		mov	pc, lr
735ENDPROC(__setup_mmu)
736
737@ Enable unaligned access on v6, to allow better code generation
738@ for the decompressor C code:
739__armv6_mmu_cache_on:
740		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
741		bic	r0, r0, #2		@ A (no unaligned access fault)
742		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
743		mcr	p15, 0, r0, c1, c0, 0	@ write SCTLR
744		b	__armv4_mmu_cache_on
745
746__arm926ejs_mmu_cache_on:
747#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
748		mov	r0, #4			@ put dcache in WT mode
749		mcr	p15, 7, r0, c15, c0, 0
750#endif
751
752__armv4_mmu_cache_on:
753		mov	r12, lr
754#ifdef CONFIG_MMU
755		mov	r6, #CB_BITS | 0x12	@ U
756		bl	__setup_mmu
757		mov	r0, #0
758		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
759		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
760		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
761		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
762		orr	r0, r0, #0x0030
763 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
764		bl	__common_mmu_cache_on
765		mov	r0, #0
766		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
767#endif
768		mov	pc, r12
769
770__armv7_mmu_cache_on:
771		mov	r12, lr
772#ifdef CONFIG_MMU
773		mrc	p15, 0, r11, c0, c1, 4	@ read ID_MMFR0
774		tst	r11, #0xf		@ VMSA
775		movne	r6, #CB_BITS | 0x02	@ !XN
776		blne	__setup_mmu
777		mov	r0, #0
778		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
779		tst	r11, #0xf		@ VMSA
780		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
781#endif
782		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
783		bic	r0, r0, #1 << 28	@ clear SCTLR.TRE
784		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
785		orr	r0, r0, #0x003c		@ write buffer
786		bic	r0, r0, #2		@ A (no unaligned access fault)
787		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
788						@ (needed for ARM1176)
789#ifdef CONFIG_MMU
790 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
791		mrcne   p15, 0, r6, c2, c0, 2   @ read ttb control reg
792		orrne	r0, r0, #1		@ MMU enabled
793		movne	r1, #0xfffffffd		@ domain 0 = client
794		bic     r6, r6, #1 << 31        @ 32-bit translation system
795		bic     r6, r6, #(7 << 0) | (1 << 4)	@ use only ttbr0
796		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
797		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
798		mcrne   p15, 0, r6, c2, c0, 2   @ load ttb control
799#endif
800		mcr	p15, 0, r0, c7, c5, 4	@ ISB
801		mcr	p15, 0, r0, c1, c0, 0	@ load control register
802		mrc	p15, 0, r0, c1, c0, 0	@ and read it back
803		mov	r0, #0
804		mcr	p15, 0, r0, c7, c5, 4	@ ISB
805		mov	pc, r12
806
807__fa526_cache_on:
808		mov	r12, lr
809		mov	r6, #CB_BITS | 0x12	@ U
810		bl	__setup_mmu
811		mov	r0, #0
812		mcr	p15, 0, r0, c7, c7, 0	@ Invalidate whole cache
813		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
814		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
815		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
816		orr	r0, r0, #0x1000		@ I-cache enable
817		bl	__common_mmu_cache_on
818		mov	r0, #0
819		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
820		mov	pc, r12
821
822__common_mmu_cache_on:
823#ifndef CONFIG_THUMB2_KERNEL
824#ifndef DEBUG
825		orr	r0, r0, #0x000d		@ Write buffer, mmu
826#endif
827		mov	r1, #-1
828		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
829		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
830		b	1f
831		.align	5			@ cache line aligned
8321:		mcr	p15, 0, r0, c1, c0, 0	@ load control register
833		mrc	p15, 0, r0, c1, c0, 0	@ and read it back to
834		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
835#endif
836
837#define PROC_ENTRY_SIZE (4*5)
838
839/*
840 * Here follow the relocatable cache support functions for the
841 * various processors.  This is a generic hook for locating an
842 * entry and jumping to an instruction at the specified offset
843 * from the start of the block.  Please note this is all position
844 * independent code.
845 *
846 *  r1  = corrupted
847 *  r2  = corrupted
848 *  r3  = block offset
849 *  r9  = corrupted
850 *  r12 = corrupted
851 */
852
853call_cache_fn:	adr	r12, proc_types
854#ifdef CONFIG_CPU_CP15
855		mrc	p15, 0, r9, c0, c0	@ get processor ID
856#elif defined(CONFIG_CPU_V7M)
857		/*
858		 * On v7-M the processor id is located in the V7M_SCB_CPUID
859		 * register, but as cache handling is IMPLEMENTATION DEFINED on
860		 * v7-M (if existent at all) we just return early here.
861		 * If V7M_SCB_CPUID were used the cpu ID functions (i.e.
862		 * __armv7_mmu_cache_{on,off,flush}) would be selected which
863		 * use cp15 registers that are not implemented on v7-M.
864		 */
865		bx	lr
866#else
867		ldr	r9, =CONFIG_PROCESSOR_ID
868#endif
8691:		ldr	r1, [r12, #0]		@ get value
870		ldr	r2, [r12, #4]		@ get mask
871		eor	r1, r1, r9		@ (real ^ match)
872		tst	r1, r2			@       & mask
873 ARM(		addeq	pc, r12, r3		) @ call cache function
874 THUMB(		addeq	r12, r3			)
875 THUMB(		moveq	pc, r12			) @ call cache function
876		add	r12, r12, #PROC_ENTRY_SIZE
877		b	1b
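/*
 * r3 selects the method to run: 8 = 'cache on', 12 = 'cache off',
 * 16 = 'cache flush', i.e. the byte offset of the corresponding branch
 * within a proc_types entry.  The all-zero entry at the end of the
 * table matches any processor ID, so the search always terminates.
 */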
878
879/*
880 * Table for cache operations.  This is basically:
881 *   - CPU ID match
882 *   - CPU ID mask
883 *   - 'cache on' method instruction
884 *   - 'cache off' method instruction
885 *   - 'cache flush' method instruction
886 *
887 * We match an entry using: ((real_id ^ match) & mask) == 0
888 *
889 * Writethrough caches generally only need 'on' and 'off'
890 * methods.  Writeback caches _must_ have the flush method
891 * defined.
892 */
893		.align	2
894		.type	proc_types,#object
895proc_types:
896		.word	0x41000000		@ old ARM ID
897		.word	0xff00f000
898		mov	pc, lr
899 THUMB(		nop				)
900		mov	pc, lr
901 THUMB(		nop				)
902		mov	pc, lr
903 THUMB(		nop				)
904
905		.word	0x41007000		@ ARM7/710
906		.word	0xfff8fe00
907		mov	pc, lr
908 THUMB(		nop				)
909		mov	pc, lr
910 THUMB(		nop				)
911		mov	pc, lr
912 THUMB(		nop				)
913
914		.word	0x41807200		@ ARM720T (writethrough)
915		.word	0xffffff00
916		W(b)	__armv4_mmu_cache_on
917		W(b)	__armv4_mmu_cache_off
918		mov	pc, lr
919 THUMB(		nop				)
920
921		.word	0x41007400		@ ARM74x
922		.word	0xff00ff00
923		W(b)	__armv3_mpu_cache_on
924		W(b)	__armv3_mpu_cache_off
925		W(b)	__armv3_mpu_cache_flush
926
927		.word	0x41009400		@ ARM94x
928		.word	0xff00ff00
929		W(b)	__armv4_mpu_cache_on
930		W(b)	__armv4_mpu_cache_off
931		W(b)	__armv4_mpu_cache_flush
932
933		.word	0x41069260		@ ARM926EJ-S (v5TEJ)
934		.word	0xff0ffff0
935		W(b)	__arm926ejs_mmu_cache_on
936		W(b)	__armv4_mmu_cache_off
937		W(b)	__armv5tej_mmu_cache_flush
938
939		.word	0x00007000		@ ARM7 IDs
940		.word	0x0000f000
941		mov	pc, lr
942 THUMB(		nop				)
943		mov	pc, lr
944 THUMB(		nop				)
945		mov	pc, lr
946 THUMB(		nop				)
947
948		@ Everything from here on will be the new ID system.
949
950		.word	0x4401a100		@ sa110 / sa1100
951		.word	0xffffffe0
952		W(b)	__armv4_mmu_cache_on
953		W(b)	__armv4_mmu_cache_off
954		W(b)	__armv4_mmu_cache_flush
955
956		.word	0x6901b110		@ sa1110
957		.word	0xfffffff0
958		W(b)	__armv4_mmu_cache_on
959		W(b)	__armv4_mmu_cache_off
960		W(b)	__armv4_mmu_cache_flush
961
962		.word	0x56056900
963		.word	0xffffff00		@ PXA9xx
964		W(b)	__armv4_mmu_cache_on
965		W(b)	__armv4_mmu_cache_off
966		W(b)	__armv4_mmu_cache_flush
967
968		.word	0x56158000		@ PXA168
969		.word	0xfffff000
970		W(b)	__armv4_mmu_cache_on
971		W(b)	__armv4_mmu_cache_off
972		W(b)	__armv5tej_mmu_cache_flush
973
974		.word	0x56050000		@ Feroceon
975		.word	0xff0f0000
976		W(b)	__armv4_mmu_cache_on
977		W(b)	__armv4_mmu_cache_off
978		W(b)	__armv5tej_mmu_cache_flush
979
980#ifdef CONFIG_CPU_FEROCEON_OLD_ID
981		/* this conflicts with the standard ARMv5TE entry */
982		.long	0x41009260		@ Old Feroceon
983		.long	0xff00fff0
984		b	__armv4_mmu_cache_on
985		b	__armv4_mmu_cache_off
986		b	__armv5tej_mmu_cache_flush
987#endif
988
989		.word	0x66015261		@ FA526
990		.word	0xff01fff1
991		W(b)	__fa526_cache_on
992		W(b)	__armv4_mmu_cache_off
993		W(b)	__fa526_cache_flush
994
995		@ These match on the architecture ID
996
997		.word	0x00020000		@ ARMv4T
998		.word	0x000f0000
999		W(b)	__armv4_mmu_cache_on
1000		W(b)	__armv4_mmu_cache_off
1001		W(b)	__armv4_mmu_cache_flush
1002
1003		.word	0x00050000		@ ARMv5TE
1004		.word	0x000f0000
1005		W(b)	__armv4_mmu_cache_on
1006		W(b)	__armv4_mmu_cache_off
1007		W(b)	__armv4_mmu_cache_flush
1008
1009		.word	0x00060000		@ ARMv5TEJ
1010		.word	0x000f0000
1011		W(b)	__armv4_mmu_cache_on
1012		W(b)	__armv4_mmu_cache_off
1013		W(b)	__armv5tej_mmu_cache_flush
1014
1015		.word	0x0007b000		@ ARMv6
1016		.word	0x000ff000
1017		W(b)	__armv6_mmu_cache_on
1018		W(b)	__armv4_mmu_cache_off
1019		W(b)	__armv6_mmu_cache_flush
1020
1021		.word	0x000f0000		@ new CPU Id
1022		.word	0x000f0000
1023		W(b)	__armv7_mmu_cache_on
1024		W(b)	__armv7_mmu_cache_off
1025		W(b)	__armv7_mmu_cache_flush
1026
1027		.word	0			@ unrecognised type
1028		.word	0
1029		mov	pc, lr
1030 THUMB(		nop				)
1031		mov	pc, lr
1032 THUMB(		nop				)
1033		mov	pc, lr
1034 THUMB(		nop				)
1035
1036		.size	proc_types, . - proc_types
1037
1038		/*
1039		 * If you get a "non-constant expression in ".if" statement"
1040		 * error from the assembler on this line, check that you have
1041		 * not accidentally written a "b" instruction where you should
1042		 * have written W(b).
1043		 */
1044		.if (. - proc_types) % PROC_ENTRY_SIZE != 0
1045		.error "The size of one or more proc_types entries is wrong."
1046		.endif
1047
1048/*
1049 * Turn off the Cache and MMU.  ARMv3 does not support
1050 * reading the control register, but ARMv4 does.
1051 *
1052 * On exit,
1053 *  r0, r1, r2, r3, r9, r12 corrupted
1054 * This routine must preserve:
1055 *  r4, r7, r8
1056 */
1057		.align	5
1058cache_off:	mov	r3, #12			@ cache_off function
1059		b	call_cache_fn
1060
1061__armv4_mpu_cache_off:
1062		mrc	p15, 0, r0, c1, c0
1063		bic	r0, r0, #0x000d
1064		mcr	p15, 0, r0, c1, c0	@ turn MPU and cache off
1065		mov	r0, #0
1066		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
1067		mcr	p15, 0, r0, c7, c6, 0	@ flush D-Cache
1068		mcr	p15, 0, r0, c7, c5, 0	@ flush I-Cache
1069		mov	pc, lr
1070
1071__armv3_mpu_cache_off:
1072		mrc	p15, 0, r0, c1, c0
1073		bic	r0, r0, #0x000d
1074		mcr	p15, 0, r0, c1, c0, 0	@ turn MPU and cache off
1075		mov	r0, #0
1076		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
1077		mov	pc, lr
1078
1079__armv4_mmu_cache_off:
1080#ifdef CONFIG_MMU
1081		mrc	p15, 0, r0, c1, c0
1082		bic	r0, r0, #0x000d
1083		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
1084		mov	r0, #0
1085		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
1086		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
1087#endif
1088		mov	pc, lr
1089
1090__armv7_mmu_cache_off:
1091		mrc	p15, 0, r0, c1, c0
1092#ifdef CONFIG_MMU
1093		bic	r0, r0, #0x000d
1094#else
1095		bic	r0, r0, #0x000c
1096#endif
1097		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
1098		mov	r12, lr
1099		bl	__armv7_mmu_cache_flush
1100		mov	r0, #0
1101#ifdef CONFIG_MMU
1102		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB
1103#endif
1104		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
1105		mcr	p15, 0, r0, c7, c10, 4	@ DSB
1106		mcr	p15, 0, r0, c7, c5, 4	@ ISB
1107		mov	pc, r12
1108
1109/*
1110 * Clean and flush the cache to maintain consistency.
1111 *
1112 * On exit,
1113 *  r1, r2, r3, r9, r10, r11, r12 corrupted
1114 * This routine must preserve:
1115 *  r4, r6, r7, r8
1116 */
1117		.align	5
1118cache_clean_flush:
1119		mov	r3, #16
1120		b	call_cache_fn
1121
1122__armv4_mpu_cache_flush:
1123		tst	r4, #1
1124		movne	pc, lr
1125		mov	r2, #1
1126		mov	r3, #0
1127		mcr	p15, 0, ip, c7, c6, 0	@ invalidate D cache
1128		mov	r1, #7 << 5		@ 8 segments
11291:		orr	r3, r1, #63 << 26	@ 64 entries
11302:		mcr	p15, 0, r3, c7, c14, 2	@ clean & invalidate D index
1131		subs	r3, r3, #1 << 26
1132		bcs	2b			@ entries 63 to 0
1133		subs 	r1, r1, #1 << 5
1134		bcs	1b			@ segments 7 to 0
1135
1136		teq	r2, #0
1137		mcrne	p15, 0, ip, c7, c5, 0	@ invalidate I cache
1138		mcr	p15, 0, ip, c7, c10, 4	@ drain WB
1139		mov	pc, lr
1140
1141__fa526_cache_flush:
1142		tst	r4, #1
1143		movne	pc, lr
1144		mov	r1, #0
1145		mcr	p15, 0, r1, c7, c14, 0	@ clean and invalidate D cache
1146		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
1147		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1148		mov	pc, lr
1149
1150__armv6_mmu_cache_flush:
1151		mov	r1, #0
1152		tst	r4, #1
1153		mcreq	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
1154		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
1155		mcreq	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
1156		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1157		mov	pc, lr
1158
1159__armv7_mmu_cache_flush:
1160		tst	r4, #1
1161		bne	iflush
1162		mrc	p15, 0, r10, c0, c1, 5	@ read ID_MMFR1
1163		tst	r10, #0xf << 16		@ hierarchical cache (ARMv7)
1164		mov	r10, #0
1165		beq	hierarchical
1166		mcr	p15, 0, r10, c7, c14, 0	@ clean+invalidate D
1167		b	iflush
1168hierarchical:
1169		mcr	p15, 0, r10, c7, c10, 5	@ DMB
1170		stmfd	sp!, {r0-r7, r9-r11}
1171		mrc	p15, 1, r0, c0, c0, 1	@ read clidr
1172		ands	r3, r0, #0x7000000	@ extract loc from clidr
1173		mov	r3, r3, lsr #23		@ loc * 2, to match the level counter
1174		beq	finished		@ if loc is 0, then no need to clean
1175		mov	r10, #0			@ start clean at cache level 0
1176loop1:
1177		add	r2, r10, r10, lsr #1	@ work out 3x current cache level
1178		mov	r1, r0, lsr r2		@ extract cache type bits from clidr
1179		and	r1, r1, #7		@ mask of the bits for current cache only
1180		cmp	r1, #2			@ see what cache we have at this level
1181		blt	skip			@ skip if no cache, or just i-cache
1182		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
1183		mcr	p15, 0, r10, c7, c5, 4	@ isb to sync the new cssr&csidr
1184		mrc	p15, 1, r1, c0, c0, 0	@ read the new csidr
1185		and	r2, r1, #7		@ extract the length of the cache lines
1186		add	r2, r2, #4		@ add 4 (line length offset)
1187		ldr	r4, =0x3ff
1188		ands	r4, r4, r1, lsr #3	@ maximum way number (ways - 1)
1189		clz	r5, r4			@ find bit position of way size increment
1190		ldr	r7, =0x7fff
1191		ands	r7, r7, r1, lsr #13	@ maximum set index (sets - 1)
1192loop2:
1193		mov	r9, r4			@ create working copy of max way size
1194loop3:
1195 ARM(		orr	r11, r10, r9, lsl r5	) @ factor way and cache number into r11
1196 ARM(		orr	r11, r11, r7, lsl r2	) @ factor index number into r11
1197 THUMB(		lsl	r6, r9, r5		)
1198 THUMB(		orr	r11, r10, r6		) @ factor way and cache number into r11
1199 THUMB(		lsl	r6, r7, r2		)
1200 THUMB(		orr	r11, r11, r6		) @ factor index number into r11
1201		mcr	p15, 0, r11, c7, c14, 2	@ clean & invalidate by set/way
1202		subs	r9, r9, #1		@ decrement the way
1203		bge	loop3
1204		subs	r7, r7, #1		@ decrement the index
1205		bge	loop2
1206skip:
1207		add	r10, r10, #2		@ increment cache number
1208		cmp	r3, r10
1209		bgt	loop1
1210finished:
1211		ldmfd	sp!, {r0-r7, r9-r11}
1212		mov	r10, #0			@ switch back to cache level 0
1213		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
1214iflush:
1215		mcr	p15, 0, r10, c7, c10, 4	@ DSB
1216		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I+BTB
1217		mcr	p15, 0, r10, c7, c10, 4	@ DSB
1218		mcr	p15, 0, r10, c7, c5, 4	@ ISB
1219		mov	pc, lr
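/*
 * The set/way walk above is the usual ARMv7 clean+invalidate sequence;
 * roughly, in C (illustration only; the helper names are made up):
 *
 *	for (level = 0; level < loc; level++) {
 *		if (cache_type(clidr, level) < 2)
 *			continue;			@ none or I-only
 *		select_cache_level(level << 1);		@ write CSSELR
 *		ccsidr = read_ccsidr();
 *		line_shift = (ccsidr & 7) + 4;
 *		max_way = (ccsidr >> 3) & 0x3ff;
 *		max_set = (ccsidr >> 13) & 0x7fff;
 *		way_shift = clz(max_way);
 *		for (set = max_set; set >= 0; set--)
 *			for (way = max_way; way >= 0; way--)
 *				dccisw(way << way_shift |
 *				       set << line_shift |
 *				       level << 1);
 *	}
 */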
1220
1221__armv5tej_mmu_cache_flush:
1222		tst	r4, #1
1223		movne	pc, lr
12241:		mrc	p15, 0, r15, c7, c14, 3	@ test,clean,invalidate D cache
1225		bne	1b
1226		mcr	p15, 0, r0, c7, c5, 0	@ flush I cache
1227		mcr	p15, 0, r0, c7, c10, 4	@ drain WB
1228		mov	pc, lr
1229
1230__armv4_mmu_cache_flush:
1231		tst	r4, #1
1232		movne	pc, lr
1233		mov	r2, #64*1024		@ default: 32K dcache size (*2)
1234		mov	r11, #32		@ default: 32 byte line size
1235		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
1236		teq	r3, r9			@ cache ID register present?
1237		beq	no_cache_id
1238		mov	r1, r3, lsr #18
1239		and	r1, r1, #7
1240		mov	r2, #1024
1241		mov	r2, r2, lsl r1		@ base dcache size *2
1242		tst	r3, #1 << 14		@ test M bit
1243		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
1244		mov	r3, r3, lsr #12
1245		and	r3, r3, #3
1246		mov	r11, #8
1247		mov	r11, r11, lsl r3	@ cache line size in bytes
1248no_cache_id:
1249		mov	r1, pc
1250		bic	r1, r1, #63		@ align to longest cache line
1251		add	r2, r1, r2
12521:
1253 ARM(		ldr	r3, [r1], r11		) @ s/w flush D cache
1254 THUMB(		ldr     r3, [r1]		) @ s/w flush D cache
1255 THUMB(		add     r1, r1, r11		)
1256		teq	r1, r2
1257		bne	1b
1258
1259		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
1260		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
1261		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1262		mov	pc, lr
1263
1264__armv3_mmu_cache_flush:
1265__armv3_mpu_cache_flush:
1266		tst	r4, #1
1267		movne	pc, lr
1268		mov	r1, #0
1269		mcr	p15, 0, r1, c7, c0, 0	@ invalidate whole cache v3
1270		mov	pc, lr
1271
1272/*
1273 * Various debugging routines for printing hex characters and
1274 * memory, which again must be relocatable.
1275 */
1276#ifdef DEBUG
1277		.align	2
1278		.type	phexbuf,#object
1279phexbuf:	.space	12
1280		.size	phexbuf, . - phexbuf
1281
1282@ phex corrupts {r0, r1, r2, r3}
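@ phex prints r0 as r1 hex digits: it builds the string backwards in
@ phexbuf (digits 10-15 become 'A'-'F' via the +7 adjustment below),
@ NUL-terminates it and then tail-calls puts to emit it.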
1283phex:		adr	r3, phexbuf
1284		mov	r2, #0
1285		strb	r2, [r3, r1]
12861:		subs	r1, r1, #1
1287		movmi	r0, r3
1288		bmi	puts
1289		and	r2, r0, #15
1290		mov	r0, r0, lsr #4
1291		cmp	r2, #10
1292		addge	r2, r2, #7
1293		add	r2, r2, #'0'
1294		strb	r2, [r3, r1]
1295		b	1b
1296
1297@ puts corrupts {r0, r1, r2, r3}
1298puts:		loadsp	r3, r2, r1
12991:		ldrb	r2, [r0], #1
1300		teq	r2, #0
1301		moveq	pc, lr
13022:		writeb	r2, r3
1303		mov	r1, #0x00020000
13043:		subs	r1, r1, #1
1305		bne	3b
1306		teq	r2, #'\n'
1307		moveq	r2, #'\r'
1308		beq	2b
1309		teq	r0, #0
1310		bne	1b
1311		mov	pc, lr
1312@ putc corrupts {r0, r1, r2, r3}
1313putc:
1314		mov	r2, r0
1315		loadsp	r3, r1, r0
1316		mov	r0, #0
1317		b	2b
1318
1319@ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
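@ memdump prints 64 words (256 bytes) starting at r0, eight words per
@ line, each line prefixed with its address; this is what
@ debug_reloc_end uses to dump the start of the relocated kernel.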
1320memdump:	mov	r12, r0
1321		mov	r10, lr
1322		mov	r11, #0
13232:		mov	r0, r11, lsl #2
1324		add	r0, r0, r12
1325		mov	r1, #8
1326		bl	phex
1327		mov	r0, #':'
1328		bl	putc
13291:		mov	r0, #' '
1330		bl	putc
1331		ldr	r0, [r12, r11, lsl #2]
1332		mov	r1, #8
1333		bl	phex
1334		and	r0, r11, #7
1335		teq	r0, #3
1336		moveq	r0, #' '
1337		bleq	putc
1338		and	r0, r11, #7
1339		add	r11, r11, #1
1340		teq	r0, #7
1341		bne	1b
1342		mov	r0, #'\n'
1343		bl	putc
1344		cmp	r11, #64
1345		blt	2b
1346		mov	pc, r10
1347#endif
1348
1349		.ltorg
1350
1351#ifdef CONFIG_ARM_VIRT_EXT
1352.align 5
1353__hyp_reentry_vectors:
1354		W(b)	.			@ reset
1355		W(b)	.			@ undef
1356		W(b)	.			@ svc
1357		W(b)	.			@ pabort
1358		W(b)	.			@ dabort
1359		W(b)	__enter_kernel		@ hyp
1360		W(b)	.			@ irq
1361		W(b)	.			@ fiq
1362#endif /* CONFIG_ARM_VIRT_EXT */
1363
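/*
 * Hand over to the decompressed kernel using the standard ARM boot
 * convention: r0 = 0, r1 = machine type number, r2 = physical address
 * of the ATAG list or DTB, with the MMU and D-cache already off (see
 * cache_off above).
 */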
1364__enter_kernel:
1365		mov	r0, #0			@ must be 0
1366		mov	r1, r7			@ restore architecture number
1367		mov	r2, r8			@ restore atags pointer
1368 ARM(		mov	pc, r4		)	@ call kernel
1369 M_CLASS(	add	r4, r4, #1	)	@ enter in Thumb mode for M class
1370 THUMB(		bx	r4		)	@ entry point is always ARM for A/R classes
1371
1372reloc_code_end:
1373
1374#ifdef CONFIG_EFI_STUB
1375		.align	2
1376_start:		.long	start - .
1377
1378ENTRY(efi_stub_entry)
1379		@ allocate space on stack for passing current zImage address
1380		@ and for the EFI stub to return the new entry point of the
1381		@ zImage, as the EFI stub may copy the kernel.  The pointer address
1382		@ is passed in r2. r0 and r1 are passed through from the
1383		@ EFI firmware to efi_entry
1384		adr	ip, _start
1385		ldr	r3, [ip]
1386		add	r3, r3, ip
1387		stmfd	sp!, {r3, lr}
1388		mov	r2, sp			@ pass zImage address in r2
1389		bl	efi_entry
1390
1391		@ Check for error return from EFI stub. r0 has FDT address
1392		@ or error code.
1393		cmn	r0, #1
1394		beq	efi_load_fail
1395
1396		@ Preserve return value of efi_entry() in r4
1397		mov	r4, r0
1398		bl	cache_clean_flush
1399		bl	cache_off
1400
1401		@ Set parameters for booting zImage according to boot protocol
1402		@ put FDT address in r2, it was returned by efi_entry()
1403		@ r1 is the machine type, and r0 needs to be 0
1404		mov	r0, #0
1405		mov	r1, #0xFFFFFFFF
1406		mov	r2, r4
1407
1408		@ Branch to (possibly) relocated zImage that is in [sp]
1409		ldr	lr, [sp]
1410		ldr	ip, =start_offset
1411		add	lr, lr, ip
1412		mov	pc, lr				@ no mode switch
1413
1414efi_load_fail:
1415		@ Return EFI_LOAD_ERROR to EFI firmware on error.
1416		ldr	r0, =0x80000001
1417		ldmfd	sp!, {ip, pc}
1418ENDPROC(efi_stub_entry)
1419#endif
1420
1421		.align
1422		.section ".stack", "aw", %nobits
1423.L_user_stack:	.space	4096
1424.L_user_stack_end:
1425