xref: /openbmc/linux/arch/arm/boot/compressed/head.S (revision 93707cbabcc8baf2b2b5f4a99c1f08ee83eb7abd)
1/*
2 *  linux/arch/arm/boot/compressed/head.S
3 *
4 *  Copyright (C) 1996-2002 Russell King
5 *  Copyright (C) 2004 Hyok S. Choi (MPU support)
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/linkage.h>
12#include <asm/assembler.h>
13#include <asm/v7m.h>
14
15#include "efi-header.S"
16
17 AR_CLASS(	.arch	armv7-a	)
18 M_CLASS(	.arch	armv7-m	)
19
20/*
21 * Debugging stuff
22 *
23 * Note that these macros must not contain any code which is not
24 * 100% relocatable.  Any attempt to do so will result in a crash.
25 * Please select one of the following when turning on debugging.
26 */
27#ifdef DEBUG
/*
 * Low-level debug output primitives, assembled only when DEBUG is defined.
 * Exactly one variant of each macro is selected by the configuration:
 *   loadsp rb, tmp - prepare register rb for output.  Expands to nothing
 *                    for the ICEDCC variants, whose writeb does not use rb.
 *   writeb ch, rb  - emit the character held in register ch.
 */
28
29#if defined(CONFIG_DEBUG_ICEDCC)
30
31#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
32		.macro	loadsp, rb, tmp
33		.endm
34		.macro	writeb, ch, rb
35		mcr	p14, 0, \ch, c0, c5, 0
36		.endm
37#elif defined(CONFIG_CPU_XSCALE)
38		.macro	loadsp, rb, tmp
39		.endm
40		.macro	writeb, ch, rb
41		mcr	p14, 0, \ch, c8, c0, 0
42		.endm
43#else
44		.macro	loadsp, rb, tmp
45		.endm
46		.macro	writeb, ch, rb
47		mcr	p14, 0, \ch, c1, c0, 0
48		.endm
49#endif
50
51#else
52
/*
 * Non-ICEDCC case: senduart and addruart are expected to come from the
 * header named by CONFIG_DEBUG_LL_INCLUDE (not visible in this file).
 */
53#include CONFIG_DEBUG_LL_INCLUDE
54
55		.macro	writeb,	ch, rb
56		senduart \ch, \rb
57		.endm
58
59#if defined(CONFIG_ARCH_SA1100)
/* SA1100: the base is built by hand, 0x80050000 for Ser3, 0x80010000 for Ser1 */
60		.macro	loadsp, rb, tmp
61		mov	\rb, #0x80000000	@ physical base address
62#ifdef CONFIG_DEBUG_LL_SER3
63		add	\rb, \rb, #0x00050000	@ Ser3
64#else
65		add	\rb, \rb, #0x00010000	@ Ser1
66#endif
67		.endm
68#else
69		.macro	loadsp,	rb, tmp
70		addruart \rb, \tmp
71		.endm
72#endif
73#endif
74#endif
75
/*
 * kputc val - print one character.
 * Moves the operand into r0 and calls putc (defined outside this
 * excerpt).  r0 and lr are overwritten by the expansion itself.
 */
76		.macro	kputc,val
77		mov	r0, \val
78		bl	putc
79		.endm
80
/*
 * kphex val, len - print a value in hex.
 * Moves the operand val into r0 and the literal len into r1, then calls
 * phex (defined outside this excerpt).  r0, r1 and lr are overwritten by
 * the expansion itself.
 */
81		.macro	kphex,val,len
82		mov	r0, \val
83		mov	r1, #\len
84		bl	phex
85		.endm
86
/*
 * debug_reloc_start - dump the boot state before the kernel is moved.
 * Expands to nothing unless DEBUG is defined.  Output, as produced by
 * the kputc/kphex sequence below:
 *   "\n" r6 ":" r7 [":" control-reg] "\n" r5 "-" r9 ">" r4 "\n"
 * The control register is only printed when CONFIG_CPU_CP15 is set.
 * Uses kputc/kphex, so r0, r1 and lr are overwritten.
 */
87		.macro	debug_reloc_start
88#ifdef DEBUG
89		kputc	#'\n'
90		kphex	r6, 8		/* processor id */
91		kputc	#':'
92		kphex	r7, 8		/* architecture id */
93#ifdef CONFIG_CPU_CP15
94		kputc	#':'
95		mrc	p15, 0, r0, c1, c0
96		kphex	r0, 8		/* control reg */
97#endif
98		kputc	#'\n'
99		kphex	r5, 8		/* decompressed kernel start */
100		kputc	#'-'
101		kphex	r9, 8		/* decompressed kernel end  */
102		kputc	#'>'
103		kphex	r4, 8		/* kernel execution address */
104		kputc	#'\n'
105#endif
106		.endm
107
/*
 * debug_reloc_end - dump the state after the kernel has been moved.
 * Expands to nothing unless DEBUG is defined.  Prints r5 in hex followed
 * by a newline, then calls memdump (defined outside this excerpt) with
 * r0 = r4.  r0, r1 and lr are overwritten.
 */
108		.macro	debug_reloc_end
109#ifdef DEBUG
110		kphex	r5, 8		/* end of kernel */
111		kputc	#'\n'
112		mov	r0, r4
113		bl	memdump		/* dump 256 bytes at start of kernel */
114#endif
115		.endm
116
117		.section ".start", #alloc, #execinstr
118/*
119 * sort out different calling conventions
120 */
/*
 * start - entry point of the decompressor image.
 *
 * As the register moves below show, r1 (architecture ID) and r2 (atags
 * pointer) are live on entry and are parked in r7 and r8.  On A/R class
 * CPUs the entry CPSR is sampled into r9 and later stored in the SPSR.
 * By the end of this block:
 *   r4 = final kernel address, with bit 0 set if cache_on was skipped
 *   r7 = architecture ID
 *   r8 = atags pointer
 */
121		.align
122		/*
123		 * Always enter in ARM state for CPUs that support the ARM ISA.
124		 * As of today (2014) that's exactly the members of the A and R
125		 * classes.
126		 */
127 AR_CLASS(	.arm	)
128start:
129		.type	start,#function
		@ seven __nop expansions plus the mov r0, r0 (or the Thumb2
		@ switch) below precede the branch over the header words
130		.rept	7
131		__nop
132		.endr
133#ifndef CONFIG_THUMB2_KERNEL
134		mov	r0, r0
135#else
136 AR_CLASS(	sub	pc, pc, #3	)	@ A/R: switch to Thumb2 mode
137  M_CLASS(	nop.w			)	@ M: already in Thumb2 mode
138		.thumb
139#endif
140		W(b)	1f
141
		@ data words describing the image; skipped by the branch above
142		.word	_magic_sig	@ Magic numbers to help the loader
143		.word	_magic_start	@ absolute load/run zImage address
144		.word	_magic_end	@ zImage end address
145		.word	0x04030201	@ endianness flag
146		.word	0x45454545	@ another magic number to indicate
147		.word	_magic_table	@ additional data table
148
149		__EFI_HEADER
1501:
151 ARM_BE8(	setend	be		)	@ go BE8 if compiled for BE8
152 AR_CLASS(	mrs	r9, cpsr	)
153#ifdef CONFIG_ARM_VIRT_EXT
154		bl	__hyp_stub_install	@ get into SVC mode, reversibly
155#endif
156		mov	r7, r1			@ save architecture ID
157		mov	r8, r2			@ save atags pointer
158
159#ifndef CONFIG_CPU_V7M
160		/*
161		 * Booting from Angel - need to enter SVC mode and disable
162		 * FIQs/IRQs (numeric definitions from angel arm.h source).
163		 * We only do this if we were in user mode on entry.
164		 */
165		mrs	r2, cpsr		@ get current mode
166		tst	r2, #3			@ not user?
167		bne	not_angel
168		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
169 ARM(		swi	0x123456	)	@ angel_SWI_ARM
170 THUMB(		svc	0xab		)	@ angel_SWI_THUMB
171not_angel:
172		safe_svcmode_maskall r0
173		msr	spsr_cxsf, r9		@ Save the CPU boot mode in
174						@ SPSR
175#endif
176		/*
177		 * Note that some cache flushing and other stuff may
178		 * be needed here - is there an Angel SWI call for this?
179		 */
180
181		/*
182		 * some architecture specific code can be inserted
183		 * by the linker here, but it should preserve r7, r8, and r9.
184		 */
185
186		.text
187
188#ifdef CONFIG_AUTO_ZRELADDR
189		/*
190		 * Find the start of physical memory.  As we are executing
191		 * without the MMU on, we are in the physical address space.
192		 * We just need to get rid of any offset by aligning the
193		 * address.
194		 *
195		 * This alignment is a balance between the requirements of
196		 * different platforms - we have chosen 128MB to allow
197		 * platforms which align the start of their physical memory
198		 * to 128MB to use this feature, while allowing the zImage
199		 * to be placed within the first 128MB of memory on other
200		 * platforms.  Increasing the alignment means we place
201		 * stricter alignment requirements on the start of physical
202		 * memory, but relaxing it means that we break people who
203		 * are already placing their zImage in (eg) the top 64MB
204		 * of this range.
205		 */
206		mov	r4, pc
207		and	r4, r4, #0xf8000000
208		/* Determine final kernel image address. */
209		add	r4, r4, #TEXT_OFFSET
210#else
211		ldr	r4, =zreladdr
212#endif
213
214		/*
215		 * Set up a page table only if it won't overwrite ourself.
216		 * That means r4 < pc || r4 - 16k page directory > &_end.
217		 * Given that r4 > &_end is very infrequent, we add a rough
218		 * additional 1MB of room for a possible appended DTB.
219		 */
		/*
		 * The word at LC0+32 is the link-time constant
		 * "_end - restart + 16384 + 1024*1024".  The cc-conditional
		 * instructions only run when pc < r4; if the first compare
		 * gives cs they are all skipped and blcs calls cache_on.
		 */
220		mov	r0, pc
221		cmp	r0, r4
222		ldrcc	r0, LC0+32
223		addcc	r0, r0, pc
224		cmpcc	r4, r0
225		orrcc	r4, r4, #1		@ remember we skipped cache_on
226		blcs	cache_on
227
/*
 * restart - load the link-time constants from LC0 and turn them into
 * run-time values.  This label is also the target of the jump made after
 * the image has copied itself to a new location, so everything here is
 * recomputed from the current position.
 *
 * After this block:
 *   r0  = delta between the run-time and link-time address of LC0
 *   r2  = __bss_start, r3 = _end (still link-time values)
 *   r6  = _edata (run-time)
 *   r9  = decompressed kernel size read from the end of the input data
 *   r10 = end of this image (see the ZBOOT_ROM split below)
 *   r11 = _got_start, r12 = _got_end (still link-time values)
 */
227restart:	adr	r0, LC0
228		ldmia	r0, {r1, r2, r3, r6, r10, r11, r12}
229		ldr	sp, [r0, #28]
230
231		/*
232		 * We might be running at a different address.  We need
233		 * to fix up various pointers.
234		 */
235		sub	r0, r0, r1		@ calculate the delta offset
236		add	r6, r6, r0		@ _edata
237		add	r10, r10, r0		@ inflated kernel size location
238
239		/*
240		 * The kernel build system appends the size of the
241		 * decompressed kernel at the end of the compressed data
242		 * in little-endian form.
243		 */
		@ assembled one byte at a time, so the load does not depend
		@ on the alignment of the location or on CPU endianness
244		ldrb	r9, [r10, #0]
245		ldrb	lr, [r10, #1]
246		orr	r9, r9, lr, lsl #8
247		ldrb	lr, [r10, #2]
248		ldrb	r10, [r10, #3]
249		orr	r9, r9, lr, lsl #16
250		orr	r9, r9, r10, lsl #24
251
252#ifndef CONFIG_ZBOOT_ROM
253		/* malloc space is above the relocated stack (64k max) */
254		add	sp, sp, r0
255		add	r10, sp, #0x10000
256#else
257		/*
258		 * With ZBOOT_ROM the bss/stack is non relocatable,
259		 * but someone could still run this code from RAM,
260		 * in which case our reference is _edata.
261		 */
262		mov	r10, r6
263#endif
265
/*
 * Appended device tree handling.  r5 is always initialised to 0; the
 * rest is only assembled with CONFIG_ARM_APPENDED_DTB.  When the word at
 * _edata carries the DTB magic, r8 is redirected to the appended blob
 * and r6, r10 and sp are advanced past it, leaving r5 = its padded size.
 */
265		mov	r5, #0			@ init dtb size to 0
266#ifdef CONFIG_ARM_APPENDED_DTB
267/*
268 *   r0  = delta
269 *   r2  = BSS start
270 *   r3  = BSS end
271 *   r4  = final kernel address (possibly with LSB set)
272 *   r5  = appended dtb size (still unknown)
273 *   r6  = _edata
274 *   r7  = architecture ID
275 *   r8  = atags/device tree pointer
276 *   r9  = size of decompressed image
277 *   r10 = end of this image, including  bss/stack/malloc space if non XIP
278 *   r11 = GOT start
279 *   r12 = GOT end
280 *   sp  = stack pointer
281 *
282 * if there are device trees (dtb) appended to zImage, advance r10 so that the
283 * dtb data will get relocated along with the kernel if necessary.
284 */
285
286		ldr	lr, [r6, #0]
287#ifndef __ARMEB__
288		ldr	r1, =0xedfe0dd0		@ sig is 0xd00dfeed big endian
289#else
290		ldr	r1, =0xd00dfeed
291#endif
292		cmp	lr, r1
293		bne	dtb_check_done		@ not found
294
295#ifdef CONFIG_ARM_ATAG_DTB_COMPAT
296		/*
297		 * OK... Let's do some funky business here.
298		 * If we do have a DTB appended to zImage, and we do have
299		 * an ATAG list around, we want the latter to be translated
300		 * and folded into the former here. No GOT fixup has occurred
301		 * yet, but none of the code we're about to call uses any
302		 * global variable.
303		*/
304
305		/* Get the initial DTB size */
306		ldr	r5, [r6, #4]
307#ifndef __ARMEB__
308		/* convert to little endian */
309		eor	r1, r5, r5, ror #16
310		bic	r1, r1, #0x00ff0000
311		mov	r5, r5, ror #8
312		eor	r5, r5, r1, lsr #8
313#endif
314		/* 50% DTB growth should be good enough */
315		add	r5, r5, r5, lsr #1
316		/* preserve 64-bit alignment */
317		add	r5, r5, #7
318		bic	r5, r5, #7
319		/* clamp to 32KB min and 1MB max */
320		cmp	r5, #(1 << 15)
321		movlo	r5, #(1 << 15)
322		cmp	r5, #(1 << 20)
323		movhi	r5, #(1 << 20)
324		/* temporarily relocate the stack past the DTB work space */
325		add	sp, sp, r5
326
		/* r0-r3, ip and lr are saved around the calls and restored below */
327		stmfd	sp!, {r0-r3, ip, lr}
		/* arguments: r0 = atags pointer, r1 = dtb at _edata, r2 = work space size */
328		mov	r0, r8
329		mov	r1, r6
330		mov	r2, r5
331		bl	atags_to_fdt
332
333		/*
334		 * If returned value is 1, there is no ATAG at the location
335		 * pointed by r8.  Try the typical 0x100 offset from start
336		 * of RAM and hope for the best.
337		 */
		/*
		 * None of the instructions between the cmp and the bleq
		 * update the flags, so the second call is made only when
		 * the first one returned 1.  The bic drops the
		 * "cache_on skipped" marker bit from the address.
		 */
338		cmp	r0, #1
339		sub	r0, r4, #TEXT_OFFSET
340		bic	r0, r0, #1
341		add	r0, r0, #0x100
342		mov	r1, r6
343		mov	r2, r5
344		bleq	atags_to_fdt
345
346		ldmfd	sp!, {r0-r3, ip, lr}
347		sub	sp, sp, r5
348#endif
349
350		mov	r8, r6			@ use the appended device tree
351
352		/*
353		 * Make sure that the DTB doesn't end up in the final
354		 * kernel's .bss area. To do so, we adjust the decompressed
355		 * kernel size to compensate if that .bss size is larger
356		 * than the relocated code.
357		 */
		/*
		 * r1 = _kernel_bss_size - (_edata - wont_overwrite); it is
		 * added to r9 only when the subtraction leaves an unsigned
		 * result greater than zero (hi).
		 */
358		ldr	r5, =_kernel_bss_size
359		adr	r1, wont_overwrite
360		sub	r1, r6, r1
361		subs	r1, r5, r1
362		addhi	r9, r9, r1
363
364		/* Get the current DTB size */
365		ldr	r5, [r6, #4]
366#ifndef __ARMEB__
367		/* convert r5 (dtb size) to little endian */
368		eor	r1, r5, r5, ror #16
369		bic	r1, r1, #0x00ff0000
370		mov	r5, r5, ror #8
371		eor	r5, r5, r1, lsr #8
372#endif
373
374		/* preserve 64-bit alignment */
375		add	r5, r5, #7
376		bic	r5, r5, #7
377
378		/* relocate some pointers past the appended dtb */
379		add	r6, r6, r5
380		add	r10, r10, r5
381		add	sp, sp, r5
382dtb_check_done:
383#endif
385
386/*
387 * Check to see if we will overwrite ourselves.
388 *   r4  = final kernel address (possibly with LSB set)
389 *   r9  = size of decompressed image
390 *   r10 = end of this image, including  bss/stack/malloc space if non XIP
391 * We basically want:
392 *   r4 - 16k page directory >= r10 -> OK
393 *   r4 + image length <= address of wont_overwrite -> OK
394 * Note: the possible LSB in r4 is harmless here.
395 */
		@ the 16k is added to r10 rather than subtracted from r4
396		add	r10, r10, #16384
397		cmp	r4, r10
398		bhs	wont_overwrite
		@ r10 = end of the decompressed kernel; r9 is reused as the
		@ run-time address of wont_overwrite
399		add	r10, r4, r9
400		adr	r9, wont_overwrite
401		cmp	r10, r9
402		bls	wont_overwrite
403
404/*
405 * Relocate ourselves past the end of the decompressed kernel.
406 *   r6  = _edata
407 *   r10 = end of the decompressed kernel
408 * Because we always copy ahead, we need to do it from the end and go
409 * backward in case the source and destination overlap.
410 */
411		/*
412		 * Bump to the next 256-byte boundary with the size of
413		 * the relocation code added. This avoids overwriting
414		 * ourself when the offset is small.
415		 */
416		add	r10, r10, #((reloc_code_end - restart + 256) & ~255)
417		bic	r10, r10, #255
418
419		/* Get start of code we want to copy and align it down. */
420		adr	r5, restart
421		bic	r5, r5, #31
422
423/* Relocate the hyp vector base if necessary */
424#ifdef CONFIG_ARM_VIRT_EXT
		@ the boot mode was stored in the SPSR by the entry code
425		mrs	r0, spsr
426		and	r0, r0, #MODE_MASK
427		cmp	r0, #HYP_MODE
428		bne	1f
429
430		/*
431		 * Compute the address of the hyp vectors after relocation.
432		 * This requires some arithmetic since we cannot directly
433		 * reference __hyp_stub_vectors in a PC-relative way.
434		 * Call __hyp_set_vectors with the new address so that we
435		 * can HVC again after the copy.
436		 */
		/*
		 * r0 = current address of __hyp_stub_vectors, then rebased
		 * from the copy source (r5) to the copy destination (r10).
		 */
4370:		adr	r0, 0b
438		movw	r1, #:lower16:__hyp_stub_vectors - 0b
439		movt	r1, #:upper16:__hyp_stub_vectors - 0b
440		add	r0, r0, r1
441		sub	r0, r0, r5
442		add	r0, r0, r10
443		bl	__hyp_set_vectors
4441:
445#endif
446
		/*
		 * r9 = byte count rounded up to 32; afterwards r6 = end of
		 * the source range and r9 = end of the destination range.
		 */
447		sub	r9, r6, r5		@ size to copy
448		add	r9, r9, #31		@ rounded up to a multiple
449		bic	r9, r9, #31		@ ... of 32 bytes
450		add	r6, r9, r5
451		add	r9, r9, r10
452
		/*
		 * Copy 32 bytes per iteration from the top down.  r10-r12
		 * are used as scratch: r10 has already been folded into r9,
		 * and r10-r12 are loaded again from LC0 at restart.
		 */
4531:		ldmdb	r6!, {r0 - r3, r10 - r12, lr}
454		cmp	r6, r5
455		stmdb	r9!, {r0 - r3, r10 - r12, lr}
456		bhi	1b
457
458		/* Preserve offset to relocated code. */
459		sub	r6, r9, r6
460
461#ifndef CONFIG_ZBOOT_ROM
462		/* cache_clean_flush may use the stack, so relocate it */
463		add	sp, sp, r6
464#endif
465
466		bl	cache_clean_flush
467
		@ continue at restart inside the copy just made
468		badr	r0, restart
469		add	r0, r0, r6
470		mov	pc, r0
471
471wont_overwrite:
472/*
473 * If delta is zero, we are running at the address we were linked at.
474 *   r0  = delta
475 *   r2  = BSS start
476 *   r3  = BSS end
477 *   r4  = kernel execution address (possibly with LSB set)
478 *   r5  = appended dtb size (0 if not present)
479 *   r7  = architecture ID
480 *   r8  = atags pointer
481 *   r11 = GOT start
482 *   r12 = GOT end
483 *   sp  = stack pointer
484 */
		@ nothing to fix up when both the delta and the dtb size are 0
485		orrs	r1, r0, r5
486		beq	not_relocated
487
		@ turn the link-time GOT bounds into run-time addresses
488		add	r11, r11, r0
489		add	r12, r12, r0
490
491#ifndef CONFIG_ZBOOT_ROM
492		/*
493		 * If we're running fully PIC === CONFIG_ZBOOT_ROM = n,
494		 * we need to fix up pointers into the BSS region.
495		 * Note that the stack pointer has already been fixed up.
496		 */
497		add	r2, r2, r0
498		add	r3, r3, r0
499
500		/*
501		 * Relocate all entries in the GOT table.
502		 * Bump bss entries to _edata + dtb size
503		 */
		/*
		 * Every entry gets the delta.  The cmp/cmphs pair leaves
		 * "hi" only for bss_start <= entry < bss_end, and only
		 * those entries are moved up by the dtb size as well.
		 */
5041:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
505		add	r1, r1, r0		@ This fixes up C references
506		cmp	r1, r2			@ if entry >= bss_start &&
507		cmphs	r3, r1			@       bss_end > entry
508		addhi	r1, r1, r5		@    entry += dtb size
509		str	r1, [r11], #4		@ next entry
510		cmp	r11, r12
511		blo	1b
512
513		/* bump our bss pointers too */
514		add	r2, r2, r5
515		add	r3, r3, r5
516
517#else
518
519		/*
520		 * Relocate entries in the GOT table.  We only relocate
521		 * the entries that are outside the (relocated) BSS region.
522		 */
		/*
		 * Here r2/r3 are left at their link-time values.  The
		 * cmp/cmphs pair leaves "lo" when the entry is below
		 * bss_start or above _end; only those get the delta.
		 */
5231:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
524		cmp	r1, r2			@ entry < bss_start ||
525		cmphs	r3, r1			@ _end < entry
526		addlo	r1, r1, r0		@ table.  This fixes up the
527		str	r1, [r11], #4		@ C references.
528		cmp	r11, r12
529		blo	1b
530#endif
532
/*
 * not_relocated - zero the BSS (r2 = start, r3 = end), enable the cache
 * if that was deferred, run the C decompressor and hand over to the
 * kernel.
 *
 * The clear loop stores 16 bytes per iteration and tests the bound only
 * afterwards, so it always writes at least 16 bytes and may write up to
 * 12 bytes beyond r3.
 */
532not_relocated:	mov	r0, #0
5331:		str	r0, [r2], #4		@ clear bss
534		str	r0, [r2], #4
535		str	r0, [r2], #4
536		str	r0, [r2], #4
537		cmp	r2, r3
538		blo	1b
539
540		/*
541		 * Did we skip the cache setup earlier?
542		 * That is indicated by the LSB in r4.
543		 * Do it now if so.
544		 */
		@ bic does not touch the flags, so blne still sees the tst result
545		tst	r4, #1
546		bic	r4, r4, #1
547		blne	cache_on
548
549/*
550 * The C runtime environment should now be setup sufficiently.
551 * Set up some pointers, and start decompressing.
552 *   r4  = kernel execution address
553 *   r7  = architecture ID
554 *   r8  = atags pointer
555 */
		/*
		 * Arguments to decompress_kernel: r0 = output address,
		 * r1/r2 = start/end of the 64k malloc area, r3 = arch ID.
		 */
556		mov	r0, r4
557		mov	r1, sp			@ malloc space above stack
558		add	r2, sp, #0x10000	@ 64k max
559		mov	r3, r7
560		bl	decompress_kernel
561		bl	cache_clean_flush
562		bl	cache_off
563		mov	r1, r7			@ restore architecture number
564		mov	r2, r8			@ restore atags pointer
565
566#ifdef CONFIG_ARM_VIRT_EXT
567		mrs	r0, spsr		@ Get saved CPU boot mode
568		and	r0, r0, #MODE_MASK
569		cmp	r0, #HYP_MODE		@ if not booted in HYP mode...
570		bne	__enter_kernel		@ boot kernel directly
571
		/*
		 * The literal below stores "__hyp_reentry_vectors - ." so
		 * adding its own address gives the run-time address of
		 * __hyp_reentry_vectors without an absolute relocation.
		 */
572		adr	r12, .L__hyp_reentry_vectors_offset
573		ldr	r0, [r12]
574		add	r0, r0, r12
575
576		bl	__hyp_set_vectors
577		__HVC(0)			@ otherwise bounce to hyp mode
578
579		b	.			@ should never be reached
580
581		.align	2
582.L__hyp_reentry_vectors_offset:	.long	__hyp_reentry_vectors - .
583#else
584		b	__enter_kernel
585#endif
587
/*
 * LC0 - table of link-time addresses read by restart.  The first word is
 * the link-time address of the table itself; restart subtracts it from
 * the run-time address to obtain the relocation delta.  The first seven
 * words are loaded with a single ldmia in the register order noted on
 * each line, the stack pointer comes from offset 28, and the final word
 * (offset 32) is the size estimate read via "LC0+32" before the first
 * cache_on call.
 */
587		.align	2
588		.type	LC0, #object
589LC0:		.word	LC0			@ r1
590		.word	__bss_start		@ r2
591		.word	_end			@ r3
592		.word	_edata			@ r6
593		.word	input_data_end - 4	@ r10 (inflated size location)
594		.word	_got_start		@ r11
595		.word	_got_end		@ ip
596		.word	.L_user_stack_end	@ sp
597		.word	_end - restart + 16384 + 1024*1024
598		.size	LC0, . - LC0
600
600#ifdef CONFIG_ARCH_RPC
/*
 * params - global helper, only built for CONFIG_ARCH_RPC.
 * Returns the constant 0x10000100 in r0 and returns to the caller via
 * lr.  The .ltorg places the literal for the ldr right after the code.
 */
601		.globl	params
602params:		ldr	r0, =0x10000100		@ params_phys for RPC
603		mov	pc, lr
604		.ltorg
605		.align
606#endif
608
609/*
610 * Turn on the cache.  We need to setup some page tables so that we
611 * can have both the I and D caches on.
612 *
613 * We place the page tables 16k down from the kernel execution address,
614 * and we hope that nothing else is using it.  If we're using it, we
615 * will go pop!
616 *
617 * On entry,
618 *  r4 = kernel execution address
619 *  r7 = architecture number
620 *  r8 = atags pointer
621 * On exit,
622 *  r0, r1, r2, r3, r9, r10, r12 corrupted
623 * This routine must preserve:
624 *  r4, r7, r8
625 */
/*
 * r3 = 8 is the byte offset of the "cache on" slot inside a proc_types
 * entry (it follows the two words holding the ID match and mask).
 * call_cache_fn jumps into the matching entry at that offset; lr is left
 * untouched, so the selected method returns directly to our caller.
 */
626		.align	5
627cache_on:	mov	r3, #8			@ cache_on function
628		b	call_cache_fn
629
630/*
631 * Initialize the highest priority protection region, PR7
632 * to cover all 32bit address and cacheable and bufferable.
633 */
/*
 * "Cache on" method for MPU cores with separate I and D settings
 * (referenced from the ARM94x entry in proc_types).  Only r0 is used as
 * scratch; returns via lr.
 */
634__armv4_mpu_cache_on:
635		mov	r0, #0x3f		@ 4G, the whole
636		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
637		mcr 	p15, 0, r0, c6, c7, 1
638
639		mov	r0, #0x80		@ PR7
640		mcr	p15, 0, r0, c2, c0, 0	@ D-cache on
641		mcr	p15, 0, r0, c2, c0, 1	@ I-cache on
642		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
643
644		mov	r0, #0xc000
645		mcr	p15, 0, r0, c5, c0, 1	@ I-access permission
646		mcr	p15, 0, r0, c5, c0, 0	@ D-access permission
647
648		mov	r0, #0
649		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
650		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
651		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
		@ read-modify-write: only the bits shown in the mask are set
652		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
653						@ ...I .... ..D. WC.M
654		orr	r0, r0, #0x002d		@ .... .... ..1. 11.1
655		orr	r0, r0, #0x1000		@ ...1 .... .... ....
656
657		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
658
		@ invalidate both caches once more now that they are enabled
659		mov	r0, #0
660		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
661		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
662		mov	pc, lr
663
/*
 * "Cache on" method for MPU cores with a single unified cache setting
 * (referenced from the ARM74x entry in proc_types).
 *
 * Programs protection region 7 to span the whole 4G address space as
 * cacheable and bufferable with full access, invalidates the cache, and
 * then sets the W, C and M bits in the control register.
 *
 * Only r0 is used as scratch; returns via lr.
 */
__armv3_mpu_cache_on:
		mov	r0, #0x3f		@ 4G, the whole
		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting

		mov	r0, #0x80		@ PR7
		mcr	p15, 0, r0, c2, c0, 0	@ cache on
		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on

		mov	r0, #0xc000
		mcr	p15, 0, r0, c5, c0, 0	@ access permission

		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		/*
		 * ?? ARMv3 MMU does not allow reading the control register,
		 * does this really work on ARMv3 MPU?
		 */
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
						@ .... .... .... WC.M
		orr	r0, r0, #0x000d		@ .... .... .... 11.1
		/*
		 * Write the value built above.  r0 must not be zeroed
		 * before this mcr, otherwise the W/C/M bits are lost and
		 * the control register is written with 0 instead.
		 */
		mcr	p15, 0, r0, c1, c0, 0	@ write control reg

		/* invalidate once more now that the cache is enabled */
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr
691
/*
 * CB_BITS - cacheable/bufferable bits that callers fold into the section
 * flags they pass to __setup_mmu in r6.
 */
692#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
693#define CB_BITS 0x08
694#else
695#define CB_BITS 0x0c
696#endif
697
/*
 * __setup_mmu - build a flat 1:1 section mapping of the whole 4G space.
 *
 * On entry:
 *   r4 = kernel execution address
 *   r6 = section flags to apply to the RAM area (set by the caller)
 * On exit:
 *   r3 = page directory base (16k below r4, aligned down to 16k)
 *   r0, r1, r2, r9, r10 corrupted
 *
 * The RAM area is taken to be the 256MB starting at the page directory
 * address rounded down to 256k.  The loop writes 4096 one-megabyte
 * section entries.
 */
698__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
699		bic	r3, r3, #0xff		@ Align the pointer
700		bic	r3, r3, #0x3f00
701/*
702 * Initialise the page tables, turning on the cacheable and bufferable
703 * bits for the RAM area only.
704 */
705		mov	r0, r3
706		mov	r9, r0, lsr #18
707		mov	r9, r9, lsl #18		@ start of RAM
708		add	r10, r9, #0x10000000	@ a reasonable RAM size
709		mov	r1, #0x12		@ XN|U + section mapping
710		orr	r1, r1, #3 << 10	@ AP=11
711		add	r2, r3, #16384
		@ the bic below does not touch the flags, so orrlo/orrhs still
		@ act on the result of the cmp/cmphs pair
7121:		cmp	r1, r9			@ if virt >= start of RAM
713		cmphs	r10, r1			@   && end of RAM >= virt
714		bic	r1, r1, #0x1c		@ clear XN|U + C + B
715		orrlo	r1, r1, #0x10		@ Set XN|U for non-RAM
716		orrhs	r1, r1, r6		@ set RAM section settings
717		str	r1, [r0], #4		@ 1:1 mapping
718		add	r1, r1, #1048576
719		teq	r0, r2
720		bne	1b
721/*
722 * If ever we are running from Flash, then we surely want the cache
723 * to be enabled also for our execution instance...  We map 2MB of it
724 * so there is no map overlap problem for up to 1 MB compressed kernel.
725 * If the execution is in RAM then we would only be duplicating the above.
726 */
		@ r2 = index of the megabyte we are executing from; the two
		@ stores overwrite that entry and the one after it
727		orr	r1, r6, #0x04		@ ensure B is set for this
728		orr	r1, r1, #3 << 10
729		mov	r2, pc
730		mov	r2, r2, lsr #20
731		orr	r1, r1, r2, lsl #20
732		add	r0, r3, r2, lsl #2
733		str	r1, [r0], #4
734		add	r1, r1, #1048576
735		str	r1, [r0]
736		mov	pc, lr
737ENDPROC(__setup_mmu)
738
739@ Enable unaligned access on v6, to allow better code generation
740@ for the decompressor C code:
@ clears the A bit and sets the U bit in SCTLR, then continues in
@ __armv4_mmu_cache_on with lr unchanged, so that routine returns to
@ our caller.
741__armv6_mmu_cache_on:
742		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
743		bic	r0, r0, #2		@ A (no unaligned access fault)
744		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
745		mcr	p15, 0, r0, c1, c0, 0	@ write SCTLR
746		b	__armv4_mmu_cache_on
747
/*
 * __arm926ejs_mmu_cache_on optionally puts the D-cache in writethrough
 * mode and then falls through into __armv4_mmu_cache_on.
 *
 * __armv4_mmu_cache_on: with CONFIG_MMU, builds the page tables via
 * __setup_mmu using CB_BITS | 0x12 as RAM section flags, then enables
 * the MMU and caches through __common_mmu_cache_on.  Without CONFIG_MMU
 * it only returns.
 *
 * lr is kept in r12 because the nested bl calls overwrite it.
 * Corrupts r0, r6 and r12 here, plus whatever the callees corrupt.
 */
748__arm926ejs_mmu_cache_on:
749#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
750		mov	r0, #4			@ put dcache in WT mode
751		mcr	p15, 7, r0, c15, c0, 0
752#endif
753
754__armv4_mmu_cache_on:
755		mov	r12, lr
756#ifdef CONFIG_MMU
757		mov	r6, #CB_BITS | 0x12	@ U
758		bl	__setup_mmu
759		mov	r0, #0
760		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
761		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
762		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
763		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
764		orr	r0, r0, #0x0030
765 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
		@ r0 = control value, r3 = page directory from __setup_mmu
766		bl	__common_mmu_cache_on
767		mov	r0, #0
768		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
769#endif
770		mov	pc, r12
771
/*
 * __armv7_mmu_cache_on - "cache on" method for the "new CPU Id" entry in
 * proc_types.
 *
 * With CONFIG_MMU, the low four bits of ID_MMFR0 (tested below as the
 * VMSA field) decide whether page tables are built and the MMU enabled;
 * when they are zero only the caches and alignment settings are
 * programmed.  The flags from the second "tst r11, #0xf" stay live until
 * the last mcrne: none of the instructions in between update them.
 *
 * lr is kept in r12 across the call to __setup_mmu.
 * Corrupts r0, r1, r6, r11 and r12 here, plus whatever __setup_mmu
 * corrupts.
 */
772__armv7_mmu_cache_on:
773		mov	r12, lr
774#ifdef CONFIG_MMU
775		mrc	p15, 0, r11, c0, c1, 4	@ read ID_MMFR0
776		tst	r11, #0xf		@ VMSA
777		movne	r6, #CB_BITS | 0x02	@ !XN
778		blne	__setup_mmu
779		mov	r0, #0
780		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
781		tst	r11, #0xf		@ VMSA
782		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
783#endif
784		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
785		bic	r0, r0, #1 << 28	@ clear SCTLR.TRE
786		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
787		orr	r0, r0, #0x003c		@ write buffer
788		bic	r0, r0, #2		@ A (no unaligned access fault)
789		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
790						@ (needed for ARM1176)
791#ifdef CONFIG_MMU
792 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
		@ the two bic instructions run unconditionally, but r6 is
		@ only written back by the conditional mcrne below
793		mrcne   p15, 0, r6, c2, c0, 2   @ read ttb control reg
794		orrne	r0, r0, #1		@ MMU enabled
795		movne	r1, #0xfffffffd		@ domain 0 = client
796		bic     r6, r6, #1 << 31        @ 32-bit translation system
797		bic     r6, r6, #(7 << 0) | (1 << 4)	@ use only ttbr0
798		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
799		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
800		mcrne   p15, 0, r6, c2, c0, 2   @ load ttb control
801#endif
802		mcr	p15, 0, r0, c7, c5, 4	@ ISB
803		mcr	p15, 0, r0, c1, c0, 0	@ load control register
804		mrc	p15, 0, r0, c1, c0, 0	@ and read it back
805		mov	r0, #0
806		mcr	p15, 0, r0, c7, c5, 4	@ ISB
807		mov	pc, r12
808
/*
 * __fa526_cache_on - "cache on" method for the FA526 entry in proc_types.
 * Builds the page tables with CB_BITS | 0x12 as RAM section flags,
 * invalidates the cache and TLB, and enables the I-cache, MMU and cache
 * through __common_mmu_cache_on.
 *
 * lr is kept in r12 because the nested bl calls overwrite it.
 * Corrupts r0, r6 and r12 here, plus whatever the callees corrupt.
 */
809__fa526_cache_on:
810		mov	r12, lr
811		mov	r6, #CB_BITS | 0x12	@ U
812		bl	__setup_mmu
813		mov	r0, #0
814		mcr	p15, 0, r0, c7, c7, 0	@ Invalidate whole cache
815		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
816		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
817		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
818		orr	r0, r0, #0x1000		@ I-cache enable
819		bl	__common_mmu_cache_on
820		mov	r0, #0
821		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
822		mov	pc, r12
823
/*
 * __common_mmu_cache_on - final step shared by the MMU "cache on"
 * methods.
 *
 * On entry:
 *   r0 = control register value prepared by the caller
 *   r3 = page table pointer
 * Loads the page table pointer, sets the domain access control register
 * to all ones and writes the control register.  Unless DEBUG is defined,
 * the write buffer, cache and MMU bits (0x000d) are OR-ed into r0 first.
 * r0 and r1 are overwritten; returns via lr.
 *
 * NOTE(review): when CONFIG_THUMB2_KERNEL is defined the whole body is
 * compiled out and the label is immediately followed by whatever comes
 * next in the file - confirm no Thumb-2 configuration reaches it.
 */
824__common_mmu_cache_on:
825#ifndef CONFIG_THUMB2_KERNEL
826#ifndef DEBUG
827		orr	r0, r0, #0x000d		@ Write buffer, mmu
828#endif
829		mov	r1, #-1
830		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
831		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
832		b	1f
833		.align	5			@ cache line aligned
8341:		mcr	p15, 0, r0, c1, c0, 0	@ load control register
835		mrc	p15, 0, r0, c1, c0, 0	@ and read it back to
		@ "r0, lsr #32" is always zero, so this is a return through
		@ lr that carries a data dependency on the read-back above
836		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
837#endif
838
/* Size in bytes of one proc_types entry: ID match, ID mask, three methods */
838#define PROC_ENTRY_SIZE (4*5)
839
840/*
841 * Here follow the relocatable cache support functions for the
842 * various processors.  This is a generic hook for locating an
843 * entry and jumping to an instruction at the specified offset
844 * from the start of the block.  Please note this is all position
845 * independent code.
846 *
847 *  r1  = corrupted
848 *  r2  = corrupted
849 *  r3  = block offset
850 *  r9  = corrupted
851 *  r12 = corrupted
852 */
/*
 * The selected method is entered with a jump, not a call: lr still holds
 * the return address of whoever called cache_on/cache_off/
 * cache_clean_flush, and r12 points at (ARM) or into (Thumb) the
 * matching table entry.  The search relies on the table ending with an
 * entry whose mask is 0, which matches any ID.
 */
853
854call_cache_fn:	adr	r12, proc_types
855#ifdef CONFIG_CPU_CP15
856		mrc	p15, 0, r9, c0, c0	@ get processor ID
857#elif defined(CONFIG_CPU_V7M)
858		/*
859		 * On v7-M the processor id is located in the V7M_SCB_CPUID
860		 * register, but as cache handling is IMPLEMENTATION DEFINED on
861		 * v7-M (if existent at all) we just return early here.
862		 * If V7M_SCB_CPUID were used the cpu ID functions (i.e.
863		 * __armv7_mmu_cache_{on,off,flush}) would be selected which
864		 * use cp15 registers that are not implemented on v7-M.
865		 */
866		bx	lr
867#else
868		ldr	r9, =CONFIG_PROCESSOR_ID
869#endif
8701:		ldr	r1, [r12, #0]		@ get value
871		ldr	r2, [r12, #4]		@ get mask
872		eor	r1, r1, r9		@ (real ^ match)
873		tst	r1, r2			@       & mask
874 ARM(		addeq	pc, r12, r3		) @ call cache function
875 THUMB(		addeq	r12, r3			)
876 THUMB(		moveq	pc, r12			) @ call cache function
		@ no match: step to the next entry and try again
877		add	r12, r12, #PROC_ENTRY_SIZE
878		b	1b
880
881/*
882 * Table for cache operations.  This is basically:
883 *   - CPU ID match
884 *   - CPU ID mask
885 *   - 'cache on' method instruction
886 *   - 'cache off' method instruction
887 *   - 'cache flush' method instruction
888 *
889 * We match an entry using: ((real_id ^ match) & mask) == 0
890 *
891 * Writethrough caches generally only need 'on' and 'off'
892 * methods.  Writeback caches _must_ have the flush method
893 * defined.
894 */
895		.align	2
896		.type	proc_types,#object
897proc_types:
898		.word	0x41000000		@ old ARM ID
899		.word	0xff00f000
900		mov	pc, lr
901 THUMB(		nop				)
902		mov	pc, lr
903 THUMB(		nop				)
904		mov	pc, lr
905 THUMB(		nop				)
906
907		.word	0x41007000		@ ARM7/710
908		.word	0xfff8fe00
909		mov	pc, lr
910 THUMB(		nop				)
911		mov	pc, lr
912 THUMB(		nop				)
913		mov	pc, lr
914 THUMB(		nop				)
915
916		.word	0x41807200		@ ARM720T (writethrough)
917		.word	0xffffff00
918		W(b)	__armv4_mmu_cache_on
919		W(b)	__armv4_mmu_cache_off
920		mov	pc, lr
921 THUMB(		nop				)
922
923		.word	0x41007400		@ ARM74x
924		.word	0xff00ff00
925		W(b)	__armv3_mpu_cache_on
926		W(b)	__armv3_mpu_cache_off
927		W(b)	__armv3_mpu_cache_flush
928
929		.word	0x41009400		@ ARM94x
930		.word	0xff00ff00
931		W(b)	__armv4_mpu_cache_on
932		W(b)	__armv4_mpu_cache_off
933		W(b)	__armv4_mpu_cache_flush
934
935		.word	0x41069260		@ ARM926EJ-S (v5TEJ)
936		.word	0xff0ffff0
937		W(b)	__arm926ejs_mmu_cache_on
938		W(b)	__armv4_mmu_cache_off
939		W(b)	__armv5tej_mmu_cache_flush
940
941		.word	0x00007000		@ ARM7 IDs
942		.word	0x0000f000
943		mov	pc, lr
944 THUMB(		nop				)
945		mov	pc, lr
946 THUMB(		nop				)
947		mov	pc, lr
948 THUMB(		nop				)
949
950		@ Everything from here on will be the new ID system.
951
952		.word	0x4401a100		@ sa110 / sa1100
953		.word	0xffffffe0
954		W(b)	__armv4_mmu_cache_on
955		W(b)	__armv4_mmu_cache_off
956		W(b)	__armv4_mmu_cache_flush
957
958		.word	0x6901b110		@ sa1110
959		.word	0xfffffff0
960		W(b)	__armv4_mmu_cache_on
961		W(b)	__armv4_mmu_cache_off
962		W(b)	__armv4_mmu_cache_flush
963
964		.word	0x56056900
965		.word	0xffffff00		@ PXA9xx
966		W(b)	__armv4_mmu_cache_on
967		W(b)	__armv4_mmu_cache_off
968		W(b)	__armv4_mmu_cache_flush
969
970		.word	0x56158000		@ PXA168
971		.word	0xfffff000
972		W(b)	__armv4_mmu_cache_on
973		W(b)	__armv4_mmu_cache_off
974		W(b)	__armv5tej_mmu_cache_flush
975
976		.word	0x56050000		@ Feroceon
977		.word	0xff0f0000
978		W(b)	__armv4_mmu_cache_on
979		W(b)	__armv4_mmu_cache_off
980		W(b)	__armv5tej_mmu_cache_flush
981
982#ifdef CONFIG_CPU_FEROCEON_OLD_ID
983		/* this conflicts with the standard ARMv5TE entry */
984		.long	0x41009260		@ Old Feroceon
985		.long	0xff00fff0
986		b	__armv4_mmu_cache_on
987		b	__armv4_mmu_cache_off
988		b	__armv5tej_mmu_cache_flush
989#endif
990
991		.word	0x66015261		@ FA526
992		.word	0xff01fff1
993		W(b)	__fa526_cache_on
994		W(b)	__armv4_mmu_cache_off
995		W(b)	__fa526_cache_flush
996
997		@ These match on the architecture ID
998
999		.word	0x00020000		@ ARMv4T
1000		.word	0x000f0000
1001		W(b)	__armv4_mmu_cache_on
1002		W(b)	__armv4_mmu_cache_off
1003		W(b)	__armv4_mmu_cache_flush
1004
1005		.word	0x00050000		@ ARMv5TE
1006		.word	0x000f0000
1007		W(b)	__armv4_mmu_cache_on
1008		W(b)	__armv4_mmu_cache_off
1009		W(b)	__armv4_mmu_cache_flush
1010
1011		.word	0x00060000		@ ARMv5TEJ
1012		.word	0x000f0000
1013		W(b)	__armv4_mmu_cache_on
1014		W(b)	__armv4_mmu_cache_off
1015		W(b)	__armv5tej_mmu_cache_flush
1016
1017		.word	0x0007b000		@ ARMv6
1018		.word	0x000ff000
1019		W(b)	__armv6_mmu_cache_on
1020		W(b)	__armv4_mmu_cache_off
1021		W(b)	__armv6_mmu_cache_flush
1022
1023		.word	0x000f0000		@ new CPU Id
1024		.word	0x000f0000
1025		W(b)	__armv7_mmu_cache_on
1026		W(b)	__armv7_mmu_cache_off
1027		W(b)	__armv7_mmu_cache_flush
1028
1029		.word	0			@ unrecognised type
1030		.word	0
1031		mov	pc, lr
1032 THUMB(		nop				)
1033		mov	pc, lr
1034 THUMB(		nop				)
1035		mov	pc, lr
1036 THUMB(		nop				)
1037
1038		.size	proc_types, . - proc_types
1039
1040		/*
1041		 * If you get a "non-constant expression in ".if" statement"
1042		 * error from the assembler on this line, check that you have
1043		 * not accidentally written a "b" instruction where you should
1044		 * have written W(b).
1045		 */
1046		.if (. - proc_types) % PROC_ENTRY_SIZE != 0
1047		.error "The size of one or more proc_types entries is wrong."
1048		.endif
1049
1050/*
1051 * Turn off the Cache and MMU.  ARMv3 does not support
1052 * reading the control register, but ARMv4 does.
1053 *
1054 * On exit,
1055 *  r0, r1, r2, r3, r9, r12 corrupted
1056 * This routine must preserve:
1057 *  r4, r7, r8
1058 */
/*
 * r3 = 12 is the byte offset of the "cache off" slot inside a proc_types
 * entry.  call_cache_fn jumps into the matching entry at that offset; lr
 * is left untouched, so the selected method returns to our caller.
 */
1059		.align	5
1060cache_off:	mov	r3, #12			@ cache_off function
1061		b	call_cache_fn
1062
/*
 * "Cache off" method for the ARM94x entry in proc_types.  Clears the
 * bits in mask 0x000d of the control register, then drains the write
 * buffer and flushes both caches.  Only r0 is used; returns via lr.
 */
1063__armv4_mpu_cache_off:
1064		mrc	p15, 0, r0, c1, c0
1065		bic	r0, r0, #0x000d
1066		mcr	p15, 0, r0, c1, c0	@ turn MPU and cache off
1067		mov	r0, #0
1068		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
1069		mcr	p15, 0, r0, c7, c6, 0	@ flush D-Cache
1070		mcr	p15, 0, r0, c7, c5, 0	@ flush I-Cache
1071		mov	pc, lr
1072
/*
 * "Cache off" method for the ARM74x entry in proc_types.  Clears the
 * bits in mask 0x000d of the control register and invalidates the whole
 * cache.  Only r0 is used; returns via lr.
 */
1073__armv3_mpu_cache_off:
1074		mrc	p15, 0, r0, c1, c0
1075		bic	r0, r0, #0x000d
1076		mcr	p15, 0, r0, c1, c0, 0	@ turn MPU and cache off
1077		mov	r0, #0
1078		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
1079		mov	pc, lr
1080
/*
 * "Cache off" method shared by most MMU entries in proc_types.  With
 * CONFIG_MMU it clears the bits in mask 0x000d of the control register
 * and invalidates the whole cache and TLB; without CONFIG_MMU it only
 * returns.  Only r0 is used; returns via lr.
 */
1081__armv4_mmu_cache_off:
1082#ifdef CONFIG_MMU
1083		mrc	p15, 0, r0, c1, c0
1084		bic	r0, r0, #0x000d
1085		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
1086		mov	r0, #0
1087		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
1088		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
1089#endif
1090		mov	pc, lr
1091
/*
 * "Cache off" method for the "new CPU Id" entry in proc_types.  Clears
 * control register bits 0x000d (0x000c without CONFIG_MMU, leaving bit 0
 * alone), then runs the v7 cache flush, invalidates the TLB (CONFIG_MMU
 * only) and the BTC, and finishes with DSB and ISB.
 *
 * lr is kept in r12 across the call to __armv7_mmu_cache_flush.
 * Corrupts r0 and r12 here, plus whatever the flush routine corrupts.
 */
1092__armv7_mmu_cache_off:
1093		mrc	p15, 0, r0, c1, c0
1094#ifdef CONFIG_MMU
1095		bic	r0, r0, #0x000d
1096#else
1097		bic	r0, r0, #0x000c
1098#endif
1099		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
1100		mov	r12, lr
1101		bl	__armv7_mmu_cache_flush
1102		mov	r0, #0
1103#ifdef CONFIG_MMU
1104		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB
1105#endif
1106		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
1107		mcr	p15, 0, r0, c7, c10, 4	@ DSB
1108		mcr	p15, 0, r0, c7, c5, 4	@ ISB
1109		mov	pc, r12
1110
1111/*
1112 * Clean and flush the cache to maintain consistency.
1113 *
1114 * On exit,
1115 *  r1, r2, r3, r9, r10, r11, r12 corrupted
1116 * This routine must preserve:
1117 *  r4, r6, r7, r8
1118 */
/*
 * r3 = 16 is the byte offset of the "cache flush" slot inside a
 * proc_types entry.  call_cache_fn jumps into the matching entry at that
 * offset; lr is left untouched, so the selected method returns to our
 * caller.
 */
1119		.align	5
1120cache_clean_flush:
1121		mov	r3, #16
1122		b	call_cache_fn
1123
/*
 * ARMv4 MPU flavour of cache_clean_flush.
 *
 * Returns immediately when bit 0 of r4 is set.  Otherwise walks the D cache
 * by index: 8 segments (value bits from bit 5 up) times 64 entries (value
 * bits from bit 26 up), issuing "clean & invalidate D index" for each, then
 * invalidates the I cache and drains the write buffer.
 * Writes r1, r2 and r3.
 *
 * NOTE(review): ip is used as the value operand of three MCRs but is never
 * initialised here - presumably the value is ignored by these operations.
 * NOTE(review): the whole-D-cache invalidate is issued before the clean
 * loop; confirm that this ordering is intended.
 * NOTE(review): r2 is set to 1 and never changed, so the conditional I-cache
 * invalidate near the end always executes.
 */
__armv4_mpu_cache_flush:
		tst	r4, #1
		movne	pc, lr			@ bit 0 of r4 set: nothing to do
		mov	r2, #1
		mov	r3, #0
		mcr	p15, 0, ip, c7, c6, 0	@ invalidate D cache
		mov	r1, #(7 << 5)		@ start with segment 7 (of 8)
1:		orr	r3, r1, #(63 << 26)	@ start with entry 63 (of 64)
2:		mcr	p15, 0, r3, c7, c14, 2	@ clean & invalidate D index
		subs	r3, r3, #(1 << 26)	@ previous entry
		bcs	2b			@ no borrow: entries 63 down to 0
		subs	r1, r1, #(1 << 5)	@ previous segment
		bcs	1b			@ no borrow: segments 7 down to 0

		teq	r2, #0
		mcrne	p15, 0, ip, c7, c5, 0	@ invalidate I cache
		mcr	p15, 0, ip, c7, c10, 4	@ drain WB
		mov	pc, lr
1142
/*
 * FA526 flavour of cache_clean_flush.
 *
 * Returns immediately when bit 0 of r4 is set.  Otherwise cleans and
 * invalidates the D cache, flushes the I cache and drains the write
 * buffer.  Writes r1 only.
 */
__fa526_cache_flush:
		tst	r4, #1
		movne	pc, lr			@ bit 0 of r4 set: nothing to do
		mov	r1, #0			@ value operand for the ops below
		mcr	p15, 0, r1, c7, c14, 0	@ clean and invalidate the D cache
		mcr	p15, 0, r1, c7, c5, 0	@ flush the I cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain the write buffer
		mov	pc, lr
1151
/*
 * ARMv6 flavour of cache_clean_flush.
 *
 * The I cache/BTB invalidate and the write-buffer drain are unconditional;
 * the two clean+invalidate operations (D cache and unified cache) are only
 * executed when bit 0 of r4 is clear.  Writes r1 only.
 */
__armv6_mmu_cache_flush:
		tst	r4, #1			@ EQ <=> bit 0 of r4 is clear
		mov	r1, #0			@ value operand (flags unchanged)
		mcreq	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
		mcreq	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr
1160
/*
 * ARMv7 flavour of cache_clean_flush.
 *
 * - Bit 0 of r4 set: skip all D-cache work and go straight to iflush.
 * - ID_MMFR1 bits [19:16] non-zero: one "clean+invalidate D" operation.
 * - ID_MMFR1 bits [19:16] zero: walk every cache level reported by CLIDR
 *   and clean+invalidate each line by set/way.
 *
 * The set/way walk saves r0-r7 and r9-r11 on the stack and restores them
 * afterwards, so sp must point at usable memory.  r10 is the only register
 * left modified (it is zero on return unless bit 0 of r4 was set, in which
 * case it is untouched).  r8 and r12 are never written.
 *
 * Register use inside the walk:
 *  r0  = CLIDR			r3  = 2 * LoC (loop limit)
 *  r10 = 2 * cache level	r1  = CCSIDR of the selected level
 *  r2  = log2(line bytes)	r4  = highest way number
 *  r5  = shift for the way	r7  = current set index (counts down)
 *  r9  = current way (counts down)	r11 = set/way operand being built
 *  r6  = scratch (Thumb build only)
 */
__armv7_mmu_cache_flush:
		tst	r4, #1
		bne	iflush			@ bit 0 of r4 set: I side only
		mrc	p15, 0, r10, c0, c1, 5	@ r10 = ID_MMFR1
		tst	r10, #(0xf << 16)	@ hierarchical cache (ARMv7)
		mov	r10, #0			@ flags from tst are kept
		beq	hierarchical
		mcr	p15, 0, r10, c7, c14, 0	@ clean+invalidate D
		b	iflush
hierarchical:
		mcr	p15, 0, r10, c7, c10, 5	@ DMB
		stmfd	sp!, {r0-r7, r9-r11}
		mrc	p15, 1, r0, c0, c0, 1	@ r0 = clidr
		ands	r3, r0, #0x07000000	@ isolate the loc field of clidr
		mov	r3, r3, lsr #23		@ r3 = loc * 2 (flags unchanged)
		beq	finished		@ loc == 0: nothing to clean
		mov	r10, #0			@ begin with cache level 0
loop1:
		add	r2, r10, r10, lsr #1	@ r2 = 3 * current cache level
		mov	r1, r0, lsr r2		@ shift this level's type bits down
		and	r1, r1, #7		@ keep the 3 type bits of this level
		cmp	r1, #2			@ which kind of cache is here?
		blt	skip			@ none, or i-cache only: next level
		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
		mcr	p15, 0, r10, c7, c5, 4	@ isb to sync the new cssr&csidr
		mrc	p15, 1, r1, c0, c0, 0	@ r1 = csidr of the selected level
		and	r2, r1, #7		@ line length field
		add	r2, r2, #4		@ add 4 (line length offset)
		ldr	r4, =0x3ff
		ands	r4, r4, r1, lsr #3	@ r4 = maximum way number
		clz	r5, r4			@ bit position of the way increment
		ldr	r7, =0x7fff
		ands	r7, r7, r1, lsr #13	@ r7 = maximum index number
loop2:
		mov	r9, r4			@ working copy of the maximum way
loop3:
 ARM(		orr	r11, r10, r9, lsl r5	) @ factor way and cache number into r11
 ARM(		orr	r11, r11, r7, lsl r2	) @ factor index number into r11
 THUMB(		lsl	r6, r9, r5		)
 THUMB(		orr	r11, r10, r6		) @ factor way and cache number into r11
 THUMB(		lsl	r6, r7, r2		)
 THUMB(		orr	r11, r11, r6		) @ factor index number into r11
		mcr	p15, 0, r11, c7, c14, 2	@ clean & invalidate by set/way
		subs	r9, r9, #1		@ next lower way
		bge	loop3
		subs	r7, r7, #1		@ next lower index
		bge	loop2
skip:
		add	r10, r10, #2		@ next cache level (stored as level * 2)
		cmp	r3, r10
		bgt	loop1
finished:
		ldmfd	sp!, {r0-r7, r9-r11}
		mov	r10, #0			@ switch back to cache level 0
		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
iflush:
		mcr	p15, 0, r10, c7, c10, 4	@ DSB
		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I+BTB
		mcr	p15, 0, r10, c7, c10, 4	@ DSB
		mcr	p15, 0, r10, c7, c5, 4	@ ISB
		mov	pc, lr
1222
/*
 * ARMv5TEJ flavour of cache_clean_flush.
 *
 * Returns immediately when bit 0 of r4 is set.  Otherwise repeats the
 * "test, clean and invalidate D cache" operation until it reports EQ, then
 * flushes the I cache and drains the write buffer.
 *
 * The MRC destination is spelled APSR_nzcv rather than r15: both encode
 * Rt = 15, which makes the coprocessor result land in the condition flags,
 * but the r15 spelling is deprecated and is rejected by some assemblers
 * (for example the LLVM integrated assembler).
 *
 * No general-purpose register is written.
 * NOTE(review): r0 is passed as the value operand of the last two MCRs with
 * whatever the caller left in it - presumably the value is ignored.
 */
__armv5tej_mmu_cache_flush:
		tst	r4, #1
		movne	pc, lr			@ bit 0 of r4 set: nothing to do
1:		mrc	p15, 0, APSR_nzcv, c7, c14, 3	@ test,clean,invalidate D cache
		bne	1b			@ repeat until the op reports EQ
		mcr	p15, 0, r0, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r0, c7, c10, 4	@ drain WB
		mov	pc, lr
1231
/*
 * ARMv4 MMU flavour of cache_clean_flush (software flush).
 *
 * Returns immediately when bit 0 of r4 is set.  Otherwise reads memory
 * starting at the 64-byte aligned address of this code, one load per cache
 * line, over a span of twice the D-cache size, and then issues the I-cache
 * flush, D-cache flush and write-buffer drain operations.
 *
 * Span and line size default to 64K and 32 bytes.  When the cache type
 * register reads back different from r9 they are derived from its fields
 * instead (size from bits [20:18] and the M bit 14, line length from
 * bits [13:12]).
 * NOTE(review): r9 is set up outside this part of the file - presumably it
 * holds the main ID register value; confirm in call_cache_fn.
 *
 * Writes r1, r2, r3 and r11.
 */
__armv4_mmu_cache_flush:
		tst	r4, #1
		movne	pc, lr			@ bit 0 of r4 set: nothing to do
		mov	r2, #(64 * 1024)	@ default: 32K dcache size (*2)
		mov	r11, #32		@ default: 32 byte line size
		mrc	p15, 0, r3, c0, c0, 1	@ r3 = cache type register
		teq	r3, r9			@ cache ID register present?
		beq	no_cache_id		@ same as r9: keep the defaults
		mov	r1, r3, lsr #18
		and	r1, r1, #7		@ r1 = bits [20:18], size field
		mov	r2, #1024
		mov	r2, r2, lsl r1		@ base dcache size *2
		tst	r3, #(1 << 14)		@ test M bit
		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
		mov	r3, r3, lsr #12
		and	r3, r3, #3		@ r3 = bits [13:12], line length field
		mov	r11, #8
		mov	r11, r11, lsl r3	@ cache line size in bytes
no_cache_id:
		mov	r1, pc			@ start reading near this code
		bic	r1, r1, #63		@ align to longest cache line
		add	r2, r1, r2		@ r2 = end address of the span
1:
 ARM(		ldr	r3, [r1], r11		) @ s/w flush D cache
 THUMB(		ldr     r3, [r1]		) @ s/w flush D cache
 THUMB(		add     r1, r1, r11		)
		teq	r1, r2
		bne	1b			@ until the whole span was read

		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr
1265
/*
 * ARMv3 flavour of cache_clean_flush, shared by the MMU and MPU variants
 * (both labels name the same code).
 *
 * Returns immediately when bit 0 of r4 is set; otherwise issues the v3
 * "invalidate whole cache" operation.  Writes r1 only.
 */
__armv3_mmu_cache_flush:
__armv3_mpu_cache_flush:
		tst	r4, #1
		movne	pc, lr			@ bit 0 of r4 set: nothing to do
		mov	r1, #0
		mcr	p15, 0, r1, c7, c0, 0	@ v3: invalidate the whole cache
		mov	pc, lr
1273
1274/*
1275 * Various debugging routines for printing hex characters and
1276 * memory, which again must be relocatable.
1277 */
1278#ifdef DEBUG
/*
 * phexbuf: 12-byte scratch buffer in which phex builds its string.
 */
		.align	2
		.type	phexbuf,#object
phexbuf:	.space	12
		.size	phexbuf, . - phexbuf

/*
 * phex: print the low r1 hex digits of r0.
 *
 * In:  r0 = value, r1 = number of digits.  The terminating NUL is stored
 *      at phexbuf[r1], so r1 must not exceed 11.
 * The digits are generated from the least significant nibble and stored
 * from the end of the buffer backwards (upper-case A-F); once r1 goes
 * negative the routine tail-branches to puts with r0 = phexbuf, so puts
 * returns directly to the caller of phex.
 *
 * phex corrupts {r0, r1, r2, r3}
 */
phex:
		adr	r3, phexbuf		@ r3 = buffer base (pc-relative)
		mov	r2, #0
		strb	r2, [r3, r1]		@ NUL terminator after the digits
1:		subs	r1, r1, #1		@ next position, right to left
		movmi	r0, r3			@ all digits done: r0 = string
		bmi	puts			@ ... and let puts print it
		and	r2, r0, #15		@ r2 = lowest nibble
		mov	r0, r0, lsr #4		@ drop that nibble
		cmp	r2, #10
		addge	r2, r2, #7		@ 10..15: skip from '9' to 'A'
		add	r2, r2, #'0'
		strb	r2, [r3, r1]
		b	1b
1298
/*
 * puts: write the NUL-terminated string at r0 through the loadsp/writeb
 * debug output macros.
 *
 * Each character is followed by a busy-wait of 0x00020000 loop iterations,
 * and every '\n' is followed by an extra '\r'.  The routine also returns as
 * soon as r0 is zero after a character has been sent; putc relies on that
 * to emit exactly one character.
 *
 * puts corrupts {r0, r1, r2, r3}
 */
puts:
		loadsp	r3, r1			@ set up r3 for writeb (r1 = temp)
1:		ldrb	r2, [r0], #1		@ r2 = next character, advance r0
		teq	r2, #0
		moveq	pc, lr			@ NUL: end of string
2:		writeb	r2, r3
		mov	r1, #0x00020000		@ delay counter
3:		subs	r1, r1, #1
		bne	3b			@ spin until the counter reaches 0
		teq	r2, #'\n'
		moveq	r2, #'\r'		@ just sent '\n': add a '\r'
		beq	2b
		teq	r0, #0			@ r0 == 0 only when entered via putc
		bne	1b
		mov	pc, lr

/*
 * putc: write the single character in r0.
 *
 * Moves the character to r2, clears r0 and joins puts at its output step
 * (local label 2 above), so puts returns after this one character.
 *
 * putc corrupts {r0, r1, r2, r3}
 */
putc:
		mov	r2, r0			@ r2 = character to send
		mov	r0, #0			@ makes puts stop after one char
		loadsp	r3, r1
		b	2b
1320
/*
 * memdump: hex-dump 64 words of memory starting at the address in r0.
 *
 * Each output line holds the address of its first word, a ':' and eight
 * words, every word preceded by a space, with one additional space after
 * the fourth word.
 *
 *  r12 (ip) = base address, r11 = word index, r10 = saved return address
 *
 * memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
 */
memdump:
		mov	ip, r0			@ ip = base address
		mov	r10, lr			@ the bl calls below clobber lr
		mov	r11, #0			@ word index
2:		mov	r0, r11, lsl #2		@ byte offset of this line
		add	r0, r0, ip		@ r0 = address of its first word
		mov	r1, #8
		bl	phex			@ 8 hex digits of address
		mov	r0, #':'
		bl	putc
1:		mov	r0, #' '
		bl	putc
		ldr	r0, [ip, r11, lsl #2]	@ r0 = word number r11
		mov	r1, #8
		bl	phex			@ 8 hex digits of data
		and	r0, r11, #7
		teq	r0, #3			@ was that the 4th word of the line?
		moveq	r0, #' '
		bleq	putc			@ yes: extra gap in the middle
		and	r0, r11, #7		@ r0 = position within the line
		add	r11, r11, #1
		teq	r0, #7			@ was that the 8th word of the line?
		bne	1b
		mov	r0, #'\n'
		bl	putc
		cmp	r11, #64
		blt	2b			@ until 64 words are printed
		mov	pc, r10			@ return via the saved address
1349#endif
1350
1351		.ltorg
1352
1353#ifdef CONFIG_ARM_VIRT_EXT
/*
 * __hyp_reentry_vectors: eight-entry vector table, aligned to 32 bytes.
 *
 * Only the hyp slot does anything: it branches to __enter_kernel.  Every
 * other slot is a branch to itself.  Each entry uses the W() wrapper.
 * NOTE(review): W() is defined outside this file - presumably it forces
 * the wide encoding so that every slot is 4 bytes in a Thumb-2 build.
 */
		.align	5
__hyp_reentry_vectors:
		W(b)	.			@ reset:  spin here
		W(b)	.			@ undef:  spin here
		W(b)	.			@ svc:    spin here
		W(b)	.			@ pabort: spin here
		W(b)	.			@ dabort: spin here
		W(b)	__enter_kernel		@ hyp:    go and start the kernel
		W(b)	.			@ irq:    spin here
		W(b)	.			@ fiq:    spin here
1364#endif /* CONFIG_ARM_VIRT_EXT */
1365
/*
 * __enter_kernel: jump to the kernel entry point held in r4, with r0 = 0.
 *
 * The ARM() line is used for an ARM-mode build and the THUMB() line for a
 * Thumb-2 build; M_CLASS() first sets bit 0 of the address so that the bx
 * enters in Thumb state.
 * NOTE(review): the values expected in r1 and r2 are set up by the callers,
 * outside this part of the file.
 *
 * reloc_code_end is a marker label placed directly behind this routine.
 */
__enter_kernel:
		mov	r0, #0			@ must be 0
 ARM(		mov	pc, r4		)	@ call kernel
 M_CLASS(	add	r4, r4, #1	)	@ enter in Thumb mode for M class
 THUMB(		bx	r4		)	@ entry point is always ARM for A/R classes

reloc_code_end:
1373
1374#ifdef CONFIG_EFI_STUB
/*
 * _start holds the link-time distance from itself to the symbol start, so
 * that the run-time address of start can be computed position-independently
 * as (address of _start) + (value stored at _start).
 */
		.align	2
_start:		.long	start - .

/*
 * efi_stub_entry: entry point used when the zImage is started as an EFI
 * application.
 *
 * r0 and r1 arrive from the EFI firmware and are handed unchanged to
 * efi_entry.  r2 points at a two-word stack frame: word 0 holds the current
 * address of start on the way in and is read back afterwards as the
 * (possibly relocated) image address, word 1 holds the lr to return to the
 * firmware on failure.
 *
 * efi_entry returns the FDT address in r0, or -1 on error.  On success the
 * caches are cleaned and turned off and control moves to the image address
 * plus start_offset with r0 = 0, r1 = 0xFFFFFFFF and r2 = FDT address.
 * On error 0x80000001 (EFI_LOAD_ERROR) is returned to the firmware.
 */
ENTRY(efi_stub_entry)
		adr	ip, _start		@ ip = run-time address of _start
		ldr	r3, [ip]		@ r3 = start - _start
		add	r3, r3, ip		@ r3 = run-time address of start
		stmfd	sp!, {r3, lr}		@ frame: [sp] = image, [sp, #4] = lr
		mov	r2, sp			@ third argument: pointer to frame
		bl	efi_entry

		@ r0 is the FDT address, or -1 if the stub failed.
		cmn	r0, #1			@ r0 + 1 == 0 ?
		beq	efi_load_fail

		@ Keep the FDT address in r4: both cache routines preserve r4.
		@ They also test bit 0 of r4 and skip the D-cache work when it
		@ is set.
		@ NOTE(review): presumably the FDT address is always at least
		@ 2-byte aligned, so that the flush is not skipped - confirm.
		mov	r4, r0
		bl	cache_clean_flush
		bl	cache_off

		@ Boot protocol registers: r0 = 0, r1 = machine type (all
		@ ones here), r2 = FDT address.
		mov	r0, #0
		mvn	r1, #0			@ r1 = 0xFFFFFFFF
		mov	r2, r4

		@ Continue in the (possibly) relocated zImage, whose address
		@ is in [sp].
		ldr	lr, [sp]
		ldr	ip, =start_offset
		add	lr, lr, ip
		mov	pc, lr			@ no mode switch

efi_load_fail:
		@ Hand EFI_LOAD_ERROR back to the firmware; popping the frame
		@ discards the image word into ip and loads the saved lr into pc.
		ldr	r0, =0x80000001
		ldmfd	sp!, {ip, pc}
ENDPROC(efi_stub_entry)
1419#endif
1420
/*
 * Decompressor stack: 4096 bytes reserved in the allocatable, writable,
 * no-bits section ".stack".  .L_user_stack marks the lowest address and
 * .L_user_stack_end the first byte past the reservation.  The bare .align
 * pads the section that was current before the switch.
 */
		.align
		.section ".stack", "aw", %nobits
.L_user_stack:
		.space	4096
.L_user_stack_end:
1425