xref: /openbmc/linux/arch/arm/boot/compressed/head.S (revision b8d312aa)
1/* SPDX-License-Identifier: GPL-2.0-only */
2/*
3 *  linux/arch/arm/boot/compressed/head.S
4 *
5 *  Copyright (C) 1996-2002 Russell King
6 *  Copyright (C) 2004 Hyok S. Choi (MPU support)
7 */
8#include <linux/linkage.h>
9#include <asm/assembler.h>
10#include <asm/v7m.h>
11
12#include "efi-header.S"
13
14 AR_CLASS(	.arch	armv7-a	)
15 M_CLASS(	.arch	armv7-m	)
16
17/*
18 * Debugging stuff
19 *
20 * Note that these macros must not contain any code which is not
21 * 100% relocatable.  Any attempt to do so will result in a crash.
22 * Please select one of the following when turning on debugging.
23 */
/*
 * Low-level debug output primitives (only built when DEBUG is defined).
 *
 *   loadsp rb, tmp1, tmp2 - prepare \rb for a following writeb; expands to
 *                           nothing in the CONFIG_DEBUG_ICEDCC variants
 *   writeb ch, rb         - output the byte held in register \ch
 *
 * With CONFIG_DEBUG_ICEDCC the byte is written to a coprocessor 14
 * register whose encoding depends on the CPU family.  Otherwise the file
 * named by CONFIG_DEBUG_LL_INCLUDE supplies senduart (and addruart, except
 * on SA1100 where the port base is built from constants).
 */
24#ifdef DEBUG
25
26#if defined(CONFIG_DEBUG_ICEDCC)
27
28#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
29		.macro	loadsp, rb, tmp1, tmp2
30		.endm
31		.macro	writeb, ch, rb
32		mcr	p14, 0, \ch, c0, c5, 0
33		.endm
34#elif defined(CONFIG_CPU_XSCALE)
35		.macro	loadsp, rb, tmp1, tmp2
36		.endm
37		.macro	writeb, ch, rb
38		mcr	p14, 0, \ch, c8, c0, 0
39		.endm
40#else
41		.macro	loadsp, rb, tmp1, tmp2
42		.endm
43		.macro	writeb, ch, rb
44		mcr	p14, 0, \ch, c1, c0, 0
45		.endm
46#endif
47
48#else
49
50#include CONFIG_DEBUG_LL_INCLUDE
51
		/* \rb is passed straight through to the platform's senduart */
52		.macro	writeb,	ch, rb
53		senduart \ch, \rb
54		.endm
55
56#if defined(CONFIG_ARCH_SA1100)
		/* SA1100: \rb = 0x80000000 + per-port offset; tmp1/tmp2 unused */
57		.macro	loadsp, rb, tmp1, tmp2
58		mov	\rb, #0x80000000	@ physical base address
59#ifdef CONFIG_DEBUG_LL_SER3
60		add	\rb, \rb, #0x00050000	@ Ser3
61#else
62		add	\rb, \rb, #0x00010000	@ Ser1
63#endif
64		.endm
65#else
66		.macro	loadsp,	rb, tmp1, tmp2
67		addruart \rb, \tmp1, \tmp2
68		.endm
69#endif
70#endif
71#endif
72
/*
 * kputc val - print one character.
 * Copies \val into r0 and calls putc, so r0 and lr are overwritten
 * (plus whatever putc itself changes - not visible from this macro).
 */
73		.macro	kputc,val
74		mov	r0, \val
75		bl	putc
76		.endm
77
/*
 * kphex val, len - print a value in hex.
 * Loads r0 = \val and r1 = #\len (an assemble-time constant), then calls
 * phex.  r0, r1 and lr are overwritten.
 * NOTE(review): \len is presumably the number of hex digits - confirm
 * against phex.
 */
78		.macro	kphex,val,len
79		mov	r0, \val
80		mov	r1, #\len
81		bl	phex
82		.endm
83
/*
 * debug_reloc_start - trace the state before decompression.
 * With DEBUG defined this prints r6 and r7, the CP15 control register
 * (CONFIG_CPU_CP15 only) and then, on a new line, "r5-r9>r4" in hex.
 * Without DEBUG the macro expands to nothing.
 * r0 and r1 are overwritten by the kputc/kphex expansions.
 */
84		.macro	debug_reloc_start
85#ifdef DEBUG
86		kputc	#'\n'
87		kphex	r6, 8		/* processor id */
88		kputc	#':'
89		kphex	r7, 8		/* architecture id */
90#ifdef CONFIG_CPU_CP15
91		kputc	#':'
92		mrc	p15, 0, r0, c1, c0
93		kphex	r0, 8		/* control reg */
94#endif
95		kputc	#'\n'
96		kphex	r5, 8		/* decompressed kernel start */
97		kputc	#'-'
98		kphex	r9, 8		/* decompressed kernel end  */
99		kputc	#'>'
100		kphex	r4, 8		/* kernel execution address */
101		kputc	#'\n'
102#endif
103		.endm
104
/*
 * debug_reloc_end - trace the state after decompression.
 * With DEBUG defined this prints r5 in hex followed by a newline and then
 * calls memdump with r0 = r4.  Without DEBUG it expands to nothing.
 */
105		.macro	debug_reloc_end
106#ifdef DEBUG
107		kphex	r5, 8		/* end of kernel */
108		kputc	#'\n'
109		mov	r0, r4
110		bl	memdump		/* dump 256 bytes at start of kernel */
111#endif
112		.endm
113
114		/*
115		 * Debug kernel copy by printing the memory addresses involved
		 *
		 * Arguments are registers.  With DEBUG defined the output is
		 *   C:0x<begin>-0x<end>->0x<cbegin>-0x<cend>
		 * surrounded by line breaks; without DEBUG nothing is emitted.
		 * r0 and r1 are overwritten by the kputc/kphex expansions, so
		 * the arguments should not live in those registers.
116		 */
117		.macro dbgkc, begin, end, cbegin, cend
118#ifdef DEBUG
119		kputc   #'\n'
120		kputc   #'C'
121		kputc   #':'
122		kputc   #'0'
123		kputc   #'x'
124		kphex   \begin, 8	/* Start of compressed kernel */
125		kputc	#'-'
126		kputc	#'0'
127		kputc	#'x'
128		kphex	\end, 8		/* End of compressed kernel */
129		kputc	#'-'
130		kputc	#'>'
131		kputc   #'0'
132		kputc   #'x'
133		kphex   \cbegin, 8	/* Start of kernel copy */
134		kputc	#'-'
135		kputc	#'0'
136		kputc	#'x'
137		kphex	\cend, 8	/* End of kernel copy */
138		kputc	#'\n'
139		kputc	#'\r'
140#endif
141		.endm
142
/*
 * zImage entry point (section ".start").
 *
 * Execution begins at "start", runs through the no-op padding and then
 * branches over the magic-number words and the EFI header to local
 * label 1.  From there the code:
 *   - copies r1 into r7 and r2 into r8 so they survive the rest of boot,
 *   - on non-V7M builds, issues the Angel "enter SVC" SWI when the two
 *     low CPSR mode bits are clear, then runs safe_svcmode_maskall and
 *     stores the CPSR value captured on entry (r9) in the SPSR.
 */
143		.section ".start", #alloc, #execinstr
144/*
145 * sort out different calling conventions
146 */
147		.align
148		/*
149		 * Always enter in ARM state for CPUs that support the ARM ISA.
150		 * As of today (2014) that's exactly the members of the A and R
151		 * classes.
152		 */
153 AR_CLASS(	.arm	)
154start:
155		.type	start,#function
156		.rept	7
157		__nop
158		.endr
159#ifndef CONFIG_THUMB2_KERNEL
160		mov	r0, r0
161#else
162 AR_CLASS(	sub	pc, pc, #3	)	@ A/R: switch to Thumb2 mode
163  M_CLASS(	nop.w			)	@ M: already in Thumb2 mode
164		.thumb
165#endif
166		W(b)	1f
167
		/* Data words describing the image; skipped by the branch above */
168		.word	_magic_sig	@ Magic numbers to help the loader
169		.word	_magic_start	@ absolute load/run zImage address
170		.word	_magic_end	@ zImage end address
171		.word	0x04030201	@ endianness flag
172		.word	0x45454545	@ another magic number to indicate
173		.word	_magic_table	@ additional data table
174
175		__EFI_HEADER
1761:
177 ARM_BE8(	setend	be		)	@ go BE8 if compiled for BE8
		/* r9 = CPSR as found on entry; written to the SPSR below */
178 AR_CLASS(	mrs	r9, cpsr	)
179#ifdef CONFIG_ARM_VIRT_EXT
180		bl	__hyp_stub_install	@ get into SVC mode, reversibly
181#endif
182		mov	r7, r1			@ save architecture ID
183		mov	r8, r2			@ save atags pointer
184
185#ifndef CONFIG_CPU_V7M
186		/*
187		 * Booting from Angel - need to enter SVC mode and disable
188		 * FIQs/IRQs (numeric definitions from angel arm.h source).
189		 * We only do this if we were in user mode on entry.
190		 */
191		mrs	r2, cpsr		@ get current mode
192		tst	r2, #3			@ not user?
193		bne	not_angel
194		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
195 ARM(		swi	0x123456	)	@ angel_SWI_ARM
196 THUMB(		svc	0xab		)	@ angel_SWI_THUMB
197not_angel:
198		safe_svcmode_maskall r0
199		msr	spsr_cxsf, r9		@ Save the CPU boot mode in
200						@ SPSR
201#endif
202		/*
203		 * Note that some cache flushing and other stuff may
204		 * be needed here - is there an Angel SWI call for this?
205		 */
206
207		/*
208		 * some architecture specific code can be inserted
209		 * by the linker here, but it should preserve r7, r8, and r9.
210		 */
211
/*
 * Choose the address the decompressed kernel will run at (r4), then turn
 * the cache on immediately unless doing so could place the page tables on
 * top of this image.  In that case bit 0 of r4 is set instead, and the
 * cache is enabled later, once the image has moved out of the way.
 */
212		.text
213
214#ifdef CONFIG_AUTO_ZRELADDR
215		/*
216		 * Find the start of physical memory.  As we are executing
217		 * without the MMU on, we are in the physical address space.
218		 * We just need to get rid of any offset by aligning the
219		 * address.
220		 *
221		 * This alignment is a balance between the requirements of
222		 * different platforms - we have chosen 128MB to allow
223		 * platforms which align the start of their physical memory
224		 * to 128MB to use this feature, while allowing the zImage
225		 * to be placed within the first 128MB of memory on other
226		 * platforms.  Increasing the alignment means we place
227		 * stricter alignment requirements on the start of physical
228		 * memory, but relaxing it means that we break people who
229		 * are already placing their zImage in (eg) the top 64MB
230		 * of this range.
231		 */
232		mov	r4, pc
233		and	r4, r4, #0xf8000000
234		/* Determine final kernel image address. */
235		add	r4, r4, #TEXT_OFFSET
236#else
237		ldr	r4, =zreladdr
238#endif
239
240		/*
241		 * Set up a page table only if it won't overwrite ourself.
242		 * That means r4 < pc || r4 - 16k page directory > &_end.
243		 * Given that r4 > &_end is very infrequent, we add a rough
244		 * additional 1MB of room for a possible appended DTB.
		 *
		 * LC0+32 is the last word of the LC0 table
		 * (_end - restart + 16384 + 1MB).  Every instruction after
		 * the first cmp is conditional on "pc < r4", so cache_on is
		 * called when either comparison leaves the carry flag set.
245		 */
246		mov	r0, pc
247		cmp	r0, r4
248		ldrcc	r0, LC0+32
249		addcc	r0, r0, pc
250		cmpcc	r4, r0
251		orrcc	r4, r4, #1		@ remember we skipped cache_on
252		blcs	cache_on
253
/*
 * restart - start of the relocatable part; the self-relocation code
 * jumps back here in the moved copy.
 *
 * Loads the link-time addresses stored in LC0, derives the run-time
 * offset (r0 = where LC0 is now - where LC0 was linked) and reads the
 * inflated kernel size into r9.
 *
 * Registers after this block:
 *   r0  = delta              r2/r3   = BSS start/end (link-time values)
 *   r6  = _edata (run-time)  r9      = size of decompressed image
 *   r10 = end of this image  r11/r12 = GOT start/end (link-time values)
 *   sp  = stack pointer (relocated unless CONFIG_ZBOOT_ROM)
 */
254restart:	adr	r0, LC0
255		ldmia	r0, {r1, r2, r3, r6, r10, r11, r12}
		/* eighth LC0 word (offset 28) is .L_user_stack_end */
256		ldr	sp, [r0, #28]
257
258		/*
259		 * We might be running at a different address.  We need
260		 * to fix up various pointers.
261		 */
262		sub	r0, r0, r1		@ calculate the delta offset
263		add	r6, r6, r0		@ _edata
264		add	r10, r10, r0		@ inflated kernel size location
265
266		/*
267		 * The kernel build system appends the size of the
268		 * decompressed kernel at the end of the compressed data
269		 * in little-endian form.
		 *
		 * It is read one byte at a time, so the result does not
		 * depend on the alignment of r10 or on CPU endianness.
270		 */
271		ldrb	r9, [r10, #0]
272		ldrb	lr, [r10, #1]
273		orr	r9, r9, lr, lsl #8
274		ldrb	lr, [r10, #2]
275		ldrb	r10, [r10, #3]
276		orr	r9, r9, lr, lsl #16
277		orr	r9, r9, r10, lsl #24
278
279#ifndef CONFIG_ZBOOT_ROM
280		/* malloc space is above the relocated stack (64k max) */
281		add	sp, sp, r0
282		add	r10, sp, #0x10000
283#else
284		/*
285		 * With ZBOOT_ROM the bss/stack is non relocatable,
286		 * but someone could still run this code from RAM,
287		 * in which case our reference is _edata.
288		 */
289		mov	r10, r6
290#endif
291
/*
 * Appended device tree handling.
 *
 * r5 is cleared first so that it reads as "no appended DTB" for the code
 * that follows.  With CONFIG_ARM_APPENDED_DTB, the word at _edata is
 * compared with the DTB signature; on a match r8 is pointed at the
 * appended DTB and r6, r10 and sp are advanced past it.
 */
292		mov	r5, #0			@ init dtb size to 0
293#ifdef CONFIG_ARM_APPENDED_DTB
294/*
295 *   r0  = delta
296 *   r2  = BSS start
297 *   r3  = BSS end
298 *   r4  = final kernel address (possibly with LSB set)
299 *   r5  = appended dtb size (still unknown)
300 *   r6  = _edata
301 *   r7  = architecture ID
302 *   r8  = atags/device tree pointer
303 *   r9  = size of decompressed image
304 *   r10 = end of this image, including  bss/stack/malloc space if non XIP
305 *   r11 = GOT start
306 *   r12 = GOT end
307 *   sp  = stack pointer
308 *
309 * if there are device trees (dtb) appended to zImage, advance r10 so that the
310 * dtb data will get relocated along with the kernel if necessary.
311 */
312
313		ldr	lr, [r6, #0]
314#ifndef __ARMEB__
315		ldr	r1, =0xedfe0dd0		@ sig is 0xd00dfeed big endian
316#else
317		ldr	r1, =0xd00dfeed
318#endif
319		cmp	lr, r1
320		bne	dtb_check_done		@ not found
321
322#ifdef CONFIG_ARM_ATAG_DTB_COMPAT
323		/*
324		 * OK... Let's do some funky business here.
325		 * If we do have a DTB appended to zImage, and we do have
326		 * an ATAG list around, we want the latter to be translated
327		 * and folded into the former here. No GOT fixup has occurred
328		 * yet, but none of the code we're about to call uses any
329		 * global variable.
330		*/
331
332		/* Get the initial DTB size */
333		ldr	r5, [r6, #4]
334#ifndef __ARMEB__
335		/* convert to little endian */
336		eor	r1, r5, r5, ror #16
337		bic	r1, r1, #0x00ff0000
338		mov	r5, r5, ror #8
339		eor	r5, r5, r1, lsr #8
340#endif
341		/* 50% DTB growth should be good enough */
342		add	r5, r5, r5, lsr #1
343		/* preserve 64-bit alignment */
344		add	r5, r5, #7
345		bic	r5, r5, #7
346		/* clamp to 32KB min and 1MB max */
347		cmp	r5, #(1 << 15)
348		movlo	r5, #(1 << 15)
349		cmp	r5, #(1 << 20)
350		movhi	r5, #(1 << 20)
351		/* temporarily relocate the stack past the DTB work space */
352		add	sp, sp, r5
353
		/* atags_to_fdt(r0 = atag list, r1 = DTB, r2 = work space size) */
354		stmfd	sp!, {r0-r3, ip, lr}
355		mov	r0, r8
356		mov	r1, r6
357		mov	r2, r5
358		bl	atags_to_fdt
359
360		/*
361		 * If returned value is 1, there is no ATAG at the location
362		 * pointed by r8.  Try the typical 0x100 offset from start
363		 * of RAM and hope for the best.
		 *
		 * None of the instructions between the cmp and the bleq
		 * sets the flags, so the second call only happens when the
		 * first one returned 1.
364		 */
365		cmp	r0, #1
366		sub	r0, r4, #TEXT_OFFSET
367		bic	r0, r0, #1
368		add	r0, r0, #0x100
369		mov	r1, r6
370		mov	r2, r5
371		bleq	atags_to_fdt
372
373		ldmfd	sp!, {r0-r3, ip, lr}
374		sub	sp, sp, r5
375#endif
376
377		mov	r8, r6			@ use the appended device tree
378
379		/*
380		 * Make sure that the DTB doesn't end up in the final
381		 * kernel's .bss area. To do so, we adjust the decompressed
382		 * kernel size to compensate if that .bss size is larger
383		 * than the relocated code.
384		 */
385		ldr	r5, =_kernel_bss_size
386		adr	r1, wont_overwrite
387		sub	r1, r6, r1
388		subs	r1, r5, r1
389		addhi	r9, r9, r1
390
391		/* Get the current DTB size */
392		ldr	r5, [r6, #4]
393#ifndef __ARMEB__
394		/* convert r5 (dtb size) to little endian */
395		eor	r1, r5, r5, ror #16
396		bic	r1, r1, #0x00ff0000
397		mov	r5, r5, ror #8
398		eor	r5, r5, r1, lsr #8
399#endif
400
401		/* preserve 64-bit alignment */
402		add	r5, r5, #7
403		bic	r5, r5, #7
404
405		/* relocate some pointers past the appended dtb */
406		add	r6, r6, r5
407		add	r10, r10, r5
408		add	sp, sp, r5
409dtb_check_done:
410#endif
411
412/*
413 * Check to see if we will overwrite ourselves.
414 *   r4  = final kernel address (possibly with LSB set)
415 *   r9  = size of decompressed image
416 *   r10 = end of this image, including  bss/stack/malloc space if non XIP
417 * We basically want:
418 *   r4 - 16k page directory >= r10 -> OK
419 *   r4 + image length <= address of wont_overwrite -> OK
420 * Note: the possible LSB in r4 is harmless here.
 *
 * r9 and r10 are reused as scratch: when neither test passes and
 * execution falls through, r10 = r4 + image length and r9 holds the
 * address of wont_overwrite.
421 */
422		add	r10, r10, #16384
423		cmp	r4, r10
424		bhs	wont_overwrite
425		add	r10, r4, r9
426		adr	r9, wont_overwrite
427		cmp	r10, r9
428		bls	wont_overwrite
429
430/*
431 * Relocate ourselves past the end of the decompressed kernel.
432 *   r6  = _edata
433 *   r10 = end of the decompressed kernel
434 * Because we always copy ahead, we need to do it from the end and go
435 * backward in case the source and destination overlap.
 *
 * After the copy, r6 holds the distance between the old and the new
 * location; sp is moved by that amount (non-ZBOOT_ROM builds), the cache
 * is cleaned and execution continues at "restart" in the new copy.
436 */
437		/*
438		 * Bump to the next 256-byte boundary with the size of
439		 * the relocation code added. This avoids overwriting
440		 * ourself when the offset is small.
441		 */
442		add	r10, r10, #((reloc_code_end - restart + 256) & ~255)
443		bic	r10, r10, #255
444
445		/* Get start of code we want to copy and align it down. */
446		adr	r5, restart
447		bic	r5, r5, #31
448
449/* Relocate the hyp vector base if necessary */
450#ifdef CONFIG_ARM_VIRT_EXT
451		mrs	r0, spsr
452		and	r0, r0, #MODE_MASK
453		cmp	r0, #HYP_MODE
454		bne	1f
455
456		/*
457		 * Compute the address of the hyp vectors after relocation.
458		 * This requires some arithmetic since we cannot directly
459		 * reference __hyp_stub_vectors in a PC-relative way.
460		 * Call __hyp_set_vectors with the new address so that we
461		 * can HVC again after the copy.
		 *
		 * r0 = current address of the vectors - r5 (copy source)
		 *      + r10 (copy destination).
462		 */
4630:		adr	r0, 0b
464		movw	r1, #:lower16:__hyp_stub_vectors - 0b
465		movt	r1, #:upper16:__hyp_stub_vectors - 0b
466		add	r0, r0, r1
467		sub	r0, r0, r5
468		add	r0, r0, r10
469		bl	__hyp_set_vectors
4701:
471#endif
472
		/* r6 = end of source, r9 = end of destination (both exclusive) */
473		sub	r9, r6, r5		@ size to copy
474		add	r9, r9, #31		@ rounded up to a multiple
475		bic	r9, r9, #31		@ ... of 32 bytes
476		add	r6, r9, r5
477		add	r9, r9, r10
478
479#ifdef DEBUG
480		sub     r10, r6, r5
481		sub     r10, r9, r10
482		/*
483		 * We are about to copy the kernel to a new memory area.
484		 * The boundaries of the new memory area can be found in
485		 * r10 and r9, whilst r5 and r6 contain the boundaries
486		 * of the memory we are going to copy.
487		 * Calling dbgkc will help with the printing of this
488		 * information.
489		 */
490		dbgkc	r5, r6, r10, r9
491#endif
492
		/*
		 * Copy 32 bytes (eight registers) per iteration, walking
		 * downwards.  stmdb does not change the flags, so the bhi
		 * still tests the cmp placed between the load and the store.
		 */
4931:		ldmdb	r6!, {r0 - r3, r10 - r12, lr}
494		cmp	r6, r5
495		stmdb	r9!, {r0 - r3, r10 - r12, lr}
496		bhi	1b
497
498		/* Preserve offset to relocated code. */
499		sub	r6, r9, r6
500
501#ifndef CONFIG_ZBOOT_ROM
502		/* cache_clean_flush may use the stack, so relocate it */
503		add	sp, sp, r6
504#endif
505
506		bl	cache_clean_flush
507
		/* continue at "restart" inside the copy that was just made */
508		badr	r0, restart
509		add	r0, r0, r6
510		mov	pc, r0
511
512wont_overwrite:
513/*
514 * If delta is zero, we are running at the address we were linked at.
515 *   r0  = delta
516 *   r2  = BSS start
517 *   r3  = BSS end
518 *   r4  = kernel execution address (possibly with LSB set)
519 *   r5  = appended dtb size (0 if not present)
520 *   r7  = architecture ID
521 *   r8  = atags pointer
522 *   r11 = GOT start
523 *   r12 = GOT end
524 *   sp  = stack pointer
 *
 * The GOT (and, for non-ZBOOT_ROM builds, the BSS bounds) only needs
 * adjusting when the delta or the dtb size is non-zero; when both are
 * zero the whole fixup is skipped.
525 */
526		orrs	r1, r0, r5
527		beq	not_relocated
528
529		add	r11, r11, r0
530		add	r12, r12, r0
531
532#ifndef CONFIG_ZBOOT_ROM
533		/*
534		 * If we're running fully PIC === CONFIG_ZBOOT_ROM = n,
535		 * we need to fix up pointers into the BSS region.
536		 * Note that the stack pointer has already been fixed up.
537		 */
538		add	r2, r2, r0
539		add	r3, r3, r0
540
541		/*
542		 * Relocate all entries in the GOT table.
543		 * Bump bss entries to _edata + dtb size
		 *
		 * Every entry gets the delta added.  The addhi only runs
		 * when the first cmp found entry >= r2 and the second
		 * found r3 > entry.
544		 */
5451:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
546		add	r1, r1, r0		@ This fixes up C references
547		cmp	r1, r2			@ if entry >= bss_start &&
548		cmphs	r3, r1			@       bss_end > entry
549		addhi	r1, r1, r5		@    entry += dtb size
550		str	r1, [r11], #4		@ next entry
551		cmp	r11, r12
552		blo	1b
553
554		/* bump our bss pointers too */
555		add	r2, r2, r5
556		add	r3, r3, r5
557
558#else
559
560		/*
561		 * Relocate entries in the GOT table.  We only relocate
562		 * the entries that are outside the (relocated) BSS region.
563		 */
5641:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
565		cmp	r1, r2			@ entry < bss_start ||
566		cmphs	r3, r1			@ _end < entry
567		addlo	r1, r1, r0		@ table.  This fixes up the
568		str	r1, [r11], #4		@ C references.
569		cmp	r11, r12
570		blo	1b
571#endif
572
/*
 * not_relocated - zero the BSS, enable the cache if that was deferred,
 * decompress the kernel and hand over to it.
 *
 * The clear loop stores four words before each comparison: at least 16
 * bytes are always written, and up to 12 bytes past r3 are written when
 * the BSS size is not a multiple of 16.
 */
573not_relocated:	mov	r0, #0
5741:		str	r0, [r2], #4		@ clear bss
575		str	r0, [r2], #4
576		str	r0, [r2], #4
577		str	r0, [r2], #4
578		cmp	r2, r3
579		blo	1b
580
581		/*
582		 * Did we skip the cache setup earlier?
583		 * That is indicated by the LSB in r4.
584		 * Do it now if so.
		 * (bic does not change the flags, so blne still acts on
		 * the result of the tst.)
585		 */
586		tst	r4, #1
587		bic	r4, r4, #1
588		blne	cache_on
589
590/*
591 * The C runtime environment should now be setup sufficiently.
592 * Set up some pointers, and start decompressing.
593 *   r4  = kernel execution address
594 *   r7  = architecture ID
595 *   r8  = atags pointer
 *
 * decompress_kernel receives r0 = r4, r1 = sp, r2 = sp + 64k and r3 = r7.
 * Afterwards the cache is cleaned and switched off again.
596 */
597		mov	r0, r4
598		mov	r1, sp			@ malloc space above stack
599		add	r2, sp, #0x10000	@ 64k max
600		mov	r3, r7
601		bl	decompress_kernel
602		bl	cache_clean_flush
603		bl	cache_off
604
605#ifdef CONFIG_ARM_VIRT_EXT
606		mrs	r0, spsr		@ Get saved CPU boot mode
607		and	r0, r0, #MODE_MASK
608		cmp	r0, #HYP_MODE		@ if not booted in HYP mode...
609		bne	__enter_kernel		@ boot kernel directly
610
		/*
		 * The word at .L__hyp_reentry_vectors_offset holds the
		 * distance from itself to __hyp_reentry_vectors, so adding
		 * its own address gives the run-time address of the vectors.
		 */
611		adr	r12, .L__hyp_reentry_vectors_offset
612		ldr	r0, [r12]
613		add	r0, r0, r12
614
615		bl	__hyp_set_vectors
616		__HVC(0)			@ otherwise bounce to hyp mode
617
618		b	.			@ should never be reached
619
620		.align	2
621.L__hyp_reentry_vectors_offset:	.long	__hyp_reentry_vectors - .
622#else
623		b	__enter_kernel
624#endif
625
/*
 * LC0 - table of link-time addresses read at "restart".
 *
 * The first seven words are loaded with a single ldmia into the registers
 * named in the comments; the eighth is loaded into sp from offset 28.
 * The ninth word (offset 32) is the size bound used by the early
 * "will the page table overwrite us" test, which reads it as LC0+32.
 * The first word is LC0's own link-time address, so the difference to its
 * run-time address gives the relocation delta.
 */
626		.align	2
627		.type	LC0, #object
628LC0:		.word	LC0			@ r1
629		.word	__bss_start		@ r2
630		.word	_end			@ r3
631		.word	_edata			@ r6
632		.word	input_data_end - 4	@ r10 (inflated size location)
633		.word	_got_start		@ r11
634		.word	_got_end		@ ip
635		.word	.L_user_stack_end	@ sp
636		.word	_end - restart + 16384 + 1024*1024
637		.size	LC0, . - LC0
638
/*
 * params (CONFIG_ARCH_RPC only) - global helper that returns the constant
 * 0x10000100 in r0.  The .ltorg emits the literal pool for the ldr right
 * after the routine.
 */
639#ifdef CONFIG_ARCH_RPC
640		.globl	params
641params:		ldr	r0, =0x10000100		@ params_phys for RPC
642		mov	pc, lr
643		.ltorg
644		.align
645#endif
646
647/*
648 * Turn on the cache.  We need to setup some page tables so that we
649 * can have both the I and D caches on.
650 *
651 * We place the page tables 16k down from the kernel execution address,
652 * and we hope that nothing else is using it.  If we're using it, we
653 * will go pop!
654 *
655 * On entry,
656 *  r4 = kernel execution address
657 *  r7 = architecture number
658 *  r8 = atags pointer
659 * On exit,
660 *  r0, r1, r2, r3, r9, r10, r12 corrupted
661 * This routine must preserve:
662 *  r4, r7, r8
 *
 * The value 8 placed in r3 is the byte offset of the 'cache on' slot
 * within a proc_types entry (it follows the two match/mask words);
 * call_cache_fn jumps to that slot of the matching entry.
663 */
664		.align	5
665cache_on:	mov	r3, #8			@ cache_on function
666		b	call_cache_fn
667
668/*
669 * Initialize the highest priority protection region, PR7
670 * to cover all 32bit address and cacheable and bufferable.
 *
 * Only r0 is used as scratch; returns with "mov pc, lr".
671 */
672__armv4_mpu_cache_on:
673		mov	r0, #0x3f		@ 4G, the whole
674		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
675		mcr 	p15, 0, r0, c6, c7, 1
676
677		mov	r0, #0x80		@ PR7
678		mcr	p15, 0, r0, c2, c0, 0	@ D-cache on
679		mcr	p15, 0, r0, c2, c0, 1	@ I-cache on
680		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
681
682		mov	r0, #0xc000
683		mcr	p15, 0, r0, c5, c0, 1	@ I-access permission
684		mcr	p15, 0, r0, c5, c0, 0	@ D-access permission
685
		/* drain and invalidate before the control register is changed */
686		mov	r0, #0
687		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
688		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
689		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
690		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
691						@ ...I .... ..D. WC.M
692		orr	r0, r0, #0x002d		@ .... .... ..1. 11.1
693		orr	r0, r0, #0x1000		@ ...1 .... .... ....
694
695		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
696
697		mov	r0, #0
698		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
699		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
700		mov	pc, lr
701
/*
 * ARMv3 MPU 'cache on' method: programs protection region 7, the cache,
 * write-buffer and access-permission registers, invalidates the cache
 * and then writes the control register.  Only r0 is used as scratch.
 *
 * NOTE(review): as written, the value actually stored in the control
 * register is 0 - the "mov r0, #0" discards the read-modify-write result
 * built just before it - so this routine does not set the W/C/M bits.
 * The original "??" remarks below raise the same doubt; confirm the
 * intended behaviour on real hardware before changing it.
 */
702__armv3_mpu_cache_on:
703		mov	r0, #0x3f		@ 4G, the whole
704		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
705
706		mov	r0, #0x80		@ PR7
707		mcr	p15, 0, r0, c2, c0, 0	@ cache on
708		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
709
710		mov	r0, #0xc000
711		mcr	p15, 0, r0, c5, c0, 0	@ access permission
712
713		mov	r0, #0
714		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
715		/*
716		 * ?? ARMv3 MMU does not allow reading the control register,
717		 * does this really work on ARMv3 MPU?
718		 */
719		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
720						@ .... .... .... WC.M
721		orr	r0, r0, #0x000d		@ .... .... .... 11.1
722		/* ?? this overwrites the value constructed above? */
723		mov	r0, #0
724		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
725
726		/* ?? invalidate for the second time? */
727		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
728		mov	pc, lr
729
/*
 * CB_BITS - cacheable/bufferable bits that the MMU cache_on routines OR
 * into the section flags they pass to __setup_mmu in r6.
 */
730#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
731#define CB_BITS 0x08
732#else
733#define CB_BITS 0x0c
734#endif
735
/*
 * __setup_mmu - build a flat 1:1 section page table below the kernel.
 *
 * On entry:
 *   r4 = kernel execution address
 *   r6 = section flags to use for the RAM area
 * On exit:
 *   r3 = page directory base: r4 - 16384 with the low 14 bits cleared
 *   r0, r1, r2, r9, r10 corrupted
 *
 * All 16384 / 4 = 4096 entries are written, each mapping 1MB onto itself.
 * "RAM" is taken to be the 256MB that start at the page directory address
 * rounded down to a 256KB boundary.
 */
736__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
737		bic	r3, r3, #0xff		@ Align the pointer
738		bic	r3, r3, #0x3f00
739/*
740 * Initialise the page tables, turning on the cacheable and bufferable
741 * bits for the RAM area only.
742 */
743		mov	r0, r3
744		mov	r9, r0, lsr #18
745		mov	r9, r9, lsl #18		@ start of RAM
746		add	r10, r9, #0x10000000	@ a reasonable RAM size
747		mov	r1, #0x12		@ XN|U + section mapping
748		orr	r1, r1, #3 << 10	@ AP=11
749		add	r2, r3, #16384
		/*
		 * r1 is the descriptor for the current megabyte.  bic leaves
		 * the flags alone, so orrlo/orrhs still act on the outcome
		 * of the cmp/cmphs pair.
		 */
7501:		cmp	r1, r9			@ if virt > start of RAM
751		cmphs	r10, r1			@   && end of RAM > virt
752		bic	r1, r1, #0x1c		@ clear XN|U + C + B
753		orrlo	r1, r1, #0x10		@ Set XN|U for non-RAM
754		orrhs	r1, r1, r6		@ set RAM section settings
755		str	r1, [r0], #4		@ 1:1 mapping
756		add	r1, r1, #1048576
757		teq	r0, r2
758		bne	1b
759/*
760 * If ever we are running from Flash, then we surely want the cache
761 * to be enabled also for our execution instance...  We map 2MB of it
762 * so there is no map overlap problem for up to 1 MB compressed kernel.
763 * If the execution is in RAM then we would only be duplicating the above.
764 */
765		orr	r1, r6, #0x04		@ ensure B is set for this
766		orr	r1, r1, #3 << 10
767		mov	r2, pc
768		mov	r2, r2, lsr #20
769		orr	r1, r1, r2, lsl #20
770		add	r0, r3, r2, lsl #2
771		str	r1, [r0], #4
772		add	r1, r1, #1048576
773		str	r1, [r0]
774		mov	pc, lr
775ENDPROC(__setup_mmu)
776
777@ Enable unaligned access on v6, to allow better code generation
778@ for the decompressor C code:
/*
 * Clears bit 1 (A) and sets bit 22 (U) in the control register, then
 * continues in __armv4_mmu_cache_on.  lr is left untouched, so that
 * routine returns straight to this routine's caller.
 */
779__armv6_mmu_cache_on:
780		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
781		bic	r0, r0, #2		@ A (no unaligned access fault)
782		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
783		mcr	p15, 0, r0, c1, c0, 0	@ write SCTLR
784		b	__armv4_mmu_cache_on
785
/*
 * __arm926ejs_mmu_cache_on - with CONFIG_CPU_DCACHE_WRITETHROUGH, first
 * performs an extra coprocessor write (see the inline comment), then
 * falls through into __armv4_mmu_cache_on.
 *
 * __armv4_mmu_cache_on - with CONFIG_MMU: builds the page table via
 * __setup_mmu (r6 = section flags), flushes the TLBs, and enables the MMU
 * and caches through __common_mmu_cache_on.  Without CONFIG_MMU it only
 * returns.
 *
 * lr is kept in r12 because the nested bl instructions overwrite it.
 */
786__arm926ejs_mmu_cache_on:
787#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
788		mov	r0, #4			@ put dcache in WT mode
789		mcr	p15, 7, r0, c15, c0, 0
790#endif
791
792__armv4_mmu_cache_on:
793		mov	r12, lr
794#ifdef CONFIG_MMU
795		mov	r6, #CB_BITS | 0x12	@ U
796		bl	__setup_mmu
797		mov	r0, #0
798		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
799		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
800		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
801		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
802		orr	r0, r0, #0x0030
803 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
		/* expects r0 = control register value, r3 = page table base */
804		bl	__common_mmu_cache_on
805		mov	r0, #0
806		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
807#endif
808		mov	pc, r12
809
/*
 * __armv7_mmu_cache_on - 'cache on' method for CPUs using the new ID scheme.
 *
 * With CONFIG_MMU, ID_MMFR0 is read into r11 and the page table setup,
 * TLB flush, TTB/domain/TTBCR writes and the MMU enable bit are all
 * conditional on its low four bits (VMSA) being non-zero.  The I-cache,
 * cache and unaligned-access bits of the control register are set
 * unconditionally.
 *
 * lr is kept in r12 across the call to __setup_mmu.
 */
810__armv7_mmu_cache_on:
811		mov	r12, lr
812#ifdef CONFIG_MMU
813		mrc	p15, 0, r11, c0, c1, 4	@ read ID_MMFR0
814		tst	r11, #0xf		@ VMSA
815		movne	r6, #CB_BITS | 0x02	@ !XN
816		blne	__setup_mmu
817		mov	r0, #0
818		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		/* test again: the flags may have changed inside __setup_mmu */
819		tst	r11, #0xf		@ VMSA
820		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
821#endif
822		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
823		bic	r0, r0, #1 << 28	@ clear SCTLR.TRE
824		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
825		orr	r0, r0, #0x003c		@ write buffer
826		bic	r0, r0, #2		@ A (no unaligned access fault)
827		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
828						@ (needed for ARM1176)
829#ifdef CONFIG_MMU
		/*
		 * None of the instructions since the second tst sets the
		 * flags, so the "ne" conditions below still mean "VMSA
		 * present".  The two bic instructions on r6 always execute,
		 * but r6 is only written back by the conditional mcrne.
		 */
830 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
831		mrcne   p15, 0, r6, c2, c0, 2   @ read ttb control reg
832		orrne	r0, r0, #1		@ MMU enabled
833		movne	r1, #0xfffffffd		@ domain 0 = client
834		bic     r6, r6, #1 << 31        @ 32-bit translation system
835		bic     r6, r6, #(7 << 0) | (1 << 4)	@ use only ttbr0
836		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
837		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
838		mcrne   p15, 0, r6, c2, c0, 2   @ load ttb control
839#endif
840		mcr	p15, 0, r0, c7, c5, 4	@ ISB
841		mcr	p15, 0, r0, c1, c0, 0	@ load control register
842		mrc	p15, 0, r0, c1, c0, 0	@ and read it back
843		mov	r0, #0
844		mcr	p15, 0, r0, c7, c5, 4	@ ISB
845		mov	pc, r12
846
/*
 * __fa526_cache_on - FA526 'cache on' method.
 * Builds the page table via __setup_mmu (r6 = section flags), invalidates
 * the cache and TLB, then enables the I-cache together with the bits that
 * __common_mmu_cache_on adds.  lr is kept in r12 across the nested calls.
 */
847__fa526_cache_on:
848		mov	r12, lr
849		mov	r6, #CB_BITS | 0x12	@ U
850		bl	__setup_mmu
851		mov	r0, #0
852		mcr	p15, 0, r0, c7, c7, 0	@ Invalidate whole cache
853		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
854		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
855		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
856		orr	r0, r0, #0x1000		@ I-cache enable
857		bl	__common_mmu_cache_on
858		mov	r0, #0
859		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
860		mov	pc, r12
861
/*
 * __common_mmu_cache_on - shared tail of the MMU 'cache on' methods.
 *
 * On entry:
 *   r0 = control register value prepared by the caller
 *   r3 = page table base (as left by __setup_mmu)
 *   lr = return address
 * r1 is corrupted.
 *
 * Loads the page table pointer, sets every bit of the domain access
 * control register (r1 = -1) and writes the control register.  Unless
 * DEBUG is defined, bits 0x000d are ORed into r0 first.  The body is
 * empty when CONFIG_THUMB2_KERNEL is set.
 */
862__common_mmu_cache_on:
863#ifndef CONFIG_THUMB2_KERNEL
864#ifndef DEBUG
865		orr	r0, r0, #0x000d		@ Write buffer, mmu
866#endif
867		mov	r1, #-1
868		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
869		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
870		b	1f
871		.align	5			@ cache line aligned
8721:		mcr	p15, 0, r0, c1, c0, 0	@ load control register
873		mrc	p15, 0, r0, c1, c0, 0	@ and read it back to
		/*
		 * "r0, lsr #32" evaluates to zero, so this is a return to lr
		 * that nevertheless depends on the value just read back.
		 */
874		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
875#endif
876
/* Size in bytes of one proc_types entry: two words plus three 4-byte slots */
877#define PROC_ENTRY_SIZE (4*5)
878
879/*
880 * Here follow the relocatable cache support functions for the
881 * various processors.  This is a generic hook for locating an
882 * entry and jumping to an instruction at the specified offset
883 * from the start of the block.  Please note this is all position
884 * independent code.
885 *
886 *  r1  = corrupted
887 *  r2  = corrupted
888 *  r3  = block offset
889 *  r9  = corrupted
890 *  r12 = corrupted
 *
 * The selected slot is entered with a jump, not a call: lr still holds
 * the return address of whoever called cache_on/cache_off/
 * cache_clean_flush.  The loop itself has no end-of-table test; it
 * relies on the table's final entry (match 0, mask 0), which matches
 * every ID.
891 */
892
893call_cache_fn:	adr	r12, proc_types
894#ifdef CONFIG_CPU_CP15
895		mrc	p15, 0, r9, c0, c0	@ get processor ID
896#elif defined(CONFIG_CPU_V7M)
897		/*
898		 * On v7-M the processor id is located in the V7M_SCB_CPUID
899		 * register, but as cache handling is IMPLEMENTATION DEFINED on
900		 * v7-M (if existent at all) we just return early here.
901		 * If V7M_SCB_CPUID were used the cpu ID functions (i.e.
902		 * __armv7_mmu_cache_{on,off,flush}) would be selected which
903		 * use cp15 registers that are not implemented on v7-M.
904		 */
905		bx	lr
906#else
907		ldr	r9, =CONFIG_PROCESSOR_ID
908#endif
9091:		ldr	r1, [r12, #0]		@ get value
910		ldr	r2, [r12, #4]		@ get mask
911		eor	r1, r1, r9		@ (real ^ match)
912		tst	r1, r2			@       & mask
913 ARM(		addeq	pc, r12, r3		) @ call cache function
914 THUMB(		addeq	r12, r3			)
915 THUMB(		moveq	pc, r12			) @ call cache function
916		add	r12, r12, #PROC_ENTRY_SIZE
917		b	1b
918
919/*
920 * Table for cache operations.  This is basically:
921 *   - CPU ID match
922 *   - CPU ID mask
923 *   - 'cache on' method instruction
924 *   - 'cache off' method instruction
925 *   - 'cache flush' method instruction
926 *
927 * We match an entry using: ((real_id ^ match) & mask) == 0
928 *
929 * Writethrough caches generally only need 'on' and 'off'
930 * methods.  Writeback caches _must_ have the flush method
931 * defined.
 *
 * Every entry must be exactly PROC_ENTRY_SIZE bytes, because
 * call_cache_fn steps through the table by that amount and jumps to a
 * fixed offset inside the matching entry.  For that reason each method
 * slot is either a W(b) branch or a "mov pc, lr" padded with a THUMB()
 * nop - never a bare "b".  The first matching entry wins, so order
 * matters; the final all-zero entry matches any ID.
932 */
933		.align	2
934		.type	proc_types,#object
935proc_types:
936		.word	0x41000000		@ old ARM ID
937		.word	0xff00f000
938		mov	pc, lr
939 THUMB(		nop				)
940		mov	pc, lr
941 THUMB(		nop				)
942		mov	pc, lr
943 THUMB(		nop				)
944
945		.word	0x41007000		@ ARM7/710
946		.word	0xfff8fe00
947		mov	pc, lr
948 THUMB(		nop				)
949		mov	pc, lr
950 THUMB(		nop				)
951		mov	pc, lr
952 THUMB(		nop				)
953
954		.word	0x41807200		@ ARM720T (writethrough)
955		.word	0xffffff00
956		W(b)	__armv4_mmu_cache_on
957		W(b)	__armv4_mmu_cache_off
958		mov	pc, lr
959 THUMB(		nop				)
960
961		.word	0x41007400		@ ARM74x
962		.word	0xff00ff00
963		W(b)	__armv3_mpu_cache_on
964		W(b)	__armv3_mpu_cache_off
965		W(b)	__armv3_mpu_cache_flush
966
967		.word	0x41009400		@ ARM94x
968		.word	0xff00ff00
969		W(b)	__armv4_mpu_cache_on
970		W(b)	__armv4_mpu_cache_off
971		W(b)	__armv4_mpu_cache_flush
972
973		.word	0x41069260		@ ARM926EJ-S (v5TEJ)
974		.word	0xff0ffff0
975		W(b)	__arm926ejs_mmu_cache_on
976		W(b)	__armv4_mmu_cache_off
977		W(b)	__armv5tej_mmu_cache_flush
978
979		.word	0x00007000		@ ARM7 IDs
980		.word	0x0000f000
981		mov	pc, lr
982 THUMB(		nop				)
983		mov	pc, lr
984 THUMB(		nop				)
985		mov	pc, lr
986 THUMB(		nop				)
987
988		@ Everything from here on will be the new ID system.
989
990		.word	0x4401a100		@ sa110 / sa1100
991		.word	0xffffffe0
992		W(b)	__armv4_mmu_cache_on
993		W(b)	__armv4_mmu_cache_off
994		W(b)	__armv4_mmu_cache_flush
995
996		.word	0x6901b110		@ sa1110
997		.word	0xfffffff0
998		W(b)	__armv4_mmu_cache_on
999		W(b)	__armv4_mmu_cache_off
1000		W(b)	__armv4_mmu_cache_flush
1001
1002		.word	0x56056900		@ PXA9xx
1003		.word	0xffffff00
1004		W(b)	__armv4_mmu_cache_on
1005		W(b)	__armv4_mmu_cache_off
1006		W(b)	__armv4_mmu_cache_flush
1007
1008		.word	0x56158000		@ PXA168
1009		.word	0xfffff000
1010		W(b)	__armv4_mmu_cache_on
1011		W(b)	__armv4_mmu_cache_off
1012		W(b)	__armv5tej_mmu_cache_flush
1013
1014		.word	0x56050000		@ Feroceon
1015		.word	0xff0f0000
1016		W(b)	__armv4_mmu_cache_on
1017		W(b)	__armv4_mmu_cache_off
1018		W(b)	__armv5tej_mmu_cache_flush
1019
1020#ifdef CONFIG_CPU_FEROCEON_OLD_ID
1021		/* this conflicts with the standard ARMv5TE entry */
		/*
		 * Uses .word and W(b) like every other entry so that the
		 * entry size stays PROC_ENTRY_SIZE in every build mode.
		 */
1022		.word	0x41009260		@ Old Feroceon
1023		.word	0xff00fff0
1024		W(b)	__armv4_mmu_cache_on
1025		W(b)	__armv4_mmu_cache_off
1026		W(b)	__armv5tej_mmu_cache_flush
1027#endif
1028
1029		.word	0x66015261		@ FA526
1030		.word	0xff01fff1
1031		W(b)	__fa526_cache_on
1032		W(b)	__armv4_mmu_cache_off
1033		W(b)	__fa526_cache_flush
1034
1035		@ These match on the architecture ID
1036
1037		.word	0x00020000		@ ARMv4T
1038		.word	0x000f0000
1039		W(b)	__armv4_mmu_cache_on
1040		W(b)	__armv4_mmu_cache_off
1041		W(b)	__armv4_mmu_cache_flush
1042
1043		.word	0x00050000		@ ARMv5TE
1044		.word	0x000f0000
1045		W(b)	__armv4_mmu_cache_on
1046		W(b)	__armv4_mmu_cache_off
1047		W(b)	__armv4_mmu_cache_flush
1048
1049		.word	0x00060000		@ ARMv5TEJ
1050		.word	0x000f0000
1051		W(b)	__armv4_mmu_cache_on
1052		W(b)	__armv4_mmu_cache_off
1053		W(b)	__armv5tej_mmu_cache_flush
1054
1055		.word	0x0007b000		@ ARMv6
1056		.word	0x000ff000
1057		W(b)	__armv6_mmu_cache_on
1058		W(b)	__armv4_mmu_cache_off
1059		W(b)	__armv6_mmu_cache_flush
1060
1061		.word	0x000f0000		@ new CPU Id
1062		.word	0x000f0000
1063		W(b)	__armv7_mmu_cache_on
1064		W(b)	__armv7_mmu_cache_off
1065		W(b)	__armv7_mmu_cache_flush
1066
1067		.word	0			@ unrecognised type
1068		.word	0
1069		mov	pc, lr
1070 THUMB(		nop				)
1071		mov	pc, lr
1072 THUMB(		nop				)
1073		mov	pc, lr
1074 THUMB(		nop				)
1075
1076		.size	proc_types, . - proc_types
1077
1078		/*
1079		 * If you get a "non-constant expression in ".if" statement"
1080		 * error from the assembler on this line, check that you have
1081		 * not accidentally written a "b" instruction where you should
1082		 * have written W(b).
1083		 */
1084		.if (. - proc_types) % PROC_ENTRY_SIZE != 0
1085		.error "The size of one or more proc_types entries is wrong."
1086		.endif
1087
1088/*
1089 * Turn off the Cache and MMU.  ARMv3 does not support
1090 * reading the control register, but ARMv4 does.
1091 *
1092 * On exit,
1093 *  r0, r1, r2, r3, r9, r12 corrupted
1094 * This routine must preserve:
1095 *  r4, r7, r8
 *
 * The value 12 placed in r3 is the byte offset of the 'cache off' slot
 * within a proc_types entry; call_cache_fn jumps to that slot.
1096 */
1097		.align	5
1098cache_off:	mov	r3, #12			@ cache_off function
1099		b	call_cache_fn
1100
/*
 * ARMv4 MPU 'cache off' method: clears bits 0x000d of the control
 * register, then drains the write buffer and flushes both caches with a
 * zero operand.  Only r0 is used as scratch.
 */
1101__armv4_mpu_cache_off:
1102		mrc	p15, 0, r0, c1, c0
1103		bic	r0, r0, #0x000d
1104		mcr	p15, 0, r0, c1, c0	@ turn MPU and cache off
1105		mov	r0, #0
1106		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
1107		mcr	p15, 0, r0, c7, c6, 0	@ flush D-Cache
1108		mcr	p15, 0, r0, c7, c5, 0	@ flush I-Cache
1109		mov	pc, lr
1110
/*
 * ARMv3 MPU 'cache off' method: clears bits 0x000d of the control
 * register and invalidates the whole cache.  Only r0 is used as scratch.
 */
1111__armv3_mpu_cache_off:
1112		mrc	p15, 0, r0, c1, c0
1113		bic	r0, r0, #0x000d
1114		mcr	p15, 0, r0, c1, c0, 0	@ turn MPU and cache off
1115		mov	r0, #0
1116		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
1117		mov	pc, lr
1118
/*
 * ARMv4 MMU 'cache off' method.  With CONFIG_MMU it clears bits 0x000d
 * of the control register and invalidates the cache and the TLB; without
 * CONFIG_MMU it only returns.  Only r0 is used as scratch.
 */
1119__armv4_mmu_cache_off:
1120#ifdef CONFIG_MMU
1121		mrc	p15, 0, r0, c1, c0
1122		bic	r0, r0, #0x000d
1123		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
1124		mov	r0, #0
1125		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
1126		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
1127#endif
1128		mov	pc, lr
1129
@ ARMv7 flavour of cache_off.
@ Clears control-register bits 0, 2 and 3 (0x000d) when CONFIG_MMU is
@ set, or only bits 2 and 3 (0x000c) otherwise, then calls
@ __armv7_mmu_cache_flush and finishes with TLB (CONFIG_MMU only) and
@ BTC invalidation followed by DSB and ISB.
@ Writes r0 and r12 itself; anything __armv7_mmu_cache_flush writes
@ (r10 at least) is clobbered too.
1130__armv7_mmu_cache_off:
1131		mrc	p15, 0, r0, c1, c0
1132#ifdef CONFIG_MMU
1133		bic	r0, r0, #0x000d
1134#else
1135		bic	r0, r0, #0x000c
1136#endif
1137		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
1138		mov	r12, lr			@ keep return address: bl below overwrites lr
1139		bl	__armv7_mmu_cache_flush
1140		mov	r0, #0
1141#ifdef CONFIG_MMU
1142		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB
1143#endif
1144		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
1145		mcr	p15, 0, r0, c7, c10, 4	@ DSB
1146		mcr	p15, 0, r0, c7, c5, 4	@ ISB
1147		mov	pc, r12			@ return through the address saved in r12
1148
1149/*
1150 * Clean and flush the cache to maintain consistency.
1151 *
 * This entry point only loads r3 with 16 and branches to call_cache_fn
 * (defined outside this excerpt).  In the proc_types entries, 16 is the
 * byte offset of the third branch slot, the cache_flush branch --
 * assuming every W(b) occupies 4 bytes.
 *
 * Every flush implementation in this file starts by testing bit 0 of
 * r4 and does no data-cache maintenance when it is set.  Presumably
 * that bit is a caller-maintained "caches were not enabled" flag;
 * confirm at the call sites.
 *
1152 * On exit,
1153 *  r1, r2, r3, r9, r10, r11, r12 corrupted
1154 * This routine must preserve:
1155 *  r4, r6, r7, r8
1156 */
1157		.align	5
1158cache_clean_flush:
1159		mov	r3, #16
1160		b	call_cache_fn
1161
@ ARMv4 MPU flavour of cache_clean_flush.
@ Returns at once when bit 0 of r4 is set.  Otherwise it walks segments
@ 7..0 and entries 63..0, issuing clean+invalidate by index for each
@ combination, then invalidates the I cache and drains the write buffer.
@ Writes r1, r2 and r3.  ip is used as the MCR source register but is
@ never set here -- presumably the value is ignored by those operations;
@ confirm against the CPU documentation.
1162__armv4_mpu_cache_flush:
1163		tst	r4, #1
1164		movne	pc, lr
1165		mov	r2, #1
1166		mov	r3, #0			@ overwritten by the orr at label 1 before any use
1167		mcr	p15, 0, ip, c7, c6, 0	@ invalidate D cache
1168		mov	r1, #7 << 5		@ 8 segments
11691:		orr	r3, r1, #63 << 26	@ 64 entries
11702:		mcr	p15, 0, r3, c7, c14, 2	@ clean & invalidate D index
1171		subs	r3, r3, #1 << 26
1172		bcs	2b			@ entries 63 to 0
1173		subs 	r1, r1, #1 << 5
1174		bcs	1b			@ segments 7 to 0
1175
1176		teq	r2, #0			@ r2 is still the constant 1, so NE always holds
1177		mcrne	p15, 0, ip, c7, c5, 0	@ invalidate I cache
1178		mcr	p15, 0, ip, c7, c10, 4	@ drain WB
1179		mov	pc, lr
1180
@ FA526 flavour of cache_clean_flush.
@ Returns at once when bit 0 of r4 is set.  Otherwise it cleans and
@ invalidates the whole D cache, flushes the I cache and drains the
@ write buffer.  The only general-purpose register written is r1.
1181__fa526_cache_flush:
1182		tst	r4, #1
1183		movne	pc, lr
1184		mov	r1, #0
1185		mcr	p15, 0, r1, c7, c14, 0	@ clean and invalidate D cache
1186		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
1187		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1188		mov	pc, lr
1189
@ ARMv6 flavour of cache_clean_flush.
@ Unlike the other variants it does not return early: the I+BTB
@ invalidate and the write-buffer drain always run, while the two
@ clean+invalidate operations (mcreq) run only when bit 0 of r4 is
@ clear.  The only general-purpose register written is r1.
1190__armv6_mmu_cache_flush:
1191		mov	r1, #0
1192		tst	r4, #1			@ Z set when bit 0 of r4 is clear
1193		mcreq	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
1194		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
1195		mcreq	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
1196		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1197		mov	pc, lr
1198
@ ARMv7 flavour of cache_clean_flush.
@
@ Flow:
@  - bit 0 of r4 set: skip all data-cache work and go straight to iflush.
@  - ID_MMFR1 bits 19:16 non-zero: one whole-D-cache clean+invalidate.
@  - otherwise (hierarchical): for every cache level below the LoC read
@    from CLIDR that holds a data or unified cache, clean+invalidate
@    each set/way combination.
@  - iflush: DSB, invalidate I+BTB, DSB, ISB.
@
@ r0-r7, r9 and r11 are saved on the stack around the set/way walk, so a
@ valid sp is required on that path.  r10 is written before the save and
@ again after the restore, so it is clobbered.  On the early "bne iflush"
@ path r10 has not been written by this routine and is used as-is as the
@ MCR source register.
@
@ Register use inside the walk:
@  r0  = CLIDR                 r3  = LoC * 2 (loop bound for r10)
@  r10 = cache level * 2       r1  = CCSIDR of the selected level
@  r2  = line-size field + 4   r4  = maximum way number
@  r5  = clz(r4), left-shift that places the way number
@  r7  = set index, counting down from the maximum
@  r9  = way, counting down   r11 = operand for the set/way operation
1199__armv7_mmu_cache_flush:
1200		tst	r4, #1
1201		bne	iflush			@ bit 0 of r4 set: no D-cache maintenance
1202		mrc	p15, 0, r10, c0, c1, 5	@ read ID_MMFR1
1203		tst	r10, #0xf << 16		@ hierarchical cache (ARMv7)
1204		mov	r10, #0			@ mov leaves the flags from tst intact
1205		beq	hierarchical		@ field is zero: walk the levels by set/way
1206		mcr	p15, 0, r10, c7, c14, 0	@ clean+invalidate D
1207		b	iflush
1208hierarchical:
1209		mcr	p15, 0, r10, c7, c10, 5	@ DMB
1210		stmfd	sp!, {r0-r7, r9-r11}	@ save the registers the walk uses
1211		mrc	p15, 1, r0, c0, c0, 1	@ read clidr
1212		ands	r3, r0, #0x7000000	@ extract loc from clidr
1213		mov	r3, r3, lsr #23		@ left align loc bit field
1214		beq	finished		@ if loc is 0, then no need to clean
1215		mov	r10, #0			@ start clean at cache level 0
1216loop1:
1217		add	r2, r10, r10, lsr #1	@ work out 3x current cache level
1218		mov	r1, r0, lsr r2		@ extract cache type bits from clidr
1219		and	r1, r1, #7		@ mask of the bits for current cache only
1220		cmp	r1, #2			@ see what cache we have at this level
1221		blt	skip			@ skip if no cache, or just i-cache
1222		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
1223		mcr	p15, 0, r10, c7, c5, 4	@ isb to sync the new cssr&csidr
1224		mrc	p15, 1, r1, c0, c0, 0	@ read the new csidr
1225		and	r2, r1, #7		@ extract the length of the cache lines
1226		add	r2, r2, #4		@ add 4 (line length offset)
1227		ldr	r4, =0x3ff
1228		ands	r4, r4, r1, lsr #3	@ find maximum number on the way size
1229		clz	r5, r4			@ find bit position of way size increment
1230		ldr	r7, =0x7fff
1231		ands	r7, r7, r1, lsr #13	@ extract max number of the index size
1232loop2:
1233		mov	r9, r4			@ create working copy of max way size
1234loop3:
1235 ARM(		orr	r11, r10, r9, lsl r5	) @ factor way and cache number into r11
1236 ARM(		orr	r11, r11, r7, lsl r2	) @ factor index number into r11
1237 THUMB(		lsl	r6, r9, r5		)
1238 THUMB(		orr	r11, r10, r6		) @ factor way and cache number into r11
1239 THUMB(		lsl	r6, r7, r2		)
1240 THUMB(		orr	r11, r11, r6		) @ factor index number into r11
1241		mcr	p15, 0, r11, c7, c14, 2	@ clean & invalidate by set/way
1242		subs	r9, r9, #1		@ decrement the way
1243		bge	loop3
1244		subs	r7, r7, #1		@ decrement the index
1245		bge	loop2
1246skip:
1247		add	r10, r10, #2		@ increment cache number
1248		cmp	r3, r10
1249		bgt	loop1			@ continue while level * 2 is below loc * 2
1250finished:
1251		ldmfd	sp!, {r0-r7, r9-r11}	@ restore what was saved at hierarchical
1252		mov	r10, #0			@ switch back to cache level 0
1253		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
1254iflush:
1255		mcr	p15, 0, r10, c7, c10, 4	@ DSB
1256		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I+BTB
1257		mcr	p15, 0, r10, c7, c10, 4	@ DSB
1258		mcr	p15, 0, r10, c7, c5, 4	@ ISB
1259		mov	pc, lr
1260
@ ARMv5TEJ flavour of cache_clean_flush.
@ Returns at once when bit 0 of r4 is set.  Otherwise it repeats the
@ test/clean/invalidate operation until the flags it returns report Z,
@ then flushes the I cache and drains the write buffer.
@ No general-purpose register is written.  r0 is used as the MCR source
@ register without being set here -- presumably its value is ignored;
@ confirm against the CPU documentation.
1261__armv5tej_mmu_cache_flush:
1262		tst	r4, #1
1263		movne	pc, lr
12641:		mrc	p15, 0, r15, c7, c14, 3	@ test,clean,invalidate D cache
1265		bne	1b			@ MRC to r15 sets the flags; loop until Z
1266		mcr	p15, 0, r0, c7, c5, 0	@ flush I cache
1267		mcr	p15, 0, r0, c7, c10, 4	@ drain WB
1268		mov	pc, lr
1269
@ ARMv4 MMU flavour of cache_clean_flush: a software flush.
@ Returns at once when bit 0 of r4 is set.  Otherwise it reads a block
@ of memory twice the D-cache size, one load per cache line, starting
@ at the current pc rounded down to 64 bytes, then flushes the I and D
@ caches and drains the write buffer.
@
@ Sizing: defaults are 64K to read and 32-byte lines.  If the cache type
@ register value differs from r9, the size is 1024 << bits 20:18 (plus
@ one half when bit 14 is set) and the line is 8 << bits 13:12.
@ r9 is not set here; presumably call_cache_fn leaves the processor ID
@ in it, so equality means "no cache type register" -- confirm.
@
@ Writes r1, r2, r3 and r11.
1270__armv4_mmu_cache_flush:
1271		tst	r4, #1
1272		movne	pc, lr
1273		mov	r2, #64*1024		@ default: 32K dcache size (*2)
1274		mov	r11, #32		@ default: 32 byte line size
1275		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
1276		teq	r3, r9			@ cache ID register present?
1277		beq	no_cache_id
1278		mov	r1, r3, lsr #18
1279		and	r1, r1, #7
1280		mov	r2, #1024
1281		mov	r2, r2, lsl r1		@ base dcache size *2
1282		tst	r3, #1 << 14		@ test M bit
1283		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
1284		mov	r3, r3, lsr #12
1285		and	r3, r3, #3
1286		mov	r11, #8
1287		mov	r11, r11, lsl r3	@ cache line size in bytes
1288no_cache_id:
1289		mov	r1, pc			@ read start: address of nearby code
1290		bic	r1, r1, #63		@ align to longest cache line
1291		add	r2, r1, r2		@ r2 = end address of the read loop
12921:
1293 ARM(		ldr	r3, [r1], r11		) @ s/w flush D cache
1294 THUMB(		ldr     r3, [r1]		) @ s/w flush D cache
1295 THUMB(		add     r1, r1, r11		)
1296		teq	r1, r2
1297		bne	1b
1298
1299		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
1300		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
1301		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1302		mov	pc, lr
1303
@ ARMv3 flavour of cache_clean_flush; the MMU and MPU variants share
@ this one body through two labels.
@ Returns at once when bit 0 of r4 is set.  Otherwise it issues a single
@ whole-cache invalidate.  The only general-purpose register written is
@ r1.
1304__armv3_mmu_cache_flush:
1305__armv3_mpu_cache_flush:
1306		tst	r4, #1
1307		movne	pc, lr
1308		mov	r1, #0
1309		mcr	p15, 0, r1, c7, c0, 0	@ invalidate whole cache v3
1310		mov	pc, lr
1311
1312/*
1313 * Various debugging routines for printing hex characters and
1314 * memory, which again must be relocatable.
1315 */
1316#ifdef DEBUG
@ phexbuf: 12-byte scratch buffer that phex fills with the digit string
@ and its terminating zero byte.
1317		.align	2
1318		.type	phexbuf,#object
1319phexbuf:	.space	12
1320		.size	phexbuf, . - phexbuf
1321
@ phex: print a value as upper-case hexadecimal.
@  r0 = value, r1 = number of digits (the kphex macro passes both).
@ Digits are stored from the least significant nibble backwards into
@ phexbuf; once the index goes negative the routine tail-branches to
@ puts with r0 = phexbuf, so puts returns straight to the caller of phex.
@ r1 + 1 bytes are written, so r1 must not exceed 11.
1322@ phex corrupts {r0, r1, r2, r3}
1323phex:		adr	r3, phexbuf
1324		mov	r2, #0
1325		strb	r2, [r3, r1]		@ terminating zero byte after the last digit
13261:		subs	r1, r1, #1
1327		movmi	r0, r3			@ all digits stored: r0 = start of string
1328		bmi	puts
1329		and	r2, r0, #15		@ lowest nibble of the remaining value
1330		mov	r0, r0, lsr #4
1331		cmp	r2, #10
1332		addge	r2, r2, #7		@ 10..15: skip from the digits to the capital letters
1333		add	r2, r2, #'0'
1334		strb	r2, [r3, r1]
1335		b	1b
1336
@ puts: send the zero-terminated string at r0 through the debug channel.
@ loadsp and writeb are the macros selected by the debug configuration
@ at the top of the file; r3 receives whatever loadsp produces and is
@ handed to writeb together with the character.
@ After each character a fixed 0x20000-iteration delay loop runs.  A
@ newline is followed by a carriage return.
1337@ puts corrupts {r0, r1, r2, r3}
1338puts:		loadsp	r3, r2, r1
13391:		ldrb	r2, [r0], #1		@ next character, advance the pointer
1340		teq	r2, #0
1341		moveq	pc, lr			@ end of string
13422:		writeb	r2, r3
1343		mov	r1, #0x00020000
13443:		subs	r1, r1, #1		@ busy-wait delay
1345		bne	3b
1346		teq	r2, #'\n'
1347		moveq	r2, #'\r'
1348		beq	2b			@ send the carriage return as well
1349		teq	r0, #0			@ r0 is zero only when entered from putc
1350		bne	1b
1351		mov	pc, lr
@ putc: send the single character in r0.
@ Shares the transmit code of puts by jumping to its local label 2 with
@ r0 = 0, which makes the "teq r0" test above return after one
@ character (plus the carriage return that follows a newline).
1352@ putc corrupts {r0, r1, r2, r3}
1353putc:
1354		mov	r2, r0
1355		loadsp	r3, r1, r0
1356		mov	r0, #0
1357		b	2b
1358
@ memdump: hex-dump 64 words (256 bytes) starting at the address in r0.
@ Each output line is the 8-digit address, a colon, then 8 words of 8
@ hex digits, each preceded by a space, with one extra space after the
@ fourth word.
@ r12 = base address, r11 = word index, r10 = saved return address
@ (phex and putc are called with bl, which overwrites lr).
1359@ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
1360memdump:	mov	r12, r0
1361		mov	r10, lr
1362		mov	r11, #0
13632:		mov	r0, r11, lsl #2
1364		add	r0, r0, r12		@ address of the first word on this line
1365		mov	r1, #8
1366		bl	phex
1367		mov	r0, #':'
1368		bl	putc
13691:		mov	r0, #' '
1370		bl	putc
1371		ldr	r0, [r12, r11, lsl #2]
1372		mov	r1, #8
1373		bl	phex
1374		and	r0, r11, #7
1375		teq	r0, #3			@ fourth word of the line?
1376		moveq	r0, #' '
1377		bleq	putc
1378		and	r0, r11, #7
1379		add	r11, r11, #1
1380		teq	r0, #7			@ eighth word ends the line
1381		bne	1b
1382		mov	r0, #'\n'
1383		bl	putc
1384		cmp	r11, #64
1385		blt	2b
1386		mov	pc, r10
1387#endif
1388
@ Emit the literal pool here, after the last return above, so the
@ constants of the "ldr rX, =value" forms used earlier in this section
@ are placed at this point.
1389		.ltorg
1390
1391#ifdef CONFIG_ARM_VIRT_EXT
@ Eight-entry vector table, aligned to 32 bytes.  Every slot is a branch
@ to itself except the hyp slot, which continues at __enter_kernel.
@ The code that installs this table is outside this excerpt.
1392.align 5
1393__hyp_reentry_vectors:
1394		W(b)	.			@ reset
1395		W(b)	.			@ undef
1396		W(b)	.			@ svc
1397		W(b)	.			@ pabort
1398		W(b)	.			@ dabort
1399		W(b)	__enter_kernel		@ hyp
1400		W(b)	.			@ irq
1401		W(b)	.			@ fiq
1402#endif /* CONFIG_ARM_VIRT_EXT */
1403
@ Final jump into the kernel.
@ In:  r4 = kernel entry address, r7 = architecture number,
@      r8 = atags pointer.
@ Out: r0 = 0, r1 = r7, r2 = r8, then control transfers to r4 and does
@      not come back.  M-class builds add 1 to r4 first.
1404__enter_kernel:
1405		mov	r0, #0			@ must be 0
1406		mov	r1, r7			@ restore architecture number
1407		mov	r2, r8			@ restore atags pointer
1408 ARM(		mov	pc, r4		)	@ call kernel
1409 M_CLASS(	add	r4, r4, #1	)	@ enter in Thumb mode for M class
1410 THUMB(		bx	r4		)	@ entry point is always ARM for A/R classes
1411
@ Address of the first byte after __enter_kernel; its users are outside
@ this excerpt.
1412reloc_code_end:
1413
1414#ifdef CONFIG_EFI_STUB
1415		.align	2
@ Link-time offset from this word to the label start.  Adding the
@ run-time address of this word gives the run-time address of start.
1416_start:		.long	start - .
1417
@ efi_stub_entry: entry point used when booted as an EFI application.
@  1. Push {run-time address of start, lr}; pass sp in r2 to efi_entry.
@  2. efi_entry returning -1 means failure: return 0x80000001 to the
@     address saved from lr.
@  3. Otherwise make sure SCTLR bit 5 is set, clean and turn off the
@     caches, and jump to ([sp] + start_offset) with r0 = 0,
@     r1 = 0xFFFFFFFF and r2 = the efi_entry return value.
@ The two pushed words are popped only on the failure path; the success
@ path does not return.
@ NOTE(review): cache_clean_flush is called with r4 holding the
@ efi_entry return value, and every flush implementation in this file
@ skips D-cache maintenance when bit 0 of r4 is set -- confirm the value
@ returned always has bit 0 clear.
1418ENTRY(efi_stub_entry)
1419		@ allocate space on stack for passing current zImage address
1420		@ and for the EFI stub to return the new entry point of
1421		@ zImage, as EFI stub may copy the kernel. Pointer address
1422		@ is passed in r2. r0 and r1 are passed through from the
1423		@ EFI firmware to efi_entry
1424		adr	ip, _start
1425		ldr	r3, [ip]
1426		add	r3, r3, ip		@ r3 = run-time address of start
1427		stmfd	sp!, {r3, lr}		@ [sp] = zImage address, [sp, 4] = return address
1428		mov	r2, sp			@ pass zImage address in r2
1429		bl	efi_entry
1430
1431		@ Check for error return from EFI stub. r0 has FDT address
1432		@ or error code.
1433		cmn	r0, #1			@ Z set when r0 is 0xffffffff
1434		beq	efi_load_fail
1435
1436		@ Preserve return value of efi_entry() in r4
1437		mov	r4, r0
1438
1439		@ our cache maintenance code relies on CP15 barrier instructions
1440		@ but since we arrived here with the MMU and caches configured
1441		@ by UEFI, we must check that the CP15BEN bit is set in SCTLR.
1442		@ Note that this bit is RAO/WI on v6 and earlier, so the ISB in
1443		@ the enable path will be executed on v7+ only.
1444		mrc	p15, 0, r1, c1, c0, 0	@ read SCTLR
1445		tst	r1, #(1 << 5)		@ CP15BEN bit set?
1446		bne	0f
1447		orr	r1, r1, #(1 << 5)	@ CP15 barrier instructions
1448		mcr	p15, 0, r1, c1, c0, 0	@ write SCTLR
1449 ARM(		.inst	0xf57ff06f		@ v7+ isb	)
1450 THUMB(		isb						)
1451
14520:		bl	cache_clean_flush
1453		bl	cache_off
1454
1455		@ Set parameters for booting zImage according to boot protocol
1456		@ put FDT address in r2, it was returned by efi_entry()
1457		@ r1 is the machine type, and r0 needs to be 0
1458		mov	r0, #0
1459		mov	r1, #0xFFFFFFFF
1460		mov	r2, r4
1461
1462		@ Branch to (possibly) relocated zImage that is in [sp]
1463		ldr	lr, [sp]
1464		ldr	ip, =start_offset
1465		add	lr, lr, ip
1466		mov	pc, lr				@ no mode switch
1467
1468efi_load_fail:
1469		@ Return EFI_LOAD_ERROR to EFI firmware on error.
1470		ldr	r0, =0x80000001
1471		ldmfd	sp!, {ip, pc}		@ discard the zImage slot, return to the saved lr
1472ENDPROC(efi_stub_entry)
1473#endif
1474
@ 4096-byte stack area in its own allocatable, writable, nobits section
@ named ".stack".  .L_user_stack is the lowest address and
@ .L_user_stack_end the address just past the area; the code that loads
@ sp from these labels is outside this excerpt.
1475		.align
1476		.section ".stack", "aw", %nobits
1477.L_user_stack:	.space	4096
1478.L_user_stack_end:
1479