1/* SPDX-License-Identifier: GPL-2.0-only */
2/*
3 *  linux/arch/arm/boot/compressed/head.S
4 *
5 *  Copyright (C) 1996-2002 Russell King
6 *  Copyright (C) 2004 Hyok S. Choi (MPU support)
7 */
8#include <linux/linkage.h>
9#include <asm/assembler.h>
10#include <asm/v7m.h>
11
12#include "efi-header.S"
13
14 AR_CLASS(	.arch	armv7-a	)
15 M_CLASS(	.arch	armv7-m	)
16
17/*
18 * Debugging stuff
19 *
20 * Note that these macros must not contain any code which is not
21 * 100% relocatable.  Any attempt to do so will result in a crash.
22 * Please select one of the following when turning on debugging.
23 */
24#ifdef DEBUG
25
26#if defined(CONFIG_DEBUG_ICEDCC)
27
28#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
29		.macro	loadsp, rb, tmp1, tmp2
30		.endm
31		.macro	writeb, ch, rb
32		mcr	p14, 0, \ch, c0, c5, 0
33		.endm
34#elif defined(CONFIG_CPU_XSCALE)
35		.macro	loadsp, rb, tmp1, tmp2
36		.endm
37		.macro	writeb, ch, rb
38		mcr	p14, 0, \ch, c8, c0, 0
39		.endm
40#else
41		.macro	loadsp, rb, tmp1, tmp2
42		.endm
43		.macro	writeb, ch, rb
44		mcr	p14, 0, \ch, c1, c0, 0
45		.endm
46#endif
47
48#else
49
50#include CONFIG_DEBUG_LL_INCLUDE
51
52		.macro	writeb,	ch, rb
53		senduart \ch, \rb
54		.endm
55
56#if defined(CONFIG_ARCH_SA1100)
57		.macro	loadsp, rb, tmp1, tmp2
58		mov	\rb, #0x80000000	@ physical base address
59#ifdef CONFIG_DEBUG_LL_SER3
60		add	\rb, \rb, #0x00050000	@ Ser3
61#else
62		add	\rb, \rb, #0x00010000	@ Ser1
63#endif
64		.endm
65#else
66		.macro	loadsp,	rb, tmp1, tmp2
67		addruart \rb, \tmp1, \tmp2
68		.endm
69#endif
70#endif
71#endif
72
73		.macro	kputc,val
74		mov	r0, \val
75		bl	putc
76		.endm
77
78		.macro	kphex,val,len
79		mov	r0, \val
80		mov	r1, #\len
81		bl	phex
82		.endm
83
84		.macro	debug_reloc_start
85#ifdef DEBUG
86		kputc	#'\n'
87		kphex	r6, 8		/* processor id */
88		kputc	#':'
89		kphex	r7, 8		/* architecture id */
90#ifdef CONFIG_CPU_CP15
91		kputc	#':'
92		mrc	p15, 0, r0, c1, c0
93		kphex	r0, 8		/* control reg */
94#endif
95		kputc	#'\n'
96		kphex	r5, 8		/* decompressed kernel start */
97		kputc	#'-'
98		kphex	r9, 8		/* decompressed kernel end  */
99		kputc	#'>'
100		kphex	r4, 8		/* kernel execution address */
101		kputc	#'\n'
102#endif
103		.endm
104
105		.macro	debug_reloc_end
106#ifdef DEBUG
107		kphex	r5, 8		/* end of kernel */
108		kputc	#'\n'
109		mov	r0, r4
110		bl	memdump		/* dump 256 bytes at start of kernel */
111#endif
112		.endm
113
114		/*
115		 * Debug kernel copy by printing the memory addresses involved
116		 */
117		.macro dbgkc, begin, end, cbegin, cend
118#ifdef DEBUG
119		kputc   #'\n'
120		kputc   #'C'
121		kputc   #':'
122		kputc   #'0'
123		kputc   #'x'
124		kphex   \begin, 8	/* Start of compressed kernel */
125		kputc	#'-'
126		kputc	#'0'
127		kputc	#'x'
128		kphex	\end, 8		/* End of compressed kernel */
129		kputc	#'-'
130		kputc	#'>'
131		kputc   #'0'
132		kputc   #'x'
133		kphex   \cbegin, 8	/* Start of kernel copy */
134		kputc	#'-'
135		kputc	#'0'
136		kputc	#'x'
137		kphex	\cend, 8	/* End of kernel copy */
138		kputc	#'\n'
139		kputc	#'\r'
140#endif
141		.endm
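
		/*
		 * For illustration: with DEBUG defined, dbgkc prints a line
		 * of the form
		 *
		 *	C:0x<begin>-0x<end>->0x<cbegin>-0x<cend>
		 *
		 * i.e. the compressed image at [begin, end] is being copied
		 * to [cbegin, cend].
		 */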
142
143		.macro	enable_cp15_barriers, reg
144		mrc	p15, 0, \reg, c1, c0, 0	@ read SCTLR
145		tst	\reg, #(1 << 5)		@ CP15BEN bit set?
146		bne	.L_\@
147		orr	\reg, \reg, #(1 << 5)	@ CP15 barrier instructions
148		mcr	p15, 0, \reg, c1, c0, 0	@ write SCTLR
149 ARM(		.inst   0xf57ff06f		@ v7+ isb	)
150 THUMB(		isb						)
151.L_\@:
152		.endm
153
154		/*
155		 * The kernel build system appends the size of the
156		 * decompressed kernel at the end of the compressed data
157		 * in little-endian form.
158		 */
159		.macro	get_inflated_image_size, res:req, tmp1:req, tmp2:req
160		adr	\res, .Linflated_image_size_offset
161		ldr	\tmp1, [\res]
162		add	\tmp1, \tmp1, \res	@ address of inflated image size
163
164		ldrb	\res, [\tmp1]		@ get_unaligned_le32
165		ldrb	\tmp2, [\tmp1, #1]
166		orr	\res, \res, \tmp2, lsl #8
167		ldrb	\tmp2, [\tmp1, #2]
168		ldrb	\tmp1, [\tmp1, #3]
169		orr	\res, \res, \tmp2, lsl #16
170		orr	\res, \res, \tmp1, lsl #24
171		.endm
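
		/*
		 * For illustration only (pseudo-C, not part of the build),
		 * the macro above is essentially:
		 *
		 *	const u8 *p = input_data_end - 4;
		 *	size = p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24;
		 *
		 * i.e. an unaligned little-endian 32-bit load done bytewise,
		 * since unaligned accesses may not be usable this early.
		 */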
172
173		.section ".start", "ax"
174/*
175 * sort out different calling conventions
176 */
177		.align
178		/*
179		 * Always enter in ARM state for CPUs that support the ARM ISA.
180		 * As of today (2014) that's exactly the members of the A and R
181		 * classes.
182		 */
183 AR_CLASS(	.arm	)
184start:
185		.type	start,#function
186		/*
187		 * These 7 nops along with the 1 nop immediately below for
188		 * !THUMB2 form 8 nops that make the compressed kernel bootable
189		 * on legacy ARM systems that assumed the kernel was in a.out
190		 * binary format. The boot loaders on these systems would
191		 * jump 32 bytes into the image to skip the a.out header.
192		 * With these 8 nops filling exactly 32 bytes, things still
193		 * work as expected on these legacy systems. Thumb2 mode keeps
194		 * 7 of the nops as it turns out that some boot loaders
195		 * were patching the initial instructions of the kernel, i.e.
196		 * they had started to exploit this "patch area".
197		 */
198		.rept	7
199		__nop
200		.endr
201#ifndef CONFIG_THUMB2_KERNEL
202		__nop
203#else
204 AR_CLASS(	sub	pc, pc, #3	)	@ A/R: switch to Thumb2 mode
205  M_CLASS(	nop.w			)	@ M: already in Thumb2 mode
206		.thumb
207#endif
208		W(b)	1f
209
210		.word	_magic_sig	@ Magic numbers to help the loader
211		.word	_magic_start	@ absolute load/run zImage address
212		.word	_magic_end	@ zImage end address
213		.word	0x04030201	@ endianness flag
214		.word	0x45454545	@ another magic number to indicate
215		.word	_magic_table	@ additional data table
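
		/*
		 * For reference: on A/R-class builds these words sit right
		 * after the 32 bytes of nops and the 4-byte branch above,
		 * i.e. at offsets 0x24 (magic signature), 0x28 (load/run
		 * address) and 0x2c (end address) from the start of the
		 * zImage, which is where boot loaders conventionally look
		 * for them.
		 */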
216
217		__EFI_HEADER
2181:
219 ARM_BE8(	setend	be		)	@ go BE8 if compiled for BE8
220 AR_CLASS(	mrs	r9, cpsr	)
221#ifdef CONFIG_ARM_VIRT_EXT
222		bl	__hyp_stub_install	@ get into SVC mode, reversibly
223#endif
224		mov	r7, r1			@ save architecture ID
225		mov	r8, r2			@ save atags pointer
226
227#ifndef CONFIG_CPU_V7M
228		/*
229		 * Booting from Angel - need to enter SVC mode and disable
230		 * FIQs/IRQs (numeric definitions from angel arm.h source).
231		 * We only do this if we were in user mode on entry.
232		 */
233		mrs	r2, cpsr		@ get current mode
234		tst	r2, #3			@ not user?
235		bne	not_angel
236		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
237 ARM(		swi	0x123456	)	@ angel_SWI_ARM
238 THUMB(		svc	0xab		)	@ angel_SWI_THUMB
239not_angel:
240		safe_svcmode_maskall r0
241		msr	spsr_cxsf, r9		@ Save the CPU boot mode in
242						@ SPSR
243#endif
244		/*
245		 * Note that some cache flushing and other stuff may
246		 * be needed here - is there an Angel SWI call for this?
247		 */
248
249		/*
250		 * Some architecture-specific code can be inserted
251		 * by the linker here, but it should preserve r7, r8, and r9.
252		 */
253
254		.text
255
256#ifdef CONFIG_AUTO_ZRELADDR
257		/*
258		 * Find the start of physical memory.  As we are executing
259		 * without the MMU on, we are in the physical address space.
260		 * We just need to get rid of any offset by aligning the
261		 * address.
262		 *
263		 * This alignment is a balance between the requirements of
264		 * different platforms - we have chosen 128MB to allow
265		 * platforms which align the start of their physical memory
266		 * to 128MB to use this feature, while allowing the zImage
267		 * to be placed within the first 128MB of memory on other
268		 * platforms.  Increasing the alignment means we place
269		 * stricter alignment requirements on the start of physical
270		 * memory, but relaxing it means that we break people who
271		 * are already placing their zImage in (e.g.) the top 64MB
272		 * of this range.
273		 */
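		/*
		 * Worked example (illustrative numbers): executing at
		 * physical address 0x60342184, the mask below yields
		 * r4 = 0x60000000, and TEXT_OFFSET is then added to form
		 * the final kernel image address.
		 */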
274		mov	r4, pc
275		and	r4, r4, #0xf8000000
276		/* Determine final kernel image address. */
277		add	r4, r4, #TEXT_OFFSET
278#else
279		ldr	r4, =zreladdr
280#endif
281
282		/*
283		 * Set up a page table only if it won't overwrite ourselves.
284		 * That means r4 < pc || r4 - 16k page directory > &_end.
285		 * Given that r4 > &_end is quite infrequent, we add a rough
286		 * additional 1MB of room for a possible appended DTB.
287		 */
288		mov	r0, pc
289		cmp	r0, r4
290		ldrcc	r0, LC0+28
291		addcc	r0, r0, pc
292		cmpcc	r4, r0
293		orrcc	r4, r4, #1		@ remember we skipped cache_on
294		blcs	cache_on
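
		/*
		 * Note: the word loaded from LC0+28 above is, per the LC0
		 * table further down, "_end - restart + 16384 + 1024*1024",
		 * i.e. the size of the image from restart to _end plus the
		 * 16k page directory plus the rough 1MB of DTB headroom
		 * mentioned above.
		 */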
295
296restart:	adr	r0, LC0
297		ldmia	r0, {r1, r2, r3, r6, r11, r12}
298		ldr	sp, [r0, #24]
299
300		/*
301		 * We might be running at a different address.  We need
302		 * to fix up various pointers.
303		 */
304		sub	r0, r0, r1		@ calculate the delta offset
305		add	r6, r6, r0		@ _edata
306
307		get_inflated_image_size	r9, r10, lr
308
309#ifndef CONFIG_ZBOOT_ROM
310		/* malloc space is above the relocated stack (64k max) */
311		add	sp, sp, r0
312		add	r10, sp, #0x10000
313#else
314		/*
315		 * With ZBOOT_ROM the bss/stack is non-relocatable,
316		 * but someone could still run this code from RAM,
317		 * in which case our reference is _edata.
318		 */
319		mov	r10, r6
320#endif
321
322		mov	r5, #0			@ init dtb size to 0
323#ifdef CONFIG_ARM_APPENDED_DTB
324/*
325 *   r0  = delta
326 *   r2  = BSS start
327 *   r3  = BSS end
328 *   r4  = final kernel address (possibly with LSB set)
329 *   r5  = appended dtb size (still unknown)
330 *   r6  = _edata
331 *   r7  = architecture ID
332 *   r8  = atags/device tree pointer
333 *   r9  = size of decompressed image
334 *   r10 = end of this image, including bss/stack/malloc space if non-XIP
335 *   r11 = GOT start
336 *   r12 = GOT end
337 *   sp  = stack pointer
338 *
339 * If there are device trees (dtb) appended to zImage, advance r10 so that the
340 * dtb data will get relocated along with the kernel if necessary.
341 */
342
343		ldr	lr, [r6, #0]
344#ifndef __ARMEB__
345		ldr	r1, =0xedfe0dd0		@ sig is 0xd00dfeed big endian
346#else
347		ldr	r1, =0xd00dfeed
348#endif
349		cmp	lr, r1
350		bne	dtb_check_done		@ not found
351
352#ifdef CONFIG_ARM_ATAG_DTB_COMPAT
353		/*
354		 * OK... Let's do some funky business here.
355		 * If we do have a DTB appended to zImage, and we do have
356		 * an ATAG list around, we want the latter to be translated
357		 * and folded into the former here. No GOT fixup has occurred
358		 * yet, but none of the code we're about to call uses any
359		 * global variables.
360		 */
361
362		/* Get the initial DTB size */
363		ldr	r5, [r6, #4]
364#ifndef __ARMEB__
365		/* convert to little endian */
366		eor	r1, r5, r5, ror #16
367		bic	r1, r1, #0x00ff0000
368		mov	r5, r5, ror #8
369		eor	r5, r5, r1, lsr #8
370#endif
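		/*
		 * The eor/bic/ror/eor sequence above is the classic pre-ARMv6
		 * 32-bit byte swap (swab32() in C terms), written without the
		 * ARMv6-only rev instruction so that it runs on older CPUs too.
		 */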
371		/* 50% DTB growth should be good enough */
372		add	r5, r5, r5, lsr #1
373		/* preserve 64-bit alignment */
374		add	r5, r5, #7
375		bic	r5, r5, #7
376		/* clamp to 32KB min and 1MB max */
377		cmp	r5, #(1 << 15)
378		movlo	r5, #(1 << 15)
379		cmp	r5, #(1 << 20)
380		movhi	r5, #(1 << 20)
381		/* temporarily relocate the stack past the DTB work space */
382		add	sp, sp, r5
383
384		stmfd	sp!, {r0-r3, ip, lr}
385		mov	r0, r8
386		mov	r1, r6
387		mov	r2, r5
388		bl	atags_to_fdt
389
390		/*
391		 * If the returned value is 1, there is no ATAG at the location
392		 * pointed to by r8.  Try the typical 0x100 offset from the
393		 * start of RAM and hope for the best.
394		 */
395		cmp	r0, #1
396		sub	r0, r4, #TEXT_OFFSET
397		bic	r0, r0, #1
398		add	r0, r0, #0x100
399		mov	r1, r6
400		mov	r2, r5
401		bleq	atags_to_fdt
402
403		ldmfd	sp!, {r0-r3, ip, lr}
404		sub	sp, sp, r5
405#endif
406
407		mov	r8, r6			@ use the appended device tree
408
409		/*
410		 * Make sure that the DTB doesn't end up in the final
411		 * kernel's .bss area. To do so, we adjust the decompressed
412		 * kernel size to compensate if that .bss size is larger
413		 * than the relocated code.
414		 */
415		ldr	r5, =_kernel_bss_size
416		adr	r1, wont_overwrite
417		sub	r1, r6, r1
418		subs	r1, r5, r1
419		addhi	r9, r9, r1
420
421		/* Get the current DTB size */
422		ldr	r5, [r6, #4]
423#ifndef __ARMEB__
424		/* convert r5 (dtb size) to little endian */
425		eor	r1, r5, r5, ror #16
426		bic	r1, r1, #0x00ff0000
427		mov	r5, r5, ror #8
428		eor	r5, r5, r1, lsr #8
429#endif
430
431		/* preserve 64-bit alignment */
432		add	r5, r5, #7
433		bic	r5, r5, #7
434
435		/* relocate some pointers past the appended dtb */
436		add	r6, r6, r5
437		add	r10, r10, r5
438		add	sp, sp, r5
439dtb_check_done:
440#endif
441
442/*
443 * Check to see if we will overwrite ourselves.
444 *   r4  = final kernel address (possibly with LSB set)
445 *   r9  = size of decompressed image
446 *   r10 = end of this image, including bss/stack/malloc space if non-XIP
447 * We basically want:
448 *   r4 - 16k page directory >= r10 -> OK
449 *   r4 + image length <= address of wont_overwrite -> OK
450 * Note: the possible LSB in r4 is harmless here.
451 */
452		add	r10, r10, #16384
453		cmp	r4, r10
454		bhs	wont_overwrite
455		add	r10, r4, r9
456		adr	r9, wont_overwrite
457		cmp	r10, r9
458		bls	wont_overwrite
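
		/*
		 * In pseudo-C (illustration only, descriptive names), the
		 * two tests above are:
		 *
		 *	if (r4 >= end_of_this_image + 16384)
		 *		goto wont_overwrite;
		 *	if (r4 + inflated_size <= &wont_overwrite)
		 *		goto wont_overwrite;
		 *	... otherwise fall through and relocate ourselves ...
		 */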
459
460/*
461 * Relocate ourselves past the end of the decompressed kernel.
462 *   r6  = _edata
463 *   r10 = end of the decompressed kernel
464 * Because we always copy ahead, we need to do it from the end and go
465 * backward in case the source and destination overlap.
466 */
467		/*
468		 * Bump to the next 256-byte boundary with the size of
469		 * the relocation code added. This avoids overwriting
470		 * ourselves when the offset is small.
471		 */
472		add	r10, r10, #((reloc_code_end - restart + 256) & ~255)
473		bic	r10, r10, #255
474
475		/* Get start of code we want to copy and align it down. */
476		adr	r5, restart
477		bic	r5, r5, #31
478
479/* Relocate the hyp vector base if necessary */
480#ifdef CONFIG_ARM_VIRT_EXT
481		mrs	r0, spsr
482		and	r0, r0, #MODE_MASK
483		cmp	r0, #HYP_MODE
484		bne	1f
485
486		/*
487		 * Compute the address of the hyp vectors after relocation.
488		 * This requires some arithmetic since we cannot directly
489		 * reference __hyp_stub_vectors in a PC-relative way.
490		 * Call __hyp_set_vectors with the new address so that we
491		 * can HVC again after the copy.
492		 */
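		/*
		 * Concretely (for illustration), the code below computes
		 *   r0 = &__hyp_stub_vectors in the current copy - r5 + r10
		 * i.e. where the stub vectors will live once the image has
		 * been copied to its new location.
		 */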
4930:		adr	r0, 0b
494		movw	r1, #:lower16:__hyp_stub_vectors - 0b
495		movt	r1, #:upper16:__hyp_stub_vectors - 0b
496		add	r0, r0, r1
497		sub	r0, r0, r5
498		add	r0, r0, r10
499		bl	__hyp_set_vectors
5001:
501#endif
502
503		sub	r9, r6, r5		@ size to copy
504		add	r9, r9, #31		@ rounded up to a multiple
505		bic	r9, r9, #31		@ ... of 32 bytes
506		add	r6, r9, r5
507		add	r9, r9, r10
508
509#ifdef DEBUG
510		sub     r10, r6, r5
511		sub     r10, r9, r10
512		/*
513		 * We are about to copy the kernel to a new memory area.
514		 * The boundaries of the new memory area can be found in
515		 * r10 and r9, whilst r5 and r6 contain the boundaries
516		 * of the memory we are going to copy.
517		 * Calling dbgkc will help with the printing of this
518		 * information.
519		 */
520		dbgkc	r5, r6, r10, r9
521#endif
522
5231:		ldmdb	r6!, {r0 - r3, r10 - r12, lr}
524		cmp	r6, r5
525		stmdb	r9!, {r0 - r3, r10 - r12, lr}
526		bhi	1b
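
		/*
		 * The ldmdb/stmdb loop above is effectively a backwards
		 * memmove(), 32 bytes per iteration, copying [restart, _edata)
		 * up to the area above the decompressed kernel; copying from
		 * the end down is what makes the overlapping case safe.
		 */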
527
528		/* Preserve offset to relocated code. */
529		sub	r6, r9, r6
530
531#ifndef CONFIG_ZBOOT_ROM
532		/* cache_clean_flush may use the stack, so relocate it */
533		add	sp, sp, r6
534#endif
535
536		bl	cache_clean_flush
537
538		badr	r0, restart
539		add	r0, r0, r6
540		mov	pc, r0
541
542wont_overwrite:
543/*
544 * If delta is zero, we are running at the address we were linked at.
545 *   r0  = delta
546 *   r2  = BSS start
547 *   r3  = BSS end
548 *   r4  = kernel execution address (possibly with LSB set)
549 *   r5  = appended dtb size (0 if not present)
550 *   r7  = architecture ID
551 *   r8  = atags pointer
552 *   r11 = GOT start
553 *   r12 = GOT end
554 *   sp  = stack pointer
555 */
556		orrs	r1, r0, r5
557		beq	not_relocated
558
559		add	r11, r11, r0
560		add	r12, r12, r0
561
562#ifndef CONFIG_ZBOOT_ROM
563		/*
564		 * If we're running fully PIC, i.e. CONFIG_ZBOOT_ROM=n,
565		 * we need to fix up pointers into the BSS region.
566		 * Note that the stack pointer has already been fixed up.
567		 */
568		add	r2, r2, r0
569		add	r3, r3, r0
570
571		/*
572		 * Relocate all entries in the GOT table.
573		 * Bump bss entries to _edata + dtb size
574		 */
5751:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
576		add	r1, r1, r0		@ This fixes up C references
577		cmp	r1, r2			@ if entry >= bss_start &&
578		cmphs	r3, r1			@       bss_end > entry
579		addhi	r1, r1, r5		@    entry += dtb size
580		str	r1, [r11], #4		@ next entry
581		cmp	r11, r12
582		blo	1b
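
		/*
		 * Pseudo-C for the loop above (illustration only, descriptive
		 * names):
		 *
		 *	for (p = got_start; p < got_end; p++) {
		 *		entry = *p + delta;
		 *		if (entry >= bss_start && entry < bss_end)
		 *			entry += dtb_size;
		 *		*p = entry;
		 *	}
		 */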
583
584		/* bump our bss pointers too */
585		add	r2, r2, r5
586		add	r3, r3, r5
587
588#else
589
590		/*
591		 * Relocate entries in the GOT table.  We only relocate
592		 * the entries that are outside the (relocated) BSS region.
593		 */
5941:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
595		cmp	r1, r2			@ entry < bss_start ||
596		cmphs	r3, r1			@ _end < entry
597		addlo	r1, r1, r0		@ table.  This fixes up the
598		str	r1, [r11], #4		@ C references.
599		cmp	r11, r12
600		blo	1b
601#endif
602
603not_relocated:	mov	r0, #0
6041:		str	r0, [r2], #4		@ clear bss
605		str	r0, [r2], #4
606		str	r0, [r2], #4
607		str	r0, [r2], #4
608		cmp	r2, r3
609		blo	1b
610
611		/*
612		 * Did we skip the cache setup earlier?
613		 * That is indicated by the LSB in r4.
614		 * Do it now if so.
615		 */
616		tst	r4, #1
617		bic	r4, r4, #1
618		blne	cache_on
619
620/*
621 * The C runtime environment should now be set up sufficiently.
622 * Set up some pointers, and start decompressing.
623 *   r4  = kernel execution address
624 *   r7  = architecture ID
625 *   r8  = atags pointer
626 */
627		mov	r0, r4
628		mov	r1, sp			@ malloc space above stack
629		add	r2, sp, #0x10000	@ 64k max
630		mov	r3, r7
631		bl	decompress_kernel
632		bl	cache_clean_flush
633		bl	cache_off
634
635#ifdef CONFIG_ARM_VIRT_EXT
636		mrs	r0, spsr		@ Get saved CPU boot mode
637		and	r0, r0, #MODE_MASK
638		cmp	r0, #HYP_MODE		@ if not booted in HYP mode...
639		bne	__enter_kernel		@ boot kernel directly
640
641		adr	r12, .L__hyp_reentry_vectors_offset
642		ldr	r0, [r12]
643		add	r0, r0, r12
644
645		bl	__hyp_set_vectors
646		__HVC(0)			@ otherwise bounce to hyp mode
647
648		b	.			@ should never be reached
649
650		.align	2
651.L__hyp_reentry_vectors_offset:	.long	__hyp_reentry_vectors - .
652#else
653		b	__enter_kernel
654#endif
655
656		.align	2
657		.type	LC0, #object
658LC0:		.word	LC0			@ r1
659		.word	__bss_start		@ r2
660		.word	_end			@ r3
661		.word	_edata			@ r6
662		.word	_got_start		@ r11
663		.word	_got_end		@ ip
664		.word	.L_user_stack_end	@ sp
665		.word	_end - restart + 16384 + 1024*1024
666		.size	LC0, . - LC0
667
668.Linflated_image_size_offset:
669		.long	(input_data_end - 4) - .
670
671#ifdef CONFIG_ARCH_RPC
672		.globl	params
673params:		ldr	r0, =0x10000100		@ params_phys for RPC
674		mov	pc, lr
675		.ltorg
676		.align
677#endif
678
679/*
680 * Turn on the cache.  We need to set up some page tables so that we
681 * can have both the I and D caches on.
682 *
683 * We place the page tables 16k down from the kernel execution address,
684 * and we hope that nothing else is using it.  If we're using it, we
685 * will go pop!
686 *
687 * On entry,
688 *  r4 = kernel execution address
689 *  r7 = architecture number
690 *  r8 = atags pointer
691 * On exit,
692 *  r0, r1, r2, r3, r9, r10, r12 corrupted
693 * This routine must preserve:
694 *  r4, r7, r8
695 */
696		.align	5
697cache_on:	mov	r3, #8			@ cache_on function
698		b	call_cache_fn
699
700/*
701 * Initialize the highest priority protection region, PR7,
702 * to cover the whole 32-bit address space as cacheable and bufferable.
703 */
704__armv4_mpu_cache_on:
705		mov	r0, #0x3f		@ 4G, the whole
706		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
707		mcr 	p15, 0, r0, c6, c7, 1
708
709		mov	r0, #0x80		@ PR7
710		mcr	p15, 0, r0, c2, c0, 0	@ D-cache on
711		mcr	p15, 0, r0, c2, c0, 1	@ I-cache on
712		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
713
714		mov	r0, #0xc000
715		mcr	p15, 0, r0, c5, c0, 1	@ I-access permission
716		mcr	p15, 0, r0, c5, c0, 0	@ D-access permission
717
718		mov	r0, #0
719		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
720		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
721		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
722		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
723						@ ...I .... ..D. WC.M
724		orr	r0, r0, #0x002d		@ .... .... ..1. 11.1
725		orr	r0, r0, #0x1000		@ ...1 .... .... ....
726
727		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
728
729		mov	r0, #0
730		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
731		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
732		mov	pc, lr
733
734__armv3_mpu_cache_on:
735		mov	r0, #0x3f		@ 4G, the whole
736		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
737
738		mov	r0, #0x80		@ PR7
739		mcr	p15, 0, r0, c2, c0, 0	@ cache on
740		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
741
742		mov	r0, #0xc000
743		mcr	p15, 0, r0, c5, c0, 0	@ access permission
744
745		mov	r0, #0
746		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
747		/*
748		 * ?? ARMv3 MMU does not allow reading the control register,
749		 * does this really work on ARMv3 MPU?
750		 */
751		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
752						@ .... .... .... WC.M
753		orr	r0, r0, #0x000d		@ .... .... .... 11.1
754		/* ?? this overwrites the value constructed above? */
755		mov	r0, #0
756		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
757
758		/* ?? invalidate for the second time? */
759		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
760		mov	pc, lr
761
762#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
763#define CB_BITS 0x08
764#else
765#define CB_BITS 0x0c
766#endif
767
768__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
769		bic	r3, r3, #0xff		@ Align the pointer
770		bic	r3, r3, #0x3f00
771/*
772 * Initialise the page tables, turning on the cacheable and bufferable
773 * bits for the RAM area only.
774 */
775		mov	r0, r3
776		mov	r9, r0, lsr #18
777		mov	r9, r9, lsl #18		@ start of RAM
778		add	r10, r9, #0x10000000	@ a reasonable RAM size
779		mov	r1, #0x12		@ XN|U + section mapping
780		orr	r1, r1, #3 << 10	@ AP=11
781		add	r2, r3, #16384
7821:		cmp	r1, r9			@ if virt > start of RAM
783		cmphs	r10, r1			@   && end of RAM > virt
784		bic	r1, r1, #0x1c		@ clear XN|U + C + B
785		orrlo	r1, r1, #0x10		@ Set XN|U for non-RAM
786		orrhs	r1, r1, r6		@ set RAM section settings
787		str	r1, [r0], #4		@ 1:1 mapping
788		add	r1, r1, #1048576
789		teq	r0, r2
790		bne	1b
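/*
 * Roughly, the loop above builds a flat 1:1 section map (pseudo-C,
 * illustration only):
 *
 *	for (addr = 0; addr < 4GB; addr += 1MB)
 *		pgd[addr >> 20] = addr | flags(addr);
 *
 * where flags() selects cacheable+bufferable section attributes for the
 * assumed 256MB of RAM starting at r9, and XN for everything else.
 */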
791/*
792 * If we are ever running from Flash, then we surely want the cache
793 * to be enabled for our execution instance as well...  We map 2MB of it
794 * so there is no map overlap problem for a compressed kernel of up to 1 MB.
795 * If the execution is in RAM then we would only be duplicating the above.
796 */
797		orr	r1, r6, #0x04		@ ensure B is set for this
798		orr	r1, r1, #3 << 10
799		mov	r2, pc
800		mov	r2, r2, lsr #20
801		orr	r1, r1, r2, lsl #20
802		add	r0, r3, r2, lsl #2
803		str	r1, [r0], #4
804		add	r1, r1, #1048576
805		str	r1, [r0]
806		mov	pc, lr
807ENDPROC(__setup_mmu)
808
809@ Enable unaligned access on v6, to allow better code generation
810@ for the decompressor C code:
811__armv6_mmu_cache_on:
812		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
813		bic	r0, r0, #2		@ A (no unaligned access fault)
814		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
815		mcr	p15, 0, r0, c1, c0, 0	@ write SCTLR
816		b	__armv4_mmu_cache_on
817
818__arm926ejs_mmu_cache_on:
819#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
820		mov	r0, #4			@ put dcache in WT mode
821		mcr	p15, 7, r0, c15, c0, 0
822#endif
823
824__armv4_mmu_cache_on:
825		mov	r12, lr
826#ifdef CONFIG_MMU
827		mov	r6, #CB_BITS | 0x12	@ U
828		bl	__setup_mmu
829		mov	r0, #0
830		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
831		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
832		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
833		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
834		orr	r0, r0, #0x0030
835 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
836		bl	__common_mmu_cache_on
837		mov	r0, #0
838		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
839#endif
840		mov	pc, r12
841
842__armv7_mmu_cache_on:
843		enable_cp15_barriers	r11
844		mov	r12, lr
845#ifdef CONFIG_MMU
846		mrc	p15, 0, r11, c0, c1, 4	@ read ID_MMFR0
847		tst	r11, #0xf		@ VMSA
848		movne	r6, #CB_BITS | 0x02	@ !XN
849		blne	__setup_mmu
850		mov	r0, #0
851		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
852		tst	r11, #0xf		@ VMSA
853		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
854#endif
855		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
856		bic	r0, r0, #1 << 28	@ clear SCTLR.TRE
857		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
858		orr	r0, r0, #0x003c		@ write buffer
859		bic	r0, r0, #2		@ A (no unaligned access fault)
860		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
861						@ (needed for ARM1176)
862#ifdef CONFIG_MMU
863 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
864		mrcne   p15, 0, r6, c2, c0, 2   @ read ttb control reg
865		orrne	r0, r0, #1		@ MMU enabled
866		movne	r1, #0xfffffffd		@ domain 0 = client
867		bic     r6, r6, #1 << 31        @ 32-bit translation system
868		bic     r6, r6, #(7 << 0) | (1 << 4)	@ use only ttbr0
869		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
870		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
871		mcrne   p15, 0, r6, c2, c0, 2   @ load ttb control
872#endif
873		mcr	p15, 0, r0, c7, c5, 4	@ ISB
874		mcr	p15, 0, r0, c1, c0, 0	@ load control register
875		mrc	p15, 0, r0, c1, c0, 0	@ and read it back
876		mov	r0, #0
877		mcr	p15, 0, r0, c7, c5, 4	@ ISB
878		mov	pc, r12
879
880__fa526_cache_on:
881		mov	r12, lr
882		mov	r6, #CB_BITS | 0x12	@ U
883		bl	__setup_mmu
884		mov	r0, #0
885		mcr	p15, 0, r0, c7, c7, 0	@ Invalidate whole cache
886		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
887		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
888		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
889		orr	r0, r0, #0x1000		@ I-cache enable
890		bl	__common_mmu_cache_on
891		mov	r0, #0
892		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
893		mov	pc, r12
894
895__common_mmu_cache_on:
896#ifndef CONFIG_THUMB2_KERNEL
897#ifndef DEBUG
898		orr	r0, r0, #0x000d		@ Write buffer, mmu
899#endif
900		mov	r1, #-1
901		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
902		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
903		b	1f
904		.align	5			@ cache line aligned
9051:		mcr	p15, 0, r0, c1, c0, 0	@ load control register
906		mrc	p15, 0, r0, c1, c0, 0	@ and read it back to
907		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
908#endif
909
910#define PROC_ENTRY_SIZE (4*5)
911
912/*
913 * Here follow the relocatable cache support functions for the
914 * various processors.  This is a generic hook for locating an
915 * entry and jumping to an instruction at the specified offset
916 * from the start of the block.  Please note this is all position
917 * independent code.
918 *
919 *  r1  = corrupted
920 *  r2  = corrupted
921 *  r3  = block offset
922 *  r9  = corrupted
923 *  r12 = corrupted
924 */
925
926call_cache_fn:	adr	r12, proc_types
927#ifdef CONFIG_CPU_CP15
928		mrc	p15, 0, r9, c0, c0	@ get processor ID
929#elif defined(CONFIG_CPU_V7M)
930		/*
931		 * On v7-M the processor id is located in the V7M_SCB_CPUID
932		 * register, but as cache handling is IMPLEMENTATION DEFINED on
933		 * v7-M (if existent at all) we just return early here.
934		 * If V7M_SCB_CPUID were used, the cpu ID functions (i.e.
935		 * __armv7_mmu_cache_{on,off,flush}) would be selected, which
936		 * use cp15 registers that are not implemented on v7-M.
937		 */
938		bx	lr
939#else
940		ldr	r9, =CONFIG_PROCESSOR_ID
941#endif
9421:		ldr	r1, [r12, #0]		@ get value
943		ldr	r2, [r12, #4]		@ get mask
944		eor	r1, r1, r9		@ (real ^ match)
945		tst	r1, r2			@       & mask
946 ARM(		addeq	pc, r12, r3		) @ call cache function
947 THUMB(		addeq	r12, r3			)
948 THUMB(		moveq	pc, r12			) @ call cache function
949		add	r12, r12, #PROC_ENTRY_SIZE
950		b	1b
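
/*
 * In pseudo-C (illustration only), the loop above does:
 *
 *	for (p = proc_types; ; p++)
 *		if (((processor_id ^ p->match) & p->mask) == 0)
 *			branch to the instruction at offset r3 in *p;
 *
 * where r3 is 8 for 'cache on', 12 for 'cache off' and 16 for
 * 'cache flush', matching the entry layout described below.
 */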
951
952/*
953 * Table for cache operations.  This is basically:
954 *   - CPU ID match
955 *   - CPU ID mask
956 *   - 'cache on' method instruction
957 *   - 'cache off' method instruction
958 *   - 'cache flush' method instruction
959 *
960 * We match an entry using: ((real_id ^ match) & mask) == 0
961 *
962 * Writethrough caches generally only need 'on' and 'off'
963 * methods.  Writeback caches _must_ have the flush method
964 * defined.
965 */
966		.align	2
967		.type	proc_types,#object
968proc_types:
969		.word	0x41000000		@ old ARM ID
970		.word	0xff00f000
971		mov	pc, lr
972 THUMB(		nop				)
973		mov	pc, lr
974 THUMB(		nop				)
975		mov	pc, lr
976 THUMB(		nop				)
977
978		.word	0x41007000		@ ARM7/710
979		.word	0xfff8fe00
980		mov	pc, lr
981 THUMB(		nop				)
982		mov	pc, lr
983 THUMB(		nop				)
984		mov	pc, lr
985 THUMB(		nop				)
986
987		.word	0x41807200		@ ARM720T (writethrough)
988		.word	0xffffff00
989		W(b)	__armv4_mmu_cache_on
990		W(b)	__armv4_mmu_cache_off
991		mov	pc, lr
992 THUMB(		nop				)
993
994		.word	0x41007400		@ ARM74x
995		.word	0xff00ff00
996		W(b)	__armv3_mpu_cache_on
997		W(b)	__armv3_mpu_cache_off
998		W(b)	__armv3_mpu_cache_flush
999
1000		.word	0x41009400		@ ARM94x
1001		.word	0xff00ff00
1002		W(b)	__armv4_mpu_cache_on
1003		W(b)	__armv4_mpu_cache_off
1004		W(b)	__armv4_mpu_cache_flush
1005
1006		.word	0x41069260		@ ARM926EJ-S (v5TEJ)
1007		.word	0xff0ffff0
1008		W(b)	__arm926ejs_mmu_cache_on
1009		W(b)	__armv4_mmu_cache_off
1010		W(b)	__armv5tej_mmu_cache_flush
1011
1012		.word	0x00007000		@ ARM7 IDs
1013		.word	0x0000f000
1014		mov	pc, lr
1015 THUMB(		nop				)
1016		mov	pc, lr
1017 THUMB(		nop				)
1018		mov	pc, lr
1019 THUMB(		nop				)
1020
1021		@ Everything from here on will be the new ID system.
1022
1023		.word	0x4401a100		@ sa110 / sa1100
1024		.word	0xffffffe0
1025		W(b)	__armv4_mmu_cache_on
1026		W(b)	__armv4_mmu_cache_off
1027		W(b)	__armv4_mmu_cache_flush
1028
1029		.word	0x6901b110		@ sa1110
1030		.word	0xfffffff0
1031		W(b)	__armv4_mmu_cache_on
1032		W(b)	__armv4_mmu_cache_off
1033		W(b)	__armv4_mmu_cache_flush
1034
1035		.word	0x56056900
1036		.word	0xffffff00		@ PXA9xx
1037		W(b)	__armv4_mmu_cache_on
1038		W(b)	__armv4_mmu_cache_off
1039		W(b)	__armv4_mmu_cache_flush
1040
1041		.word	0x56158000		@ PXA168
1042		.word	0xfffff000
1043		W(b)	__armv4_mmu_cache_on
1044		W(b)	__armv4_mmu_cache_off
1045		W(b)	__armv5tej_mmu_cache_flush
1046
1047		.word	0x56050000		@ Feroceon
1048		.word	0xff0f0000
1049		W(b)	__armv4_mmu_cache_on
1050		W(b)	__armv4_mmu_cache_off
1051		W(b)	__armv5tej_mmu_cache_flush
1052
1053#ifdef CONFIG_CPU_FEROCEON_OLD_ID
1054		/* this conflicts with the standard ARMv5TE entry */
1055		.long	0x41009260		@ Old Feroceon
1056		.long	0xff00fff0
1057		b	__armv4_mmu_cache_on
1058		b	__armv4_mmu_cache_off
1059		b	__armv5tej_mmu_cache_flush
1060#endif
1061
1062		.word	0x66015261		@ FA526
1063		.word	0xff01fff1
1064		W(b)	__fa526_cache_on
1065		W(b)	__armv4_mmu_cache_off
1066		W(b)	__fa526_cache_flush
1067
1068		@ These match on the architecture ID
1069
1070		.word	0x00020000		@ ARMv4T
1071		.word	0x000f0000
1072		W(b)	__armv4_mmu_cache_on
1073		W(b)	__armv4_mmu_cache_off
1074		W(b)	__armv4_mmu_cache_flush
1075
1076		.word	0x00050000		@ ARMv5TE
1077		.word	0x000f0000
1078		W(b)	__armv4_mmu_cache_on
1079		W(b)	__armv4_mmu_cache_off
1080		W(b)	__armv4_mmu_cache_flush
1081
1082		.word	0x00060000		@ ARMv5TEJ
1083		.word	0x000f0000
1084		W(b)	__armv4_mmu_cache_on
1085		W(b)	__armv4_mmu_cache_off
1086		W(b)	__armv5tej_mmu_cache_flush
1087
1088		.word	0x0007b000		@ ARMv6
1089		.word	0x000ff000
1090		W(b)	__armv6_mmu_cache_on
1091		W(b)	__armv4_mmu_cache_off
1092		W(b)	__armv6_mmu_cache_flush
1093
1094		.word	0x000f0000		@ new CPU Id
1095		.word	0x000f0000
1096		W(b)	__armv7_mmu_cache_on
1097		W(b)	__armv7_mmu_cache_off
1098		W(b)	__armv7_mmu_cache_flush
1099
1100		.word	0			@ unrecognised type
1101		.word	0
1102		mov	pc, lr
1103 THUMB(		nop				)
1104		mov	pc, lr
1105 THUMB(		nop				)
1106		mov	pc, lr
1107 THUMB(		nop				)
1108
1109		.size	proc_types, . - proc_types
1110
1111		/*
1112		 * If you get a "non-constant expression in ".if" statement"
1113		 * error from the assembler on this line, check that you have
1114		 * not accidentally written a "b" instruction where you should
1115		 * have written W(b).
1116		 */
1117		.if (. - proc_types) % PROC_ENTRY_SIZE != 0
1118		.error "The size of one or more proc_types entries is wrong."
1119		.endif
1120
1121/*
1122 * Turn off the Cache and MMU.  ARMv3 does not support
1123 * reading the control register, but ARMv4 does.
1124 *
1125 * On exit,
1126 *  r0, r1, r2, r3, r9, r12 corrupted
1127 * This routine must preserve:
1128 *  r4, r7, r8
1129 */
1130		.align	5
1131cache_off:	mov	r3, #12			@ cache_off function
1132		b	call_cache_fn
1133
1134__armv4_mpu_cache_off:
1135		mrc	p15, 0, r0, c1, c0
1136		bic	r0, r0, #0x000d
1137		mcr	p15, 0, r0, c1, c0	@ turn MPU and cache off
1138		mov	r0, #0
1139		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
1140		mcr	p15, 0, r0, c7, c6, 0	@ flush D-Cache
1141		mcr	p15, 0, r0, c7, c5, 0	@ flush I-Cache
1142		mov	pc, lr
1143
1144__armv3_mpu_cache_off:
1145		mrc	p15, 0, r0, c1, c0
1146		bic	r0, r0, #0x000d
1147		mcr	p15, 0, r0, c1, c0, 0	@ turn MPU and cache off
1148		mov	r0, #0
1149		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
1150		mov	pc, lr
1151
1152__armv4_mmu_cache_off:
1153#ifdef CONFIG_MMU
1154		mrc	p15, 0, r0, c1, c0
1155		bic	r0, r0, #0x000d
1156		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
1157		mov	r0, #0
1158		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
1159		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
1160#endif
1161		mov	pc, lr
1162
1163__armv7_mmu_cache_off:
1164		mrc	p15, 0, r0, c1, c0
1165#ifdef CONFIG_MMU
1166		bic	r0, r0, #0x000d
1167#else
1168		bic	r0, r0, #0x000c
1169#endif
1170		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
1171		mov	r12, lr
1172		bl	__armv7_mmu_cache_flush
1173		mov	r0, #0
1174#ifdef CONFIG_MMU
1175		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB
1176#endif
1177		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
1178		mcr	p15, 0, r0, c7, c10, 4	@ DSB
1179		mcr	p15, 0, r0, c7, c5, 4	@ ISB
1180		mov	pc, r12
1181
1182/*
1183 * Clean and flush the cache to maintain consistency.
1184 *
1185 * On exit,
1186 *  r1, r2, r3, r9, r10, r11, r12 corrupted
1187 * This routine must preserve:
1188 *  r4, r6, r7, r8
1189 */
1190		.align	5
1191cache_clean_flush:
1192		mov	r3, #16
1193		b	call_cache_fn
1194
1195__armv4_mpu_cache_flush:
1196		tst	r4, #1
1197		movne	pc, lr
1198		mov	r2, #1
1199		mov	r3, #0
1200		mcr	p15, 0, ip, c7, c6, 0	@ invalidate D cache
1201		mov	r1, #7 << 5		@ 8 segments
12021:		orr	r3, r1, #63 << 26	@ 64 entries
12032:		mcr	p15, 0, r3, c7, c14, 2	@ clean & invalidate D index
1204		subs	r3, r3, #1 << 26
1205		bcs	2b			@ entries 63 to 0
1206		subs 	r1, r1, #1 << 5
1207		bcs	1b			@ segments 7 to 0
1208
1209		teq	r2, #0
1210		mcrne	p15, 0, ip, c7, c5, 0	@ invalidate I cache
1211		mcr	p15, 0, ip, c7, c10, 4	@ drain WB
1212		mov	pc, lr
1213
1214__fa526_cache_flush:
1215		tst	r4, #1
1216		movne	pc, lr
1217		mov	r1, #0
1218		mcr	p15, 0, r1, c7, c14, 0	@ clean and invalidate D cache
1219		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
1220		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1221		mov	pc, lr
1222
1223__armv6_mmu_cache_flush:
1224		mov	r1, #0
1225		tst	r4, #1
1226		mcreq	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
1227		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
1228		mcreq	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
1229		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1230		mov	pc, lr
1231
1232__armv7_mmu_cache_flush:
1233		enable_cp15_barriers	r10
1234		tst	r4, #1
1235		bne	iflush
1236		mrc	p15, 0, r10, c0, c1, 5	@ read ID_MMFR1
1237		tst	r10, #0xf << 16		@ hierarchical cache (ARMv7)
1238		mov	r10, #0
1239		beq	hierarchical
1240		mcr	p15, 0, r10, c7, c14, 0	@ clean+invalidate D
1241		b	iflush
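		/*
		 * The code below is the usual ARMv7 clean+invalidate by
		 * set/way walk; roughly (pseudo-C, illustration only):
		 *
		 *	for (level = 0; level < LoC; level++) {
		 *		if (cache type at this level is I-only or none)
		 *			continue;
		 *		select level in CSSELR, read geometry from CCSIDR;
		 *		for (set = sets - 1; set >= 0; set--)
		 *			for (way = ways - 1; way >= 0; way--)
		 *				clean+invalidate (DCCISW) that line;
		 *	}
		 */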
1242hierarchical:
1243		mcr	p15, 0, r10, c7, c10, 5	@ DMB
1244		stmfd	sp!, {r0-r7, r9-r11}
1245		mrc	p15, 1, r0, c0, c0, 1	@ read clidr
1246		ands	r3, r0, #0x7000000	@ extract loc from clidr
1247		mov	r3, r3, lsr #23		@ left align loc bit field
1248		beq	finished		@ if loc is 0, then no need to clean
1249		mov	r10, #0			@ start clean at cache level 0
1250loop1:
1251		add	r2, r10, r10, lsr #1	@ work out 3x current cache level
1252		mov	r1, r0, lsr r2		@ extract cache type bits from clidr
1253		and	r1, r1, #7		@ mask of the bits for current cache only
1254		cmp	r1, #2			@ see what cache we have at this level
1255		blt	skip			@ skip if no cache, or just i-cache
1256		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
1257		mcr	p15, 0, r10, c7, c5, 4	@ isb to sync the new cssr & csidr
1258		mrc	p15, 1, r1, c0, c0, 0	@ read the new csidr
1259		and	r2, r1, #7		@ extract the length of the cache lines
1260		add	r2, r2, #4		@ add 4 (line length offset)
1261		ldr	r4, =0x3ff
1262		ands	r4, r4, r1, lsr #3	@ extract maximum way number (ways - 1)
1263		clz	r5, r4			@ find bit position of way size increment
1264		ldr	r7, =0x7fff
1265		ands	r7, r7, r1, lsr #13	@ extract maximum set index (sets - 1)
1266loop2:
1267		mov	r9, r4			@ create working copy of max way size
1268loop3:
1269 ARM(		orr	r11, r10, r9, lsl r5	) @ factor way and cache number into r11
1270 ARM(		orr	r11, r11, r7, lsl r2	) @ factor index number into r11
1271 THUMB(		lsl	r6, r9, r5		)
1272 THUMB(		orr	r11, r10, r6		) @ factor way and cache number into r11
1273 THUMB(		lsl	r6, r7, r2		)
1274 THUMB(		orr	r11, r11, r6		) @ factor index number into r11
1275		mcr	p15, 0, r11, c7, c14, 2	@ clean & invalidate by set/way
1276		subs	r9, r9, #1		@ decrement the way
1277		bge	loop3
1278		subs	r7, r7, #1		@ decrement the index
1279		bge	loop2
1280skip:
1281		add	r10, r10, #2		@ increment cache number
1282		cmp	r3, r10
1283		bgt	loop1
1284finished:
1285		ldmfd	sp!, {r0-r7, r9-r11}
1286		mov	r10, #0			@ switch back to cache level 0
1287		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
1288iflush:
1289		mcr	p15, 0, r10, c7, c10, 4	@ DSB
1290		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I+BTB
1291		mcr	p15, 0, r10, c7, c10, 4	@ DSB
1292		mcr	p15, 0, r10, c7, c5, 4	@ ISB
1293		mov	pc, lr
1294
1295__armv5tej_mmu_cache_flush:
1296		tst	r4, #1
1297		movne	pc, lr
12981:		mrc	p15, 0, APSR_nzcv, c7, c14, 3	@ test,clean,invalidate D cache
1299		bne	1b
1300		mcr	p15, 0, r0, c7, c5, 0	@ flush I cache
1301		mcr	p15, 0, r0, c7, c10, 4	@ drain WB
1302		mov	pc, lr
1303
1304__armv4_mmu_cache_flush:
1305		tst	r4, #1
1306		movne	pc, lr
1307		mov	r2, #64*1024		@ default: 32K dcache size (*2)
1308		mov	r11, #32		@ default: 32 byte line size
1309		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
1310		teq	r3, r9			@ cache ID register present?
1311		beq	no_cache_id
1312		mov	r1, r3, lsr #18
1313		and	r1, r1, #7
1314		mov	r2, #1024
1315		mov	r2, r2, lsl r1		@ base dcache size *2
1316		tst	r3, #1 << 14		@ test M bit
1317		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
1318		mov	r3, r3, lsr #12
1319		and	r3, r3, #3
1320		mov	r11, #8
1321		mov	r11, r11, lsl r3	@ cache line size in bytes
1322no_cache_id:
1323		mov	r1, pc
1324		bic	r1, r1, #63		@ align to longest cache line
1325		add	r2, r1, r2
13261:
1327 ARM(		ldr	r3, [r1], r11		) @ s/w flush D cache
1328 THUMB(		ldr     r3, [r1]		) @ s/w flush D cache
1329 THUMB(		add     r1, r1, r11		)
1330		teq	r1, r2
1331		bne	1b
1332
1333		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
1334		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
1335		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
1336		mov	pc, lr
1337
1338__armv3_mmu_cache_flush:
1339__armv3_mpu_cache_flush:
1340		tst	r4, #1
1341		movne	pc, lr
1342		mov	r1, #0
1343		mcr	p15, 0, r1, c7, c0, 0	@ invalidate whole cache v3
1344		mov	pc, lr
1345
1346/*
1347 * Various debugging routines for printing hex characters and
1348 * memory, which again must be relocatable.
1349 */
1350#ifdef DEBUG
1351		.align	2
1352		.type	phexbuf,#object
1353phexbuf:	.space	12
1354		.size	phexbuf, . - phexbuf
1355
1356@ phex corrupts {r0, r1, r2, r3}
1357phex:		adr	r3, phexbuf
1358		mov	r2, #0
1359		strb	r2, [r3, r1]
13601:		subs	r1, r1, #1
1361		movmi	r0, r3
1362		bmi	puts
1363		and	r2, r0, #15
1364		mov	r0, r0, lsr #4
1365		cmp	r2, #10
1366		addge	r2, r2, #7
1367		add	r2, r2, #'0'
1368		strb	r2, [r3, r1]
1369		b	1b
1370
1371@ puts corrupts {r0, r1, r2, r3}
1372puts:		loadsp	r3, r2, r1
13731:		ldrb	r2, [r0], #1
1374		teq	r2, #0
1375		moveq	pc, lr
13762:		writeb	r2, r3
1377		mov	r1, #0x00020000
13783:		subs	r1, r1, #1
1379		bne	3b
1380		teq	r2, #'\n'
1381		moveq	r2, #'\r'
1382		beq	2b
1383		teq	r0, #0
1384		bne	1b
1385		mov	pc, lr
1386@ putc corrupts {r0, r1, r2, r3}
1387putc:
1388		mov	r2, r0
1389		loadsp	r3, r1, r0
1390		mov	r0, #0
1391		b	2b
1392
1393@ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
1394memdump:	mov	r12, r0
1395		mov	r10, lr
1396		mov	r11, #0
13972:		mov	r0, r11, lsl #2
1398		add	r0, r0, r12
1399		mov	r1, #8
1400		bl	phex
1401		mov	r0, #':'
1402		bl	putc
14031:		mov	r0, #' '
1404		bl	putc
1405		ldr	r0, [r12, r11, lsl #2]
1406		mov	r1, #8
1407		bl	phex
1408		and	r0, r11, #7
1409		teq	r0, #3
1410		moveq	r0, #' '
1411		bleq	putc
1412		and	r0, r11, #7
1413		add	r11, r11, #1
1414		teq	r0, #7
1415		bne	1b
1416		mov	r0, #'\n'
1417		bl	putc
1418		cmp	r11, #64
1419		blt	2b
1420		mov	pc, r10
1421#endif
1422
1423		.ltorg
1424
1425#ifdef CONFIG_ARM_VIRT_EXT
1426.align 5
1427__hyp_reentry_vectors:
1428		W(b)	.			@ reset
1429		W(b)	.			@ undef
1430		W(b)	.			@ svc
1431		W(b)	.			@ pabort
1432		W(b)	.			@ dabort
1433		W(b)	__enter_kernel		@ hyp
1434		W(b)	.			@ irq
1435		W(b)	.			@ fiq
1436#endif /* CONFIG_ARM_VIRT_EXT */
1437
1438__enter_kernel:
1439		mov	r0, #0			@ must be 0
1440		mov	r1, r7			@ restore architecture number
1441		mov	r2, r8			@ restore atags pointer
1442 ARM(		mov	pc, r4		)	@ call kernel
1443 M_CLASS(	add	r4, r4, #1	)	@ enter in Thumb mode for M class
1444 THUMB(		bx	r4		)	@ entry point is always ARM for A/R classes
1445
1446reloc_code_end:
1447
1448#ifdef CONFIG_EFI_STUB
1449		.align	2
1450_start:		.long	start - .
1451
1452ENTRY(efi_stub_entry)
1453		@ allocate space on the stack for passing the current zImage
1454		@ address and for the EFI stub to return the new entry point of
1455		@ the zImage, as the EFI stub may copy the kernel. The pointer
1456		@ address is passed in r2. r0 and r1 are passed through from the
1457		@ EFI firmware to efi_entry().
1458		adr	ip, _start
1459		ldr	r3, [ip]
1460		add	r3, r3, ip
1461		stmfd	sp!, {r3, lr}
1462		mov	r2, sp			@ pass zImage address in r2
1463		bl	efi_entry
1464
1465		@ Check for error return from EFI stub. r0 has FDT address
1466		@ or error code.
1467		cmn	r0, #1
1468		beq	efi_load_fail
1469
1470		@ Preserve return value of efi_entry() in r4
1471		mov	r4, r0
1472		add	r1, r4, #SZ_2M			@ DT end
1473		bl	cache_clean_flush
1474
1475		ldr	r0, [sp]			@ relocated zImage
1476		ldr	r1, =_edata			@ size of zImage
1477		add	r1, r1, r0			@ end of zImage
1478		bl	cache_clean_flush
1479
1480		@ The PE/COFF loader might not have cleaned the code we are
1481		@ running beyond the PoU, and so calling cache_off below from
1482		@ inside the PE/COFF loader allocated region is unsafe. Let's
1483		@ assume our own zImage relocation code did a better job, and
1484		@ jump into its version of this routine before proceeding.
1485		ldr	r0, [sp]			@ relocated zImage
1486		ldr	r1, .Ljmp
1487		sub	r1, r0, r1
1488		mov	pc, r1				@ no mode switch
14890:
1490		bl	cache_off
1491
1492		@ Set parameters for booting zImage according to the boot protocol:
1493		@ put the FDT address (returned by efi_entry()) in r2,
1494		@ r1 is the machine type, and r0 needs to be 0
1495		mov	r0, #0
1496		mov	r1, #0xFFFFFFFF
1497		mov	r2, r4
1498		b	__efi_start
1499
1500efi_load_fail:
1501		@ Return EFI_LOAD_ERROR to EFI firmware on error.
1502		ldr	r0, =0x80000001
1503		ldmfd	sp!, {ip, pc}
1504ENDPROC(efi_stub_entry)
1505		.align	2
1506.Ljmp:		.long	start - 0b
1507#endif
1508
1509		.align
1510		.section ".stack", "aw", %nobits
1511.L_user_stack:	.space	4096
1512.L_user_stack_end:
1513