/*
 *  linux/arch/arm/boot/compressed/head.S
 *
 *  Copyright (C) 1996-2002 Russell King
 *  Copyright (C) 2004 Hyok S. Choi (MPU support)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/linkage.h>

/*
 * Debugging stuff
 *
 * Note that these macros must not contain any code which is not
 * 100% relocatable.  Any attempt to do so will result in a crash.
 * Please select one of the following when turning on debugging.
 */
#ifdef DEBUG

#if defined(CONFIG_DEBUG_ICEDCC)

#ifdef CONFIG_CPU_V6
		.macro	loadsp, rb
		.endm
		.macro	writeb, ch, rb
		mcr	p14, 0, \ch, c0, c5, 0
		.endm
#elif defined(CONFIG_CPU_XSCALE)
		.macro	loadsp, rb
		.endm
		.macro	writeb, ch, rb
		mcr	p14, 0, \ch, c8, c0, 0
		.endm
#else
		.macro	loadsp, rb
		.endm
		.macro	writeb, ch, rb
		mcr	p14, 0, \ch, c1, c0, 0
		.endm
#endif

#else

#include <mach/debug-macro.S>

		.macro	writeb,	ch, rb
		senduart \ch, \rb
		.endm

#if defined(CONFIG_ARCH_SA1100)
		.macro	loadsp, rb
		mov	\rb, #0x80000000	@ physical base address
#ifdef CONFIG_DEBUG_LL_SER3
		add	\rb, \rb, #0x00050000	@ Ser3
#else
		add	\rb, \rb, #0x00010000	@ Ser1
#endif
		.endm
#elif defined(CONFIG_ARCH_S3C2410)
		.macro	loadsp, rb
		mov	\rb, #0x50000000
		add	\rb, \rb, #0x4000 * CONFIG_S3C_LOWLEVEL_UART_PORT
		.endm
#else
		.macro	loadsp,	rb
		addruart \rb
		.endm
#endif
#endif
#endif

		.macro	kputc,val
		mov	r0, \val
		bl	putc
		.endm

		.macro	kphex,val,len
		mov	r0, \val
		mov	r1, #\len
		bl	phex
		.endm
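
		/*
		 * Usage sketch (illustrative): these expand to calls into
		 * the putc/phex routines at the end of this file, e.g.
		 *
		 *	kputc	#'\n'		@ print a newline
		 *	kphex	r6, 8		@ print r6 as 8 hex digits
		 *
		 * and corrupt r0/r1 (plus whatever putc/phex corrupt).
		 */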

		.macro	debug_reloc_start
#ifdef DEBUG
		kputc	#'\n'
		kphex	r6, 8		/* processor id */
		kputc	#':'
		kphex	r7, 8		/* architecture id */
#ifdef CONFIG_CPU_CP15
		kputc	#':'
		mrc	p15, 0, r0, c1, c0
		kphex	r0, 8		/* control reg */
#endif
		kputc	#'\n'
		kphex	r5, 8		/* decompressed kernel start */
		kputc	#'-'
		kphex	r9, 8		/* decompressed kernel end  */
		kputc	#'>'
		kphex	r4, 8		/* kernel execution address */
		kputc	#'\n'
#endif
		.endm

		.macro	debug_reloc_end
#ifdef DEBUG
		kphex	r5, 8		/* end of kernel */
		kputc	#'\n'
		mov	r0, r4
		bl	memdump		/* dump 256 bytes at start of kernel */
#endif
		.endm

		.section ".start", #alloc, #execinstr
/*
 * sort out different calling conventions
 */
		.align
start:
		.type	start,#function
		.rept	8
		mov	r0, r0
		.endr

		b	1f
		.word	0x016f2818		@ Magic numbers to help the loader
		.word	start			@ absolute load/run zImage address
		.word	_edata			@ zImage end address
1:		mov	r7, r1			@ save architecture ID
		mov	r8, r2			@ save atags pointer

#ifndef __ARM_ARCH_2__
		/*
		 * Booting from Angel - need to enter SVC mode and disable
		 * FIQs/IRQs (numeric definitions from angel arm.h source).
		 * We only do this if we were in user mode on entry.
		 */
		mrs	r2, cpsr		@ get current mode
		tst	r2, #3			@ not user?
		bne	not_angel
		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
		swi	0x123456		@ angel_SWI_ARM
not_angel:
		mrs	r2, cpsr		@ turn off interrupts to
		orr	r2, r2, #0xc0		@ prevent angel from running
		msr	cpsr_c, r2
#else
		teqp	pc, #0x0c000003		@ turn off interrupts
#endif

		/*
		 * Note that some cache flushing and other stuff may
		 * be needed here - is there an Angel SWI call for this?
		 */

		/*
		 * some architecture specific code can be inserted
		 * by the linker here, but it should preserve r7, r8, and r9.
		 */

		.text
		adr	r0, LC0
		ldmia	r0, {r1, r2, r3, r4, r5, r6, ip, sp}
		subs	r0, r0, r1		@ calculate the delta offset

						@ if delta is zero, we are
		beq	not_relocated		@ running at the address we
						@ were linked at.
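
		/*
		 * Illustration (made-up numbers): "adr r0, LC0" yields
		 * LC0's run-time address, while LC0's first word holds its
		 * link-time address.  If the image was linked to run at
		 * 0x00008000 but was loaded at 0x20008000, the difference
		 * r0 - r1 = 0x20000000 is the delta by which every
		 * absolute address below must be adjusted.
		 */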

		/*
		 * We're running at a different address.  We need to fix
		 * up various pointers:
		 *   r5 - zImage base address
		 *   r6 - GOT start
		 *   ip - GOT end
		 */
		add	r5, r5, r0
		add	r6, r6, r0
		add	ip, ip, r0

#ifndef CONFIG_ZBOOT_ROM
		/*
		 * If we're running fully PIC (CONFIG_ZBOOT_ROM = n),
		 * we need to fix up pointers into the BSS region too:
		 *   r2 - BSS start
		 *   r3 - BSS end
		 *   sp - stack pointer
		 */
		add	r2, r2, r0
		add	r3, r3, r0
		add	sp, sp, r0

		/*
		 * Relocate all entries in the GOT table.
		 */
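		/*
		 * Roughly equivalent C (illustrative only):
		 *
		 *	for (u32 *p = got_start; p < got_end; p++)
		 *		*p += delta;
		 */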
1:		ldr	r1, [r6, #0]		@ relocate entries in the GOT
		add	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r6], #4		@ C references.
		cmp	r6, ip
		blo	1b
#else

		/*
		 * Relocate entries in the GOT table.  We only relocate
		 * the entries that are outside the (relocated) BSS region.
		 */
1:		ldr	r1, [r6, #0]		@ relocate entries in the GOT
		cmp	r1, r2			@ entry < bss_start ||
		cmphs	r3, r1			@ _end < entry
		addlo	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r6], #4		@ C references.
		cmp	r6, ip
		blo	1b
#endif

not_relocated:	mov	r0, #0
1:		str	r0, [r2], #4		@ clear bss
		str	r0, [r2], #4
		str	r0, [r2], #4
		str	r0, [r2], #4
		cmp	r2, r3
		blo	1b

		/*
		 * The C runtime environment should now be set up
		 * sufficiently.  Turn the cache on, set up some
		 * pointers, and start decompressing.
		 */
		bl	cache_on

		mov	r1, sp			@ malloc space above stack
		add	r2, sp, #0x10000	@ 64k max

/*
 * Check to see if we will overwrite ourselves.
 *   r4 = final kernel address
 *   r5 = start of this image
 *   r2 = end of malloc space (and therefore this image)
 * We basically want:
 *   r4 >= r2 -> OK
 *   r4 + image length <= r5 -> OK
 */
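		/*
		 * Worked example (made-up addresses): with the image at
		 * r5 = 0x00008000, malloc end r2 = 0x00048000 and final
		 * kernel address r4 = 0x00008000, neither test passes:
		 * r4 < r2, and r4 + 4*(compressed size) > r5, so the
		 * kernel is decompressed above the malloc space and
		 * relocated into place afterwards.
		 */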
		cmp	r4, r2
		bhs	wont_overwrite
		sub	r3, sp, r5		@ > compressed kernel size
		add	r0, r4, r3, lsl #2	@ allow for 4x expansion
		cmp	r0, r5
		bls	wont_overwrite

		mov	r5, r2			@ decompress after malloc space
		mov	r0, r5
		mov	r3, r7
		bl	decompress_kernel

		add	r0, r0, #127 + 128	@ alignment + stack
		bic	r0, r0, #127		@ align the kernel length
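		/*
		 * e.g. (illustrative) a decompressed length of 0x1234
		 * becomes (0x1234 + 127 + 128) & ~127 = 0x1300: rounded
		 * up to a 128-byte boundary, with 128 bytes on top for
		 * the relocation stack.
		 */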
/*
 * r0     = decompressed kernel length
 * r1-r3  = unused
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r6     = processor ID
 * r7     = architecture ID
 * r8     = atags pointer
 * r9-r14 = corrupted
 */
		add	r1, r5, r0		@ end of decompressed kernel
		adr	r2, reloc_start
		ldr	r3, LC1
		add	r3, r2, r3
1:		ldmia	r2!, {r9 - r14}		@ copy relocation code
		stmia	r1!, {r9 - r14}
		ldmia	r2!, {r9 - r14}
		stmia	r1!, {r9 - r14}
		cmp	r2, r3
		blo	1b
		add	sp, r1, #128		@ relocate the stack

		bl	cache_clean_flush
		add	pc, r5, r0		@ call relocation code

/*
 * We're not in danger of overwriting ourselves.  Do this the simple way.
 *
 * r4     = kernel execution address
 * r7     = architecture ID
 */
wont_overwrite:	mov	r0, r4
		mov	r3, r7
		bl	decompress_kernel
		b	call_kernel

		.type	LC0, #object
LC0:		.word	LC0			@ r1
		.word	__bss_start		@ r2
		.word	_end			@ r3
		.word	zreladdr		@ r4
		.word	_start			@ r5
		.word	_got_start		@ r6
		.word	_got_end		@ ip
		.word	user_stack+4096		@ sp
LC1:		.word	reloc_end - reloc_start
		.size	LC0, . - LC0

#ifdef CONFIG_ARCH_RPC
		.globl	params
params:		ldr	r0, =params_phys
		mov	pc, lr
		.ltorg
		.align
#endif

/*
 * Turn on the cache.  We need to set up some page tables so that we
 * can have both the I and D caches on.
 *
 * We place the page tables 16k down from the kernel execution address,
 * and we hope that nothing else is using it.  If that memory is
 * already in use, we will go pop!
 *
 * On entry,
 *  r4 = kernel execution address
 *  r6 = processor ID
 *  r7 = architecture number
 *  r8 = atags pointer
 *  r9 = run-time address of "start"  (???)
 * On exit,
 *  r1, r2, r3, r9, r10, r12 corrupted
 * This routine must preserve:
 *  r4, r5, r6, r7, r8
 */
		.align	5
cache_on:	mov	r3, #8			@ cache_on function
		b	call_cache_fn

/*
 * Initialize the highest priority protection region, PR7,
 * to cover the whole 32-bit address space, cacheable and bufferable.
 */
__armv4_mpu_cache_on:
		mov	r0, #0x3f		@ 4G, the whole
		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
		mcr	p15, 0, r0, c6, c7, 1

		mov	r0, #0x80		@ PR7
		mcr	p15, 0, r0, c2, c0, 0	@ D-cache on
		mcr	p15, 0, r0, c2, c0, 1	@ I-cache on
		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on

		mov	r0, #0xc000
		mcr	p15, 0, r0, c5, c0, 1	@ I-access permission
		mcr	p15, 0, r0, c5, c0, 0	@ D-access permission

		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
						@ ...I .... ..D. WC.M
		orr	r0, r0, #0x002d		@ .... .... ..1. 11.1
		orr	r0, r0, #0x1000		@ ...1 .... .... ....

		mcr	p15, 0, r0, c1, c0, 0	@ write control reg

		mov	r0, #0
		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
		mov	pc, lr

__armv3_mpu_cache_on:
		mov	r0, #0x3f		@ 4G, the whole
		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting

		mov	r0, #0x80		@ PR7
		mcr	p15, 0, r0, c2, c0, 0	@ cache on
		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on

		mov	r0, #0xc000
		mcr	p15, 0, r0, c5, c0, 0	@ access permission

		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		/*
		 * ?? ARMv3 MMU does not allow reading the control register,
		 * does this really work on ARMv3 MPU?
		 */
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
						@ .... .... .... WC.M
		orr	r0, r0, #0x000d		@ .... .... .... 11.1
		/*
		 * ?? this overwrites the value constructed above with zero,
		 * which looks unintentional; left as-is pending confirmation
		 * on real ARMv3 MPU hardware.
		 */
		mov	r0, #0
		mcr	p15, 0, r0, c1, c0, 0	@ write control reg

		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
		bic	r3, r3, #0xff		@ Align the pointer
		bic	r3, r3, #0x3f00
/*
 * Initialise the page tables, turning on the cacheable and bufferable
 * bits for the RAM area only.
 */
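/*
 * Descriptor sketch (assuming the classic ARMv4/v5 short-descriptor
 * format): 0x12 = 1MB section (bits 1:0 = 10, bit 4 set), 3 << 10 =
 * AP full access, and 0x0c sets the C and B bits for cacheable,
 * bufferable RAM.  The two bic instructions above clear the low 14
 * bits, 16k-aligning the table placed just below the kernel.
 */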
		mov	r0, r3
		mov	r9, r0, lsr #18
		mov	r9, r9, lsl #18		@ start of RAM
		add	r10, r9, #0x10000000	@ a reasonable RAM size
		mov	r1, #0x12
		orr	r1, r1, #3 << 10
		add	r2, r3, #16384
1:		cmp	r1, r9			@ if virt > start of RAM
		orrhs	r1, r1, #0x0c		@ set cacheable, bufferable
		cmp	r1, r10			@ if virt > end of RAM
		bichs	r1, r1, #0x0c		@ clear cacheable, bufferable
		str	r1, [r0], #4		@ 1:1 mapping
		add	r1, r1, #1048576
		teq	r0, r2
		bne	1b
/*
 * If we are running from Flash, we want the cache enabled for our
 * execution image as well.  We map 2MB of it so there is no map
 * overlap problem for up to a 1MB compressed kernel.  If we are
 * executing from RAM, this merely duplicates part of the mapping
 * above.
 */
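/*
 * Illustration (made-up pc): executing at 0x081234xx, pc lsr #20 =
 * 0x081, so the two 1MB sections at 0x08100000 and 0x08200000 are
 * mapped with 0x1e | 3 << 10, the same section + C + B + AP encoding
 * the loop above uses for cacheable RAM.
 */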
		mov	r1, #0x1e
		orr	r1, r1, #3 << 10
		mov	r2, pc, lsr #20
		orr	r1, r1, r2, lsl #20
		add	r0, r3, r2, lsl #2
		str	r1, [r0], #4
		add	r1, r1, #1048576
		str	r1, [r0]
		mov	pc, lr
ENDPROC(__setup_mmu)

__armv4_mmu_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
		orr	r0, r0, #0x0030
		bl	__common_mmu_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mov	pc, r12

__armv7_mmu_cache_on:
		mov	r12, lr
		mrc	p15, 0, r11, c0, c1, 4	@ read ID_MMFR0
		tst	r11, #0xf		@ VMSA
		blne	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		tst	r11, #0xf		@ VMSA
		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
		orr	r0, r0, #0x003c		@ write buffer
		orrne	r0, r0, #1		@ MMU enabled
		movne	r1, #-1
		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
		mcr	p15, 0, r0, c1, c0, 0	@ load control register
		mrc	p15, 0, r0, c1, c0, 0	@ and read it back
		mov	r0, #0
		mcr	p15, 0, r0, c7, c5, 4	@ ISB
		mov	pc, r12

__arm6_mmu_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	r0, #0x30
		bl	__common_mmu_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	pc, r12

__common_mmu_cache_on:
#ifndef DEBUG
		orr	r0, r0, #0x000d		@ Write buffer, mmu
#endif
		mov	r1, #-1
		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
		b	1f
		.align	5			@ cache line aligned
1:		mcr	p15, 0, r0, c1, c0, 0	@ load control register
		mrc	p15, 0, r0, c1, c0, 0	@ and read it back to
		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
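		/*
		 * Note: "r0, lsr #32" always evaluates to zero, so this
		 * is effectively "mov pc, lr" with a data dependency on
		 * the control register read-back, forcing the pipeline
		 * to drain before we return with the MMU enabled.
		 */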

/*
 * All code following this line is relocatable.  It is relocated by
 * the above code to the end of the decompressed kernel image and
 * executed there.  During this time, we have no stacks.
 *
 * r0     = decompressed kernel length
 * r1-r3  = unused
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r6     = processor ID
 * r7     = architecture ID
 * r8     = atags pointer
 * r9-r14 = corrupted
 */
		.align	5
reloc_start:	add	r9, r5, r0
		sub	r9, r9, #128		@ do not copy the stack
		debug_reloc_start
		mov	r1, r4
1:
		.rept	4
		ldmia	r5!, {r0, r2, r3, r10 - r14}	@ relocate kernel
		stmia	r1!, {r0, r2, r3, r10 - r14}
		.endr

		cmp	r5, r9
		blo	1b
		add	sp, r1, #128		@ relocate the stack
		debug_reloc_end

call_kernel:	bl	cache_clean_flush
		bl	cache_off
		mov	r0, #0			@ must be zero
		mov	r1, r7			@ restore architecture number
		mov	r2, r8			@ restore atags pointer
		mov	pc, r4			@ call kernel

/*
 * Here follow the relocatable cache support functions for the
 * various processors.  This is a generic hook for locating an
 * entry and jumping to an instruction at the specified offset
 * from the start of the block.  Please note this is all position
 * independent code.
 *
 *  r1  = corrupted
 *  r2  = corrupted
 *  r3  = block offset
 *  r6  = corrupted
 *  r12 = corrupted
 */
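/*
 * Each proc_types entry below is five words: ID match value, ID
 * mask, then one instruction slot each for the 'on', 'off' and
 * 'flush' methods.  cache_on/cache_off/cache_clean_flush pass
 * r3 = 8/12/16 respectively, so "addeq pc, r12, r3" lands on the
 * right slot of the matching entry.
 */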

call_cache_fn:	adr	r12, proc_types
#ifdef CONFIG_CPU_CP15
		mrc	p15, 0, r6, c0, c0	@ get processor ID
#else
		ldr	r6, =CONFIG_PROCESSOR_ID
#endif
1:		ldr	r1, [r12, #0]		@ get value
		ldr	r2, [r12, #4]		@ get mask
		eor	r1, r1, r6		@ (real ^ match)
		tst	r1, r2			@       & mask
		addeq	pc, r12, r3		@ call cache function
		add	r12, r12, #4*5
		b	1b

/*
 * Table for cache operations.  This is basically:
 *   - CPU ID match
 *   - CPU ID mask
 *   - 'cache on' method instruction
 *   - 'cache off' method instruction
 *   - 'cache flush' method instruction
 *
 * We match an entry using: ((real_id ^ match) & mask) == 0
 *
 * Writethrough caches generally only need 'on' and 'off'
 * methods.  Writeback caches _must_ have the flush method
 * defined.
 */
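/*
 * Example (made-up ID): a processor ID of 0x4401a105 selects the
 * sa110/sa1100 entry below, since
 * (0x4401a105 ^ 0x4401a100) & 0xffffffe0 == 0.
 */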
		.type	proc_types,#object
proc_types:
		.word	0x41560600		@ ARM6/610
		.word	0xffffffe0
		b	__arm6_mmu_cache_off	@ works, but slow
		b	__arm6_mmu_cache_off
		mov	pc, lr
@		b	__arm6_mmu_cache_on		@ untested
@		b	__arm6_mmu_cache_off
@		b	__armv3_mmu_cache_flush

		.word	0x00000000		@ old ARM ID
		.word	0x0000f000
		mov	pc, lr
		mov	pc, lr
		mov	pc, lr

		.word	0x41007000		@ ARM7/710
		.word	0xfff8fe00
		b	__arm7_mmu_cache_off
		b	__arm7_mmu_cache_off
		mov	pc, lr

		.word	0x41807200		@ ARM720T (writethrough)
		.word	0xffffff00
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		mov	pc, lr

		.word	0x41007400		@ ARM74x
		.word	0xff00ff00
		b	__armv3_mpu_cache_on
		b	__armv3_mpu_cache_off
		b	__armv3_mpu_cache_flush

		.word	0x41009400		@ ARM94x
		.word	0xff00ff00
		b	__armv4_mpu_cache_on
		b	__armv4_mpu_cache_off
		b	__armv4_mpu_cache_flush

		.word	0x00007000		@ ARM7 IDs
		.word	0x0000f000
		mov	pc, lr
		mov	pc, lr
		mov	pc, lr

		@ Everything from here on will be the new ID system.

		.word	0x4401a100		@ sa110 / sa1100
		.word	0xffffffe0
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv4_mmu_cache_flush

		.word	0x6901b110		@ sa1110
		.word	0xfffffff0
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv4_mmu_cache_flush

		.word	0x56056930
		.word	0xff0ffff0		@ PXA935
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv4_mmu_cache_flush

		.word	0x56158000		@ PXA168
		.word	0xfffff000
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv5tej_mmu_cache_flush
		.word	0x56050000		@ Feroceon
		.word	0xff0f0000
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv5tej_mmu_cache_flush

		@ These match on the architecture ID

		.word	0x00020000		@ ARMv4T
		.word	0x000f0000
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv4_mmu_cache_flush

		.word	0x00050000		@ ARMv5TE
		.word	0x000f0000
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv4_mmu_cache_flush

		.word	0x00060000		@ ARMv5TEJ
		.word	0x000f0000
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv5tej_mmu_cache_flush

		.word	0x0007b000		@ ARMv6
		.word	0x000ff000
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv6_mmu_cache_flush

		.word	0x000f0000		@ new CPU Id
		.word	0x000f0000
		b	__armv7_mmu_cache_on
		b	__armv7_mmu_cache_off
		b	__armv7_mmu_cache_flush

		.word	0			@ unrecognised type
		.word	0
		mov	pc, lr
		mov	pc, lr
		mov	pc, lr

		.size	proc_types, . - proc_types

/*
 * Turn off the Cache and MMU.  ARMv3 does not support
 * reading the control register, but ARMv4 does.
 *
 * On entry,  r6 = processor ID
 * On exit,   r0, r1, r2, r3, r12 corrupted
 * This routine must preserve: r4, r6, r7
 */
		.align	5
cache_off:	mov	r3, #12			@ cache_off function
		b	call_cache_fn

__armv4_mpu_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MPU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c7, c6, 0	@ flush D-Cache
		mcr	p15, 0, r0, c7, c5, 0	@ flush I-Cache
		mov	pc, lr

__armv3_mpu_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0, 0	@ turn MPU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

__armv4_mmu_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
		mov	pc, lr

__armv7_mmu_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mov	r12, lr
		bl	__armv7_mmu_cache_flush
		mov	r0, #0
		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB
		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
		mcr	p15, 0, r0, c7, c10, 4	@ DSB
		mcr	p15, 0, r0, c7, c5, 4	@ ISB
		mov	pc, r12

__arm6_mmu_cache_off:
		mov	r0, #0x00000030		@ ARM6 control reg.
		b	__armv3_mmu_cache_off

__arm7_mmu_cache_off:
		mov	r0, #0x00000070		@ ARM7 control reg.
		b	__armv3_mmu_cache_off

__armv3_mmu_cache_off:
		mcr	p15, 0, r0, c1, c0, 0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	pc, lr

/*
 * Clean and flush the cache to maintain consistency.
 *
 * On entry,
 *  r6 = processor ID
 * On exit,
 *  r1, r2, r3, r11, r12 corrupted
 * This routine must preserve:
 *  r0, r4, r5, r6, r7
 */
		.align	5
cache_clean_flush:
		mov	r3, #16
		b	call_cache_fn

__armv4_mpu_cache_flush:
		mov	r2, #1
		mov	r3, #0
		mcr	p15, 0, ip, c7, c6, 0	@ invalidate D cache
		mov	r1, #7 << 5		@ 8 segments
1:		orr	r3, r1, #63 << 26	@ 64 entries
2:		mcr	p15, 0, r3, c7, c14, 2	@ clean & invalidate D index
		subs	r3, r3, #1 << 26
		bcs	2b			@ entries 63 to 0
		subs	r1, r1, #1 << 5
		bcs	1b			@ segments 7 to 0

		teq	r2, #0
		mcrne	p15, 0, ip, c7, c5, 0	@ invalidate I cache
		mcr	p15, 0, ip, c7, c10, 4	@ drain WB
		mov	pc, lr


__armv6_mmu_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
		mcr	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr
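
/*
 * Clean and invalidate the whole D-cache hierarchy by set/way, per
 * the ARMv7 ARM: walk each cache level reported by CLIDR and, for
 * every set and way, issue DCCISW with the way packed into the top
 * bits (shift r5), the set shifted by the line-size log2 (r2), and
 * the level in bits 3:1 (r10).
 */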
__armv7_mmu_cache_flush:
		mrc	p15, 0, r10, c0, c1, 5	@ read ID_MMFR1
		tst	r10, #0xf << 16		@ hierarchical cache (ARMv7)
		mov	r10, #0
		beq	hierarchical
		mcr	p15, 0, r10, c7, c14, 0	@ clean+invalidate D
		b	iflush
hierarchical:
		mcr	p15, 0, r10, c7, c10, 5	@ DMB
		stmfd	sp!, {r0-r5, r7, r9, r11}
		mrc	p15, 1, r0, c0, c0, 1	@ read clidr
		ands	r3, r0, #0x7000000	@ extract loc from clidr
		mov	r3, r3, lsr #23		@ left align loc bit field
		beq	finished		@ if loc is 0, then no need to clean
		mov	r10, #0			@ start clean at cache level 0
loop1:
		add	r2, r10, r10, lsr #1	@ work out 3x current cache level
		mov	r1, r0, lsr r2		@ extract cache type bits from clidr
		and	r1, r1, #7		@ mask of the bits for current cache only
		cmp	r1, #2			@ see what cache we have at this level
		blt	skip			@ skip if no cache, or just i-cache
		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
		mcr	p15, 0, r10, c7, c5, 4	@ isb to sync the new cssr&csidr
		mrc	p15, 1, r1, c0, c0, 0	@ read the new csidr
		and	r2, r1, #7		@ extract the length of the cache lines
		add	r2, r2, #4		@ add 4 (line length offset)
		ldr	r4, =0x3ff
		ands	r4, r4, r1, lsr #3	@ find maximum way number (way size - 1)
		clz	r5, r4			@ find bit position of way size increment
		ldr	r7, =0x7fff
		ands	r7, r7, r1, lsr #13	@ extract maximum index (set) number
loop2:
		mov	r9, r4			@ create working copy of max way size
loop3:
		orr	r11, r10, r9, lsl r5	@ factor way and cache number into r11
		orr	r11, r11, r7, lsl r2	@ factor index number into r11
		mcr	p15, 0, r11, c7, c14, 2	@ clean & invalidate by set/way
		subs	r9, r9, #1		@ decrement the way
		bge	loop3
		subs	r7, r7, #1		@ decrement the index
		bge	loop2
skip:
		add	r10, r10, #2		@ increment cache number
		cmp	r3, r10
		bgt	loop1
finished:
		ldmfd	sp!, {r0-r5, r7, r9, r11}
		mov	r10, #0			@ switch back to cache level 0
		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
iflush:
		mcr	p15, 0, r10, c7, c10, 4	@ DSB
		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I+BTB
		mcr	p15, 0, r10, c7, c10, 4	@ DSB
		mcr	p15, 0, r10, c7, c5, 4	@ ISB
		mov	pc, lr

__armv5tej_mmu_cache_flush:
1:		mrc	p15, 0, r15, c7, c14, 3	@ test,clean,invalidate D cache
		bne	1b
		mcr	p15, 0, r0, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r0, c7, c10, 4	@ drain WB
		mov	pc, lr
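
/*
 * ARMv4 has no clean-entire-D-cache operation, so the routine below
 * flushes by reading: loading a region of twice the D-cache size
 * through the cache evicts (and thereby writes back) every dirty
 * line.  Cache size and line length come from the cache type
 * register when one is present.
 */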
__armv4_mmu_cache_flush:
		mov	r2, #64*1024		@ default: 32K dcache size (*2)
		mov	r11, #32		@ default: 32 byte line size
		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
		teq	r3, r6			@ cache ID register present?
		beq	no_cache_id
		mov	r1, r3, lsr #18
		and	r1, r1, #7
		mov	r2, #1024
		mov	r2, r2, lsl r1		@ base dcache size *2
		tst	r3, #1 << 14		@ test M bit
		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
		mov	r3, r3, lsr #12
		and	r3, r3, #3
		mov	r11, #8
		mov	r11, r11, lsl r3	@ cache line size in bytes
no_cache_id:
		bic	r1, pc, #63		@ align to longest cache line
		add	r2, r1, r2
1:		ldr	r3, [r1], r11		@ s/w flush D cache
		teq	r1, r2
		bne	1b

		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv3_mmu_cache_flush:
__armv3_mpu_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

/*
 * Various debugging routines for printing hex characters and
 * memory, which again must be relocatable.
 */
#ifdef DEBUG
		.type	phexbuf,#object
phexbuf:	.space	12
		.size	phexbuf, . - phexbuf

phex:		adr	r3, phexbuf
		mov	r2, #0
		strb	r2, [r3, r1]
1:		subs	r1, r1, #1
		movmi	r0, r3
		bmi	puts
		and	r2, r0, #15
		mov	r0, r0, lsr #4
		cmp	r2, #10
		addge	r2, r2, #7
		add	r2, r2, #'0'
		strb	r2, [r3, r1]
		b	1b

puts:		loadsp	r3
1:		ldrb	r2, [r0], #1
		teq	r2, #0
		moveq	pc, lr
2:		writeb	r2, r3
		mov	r1, #0x00020000
3:		subs	r1, r1, #1
		bne	3b
		teq	r2, #'\n'
		moveq	r2, #'\r'
		beq	2b
		teq	r0, #0
		bne	1b
		mov	pc, lr
putc:
		mov	r2, r0
		mov	r0, #0
		loadsp	r3
		b	2b

memdump:	mov	r12, r0
		mov	r10, lr
		mov	r11, #0
2:		mov	r0, r11, lsl #2
		add	r0, r0, r12
		mov	r1, #8
		bl	phex
		mov	r0, #':'
		bl	putc
1:		mov	r0, #' '
		bl	putc
		ldr	r0, [r12, r11, lsl #2]
		mov	r1, #8
		bl	phex
		and	r0, r11, #7
		teq	r0, #3
		moveq	r0, #' '
		bleq	putc
		and	r0, r11, #7
		add	r11, r11, #1
		teq	r0, #7
		bne	1b
		mov	r0, #'\n'
		bl	putc
		cmp	r11, #64
		blt	2b
		mov	pc, r10
#endif

		.ltorg
reloc_end:

		.align
		.section ".stack", "w"
user_stack:	.space	4096