/*
 *  linux/arch/arm/boot/compressed/head.S
 *
 *  Copyright (C) 1996-2002 Russell King
 *  Copyright (C) 2004 Hyok S. Choi (MPU support)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/linkage.h>

/*
 * Debugging stuff
 *
 * Note that these macros must not contain any code which is not
 * 100% relocatable.  Any attempt to do so will result in a crash.
 * Please select one of the following when turning on debugging.
 */
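/*
 * (DEBUG is not a Kconfig option; it is normally enabled by adding
 * "#define DEBUG" near the top of this file before building.)
 */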
#ifdef DEBUG

#if defined(CONFIG_DEBUG_ICEDCC)

#ifdef CONFIG_CPU_V6
		.macro	loadsp, rb
		.endm
		.macro	writeb, ch, rb
		mcr	p14, 0, \ch, c0, c5, 0
		.endm
#else
		.macro	loadsp, rb
		.endm
		.macro	writeb, ch, rb
		mcr	p14, 0, \ch, c0, c1, 0
		.endm
#endif

#else

#include <asm/arch/debug-macro.S>

		.macro	writeb,	ch, rb
		senduart \ch, \rb
		.endm

#if defined(CONFIG_ARCH_SA1100)
		.macro	loadsp, rb
		mov	\rb, #0x80000000	@ physical base address
#ifdef CONFIG_DEBUG_LL_SER3
		add	\rb, \rb, #0x00050000	@ Ser3
#else
		add	\rb, \rb, #0x00010000	@ Ser1
#endif
		.endm
#elif defined(CONFIG_ARCH_S3C2410)
		.macro loadsp, rb
		mov	\rb, #0x50000000
		add	\rb, \rb, #0x4000 * CONFIG_S3C2410_LOWLEVEL_UART_PORT
		.endm
#else
		.macro	loadsp,	rb
		addruart \rb
		.endm
#endif
#endif
#endif

		.macro	kputc,val
		mov	r0, \val
		bl	putc
		.endm

		.macro	kphex,val,len
		mov	r0, \val
		mov	r1, #\len
		bl	phex
		.endm

		.macro	debug_reloc_start
#ifdef DEBUG
		kputc	#'\n'
		kphex	r6, 8		/* processor id */
		kputc	#':'
		kphex	r7, 8		/* architecture id */
#ifdef CONFIG_CPU_CP15
		kputc	#':'
		mrc	p15, 0, r0, c1, c0
		kphex	r0, 8		/* control reg */
#endif
		kputc	#'\n'
		kphex	r5, 8		/* decompressed kernel start */
		kputc	#'-'
		kphex	r9, 8		/* decompressed kernel end  */
		kputc	#'>'
		kphex	r4, 8		/* kernel execution address */
		kputc	#'\n'
#endif
		.endm

		.macro	debug_reloc_end
#ifdef DEBUG
		kphex	r5, 8		/* end of kernel */
		kputc	#'\n'
		mov	r0, r4
		bl	memdump		/* dump 256 bytes at start of kernel */
#endif
		.endm

		.section ".start", #alloc, #execinstr
/*
 * sort out different calling conventions
 */
		.align
start:
		.type	start,#function
		.rept	8
		mov	r0, r0
		.endr

		b	1f
		.word	0x016f2818		@ Magic numbers to help the loader
		.word	start			@ absolute load/run zImage address
		.word	_edata			@ zImage end address
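		@ (after the eight nops the branch above sits at offset
		@ 0x20, so a loader can find the magic word at offset
		@ 0x24 and the load/end addresses at 0x28 and 0x2c)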
1:		mov	r7, r1			@ save architecture ID
		mov	r8, r2			@ save atags pointer

#ifndef __ARM_ARCH_2__
		/*
		 * Booting from Angel - need to enter SVC mode and disable
		 * FIQs/IRQs (numeric definitions from angel arm.h source).
		 * We only do this if we were in user mode on entry.
		 */
		mrs	r2, cpsr		@ get current mode
		tst	r2, #3			@ not user?
		bne	not_angel
		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
		swi	0x123456		@ angel_SWI_ARM
not_angel:
		mrs	r2, cpsr		@ turn off interrupts to
		orr	r2, r2, #0xc0		@ prevent angel from running
		msr	cpsr_c, r2
#else
		teqp	pc, #0x0c000003		@ turn off interrupts
#endif

		/*
		 * Note that some cache flushing and other stuff may
		 * be needed here - is there an Angel SWI call for this?
		 */

		/*
		 * some architecture specific code can be inserted
		 * by the linker here, but it should preserve r7, r8, and r9.
		 */

		.text
		adr	r0, LC0
		ldmia	r0, {r1, r2, r3, r4, r5, r6, ip, sp}
		subs	r0, r0, r1		@ calculate the delta offset

						@ if delta is zero, we are
		beq	not_relocated		@ running at the address we
						@ were linked at.

		/*
		 * We're running at a different address.  We need to fix
		 * up various pointers:
		 *   r5 - zImage base address
		 *   r6 - GOT start
		 *   ip - GOT end
		 */
		add	r5, r5, r0
		add	r6, r6, r0
		add	ip, ip, r0

#ifndef CONFIG_ZBOOT_ROM
		/*
		 * If we're running fully PIC (i.e. CONFIG_ZBOOT_ROM is not
		 * set), we need to fix up pointers into the BSS region.
		 *   r2 - BSS start
		 *   r3 - BSS end
		 *   sp - stack pointer
		 */
		add	r2, r2, r0
		add	r3, r3, r0
		add	sp, sp, r0

		/*
		 * Relocate all entries in the GOT table.
		 */
1:		ldr	r1, [r6, #0]		@ relocate entries in the GOT
		add	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r6], #4		@ C references.
		cmp	r6, ip
		blo	1b
#else

		/*
		 * Relocate entries in the GOT table.  We only relocate
		 * the entries that are outside the (relocated) BSS region.
		 */
1:		ldr	r1, [r6, #0]		@ relocate entries in the GOT
		cmp	r1, r2			@ entry < bss_start ||
		cmphs	r3, r1			@ _end < entry
		addlo	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r6], #4		@ C references.
		cmp	r6, ip
		blo	1b
#endif

not_relocated:	mov	r0, #0
1:		str	r0, [r2], #4		@ clear bss
		str	r0, [r2], #4
		str	r0, [r2], #4
		str	r0, [r2], #4
		cmp	r2, r3
		blo	1b

		/*
		 * The C runtime environment should now be set up
		 * sufficiently.  Turn the cache on, set up some
		 * pointers, and start decompressing.
		 */
		bl	cache_on

		mov	r1, sp			@ malloc space above stack
		add	r2, sp, #0x10000	@ 64k max

/*
 * Check to see if we will overwrite ourselves.
 *   r4 = final kernel address
 *   r5 = start of this image
 *   r2 = end of malloc space (and therefore this image)
 * We basically want:
 *   r4 >= r2 -> OK
 *   r4 + image length <= r5 -> OK
 */
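/*
 * For example (illustrative numbers only): with the kernel destined for
 * r4 = 0x00008000 and a ~1MB image loaded at r5 = 0x00800000, the first
 * test fails (r4 < r2) but the second passes, since 0x00008000 plus
 * four times the image size is still below 0x00800000.
 */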
		cmp	r4, r2
		bhs	wont_overwrite
		sub	r3, sp, r5		@ > compressed kernel size
		add	r0, r4, r3, lsl #2	@ allow for 4x expansion
		cmp	r0, r5
		bls	wont_overwrite

		mov	r5, r2			@ decompress after malloc space
		mov	r0, r5
		mov	r3, r7
		bl	decompress_kernel

		add	r0, r0, #127 + 128	@ alignment + stack
		bic	r0, r0, #127		@ align the kernel length
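						@ (r0 is now the decompressed
						@ length plus a 128 byte stack,
						@ rounded up to a multiple of
						@ 128 bytes)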
/*
 * r0     = decompressed kernel length
 * r1-r3  = unused
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r6     = processor ID
 * r7     = architecture ID
 * r8     = atags pointer
 * r9-r14 = corrupted
 */
		add	r1, r5, r0		@ end of decompressed kernel
		adr	r2, reloc_start
		ldr	r3, LC1
		add	r3, r2, r3
1:		ldmia	r2!, {r9 - r14}		@ copy relocation code
		stmia	r1!, {r9 - r14}
		ldmia	r2!, {r9 - r14}
		stmia	r1!, {r9 - r14}
		cmp	r2, r3
		blo	1b
		add	sp, r1, #128		@ relocate the stack

		bl	cache_clean_flush
		add	pc, r5, r0		@ call relocation code

/*
 * We're not in danger of overwriting ourselves.  Do this the simple way.
 *
 * r4     = kernel execution address
 * r7     = architecture ID
 */
wont_overwrite:	mov	r0, r4
		mov	r3, r7
		bl	decompress_kernel
		b	call_kernel

		.type	LC0, #object
LC0:		.word	LC0			@ r1
		.word	__bss_start		@ r2
		.word	_end			@ r3
		.word	zreladdr		@ r4
		.word	_start			@ r5
		.word	_got_start		@ r6
		.word	_got_end		@ ip
		.word	user_stack+4096		@ sp
LC1:		.word	reloc_end - reloc_start
		.size	LC0, . - LC0

#ifdef CONFIG_ARCH_RPC
		.globl	params
params:		ldr	r0, =params_phys
		mov	pc, lr
		.ltorg
		.align
#endif

/*
 * Turn on the cache.  We need to set up some page tables so that we
 * can have both the I and D caches on.
 *
 * We place the page tables 16k down from the kernel execution address,
 * and we hope that nothing else is using it.  If we're using it, we
 * will go pop!
 *
 * On entry,
 *  r4 = kernel execution address
 *  r6 = processor ID
 *  r7 = architecture number
 *  r8 = atags pointer
 *  r9 = run-time address of "start"  (???)
 * On exit,
 *  r1, r2, r3, r9, r10, r12 corrupted
 * This routine must preserve:
 *  r4, r5, r6, r7, r8
 */
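/*
 * r3 selects the method within each proc_types entry below: the three
 * method branches live at offsets 8 ("on"), 12 ("off") and 16 ("flush")
 * from the start of the entry, after the two CPU ID words.
 */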
		.align	5
cache_on:	mov	r3, #8			@ cache_on function
		b	call_cache_fn

/*
 * Initialise the highest priority protection region, PR7, to cover
 * the whole 32-bit address space, cacheable and bufferable.
 */
__armv4_mpu_cache_on:
		mov	r0, #0x3f		@ 4G, the whole
		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
		mcr	p15, 0, r0, c6, c7, 1

		mov	r0, #0x80		@ PR7
		mcr	p15, 0, r0, c2, c0, 0	@ D-cache on
		mcr	p15, 0, r0, c2, c0, 1	@ I-cache on
		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on

		mov	r0, #0xc000
		mcr	p15, 0, r0, c5, c0, 1	@ I-access permission
		mcr	p15, 0, r0, c5, c0, 0	@ D-access permission

		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
						@ ...I .... ..D. WC.M
		orr	r0, r0, #0x002d		@ .... .... ..1. 11.1
		orr	r0, r0, #0x1000		@ ...1 .... .... ....

		mcr	p15, 0, r0, c1, c0, 0	@ write control reg

		mov	r0, #0
		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
		mov	pc, lr

__armv3_mpu_cache_on:
		mov	r0, #0x3f		@ 4G, the whole
		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting

		mov	r0, #0x80		@ PR7
		mcr	p15, 0, r0, c2, c0, 0	@ cache on
		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on

		mov	r0, #0xc000
		mcr	p15, 0, r0, c5, c0, 0	@ access permission

		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
						@ .... .... .... WC.M
		orr	r0, r0, #0x000d		@ .... .... .... 11.1
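		/* ?? the mov below throws away the value just constructed
		   and writes 0 to the control register instead ?? */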
		mov	r0, #0
		mcr	p15, 0, r0, c1, c0, 0	@ write control reg

		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
		bic	r3, r3, #0xff		@ Align the pointer
		bic	r3, r3, #0x3f00
/*
 * Initialise the page tables, turning on the cacheable and bufferable
 * bits for the RAM area only.
 */
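/*
 * Each entry is a 1MB section descriptor (v4/v5 short-descriptor
 * format): bits 1:0 = 10 for a section, bit 4 set, AP = 11 (3 << 10)
 * for full access, with bit 3 (cacheable) and bit 2 (bufferable)
 * set only for the RAM area.
 */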
		mov	r0, r3
		mov	r9, r0, lsr #18
		mov	r9, r9, lsl #18		@ start of RAM
		add	r10, r9, #0x10000000	@ a reasonable RAM size
		mov	r1, #0x12
		orr	r1, r1, #3 << 10
		add	r2, r3, #16384
1:		cmp	r1, r9			@ if virt > start of RAM
		orrhs	r1, r1, #0x0c		@ set cacheable, bufferable
		cmp	r1, r10			@ if virt > end of RAM
		bichs	r1, r1, #0x0c		@ clear cacheable, bufferable
		str	r1, [r0], #4		@ 1:1 mapping
		add	r1, r1, #1048576
		teq	r0, r2
		bne	1b
/*
 * If ever we are running from Flash, then we surely want the cache
 * to be enabled also for our execution instance...  We map 2MB of it
 * so there is no map overlap problem for up to 1 MB compressed kernel.
 * If the execution is in RAM then we would only be duplicating the above.
 */
		mov	r1, #0x1e
		orr	r1, r1, #3 << 10
		mov	r2, pc, lsr #20
		orr	r1, r1, r2, lsl #20
		add	r0, r3, r2, lsl #2
		str	r1, [r0], #4
		add	r1, r1, #1048576
		str	r1, [r0]
		mov	pc, lr

__armv4_mmu_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
		orr	r0, r0, #0x0030
		bl	__common_mmu_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mov	pc, r12

__arm6_mmu_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	r0, #0x30
		bl	__common_mmu_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	pc, r12

__common_mmu_cache_on:
#ifndef DEBUG
		orr	r0, r0, #0x000d		@ Write buffer, mmu
#endif
		mov	r1, #-1
		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
		b	1f
		.align	5			@ cache line aligned
1:		mcr	p15, 0, r0, c1, c0, 0	@ load control register
		mrc	p15, 0, r0, c1, c0, 0	@ and read it back to
		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
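						@ (lsr #32 yields zero, so this
						@ is "mov pc, lr" with a data
						@ dependency on the mrc above)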

/*
 * All code following this line is relocatable.  It is relocated by
 * the above code to the end of the decompressed kernel image and
 * executed there.  During this time, we have no stacks.
 *
 * r0     = decompressed kernel length
 * r1-r3  = unused
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r6     = processor ID
 * r7     = architecture ID
 * r8     = atags pointer
 * r9-r14 = corrupted
 */
		.align	5
reloc_start:	add	r9, r5, r0
		sub	r9, r9, #128		@ do not copy the stack
		debug_reloc_start
		mov	r1, r4
1:
		.rept	4
		ldmia	r5!, {r0, r2, r3, r10 - r14}	@ relocate kernel
		stmia	r1!, {r0, r2, r3, r10 - r14}
		.endr

		cmp	r5, r9
		blo	1b
		add	sp, r1, #128		@ relocate the stack
		debug_reloc_end

call_kernel:	bl	cache_clean_flush
		bl	cache_off
		mov	r0, #0			@ must be zero
		mov	r1, r7			@ restore architecture number
		mov	r2, r8			@ restore atags pointer
		mov	pc, r4			@ call kernel

/*
 * Here follow the relocatable cache support functions for the
 * various processors.  This is a generic hook for locating an
 * entry and jumping to an instruction at the specified offset
 * from the start of the block.  Please note this is all position
 * independent code.
 *
 *  r1  = corrupted
 *  r2  = corrupted
 *  r3  = block offset
 *  r6  = corrupted
 *  r12 = corrupted
 */

call_cache_fn:	adr	r12, proc_types
#ifdef CONFIG_CPU_CP15
		mrc	p15, 0, r6, c0, c0	@ get processor ID
#else
		ldr	r6, =CONFIG_PROCESSOR_ID
#endif
1:		ldr	r1, [r12, #0]		@ get value
		ldr	r2, [r12, #4]		@ get mask
		eor	r1, r1, r6		@ (real ^ match)
		tst	r1, r2			@       & mask
		addeq	pc, r12, r3		@ call cache function
		add	r12, r12, #4*5		@ step to the next 5-word entry
		b	1b

/*
 * Table for cache operations.  This is basically:
 *   - CPU ID match
 *   - CPU ID mask
 *   - 'cache on' method instruction
 *   - 'cache off' method instruction
 *   - 'cache flush' method instruction
 *
 * We match an entry using: ((real_id ^ match) & mask) == 0
 *
 * Writethrough caches generally only need 'on' and 'off'
 * methods.  Writeback caches _must_ have the flush method
 * defined.
 */
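/*
 * For example, an SA1100 with an ID of the form 0x4401a11x matches the
 * 0x4401a100/0xffffffe0 entry below, since
 * (0x4401a11x ^ 0x4401a100) & 0xffffffe0 == 0.
 */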
		.type	proc_types,#object
proc_types:
		.word	0x41560600		@ ARM6/610
		.word	0xffffffe0
		b	__arm6_mmu_cache_off	@ works, but slow
		b	__arm6_mmu_cache_off
		mov	pc, lr
@		b	__arm6_mmu_cache_on		@ untested
@		b	__arm6_mmu_cache_off
@		b	__armv3_mmu_cache_flush

		.word	0x00000000		@ old ARM ID
		.word	0x0000f000
		mov	pc, lr
		mov	pc, lr
		mov	pc, lr

		.word	0x41007000		@ ARM7/710
		.word	0xfff8fe00
		b	__arm7_mmu_cache_off
		b	__arm7_mmu_cache_off
		mov	pc, lr

		.word	0x41807200		@ ARM720T (writethrough)
		.word	0xffffff00
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		mov	pc, lr

		.word	0x41007400		@ ARM74x
		.word	0xff00ff00
		b	__armv3_mpu_cache_on
		b	__armv3_mpu_cache_off
		b	__armv3_mpu_cache_flush

		.word	0x41009400		@ ARM94x
		.word	0xff00ff00
		b	__armv4_mpu_cache_on
		b	__armv4_mpu_cache_off
		b	__armv4_mpu_cache_flush

		.word	0x00007000		@ ARM7 IDs
		.word	0x0000f000
		mov	pc, lr
		mov	pc, lr
		mov	pc, lr

		@ Everything from here on will be the new ID system.

		.word	0x4401a100		@ sa110 / sa1100
		.word	0xffffffe0
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv4_mmu_cache_flush

		.word	0x6901b110		@ sa1110
		.word	0xfffffff0
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv4_mmu_cache_flush

		@ These match on the architecture ID

		.word	0x00020000		@ ARMv4T
		.word	0x000f0000
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv4_mmu_cache_flush

		.word	0x00050000		@ ARMv5TE
		.word	0x000f0000
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv4_mmu_cache_flush

		.word	0x00060000		@ ARMv5TEJ
		.word	0x000f0000
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv4_mmu_cache_flush

		.word	0x0007b000		@ ARMv6
		.word	0x0007f000
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv6_mmu_cache_flush

		.word	0			@ unrecognised type
		.word	0
		mov	pc, lr
		mov	pc, lr
		mov	pc, lr

		.size	proc_types, . - proc_types

/*
 * Turn off the Cache and MMU.  ARMv3 does not support
 * reading the control register, but ARMv4 does.
 *
 * On entry,  r6 = processor ID
 * On exit,   r0, r1, r2, r3, r12 corrupted
 * This routine must preserve: r4, r6, r7
 */
		.align	5
cache_off:	mov	r3, #12			@ cache_off function
		b	call_cache_fn

__armv4_mpu_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MPU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c7, c6, 0	@ flush D-Cache
		mcr	p15, 0, r0, c7, c5, 0	@ flush I-Cache
		mov	pc, lr

__armv3_mpu_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0, 0	@ turn MPU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

__armv4_mmu_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
		mov	pc, lr

__arm6_mmu_cache_off:
		mov	r0, #0x00000030		@ ARM6 control reg.
		b	__armv3_mmu_cache_off

__arm7_mmu_cache_off:
		mov	r0, #0x00000070		@ ARM7 control reg.
		b	__armv3_mmu_cache_off

__armv3_mmu_cache_off:
		mcr	p15, 0, r0, c1, c0, 0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	pc, lr

/*
 * Clean and flush the cache to maintain consistency.
 *
 * On entry,
 *  r6 = processor ID
 * On exit,
 *  r1, r2, r3, r11, r12 corrupted
 * This routine must preserve:
 *  r0, r4, r5, r6, r7
 */
		.align	5
cache_clean_flush:
		mov	r3, #16
		b	call_cache_fn

__armv4_mpu_cache_flush:
		mov	r2, #1
		mov	r3, #0
		mcr	p15, 0, ip, c7, c6, 0	@ invalidate D cache
		mov	r1, #7 << 5		@ 8 segments
1:		orr	r3, r1, #63 << 26	@ 64 entries
2:		mcr	p15, 0, r3, c7, c14, 2	@ clean & invalidate D index
		subs	r3, r3, #1 << 26
		bcs	2b			@ entries 63 to 0
		subs	r1, r1, #1 << 5
		bcs	1b			@ segments 7 to 0

		teq	r2, #0
		mcrne	p15, 0, ip, c7, c5, 0	@ invalidate I cache
		mcr	p15, 0, ip, c7, c10, 4	@ drain WB
		mov	pc, lr


__armv6_mmu_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
		mcr	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

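/*
 * Work out the D cache size and line length from the cache type
 * register, assuming the usual v4 layout: the line length field in
 * bits 13:12, the size multiplier "M" bit at bit 14, and the size
 * field starting at bit 18.  CPUs without a cache type register
 * return the processor ID here, in which case the 32K/32-byte
 * defaults below are used.
 */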
__armv4_mmu_cache_flush:
		mov	r2, #64*1024		@ default: 32K dcache size (*2)
		mov	r11, #32		@ default: 32 byte line size
		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
		teq	r3, r6			@ cache ID register present?
		beq	no_cache_id
		mov	r1, r3, lsr #18
		and	r1, r1, #7
		mov	r2, #1024
		mov	r2, r2, lsl r1		@ base dcache size *2
		tst	r3, #1 << 14		@ test M bit
		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
		mov	r3, r3, lsr #12
		and	r3, r3, #3
		mov	r11, #8
		mov	r11, r11, lsl r3	@ cache line size in bytes
no_cache_id:
		bic	r1, pc, #63		@ align to longest cache line
		add	r2, r1, r2
1:		ldr	r3, [r1], r11		@ s/w flush D cache
		teq	r1, r2
		bne	1b

		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv3_mmu_cache_flush:
__armv3_mpu_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

/*
 * Various debugging routines for printing hex characters and
 * memory, which again must be relocatable.
 */
#ifdef DEBUG
		.type	phexbuf,#object
phexbuf:	.space	12
		.size	phexbuf, . - phexbuf

@ phex: print the value in r0 as an r1-digit hex number
phex:		adr	r3, phexbuf
		mov	r2, #0
		strb	r2, [r3, r1]		@ NUL terminate the buffer
1:		subs	r1, r1, #1
		movmi	r0, r3
		bmi	puts			@ all digits done - print it
		and	r2, r0, #15
		mov	r0, r0, lsr #4
		cmp	r2, #10
		addge	r2, r2, #7		@ 'A'-'F' for digits 10-15
		add	r2, r2, #'0'
		strb	r2, [r3, r1]
		b	1b

@ puts: print the NUL-terminated string at r0
puts:		loadsp	r3
1:		ldrb	r2, [r0], #1
		teq	r2, #0
		moveq	pc, lr
2:		writeb	r2, r3
		mov	r1, #0x00020000		@ crude delay to let the
3:		subs	r1, r1, #1		@ character go out
		bne	3b
		teq	r2, #'\n'
		moveq	r2, #'\r'		@ send CR after LF
		beq	2b
		teq	r0, #0
		bne	1b
		mov	pc, lr

@ putc: print the single character in r0
putc:
		mov	r2, r0
		mov	r0, #0
		loadsp	r3
		b	2b

@ memdump: dump 64 words (256 bytes) starting at r0
memdump:	mov	r12, r0
		mov	r10, lr
		mov	r11, #0
2:		mov	r0, r11, lsl #2
		add	r0, r0, r12
		mov	r1, #8
		bl	phex			@ print the address
		mov	r0, #':'
		bl	putc
1:		mov	r0, #' '
		bl	putc
		ldr	r0, [r12, r11, lsl #2]
		mov	r1, #8
		bl	phex			@ print one word
		and	r0, r11, #7
		teq	r0, #3
		moveq	r0, #' '		@ extra space mid-line
		bleq	putc
		and	r0, r11, #7
		add	r11, r11, #1
		teq	r0, #7
		bne	1b			@ 8 words per line
		mov	r0, #'\n'
		bl	putc
		cmp	r11, #64
		blt	2b
		mov	pc, r10
#endif

		.ltorg
reloc_end:

		.align
		.section ".stack", "w"
user_stack:	.space	4096
