xref: /openbmc/linux/arch/arm/boot/compressed/head.S (revision 565d76cb)
1/*
2 *  linux/arch/arm/boot/compressed/head.S
3 *
4 *  Copyright (C) 1996-2002 Russell King
5 *  Copyright (C) 2004 Hyok S. Choi (MPU support)
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11#include <linux/linkage.h>
12
13/*
14 * Debugging stuff
15 *
16 * Note that these macros must not contain any code which is not
17 * 100% relocatable.  Any attempt to do so will result in a crash.
18 * Please select one of the following when turning on debugging.
19 */
20#ifdef DEBUG
21
22#if defined(CONFIG_DEBUG_ICEDCC)
23
24#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K)
25		.macro	loadsp, rb, tmp
26		.endm
27		.macro	writeb, ch, rb
28		mcr	p14, 0, \ch, c0, c5, 0
29		.endm
30#elif defined(CONFIG_CPU_V7)
31		.macro	loadsp, rb, tmp
32		.endm
33		.macro	writeb, ch, rb
34wait:		mrc	p14, 0, pc, c0, c1, 0
35		bcs	wait
36		mcr	p14, 0, \ch, c0, c5, 0
37		.endm
38#elif defined(CONFIG_CPU_XSCALE)
39		.macro	loadsp, rb, tmp
40		.endm
41		.macro	writeb, ch, rb
42		mcr	p14, 0, \ch, c8, c0, 0
43		.endm
44#else
45		.macro	loadsp, rb, tmp
46		.endm
47		.macro	writeb, ch, rb
48		mcr	p14, 0, \ch, c1, c0, 0
49		.endm
50#endif
51
52#else
53
54#include <mach/debug-macro.S>
55
56		.macro	writeb,	ch, rb
57		senduart \ch, \rb
58		.endm
59
60#if defined(CONFIG_ARCH_SA1100)
61		.macro	loadsp, rb, tmp
62		mov	\rb, #0x80000000	@ physical base address
63#ifdef CONFIG_DEBUG_LL_SER3
64		add	\rb, \rb, #0x00050000	@ Ser3
65#else
66		add	\rb, \rb, #0x00010000	@ Ser1
67#endif
68		.endm
69#elif defined(CONFIG_ARCH_S3C2410)
70		.macro loadsp, rb, tmp
71		mov	\rb, #0x50000000
72		add	\rb, \rb, #0x4000 * CONFIG_S3C_LOWLEVEL_UART_PORT
73		.endm
74#else
75		.macro	loadsp,	rb, tmp
76		addruart \rb, \tmp
77		.endm
78#endif
79#endif
80#endif
81
82		.macro	kputc,val
83		mov	r0, \val
84		bl	putc
85		.endm
86
87		.macro	kphex,val,len
88		mov	r0, \val
89		mov	r1, #\len
90		bl	phex
91		.endm
92
93		.macro	debug_reloc_start
94#ifdef DEBUG
95		kputc	#'\n'
96		kphex	r6, 8		/* processor id */
97		kputc	#':'
98		kphex	r7, 8		/* architecture id */
99#ifdef CONFIG_CPU_CP15
100		kputc	#':'
101		mrc	p15, 0, r0, c1, c0
102		kphex	r0, 8		/* control reg */
103#endif
104		kputc	#'\n'
105		kphex	r5, 8		/* decompressed kernel start */
106		kputc	#'-'
107		kphex	r9, 8		/* decompressed kernel end  */
108		kputc	#'>'
109		kphex	r4, 8		/* kernel execution address */
110		kputc	#'\n'
111#endif
112		.endm
113
114		.macro	debug_reloc_end
115#ifdef DEBUG
116		kphex	r5, 8		/* end of kernel */
117		kputc	#'\n'
118		mov	r0, r4
119		bl	memdump		/* dump 256 bytes at start of kernel */
120#endif
121		.endm
122
123		.section ".start", #alloc, #execinstr
124/*
125 * sort out different calling conventions
126 */
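/*
 * The .rept below emits seven "mov r0, r0" no-ops ahead of the zImage
 * magic words, presumably for compatibility with boot loaders that
 * expect a plain ARM instruction stream at the entry point.  When the
 * decompressor is built as Thumb-2, the THUMB() variant reaches 1f via
 * adr/bx so the switch to Thumb state happens explicitly.
 */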
127		.align
128		.arm				@ Always enter in ARM state
129start:
130		.type	start,#function
131		.rept	7
132		mov	r0, r0
133		.endr
134   ARM(		mov	r0, r0		)
135   ARM(		b	1f		)
136 THUMB(		adr	r12, BSYM(1f)	)
137 THUMB(		bx	r12		)
138
139		.word	0x016f2818		@ Magic numbers to help the loader
140		.word	start			@ absolute load/run zImage address
141		.word	_edata			@ zImage end address
142 THUMB(		.thumb			)
1431:		mov	r7, r1			@ save architecture ID
144		mov	r8, r2			@ save atags pointer
145
146#ifndef __ARM_ARCH_2__
147		/*
148		 * Booting from Angel - need to enter SVC mode and disable
149		 * FIQs/IRQs (numeric definitions from angel arm.h source).
150		 * We only do this if we were in user mode on entry.
151		 */
152		mrs	r2, cpsr		@ get current mode
153		tst	r2, #3			@ not user?
154		bne	not_angel
155		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
156 ARM(		swi	0x123456	)	@ angel_SWI_ARM
157 THUMB(		svc	0xab		)	@ angel_SWI_THUMB
158not_angel:
159		mrs	r2, cpsr		@ turn off interrupts to
160		orr	r2, r2, #0xc0		@ prevent angel from running
161		msr	cpsr_c, r2
162#else
163		teqp	pc, #0x0c000003		@ turn off interrupts
164#endif
165
166		/*
167		 * Note that some cache flushing and other stuff may
168		 * be needed here - is there an Angel SWI call for this?
169		 */
170
171		/*
172		 * some architecture specific code can be inserted
173		 * by the linker here, but it should preserve r7, r8, and r9.
174		 */
175
176		.text
177
178#ifdef CONFIG_AUTO_ZRELADDR
179		@ determine final kernel image address
180		mov	r4, pc
181		and	r4, r4, #0xf8000000
182		add	r4, r4, #TEXT_OFFSET
183#else
184		ldr	r4, =zreladdr
185#endif
186
187		bl	cache_on
188
189restart:	adr	r0, LC0
190		ldmia	r0, {r1, r2, r3, r5, r6, r9, r11, r12}
191		ldr	sp, [r0, #32]
192
193		/*
194		 * We might be running at a different address.  We need
195		 * to fix up various pointers.
196		 */
197		sub	r0, r0, r1		@ calculate the delta offset
198		add	r5, r5, r0		@ _start
199		add	r6, r6, r0		@ _edata
200
201#ifndef CONFIG_ZBOOT_ROM
202		/* malloc space is above the relocated stack (64k max) */
203		add	sp, sp, r0
204		add	r10, sp, #0x10000
205#else
206		/*
207		 * With ZBOOT_ROM the bss/stack is non-relocatable,
208		 * but someone could still run this code from RAM,
209		 * in which case our reference is _edata.
210		 */
211		mov	r10, r6
212#endif
213
214/*
215 * Check to see if we will overwrite ourselves.
216 *   r4  = final kernel address
217 *   r5  = start of this image
218 *   r9  = size of decompressed image
219 *   r10 = end of this image, including bss/stack/malloc space if non-XIP
220 * We basically want:
221 *   r4 >= r10 -> OK
222 *   r4 + image length <= r5 -> OK
223 */
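/*
 * Worked example with hypothetical numbers: if r4 = 0x00008000,
 * r9 = 0x00400000 and this image sits at r5 = 0x00800000, the
 * decompressed kernel would end at 0x00408000, which is below r5,
 * so the second test branches to wont_overwrite and no relocation
 * of the decompressor is needed.
 */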
224		cmp	r4, r10
225		bhs	wont_overwrite
226		add	r10, r4, r9
227		cmp	r10, r5
228		bls	wont_overwrite
229
230/*
231 * Relocate ourselves past the end of the decompressed kernel.
232 *   r5  = start of this image
233 *   r6  = _edata
234 *   r10 = end of the decompressed kernel
235 * Because we always copy ahead, we need to do it from the end and go
236 * backward in case the source and destination overlap.
237 */
238		/* Round up to next 256-byte boundary. */
239		add	r10, r10, #256
240		bic	r10, r10, #255
241
242		sub	r9, r6, r5		@ size to copy
243		add	r9, r9, #31		@ rounded up to a multiple
244		bic	r9, r9, #31		@ ... of 32 bytes
245		add	r6, r9, r5
246		add	r9, r9, r10
247
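		/*
		 * The loop below copies 32 bytes (eight registers) per
		 * iteration, walking downwards from the (rounded) end of
		 * the image towards _start, so a destination overlapping
		 * above the source is handled safely.
		 */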
2481:		ldmdb	r6!, {r0 - r3, r10 - r12, lr}
249		cmp	r6, r5
250		stmdb	r9!, {r0 - r3, r10 - r12, lr}
251		bhi	1b
252
253		/* Preserve offset to relocated code. */
254		sub	r6, r9, r6
255
256		bl	cache_clean_flush
257
258		adr	r0, BSYM(restart)
259		add	r0, r0, r6
260		mov	pc, r0
261
262wont_overwrite:
263/*
264 * If delta is zero, we are running at the address we were linked at.
265 *   r0  = delta
266 *   r2  = BSS start
267 *   r3  = BSS end
268 *   r4  = kernel execution address
269 *   r7  = architecture ID
270 *   r8  = atags pointer
271 *   r11 = GOT start
272 *   r12 = GOT end
273 *   sp  = stack pointer
274 */
275		teq	r0, #0
276		beq	not_relocated
277		add	r11, r11, r0
278		add	r12, r12, r0
279
280#ifndef CONFIG_ZBOOT_ROM
281		/*
282		 * If we're running fully PIC (i.e. CONFIG_ZBOOT_ROM is not set),
283		 * we need to fix up pointers into the BSS region.
284		 * Note that the stack pointer has already been fixed up.
285		 */
286		add	r2, r2, r0
287		add	r3, r3, r0
288
289		/*
290		 * Relocate all entries in the GOT table.
291		 */
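		/*
		 * Roughly equivalent C, for illustration only (r0 holds
		 * the relocation delta):
		 *
		 *	for (p = _got_start; p < _got_end; p++)
		 *		*p += delta;
		 */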
2921:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
293		add	r1, r1, r0		@ table.  This fixes up the
294		str	r1, [r11], #4		@ C references.
295		cmp	r11, r12
296		blo	1b
297#else
298
299		/*
300		 * Relocate entries in the GOT table.  We only relocate
301		 * the entries that are outside the (relocated) BSS region.
302		 */
3031:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
304		cmp	r1, r2			@ entry < bss_start ||
305		cmphs	r3, r1			@ _end < entry
306		addlo	r1, r1, r0		@ table.  This fixes up the
307		str	r1, [r11], #4		@ C references.
308		cmp	r11, r12
309		blo	1b
310#endif
311
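		/* Zero the BSS, four words (16 bytes) per iteration. */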
312not_relocated:	mov	r0, #0
3131:		str	r0, [r2], #4		@ clear bss
314		str	r0, [r2], #4
315		str	r0, [r2], #4
316		str	r0, [r2], #4
317		cmp	r2, r3
318		blo	1b
319
320/*
321 * The C runtime environment should now be set up sufficiently.
322 * Set up some pointers, and start decompressing.
323 *   r4  = kernel execution address
324 *   r7  = architecture ID
325 *   r8  = atags pointer
326 */
327		mov	r0, r4
328		mov	r1, sp			@ malloc space above stack
329		add	r2, sp, #0x10000	@ 64k max
330		mov	r3, r7
331		bl	decompress_kernel
332		bl	cache_clean_flush
333		bl	cache_off
334		mov	r0, #0			@ must be zero
335		mov	r1, r7			@ restore architecture number
336		mov	r2, r8			@ restore atags pointer
337		mov	pc, r4			@ call kernel
338
339		.align	2
340		.type	LC0, #object
341LC0:		.word	LC0			@ r1
342		.word	__bss_start		@ r2
343		.word	_end			@ r3
344		.word	_start			@ r5
345		.word	_edata			@ r6
346		.word	_image_size		@ r9
347		.word	_got_start		@ r11
348		.word	_got_end		@ ip
349		.word	user_stack_end		@ sp
350		.size	LC0, . - LC0
351
352#ifdef CONFIG_ARCH_RPC
353		.globl	params
354params:		ldr	r0, =0x10000100		@ params_phys for RPC
355		mov	pc, lr
356		.ltorg
357		.align
358#endif
359
360/*
361 * Turn on the cache.  We need to set up some page tables so that we
362 * can have both the I and D caches on.
363 *
364 * We place the page tables 16k down from the kernel execution address,
365 * and we hope that nothing else is using it.  If we're using it, we
366 * will go pop!
367 *
368 * On entry,
369 *  r4 = kernel execution address
370 *  r7 = architecture number
371 *  r8 = atags pointer
372 * On exit,
373 *  r0, r1, r2, r3, r9, r10, r12 corrupted
374 * This routine must preserve:
375 *  r4, r7, r8
376 */
377		.align	5
378cache_on:	mov	r3, #8			@ cache_on function
379		b	call_cache_fn
380
381/*
382 * Initialize the highest priority protection region, PR7
383 * to cover the whole 32-bit address space as cacheable and bufferable.
384 */
385__armv4_mpu_cache_on:
386		mov	r0, #0x3f		@ 4G, the whole
387		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
388		mcr 	p15, 0, r0, c6, c7, 1
389
390		mov	r0, #0x80		@ PR7
391		mcr	p15, 0, r0, c2, c0, 0	@ D-cache on
392		mcr	p15, 0, r0, c2, c0, 1	@ I-cache on
393		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
394
395		mov	r0, #0xc000
396		mcr	p15, 0, r0, c5, c0, 1	@ I-access permission
397		mcr	p15, 0, r0, c5, c0, 0	@ D-access permission
398
399		mov	r0, #0
400		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
401		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
402		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
403		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
404						@ ...I .... ..D. WC.M
405		orr	r0, r0, #0x002d		@ .... .... ..1. 11.1
406		orr	r0, r0, #0x1000		@ ...1 .... .... ....
407
408		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
409
410		mov	r0, #0
411		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
412		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
413		mov	pc, lr
414
415__armv3_mpu_cache_on:
416		mov	r0, #0x3f		@ 4G, the whole
417		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
418
419		mov	r0, #0x80		@ PR7
420		mcr	p15, 0, r0, c2, c0, 0	@ cache on
421		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on
422
423		mov	r0, #0xc000
424		mcr	p15, 0, r0, c5, c0, 0	@ access permission
425
426		mov	r0, #0
427		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
428		/*
429		 * ?? ARMv3 MMU does not allow reading the control register,
430		 * does this really work on ARMv3 MPU?
431		 */
432		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
433						@ .... .... .... WC.M
434		orr	r0, r0, #0x000d		@ .... .... .... 11.1
435		/* ?? this overwrites the value constructed above? */
436		mov	r0, #0
437		mcr	p15, 0, r0, c1, c0, 0	@ write control reg
438
439		/* ?? invalidate for the second time? */
440		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
441		mov	pc, lr
442
443__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
444		bic	r3, r3, #0xff		@ Align the pointer
445		bic	r3, r3, #0x3f00
446/*
447 * Initialise the page tables, turning on the cacheable and bufferable
448 * bits for the RAM area only.
449 */
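/*
 * Each entry written below is a 1MB section descriptor: 0x12 marks it
 * as a section mapping, the #3 << 10 bits set the access permissions
 * to read/write, and the cacheable+bufferable bits (0x0c) are only
 * ORed in for addresses inside the assumed RAM window (r9 to
 * r9 + 256MB).
 */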
450		mov	r0, r3
451		mov	r9, r0, lsr #18
452		mov	r9, r9, lsl #18		@ start of RAM
453		add	r10, r9, #0x10000000	@ a reasonable RAM size
454		mov	r1, #0x12
455		orr	r1, r1, #3 << 10
456		add	r2, r3, #16384
4571:		cmp	r1, r9			@ if virt > start of RAM
458		orrhs	r1, r1, #0x0c		@ set cacheable, bufferable
459		cmp	r1, r10			@ if virt > end of RAM
460		bichs	r1, r1, #0x0c		@ clear cacheable, bufferable
461		str	r1, [r0], #4		@ 1:1 mapping
462		add	r1, r1, #1048576
463		teq	r0, r2
464		bne	1b
465/*
466 * If ever we are running from Flash, then we surely want the cache
467 * to be enabled also for our execution instance...  We map 2MB of it
468 * so there is no map overlap problem for compressed kernels of up to 1 MB.
469 * If the execution is in RAM then we would only be duplicating the above.
470 */
471		mov	r1, #0x1e
472		orr	r1, r1, #3 << 10
473		mov	r2, pc
474		mov	r2, r2, lsr #20
475		orr	r1, r1, r2, lsl #20
476		add	r0, r3, r2, lsl #2
477		str	r1, [r0], #4
478		add	r1, r1, #1048576
479		str	r1, [r0]
480		mov	pc, lr
481ENDPROC(__setup_mmu)
482
483__armv4_mmu_cache_on:
484		mov	r12, lr
485#ifdef CONFIG_MMU
486		bl	__setup_mmu
487		mov	r0, #0
488		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
489		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
490		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
491		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
492		orr	r0, r0, #0x0030
493#ifdef CONFIG_CPU_ENDIAN_BE8
494		orr	r0, r0, #1 << 25	@ big-endian page tables
495#endif
496		bl	__common_mmu_cache_on
497		mov	r0, #0
498		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
499#endif
500		mov	pc, r12
501
502__armv7_mmu_cache_on:
503		mov	r12, lr
504#ifdef CONFIG_MMU
505		mrc	p15, 0, r11, c0, c1, 4	@ read ID_MMFR0
506		tst	r11, #0xf		@ VMSA
507		blne	__setup_mmu
508		mov	r0, #0
509		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
510		tst	r11, #0xf		@ VMSA
511		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
512#endif
513		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
514		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
515		orr	r0, r0, #0x003c		@ write buffer
516#ifdef CONFIG_MMU
517#ifdef CONFIG_CPU_ENDIAN_BE8
518		orr	r0, r0, #1 << 25	@ big-endian page tables
519#endif
520		orrne	r0, r0, #1		@ MMU enabled
521		movne	r1, #-1
522		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
523		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
524#endif
525		mcr	p15, 0, r0, c1, c0, 0	@ load control register
526		mrc	p15, 0, r0, c1, c0, 0	@ and read it back
527		mov	r0, #0
528		mcr	p15, 0, r0, c7, c5, 4	@ ISB
529		mov	pc, r12
530
531__fa526_cache_on:
532		mov	r12, lr
533		bl	__setup_mmu
534		mov	r0, #0
535		mcr	p15, 0, r0, c7, c7, 0	@ Invalidate whole cache
536		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
537		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
538		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
539		orr	r0, r0, #0x1000		@ I-cache enable
540		bl	__common_mmu_cache_on
541		mov	r0, #0
542		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
543		mov	pc, r12
544
545__arm6_mmu_cache_on:
546		mov	r12, lr
547		bl	__setup_mmu
548		mov	r0, #0
549		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
550		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
551		mov	r0, #0x30
552		bl	__common_mmu_cache_on
553		mov	r0, #0
554		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
555		mov	pc, r12
556
557__common_mmu_cache_on:
558#ifndef CONFIG_THUMB2_KERNEL
559#ifndef DEBUG
560		orr	r0, r0, #0x000d		@ Write buffer, mmu
561#endif
562		mov	r1, #-1
563		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
564		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
565		b	1f
566		.align	5			@ cache line aligned
5671:		mcr	p15, 0, r0, c1, c0, 0	@ load control register
568		mrc	p15, 0, r0, c1, c0, 0	@ and read it back to
569		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
570#endif
571
572/*
573 * Here follow the relocatable cache support functions for the
574 * various processors.  This is a generic hook for locating an
575 * entry and jumping to an instruction at the specified offset
576 * from the start of the block.  Please note this is all position
577 * independent code.
578 *
579 *  r1  = corrupted
580 *  r2  = corrupted
581 *  r3  = block offset
582 *  r9  = corrupted
583 *  r12 = corrupted
584 */
585
586call_cache_fn:	adr	r12, proc_types
587#ifdef CONFIG_CPU_CP15
588		mrc	p15, 0, r9, c0, c0	@ get processor ID
589#else
590		ldr	r9, =CONFIG_PROCESSOR_ID
591#endif
5921:		ldr	r1, [r12, #0]		@ get value
593		ldr	r2, [r12, #4]		@ get mask
594		eor	r1, r1, r9		@ (real ^ match)
595		tst	r1, r2			@       & mask
596 ARM(		addeq	pc, r12, r3		) @ call cache function
597 THUMB(		addeq	r12, r3			)
598 THUMB(		moveq	pc, r12			) @ call cache function
599		add	r12, r12, #4*5
600		b	1b
601
602/*
603 * Table for cache operations.  This is basically:
604 *   - CPU ID match
605 *   - CPU ID mask
606 *   - 'cache on' method instruction
607 *   - 'cache off' method instruction
608 *   - 'cache flush' method instruction
609 *
610 * We match an entry using: ((real_id ^ match) & mask) == 0
611 *
612 * Writethrough caches generally only need 'on' and 'off'
613 * methods.  Writeback caches _must_ have the flush method
614 * defined.
615 */
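/*
 * Each entry below is five words long: the CPU ID match value, the
 * mask, and one instruction slot each for the 'on', 'off' and 'flush'
 * methods.  call_cache_fn branches to entry + r3 (8, 12 or 16) to
 * select the requested method.
 */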
616		.align	2
617		.type	proc_types,#object
618proc_types:
619		.word	0x41560600		@ ARM6/610
620		.word	0xffffffe0
621		W(b)	__arm6_mmu_cache_off	@ works, but slow
622		W(b)	__arm6_mmu_cache_off
623		mov	pc, lr
624 THUMB(		nop				)
625@		b	__arm6_mmu_cache_on		@ untested
626@		b	__arm6_mmu_cache_off
627@		b	__armv3_mmu_cache_flush
628
629		.word	0x00000000		@ old ARM ID
630		.word	0x0000f000
631		mov	pc, lr
632 THUMB(		nop				)
633		mov	pc, lr
634 THUMB(		nop				)
635		mov	pc, lr
636 THUMB(		nop				)
637
638		.word	0x41007000		@ ARM7/710
639		.word	0xfff8fe00
640		W(b)	__arm7_mmu_cache_off
641		W(b)	__arm7_mmu_cache_off
642		mov	pc, lr
643 THUMB(		nop				)
644
645		.word	0x41807200		@ ARM720T (writethrough)
646		.word	0xffffff00
647		W(b)	__armv4_mmu_cache_on
648		W(b)	__armv4_mmu_cache_off
649		mov	pc, lr
650 THUMB(		nop				)
651
652		.word	0x41007400		@ ARM74x
653		.word	0xff00ff00
654		W(b)	__armv3_mpu_cache_on
655		W(b)	__armv3_mpu_cache_off
656		W(b)	__armv3_mpu_cache_flush
657
658		.word	0x41009400		@ ARM94x
659		.word	0xff00ff00
660		W(b)	__armv4_mpu_cache_on
661		W(b)	__armv4_mpu_cache_off
662		W(b)	__armv4_mpu_cache_flush
663
664		.word	0x00007000		@ ARM7 IDs
665		.word	0x0000f000
666		mov	pc, lr
667 THUMB(		nop				)
668		mov	pc, lr
669 THUMB(		nop				)
670		mov	pc, lr
671 THUMB(		nop				)
672
673		@ Everything from here on will be the new ID system.
674
675		.word	0x4401a100		@ sa110 / sa1100
676		.word	0xffffffe0
677		W(b)	__armv4_mmu_cache_on
678		W(b)	__armv4_mmu_cache_off
679		W(b)	__armv4_mmu_cache_flush
680
681		.word	0x6901b110		@ sa1110
682		.word	0xfffffff0
683		W(b)	__armv4_mmu_cache_on
684		W(b)	__armv4_mmu_cache_off
685		W(b)	__armv4_mmu_cache_flush
686
687		.word	0x56056900
688		.word	0xffffff00		@ PXA9xx
689		W(b)	__armv4_mmu_cache_on
690		W(b)	__armv4_mmu_cache_off
691		W(b)	__armv4_mmu_cache_flush
692
693		.word	0x56158000		@ PXA168
694		.word	0xfffff000
695		W(b)	__armv4_mmu_cache_on
696		W(b)	__armv4_mmu_cache_off
697		W(b)	__armv5tej_mmu_cache_flush
698
699		.word	0x56050000		@ Feroceon
700		.word	0xff0f0000
701		W(b)	__armv4_mmu_cache_on
702		W(b)	__armv4_mmu_cache_off
703		W(b)	__armv5tej_mmu_cache_flush
704
705#ifdef CONFIG_CPU_FEROCEON_OLD_ID
706		/* this conflicts with the standard ARMv5TE entry */
707		.long	0x41009260		@ Old Feroceon
708		.long	0xff00fff0
709		b	__armv4_mmu_cache_on
710		b	__armv4_mmu_cache_off
711		b	__armv5tej_mmu_cache_flush
712#endif
713
714		.word	0x66015261		@ FA526
715		.word	0xff01fff1
716		W(b)	__fa526_cache_on
717		W(b)	__armv4_mmu_cache_off
718		W(b)	__fa526_cache_flush
719
720		@ These match on the architecture ID
721
722		.word	0x00020000		@ ARMv4T
723		.word	0x000f0000
724		W(b)	__armv4_mmu_cache_on
725		W(b)	__armv4_mmu_cache_off
726		W(b)	__armv4_mmu_cache_flush
727
728		.word	0x00050000		@ ARMv5TE
729		.word	0x000f0000
730		W(b)	__armv4_mmu_cache_on
731		W(b)	__armv4_mmu_cache_off
732		W(b)	__armv4_mmu_cache_flush
733
734		.word	0x00060000		@ ARMv5TEJ
735		.word	0x000f0000
736		W(b)	__armv4_mmu_cache_on
737		W(b)	__armv4_mmu_cache_off
738		W(b)	__armv5tej_mmu_cache_flush
739
740		.word	0x0007b000		@ ARMv6
741		.word	0x000ff000
742		W(b)	__armv4_mmu_cache_on
743		W(b)	__armv4_mmu_cache_off
744		W(b)	__armv6_mmu_cache_flush
745
746		.word	0x560f5810		@ Marvell PJ4 ARMv6
747		.word	0xff0ffff0
748		W(b)	__armv4_mmu_cache_on
749		W(b)	__armv4_mmu_cache_off
750		W(b)	__armv6_mmu_cache_flush
751
752		.word	0x000f0000		@ new CPU Id
753		.word	0x000f0000
754		W(b)	__armv7_mmu_cache_on
755		W(b)	__armv7_mmu_cache_off
756		W(b)	__armv7_mmu_cache_flush
757
758		.word	0			@ unrecognised type
759		.word	0
760		mov	pc, lr
761 THUMB(		nop				)
762		mov	pc, lr
763 THUMB(		nop				)
764		mov	pc, lr
765 THUMB(		nop				)
766
767		.size	proc_types, . - proc_types
768
769/*
770 * Turn off the Cache and MMU.  ARMv3 does not support
771 * reading the control register, but ARMv4 does.
772 *
773 * On exit,
774 *  r0, r1, r2, r3, r9, r12 corrupted
775 * This routine must preserve:
776 *  r4, r7, r8
777 */
778		.align	5
779cache_off:	mov	r3, #12			@ cache_off function
780		b	call_cache_fn
781
782__armv4_mpu_cache_off:
783		mrc	p15, 0, r0, c1, c0
784		bic	r0, r0, #0x000d
785		mcr	p15, 0, r0, c1, c0	@ turn MPU and cache off
786		mov	r0, #0
787		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
788		mcr	p15, 0, r0, c7, c6, 0	@ flush D-Cache
789		mcr	p15, 0, r0, c7, c5, 0	@ flush I-Cache
790		mov	pc, lr
791
792__armv3_mpu_cache_off:
793		mrc	p15, 0, r0, c1, c0
794		bic	r0, r0, #0x000d
795		mcr	p15, 0, r0, c1, c0, 0	@ turn MPU and cache off
796		mov	r0, #0
797		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
798		mov	pc, lr
799
800__armv4_mmu_cache_off:
801#ifdef CONFIG_MMU
802		mrc	p15, 0, r0, c1, c0
803		bic	r0, r0, #0x000d
804		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
805		mov	r0, #0
806		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
807		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
808#endif
809		mov	pc, lr
810
811__armv7_mmu_cache_off:
812		mrc	p15, 0, r0, c1, c0
813#ifdef CONFIG_MMU
814		bic	r0, r0, #0x000d
815#else
816		bic	r0, r0, #0x000c
817#endif
818		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
819		mov	r12, lr
820		bl	__armv7_mmu_cache_flush
821		mov	r0, #0
822#ifdef CONFIG_MMU
823		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB
824#endif
825		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
826		mcr	p15, 0, r0, c7, c10, 4	@ DSB
827		mcr	p15, 0, r0, c7, c5, 4	@ ISB
828		mov	pc, r12
829
830__arm6_mmu_cache_off:
831		mov	r0, #0x00000030		@ ARM6 control reg.
832		b	__armv3_mmu_cache_off
833
834__arm7_mmu_cache_off:
835		mov	r0, #0x00000070		@ ARM7 control reg.
836		b	__armv3_mmu_cache_off
837
838__armv3_mmu_cache_off:
839		mcr	p15, 0, r0, c1, c0, 0	@ turn MMU and cache off
840		mov	r0, #0
841		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
842		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
843		mov	pc, lr
844
845/*
846 * Clean and flush the cache to maintain consistency.
847 *
848 * On exit,
849 *  r1, r2, r3, r9, r10, r11, r12 corrupted
850 * This routine must preserve:
851 *  r4, r6, r7, r8
852 */
853		.align	5
854cache_clean_flush:
855		mov	r3, #16
856		b	call_cache_fn
857
858__armv4_mpu_cache_flush:
859		mov	r2, #1
860		mov	r3, #0
861		mcr	p15, 0, ip, c7, c6, 0	@ invalidate D cache
862		mov	r1, #7 << 5		@ 8 segments
8631:		orr	r3, r1, #63 << 26	@ 64 entries
8642:		mcr	p15, 0, r3, c7, c14, 2	@ clean & invalidate D index
865		subs	r3, r3, #1 << 26
866		bcs	2b			@ entries 63 to 0
867		subs 	r1, r1, #1 << 5
868		bcs	1b			@ segments 7 to 0
869
870		teq	r2, #0
871		mcrne	p15, 0, ip, c7, c5, 0	@ invalidate I cache
872		mcr	p15, 0, ip, c7, c10, 4	@ drain WB
873		mov	pc, lr
874
875__fa526_cache_flush:
876		mov	r1, #0
877		mcr	p15, 0, r1, c7, c14, 0	@ clean and invalidate D cache
878		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
879		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
880		mov	pc, lr
881
882__armv6_mmu_cache_flush:
883		mov	r1, #0
884		mcr	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
885		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
886		mcr	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
887		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
888		mov	pc, lr
889
890__armv7_mmu_cache_flush:
891		mrc	p15, 0, r10, c0, c1, 5	@ read ID_MMFR1
892		tst	r10, #0xf << 16		@ hierarchical cache (ARMv7)
893		mov	r10, #0
894		beq	hierarchical
895		mcr	p15, 0, r10, c7, c14, 0	@ clean+invalidate D
896		b	iflush
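/*
 * Hierarchical (ARMv7) path: walk each cache level described by CLIDR
 * up to the Level of Coherency, cleaning and invalidating the data or
 * unified caches by set/way, then fall through to iflush to invalidate
 * the I-cache and branch predictor.
 */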
897hierarchical:
898		mcr	p15, 0, r10, c7, c10, 5	@ DMB
899		stmfd	sp!, {r0-r7, r9-r11}
900		mrc	p15, 1, r0, c0, c0, 1	@ read clidr
901		ands	r3, r0, #0x7000000	@ extract loc from clidr
902		mov	r3, r3, lsr #23		@ left align loc bit field
903		beq	finished		@ if loc is 0, then no need to clean
904		mov	r10, #0			@ start clean at cache level 0
905loop1:
906		add	r2, r10, r10, lsr #1	@ work out 3x current cache level
907		mov	r1, r0, lsr r2		@ extract cache type bits from clidr
908		and	r1, r1, #7		@ mask off the bits for the current cache only
909		cmp	r1, #2			@ see what cache we have at this level
910		blt	skip			@ skip if no cache, or just i-cache
911		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
912		mcr	p15, 0, r10, c7, c5, 4	@ isb to sync the new cssr & csidr
913		mrc	p15, 1, r1, c0, c0, 0	@ read the new csidr
914		and	r2, r1, #7		@ extract the length of the cache lines
915		add	r2, r2, #4		@ add 4 (line length offset)
916		ldr	r4, =0x3ff
917		ands	r4, r4, r1, lsr #3	@ find maximum way number (ways - 1)
918		clz	r5, r4			@ find bit position of way size increment
919		ldr	r7, =0x7fff
920		ands	r7, r7, r1, lsr #13	@ extract maximum index number (sets - 1)
921loop2:
922		mov	r9, r4			@ create working copy of max way size
923loop3:
924 ARM(		orr	r11, r10, r9, lsl r5	) @ factor way and cache number into r11
925 ARM(		orr	r11, r11, r7, lsl r2	) @ factor index number into r11
926 THUMB(		lsl	r6, r9, r5		)
927 THUMB(		orr	r11, r10, r6		) @ factor way and cache number into r11
928 THUMB(		lsl	r6, r7, r2		)
929 THUMB(		orr	r11, r11, r6		) @ factor index number into r11
930		mcr	p15, 0, r11, c7, c14, 2	@ clean & invalidate by set/way
931		subs	r9, r9, #1		@ decrement the way
932		bge	loop3
933		subs	r7, r7, #1		@ decrement the index
934		bge	loop2
935skip:
936		add	r10, r10, #2		@ increment cache number
937		cmp	r3, r10
938		bgt	loop1
939finished:
940		ldmfd	sp!, {r0-r7, r9-r11}
941		mov	r10, #0			@ switch back to cache level 0
942		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
943iflush:
944		mcr	p15, 0, r10, c7, c10, 4	@ DSB
945		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I+BTB
946		mcr	p15, 0, r10, c7, c10, 4	@ DSB
947		mcr	p15, 0, r10, c7, c5, 4	@ ISB
948		mov	pc, lr
949
950__armv5tej_mmu_cache_flush:
9511:		mrc	p15, 0, r15, c7, c14, 3	@ test,clean,invalidate D cache
952		bne	1b
953		mcr	p15, 0, r0, c7, c5, 0	@ flush I cache
954		mcr	p15, 0, r0, c7, c10, 4	@ drain WB
955		mov	pc, lr
956
957__armv4_mmu_cache_flush:
958		mov	r2, #64*1024		@ default: 32K dcache size (*2)
959		mov	r11, #32		@ default: 32 byte line size
960		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
961		teq	r3, r9			@ cache ID register present?
962		beq	no_cache_id
963		mov	r1, r3, lsr #18
964		and	r1, r1, #7
965		mov	r2, #1024
966		mov	r2, r2, lsl r1		@ base dcache size *2
967		tst	r3, #1 << 14		@ test M bit
968		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
969		mov	r3, r3, lsr #12
970		and	r3, r3, #3
971		mov	r11, #8
972		mov	r11, r11, lsl r3	@ cache line size in bytes
973no_cache_id:
974		mov	r1, pc
975		bic	r1, r1, #63		@ align to longest cache line
976		add	r2, r1, r2
9771:
978 ARM(		ldr	r3, [r1], r11		) @ s/w flush D cache
979 THUMB(		ldr     r3, [r1]		) @ s/w flush D cache
980 THUMB(		add     r1, r1, r11		)
981		teq	r1, r2
982		bne	1b
983
984		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
985		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
986		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
987		mov	pc, lr
988
989__armv3_mmu_cache_flush:
990__armv3_mpu_cache_flush:
991		mov	r1, #0
992		mcr	p15, 0, r1, c7, c0, 0	@ invalidate whole cache v3
993		mov	pc, lr
994
995/*
996 * Various debugging routines for printing hex characters and
997 * memory, which again must be relocatable.
998 */
999#ifdef DEBUG
1000		.align	2
1001		.type	phexbuf,#object
1002phexbuf:	.space	12
1003		.size	phexbuf, . - phexbuf
1004
1005@ phex corrupts {r0, r1, r2, r3}
1006phex:		adr	r3, phexbuf
1007		mov	r2, #0
1008		strb	r2, [r3, r1]
10091:		subs	r1, r1, #1
1010		movmi	r0, r3
1011		bmi	puts
1012		and	r2, r0, #15
1013		mov	r0, r0, lsr #4
1014		cmp	r2, #10
1015		addge	r2, r2, #7
1016		add	r2, r2, #'0'
1017		strb	r2, [r3, r1]
1018		b	1b
1019
1020@ puts corrupts {r0, r1, r2, r3}
1021puts:		loadsp	r3, r1
10221:		ldrb	r2, [r0], #1
1023		teq	r2, #0
1024		moveq	pc, lr
10252:		writeb	r2, r3
1026		mov	r1, #0x00020000		@ busy-wait delay count
10273:		subs	r1, r1, #1
1028		bne	3b
1029		teq	r2, #'\n'
1030		moveq	r2, #'\r'
1031		beq	2b
1032		teq	r0, #0
1033		bne	1b
1034		mov	pc, lr
1035@ putc corrupts {r0, r1, r2, r3}
1036putc:
1037		mov	r2, r0
1038		mov	r0, #0
1039		loadsp	r3, r1
1040		b	2b
1041
1042@ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
1043memdump:	mov	r12, r0
1044		mov	r10, lr
1045		mov	r11, #0
10462:		mov	r0, r11, lsl #2
1047		add	r0, r0, r12
1048		mov	r1, #8
1049		bl	phex
1050		mov	r0, #':'
1051		bl	putc
10521:		mov	r0, #' '
1053		bl	putc
1054		ldr	r0, [r12, r11, lsl #2]
1055		mov	r1, #8
1056		bl	phex
1057		and	r0, r11, #7
1058		teq	r0, #3
1059		moveq	r0, #' '
1060		bleq	putc
1061		and	r0, r11, #7
1062		add	r11, r11, #1
1063		teq	r0, #7
1064		bne	1b
1065		mov	r0, #'\n'
1066		bl	putc
1067		cmp	r11, #64
1068		blt	2b
1069		mov	pc, r10
1070#endif
1071
1072		.ltorg
1073
1074		.align
1075		.section ".stack", "aw", %nobits
1076user_stack:	.space	4096
1077user_stack_end:
1078