/*
 *  linux/arch/arm/boot/compressed/head.S
 *
 *  Copyright (C) 1996-2002 Russell King
 *  Copyright (C) 2004 Hyok S. Choi (MPU support)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/linkage.h>

/*
 * Debugging stuff
 *
 * Note that these macros must not contain any code which is not
 * 100% relocatable.  Any attempt to do so will result in a crash.
 * Please select one of the following when turning on debugging.
 */
#ifdef DEBUG

#if defined(CONFIG_DEBUG_ICEDCC)

#ifdef CONFIG_CPU_V6
		.macro	loadsp, rb, tmp
		.endm
		.macro	writeb, ch, rb
		mcr	p14, 0, \ch, c0, c5, 0
		.endm
#elif defined(CONFIG_CPU_V7)
		.macro	loadsp, rb, tmp
		.endm
		.macro	writeb, ch, rb
wait:		mrc	p14, 0, pc, c0, c1, 0
		bcs	wait
		mcr	p14, 0, \ch, c0, c5, 0
		.endm
#elif defined(CONFIG_CPU_XSCALE)
		.macro	loadsp, rb, tmp
		.endm
		.macro	writeb, ch, rb
		mcr	p14, 0, \ch, c8, c0, 0
		.endm
#else
		.macro	loadsp, rb, tmp
		.endm
		.macro	writeb, ch, rb
		mcr	p14, 0, \ch, c1, c0, 0
		.endm
#endif

#else

#include <mach/debug-macro.S>

		.macro	writeb,	ch, rb
		senduart \ch, \rb
		.endm

#if defined(CONFIG_ARCH_SA1100)
		.macro	loadsp, rb, tmp
		mov	\rb, #0x80000000	@ physical base address
#ifdef CONFIG_DEBUG_LL_SER3
		add	\rb, \rb, #0x00050000	@ Ser3
#else
		add	\rb, \rb, #0x00010000	@ Ser1
#endif
		.endm
#elif defined(CONFIG_ARCH_S3C2410)
		.macro loadsp, rb, tmp
		mov	\rb, #0x50000000
		add	\rb, \rb, #0x4000 * CONFIG_S3C_LOWLEVEL_UART_PORT
		.endm
#else
		.macro	loadsp,	rb, tmp
		addruart \rb, \tmp
		.endm
#endif
#endif
#endif

		.macro	kputc,val
		mov	r0, \val
		bl	putc
		.endm

		.macro	kphex,val,len
		mov	r0, \val
		mov	r1, #\len
		bl	phex
		.endm

		.macro	debug_reloc_start
#ifdef DEBUG
		kputc	#'\n'
		kphex	r6, 8		/* processor id */
		kputc	#':'
		kphex	r7, 8		/* architecture id */
#ifdef CONFIG_CPU_CP15
		kputc	#':'
		mrc	p15, 0, r0, c1, c0
		kphex	r0, 8		/* control reg */
#endif
		kputc	#'\n'
		kphex	r5, 8		/* decompressed kernel start */
		kputc	#'-'
		kphex	r9, 8		/* decompressed kernel end  */
		kputc	#'>'
		kphex	r4, 8		/* kernel execution address */
		kputc	#'\n'
#endif
		.endm

		.macro	debug_reloc_end
#ifdef DEBUG
		kphex	r5, 8		/* end of kernel */
		kputc	#'\n'
		mov	r0, r4
		bl	memdump		/* dump 256 bytes at start of kernel */
#endif
		.endm

		.section ".start", #alloc, #execinstr
/*
 * sort out different calling conventions
 */
		.align
		.arm				@ Always enter in ARM state
start:
		.type	start,#function
 THUMB(		adr	r12, BSYM(1f)	)
 THUMB(		bx	r12		)
 THUMB(		.rept	6		)
 ARM(		.rept	8		)
		mov	r0, r0
		.endr

		b	1f
		.word	0x016f2818		@ Magic numbers to help the loader
		.word	start			@ absolute load/run zImage address
		.word	_edata			@ zImage end address
 THUMB(		.thumb			)
1:		mov	r7, r1			@ save architecture ID
		mov	r8, r2			@ save atags pointer

#ifndef __ARM_ARCH_2__
		/*
		 * Booting from Angel - need to enter SVC mode and disable
		 * FIQs/IRQs (numeric definitions from angel arm.h source).
		 * We only do this if we were in user mode on entry.
		 */
		mrs	r2, cpsr		@ get current mode
		tst	r2, #3			@ not user?
		bne	not_angel
		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
 ARM(		swi	0x123456	)	@ angel_SWI_ARM
 THUMB(		svc	0xab		)	@ angel_SWI_THUMB
not_angel:
		mrs	r2, cpsr		@ turn off interrupts to
		orr	r2, r2, #0xc0		@ prevent angel from running
		msr	cpsr_c, r2
#else
		teqp	pc, #0x0c000003		@ turn off interrupts
#endif

		/*
		 * Note that some cache flushing and other stuff may
		 * be needed here - is there an Angel SWI call for this?
		 */

		/*
		 * some architecture specific code can be inserted
		 * by the linker here, but it should preserve r7, r8, and r9.
		 */

		.text
		adr	r0, LC0
		ldmia	r0, {r1, r2, r3, r5, r6, r11, ip}
		ldr	sp, [r0, #28]
#ifdef CONFIG_AUTO_ZRELADDR
		@ determine final kernel image address
		mov	r4, pc
		and	r4, r4, #0xf8000000
		add	r4, r4, #TEXT_OFFSET
#else
		ldr	r4, =zreladdr
#endif
		subs	r0, r0, r1		@ calculate the delta offset

						@ if delta is zero, we are
		beq	not_relocated		@ running at the address we
						@ were linked at.

		/*
		 * We're running at a different address.  We need to fix
		 * up various pointers:
		 *   r5 - zImage base address (_start)
		 *   r6 - size of decompressed image
		 *   r11 - GOT start
		 *   ip - GOT end
		 */
		add	r5, r5, r0
		add	r11, r11, r0
		add	ip, ip, r0

#ifndef CONFIG_ZBOOT_ROM
		/*
		 * If we're running fully PIC (CONFIG_ZBOOT_ROM = n),
		 * we need to fix up pointers into the BSS region.
		 *   r2 - BSS start
		 *   r3 - BSS end
		 *   sp - stack pointer
		 */
		add	r2, r2, r0
		add	r3, r3, r0
		add	sp, sp, r0

		/*
		 * Relocate all entries in the GOT table.
		 */
1:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
		add	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r11], #4		@ C references.
		cmp	r11, ip
		blo	1b
#else

		/*
		 * Relocate entries in the GOT table.  We only relocate
		 * the entries that are outside the (relocated) BSS region.
		 */
1:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
		cmp	r1, r2			@ entry < bss_start ||
		cmphs	r3, r1			@ _end < entry
		addlo	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r11], #4		@ C references.
		cmp	r11, ip
		blo	1b
#endif
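
/*
 * Illustrative C sketch only (not part of the build): the two GOT fixup
 * loops above do roughly the following.  "delta" stands for the offset
 * held in r0; the other names are hypothetical stand-ins for r11/ip/r2/r3.
 *
 *	static void relocate_got(u32 *p, u32 *got_end, u32 bss_start,
 *				 u32 bss_end, u32 delta, int zboot_rom)
 *	{
 *		for (; p < got_end; p++) {
 *			if (zboot_rom && *p >= bss_start && *p <= bss_end)
 *				continue;	// leave entries into the BSS alone
 *			*p += delta;		// fix up the C references
 *		}
 *	}
 */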

not_relocated:	mov	r0, #0
1:		str	r0, [r2], #4		@ clear bss
		str	r0, [r2], #4
		str	r0, [r2], #4
		str	r0, [r2], #4
		cmp	r2, r3
		blo	1b

		/*
		 * The C runtime environment should now be set up
		 * sufficiently.  Turn the cache on, set up some
		 * pointers, and start decompressing.
		 */
		bl	cache_on

		mov	r1, sp			@ malloc space above stack
		add	r2, sp, #0x10000	@ 64k max

/*
 * Check to see if we will overwrite ourselves.
 *   r4 = final kernel address
 *   r5 = start of this image
 *   r6 = size of decompressed image
 *   r2 = end of malloc space (and therefore this image)
 * We basically want:
 *   r4 >= r2 -> OK
 *   r4 + image length <= r5 -> OK
 */
		cmp	r4, r2
		bhs	wont_overwrite
		add	r0, r4, r6
		cmp	r0, r5
		bls	wont_overwrite
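
/*
 * Illustrative C sketch only (not part of the build) of the overlap
 * test just performed; the parameter names are hypothetical stand-ins
 * for the registers listed above.
 *
 *	static int will_overwrite(u32 final_addr,	// r4
 *				  u32 image_start,	// r5
 *				  u32 image_size,	// r6
 *				  u32 malloc_end)	// r2
 *	{
 *		if (final_addr >= malloc_end)		// decompress in place
 *			return 0;
 *		if (final_addr + image_size <= image_start)
 *			return 0;			// lands below this image
 *		return 1;				// must relocate afterwards
 *	}
 */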

		mov	r5, r2			@ decompress after malloc space
		mov	r0, r5
		mov	r3, r7
		bl	decompress_kernel

		add	r0, r0, #127 + 128	@ alignment + stack
		bic	r0, r0, #127		@ align the kernel length
/*
 * r0     = decompressed kernel length
 * r1-r3  = unused
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r7     = architecture ID
 * r8     = atags pointer
 * r9-r12,r14 = corrupted
 */
		add	r1, r5, r0		@ end of decompressed kernel
		adr	r2, reloc_start
		ldr	r3, LC1
		add	r3, r2, r3
1:		ldmia	r2!, {r9 - r12, r14}	@ copy relocation code
		stmia	r1!, {r9 - r12, r14}
		ldmia	r2!, {r9 - r12, r14}
		stmia	r1!, {r9 - r12, r14}
		cmp	r2, r3
		blo	1b
		mov	sp, r1
		add	sp, sp, #128		@ relocate the stack

		bl	cache_clean_flush
 ARM(		add	pc, r5, r0		) @ call relocation code
 THUMB(		add	r12, r5, r0		)
 THUMB(		mov	pc, r12			) @ call relocation code

/*
 * We're not in danger of overwriting ourselves.  Do this the simple way.
 *
 * r4     = kernel execution address
 * r7     = architecture ID
 */
wont_overwrite:	mov	r0, r4
		mov	r3, r7
		bl	decompress_kernel
		b	call_kernel

		.align	2
		.type	LC0, #object
LC0:		.word	LC0			@ r1
		.word	__bss_start		@ r2
		.word	_end			@ r3
		.word	_start			@ r5
		.word	_image_size		@ r6
		.word	_got_start		@ r11
		.word	_got_end		@ ip
		.word	user_stack_end		@ sp
LC1:		.word	reloc_end - reloc_start
		.size	LC0, . - LC0

#ifdef CONFIG_ARCH_RPC
		.globl	params
params:		ldr	r0, =0x10000100		@ params_phys for RPC
		mov	pc, lr
		.ltorg
		.align
#endif

/*
 * Turn on the cache.  We need to set up some page tables so that we
 * can have both the I and D caches on.
 *
 * We place the page tables 16k down from the kernel execution address,
 * and we hope that nothing else is using it.  If we're using it, we
 * will go pop!
 *
 * On entry,
 *  r4 = kernel execution address
 *  r7 = architecture number
 *  r8 = atags pointer
 * On exit,
 *  r0, r1, r2, r3, r9, r10, r12 corrupted
 * This routine must preserve:
 *  r4, r5, r6, r7, r8
 */
		.align	5
cache_on:	mov	r3, #8			@ cache_on function
		b	call_cache_fn

/*
 * Initialize the highest priority protection region, PR7,
 * to cover the whole 32-bit address space, cacheable and bufferable.
 */
__armv4_mpu_cache_on:
		mov	r0, #0x3f		@ 4G, the whole
		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
		mcr	p15, 0, r0, c6, c7, 1

		mov	r0, #0x80		@ PR7
		mcr	p15, 0, r0, c2, c0, 0	@ D-cache on
		mcr	p15, 0, r0, c2, c0, 1	@ I-cache on
		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on

		mov	r0, #0xc000
		mcr	p15, 0, r0, c5, c0, 1	@ I-access permission
		mcr	p15, 0, r0, c5, c0, 0	@ D-access permission

		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
						@ ...I .... ..D. WC.M
		orr	r0, r0, #0x002d		@ .... .... ..1. 11.1
		orr	r0, r0, #0x1000		@ ...1 .... .... ....

		mcr	p15, 0, r0, c1, c0, 0	@ write control reg

		mov	r0, #0
		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
		mov	pc, lr

__armv3_mpu_cache_on:
		mov	r0, #0x3f		@ 4G, the whole
		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting

		mov	r0, #0x80		@ PR7
		mcr	p15, 0, r0, c2, c0, 0	@ cache on
		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on

		mov	r0, #0xc000
		mcr	p15, 0, r0, c5, c0, 0	@ access permission

		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		/*
		 * ?? ARMv3 MMU does not allow reading the control register,
		 * does this really work on ARMv3 MPU?
		 */
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
						@ .... .... .... WC.M
		orr	r0, r0, #0x000d		@ .... .... .... 11.1
		/* ?? this overwrites the value constructed above? */
		mov	r0, #0
		mcr	p15, 0, r0, c1, c0, 0	@ write control reg

		/* ?? invalidate for the second time? */
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
		bic	r3, r3, #0xff		@ Align the pointer
		bic	r3, r3, #0x3f00
/*
 * Initialise the page tables, turning on the cacheable and bufferable
 * bits for the RAM area only.
 */
		mov	r0, r3
		mov	r9, r0, lsr #18
		mov	r9, r9, lsl #18		@ start of RAM
		add	r10, r9, #0x10000000	@ a reasonable RAM size
		mov	r1, #0x12
		orr	r1, r1, #3 << 10
		add	r2, r3, #16384
1:		cmp	r1, r9			@ if virt > start of RAM
		orrhs	r1, r1, #0x0c		@ set cacheable, bufferable
		cmp	r1, r10			@ if virt > end of RAM
		bichs	r1, r1, #0x0c		@ clear cacheable, bufferable
		str	r1, [r0], #4		@ 1:1 mapping
		add	r1, r1, #1048576
		teq	r0, r2
		bne	1b
/*
 * If ever we are running from Flash, then we surely want the cache
 * to be enabled also for our execution instance...  We map 2MB of it
 * so there is no map overlap problem for up to 1 MB compressed kernel.
 * If the execution is in RAM then we would only be duplicating the above.
 */
		mov	r1, #0x1e
		orr	r1, r1, #3 << 10
		mov	r2, pc
		mov	r2, r2, lsr #20
		orr	r1, r1, r2, lsl #20
		add	r0, r3, r2, lsl #2
		str	r1, [r0], #4
		add	r1, r1, #1048576
		str	r1, [r0]
		mov	pc, lr
ENDPROC(__setup_mmu)
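
/*
 * Illustrative C sketch only (not part of the build) of the flat map
 * built above: a 16KB first-level table placed 16KB below the kernel
 * execution address, filled with 1:1 1MB section entries that are
 * cacheable/bufferable only within an assumed 256MB of RAM (plus the
 * 2MB currently being executed from).  Names are hypothetical.
 *
 *	u32 *pgd = (u32 *)((exec_addr - 16384) & ~0x3fffUL);
 *	u32 ram_start = (u32)pgd & ~0x3ffffUL;	// crude guess at RAM start
 *	u32 ram_end = ram_start + 0x10000000;	// "a reasonable RAM size"
 *
 *	for (i = 0; i < 4096; i++) {
 *		u32 desc = (i << 20) | (3 << 10) | 0x12;  // 1MB section
 *		if ((i << 20) >= ram_start && (i << 20) < ram_end)
 *			desc |= 0x0c;		// cacheable + bufferable
 *		pgd[i] = desc;			// 1:1 mapping
 *	}
 */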

__armv4_mmu_cache_on:
		mov	r12, lr
#ifdef CONFIG_MMU
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
		orr	r0, r0, #0x0030
#ifdef CONFIG_CPU_ENDIAN_BE8
		orr	r0, r0, #1 << 25	@ big-endian page tables
#endif
		bl	__common_mmu_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
#endif
		mov	pc, r12

__armv7_mmu_cache_on:
		mov	r12, lr
#ifdef CONFIG_MMU
		mrc	p15, 0, r11, c0, c1, 4	@ read ID_MMFR0
		tst	r11, #0xf		@ VMSA
		blne	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		tst	r11, #0xf		@ VMSA
		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
#endif
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
		orr	r0, r0, #0x003c		@ write buffer
#ifdef CONFIG_MMU
#ifdef CONFIG_CPU_ENDIAN_BE8
		orr	r0, r0, #1 << 25	@ big-endian page tables
#endif
		orrne	r0, r0, #1		@ MMU enabled
		movne	r1, #-1
		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
#endif
		mcr	p15, 0, r0, c1, c0, 0	@ load control register
		mrc	p15, 0, r0, c1, c0, 0	@ and read it back
		mov	r0, #0
		mcr	p15, 0, r0, c7, c5, 4	@ ISB
		mov	pc, r12

__fa526_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c7, 0	@ Invalidate whole cache
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x1000		@ I-cache enable
		bl	__common_mmu_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
		mov	pc, r12

__arm6_mmu_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	r0, #0x30
		bl	__common_mmu_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	pc, r12

__common_mmu_cache_on:
#ifndef CONFIG_THUMB2_KERNEL
#ifndef DEBUG
		orr	r0, r0, #0x000d		@ Write buffer, mmu
#endif
		mov	r1, #-1
		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
		b	1f
		.align	5			@ cache line aligned
1:		mcr	p15, 0, r0, c1, c0, 0	@ load control register
		mrc	p15, 0, r0, c1, c0, 0	@ and read it back to
		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
#endif

/*
 * All code following this line is relocatable.  It is relocated by
 * the above code to the end of the decompressed kernel image and
 * executed there.  During this time, we have no stacks.
 *
 * r0     = decompressed kernel length
 * r1-r3  = unused
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r7     = architecture ID
 * r8     = atags pointer
 * r9-r12,r14 = corrupted
 */
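
/*
 * In C terms, the code from reloc_start through call_kernel does roughly
 * the following (illustrative sketch only, not part of the build; the
 * names stand in for the registers listed above):
 *
 *	memcpy((void *)exec_addr, (void *)image_start, image_len - 128);
 *	sp = exec_addr + image_len;	// stack just above the copied image
 *	cache_clean_flush();
 *	cache_off();
 *	((void (*)(int, u32, u32))exec_addr)(0, arch_id, atags);  // no return
 */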
		.align	5
reloc_start:	add	r9, r5, r0
		sub	r9, r9, #128		@ do not copy the stack
		debug_reloc_start
		mov	r1, r4
1:
		.rept	4
		ldmia	r5!, {r0, r2, r3, r10 - r12, r14}	@ relocate kernel
		stmia	r1!, {r0, r2, r3, r10 - r12, r14}
		.endr

		cmp	r5, r9
		blo	1b
		mov	sp, r1
		add	sp, sp, #128		@ relocate the stack
		debug_reloc_end

call_kernel:	bl	cache_clean_flush
		bl	cache_off
		mov	r0, #0			@ must be zero
		mov	r1, r7			@ restore architecture number
		mov	r2, r8			@ restore atags pointer
		mov	pc, r4			@ call kernel

/*
 * Here follow the relocatable cache support functions for the
 * various processors.  This is a generic hook for locating an
 * entry and jumping to an instruction at the specified offset
 * from the start of the block.  Please note this is all position
 * independent code.
 *
 *  r1  = corrupted
 *  r2  = corrupted
 *  r3  = block offset
 *  r9  = corrupted
 *  r12 = corrupted
 */

call_cache_fn:	adr	r12, proc_types
#ifdef CONFIG_CPU_CP15
		mrc	p15, 0, r9, c0, c0	@ get processor ID
#else
		ldr	r9, =CONFIG_PROCESSOR_ID
#endif
1:		ldr	r1, [r12, #0]		@ get value
		ldr	r2, [r12, #4]		@ get mask
		eor	r1, r1, r9		@ (real ^ match)
		tst	r1, r2			@       & mask
 ARM(		addeq	pc, r12, r3		) @ call cache function
 THUMB(		addeq	r12, r3			)
 THUMB(		moveq	pc, r12			) @ call cache function
		add	r12, r12, #4*5
		b	1b

/*
 * Table for cache operations.  This is basically:
 *   - CPU ID match
 *   - CPU ID mask
 *   - 'cache on' method instruction
 *   - 'cache off' method instruction
 *   - 'cache flush' method instruction
 *
 * We match an entry using: ((real_id ^ match) & mask) == 0
 *
 * Writethrough caches generally only need 'on' and 'off'
 * methods.  Writeback caches _must_ have the flush method
 * defined.
 */
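
/*
 * Illustrative C sketch only (not part of the build) of how call_cache_fn
 * walks this table; the struct and names are hypothetical.  Each entry is
 * five words, and the final match=0/mask=0 entry matches everything, so
 * the walk always terminates.
 *
 *	struct proc_type {
 *		u32 match, mask;
 *		u32 on, off, flush;	// three branch instructions
 *	};
 *
 *	for (p = proc_types; ((cpu_id ^ p->match) & p->mask) != 0; p++)
 *		;
 *	goto *((char *)p + offset);	// offset r3: 8=on, 12=off, 16=flush
 */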
		.align	2
		.type	proc_types,#object
proc_types:
		.word	0x41560600		@ ARM6/610
		.word	0xffffffe0
		W(b)	__arm6_mmu_cache_off	@ works, but slow
		W(b)	__arm6_mmu_cache_off
		mov	pc, lr
 THUMB(		nop				)
@		b	__arm6_mmu_cache_on		@ untested
@		b	__arm6_mmu_cache_off
@		b	__armv3_mmu_cache_flush

		.word	0x00000000		@ old ARM ID
		.word	0x0000f000
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)

		.word	0x41007000		@ ARM7/710
		.word	0xfff8fe00
		W(b)	__arm7_mmu_cache_off
		W(b)	__arm7_mmu_cache_off
		mov	pc, lr
 THUMB(		nop				)

		.word	0x41807200		@ ARM720T (writethrough)
		.word	0xffffff00
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		mov	pc, lr
 THUMB(		nop				)

		.word	0x41007400		@ ARM74x
		.word	0xff00ff00
		W(b)	__armv3_mpu_cache_on
		W(b)	__armv3_mpu_cache_off
		W(b)	__armv3_mpu_cache_flush

		.word	0x41009400		@ ARM94x
		.word	0xff00ff00
		W(b)	__armv4_mpu_cache_on
		W(b)	__armv4_mpu_cache_off
		W(b)	__armv4_mpu_cache_flush

		.word	0x00007000		@ ARM7 IDs
		.word	0x0000f000
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)

		@ Everything from here on will be the new ID system.

		.word	0x4401a100		@ sa110 / sa1100
		.word	0xffffffe0
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x6901b110		@ sa1110
		.word	0xfffffff0
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x56056900
		.word	0xffffff00		@ PXA9xx
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x56158000		@ PXA168
		.word	0xfffff000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush

		.word	0x56050000		@ Feroceon
		.word	0xff0f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush

#ifdef CONFIG_CPU_FEROCEON_OLD_ID
		/* this conflicts with the standard ARMv5TE entry */
		.long	0x41009260		@ Old Feroceon
		.long	0xff00fff0
		b	__armv4_mmu_cache_on
		b	__armv4_mmu_cache_off
		b	__armv5tej_mmu_cache_flush
#endif

		.word	0x66015261		@ FA526
		.word	0xff01fff1
		W(b)	__fa526_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__fa526_cache_flush

		@ These match on the architecture ID

		.word	0x00020000		@ ARMv4T
		.word	0x000f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x00050000		@ ARMv5TE
		.word	0x000f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv4_mmu_cache_flush

		.word	0x00060000		@ ARMv5TEJ
		.word	0x000f0000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv5tej_mmu_cache_flush

		.word	0x0007b000		@ ARMv6
		.word	0x000ff000
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv6_mmu_cache_flush

		.word	0x560f5810		@ Marvell PJ4 ARMv6
		.word	0xff0ffff0
		W(b)	__armv4_mmu_cache_on
		W(b)	__armv4_mmu_cache_off
		W(b)	__armv6_mmu_cache_flush

		.word	0x000f0000		@ new CPU Id
		.word	0x000f0000
		W(b)	__armv7_mmu_cache_on
		W(b)	__armv7_mmu_cache_off
		W(b)	__armv7_mmu_cache_flush

		.word	0			@ unrecognised type
		.word	0
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)
		mov	pc, lr
 THUMB(		nop				)

		.size	proc_types, . - proc_types

/*
 * Turn off the Cache and MMU.  ARMv3 does not support
 * reading the control register, but ARMv4 does.
 *
 * On exit,
 *  r0, r1, r2, r3, r9, r12 corrupted
 * This routine must preserve:
 *  r4, r6, r7
 */
		.align	5
cache_off:	mov	r3, #12			@ cache_off function
		b	call_cache_fn

__armv4_mpu_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MPU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c7, c6, 0	@ flush D-Cache
		mcr	p15, 0, r0, c7, c5, 0	@ flush I-Cache
		mov	pc, lr

__armv3_mpu_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0, 0	@ turn MPU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

__armv4_mmu_cache_off:
#ifdef CONFIG_MMU
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
#endif
		mov	pc, lr

__armv7_mmu_cache_off:
		mrc	p15, 0, r0, c1, c0
#ifdef CONFIG_MMU
		bic	r0, r0, #0x000d
#else
		bic	r0, r0, #0x000c
#endif
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mov	r12, lr
		bl	__armv7_mmu_cache_flush
		mov	r0, #0
#ifdef CONFIG_MMU
		mcr	p15, 0, r0, c8, c7, 0	@ invalidate whole TLB
#endif
		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
		mcr	p15, 0, r0, c7, c10, 4	@ DSB
		mcr	p15, 0, r0, c7, c5, 4	@ ISB
		mov	pc, r12

__arm6_mmu_cache_off:
		mov	r0, #0x00000030		@ ARM6 control reg.
		b	__armv3_mmu_cache_off

__arm7_mmu_cache_off:
		mov	r0, #0x00000070		@ ARM7 control reg.
		b	__armv3_mmu_cache_off

__armv3_mmu_cache_off:
		mcr	p15, 0, r0, c1, c0, 0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	pc, lr

/*
 * Clean and flush the cache to maintain consistency.
 *
 * On exit,
 *  r1, r2, r3, r9, r10, r11, r12 corrupted
 * This routine must preserve:
 *  r0, r4, r5, r6, r7
 */
		.align	5
cache_clean_flush:
		mov	r3, #16
		b	call_cache_fn

__armv4_mpu_cache_flush:
		mov	r2, #1
		mov	r3, #0
		mcr	p15, 0, ip, c7, c6, 0	@ invalidate D cache
		mov	r1, #7 << 5		@ 8 segments
1:		orr	r3, r1, #63 << 26	@ 64 entries
2:		mcr	p15, 0, r3, c7, c14, 2	@ clean & invalidate D index
		subs	r3, r3, #1 << 26
		bcs	2b			@ entries 63 to 0
		subs	r1, r1, #1 << 5
		bcs	1b			@ segments 7 to 0

		teq	r2, #0
		mcrne	p15, 0, ip, c7, c5, 0	@ invalidate I cache
		mcr	p15, 0, ip, c7, c10, 4	@ drain WB
		mov	pc, lr

__fa526_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r1, c7, c14, 0	@ clean and invalidate D cache
		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv6_mmu_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
		mcr	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv7_mmu_cache_flush:
		mrc	p15, 0, r10, c0, c1, 5	@ read ID_MMFR1
		tst	r10, #0xf << 16		@ hierarchical cache (ARMv7)
		mov	r10, #0
		beq	hierarchical
		mcr	p15, 0, r10, c7, c14, 0	@ clean+invalidate D
		b	iflush
hierarchical:
		mcr	p15, 0, r10, c7, c10, 5	@ DMB
		stmfd	sp!, {r0-r7, r9-r11}
		mrc	p15, 1, r0, c0, c0, 1	@ read clidr
		ands	r3, r0, #0x7000000	@ extract loc from clidr
		mov	r3, r3, lsr #23		@ left align loc bit field
		beq	finished		@ if loc is 0, then no need to clean
		mov	r10, #0			@ start clean at cache level 0
loop1:
		add	r2, r10, r10, lsr #1	@ work out 3x current cache level
		mov	r1, r0, lsr r2		@ extract cache type bits from clidr
		and	r1, r1, #7		@ mask off the bits for the current cache only
		cmp	r1, #2			@ see what cache we have at this level
		blt	skip			@ skip if no cache, or just i-cache
		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
		mcr	p15, 0, r10, c7, c5, 4	@ isb to sync the new cssr & csidr
		mrc	p15, 1, r1, c0, c0, 0	@ read the new csidr
		and	r2, r1, #7		@ extract the length of the cache lines
		add	r2, r2, #4		@ add 4 (line length offset)
		ldr	r4, =0x3ff
		ands	r4, r4, r1, lsr #3	@ extract the maximum way number
		clz	r5, r4			@ find bit position of way size increment
		ldr	r7, =0x7fff
		ands	r7, r7, r1, lsr #13	@ extract the maximum set (index) number
loop2:
		mov	r9, r4			@ create working copy of max way size
loop3:
 ARM(		orr	r11, r10, r9, lsl r5	) @ factor way and cache number into r11
 ARM(		orr	r11, r11, r7, lsl r2	) @ factor index number into r11
 THUMB(		lsl	r6, r9, r5		)
 THUMB(		orr	r11, r10, r6		) @ factor way and cache number into r11
 THUMB(		lsl	r6, r7, r2		)
 THUMB(		orr	r11, r11, r6		) @ factor index number into r11
		mcr	p15, 0, r11, c7, c14, 2	@ clean & invalidate by set/way
		subs	r9, r9, #1		@ decrement the way
		bge	loop3
		subs	r7, r7, #1		@ decrement the index
		bge	loop2
skip:
		add	r10, r10, #2		@ increment cache number
		cmp	r3, r10
		bgt	loop1
finished:
		ldmfd	sp!, {r0-r7, r9-r11}
		mov	r10, #0			@ switch back to cache level 0
		mcr	p15, 2, r10, c0, c0, 0	@ select current cache level in cssr
iflush:
		mcr	p15, 0, r10, c7, c10, 4	@ DSB
		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I+BTB
		mcr	p15, 0, r10, c7, c10, 4	@ DSB
		mcr	p15, 0, r10, c7, c5, 4	@ ISB
		mov	pc, lr

__armv5tej_mmu_cache_flush:
1:		mrc	p15, 0, r15, c7, c14, 3	@ test,clean,invalidate D cache
		bne	1b
		mcr	p15, 0, r0, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r0, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv4_mmu_cache_flush:
		mov	r2, #64*1024		@ default: 32K dcache size (*2)
		mov	r11, #32		@ default: 32 byte line size
		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
		teq	r3, r9			@ cache ID register present?
		beq	no_cache_id
		mov	r1, r3, lsr #18
		and	r1, r1, #7
		mov	r2, #1024
		mov	r2, r2, lsl r1		@ base dcache size *2
		tst	r3, #1 << 14		@ test M bit
		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
		mov	r3, r3, lsr #12
		and	r3, r3, #3
		mov	r11, #8
		mov	r11, r11, lsl r3	@ cache line size in bytes
no_cache_id:
		mov	r1, pc
		bic	r1, r1, #63		@ align to longest cache line
		add	r2, r1, r2
1:
 ARM(		ldr	r3, [r1], r11		) @ s/w flush D cache
 THUMB(		ldr     r3, [r1]		) @ s/w flush D cache
 THUMB(		add     r1, r1, r11		)
		teq	r1, r2
		bne	1b

		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv3_mmu_cache_flush:
__armv3_mpu_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r1, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

/*
 * Various debugging routines for printing hex characters and
 * memory, which again must be relocatable.
 */
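
/*
 * Illustrative C sketch only (not part of the build) of what phex below
 * does: format "len" uppercase hex digits of "val" into a small buffer
 * and hand it to puts.
 *
 *	static void phex(u32 val, int len)
 *	{
 *		static char buf[12];
 *
 *		buf[len] = '\0';
 *		while (len--) {
 *			int d = val & 15;
 *			buf[len] = d < 10 ? '0' + d : 'A' + d - 10;
 *			val >>= 4;
 *		}
 *		puts(buf);
 *	}
 */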
#ifdef DEBUG
		.align	2
		.type	phexbuf,#object
phexbuf:	.space	12
		.size	phexbuf, . - phexbuf

@ phex corrupts {r0, r1, r2, r3}
phex:		adr	r3, phexbuf
		mov	r2, #0
		strb	r2, [r3, r1]
1:		subs	r1, r1, #1
		movmi	r0, r3
		bmi	puts
		and	r2, r0, #15
		mov	r0, r0, lsr #4
		cmp	r2, #10
		addge	r2, r2, #7
		add	r2, r2, #'0'
		strb	r2, [r3, r1]
		b	1b

@ puts corrupts {r0, r1, r2, r3}
puts:		loadsp	r3, r1
1:		ldrb	r2, [r0], #1
		teq	r2, #0
		moveq	pc, lr
2:		writeb	r2, r3
		mov	r1, #0x00020000
3:		subs	r1, r1, #1
		bne	3b
		teq	r2, #'\n'
		moveq	r2, #'\r'
		beq	2b
		teq	r0, #0
		bne	1b
		mov	pc, lr
@ putc corrupts {r0, r1, r2, r3}
putc:
		mov	r2, r0
		mov	r0, #0
		loadsp	r3, r1
		b	2b

@ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
memdump:	mov	r12, r0
		mov	r10, lr
		mov	r11, #0
2:		mov	r0, r11, lsl #2
		add	r0, r0, r12
		mov	r1, #8
		bl	phex
		mov	r0, #':'
		bl	putc
1:		mov	r0, #' '
		bl	putc
		ldr	r0, [r12, r11, lsl #2]
		mov	r1, #8
		bl	phex
		and	r0, r11, #7
		teq	r0, #3
		moveq	r0, #' '
		bleq	putc
		and	r0, r11, #7
		add	r11, r11, #1
		teq	r0, #7
		bne	1b
		mov	r0, #'\n'
		bl	putc
		cmp	r11, #64
		blt	2b
		mov	pc, r10
#endif

		.ltorg
reloc_end:

		.align
		.section ".stack", "aw", %nobits
user_stack:	.space	4096
user_stack_end: