/*
 *  linux/arch/arm/boot/compressed/head.S
 *
 *  Copyright (C) 1996-2002 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/config.h>
#include <linux/linkage.h>

/*
 * Debugging stuff
 *
 * Note that these macros must not contain any code which is not
 * 100% relocatable.  Any attempt to do so will result in a crash.
 * Please select one of the following when turning on debugging.
 */
#ifdef DEBUG

#if defined(CONFIG_DEBUG_ICEDCC)
		.macro	loadsp, rb
		.endm
		.macro	writeb, ch, rb
		mcr	p14, 0, \ch, c0, c1, 0
		.endm
#else

#include <asm/arch/debug-macro.S>

		.macro	writeb,	ch, rb
		senduart \ch, \rb
		.endm

#if defined(CONFIG_ARCH_SA1100)
		.macro	loadsp, rb
		mov	\rb, #0x80000000	@ physical base address
#ifdef CONFIG_DEBUG_LL_SER3
		add	\rb, \rb, #0x00050000	@ Ser3
#else
		add	\rb, \rb, #0x00010000	@ Ser1
#endif
		.endm
#elif defined(CONFIG_ARCH_IOP331)
		.macro	loadsp, rb
		mov	\rb, #0xff000000
		orr	\rb, \rb, #0x00ff0000
		orr	\rb, \rb, #0x0000f700	@ location of the UART
		.endm
#elif defined(CONFIG_ARCH_S3C2410)
		.macro	loadsp, rb
		mov	\rb, #0x50000000
		add	\rb, \rb, #0x4000 * CONFIG_S3C2410_LOWLEVEL_UART_PORT
		.endm
#else
		.macro	loadsp,	rb
		addruart \rb
		.endm
#endif
#endif
#endif
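
/*
 * With CONFIG_DEBUG_ICEDCC the writeb macro above pushes the character
 * through the EmbeddedICE debug comms channel on coprocessor 14, so no
 * UART mapping is needed and loadsp is empty; the other variants load a
 * physical UART base into \rb and defer to the platform's senduart.
 */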

		.macro	kputc,val
		mov	r0, \val
		bl	putc
		.endm

		.macro	kphex,val,len
		mov	r0, \val
		mov	r1, #\len
		bl	phex
		.endm

		.macro	debug_reloc_start
#ifdef DEBUG
		kputc	#'\n'
		kphex	r6, 8		/* processor id */
		kputc	#':'
		kphex	r7, 8		/* architecture id */
		kputc	#':'
		mrc	p15, 0, r0, c1, c0
		kphex	r0, 8		/* control reg */
		kputc	#'\n'
		kphex	r5, 8		/* decompressed kernel start */
		kputc	#'-'
		kphex	r8, 8		/* decompressed kernel end  */
		kputc	#'>'
		kphex	r4, 8		/* kernel execution address */
		kputc	#'\n'
#endif
		.endm

		.macro	debug_reloc_end
#ifdef DEBUG
		kphex	r5, 8		/* end of kernel */
		kputc	#'\n'
		mov	r0, r4
		bl	memdump		/* dump 256 bytes at start of kernel */
#endif
		.endm

		.section ".start", #alloc, #execinstr
/*
 * sort out different calling conventions
 */
		.align
start:
		.type	start,#function
		.rept	8
		mov	r0, r0
		.endr

		b	1f
		.word	0x016f2818		@ Magic numbers to help the loader
		.word	start			@ absolute load/run zImage address
		.word	_edata			@ zImage end address
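
		/*
		 * These three words let a boot loader identify the image:
		 * the zImage magic 0x016f2818 sits at a fixed offset
		 * (0x24, after the eight nops and the branch), followed
		 * by the absolute load/run address and the image end, so
		 * a loader can verify and size a zImage without
		 * understanding its contents.
		 */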
1:		mov	r7, r1			@ save architecture ID
		mov	r8, #0			@ save r0

#ifndef __ARM_ARCH_2__
		/*
		 * Booting from Angel - need to enter SVC mode and disable
		 * FIQs/IRQs (numeric definitions from angel arm.h source).
		 * We only do this if we were in user mode on entry.
		 */
		mrs	r2, cpsr		@ get current mode
		tst	r2, #3			@ not user?
		bne	not_angel
		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
		swi	0x123456		@ angel_SWI_ARM
not_angel:
		mrs	r2, cpsr		@ turn off interrupts to
		orr	r2, r2, #0xc0		@ prevent angel from running
		msr	cpsr_c, r2
#else
		teqp	pc, #0x0c000003		@ turn off interrupts
#endif
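
		/*
		 * On 26-bit CPUs (__ARM_ARCH_2__) the PSR shares the pc
		 * register, so the teqp above sets I and F (0x0c000000)
		 * and SVC mode (0x03) in one instruction; msr does not
		 * exist on those cores.
		 */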

		/*
		 * Note that some cache flushing and other stuff may
		 * be needed here - is there an Angel SWI call for this?
		 */

		/*
		 * some architecture specific code can be inserted
		 * by the linker here, but it should preserve r7 and r8.
		 */

		.text
		adr	r0, LC0
		ldmia	r0, {r1, r2, r3, r4, r5, r6, ip, sp}
		subs	r0, r0, r1		@ calculate the delta offset

						@ if delta is zero, we are
		beq	not_relocated		@ running at the address we
						@ were linked at.
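
		/*
		 * For example, if the image was linked at 0x00008000 but
		 * is executing at 0x20008000, r0 now holds 0x20000000 and
		 * each link-time address loaded from LC0 needs that delta
		 * added before use.
		 */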

		/*
		 * We're running at a different address.  We need to fix
		 * up various pointers:
		 *   r5 - zImage base address
		 *   r6 - GOT start
		 *   ip - GOT end
		 */
		add	r5, r5, r0
		add	r6, r6, r0
		add	ip, ip, r0

#ifndef CONFIG_ZBOOT_ROM
		/*
		 * If we're running fully PIC (CONFIG_ZBOOT_ROM = n),
		 * we need to fix up pointers into the BSS region.
		 *   r2 - BSS start
		 *   r3 - BSS end
		 *   sp - stack pointer
		 */
		add	r2, r2, r0
		add	r3, r3, r0
		add	sp, sp, r0

		/*
		 * Relocate all entries in the GOT table.
		 */
1:		ldr	r1, [r6, #0]		@ relocate entries in the GOT
		add	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r6], #4		@ C references.
		cmp	r6, ip
		blo	1b
#else

		/*
		 * Relocate entries in the GOT table.  We only relocate
		 * the entries that are outside the (relocated) BSS region.
		 */
1:		ldr	r1, [r6, #0]		@ relocate entries in the GOT
		cmp	r1, r2			@ entry < bss_start ||
		cmphs	r3, r1			@ _end < entry
		addlo	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r6], #4		@ C references.
		cmp	r6, ip
		blo	1b
#endif
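
		/*
		 * In the ROM case the BSS is linked at its final RAM
		 * address, so GOT entries inside it are already correct;
		 * the cmp/cmphs pair above applies the delta only to
		 * entries falling outside that window.
		 */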

not_relocated:	mov	r0, #0
1:		str	r0, [r2], #4		@ clear bss
		str	r0, [r2], #4
		str	r0, [r2], #4
		str	r0, [r2], #4
		cmp	r2, r3
		blo	1b
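
		/*
		 * The loop stores four words before testing, so it can
		 * write up to 12 bytes past r3; the linker script is
		 * presumably expected to pad _end accordingly.
		 */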

		/*
		 * The C runtime environment should now be set up
		 * sufficiently.  Turn the cache on, set up some
		 * pointers, and start decompressing.
		 */
		bl	cache_on

		mov	r1, sp			@ malloc space above stack
		add	r2, sp, #0x10000	@ 64k max

/*
 * Check to see if we will overwrite ourselves.
 *   r4 = final kernel address
 *   r5 = start of this image
 *   r2 = end of malloc space (and therefore this image)
 * We basically want:
 *   r4 >= r2 -> OK
 *   r4 + image length <= r5 -> OK
 */
		cmp	r4, r2
		bhs	wont_overwrite
		add	r0, r4, #4096*1024	@ 4MB largest kernel size
		cmp	r0, r5
		bls	wont_overwrite
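
/*
 * Neither condition held: a kernel decompressed at r4 could overlap
 * this image, so decompress just past the malloc space instead and
 * relocate into place afterwards.
 */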

		mov	r5, r2			@ decompress after malloc space
		mov	r0, r5
		mov	r3, r7
		bl	decompress_kernel

		add	r0, r0, #127
		bic	r0, r0, #127		@ align the kernel length
/*
 * r0     = decompressed kernel length
 * r1-r3  = unused
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r6     = processor ID
 * r7     = architecture ID
 * r8-r14 = unused
 */
		add	r1, r5, r0		@ end of decompressed kernel
		adr	r2, reloc_start
		ldr	r3, LC1
		add	r3, r2, r3
1:		ldmia	r2!, {r8 - r13}		@ copy relocation code
		stmia	r1!, {r8 - r13}
		ldmia	r2!, {r8 - r13}
		stmia	r1!, {r8 - r13}
		cmp	r2, r3
		blo	1b
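
		/*
		 * Each pass copies two six-register bursts (48 bytes), so
		 * up to 47 bytes beyond reloc_end may be copied as well;
		 * the extra tail is never executed and is harmless.
		 */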

		bl	cache_clean_flush
		add	pc, r5, r0		@ call relocation code

/*
 * We're not in danger of overwriting ourselves.  Do this the simple way.
 *
 * r4     = kernel execution address
 * r7     = architecture ID
 */
wont_overwrite:	mov	r0, r4
		mov	r3, r7
		bl	decompress_kernel
		b	call_kernel

		.type	LC0, #object
LC0:		.word	LC0			@ r1
		.word	__bss_start		@ r2
		.word	_end			@ r3
		.word	zreladdr		@ r4
		.word	_start			@ r5
		.word	_got_start		@ r6
		.word	_got_end		@ ip
		.word	user_stack+4096		@ sp
LC1:		.word	reloc_end - reloc_start
		.size	LC0, . - LC0

#ifdef CONFIG_ARCH_RPC
		.globl	params
params:		ldr	r0, =params_phys
		mov	pc, lr
		.ltorg
		.align
#endif

/*
 * Turn on the cache.  We need to set up some page tables so that we
 * can have both the I and D caches on.
 *
 * We place the page tables 16k down from the kernel execution address,
 * and we hope that nothing else is using it.  If we're using it, we
 * will go pop!
 *
 * On entry,
 *  r4 = kernel execution address
 *  r6 = processor ID
 *  r7 = architecture number
 *  r8 = run-time address of "start"
 * On exit,
 *  r1, r2, r3, r8, r9, r12 corrupted
 * This routine must preserve:
 *  r4, r5, r6, r7
 */
		.align	5
cache_on:	mov	r3, #8			@ cache_on function
		b	call_cache_fn

__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
		bic	r3, r3, #0xff		@ Align the pointer
		bic	r3, r3, #0x3f00
/*
 * Initialise the page tables, turning on the cacheable and bufferable
 * bits for the RAM area only.
 */
		mov	r0, r3
		mov	r8, r0, lsr #18
		mov	r8, r8, lsl #18		@ start of RAM
		add	r9, r8, #0x10000000	@ a reasonable RAM size
		mov	r1, #0x12
		orr	r1, r1, #3 << 10
		add	r2, r3, #16384
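
/*
 * r1 is a 1MB section descriptor: 0x12 gives the section type bits
 * (0b10) plus bit 4, and 3 << 10 sets AP for full read/write access.
 * Bits 2 and 3 (C and B) are turned on only for the RAM window in the
 * loop below.
 */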
1:		cmp	r1, r8			@ if virt > start of RAM
		orrhs	r1, r1, #0x0c		@ set cacheable, bufferable
		cmp	r1, r9			@ if virt > end of RAM
		bichs	r1, r1, #0x0c		@ clear cacheable, bufferable
		str	r1, [r0], #4		@ 1:1 mapping
		add	r1, r1, #1048576
		teq	r0, r2
		bne	1b
/*
 * If ever we are running from Flash, then we surely want the cache
 * to be enabled also for our execution instance...  We map 2MB of it
 * so there is no map overlap problem for up to 1 MB compressed kernel.
 * If the execution is in RAM then we would only be duplicating the above.
 */
		mov	r1, #0x1e
		orr	r1, r1, #3 << 10
		mov	r2, pc, lsr #20
		orr	r1, r1, r2, lsl #20
		add	r0, r3, r2, lsl #2
		str	r1, [r0], #4
		add	r1, r1, #1048576
		str	r1, [r0]
		mov	pc, lr

__armv4_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
		orr	r0, r0, #0x0030
		bl	__common_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mov	pc, r12

__arm6_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	r0, #0x30
		bl	__common_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	pc, r12

__common_cache_on:
#ifndef DEBUG
		orr	r0, r0, #0x000d		@ Write buffer, mmu
#endif
		mov	r1, #-1
		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
		mcr	p15, 0, r0, c1, c0, 0	@ load control register
		mov	pc, lr
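
/*
 * Control register bits used above: 0x000d enables the MMU (bit 0),
 * D-cache (bit 2) and write buffer (bit 3); __armv4_cache_on also sets
 * bit 12 (I-cache) and bit 14 (round-robin replacement), while 0x0030
 * keeps the 32-bit program/data space bits set on CPUs that have them.
 */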

/*
 * All code following this line is relocatable.  It is relocated by
 * the above code to the end of the decompressed kernel image and
 * executed there.  During this time, we have no stacks.
 *
 * r0     = decompressed kernel length
 * r1-r3  = unused
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r6     = processor ID
 * r7     = architecture ID
 * r8-r14 = unused
 */
		.align	5
reloc_start:	add	r8, r5, r0
		debug_reloc_start
		mov	r1, r4
1:
		.rept	4
		ldmia	r5!, {r0, r2, r3, r9 - r13}	@ relocate kernel
		stmia	r1!, {r0, r2, r3, r9 - r13}
		.endr

		cmp	r5, r8
		blo	1b
		debug_reloc_end

call_kernel:	bl	cache_clean_flush
		bl	cache_off
		mov	r0, #0
		mov	r1, r7			@ restore architecture number
		mov	pc, r4			@ call kernel

/*
 * Here follow the relocatable cache support functions for the
 * various processors.  This is a generic hook for locating an
 * entry and jumping to an instruction at the specified offset
 * from the start of the block.  Please note this is all position
 * independent code.
 *
 *  r1  = corrupted
 *  r2  = corrupted
 *  r3  = block offset
 *  r6  = corrupted
 *  r12 = corrupted
 */

call_cache_fn:	adr	r12, proc_types
		mrc	p15, 0, r6, c0, c0	@ get processor ID
1:		ldr	r1, [r12, #0]		@ get value
		ldr	r2, [r12, #4]		@ get mask
		eor	r1, r1, r6		@ (real ^ match)
		tst	r1, r2			@       & mask
		addeq	pc, r12, r3		@ call cache function
		add	r12, r12, #4*5
		b	1b
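
/*
 * Each proc_types entry below is five words: ID match value, ID mask,
 * then one instruction each for the 'on', 'off' and 'flush' methods at
 * offsets 8, 12 and 16 - hence the callers' r3 values of 8, 12 or 16
 * and the 4*5 step on a mismatch.
 */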

/*
 * Table for cache operations.  This is basically:
 *   - CPU ID match
 *   - CPU ID mask
 *   - 'cache on' method instruction
 *   - 'cache off' method instruction
 *   - 'cache flush' method instruction
 *
 * We match an entry using: ((real_id ^ match) & mask) == 0
 *
 * Writethrough caches generally only need 'on' and 'off'
 * methods.  Writeback caches _must_ have the flush method
 * defined.
 */
		.type	proc_types,#object
proc_types:
		.word	0x41560600		@ ARM6/610
		.word	0xffffffe0
		b	__arm6_cache_off	@ works, but slow
		b	__arm6_cache_off
		mov	pc, lr
@		b	__arm6_cache_on		@ untested
@		b	__arm6_cache_off
@		b	__armv3_cache_flush

		.word	0x00000000		@ old ARM ID
		.word	0x0000f000
		mov	pc, lr
		mov	pc, lr
		mov	pc, lr

		.word	0x41007000		@ ARM7/710
		.word	0xfff8fe00
		b	__arm7_cache_off
		b	__arm7_cache_off
		mov	pc, lr

		.word	0x41807200		@ ARM720T (writethrough)
		.word	0xffffff00
		b	__armv4_cache_on
		b	__armv4_cache_off
		mov	pc, lr

		.word	0x00007000		@ ARM7 IDs
		.word	0x0000f000
		mov	pc, lr
		mov	pc, lr
		mov	pc, lr

		@ Everything from here on will be the new ID system.

		.word	0x4401a100		@ sa110 / sa1100
		.word	0xffffffe0
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		.word	0x6901b110		@ sa1110
		.word	0xfffffff0
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		@ These match on the architecture ID

		.word	0x00020000		@ ARMv4T
		.word	0x000f0000
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		.word	0x00050000		@ ARMv5TE
		.word	0x000f0000
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		.word	0x00060000		@ ARMv5TEJ
		.word	0x000f0000
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		.word	0x00070000		@ ARMv6
		.word	0x000f0000
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv6_cache_flush

		.word	0			@ unrecognised type
		.word	0
		mov	pc, lr
		mov	pc, lr
		mov	pc, lr

		.size	proc_types, . - proc_types

/*
 * Turn off the Cache and MMU.  ARMv3 does not support
 * reading the control register, but ARMv4 does.
 *
 * On entry,  r6 = processor ID
 * On exit,   r0, r1, r2, r3, r12 corrupted
 * This routine must preserve: r4, r6, r7
 */
		.align	5
cache_off:	mov	r3, #12			@ cache_off function
		b	call_cache_fn

__armv4_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
		mov	pc, lr

__arm6_cache_off:
		mov	r0, #0x00000030		@ ARM6 control reg.
		b	__armv3_cache_off

__arm7_cache_off:
		mov	r0, #0x00000070		@ ARM7 control reg.
		b	__armv3_cache_off

__armv3_cache_off:
		mcr	p15, 0, r0, c1, c0, 0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	pc, lr

/*
 * Clean and flush the cache to maintain consistency.
 *
 * On entry,
 *  r6 = processor ID
 * On exit,
 *  r1, r2, r3, r11, r12 corrupted
 * This routine must preserve:
 *  r0, r4, r5, r6, r7
 */
		.align	5
cache_clean_flush:
		mov	r3, #16
		b	call_cache_fn

__armv6_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
		mcr	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv4_cache_flush:
		mov	r2, #64*1024		@ default: 32K dcache size (*2)
		mov	r11, #32		@ default: 32 byte line size
		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
		teq	r3, r6			@ cache ID register present?
		beq	no_cache_id
		mov	r1, r3, lsr #18
		and	r1, r1, #7
		mov	r2, #1024
		mov	r2, r2, lsl r1		@ base dcache size *2
		tst	r3, #1 << 14		@ test M bit
		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
		mov	r3, r3, lsr #12
		and	r3, r3, #3
		mov	r11, #8
		mov	r11, r11, lsl r3	@ cache line size in bytes
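
		/*
		 * Worked example: a size field of 6 (bits 20:18) gives
		 * r2 = 1024 << 6 = 64K, i.e. twice a 32K D-cache; the M
		 * bit (14) adds half again, and a length field of 2
		 * (bits 13:12) gives r11 = 8 << 2 = 32-byte lines.
		 */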
no_cache_id:
		bic	r1, pc, #63		@ align to longest cache line
		add	r2, r1, r2
1:		ldr	r3, [r1], r11		@ s/w flush D cache
		teq	r1, r2
		bne	1b

		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv3_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

/*
 * Various debugging routines for printing hex characters and
 * memory, which again must be relocatable.
 */
#ifdef DEBUG
		.type	phexbuf,#object
phexbuf:	.space	12
		.size	phexbuf, . - phexbuf

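/*
 * phex writes the r1-digit hex form of r0 into phexbuf back to front
 * (least significant nibble first), NUL-terminates it, and branches to
 * puts to print the result.
 */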
phex:		adr	r3, phexbuf
		mov	r2, #0
		strb	r2, [r3, r1]
1:		subs	r1, r1, #1
		movmi	r0, r3
		bmi	puts
		and	r2, r0, #15
		mov	r0, r0, lsr #4
		cmp	r2, #10
		addge	r2, r2, #7
		add	r2, r2, #'0'
		strb	r2, [r3, r1]
		b	1b

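/*
 * puts prints the NUL-terminated string at r0 via the loadsp/writeb
 * debug macros, expanding '\n' to "\r\n" and spinning 0x20000 loops
 * after each character so a slow UART can drain.
 */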
puts:		loadsp	r3
1:		ldrb	r2, [r0], #1
		teq	r2, #0
		moveq	pc, lr
2:		writeb	r2, r3
		mov	r1, #0x00020000
3:		subs	r1, r1, #1
		bne	3b
		teq	r2, #'\n'
		moveq	r2, #'\r'
		beq	2b
		teq	r0, #0
		bne	1b
		mov	pc, lr
putc:
		mov	r2, r0
		mov	r0, #0
		loadsp	r3
		b	2b
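
/*
 * putc reuses the tail of puts: the character goes in r2 and r0 is
 * zeroed so the "teq r0, #0" above terminates the loop after a single
 * character.
 */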

memdump:	mov	r12, r0
		mov	r10, lr
		mov	r11, #0
2:		mov	r0, r11, lsl #2
		add	r0, r0, r12
		mov	r1, #8
		bl	phex
		mov	r0, #':'
		bl	putc
1:		mov	r0, #' '
		bl	putc
		ldr	r0, [r12, r11, lsl #2]
		mov	r1, #8
		bl	phex
		and	r0, r11, #7
		teq	r0, #3
		moveq	r0, #' '
		bleq	putc
		and	r0, r11, #7
		add	r11, r11, #1
		teq	r0, #7
		bne	1b
		mov	r0, #'\n'
		bl	putc
		cmp	r11, #64
		blt	2b
		mov	pc, r10
#endif

reloc_end:

		.align
		.section ".stack", "w"
user_stack:	.space	4096
