/*
 * (C) Copyright 2004, Psyent Corporation <www.psyent.com>
 * Scott McNutt <smcnutt@psyent.com>
 *
 * SPDX-License-Identifier:	GPL-2.0+
 */

#include <asm-offsets.h>
#include <config.h>
#include <version.h>

/*************************************************************************
 * RESTART
 ************************************************************************/

	.text
	.global _start

_start:
	wrctl	status, r0		/* Disable interrupts */
	/* ICACHE INIT -- only the icache line at the reset address
	 * is invalidated at reset, so this init sequence must fit
	 * within one cache line (8 words). If GERMS is used, we'll
	 * just be invalidating the cache a second time. If no
	 * icache is implemented, initi behaves as a nop.
	 */
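	/* Assuming _start sits at a line-aligned reset address,
	 * the sequence from wrctl above through the br below is
	 * exactly 8 instructions, so it fits in the single
	 * invalidated line.
	 */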
	ori	r4, r0, %lo(CONFIG_SYS_ICACHELINE_SIZE)
	movhi	r5, %hi(CONFIG_SYS_ICACHE_SIZE)
	ori	r5, r5, %lo(CONFIG_SYS_ICACHE_SIZE)
0:	initi	r5			/* Invalidate line at r5 */
	sub	r5, r5, r4		/* Step down one line */
	bgt	r5, r0, 0b
	br	_except_end	/* Skip the tramp */

	/* EXCEPTION TRAMPOLINE -- the following gets copied
	 * to the exception address (below), but is otherwise at the
	 * default exception vector offset (0x0020).
	 */
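	/* Note: et (r24) is the ABI-reserved exception temporary
	 * register, so the trampoline is free to clobber it.
	 */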
_except_start:
	movhi	et, %hi(_exception)
	ori	et, et, %lo(_exception)
	jmp	et
_except_end:

	/* INTERRUPTS -- for now, all interrupts masked and globally
	 * disabled.
	 */
	wrctl	ienable, r0		/* All disabled	*/

	/* DCACHE INIT -- if no dcache is implemented, initd
	 * behaves as a nop.
	 */
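	/* The loop below walks addresses 0 .. DCACHE_SIZE-1 in
	 * line-size steps; initd initializes (invalidates) the line
	 * containing each address. This is safe at reset because
	 * the cache holds no valid data yet.
	 */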
	movhi	r4, %hi(CONFIG_SYS_DCACHELINE_SIZE)
	ori	r4, r4, %lo(CONFIG_SYS_DCACHELINE_SIZE)
	movhi	r5, %hi(CONFIG_SYS_DCACHE_SIZE)
	ori	r5, r5, %lo(CONFIG_SYS_DCACHE_SIZE)
	mov	r6, r0
1:	initd	0(r6)
	add	r6, r6, r4
	bltu	r6, r5, 1b

	/* RELOCATE CODE, DATA & COMMAND TABLE -- the following code
	 * assumes code, data and the command table are all
	 * contiguous. This lets us relocate everything as a single
	 * block. Make sure the linker script matches this ;-)
	 */
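	/* nextpc loads the address of the instruction following it
	 * (here, _cur). Subtracting the link-time offset
	 * (_cur - _start) gives the runtime _start; if it equals
	 * the linked _start, the image is already in place and the
	 * copy is skipped.
	 */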
	nextpc	r4
_cur:	movhi	r5, %hi(_cur - _start)
	ori	r5, r5, %lo(_cur - _start)
	sub	r4, r4, r5		/* r4 <- runtime _start */
	mov	r8, r4
	movhi	r5, %hi(_start)
	ori	r5, r5, %lo(_start)	/* r5 <- linked _start */
	beq	r4, r5, 3f

	movhi	r6, %hi(_edata)
	ori	r6, r6, %lo(_edata)
2:	ldwio	r7, 0(r4)
	addi	r4, r4, 4
	stwio	r7, 0(r5)
	addi	r5, r5, 4
	bne	r5, r6, 2b
3:

	/* ZERO BSS/SBSS -- bss and sbss are assumed to be adjacent
	 * and between __bss_start and __bss_end.
	 */
	movhi	r5, %hi(__bss_start)
	ori	r5, r5, %lo(__bss_start)
	movhi	r6, %hi(__bss_end)
	ori	r6, r6, %lo(__bss_end)
	beq	r5, r6, 5f

4:	stwio	r0, 0(r5)
	addi	r5, r5, 4
	bne	r5, r6, 4b
5:

	/* JUMP TO RELOC ADDR -- continue execution from the
	 * (possibly just copied) image at the linked address.
	 */
	movhi	r4, %hi(_reloc)
	ori	r4, r4, %lo(_reloc)
	jmp	r4
_reloc:

	/* COPY EXCEPTION TRAMPOLINE -- copy the tramp to the
	 * exception address. Define CONFIG_ROM_STUBS to prevent
	 * the copy (e.g. exception in flash or in other
	 * software/firmware component).
	 */
#if !defined(CONFIG_ROM_STUBS)
	movhi	r4, %hi(_except_start)
	ori	r4, r4, %lo(_except_start)
	movhi	r5, %hi(_except_end)
	ori	r5, r5, %lo(_except_end)
	movhi	r6, %hi(CONFIG_SYS_EXCEPTION_ADDR)
	ori	r6, r6, %lo(CONFIG_SYS_EXCEPTION_ADDR)
	beq	r4, r6, 7f	/* Skip if at proper addr */

6:	ldwio	r7, 0(r4)
	stwio	r7, 0(r6)
	addi	r4, r4, 4
	addi	r6, r6, 4
	bne	r4, r5, 6b
7:
#endif

	/* STACK INIT -- zero the top two words to terminate the
	 * call chain.
	 */
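	/* The zeroed pair reads as a NULL frame, so a stack walk
	 * terminates here instead of running past the top of the
	 * initial stack.
	 */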
	movhi	sp, %hi(CONFIG_SYS_INIT_SP)
	ori	sp, sp, %lo(CONFIG_SYS_INIT_SP)
	addi	sp, sp, -8
	stw	r0, 0(sp)
	stw	r0, 4(sp)
	mov	fp, sp

	/*
	 * Call board_init -- never returns
	 */
	movhi	r4, %hi(board_init@h)
	ori	r4, r4, %lo(board_init@h)
	callr	r4

	/* NEVER RETURNS -- but branch to _start just
	 * in case ;-)
	 */
	br	_start


/*
 * dly_clks -- Nios2 (like Nios1) doesn't have a timebase in
 * the core. For simple delay loops, we do our best by counting
 * instruction cycles.
 *
 * Instruction performance varies based on the core. For cores
 * with icache and static/dynamic branch prediction (II/f, II/s):
 *
 *	Normal ALU (e.g. add, cmp, etc):	1 cycle
 *	Branch (correctly predicted, taken):	2 cycles
 *	A negative (backward) offset is predicted taken (II/s).
 *
 * For cores without icache and no branch prediction (II/e):
 *
 *	Normal ALU (e.g. add, cmp, etc):	6 cycles
 *	Branch (no prediction):			6 cycles
 *
 * For simplicity, if an instruction cache is implemented we
 * assume II/f or II/s timing; otherwise we assume II/e timing.
 */
	.globl dly_clks

dly_clks:
	/* On entry, r4 holds the number of clocks to burn. */
#if (CONFIG_SYS_ICACHE_SIZE > 0)
	subi	r4, r4, 3		/* 3 clocks/loop	*/
#else
	subi	r4, r4, 12		/* 12 clocks/loop	*/
#endif
	bge	r4, r0, dly_clks
	ret
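	/* Example (rough; ignores call/ret overhead): on a 50 MHz
	 * core with icache, a ~1 ms delay needs 50,000 clocks:
	 *	movui	r4, 50000
	 *	call	dly_clks
	 * i.e. ~16,667 loop iterations at 3 clocks each.
	 */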

	.data
	.globl	version_string

version_string:
	.ascii U_BOOT_VERSION_STRING, "\0"