/* xref: /openbmc/u-boot/arch/nios2/cpu/start.S (revision fea7f3aa) */
/*
 * (C) Copyright 2004, Psyent Corporation <www.psyent.com>
 * Scott McNutt <smcnutt@psyent.com>
 *
 * SPDX-License-Identifier:	GPL-2.0+
 */

#include <asm-offsets.h>
#include <config.h>
#include <version.h>

/*************************************************************************
 * RESTART
 *
 * _start is the reset entry point. In order it:
 *   1. disables interrupts and invalidates the instruction cache,
 *   2. masks all interrupt sources and invalidates the data cache,
 *   3. copies the image to its linked address if it is running elsewhere,
 *   4. copies the exception trampoline to CONFIG_SYS_EXCEPTION_ADDR
 *      (unless CONFIG_ROM_STUBS is defined),
 *   5. sets up the initial stack and calls board_init_f(0).
 ************************************************************************/

	.text
	.global _start

_start:
	wrctl	status, r0		/* Disable interrupts */
	/* ICACHE INIT -- only the icache line at the reset address
	 * is invalidated at reset. So the init must stay within
	 * the cache line size (8 words). If GERMS is used, we'll
	 * just be invalidating the cache a second time. If cache
	 * is not implemented, initi behaves as nop.
	 *
	 * LAYOUT: exactly 8 instructions (0x20 bytes) lie between
	 * _start and _except_start, which places the trampoline at
	 * the default exception vector offset. Do not add or remove
	 * instructions before _except_start. Note that only the low
	 * 16 bits of the line size are loaded (single ori).
	 *
	 * r4 = line size, r5 = address counting down from the cache
	 * size; the loop runs while r5 > 0 (signed compare).
	 */
	ori	r4, r0, %lo(CONFIG_SYS_ICACHELINE_SIZE)
	movhi	r5, %hi(CONFIG_SYS_ICACHE_SIZE)
	ori	r5, r5, %lo(CONFIG_SYS_ICACHE_SIZE)
0:	initi	r5
	sub	r5, r5, r4
	bgt	r5, r0, 0b
	br	_except_end	/* Skip the tramp */

	/* EXCEPTION TRAMPOLINE -- the following gets copied
	 * to the exception address (below), but is otherwise at the
	 * default exception vector offset (0x0020).
	 * It loads the absolute address of _exception into et and
	 * jumps there.
	 */
_except_start:
	movhi	et, %hi(_exception)
	ori	et, et, %lo(_exception)
	jmp	et
_except_end:

	/* INTERRUPTS -- for now, all interrupts masked and globally
	 * disabled.
	 */
	wrctl	ienable, r0		/* All disabled	*/

	/* DCACHE INIT -- if dcache not implemented, initd behaves as
	 * nop.
	 *
	 * r4 = line size, r5 = cache size, r6 = address walking up
	 * from 0 in line-size steps. The test is at the bottom, so
	 * initd is issued at least once.
	 */
	movhi	r4, %hi(CONFIG_SYS_DCACHELINE_SIZE)
	ori	r4, r4, %lo(CONFIG_SYS_DCACHELINE_SIZE)
	movhi	r5, %hi(CONFIG_SYS_DCACHE_SIZE)
	ori	r5, r5, %lo(CONFIG_SYS_DCACHE_SIZE)
	mov	r6, r0
1:	initd	0(r6)
	add	r6, r6, r4
	bltu	r6, r5, 1b

	/* RELOCATE CODE, DATA & COMMAND TABLE -- the following code
	 * assumes code, data and the command table are all
	 * contiguous. This lets us relocate everything as a single
	 * block. Make sure the linker script matches this ;-)
	 *
	 * nextpc yields the run-time address of _cur; subtracting the
	 * link-time offset (_cur - _start) gives the run-time _start.
	 * If that already equals the linked _start, the copy is skipped.
	 */
	nextpc	r4
_cur:	movhi	r5, %hi(_cur - _start)
	ori	r5, r5, %lo(_cur - _start)
	sub	r4, r4, r5		/* r4 <- cur _start */
	mov	r8, r4			/* r8 is not read again in this file */
	movhi	r5, %hi(_start)
	ori	r5, r5, %lo(_start)	/* r5 <- linked _start */
	beq	r4, r5, 3f

	/* Word-by-word copy of CONFIG_SYS_MONITOR_LEN bytes from the
	 * current location (r4) to the linked location (r5). The loop
	 * ends on equality, so the length must be a multiple of 4.
	 */
	movhi	r6, %hi(CONFIG_SYS_MONITOR_LEN)
	ori	r6, r6, %lo(CONFIG_SYS_MONITOR_LEN)
	add	r6, r6, r5		/* r6 <- linked end address */
2:	ldwio	r7, 0(r4)
	addi	r4, r4, 4
	stwio	r7, 0(r5)
	addi	r5, r5, 4
	bne	r5, r6, 2b
3:

	/* JUMP TO RELOC ADDR -- absolute jump, so execution continues
	 * in the copy at the linked address.
	 */
	movhi	r4, %hi(_reloc)
	ori	r4, r4, %lo(_reloc)
	jmp	r4
_reloc:

	/* COPY EXCEPTION TRAMPOLINE -- copy the tramp to the
	 * exception address. Define CONFIG_ROM_STUBS to prevent
	 * the copy (e.g. exception in flash or in other
	 * software/firmware component).
	 *
	 * r4 = source cursor, r5 = source end, r6 = destination.
	 */
#if !defined(CONFIG_ROM_STUBS)
	movhi	r4, %hi(_except_start)
	ori	r4, r4, %lo(_except_start)
	movhi	r5, %hi(_except_end)
	ori	r5, r5, %lo(_except_end)
	movhi	r6, %hi(CONFIG_SYS_EXCEPTION_ADDR)
	ori	r6, r6, %lo(CONFIG_SYS_EXCEPTION_ADDR)
	beq	r4, r6, 7f	/* Skip if at proper addr */

6:	ldwio	r7, 0(r4)
	stwio	r7, 0(r6)
	addi	r4, r4, 4
	addi	r6, r6, 4
	bne	r4, r5, 6b
7:
#endif

	/* STACK INIT -- zero top two words for call back chain.
	 * sp ends up 8 bytes below CONFIG_SYS_INIT_SP and fp is set
	 * equal to sp.
	 */
	movhi	sp, %hi(CONFIG_SYS_INIT_SP)
	ori	sp, sp, %lo(CONFIG_SYS_INIT_SP)
	addi	sp, sp, -8
	stw	r0, 0(sp)
	stw	r0, 4(sp)
	mov	fp, sp

	/*
	 * Call board_init_f -- never returns
	 * r4 (first argument) is zero.
	 */
	mov	r4, r0
	movhi	r2, %hi(board_init_f@h)
	ori	r2, r2, %lo(board_init_f@h)
	callr	r2

	/* NEVER RETURNS -- but branch to the _start just
	 * in case ;-)
	 */
	br	_start



/*
 * relocate_code -- Nios2 handles the relocation above. But
 * the generic board code monkeys with the heap, stack, etc.
 * (it makes some assumptions that may not be appropriate
 * for Nios). Nevertheless, we capitulate here.
 *
 * We'll call the board_init_r from here since this isn't
 * supposed to return.
 *
 * void relocate_code (ulong sp, gd_t *global_data,
 *			ulong reloc_addr)
 *			__attribute__ ((noreturn));
 *
 * In:    r4 = sp, r5 = global_data, r6 = reloc_addr
 * Out:   does not return; board_init_r is entered with
 *        r4 = global_data and r5 = reloc_addr
 * Clobb: r4-r8, sp, ra
 *
 * NOTE(review): passing reloc_addr in r5 assumes board_init_r takes
 * (gd_t *, ulong dest_addr) -- confirm against its prototype.
 */
	.text
	.global relocate_code

relocate_code:
	mov	sp, r4		/* Set the new sp */
	mov	r4, r5		/* 1st arg <- global_data */
	mov	r5, r6		/* 2nd arg <- reloc_addr, saved before
				 * r6 is reused as scratch below */

	/*
	 * ZERO BSS/SBSS -- bss and sbss are assumed to be adjacent
	 * and between __bss_start and __bss_end.
	 *
	 * r6 = cursor, r7 = end. Scratch registers are chosen so the
	 * arguments in r4/r5 survive until the call. The loop ends on
	 * equality, so the region size must be a multiple of 4.
	 */
	movhi	r6, %hi(__bss_start)
	ori	r6, r6, %lo(__bss_start)
	movhi	r7, %hi(__bss_end)
	ori	r7, r7, %lo(__bss_end)
	beq	r6, r7, 5f	/* Empty region: nothing to clear */

4:	stwio	r0, 0(r6)
	addi	r6, r6, 4
	bne	r6, r7, 4b
5:

	movhi	r8, %hi(board_init_r@h)
	ori	r8, r8, %lo(board_init_r@h)
	callr	r8
	/* Not expected to be reached. callr has overwritten ra, so
	 * this cannot return to relocate_code's caller.
	 */
	ret

/*
 * dly_clks -- Nios2 (like Nios1) doesn't have a timebase in
 * the core. For simple delay loops, we do our best by counting
 * instruction cycles.
 *
 * Instruction performance varies based on the core. For cores
 * with icache and static/dynamic branch prediction (II/f, II/s):
 *
 *	Normal ALU (e.g. add, cmp, etc):	1 cycle
 *	Branch (correctly predicted, taken):	2 cycles
 *	Negative offset is predicted (II/s).
 *
 * For cores without icache and no branch prediction (II/e):
 *
 *	Normal ALU (e.g. add, cmp, etc):	6 cycles
 *	Branch (no prediction):			6 cycles
 *
 * For simplicity, if an instruction cache is implemented we
 * assume II/f or II/s. Otherwise, we use the II/e.
 *
 * In:    r4 = number of clocks to burn
 * Out:   nothing
 * Clobb: r4
 *
 * Each pass subtracts the per-iteration cost from r4 and loops
 * while the result is still >= 0. The loop is cycle-calibrated:
 * keep it at one subtract plus one backward branch.
 *
 * NOTE: bge is a signed comparison, so a count with bit 31 set
 * is treated as negative and the routine returns after a single
 * subtraction.
 */
	.globl dly_clks

dly_clks:

#if (CONFIG_SYS_ICACHE_SIZE > 0)
	subi	r4, r4, 3		/* 3 clocks/loop	*/
#else
	subi	r4, r4, 12		/* 12 clocks/loop	*/
#endif
	bge	r4, r0, dly_clks
	ret

211	.data
212	.globl	version_string
213
214version_string:
215	.ascii U_BOOT_VERSION_STRING, "\0"
216