/*
 * (C) Copyright 2004, Psyent Corporation <www.psyent.com>
 * Scott McNutt <smcnutt@psyent.com>
 *
 * SPDX-License-Identifier: GPL-2.0+
 */

/*
 * Nios II reset/start-up code.
 *
 * Syntax:  GNU as (Nios II); the file is run through the C preprocessor
 *          first (#include / #if below).
 * Layout:  the instruction order and count at the top of _start are
 *          significant -- see the ICACHE INIT and EXCEPTION TRAMPOLINE
 *          notes before changing anything there.
 */

#include <asm-offsets.h>
#include <config.h>
#include <version.h>

/*************************************************************************
 * RESTART
 ************************************************************************/

	.text
	.global _start

_start:
	wrctl	status, r0		/* Disable interrupts */

	/* ICACHE INIT -- only the icache line at the reset address
	 * is invalidated at reset. So the init must stay within
	 * the cache line size (8 words). If GERMS is used, we'll
	 * just be invalidating the cache a second time. If cache
	 * is not implemented initi behaves as nop.
	 *
	 * r4 = icache line size (step), r5 = address being invalidated.
	 * The loop walks r5 downwards from CONFIG_SYS_ICACHE_SIZE in
	 * line-size steps and stops once r5 is no longer > 0 (signed).
	 *
	 * NOTE: from _start up to and including the "br" below there
	 * are exactly 8 instructions. Do not add or remove any: that
	 * keeps this code inside one 8-word line and places
	 * _except_start at offset 0x0020.
	 */
	ori	r4, r0, %lo(CONFIG_SYS_ICACHELINE_SIZE)
	movhi	r5, %hi(CONFIG_SYS_ICACHE_SIZE)
	ori	r5, r5, %lo(CONFIG_SYS_ICACHE_SIZE)
0:	initi	r5
	sub	r5, r5, r4
	bgt	r5, r0, 0b
	br	_except_end		/* Skip the tramp */

	/* EXCEPTION TRAMPOLINE -- the following gets copied
	 * to the exception address (below), but is otherwise at the
	 * default exception vector offset (0x0020).
	 *
	 * Loads the absolute address of _exception into et and jumps
	 * to it, so the three instructions work from any location.
	 */
_except_start:
	movhi	et, %hi(_exception)
	ori	et, et, %lo(_exception)
	jmp	et
_except_end:

	/* INTERRUPTS -- for now, all interrupts masked and globally
	 * disabled.
	 */
	wrctl	ienable, r0		/* All disabled */

	/* DCACHE INIT -- if dcache not implemented, initd behaves as
	 * nop.
	 *
	 * r4 = dcache line size (step), r5 = dcache size (limit),
	 * r6 = address being initialized, counting up from 0 while
	 * r6 < r5 (unsigned).
	 */
	movhi	r4, %hi(CONFIG_SYS_DCACHELINE_SIZE)
	ori	r4, r4, %lo(CONFIG_SYS_DCACHELINE_SIZE)
	movhi	r5, %hi(CONFIG_SYS_DCACHE_SIZE)
	ori	r5, r5, %lo(CONFIG_SYS_DCACHE_SIZE)
	mov	r6, r0
1:	initd	0(r6)
	add	r6, r6, r4
	bltu	r6, r5, 1b

	/* RELOCATE CODE, DATA & COMMAND TABLE -- the following code
	 * assumes code, data and the command table are all
	 * contiguous. This lets us relocate everything as a single
	 * block. Make sure the linker script matches this ;-)
	 *
	 * nextpc yields the run-time address of _cur; subtracting the
	 * link-time offset (_cur - _start) gives the address _start is
	 * actually running from. If that already equals the linked
	 * address of _start, the copy is skipped.
	 *
	 * r4 = source (run-time image), r5 = destination (linked
	 * address), r6 = linked _edata (end of copy), r7 = scratch.
	 * r8 keeps a copy of the run-time _start; it is not read again
	 * in this section.
	 *
	 * The copy moves one word at a time and ends when r5 == r6, so
	 * it relies on (_edata - _start) being a multiple of 4.
	 */
	nextpc	r4
_cur:	movhi	r5, %hi(_cur - _start)
	ori	r5, r5, %lo(_cur - _start)
	sub	r4, r4, r5		/* r4 <- cur _start */
	mov	r8, r4
	movhi	r5, %hi(_start)
	ori	r5, r5, %lo(_start)	/* r5 <- linked _start */
	beq	r4, r5, 3f		/* Already at linked address */

	movhi	r6, %hi(_edata)
	ori	r6, r6, %lo(_edata)
2:	ldwio	r7, 0(r4)
	addi	r4, r4, 4
	stwio	r7, 0(r5)
	addi	r5, r5, 4
	bne	r5, r6, 2b
3:

	/* ZERO BSS/SBSS -- bss and sbss are assumed to be adjacent
	 * and between __bss_start and __bss_end.
	 *
	 * r5 = write pointer, r6 = __bss_end. An empty region
	 * (start == end) is skipped; otherwise words are cleared
	 * until r5 == r6, so the region size must be a multiple of 4.
	 */
	movhi	r5, %hi(__bss_start)
	ori	r5, r5, %lo(__bss_start)
	movhi	r6, %hi(__bss_end)
	ori	r6, r6, %lo(__bss_end)
	beq	r5, r6, 5f

4:	stwio	r0, 0(r5)
	addi	r5, r5, 4
	bne	r5, r6, 4b
5:

	/* JUMP TO RELOC ADDR -- absolute jump to the linked address
	 * of _reloc, so execution continues in the relocated copy.
	 */
	movhi	r4, %hi(_reloc)
	ori	r4, r4, %lo(_reloc)
	jmp	r4
_reloc:

	/* COPY EXCEPTION TRAMPOLINE -- copy the tramp to the
	 * exception address. Define CONFIG_ROM_STUBS to prevent
	 * the copy (e.g. exception in flash or in other
	 * software/firmware component).
	 *
	 * r4 = source (_except_start), r5 = source end (_except_end),
	 * r6 = destination (CONFIG_SYS_EXCEPTION_ADDR), r7 = scratch.
	 */
#if !defined(CONFIG_ROM_STUBS)
	movhi	r4, %hi(_except_start)
	ori	r4, r4, %lo(_except_start)
	movhi	r5, %hi(_except_end)
	ori	r5, r5, %lo(_except_end)
	movhi	r6, %hi(CONFIG_SYS_EXCEPTION_ADDR)
	ori	r6, r6, %lo(CONFIG_SYS_EXCEPTION_ADDR)
	beq	r4, r6, 7f		/* Skip if at proper addr */

6:	ldwio	r7, 0(r4)
	stwio	r7, 0(r6)
	addi	r4, r4, 4
	addi	r6, r6, 4
	bne	r4, r5, 6b
7:
#endif

	/* STACK INIT -- zero top two words for call back chain.
	 *
	 * sp = CONFIG_SYS_INIT_SP - 8; the two words at sp are
	 * cleared and fp is set equal to sp.
	 */
	movhi	sp, %hi(CONFIG_SYS_INIT_SP)
	ori	sp, sp, %lo(CONFIG_SYS_INIT_SP)
	addi	sp, sp, -8
	stw	r0, 0(sp)
	stw	r0, 4(sp)
	mov	fp, sp

	/*
	 * Call board_init -- never returns
	 */
	movhi	r4, %hi(board_init@h)
	ori	r4, r4, %lo(board_init@h)
	callr	r4

	/* NEVER RETURNS -- but branch to the _start just
	 * in case ;-)
	 */
	br	_start


/*
 * dly_clks -- Nios2 (like Nios1) doesn't have a timebase in
 * the core. For simple delay loops, we do our best by counting
 * instruction cycles.
 *
 * Instruction performance varies based on the core. For cores
 * with icache and static/dynamic branch prediction (II/f, II/s):
 *
 *	Normal ALU (e.g. add, cmp, etc):	1 cycle
 *	Branch (correctly predicted, taken):	2 cycles
 *	Negative offset is predicted (II/s).
 *
 * For cores without icache and no branch prediction (II/e):
 *
 *	Normal ALU (e.g. add, cmp, etc):	6 cycles
 *	Branch (no prediction):			6 cycles
 *
 * For simplicity, if an instruction cache is implemented we
 * assume II/f or II/s. Otherwise, we use the II/e.
 *
 * In:    r4 = number of clocks to wait (compared as signed)
 * Out:   nothing
 * Clobb: r4 (left negative on return)
 *
 * Each pass subtracts the per-iteration clock estimate from r4 and
 * loops while the result is still >= 0.
 */
	.globl dly_clks

dly_clks:

#if (CONFIG_SYS_ICACHE_SIZE > 0)
	subi	r4, r4, 3		/* 3 clocks/loop */
#else
	subi	r4, r4, 12		/* 12 clocks/loop */
#endif
	bge	r4, r0, dly_clks
	ret

	/* NUL-terminated version string, exported from .data. */
	.data
	.globl	version_string

version_string:
	.ascii U_BOOT_VERSION_STRING, "\0"