/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *  Copyright (C) 2003-2013 Altera Corporation
 *  All rights reserved.
 */


#include <linux/linkage.h>
#include <asm/entry.h>

.set noat
.set nobreak

/*
 * Explicitly allow the use of r1 (the assembler temporary register)
 * within this code. This register is normally reserved for the use of
 * the compiler.
 */

/*
 * instruction_trap - software emulation of div, divu, mul, muli, mulxss,
 * mulxsu and mulxuu.
 *
 * On entry sp points at a saved register frame that is read through the
 * PT_* offsets.  The routine:
 *
 *   1. reloads r1-r15, ra, fp, gp, estatus and ea from that frame, keeps
 *      the saved sp value in et and pops the frame (PT_REGS_SIZE bytes);
 *   2. builds a private 128-byte frame in which the slot for register rN
 *      lives at 4*N(sp), so that instruction register fields can be used
 *      directly as offsets;
 *   3. decodes the instruction word found at -4(ea), computes the result
 *      and writes it into the destination register's slot;
 *   4. reloads every register from the private frame and executes eret.
 *
 * Frame slots that do not hold the register of the same number:
 *    96(sp)  et (r24)  never written - et is already clobbered
 *   100(sp)  bt (r25)  holds estatus instead of bt
 *   120(sp)  ba (r30)  never written
 */
ENTRY(instruction_trap)
	/* Reload the working registers from the saved frame at sp. */
	ldw	r1, PT_R1(sp)
	ldw	r2, PT_R2(sp)
	ldw	r3, PT_R3(sp)
	ldw	r4, PT_R4(sp)
	ldw	r5, PT_R5(sp)
	ldw	r6, PT_R6(sp)
	ldw	r7, PT_R7(sp)
	ldw	r8, PT_R8(sp)
	ldw	r9, PT_R9(sp)
	ldw	r10, PT_R10(sp)
	ldw	r11, PT_R11(sp)
	ldw	r12, PT_R12(sp)
	ldw	r13, PT_R13(sp)
	ldw	r14, PT_R14(sp)
	ldw	r15, PT_R15(sp)
	ldw	ra, PT_RA(sp)
	ldw	fp, PT_FP(sp)
	ldw	gp, PT_GP(sp)
	ldw	et, PT_ESTATUS(sp)
	wrctl	estatus, et
	ldw	ea, PT_EA(sp)
	ldw	et, PT_SP(sp)		/* backup sp in et */

	addi	sp, sp, PT_REGS_SIZE

	/* INSTRUCTION EMULATION
	 * ---------------------
	 *
	 * Nios II processors generate exceptions for unimplemented
	 * instructions. The routines below emulate these instructions.
	 * Depending on the processor core, the only instructions that might
	 * need to be emulated are div, divu, mul, muli, mulxss, mulxsu, and
	 * mulxuu.
	 *
	 * The emulations match the instructions, except for the following
	 * limitations:
	 *
	 * 1) The emulation routines do not emulate the use of the exception
	 *    temporary register (et) as a source operand because the
	 *    exception handler already has modified it.
	 *
	 * 2) The routines do not emulate the use of the stack pointer (sp)
	 *    or the exception return address register (ea) as a destination
	 *    because modifying these registers crashes the exception handler
	 *    or the interrupted routine.
	 *
	 * Detailed Design
	 * ---------------
	 *
	 * The emulation routines expect the contents of integer registers
	 * r0-r31 to be on the stack at addresses sp, 4(sp), 8(sp), ...
	 * 124(sp). The routines retrieve source operands from the stack and
	 * modify the destination register's value on the stack prior to the
	 * end of the exception handler. Then all registers except the
	 * destination register are restored to their previous values.
	 *
	 * The instruction that causes the exception is found at address
	 * -4(ea). The instruction's OP and OPX fields identify the operation
	 * to be performed.
	 *
	 * One instruction, muli, is an I-type instruction that is identified
	 * by an OP field of 0x24.
	 *
	 * muli   AAAAA,BBBBB,IIIIIIIIIIIIIIII,-0x24-
	 *           27    22                6      0    <-- LSB of field
	 *
	 * The remaining emulated instructions are R-type and have an OP
	 * field of 0x3a. Their OPX fields identify them.
	 *
	 * R-type AAAAA,BBBBB,CCCCC,XXXXXX,NNNNN,-0x3a-
	 *           27    22    17     11     6      0  <-- LSB of field
	 *
	 *
	 * Opcode Encoding. muli is identified by its OP value. Then
	 * OPX & 0x02 is used to differentiate between the division opcodes
	 * and the remaining multiplication opcodes.
	 *
	 * Instruction   OP      OPX    OPX & 0x02
	 * -----------   ----    ----   ----------
	 * muli          0x24
	 * divu          0x3a    0x24         0
	 * div           0x3a    0x25         0
	 * mul           0x3a    0x27      != 0
	 * mulxuu        0x3a    0x07      != 0
	 * mulxsu        0x3a    0x17      != 0
	 * mulxss        0x3a    0x1f      != 0
	 */


	/*
	 * Save everything on the stack to make it easy for the emulation
	 * routines to retrieve the source register operands.
	 */

	addi	sp, sp, -128
	stw	zero, 0(sp)	/* Save zero on stack to avoid special case for r0. */
	stw	r1, 4(sp)
	stw	r2, 8(sp)
	stw	r3, 12(sp)
	stw	r4, 16(sp)
	stw	r5, 20(sp)
	stw	r6, 24(sp)
	stw	r7, 28(sp)
	stw	r8, 32(sp)
	stw	r9, 36(sp)
	stw	r10, 40(sp)
	stw	r11, 44(sp)
	stw	r12, 48(sp)
	stw	r13, 52(sp)
	stw	r14, 56(sp)
	stw	r15, 60(sp)
	stw	r16, 64(sp)
	stw	r17, 68(sp)
	stw	r18, 72(sp)
	stw	r19, 76(sp)
	stw	r20, 80(sp)
	stw	r21, 84(sp)
	stw	r22, 88(sp)
	stw	r23, 92(sp)
	/* Don't bother to save et (slot 96). It's already been changed. */
	rdctl	r5, estatus	/* r5 is free: its value is already at 20(sp) */
	stw	r5, 100(sp)	/* estatus is parked in the bt (r25) slot */

	stw	gp, 104(sp)
	stw	et, 108(sp)	/* et contains previous sp value. */
	stw	fp, 112(sp)
	stw	ea, 116(sp)
	/*
	 * ra is r31, so its slot is 4 * 31 = 124.  Storing it anywhere else
	 * would make the decoded A/B/C field offsets miss it whenever ra is
	 * an operand of the emulated instruction.
	 */
	stw	ra, 124(sp)


	/*
	 * Split the instruction into its fields. We need 4*A, 4*B, and 4*C
	 * as offsets to the stack pointer for access to the stored register
	 * values.
	 */
	ldw	r2, -4(ea)	/* r2 = AAAAA,BBBBB,IIIIIIIIIIIIIIII,PPPPPP */
	roli	r3, r2, 7	/* r3 = BBB,IIIIIIIIIIIIIIII,PPPPPP,AAAAA,BB */
	roli	r4, r3, 3	/* r4 = IIIIIIIIIIIIIIII,PPPPPP,AAAAA,BBBBB */
	roli	r5, r4, 2	/* r5 = IIIIIIIIIIIIII,PPPPPP,AAAAA,BBBBB,II */
	srai	r4, r4, 16	/* r4 = (sign-extended) IMM16 */
	roli	r6, r5, 5	/* r6 = XXXX,NNNNN,PPPPPP,AAAAA,BBBBB,CCCCC,XX */
	andi	r2, r2, 0x3f	/* r2 = 00000000000000000000000000,PPPPPP */
	andi	r3, r3, 0x7c	/* r3 = 0000000000000000000000000,AAAAA,00 */
	andi	r5, r5, 0x7c	/* r5 = 0000000000000000000000000,BBBBB,00 */
	andi	r6, r6, 0x7c	/* r6 = 0000000000000000000000000,CCCCC,00 */

	/* Now
	 * r2 = OP
	 * r3 = 4*A
	 * r4 = IMM16 (sign extended)
	 * r5 = 4*B
	 * r6 = 4*C
	 */

	/*
	 * Get the operands.
	 *
	 * It is necessary to check for muli because it uses an I-type
	 * instruction format, while the other instructions have an R-type
	 * format.
	 *
	 * Prepare for either multiplication or division loop.
	 * They both loop 32 times.
	 */
	movi	r14, 32

	add	r3, r3, sp	/* r3 = address of A-operand. */
	ldw	r3, 0(r3)	/* r3 = A-operand. */
	movi	r7, 0x24	/* muli opcode (I-type instruction format) */
	beq	r2, r7, mul_immed /* muli doesn't use the B register as a source */

	add	r5, r5, sp	/* r5 = address of B-operand. */
	ldw	r5, 0(r5)	/* r5 = B-operand. */
				/* r4 = SSSSSSSSSSSSSSSS,-----IMM16------ */
				/* IMM16 not needed, align OPX portion */
				/* r4 = SSSSSSSSSSSSSSSS,CCCCC,-OPX--,00000 */
	srli	r4, r4, 5	/* r4 = 00000,SSSSSSSSSSSSSSSS,CCCCC,-OPX-- */
	andi	r4, r4, 0x3f	/* r4 = 00000000000000000000000000,-OPX-- */

	/* Now
	 * r2 = OP
	 * r3 = src1
	 * r5 = src2
	 * r4 = OPX (no longer can be muli)
	 * r6 = 4*C
	 */


	/*
	 * Multiply or Divide?
	 */
	andi	r7, r4, 0x02	/* For R-type multiply instructions,
				   OPX & 0x02 != 0 */
	bne	r7, zero, multiply


	/* DIVISION
	 *
	 * Divide an unsigned dividend by an unsigned divisor using
	 * a shift-and-subtract algorithm. The example below shows
	 * 43 div 7 = 6 for 8-bit integers. This classic algorithm uses a
	 * single register to store both the dividend and the quotient,
	 * allowing both values to be shifted with a single instruction.
	 *
	 *                               remainder dividend:quotient
	 *                               --------- -----------------
	 *   initialize                   00000000     00101011:
	 *   shift                        00000000     0101011:_
	 *   remainder >= divisor? no     00000000     0101011:0
	 *   shift                        00000000     101011:0_
	 *   remainder >= divisor? no     00000000     101011:00
	 *   shift                        00000001     01011:00_
	 *   remainder >= divisor? no     00000001     01011:000
	 *   shift                        00000010     1011:000_
	 *   remainder >= divisor? no     00000010     1011:0000
	 *   shift                        00000101     011:0000_
	 *   remainder >= divisor? no     00000101     011:00000
	 *   shift                        00001010     11:00000_
	 *   remainder >= divisor? yes    00001010     11:000001
	 *       remainder -= divisor   - 00000111
	 *                              ----------
	 *                                00000011     11:000001
	 *   shift                        00000111     1:000001_
	 *   remainder >= divisor? yes    00000111     1:0000011
	 *       remainder -= divisor   - 00000111
	 *                              ----------
	 *                                00000000     1:0000011
	 *   shift                        00000001     :0000011_
	 *   remainder >= divisor? no     00000001     :00000110
	 *
	 * The quotient is 00000110.
	 *
	 * No check is made for a zero divisor: the unsigned compare in the
	 * loop then never skips, so every quotient bit is set before the
	 * optional sign fix-up.
	 */

divide:
	/*
	 * Prepare for division by assuming the result
	 * is unsigned, and storing its "sign" as 0.
	 */
	movi	r17, 0


	/* Which division opcode? */
	xori	r7, r4, 0x25		/* OPX of div */
	bne	r7, zero, unsigned_division


	/*
	 * OPX is div. Determine and store the sign of the quotient.
	 * Then take the absolute value of both operands.
	 */
	xor	r17, r3, r5		/* MSB contains sign of quotient */
	bge	r3, zero, dividend_is_nonnegative
	sub	r3, zero, r3		/* -r3 */
dividend_is_nonnegative:
	bge	r5, zero, divisor_is_nonnegative
	sub	r5, zero, r5		/* -r5 */
divisor_is_nonnegative:


unsigned_division:
	/* Initialize the unsigned-division loop. */
	movi	r13, 0	/* remainder = 0 */

	/* Now
	 * r3 = dividend : quotient
	 * r4 = 0x25 for div, 0x24 for divu
	 * r5 = divisor
	 * r13 = remainder
	 * r14 = loop counter (already initialized to 32)
	 * r17 = MSB contains sign of quotient
	 */


	/*
	 *   for (count = 32; count > 0; --count)
	 *   {
	 */
divide_loop:

	/*
	 * Division:
	 *
	 * (remainder:dividend:quotient) <<= 1;
	 */
	slli	r13, r13, 1
	cmplt	r7, r3, zero		/* r7 = MSB of r3 */
	or	r13, r13, r7
	slli	r3, r3, 1


	/*
	 * if (remainder >= divisor)
	 * {
	 *     set LSB of quotient
	 *     remainder -= divisor;
	 * }
	 */
	bltu	r13, r5, div_skip
	ori	r3, r3, 1
	sub	r13, r13, r5
div_skip:

	/*
	 *   }
	 */
	subi	r14, r14, 1
	bne	r14, zero, divide_loop


	/* Now
	 * r3 = quotient
	 * r4 = 0x25 for div, 0x24 for divu
	 * r6 = 4*C
	 * r17 = MSB contains sign of quotient
	 */


	/*
	 * Conditionally negate signed quotient. If quotient is unsigned,
	 * the sign already is initialized to 0.
	 */
	bge	r17, zero, quotient_is_nonnegative
	sub	r3, zero, r3		/* -r3 */
quotient_is_nonnegative:


	/*
	 * Final quotient is in r3.
	 */
	add	r6, r6, sp
	stw	r3, 0(r6)	/* write quotient to stack */
	br	restore_registers




	/* MULTIPLICATION
	 *
	 * A "product" is the number that one gets by summing a "multiplicand"
	 * several times. The "multiplier" specifies the number of copies of
	 * the multiplicand that are summed.
	 *
	 * Actual multiplication algorithms don't use repeated addition,
	 * however. Shift-and-add algorithms get the same answer as repeated
	 * addition, and they are faster. To compute the lower half of a
	 * product (pppp below) one shifts the product left before adding in
	 * each of the partial products (a * mmmm) through (d * mmmm).
	 *
	 * To compute the upper half of a product (PPPP below), one adds in
	 * the partial products (d * mmmm) through (a * mmmm), each time
	 * following the add by a right shift of the product.
	 *
	 *     mmmm
	 *   * abcd
	 *   ------
	 *     ####  = d * mmmm
	 *    ####   = c * mmmm
	 *   ####    = b * mmmm
	 *  ####     = a * mmmm
	 * --------
	 * PPPPpppp
	 *
	 * The example above shows 4 partial products. Computing actual
	 * Nios II products requires 32 partials.
	 *
	 * It is possible to compute the result of mulxsu from the result of
	 * mulxuu because the only difference between the results of these
	 * two opcodes is the value of the partial product associated with
	 * the sign bit of rA.
	 *
	 *   mulxsu = mulxuu - ((rA < 0) ? rB : 0);
	 *
	 * It is possible to compute the result of mulxss from the result of
	 * mulxsu because the only difference between the results of these
	 * two opcodes is the value of the partial product associated with
	 * the sign bit of rB.
	 *
	 *   mulxss = mulxsu - ((rB < 0) ? rA : 0);
	 *
	 */

mul_immed:
	/* Opcode is muli. Change it into mul for remainder of algorithm. */
	mov	r6, r5		/* Field B is dest register, not field C. */
	mov	r5, r4		/* Field IMM16 is src2, not field B. */
	movi	r4, 0x27	/* OPX of mul is 0x27 */

multiply:
	/* Initialize the multiplication loop. */
	movi	r9, 0	/* mul_product = 0 */
	movi	r10, 0	/* mulxuu_product = 0 */
	mov	r11, r5	/* save original multiplier for mulxsu and mulxss */
	mov	r12, r5	/* mulxuu_multiplier (will be shifted) */
	movi	r16, 1	/* used to create "rori B,A,1" from "ror B,A,r16" */

	/* Now
	 * r3 = multiplicand
	 * r5 = mul_multiplier
	 * r6 = 4 * dest_register (used later as offset to sp)
	 * r7 = temp
	 * r9 = mul_product
	 * r10 = mulxuu_product
	 * r11 = original multiplier
	 * r12 = mulxuu_multiplier
	 * r14 = loop counter (already initialized)
	 * r16 = 1
	 */


	/*
	 *   for (count = 32; count > 0; --count)
	 *   {
	 */
multiply_loop:

	/*
	 * mul_product <<= 1;
	 * lsb = multiplier & 1;
	 */
	slli	r9, r9, 1
	andi	r7, r12, 1

	/*
	 * if (lsb == 1)
	 * {
	 *     mulxuu_product += multiplicand;
	 * }
	 *
	 * When the add is skipped r7 is 0 here, which is also the correct
	 * "no carry" value for the OR further down.
	 */
	beq	r7, zero, mulx_skip
	add	r10, r10, r3
	cmpltu	r7, r10, r3	/* Save the carry from the MSB of mulxuu_product. */
	ror	r7, r7, r16	/* r7 = 0x80000000 on carry, or else 0x00000000 */
mulx_skip:

	/*
	 * if (MSB of mul_multiplier == 1)
	 * {
	 *     mul_product += multiplicand;
	 * }
	 */
	bge	r5, zero, mul_skip
	add	r9, r9, r3
mul_skip:

	/*
	 * mulxuu_product >>= 1;           logical shift
	 * mul_multiplier <<= 1;           done with MSB
	 * mulx_multiplier >>= 1;          done with LSB
	 */
	srli	r10, r10, 1
	or	r10, r10, r7		/* OR in the saved carry bit. */
	slli	r5, r5, 1
	srli	r12, r12, 1


	/*
	 *   }
	 */
	subi	r14, r14, 1
	bne	r14, zero, multiply_loop


	/*
	 *  Multiply emulation loop done.
	 */

	/* Now
	 * r3 = multiplicand
	 * r4 = OPX
	 * r6 = 4 * dest_register (used later as offset to sp)
	 * r7 = temp
	 * r9 = mul_product
	 * r10 = mulxuu_product
	 * r11 = original multiplier
	 */


	/* Calculate address for result from 4 * dest_register */
	add	r6, r6, sp


	/*
	 * Select/compute the result based on OPX.
	 */


	/* OPX == mul?  Then store. */
	xori	r7, r4, 0x27
	beq	r7, zero, store_product

	/* It's one of the mulx.. opcodes. Move over the result. */
	mov	r9, r10

	/* OPX == mulxuu?  Then store. */
	xori	r7, r4, 0x07
	beq	r7, zero, store_product

	/* Compute mulxsu
	 *
	 * mulxsu = mulxuu - ((rA < 0) ? rB : 0);
	 */
	bge	r3, zero, mulxsu_skip
	sub	r9, r9, r11
mulxsu_skip:

	/* OPX == mulxsu?  Then store. */
	xori	r7, r4, 0x17
	beq	r7, zero, store_product

	/* Compute mulxss
	 *
	 * mulxss = mulxsu - ((rB < 0) ? rA : 0);
	 */
	bge	r11, zero, mulxss_skip
	sub	r9, r9, r3
mulxss_skip:
	/* At this point, assume that OPX is mulxss, so store. */


store_product:
	stw	r9, 0(r6)


restore_registers:
	/* No need to restore r0. */
	ldw	r5, 100(sp)	/* estatus was parked in the bt slot */
	wrctl	estatus, r5

	ldw	r1, 4(sp)
	ldw	r2, 8(sp)
	ldw	r3, 12(sp)
	ldw	r4, 16(sp)
	ldw	r5, 20(sp)
	ldw	r6, 24(sp)
	ldw	r7, 28(sp)
	ldw	r8, 32(sp)
	ldw	r9, 36(sp)
	ldw	r10, 40(sp)
	ldw	r11, 44(sp)
	ldw	r12, 48(sp)
	ldw	r13, 52(sp)
	ldw	r14, 56(sp)
	ldw	r15, 60(sp)
	ldw	r16, 64(sp)
	ldw	r17, 68(sp)
	ldw	r18, 72(sp)
	ldw	r19, 76(sp)
	ldw	r20, 80(sp)
	ldw	r21, 84(sp)
	ldw	r22, 88(sp)
	ldw	r23, 92(sp)
	/* Does not need to restore et */
	ldw	gp, 104(sp)

	ldw	fp, 112(sp)
	ldw	ea, 116(sp)
	ldw	ra, 124(sp)	/* r31 slot; picks up the result when ra is the destination */
	ldw	sp, 108(sp)	/* last restore sp */
	eret

.set at
.set break