xref: /openbmc/linux/arch/nios2/kernel/insnemu.S (revision 1ccea77e)
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *  Copyright (C) 2003-2013 Altera Corporation
 *  All rights reserved.
 */


#include <linux/linkage.h>
#include <asm/entry.h>

.set noat
.set nobreak

/*
 * Explicitly allow the use of r1 (the assembler temporary register)
 * within this code.  This register is normally reserved and the
 * assembler would otherwise complain about its use.
 */

/*
 * instruction_trap - emulate an unimplemented multiply/divide instruction.
 *
 * On entry sp points at a saved register frame that is addressed through
 * the PT_* offsets (defined outside this file).  The handler reloads the
 * interrupted register values from that frame, pops it, builds its own
 * private frame indexed by register number, emulates the instruction found
 * at -4(ea), writes the result into the destination register's slot,
 * reloads the registers from the private frame and returns with eret.
 */
ENTRY(instruction_trap)
	/* Reload the interrupted context from the PT_* frame. */
	ldw	r1, PT_R1(sp)
	ldw	r2, PT_R2(sp)
	ldw	r3, PT_R3(sp)
	ldw	r4, PT_R4(sp)
	ldw	r5, PT_R5(sp)
	ldw	r6, PT_R6(sp)
	ldw	r7, PT_R7(sp)
	ldw	r8, PT_R8(sp)
	ldw	r9, PT_R9(sp)
	ldw	r10, PT_R10(sp)
	ldw	r11, PT_R11(sp)
	ldw	r12, PT_R12(sp)
	ldw	r13, PT_R13(sp)
	ldw	r14, PT_R14(sp)
	ldw	r15, PT_R15(sp)
	ldw	ra, PT_RA(sp)
	ldw	fp, PT_FP(sp)
	ldw	gp, PT_GP(sp)
	ldw	et, PT_ESTATUS(sp)
	wrctl	estatus, et
	ldw	ea, PT_EA(sp)
	ldw	et, PT_SP(sp)		/* keep the interrupted sp in et */

	addi	sp, sp, PT_REGS_SIZE	/* pop the PT_* frame */

	/* INSTRUCTION EMULATION
	*  ---------------------
	*
	* Nios II processors generate exceptions for unimplemented instructions.
	* The routines below emulate these instructions.  Depending on the
	* processor core, the only instructions that might need to be emulated
	* are div, divu, mul, muli, mulxss, mulxsu, and mulxuu.
	*
	* The emulations match the instructions, except for the following
	* limitations:
	*
	* 1) The emulation routines do not emulate the use of the exception
	*    temporary register (et) as a source operand because the exception
	*    handler already has modified it.  Its slot in the frame is never
	*    written.
	*
	* 2) The routines do not emulate the use of the stack pointer (sp) or
	*    the exception return address register (ea) as a destination because
	*    modifying these registers crashes the exception handler or the
	*    interrupted routine.
	*
	* 3) r25 (bt) and r30 are neither saved nor restored.  Used as a source
	*    they read an unwritten slot; used as a destination the result is
	*    discarded.
	*
	* Detailed Design
	* ---------------
	*
	* The emulation routines expect the contents of integer registers r0-r31
	* to be on the stack at addresses sp, 4(sp), 8(sp), ... 124(sp), i.e.
	* register rN lives at 4*N(sp).  The saved estatus is kept in a separate
	* word at 128(sp) so that no register number can address it.  The
	* routines retrieve source operands from the stack and modify the
	* destination register's value on the stack prior to the end of the
	* exception handler.  Then all registers except the destination register
	* are restored to their previous values.
	*
	* The instruction that causes the exception is found at address -4(ea).
	* The instruction's OP and OPX fields identify the operation to be
	* performed.
	*
	* One instruction, muli, is an I-type instruction that is identified by
	* an OP field of 0x24.
	*
	* muli   AAAAA,BBBBB,IIIIIIIIIIIIIIII,-0x24-
	*           27    22                6      0    <-- LSB of field
	*
	* The remaining emulated instructions are R-type and have an OP field
	* of 0x3a.  Their OPX fields identify them.
	*
	* R-type AAAAA,BBBBB,CCCCC,XXXXXX,NNNNN,-0x3a-
	*           27    22    17     11     6      0  <-- LSB of field
	*
	*
	* Opcode Encoding.  muli is identified by its OP value.  Then OPX & 0x02
	* is used to differentiate between the division opcodes and the
	* remaining multiplication opcodes.
	*
	* Instruction   OP      OPX    OPX & 0x02
	* -----------   ----    ----   ----------
	* muli          0x24
	* divu          0x3a    0x24         0
	* div           0x3a    0x25         0
	* mul           0x3a    0x27      != 0
	* mulxuu        0x3a    0x07      != 0
	* mulxsu        0x3a    0x17      != 0
	* mulxss        0x3a    0x1f      != 0
	*/


	/*
	* Save everything on the stack to make it easy for the emulation
	* routines to retrieve the source register operands.
	*
	* Frame size is 132 bytes: 32 register slots plus one word for estatus.
	*/

	addi sp, sp, -132
	stw zero, 0(sp)	/* Save zero on stack to avoid special case for r0. */
	stw r1, 4(sp)
	stw r2,  8(sp)
	stw r3, 12(sp)
	stw r4, 16(sp)
	stw r5, 20(sp)
	stw r6, 24(sp)
	stw r7, 28(sp)
	stw r8, 32(sp)
	stw r9, 36(sp)
	stw r10, 40(sp)
	stw r11, 44(sp)
	stw r12, 48(sp)
	stw r13, 52(sp)
	stw r14, 56(sp)
	stw r15, 60(sp)
	stw r16, 64(sp)
	stw r17, 68(sp)
	stw r18, 72(sp)
	stw r19, 76(sp)
	stw r20, 80(sp)
	stw r21, 84(sp)
	stw r22, 88(sp)
	stw r23, 92(sp)
		/* Don't bother to save et (slot 96).  It's already been changed. */
		/* Slot 100 (r25) is left unwritten as well. */

	/*
	* estatus goes into its own word above the register slots.  It must
	* not share a register slot: the result store below can hit any slot
	* 4*C(sp), and the value kept here is written back to estatus before
	* the eret.  r5 is free to use, it was saved above.
	*/
	rdctl r5, estatus
	stw r5, 128(sp)

	stw gp, 104(sp)
	stw et, 108(sp)	/* et contains previous sp value. */
	stw fp, 112(sp)
	stw ea, 116(sp)
		/* Slot 120 (r30) is not written. */
	stw ra, 124(sp)	/* ra is r31, so its slot is 4*31 */


	/*
	* Split the instruction into its fields.  We need 4*A, 4*B, and 4*C as
	* offsets to the stack pointer for access to the stored register values.
	*
	* Each rotate builds on the previous one, so r4 must be consumed by
	* the "roli r5" below before it is turned into IMM16 by the srai.
	*/
	ldw r2,-4(ea)	/* r2 = AAAAA,BBBBB,IIIIIIIIIIIIIIII,PPPPPP */
	roli r3, r2, 7	/* r3 = BBB,IIIIIIIIIIIIIIII,PPPPPP,AAAAA,BB */
	roli r4, r3, 3	/* r4 = IIIIIIIIIIIIIIII,PPPPPP,AAAAA,BBBBB */
	roli r5, r4, 2	/* r5 = IIIIIIIIIIIIII,PPPPPP,AAAAA,BBBBB,II */
	srai r4, r4, 16	/* r4 = (sign-extended) IMM16 */
	roli r6, r5, 5	/* r6 = XXXX,NNNNN,PPPPPP,AAAAA,BBBBB,CCCCC,XX */
	andi r2, r2, 0x3f	/* r2 = 00000000000000000000000000,PPPPPP */
	andi r3, r3, 0x7c	/* r3 = 0000000000000000000000000,AAAAA,00 */
	andi r5, r5, 0x7c	/* r5 = 0000000000000000000000000,BBBBB,00 */
	andi r6, r6, 0x7c	/* r6 = 0000000000000000000000000,CCCCC,00 */

	/* Now
	* r2 = OP
	* r3 = 4*A
	* r4 = IMM16 (sign extended)
	* r5 = 4*B
	* r6 = 4*C
	*/

	/*
	* Get the operands.
	*
	* It is necessary to check for muli because it uses an I-type
	* instruction format, while the other instructions have an R-type
	* format.
	*
	*  Prepare for either multiplication or division loop.
	*  They both loop 32 times.
	*/
	movi r14, 32

	add  r3, r3, sp		/* r3 = address of A-operand. */
	ldw  r3, 0(r3)		/* r3 = A-operand. */
	movi r7, 0x24		/* muli opcode (I-type instruction format) */
	beq r2, r7, mul_immed /* muli doesn't use the B register as a source */

	add  r5, r5, sp		/* r5 = address of B-operand. */
	ldw  r5, 0(r5)		/* r5 = B-operand. */
				/* r4 = SSSSSSSSSSSSSSSS,-----IMM16------ */
				/* IMM16 not needed, align OPX portion */
				/* r4 = SSSSSSSSSSSSSSSS,CCCCC,-OPX--,00000 */
	srli r4, r4, 5		/* r4 = 00000,SSSSSSSSSSSSSSSS,CCCCC,-OPX-- */
	andi r4, r4, 0x3f	/* r4 = 00000000000000000000000000,-OPX-- */

	/* Now
	* r2 = OP
	* r3 = src1
	* r5 = src2
	* r4 = OPX (no longer can be muli)
	* r6 = 4*C
	*/


	/*
	*  Multiply or Divide?
	*/
	andi r7, r4, 0x02	/* For R-type multiply instructions,
				   OPX & 0x02 != 0 */
	bne r7, zero, multiply


	/* DIVISION
	*
	* Divide an unsigned dividend by an unsigned divisor using
	* a shift-and-subtract algorithm.  The example below shows
	* 43 div 7 = 6 for 8-bit integers.  This classic algorithm uses a
	* single register to store both the dividend and the quotient,
	* allowing both values to be shifted with a single instruction.
	*
	*                               remainder dividend:quotient
	*                               --------- -----------------
	*   initialize                   00000000     00101011:
	*   shift                        00000000     0101011:_
	*   remainder >= divisor? no     00000000     0101011:0
	*   shift                        00000000     101011:0_
	*   remainder >= divisor? no     00000000     101011:00
	*   shift                        00000001     01011:00_
	*   remainder >= divisor? no     00000001     01011:000
	*   shift                        00000010     1011:000_
	*   remainder >= divisor? no     00000010     1011:0000
	*   shift                        00000101     011:0000_
	*   remainder >= divisor? no     00000101     011:00000
	*   shift                        00001010     11:00000_
	*   remainder >= divisor? yes    00001010     11:000001
	*       remainder -= divisor   - 00000111
	*                              ----------
	*                                00000011     11:000001
	*   shift                        00000111     1:000001_
	*   remainder >= divisor? yes    00000111     1:0000011
	*       remainder -= divisor   - 00000111
	*                              ----------
	*                                00000000     1:0000011
	*   shift                        00000001     :0000011_
	*   remainder >= divisor? no     00000001     :00000110
	*
	* The quotient is 00000110.
	*
	* A zero divisor is not special-cased; the loop simply runs.
	*/

divide:
	/*
	*  Prepare for division by assuming the result
	*  is unsigned, and storing its "sign" as 0.
	*/
	movi r17, 0


	/* Which division opcode? */
	xori r7, r4, 0x25		/* OPX of div */
	bne r7, zero, unsigned_division


	/*
	*  OPX is div.  Determine and store the sign of the quotient.
	*  Then take the absolute value of both operands.
	*/
	xor r17, r3, r5		/* MSB contains sign of quotient */
	bge r3,zero,dividend_is_nonnegative
	sub r3, zero, r3	/* -r3 */
dividend_is_nonnegative:
	bge r5, zero, divisor_is_nonnegative
	sub r5, zero, r5	/* -r5 */
divisor_is_nonnegative:


unsigned_division:
	/* Initialize the unsigned-division loop. */
	movi r13, 0	/* remainder = 0 */

	/* Now
	* r3 = dividend : quotient
	* r4 = 0x25 for div, 0x24 for divu
	* r5 = divisor
	* r13 = remainder
	* r14 = loop counter (already initialized to 32)
	* r17 = MSB contains sign of quotient
	*/


	/*
	*   for (count = 32; count > 0; --count)
	*   {
	*/
divide_loop:

	/*
	*       Division:
	*
	*       (remainder:dividend:quotient) <<= 1;
	*/
	slli r13, r13, 1
	cmplt r7, r3, zero	/* r7 = MSB of r3 */
	or r13, r13, r7
	slli r3, r3, 1


	/*
	*       if (remainder >= divisor)
	*       {
	*           set LSB of quotient
	*           remainder -= divisor;
	*       }
	*/
	bltu r13, r5, div_skip
	ori r3, r3, 1
	sub r13, r13, r5
div_skip:

	/*
	*   }
	*/
	subi r14, r14, 1
	bne r14, zero, divide_loop


	/* Now
	* r3 = quotient
	* r4 = 0x25 for div, 0x24 for divu
	* r6 = 4*C
	* r17 = MSB contains sign of quotient
	*/


	/*
	*  Conditionally negate signed quotient.  If quotient is unsigned,
	*  the sign already is initialized to 0.
	*/
	bge r17, zero, quotient_is_nonnegative
	sub r3, zero, r3		/* -r3 */
quotient_is_nonnegative:


	/*
	*  Final quotient is in r3.
	*/
	add r6, r6, sp
	stw r3, 0(r6)	/* write quotient to stack */
	br restore_registers




	/* MULTIPLICATION
	*
	* A "product" is the number that one gets by summing a "multiplicand"
	* several times.  The "multiplier" specifies the number of copies of the
	* multiplicand that are summed.
	*
	* Actual multiplication algorithms don't use repeated addition, however.
	* Shift-and-add algorithms get the same answer as repeated addition, and
	* they are faster.  To compute the lower half of a product (pppp below)
	* one shifts the product left before adding in each of the partial
	* products (a * mmmm) through (d * mmmm).
	*
	* To compute the upper half of a product (PPPP below), one adds in the
	* partial products (d * mmmm) through (a * mmmm), each time following
	* the add by a right shift of the product.
	*
	*     mmmm
	*   * abcd
	*   ------
	*     ####  = d * mmmm
	*    ####   = c * mmmm
	*   ####    = b * mmmm
	*  ####     = a * mmmm
	* --------
	* PPPPpppp
	*
	* The example above shows 4 partial products.  Computing actual Nios II
	* products requires 32 partials.
	*
	* It is possible to compute the result of mulxsu from the result of
	* mulxuu because the only difference between the results of these two
	* opcodes is the value of the partial product associated with the sign
	* bit of rA.
	*
	*   mulxsu = mulxuu - ((rA < 0) ? rB : 0);
	*
	* It is possible to compute the result of mulxss from the result of
	* mulxsu because the only difference between the results of these two
	* opcodes is the value of the partial product associated with the sign
	* bit of rB.
	*
	*   mulxss = mulxsu - ((rB < 0) ? rA : 0);
	*
	*/

mul_immed:
	/* Opcode is muli.  Change it into mul for remainder of algorithm. */
	mov r6, r5		/* Field B is dest register, not field C. */
	mov r5, r4		/* Field IMM16 is src2, not field B. */
	movi r4, 0x27		/* OPX of mul is 0x27 */

multiply:
	/* Initialize the multiplication loop. */
	movi r9, 0	/* mul_product    = 0 */
	movi r10, 0	/* mulxuu_product = 0 */
	mov r11, r5	/* save original multiplier for mulxsu and mulxss */
	mov r12, r5	/* mulxuu_multiplier (will be shifted) */
	movi r16, 1	/* used to create "rori B,A,1" from "ror B,A,r16" */

	/* Now
	* r3 = multiplicand
	* r5 = mul_multiplier
	* r6 = 4 * dest_register (used later as offset to sp)
	* r7 = temp
	* r9 = mul_product
	* r10 = mulxuu_product
	* r11 = original multiplier
	* r12 = mulxuu_multiplier
	* r14 = loop counter (already initialized)
	* r16 = 1
	*/


	/*
	*   for (count = 32; count > 0; --count)
	*   {
	*/
multiply_loop:

	/*
	*       mul_product <<= 1;
	*       lsb = multiplier & 1;
	*
	*       r7 doubles as the carry word that is OR-ed into the high
	*       product further down; when lsb == 0 it stays 0.
	*/
	slli r9, r9, 1
	andi r7, r12, 1

	/*
	*       if (lsb == 1)
	*       {
	*           mulxuu_product += multiplicand;
	*       }
	*/
	beq r7, zero, mulx_skip
	add r10, r10, r3
	cmpltu r7, r10, r3 /* Save the carry from the MSB of mulxuu_product. */
	ror r7, r7, r16	/* r7 = 0x80000000 on carry, or else 0x00000000 */
mulx_skip:

	/*
	*       if (MSB of mul_multiplier == 1)
	*       {
	*           mul_product += multiplicand;
	*       }
	*/
	bge r5, zero, mul_skip
	add r9, r9, r3
mul_skip:

	/*
	*       mulxuu_product >>= 1;           logical shift
	*       mul_multiplier <<= 1;           done with MSB
	*       mulx_multiplier >>= 1;          done with LSB
	*/
	srli r10, r10, 1
	or r10, r10, r7		/* OR in the saved carry bit. */
	slli r5, r5, 1
	srli r12, r12, 1


	/*
	*   }
	*/
	subi r14, r14, 1
	bne r14, zero, multiply_loop


	/*
	*  Multiply emulation loop done.
	*/

	/* Now
	* r3 = multiplicand
	* r4 = OPX
	* r6 = 4 * dest_register (used later as offset to sp)
	* r7 = temp
	* r9 = mul_product
	* r10 = mulxuu_product
	* r11 = original multiplier
	*/


	/* Calculate address for result from 4 * dest_register */
	add r6, r6, sp


	/*
	* Select/compute the result based on OPX.
	*/


	/* OPX == mul?  Then store. */
	xori r7, r4, 0x27
	beq r7, zero, store_product

	/* It's one of the mulx.. opcodes.  Move over the result. */
	mov r9, r10

	/* OPX == mulxuu?  Then store. */
	xori r7, r4, 0x07
	beq r7, zero, store_product

	/* Compute mulxsu
	 *
	 * mulxsu = mulxuu - ((rA < 0) ? rB : 0);
	 */
	bge r3, zero, mulxsu_skip
	sub r9, r9, r11
mulxsu_skip:

	/* OPX == mulxsu?  Then store. */
	xori r7, r4, 0x17
	beq r7, zero, store_product

	/* Compute mulxss
	 *
	 * mulxss = mulxsu - ((rB < 0) ? rA : 0);
	 */
	bge r11,zero,mulxss_skip
	sub r9, r9, r3
mulxss_skip:
	/* At this point, assume that OPX is mulxss, so store. */


store_product:
	stw r9, 0(r6)


restore_registers:
			/* No need to restore r0. */
	/* estatus comes from its dedicated word, not from a register slot. */
	ldw r5, 128(sp)
	wrctl estatus, r5

	ldw r1, 4(sp)
	ldw r2, 8(sp)
	ldw r3, 12(sp)
	ldw r4, 16(sp)
	ldw r5, 20(sp)
	ldw r6, 24(sp)
	ldw r7, 28(sp)
	ldw r8, 32(sp)
	ldw r9, 36(sp)
	ldw r10, 40(sp)
	ldw r11, 44(sp)
	ldw r12, 48(sp)
	ldw r13, 52(sp)
	ldw r14, 56(sp)
	ldw r15, 60(sp)
	ldw r16, 64(sp)
	ldw r17, 68(sp)
	ldw r18, 72(sp)
	ldw r19, 76(sp)
	ldw r20, 80(sp)
	ldw r21, 84(sp)
	ldw r22, 88(sp)
	ldw r23, 92(sp)
			/* Does not need to restore et */
	ldw gp, 104(sp)

	ldw fp, 112(sp)
	ldw ea, 116(sp)
	ldw ra, 124(sp)	/* r31's slot, so a result written to ra is picked up */
	ldw sp, 108(sp)	/* last restore sp */
	eret

.set at
.set break