xref: /openbmc/linux/arch/mips/math-emu/sp_maddf.c (revision 56d06fa2)
1 /*
2  * IEEE754 floating point arithmetic
3  * single precision: MADDF.f (Fused Multiply Add)
4  * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
5  *
6  * MIPS floating point support
7  * Copyright (C) 2015 Imagination Technologies, Ltd.
8  * Author: Markos Chandras <markos.chandras@imgtec.com>
9  *
10  *  This program is free software; you can distribute it and/or modify it
11  *  under the terms of the GNU General Public License as published by the
12  *  Free Software Foundation; version 2 of the License.
13  */
14 
15 #include "ieee754sp.h"
16 
17 union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
18 				union ieee754sp y)
19 {
20 	int re;
21 	int rs;
22 	unsigned rm;
23 	unsigned short lxm;
24 	unsigned short hxm;
25 	unsigned short lym;
26 	unsigned short hym;
27 	unsigned lrm;
28 	unsigned hrm;
29 	unsigned t;
30 	unsigned at;
31 	int s;
32 
33 	COMPXSP;
34 	COMPYSP;
35 	u32 zm; int ze; int zs __maybe_unused; int zc;
36 
37 	EXPLODEXSP;
38 	EXPLODEYSP;
39 	EXPLODESP(z, zc, zs, ze, zm)
40 
41 	FLUSHXSP;
42 	FLUSHYSP;
43 	FLUSHSP(z, zc, zs, ze, zm);
44 
45 	ieee754_clearcx();
46 
47 	switch (zc) {
48 	case IEEE754_CLASS_SNAN:
49 		ieee754_setcx(IEEE754_INVALID_OPERATION);
50 		return ieee754sp_nanxcpt(z);
51 	case IEEE754_CLASS_DNORM:
52 		SPDNORMx(zm, ze);
53 	/* QNAN is handled separately below */
54 	}
55 
56 	switch (CLPAIR(xc, yc)) {
57 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
58 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
59 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
60 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
61 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
62 		return ieee754sp_nanxcpt(y);
63 
64 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
65 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
66 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
67 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
68 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
69 	case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
70 		return ieee754sp_nanxcpt(x);
71 
72 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
73 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
74 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
75 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
76 		return y;
77 
78 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
79 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
80 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
81 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
82 	case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF):
83 		return x;
84 
85 	/*
86 	 * Infinity handling
87 	 */
88 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
89 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
90 		if (zc == IEEE754_CLASS_QNAN)
91 			return z;
92 		ieee754_setcx(IEEE754_INVALID_OPERATION);
93 		return ieee754sp_indef();
94 
95 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
96 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
97 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
98 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
99 	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
100 		if (zc == IEEE754_CLASS_QNAN)
101 			return z;
102 		return ieee754sp_inf(xs ^ ys);
103 
104 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
105 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
106 	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
107 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
108 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
109 		if (zc == IEEE754_CLASS_INF)
110 			return ieee754sp_inf(zs);
111 		/* Multiplication is 0 so just return z */
112 		return z;
113 
114 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
115 		SPDNORMX;
116 
117 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
118 		if (zc == IEEE754_CLASS_QNAN)
119 			return z;
120 		else if (zc == IEEE754_CLASS_INF)
121 			return ieee754sp_inf(zs);
122 		SPDNORMY;
123 		break;
124 
125 	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
126 		if (zc == IEEE754_CLASS_QNAN)
127 			return z;
128 		else if (zc == IEEE754_CLASS_INF)
129 			return ieee754sp_inf(zs);
130 		SPDNORMX;
131 		break;
132 
133 	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
134 		if (zc == IEEE754_CLASS_QNAN)
135 			return z;
136 		else if (zc == IEEE754_CLASS_INF)
137 			return ieee754sp_inf(zs);
138 		/* fall through to real computations */
139 	}
140 
141 	/* Finally get to do some computation */
142 
143 	/*
144 	 * Do the multiplication bit first
145 	 *
146 	 * rm = xm * ym, re = xe + ye basically
147 	 *
148 	 * At this point xm and ym should have been normalized.
149 	 */
150 
151 	/* rm = xm * ym, re = xe+ye basically */
152 	assert(xm & SP_HIDDEN_BIT);
153 	assert(ym & SP_HIDDEN_BIT);
154 
155 	re = xe + ye;
156 	rs = xs ^ ys;
157 
158 	/* shunt to top of word */
159 	xm <<= 32 - (SP_FBITS + 1);
160 	ym <<= 32 - (SP_FBITS + 1);
161 
162 	/*
163 	 * Multiply 32 bits xm, ym to give high 32 bits rm with stickness.
164 	 */
165 	lxm = xm & 0xffff;
166 	hxm = xm >> 16;
167 	lym = ym & 0xffff;
168 	hym = ym >> 16;
169 
170 	lrm = lxm * lym;	/* 16 * 16 => 32 */
171 	hrm = hxm * hym;	/* 16 * 16 => 32 */
172 
173 	t = lxm * hym; /* 16 * 16 => 32 */
174 	at = lrm + (t << 16);
175 	hrm += at < lrm;
176 	lrm = at;
177 	hrm = hrm + (t >> 16);
178 
179 	t = hxm * lym; /* 16 * 16 => 32 */
180 	at = lrm + (t << 16);
181 	hrm += at < lrm;
182 	lrm = at;
183 	hrm = hrm + (t >> 16);
184 
185 	rm = hrm | (lrm != 0);
186 
187 	/*
188 	 * Sticky shift down to normal rounding precision.
189 	 */
190 	if ((int) rm < 0) {
191 		rm = (rm >> (32 - (SP_FBITS + 1 + 3))) |
192 		    ((rm << (SP_FBITS + 1 + 3)) != 0);
193 		re++;
194 	} else {
195 		rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) |
196 		     ((rm << (SP_FBITS + 1 + 3 + 1)) != 0);
197 	}
198 	assert(rm & (SP_HIDDEN_BIT << 3));
199 
200 	/* And now the addition */
201 
202 	assert(zm & SP_HIDDEN_BIT);
203 
204 	/*
205 	 * Provide guard,round and stick bit space.
206 	 */
207 	zm <<= 3;
208 
209 	if (ze > re) {
210 		/*
211 		 * Have to shift y fraction right to align.
212 		 */
213 		s = ze - re;
214 		SPXSRSYn(s);
215 	} else if (re > ze) {
216 		/*
217 		 * Have to shift x fraction right to align.
218 		 */
219 		s = re - ze;
220 		SPXSRSYn(s);
221 	}
222 	assert(ze == re);
223 	assert(ze <= SP_EMAX);
224 
225 	if (zs == rs) {
226 		/*
227 		 * Generate 28 bit result of adding two 27 bit numbers
228 		 * leaving result in zm, zs and ze.
229 		 */
230 		zm = zm + rm;
231 
232 		if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */
233 			SPXSRSX1();
234 		}
235 	} else {
236 		if (zm >= rm) {
237 			zm = zm - rm;
238 		} else {
239 			zm = rm - zm;
240 			zs = rs;
241 		}
242 		if (zm == 0)
243 			return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
244 
245 		/*
246 		 * Normalize in extended single precision
247 		 */
248 		while ((zm >> (SP_MBITS + 3)) == 0) {
249 			zm <<= 1;
250 			ze--;
251 		}
252 
253 	}
254 	return ieee754sp_format(zs, ze, zm);
255 }
256