xref: /openbmc/qemu/target/i386/ops_sse.h (revision ee04a3c86dc125bbbdb04abaf4188dbafe612891)
/*
 *  MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI support
 *
 *  Copyright (c) 2005 Fabrice Bellard
 *  Copyright (c) 2008 Intel Corporation  <andrew.zaborowski@intel.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
20fcf5ef2aSThomas Huth 
21fcf5ef2aSThomas Huth #include "crypto/aes.h"
22fcf5ef2aSThomas Huth 
/*
 * Per-width configuration, selected by SHIFT.
 *
 * SHIFT == 0 selects the MMXReg flavour: 8 bytes per register, MMX_*
 * element accessors, helpers suffixed with _mmx, and XMM_ONLY(...)
 * expanding to nothing.  Any other SHIFT selects the ZMMReg flavour:
 * 16 bytes are operated on, ZMM_* accessors are used, helpers are
 * suffixed with _xmm, and XMM_ONLY(...) keeps its arguments.
 *
 * NOTE(review): presumably this file is included once per SHIFT value --
 * confirm against the including file.
 */
#if SHIFT == 0
#define Reg MMXReg
#define SIZE 8
#define XMM_ONLY(...)
#define B(n) MMX_B(n)
#define W(n) MMX_W(n)
#define L(n) MMX_L(n)
#define Q(n) MMX_Q(n)
#define SUFFIX _mmx
#else
#define Reg ZMMReg
#define SIZE 16
#define XMM_ONLY(...) __VA_ARGS__
#define B(n) ZMM_B(n)
#define W(n) ZMM_W(n)
#define L(n) ZMM_L(n)
#define Q(n) ZMM_Q(n)
#define SUFFIX _xmm
#endif

/* Byte stride of one lane: 16 when SHIFT is non-zero, otherwise 8. */
#define LANE_WIDTH (SHIFT ? 16 : 8)
4418592d2eSPaul Brook 
/*
 * Copy the relevant parts of a Reg value around. In the case where
 * sizeof(Reg) > SIZE, these helpers operate only on the lower bytes of
 * a 64 byte ZMMReg, so we must copy only those and keep the top bytes
 * untouched in the guest-visible destination register.
 * Note that the "lower bytes" are placed last in memory on big-endian
 * hosts, which store the vector backwards in memory.  In that case the
 * copy *starts* at B(SIZE - 1) and ends at B(0), the opposite of
 * the little-endian case.
 */
#if HOST_BIG_ENDIAN
/* B(SIZE - 1) is the first byte in memory of the SIZE bytes copied. */
#define MOVE(d, r) memcpy(&((d).B(SIZE - 1)), &(r).B(SIZE - 1), SIZE)
#else
/* B(0) is the first byte in memory of the SIZE bytes copied. */
#define MOVE(d, r) memcpy(&(d).B(0), &(r).B(0), SIZE)
#endif
60d22697ddSPaolo Bonzini 
#if SHIFT == 0
/*
 * Per-element shift primitives used by the packed shift helpers.
 *
 *   x - element value to shift
 *   c - shift count; the caller guarantees it is below the element width
 *
 * The count is taken from the macro argument rather than from a variable
 * that merely happens to be called "shift" at the expansion site, so the
 * macros work no matter how the caller names its local.
 *
 * FPSRAW/FPSRAL reinterpret the element as a signed 16/32-bit value first,
 * so that the right shift is applied to a signed operand.
 */
#define FPSRL(x, c) ((x) >> (c))
#define FPSRAW(x, c) ((int16_t)(x) >> (c))
#define FPSRAL(x, c) ((int32_t)(x) >> (c))
#define FPSLL(x, c) ((x) << (c))
#endif
6718592d2eSPaul Brook 
6818592d2eSPaul Brook void glue(helper_psrlw, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
6918592d2eSPaul Brook {
7018592d2eSPaul Brook     Reg *s = d;
7118592d2eSPaul Brook     int shift;
7218592d2eSPaul Brook     if (c->Q(0) > 15) {
7318592d2eSPaul Brook         for (int i = 0; i < 1 << SHIFT; i++) {
7418592d2eSPaul Brook             d->Q(i) = 0;
7518592d2eSPaul Brook         }
76fcf5ef2aSThomas Huth     } else {
7718592d2eSPaul Brook         shift = c->B(0);
7818592d2eSPaul Brook         for (int i = 0; i < 4 << SHIFT; i++) {
7918592d2eSPaul Brook             d->W(i) = FPSRL(s->W(i), shift);
8018592d2eSPaul Brook         }
81fcf5ef2aSThomas Huth     }
82fcf5ef2aSThomas Huth }
83fcf5ef2aSThomas Huth 
8418592d2eSPaul Brook void glue(helper_psllw, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
85fcf5ef2aSThomas Huth {
8618592d2eSPaul Brook     Reg *s = d;
87fcf5ef2aSThomas Huth     int shift;
8818592d2eSPaul Brook     if (c->Q(0) > 15) {
8918592d2eSPaul Brook         for (int i = 0; i < 1 << SHIFT; i++) {
9018592d2eSPaul Brook             d->Q(i) = 0;
9118592d2eSPaul Brook         }
9218592d2eSPaul Brook     } else {
9318592d2eSPaul Brook         shift = c->B(0);
9418592d2eSPaul Brook         for (int i = 0; i < 4 << SHIFT; i++) {
9518592d2eSPaul Brook             d->W(i) = FPSLL(s->W(i), shift);
9618592d2eSPaul Brook         }
9718592d2eSPaul Brook     }
9818592d2eSPaul Brook }
99fcf5ef2aSThomas Huth 
10018592d2eSPaul Brook void glue(helper_psraw, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
10118592d2eSPaul Brook {
10218592d2eSPaul Brook     Reg *s = d;
10318592d2eSPaul Brook     int shift;
10418592d2eSPaul Brook     if (c->Q(0) > 15) {
105fcf5ef2aSThomas Huth         shift = 15;
106fcf5ef2aSThomas Huth     } else {
10718592d2eSPaul Brook         shift = c->B(0);
108fcf5ef2aSThomas Huth     }
10918592d2eSPaul Brook     for (int i = 0; i < 4 << SHIFT; i++) {
11018592d2eSPaul Brook         d->W(i) = FPSRAW(s->W(i), shift);
11118592d2eSPaul Brook     }
112fcf5ef2aSThomas Huth }
113fcf5ef2aSThomas Huth 
11418592d2eSPaul Brook void glue(helper_psrld, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
115fcf5ef2aSThomas Huth {
11618592d2eSPaul Brook     Reg *s = d;
117fcf5ef2aSThomas Huth     int shift;
11818592d2eSPaul Brook     if (c->Q(0) > 31) {
11918592d2eSPaul Brook         for (int i = 0; i < 1 << SHIFT; i++) {
12018592d2eSPaul Brook             d->Q(i) = 0;
12118592d2eSPaul Brook         }
122fcf5ef2aSThomas Huth     } else {
12318592d2eSPaul Brook         shift = c->B(0);
12418592d2eSPaul Brook         for (int i = 0; i < 2 << SHIFT; i++) {
12518592d2eSPaul Brook             d->L(i) = FPSRL(s->L(i), shift);
12618592d2eSPaul Brook         }
127fcf5ef2aSThomas Huth     }
128fcf5ef2aSThomas Huth }
129fcf5ef2aSThomas Huth 
13018592d2eSPaul Brook void glue(helper_pslld, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
131fcf5ef2aSThomas Huth {
13218592d2eSPaul Brook     Reg *s = d;
133fcf5ef2aSThomas Huth     int shift;
13418592d2eSPaul Brook     if (c->Q(0) > 31) {
13518592d2eSPaul Brook         for (int i = 0; i < 1 << SHIFT; i++) {
13618592d2eSPaul Brook             d->Q(i) = 0;
13718592d2eSPaul Brook         }
138fcf5ef2aSThomas Huth     } else {
13918592d2eSPaul Brook         shift = c->B(0);
14018592d2eSPaul Brook         for (int i = 0; i < 2 << SHIFT; i++) {
14118592d2eSPaul Brook             d->L(i) = FPSLL(s->L(i), shift);
14218592d2eSPaul Brook         }
143fcf5ef2aSThomas Huth     }
144fcf5ef2aSThomas Huth }
145fcf5ef2aSThomas Huth 
14618592d2eSPaul Brook void glue(helper_psrad, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
147fcf5ef2aSThomas Huth {
14818592d2eSPaul Brook     Reg *s = d;
149fcf5ef2aSThomas Huth     int shift;
15018592d2eSPaul Brook     if (c->Q(0) > 31) {
151fcf5ef2aSThomas Huth         shift = 31;
152fcf5ef2aSThomas Huth     } else {
15318592d2eSPaul Brook         shift = c->B(0);
154fcf5ef2aSThomas Huth     }
15518592d2eSPaul Brook     for (int i = 0; i < 2 << SHIFT; i++) {
15618592d2eSPaul Brook         d->L(i) = FPSRAL(s->L(i), shift);
15718592d2eSPaul Brook     }
158fcf5ef2aSThomas Huth }
159fcf5ef2aSThomas Huth 
16018592d2eSPaul Brook void glue(helper_psrlq, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
161fcf5ef2aSThomas Huth {
16218592d2eSPaul Brook     Reg *s = d;
163fcf5ef2aSThomas Huth     int shift;
16418592d2eSPaul Brook     if (c->Q(0) > 63) {
16518592d2eSPaul Brook         for (int i = 0; i < 1 << SHIFT; i++) {
16618592d2eSPaul Brook             d->Q(i) = 0;
16718592d2eSPaul Brook         }
168fcf5ef2aSThomas Huth     } else {
16918592d2eSPaul Brook         shift = c->B(0);
17018592d2eSPaul Brook         for (int i = 0; i < 1 << SHIFT; i++) {
17118592d2eSPaul Brook             d->Q(i) = FPSRL(s->Q(i), shift);
17218592d2eSPaul Brook         }
173fcf5ef2aSThomas Huth     }
174fcf5ef2aSThomas Huth }
175fcf5ef2aSThomas Huth 
17618592d2eSPaul Brook void glue(helper_psllq, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
177fcf5ef2aSThomas Huth {
17818592d2eSPaul Brook     Reg *s = d;
179fcf5ef2aSThomas Huth     int shift;
18018592d2eSPaul Brook     if (c->Q(0) > 63) {
18118592d2eSPaul Brook         for (int i = 0; i < 1 << SHIFT; i++) {
18218592d2eSPaul Brook             d->Q(i) = 0;
18318592d2eSPaul Brook         }
184fcf5ef2aSThomas Huth     } else {
18518592d2eSPaul Brook         shift = c->B(0);
18618592d2eSPaul Brook         for (int i = 0; i < 1 << SHIFT; i++) {
18718592d2eSPaul Brook             d->Q(i) = FPSLL(s->Q(i), shift);
18818592d2eSPaul Brook         }
189fcf5ef2aSThomas Huth     }
190fcf5ef2aSThomas Huth }
191fcf5ef2aSThomas Huth 
19218592d2eSPaul Brook #if SHIFT >= 1
19318592d2eSPaul Brook void glue(helper_psrldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
194fcf5ef2aSThomas Huth {
19518592d2eSPaul Brook     Reg *s = d;
19618592d2eSPaul Brook     int shift, i, j;
197fcf5ef2aSThomas Huth 
19818592d2eSPaul Brook     shift = c->L(0);
199fcf5ef2aSThomas Huth     if (shift > 16) {
200fcf5ef2aSThomas Huth         shift = 16;
201fcf5ef2aSThomas Huth     }
20218592d2eSPaul Brook     for (j = 0; j < 8 << SHIFT; j += LANE_WIDTH) {
203fcf5ef2aSThomas Huth         for (i = 0; i < 16 - shift; i++) {
20418592d2eSPaul Brook             d->B(j + i) = s->B(j + i + shift);
205fcf5ef2aSThomas Huth         }
206fcf5ef2aSThomas Huth         for (i = 16 - shift; i < 16; i++) {
20718592d2eSPaul Brook             d->B(j + i) = 0;
20818592d2eSPaul Brook         }
209fcf5ef2aSThomas Huth     }
210fcf5ef2aSThomas Huth }
211fcf5ef2aSThomas Huth 
21218592d2eSPaul Brook void glue(helper_pslldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
213fcf5ef2aSThomas Huth {
21418592d2eSPaul Brook     Reg *s = d;
21518592d2eSPaul Brook     int shift, i, j;
216fcf5ef2aSThomas Huth 
21718592d2eSPaul Brook     shift = c->L(0);
218fcf5ef2aSThomas Huth     if (shift > 16) {
219fcf5ef2aSThomas Huth         shift = 16;
220fcf5ef2aSThomas Huth     }
22118592d2eSPaul Brook     for (j = 0; j < 8 << SHIFT; j += LANE_WIDTH) {
222fcf5ef2aSThomas Huth         for (i = 15; i >= shift; i--) {
22318592d2eSPaul Brook             d->B(j + i) = s->B(j + i - shift);
224fcf5ef2aSThomas Huth         }
225fcf5ef2aSThomas Huth         for (i = 0; i < shift; i++) {
22618592d2eSPaul Brook             d->B(j + i) = 0;
22718592d2eSPaul Brook         }
228fcf5ef2aSThomas Huth     }
229fcf5ef2aSThomas Huth }
230fcf5ef2aSThomas Huth #endif
231fcf5ef2aSThomas Huth 
/*
 * Define glue(name, SUFFIX)(env, d, s): a unary element-wise helper that
 * stores F(s->elem(i)) into d->elem(i) for every i in [0, num).
 */
#define SSE_HELPER_1(name, elem, num, F)                        \
    void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)   \
    {                                                           \
        const int count = num;                                  \
        int idx;                                                \
        for (idx = 0; idx < count; idx++) {                     \
            d->elem(idx) = F(s->elem(idx));                     \
        }                                                       \
    }
240fcf5ef2aSThomas Huth 
/*
 * Define glue(name, SUFFIX)(env, d, s): a binary element-wise helper that
 * replaces d->elem(i) with F(d->elem(i), s->elem(i)) for every i in
 * [0, num).  The destination doubles as the first operand.
 */
#define SSE_HELPER_2(name, elem, num, F)                        \
    void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)   \
    {                                                           \
        const int count = num;                                  \
        int idx;                                                \
        for (idx = 0; idx < count; idx++) {                     \
            d->elem(idx) = F(d->elem(idx), s->elem(idx));       \
        }                                                       \
    }
250*ee04a3c8SPaul Brook 
/*
 * Binary element-wise helper generators, one per element accessor.
 * The element count scales with SHIFT: 8, 4, 2 and 1 elements for B, W,
 * L and Q when SHIFT is 0, doubling for each increment of SHIFT.
 */
#define SSE_HELPER_B(name, F) SSE_HELPER_2(name, B, 8 << SHIFT, F)
#define SSE_HELPER_W(name, F) SSE_HELPER_2(name, W, 4 << SHIFT, F)
#define SSE_HELPER_L(name, F) SSE_HELPER_2(name, L, 2 << SHIFT, F)
#define SSE_HELPER_Q(name, F) SSE_HELPER_2(name, Q, 1 << SHIFT, F)
262fcf5ef2aSThomas Huth 
263fcf5ef2aSThomas Huth #if SHIFT == 0
/* Clamp x to the unsigned 8-bit range [0, 255]. */
static inline int satub(int x)
{
    return x < 0 ? 0 : (x > 255 ? 255 : x);
}
274fcf5ef2aSThomas Huth 
/* Clamp x to the unsigned 16-bit range [0, 65535]. */
static inline int satuw(int x)
{
    return x < 0 ? 0 : (x > 65535 ? 65535 : x);
}
285fcf5ef2aSThomas Huth 
/* Clamp x to the signed 8-bit range [-128, 127]. */
static inline int satsb(int x)
{
    return x < -128 ? -128 : (x > 127 ? 127 : x);
}
296fcf5ef2aSThomas Huth 
/* Clamp x to the signed 16-bit range [-32768, 32767]. */
static inline int satsw(int x)
{
    return x < -32768 ? -32768 : (x > 32767 ? 32767 : x);
}
307fcf5ef2aSThomas Huth 
/*
 * Per-element operations plugged into the SSE_HELPER_* generators.
 * Each takes the destination element (a) and the source element (b) and
 * yields the new destination element.  Every expansion is fully
 * parenthesized so it can be embedded in a larger expression.
 */

/* Wrapping and saturating addition. */
#define FADD(a, b) ((a) + (b))
#define FADDUB(a, b) satub((a) + (b))
#define FADDUW(a, b) satuw((a) + (b))
#define FADDSB(a, b) satsb((int8_t)(a) + (int8_t)(b))
#define FADDSW(a, b) satsw((int16_t)(a) + (int16_t)(b))

/* Wrapping and saturating subtraction, then unsigned-byte/signed-word min/max. */
#define FSUB(a, b) ((a) - (b))
#define FSUBUB(a, b) satub((a) - (b))
#define FSUBUW(a, b) satuw((a) - (b))
#define FSUBSB(a, b) satsb((int8_t)(a) - (int8_t)(b))
#define FSUBSW(a, b) satsw((int16_t)(a) - (int16_t)(b))
#define FMINUB(a, b) (((a) < (b)) ? (a) : (b))
#define FMINSW(a, b) (((int16_t)(a) < (int16_t)(b)) ? (a) : (b))
#define FMAXUB(a, b) (((a) > (b)) ? (a) : (b))
#define FMAXSW(a, b) (((int16_t)(a) > (int16_t)(b)) ? (a) : (b))

/* Bitwise logic; FANDN complements the first (destination) operand. */
#define FAND(a, b) ((a) & (b))
#define FANDN(a, b) ((~(a)) & (b))
#define FOR(a, b) ((a) | (b))
#define FXOR(a, b) ((a) ^ (b))

/* Comparisons produce an all-ones element (-1) when true, else 0. */
#define FCMPGTB(a, b) ((int8_t)(a) > (int8_t)(b) ? -1 : 0)
#define FCMPGTW(a, b) ((int16_t)(a) > (int16_t)(b) ? -1 : 0)
#define FCMPGTL(a, b) ((int32_t)(a) > (int32_t)(b) ? -1 : 0)
#define FCMPEQ(a, b) ((a) == (b) ? -1 : 0)

/*
 * 16-bit multiplies.  The unsigned variants multiply as uint32_t: two
 * 16-bit operands promoted to int would overflow for 0xFFFF * 0xFFFF.
 * The bits stored back into a 16-bit element are the same either way.
 */
#define FMULLW(a, b) ((uint32_t)(a) * (uint32_t)(b))
#define FMULHRW(a, b) (((int16_t)(a) * (int16_t)(b) + 0x8000) >> 16)
#define FMULHUW(a, b) (((uint32_t)(a) * (uint32_t)(b)) >> 16)
#define FMULHW(a, b) ((int16_t)(a) * (int16_t)(b) >> 16)

/* Average rounded up. */
#define FAVG(a, b) (((a) + (b) + 1) >> 1)
340fcf5ef2aSThomas Huth #endif
341fcf5ef2aSThomas Huth 
/*
 * Instantiate the binary element-wise helpers.  Each line defines one
 * function named glue(<name>, SUFFIX) that applies the given F* operation
 * to every B/W/L/Q element of the destination and source.
 */

/* Wrapping add, per element width. */
SSE_HELPER_B(helper_paddb, FADD)
SSE_HELPER_W(helper_paddw, FADD)
SSE_HELPER_L(helper_paddl, FADD)
SSE_HELPER_Q(helper_paddq, FADD)

/* Wrapping subtract, per element width. */
SSE_HELPER_B(helper_psubb, FSUB)
SSE_HELPER_W(helper_psubw, FSUB)
SSE_HELPER_L(helper_psubl, FSUB)
SSE_HELPER_Q(helper_psubq, FSUB)

/* Saturating byte add/subtract (unsigned and signed). */
SSE_HELPER_B(helper_paddusb, FADDUB)
SSE_HELPER_B(helper_paddsb, FADDSB)
SSE_HELPER_B(helper_psubusb, FSUBUB)
SSE_HELPER_B(helper_psubsb, FSUBSB)

/* Saturating word add/subtract (unsigned and signed). */
SSE_HELPER_W(helper_paddusw, FADDUW)
SSE_HELPER_W(helper_paddsw, FADDSW)
SSE_HELPER_W(helper_psubusw, FSUBUW)
SSE_HELPER_W(helper_psubsw, FSUBSW)

/* Unsigned byte min/max. */
SSE_HELPER_B(helper_pminub, FMINUB)
SSE_HELPER_B(helper_pmaxub, FMAXUB)

/* Signed word min/max. */
SSE_HELPER_W(helper_pminsw, FMINSW)
SSE_HELPER_W(helper_pmaxsw, FMAXSW)

/* Bitwise logic, processed a quadword at a time. */
SSE_HELPER_Q(helper_pand, FAND)
SSE_HELPER_Q(helper_pandn, FANDN)
SSE_HELPER_Q(helper_por, FOR)
SSE_HELPER_Q(helper_pxor, FXOR)

/* Signed greater-than compare; result element is all ones or zero. */
SSE_HELPER_B(helper_pcmpgtb, FCMPGTB)
SSE_HELPER_W(helper_pcmpgtw, FCMPGTW)
SSE_HELPER_L(helper_pcmpgtl, FCMPGTL)

/* Equality compare; result element is all ones or zero. */
SSE_HELPER_B(helper_pcmpeqb, FCMPEQ)
SSE_HELPER_W(helper_pcmpeqw, FCMPEQ)
SSE_HELPER_L(helper_pcmpeql, FCMPEQ)

/* Word multiplies: low half, rounded high half, unsigned/signed high half. */
SSE_HELPER_W(helper_pmullw, FMULLW)
#if SHIFT == 0
/* pmulhrw is only generated for the SHIFT == 0 flavour. */
SSE_HELPER_W(helper_pmulhrw, FMULHRW)
#endif
SSE_HELPER_W(helper_pmulhuw, FMULHUW)
SSE_HELPER_W(helper_pmulhw, FMULHW)

/* Rounded-up averages. */
SSE_HELPER_B(helper_pavgb, FAVG)
SSE_HELPER_W(helper_pavgw, FAVG)
390fcf5ef2aSThomas Huth 
391fcf5ef2aSThomas Huth void glue(helper_pmuludq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
392fcf5ef2aSThomas Huth {
393fcf5ef2aSThomas Huth     d->Q(0) = (uint64_t)s->L(0) * (uint64_t)d->L(0);
394fcf5ef2aSThomas Huth #if SHIFT == 1
395fcf5ef2aSThomas Huth     d->Q(1) = (uint64_t)s->L(2) * (uint64_t)d->L(2);
396fcf5ef2aSThomas Huth #endif
397fcf5ef2aSThomas Huth }
398fcf5ef2aSThomas Huth 
399fcf5ef2aSThomas Huth void glue(helper_pmaddwd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
400fcf5ef2aSThomas Huth {
401fcf5ef2aSThomas Huth     int i;
402fcf5ef2aSThomas Huth 
403fcf5ef2aSThomas Huth     for (i = 0; i < (2 << SHIFT); i++) {
404fcf5ef2aSThomas Huth         d->L(i) = (int16_t)s->W(2 * i) * (int16_t)d->W(2 * i) +
405fcf5ef2aSThomas Huth             (int16_t)s->W(2 * i + 1) * (int16_t)d->W(2 * i + 1);
406fcf5ef2aSThomas Huth     }
407fcf5ef2aSThomas Huth }
408fcf5ef2aSThomas Huth 
409fcf5ef2aSThomas Huth #if SHIFT == 0
/* Absolute value of a; negative inputs are negated, others pass through. */
static inline int abs1(int a)
{
    return a < 0 ? -a : a;
}
418fcf5ef2aSThomas Huth #endif
419fcf5ef2aSThomas Huth void glue(helper_psadbw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
420fcf5ef2aSThomas Huth {
421fcf5ef2aSThomas Huth     unsigned int val;
422fcf5ef2aSThomas Huth 
423fcf5ef2aSThomas Huth     val = 0;
424fcf5ef2aSThomas Huth     val += abs1(d->B(0) - s->B(0));
425fcf5ef2aSThomas Huth     val += abs1(d->B(1) - s->B(1));
426fcf5ef2aSThomas Huth     val += abs1(d->B(2) - s->B(2));
427fcf5ef2aSThomas Huth     val += abs1(d->B(3) - s->B(3));
428fcf5ef2aSThomas Huth     val += abs1(d->B(4) - s->B(4));
429fcf5ef2aSThomas Huth     val += abs1(d->B(5) - s->B(5));
430fcf5ef2aSThomas Huth     val += abs1(d->B(6) - s->B(6));
431fcf5ef2aSThomas Huth     val += abs1(d->B(7) - s->B(7));
432fcf5ef2aSThomas Huth     d->Q(0) = val;
433fcf5ef2aSThomas Huth #if SHIFT == 1
434fcf5ef2aSThomas Huth     val = 0;
435fcf5ef2aSThomas Huth     val += abs1(d->B(8) - s->B(8));
436fcf5ef2aSThomas Huth     val += abs1(d->B(9) - s->B(9));
437fcf5ef2aSThomas Huth     val += abs1(d->B(10) - s->B(10));
438fcf5ef2aSThomas Huth     val += abs1(d->B(11) - s->B(11));
439fcf5ef2aSThomas Huth     val += abs1(d->B(12) - s->B(12));
440fcf5ef2aSThomas Huth     val += abs1(d->B(13) - s->B(13));
441fcf5ef2aSThomas Huth     val += abs1(d->B(14) - s->B(14));
442fcf5ef2aSThomas Huth     val += abs1(d->B(15) - s->B(15));
443fcf5ef2aSThomas Huth     d->Q(1) = val;
444fcf5ef2aSThomas Huth #endif
445fcf5ef2aSThomas Huth }
446fcf5ef2aSThomas Huth 
447fcf5ef2aSThomas Huth void glue(helper_maskmov, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
448fcf5ef2aSThomas Huth                                   target_ulong a0)
449fcf5ef2aSThomas Huth {
450fcf5ef2aSThomas Huth     int i;
451fcf5ef2aSThomas Huth 
452fcf5ef2aSThomas Huth     for (i = 0; i < (8 << SHIFT); i++) {
453fcf5ef2aSThomas Huth         if (s->B(i) & 0x80) {
454fcf5ef2aSThomas Huth             cpu_stb_data_ra(env, a0 + i, d->B(i), GETPC());
455fcf5ef2aSThomas Huth         }
456fcf5ef2aSThomas Huth     }
457fcf5ef2aSThomas Huth }
458fcf5ef2aSThomas Huth 
459fcf5ef2aSThomas Huth void glue(helper_movl_mm_T0, SUFFIX)(Reg *d, uint32_t val)
460fcf5ef2aSThomas Huth {
461fcf5ef2aSThomas Huth     d->L(0) = val;
462fcf5ef2aSThomas Huth     d->L(1) = 0;
463fcf5ef2aSThomas Huth #if SHIFT == 1
464fcf5ef2aSThomas Huth     d->Q(1) = 0;
465fcf5ef2aSThomas Huth #endif
466fcf5ef2aSThomas Huth }
467fcf5ef2aSThomas Huth 
468fcf5ef2aSThomas Huth #ifdef TARGET_X86_64
469fcf5ef2aSThomas Huth void glue(helper_movq_mm_T0, SUFFIX)(Reg *d, uint64_t val)
470fcf5ef2aSThomas Huth {
471fcf5ef2aSThomas Huth     d->Q(0) = val;
472fcf5ef2aSThomas Huth #if SHIFT == 1
473fcf5ef2aSThomas Huth     d->Q(1) = 0;
474fcf5ef2aSThomas Huth #endif
475fcf5ef2aSThomas Huth }
476fcf5ef2aSThomas Huth #endif
477fcf5ef2aSThomas Huth 
478fcf5ef2aSThomas Huth #if SHIFT == 0
479fcf5ef2aSThomas Huth void glue(helper_pshufw, SUFFIX)(Reg *d, Reg *s, int order)
480fcf5ef2aSThomas Huth {
481fcf5ef2aSThomas Huth     Reg r;
482fcf5ef2aSThomas Huth 
483fcf5ef2aSThomas Huth     r.W(0) = s->W(order & 3);
484fcf5ef2aSThomas Huth     r.W(1) = s->W((order >> 2) & 3);
485fcf5ef2aSThomas Huth     r.W(2) = s->W((order >> 4) & 3);
486fcf5ef2aSThomas Huth     r.W(3) = s->W((order >> 6) & 3);
487d22697ddSPaolo Bonzini     MOVE(*d, r);
488fcf5ef2aSThomas Huth }
489fcf5ef2aSThomas Huth #else
490ce4fa29fSPaolo Bonzini void glue(helper_shufps, SUFFIX)(Reg *d, Reg *s, int order)
491fcf5ef2aSThomas Huth {
492fcf5ef2aSThomas Huth     Reg r;
493fcf5ef2aSThomas Huth 
494fcf5ef2aSThomas Huth     r.L(0) = d->L(order & 3);
495fcf5ef2aSThomas Huth     r.L(1) = d->L((order >> 2) & 3);
496fcf5ef2aSThomas Huth     r.L(2) = s->L((order >> 4) & 3);
497fcf5ef2aSThomas Huth     r.L(3) = s->L((order >> 6) & 3);
498d22697ddSPaolo Bonzini     MOVE(*d, r);
499fcf5ef2aSThomas Huth }
500fcf5ef2aSThomas Huth 
501ce4fa29fSPaolo Bonzini void glue(helper_shufpd, SUFFIX)(Reg *d, Reg *s, int order)
502fcf5ef2aSThomas Huth {
503fcf5ef2aSThomas Huth     Reg r;
504fcf5ef2aSThomas Huth 
505fcf5ef2aSThomas Huth     r.Q(0) = d->Q(order & 1);
506fcf5ef2aSThomas Huth     r.Q(1) = s->Q((order >> 1) & 1);
507d22697ddSPaolo Bonzini     MOVE(*d, r);
508fcf5ef2aSThomas Huth }
509fcf5ef2aSThomas Huth 
510fcf5ef2aSThomas Huth void glue(helper_pshufd, SUFFIX)(Reg *d, Reg *s, int order)
511fcf5ef2aSThomas Huth {
512fcf5ef2aSThomas Huth     Reg r;
513fcf5ef2aSThomas Huth 
514fcf5ef2aSThomas Huth     r.L(0) = s->L(order & 3);
515fcf5ef2aSThomas Huth     r.L(1) = s->L((order >> 2) & 3);
516fcf5ef2aSThomas Huth     r.L(2) = s->L((order >> 4) & 3);
517fcf5ef2aSThomas Huth     r.L(3) = s->L((order >> 6) & 3);
518d22697ddSPaolo Bonzini     MOVE(*d, r);
519fcf5ef2aSThomas Huth }
520fcf5ef2aSThomas Huth 
521fcf5ef2aSThomas Huth void glue(helper_pshuflw, SUFFIX)(Reg *d, Reg *s, int order)
522fcf5ef2aSThomas Huth {
523fcf5ef2aSThomas Huth     Reg r;
524fcf5ef2aSThomas Huth 
525fcf5ef2aSThomas Huth     r.W(0) = s->W(order & 3);
526fcf5ef2aSThomas Huth     r.W(1) = s->W((order >> 2) & 3);
527fcf5ef2aSThomas Huth     r.W(2) = s->W((order >> 4) & 3);
528fcf5ef2aSThomas Huth     r.W(3) = s->W((order >> 6) & 3);
529fcf5ef2aSThomas Huth     r.Q(1) = s->Q(1);
530d22697ddSPaolo Bonzini     MOVE(*d, r);
531fcf5ef2aSThomas Huth }
532fcf5ef2aSThomas Huth 
533fcf5ef2aSThomas Huth void glue(helper_pshufhw, SUFFIX)(Reg *d, Reg *s, int order)
534fcf5ef2aSThomas Huth {
535fcf5ef2aSThomas Huth     Reg r;
536fcf5ef2aSThomas Huth 
537fcf5ef2aSThomas Huth     r.Q(0) = s->Q(0);
538fcf5ef2aSThomas Huth     r.W(4) = s->W(4 + (order & 3));
539fcf5ef2aSThomas Huth     r.W(5) = s->W(4 + ((order >> 2) & 3));
540fcf5ef2aSThomas Huth     r.W(6) = s->W(4 + ((order >> 4) & 3));
541fcf5ef2aSThomas Huth     r.W(7) = s->W(4 + ((order >> 6) & 3));
542d22697ddSPaolo Bonzini     MOVE(*d, r);
543fcf5ef2aSThomas Huth }
544fcf5ef2aSThomas Huth #endif
545fcf5ef2aSThomas Huth 
546fcf5ef2aSThomas Huth #if SHIFT == 1
547fcf5ef2aSThomas Huth /* FPU ops */
548fcf5ef2aSThomas Huth /* XXX: not accurate */
549fcf5ef2aSThomas Huth 
/*
 * Generate the four floating-point flavours of one operation:
 *   glue(helper_<name>ps, SUFFIX) - four single-precision elements
 *   helper_<name>ss               - single-precision element 0 only
 *   glue(helper_<name>pd, SUFFIX) - two double-precision elements
 *   helper_<name>sd               - double-precision element 0 only
 * F is invoked as F(bits, dest_elem, src_elem) with bits being 32 or 64,
 * and its result replaces the destination element.
 */
#define SSE_HELPER_S(name, F)                                           \
    void glue(helper_ ## name ## ps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)\
    {                                                                   \
        int lane;                                                       \
        for (lane = 0; lane < 4; lane++) {                              \
            d->ZMM_S(lane) = F(32, d->ZMM_S(lane), s->ZMM_S(lane));     \
        }                                                               \
    }                                                                   \
                                                                        \
    void helper_ ## name ## ss(CPUX86State *env, Reg *d, Reg *s)        \
    {                                                                   \
        d->ZMM_S(0) = F(32, d->ZMM_S(0), s->ZMM_S(0));                  \
    }                                                                   \
                                                                        \
    void glue(helper_ ## name ## pd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)\
    {                                                                   \
        int lane;                                                       \
        for (lane = 0; lane < 2; lane++) {                              \
            d->ZMM_D(lane) = F(64, d->ZMM_D(lane), s->ZMM_D(lane));     \
        }                                                               \
    }                                                                   \
                                                                        \
    void helper_ ## name ## sd(CPUX86State *env, Reg *d, Reg *s)        \
    {                                                                   \
        d->ZMM_D(0) = F(64, d->ZMM_D(0), s->ZMM_D(0));                  \
    }
574fcf5ef2aSThomas Huth 
/*
 * Element operations for SSE_HELPER_S.  "size" is pasted into the routine
 * name (float32_* or float64_*), a is the destination element, b the
 * source element, and &env->sse_status is passed to every routine.
 * All of them expect a variable named env at the expansion site.
 */
#define FPU_ADD(size, a, b) float ## size ## _add(a, b, &env->sse_status)
#define FPU_SUB(size, a, b) float ## size ## _sub(a, b, &env->sse_status)
#define FPU_MUL(size, a, b) float ## size ## _mul(a, b, &env->sse_status)
#define FPU_DIV(size, a, b) float ## size ## _div(a, b, &env->sse_status)
/* Unary: only the source operand b is used; a is ignored. */
#define FPU_SQRT(size, a, b) float ## size ## _sqrt(b, &env->sse_status)

/* Note that the choice of comparison op here is important to get the
 * special cases right: for min and max Intel specifies that (-0,0),
 * (NaN, anything) and (anything, NaN) return the second argument.
 */
#define FPU_MIN(size, a, b)                                     \
    (float ## size ## _lt(a, b, &env->sse_status) ? (a) : (b))
#define FPU_MAX(size, a, b)                                     \
    (float ## size ## _lt(b, a, &env->sse_status) ? (a) : (b))

/* Instantiate the ps/ss/pd/sd helpers for each operation. */
SSE_HELPER_S(add, FPU_ADD)
SSE_HELPER_S(sub, FPU_SUB)
SSE_HELPER_S(mul, FPU_MUL)
SSE_HELPER_S(div, FPU_DIV)
SSE_HELPER_S(min, FPU_MIN)
SSE_HELPER_S(max, FPU_MAX)
SSE_HELPER_S(sqrt, FPU_SQRT)
597fcf5ef2aSThomas Huth 
598fcf5ef2aSThomas Huth 
599fcf5ef2aSThomas Huth /* float to float conversions */
600ce4fa29fSPaolo Bonzini void glue(helper_cvtps2pd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
601fcf5ef2aSThomas Huth {
602fcf5ef2aSThomas Huth     float32 s0, s1;
603fcf5ef2aSThomas Huth 
604fcf5ef2aSThomas Huth     s0 = s->ZMM_S(0);
605fcf5ef2aSThomas Huth     s1 = s->ZMM_S(1);
606fcf5ef2aSThomas Huth     d->ZMM_D(0) = float32_to_float64(s0, &env->sse_status);
607fcf5ef2aSThomas Huth     d->ZMM_D(1) = float32_to_float64(s1, &env->sse_status);
608fcf5ef2aSThomas Huth }
609fcf5ef2aSThomas Huth 
610ce4fa29fSPaolo Bonzini void glue(helper_cvtpd2ps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
611fcf5ef2aSThomas Huth {
612fcf5ef2aSThomas Huth     d->ZMM_S(0) = float64_to_float32(s->ZMM_D(0), &env->sse_status);
613fcf5ef2aSThomas Huth     d->ZMM_S(1) = float64_to_float32(s->ZMM_D(1), &env->sse_status);
614fcf5ef2aSThomas Huth     d->Q(1) = 0;
615fcf5ef2aSThomas Huth }
616fcf5ef2aSThomas Huth 
/*
 * Scalar single -> double conversion: converts s->ZMM_S(0) with
 * float32_to_float64() (passing &env->sse_status) and stores the result in
 * d->ZMM_D(0).  No other element of *d is written.
 */
void helper_cvtss2sd(CPUX86State *env, Reg *d, Reg *s)
{
    d->ZMM_D(0) = float32_to_float64(s->ZMM_S(0), &env->sse_status);
}
621fcf5ef2aSThomas Huth 
/*
 * Scalar double -> single conversion: converts s->ZMM_D(0) with
 * float64_to_float32() (passing &env->sse_status) and stores the result in
 * d->ZMM_S(0).  No other element of *d is written.
 */
void helper_cvtsd2ss(CPUX86State *env, Reg *d, Reg *s)
{
    d->ZMM_S(0) = float64_to_float32(s->ZMM_D(0), &env->sse_status);
}
626fcf5ef2aSThomas Huth 
627fcf5ef2aSThomas Huth /* integer to float */
628ce4fa29fSPaolo Bonzini void glue(helper_cvtdq2ps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
629fcf5ef2aSThomas Huth {
630fcf5ef2aSThomas Huth     d->ZMM_S(0) = int32_to_float32(s->ZMM_L(0), &env->sse_status);
631fcf5ef2aSThomas Huth     d->ZMM_S(1) = int32_to_float32(s->ZMM_L(1), &env->sse_status);
632fcf5ef2aSThomas Huth     d->ZMM_S(2) = int32_to_float32(s->ZMM_L(2), &env->sse_status);
633fcf5ef2aSThomas Huth     d->ZMM_S(3) = int32_to_float32(s->ZMM_L(3), &env->sse_status);
634fcf5ef2aSThomas Huth }
635fcf5ef2aSThomas Huth 
636ce4fa29fSPaolo Bonzini void glue(helper_cvtdq2pd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
637fcf5ef2aSThomas Huth {
638fcf5ef2aSThomas Huth     int32_t l0, l1;
639fcf5ef2aSThomas Huth 
640fcf5ef2aSThomas Huth     l0 = (int32_t)s->ZMM_L(0);
641fcf5ef2aSThomas Huth     l1 = (int32_t)s->ZMM_L(1);
642fcf5ef2aSThomas Huth     d->ZMM_D(0) = int32_to_float64(l0, &env->sse_status);
643fcf5ef2aSThomas Huth     d->ZMM_D(1) = int32_to_float64(l1, &env->sse_status);
644fcf5ef2aSThomas Huth }
645fcf5ef2aSThomas Huth 
646fcf5ef2aSThomas Huth void helper_cvtpi2ps(CPUX86State *env, ZMMReg *d, MMXReg *s)
647fcf5ef2aSThomas Huth {
648fcf5ef2aSThomas Huth     d->ZMM_S(0) = int32_to_float32(s->MMX_L(0), &env->sse_status);
649fcf5ef2aSThomas Huth     d->ZMM_S(1) = int32_to_float32(s->MMX_L(1), &env->sse_status);
650fcf5ef2aSThomas Huth }
651fcf5ef2aSThomas Huth 
652fcf5ef2aSThomas Huth void helper_cvtpi2pd(CPUX86State *env, ZMMReg *d, MMXReg *s)
653fcf5ef2aSThomas Huth {
654fcf5ef2aSThomas Huth     d->ZMM_D(0) = int32_to_float64(s->MMX_L(0), &env->sse_status);
655fcf5ef2aSThomas Huth     d->ZMM_D(1) = int32_to_float64(s->MMX_L(1), &env->sse_status);
656fcf5ef2aSThomas Huth }
657fcf5ef2aSThomas Huth 
658fcf5ef2aSThomas Huth void helper_cvtsi2ss(CPUX86State *env, ZMMReg *d, uint32_t val)
659fcf5ef2aSThomas Huth {
660fcf5ef2aSThomas Huth     d->ZMM_S(0) = int32_to_float32(val, &env->sse_status);
661fcf5ef2aSThomas Huth }
662fcf5ef2aSThomas Huth 
663fcf5ef2aSThomas Huth void helper_cvtsi2sd(CPUX86State *env, ZMMReg *d, uint32_t val)
664fcf5ef2aSThomas Huth {
665fcf5ef2aSThomas Huth     d->ZMM_D(0) = int32_to_float64(val, &env->sse_status);
666fcf5ef2aSThomas Huth }
667fcf5ef2aSThomas Huth 
668fcf5ef2aSThomas Huth #ifdef TARGET_X86_64
669fcf5ef2aSThomas Huth void helper_cvtsq2ss(CPUX86State *env, ZMMReg *d, uint64_t val)
670fcf5ef2aSThomas Huth {
671fcf5ef2aSThomas Huth     d->ZMM_S(0) = int64_to_float32(val, &env->sse_status);
672fcf5ef2aSThomas Huth }
673fcf5ef2aSThomas Huth 
674fcf5ef2aSThomas Huth void helper_cvtsq2sd(CPUX86State *env, ZMMReg *d, uint64_t val)
675fcf5ef2aSThomas Huth {
676fcf5ef2aSThomas Huth     d->ZMM_D(0) = int64_to_float64(val, &env->sse_status);
677fcf5ef2aSThomas Huth }
678fcf5ef2aSThomas Huth #endif
679fcf5ef2aSThomas Huth 
680fcf5ef2aSThomas Huth /* float to integer */
6811e8a98b5SPeter Maydell 
/*
 * On x86 a float-to-integer conversion that raises the 'invalid' exception
 * must produce the "indefinite integer" value.  WRAP_FLOATCONV defines
 * x86_<FN>(), a wrapper around the softfloat routine FN that:
 *   1. saves the accumulated exception flags and clears them,
 *   2. runs FN so that only this conversion's flags are visible,
 *   3. merges the new flags back into the saved ones, and
 *   4. returns INDEFVALUE instead of FN's result if 'invalid' was raised.
 */
#define WRAP_FLOATCONV(RETTYPE, FN, FLOATTYPE, INDEFVALUE)              \
    static inline RETTYPE x86_##FN(FLOATTYPE a, float_status *s)        \
    {                                                                   \
        const int saved = get_float_exception_flags(s);                 \
        RETTYPE result;                                                 \
        int raised;                                                     \
                                                                        \
        set_float_exception_flags(0, s);                                \
        result = FN(a, s);                                              \
        raised = get_float_exception_flags(s);                          \
        set_float_exception_flags(raised | saved, s);                   \
        if (raised & float_flag_invalid) {                              \
            return INDEFVALUE;                                          \
        }                                                               \
        return result;                                                  \
    }
7031e8a98b5SPeter Maydell 
/*
 * Generate the x86_* wrappers used by the conversion helpers below: one per
 * (source width, result width, rounding variant).  The last argument is the
 * value returned when the conversion raises 'invalid'.
 */
7041e8a98b5SPeter Maydell WRAP_FLOATCONV(int32_t, float32_to_int32, float32, INT32_MIN)
7051e8a98b5SPeter Maydell WRAP_FLOATCONV(int32_t, float32_to_int32_round_to_zero, float32, INT32_MIN)
7061e8a98b5SPeter Maydell WRAP_FLOATCONV(int32_t, float64_to_int32, float64, INT32_MIN)
7071e8a98b5SPeter Maydell WRAP_FLOATCONV(int32_t, float64_to_int32_round_to_zero, float64, INT32_MIN)
7081e8a98b5SPeter Maydell WRAP_FLOATCONV(int64_t, float32_to_int64, float32, INT64_MIN)
7091e8a98b5SPeter Maydell WRAP_FLOATCONV(int64_t, float32_to_int64_round_to_zero, float32, INT64_MIN)
7101e8a98b5SPeter Maydell WRAP_FLOATCONV(int64_t, float64_to_int64, float64, INT64_MIN)
7111e8a98b5SPeter Maydell WRAP_FLOATCONV(int64_t, float64_to_int64_round_to_zero, float64, INT64_MIN)
7121e8a98b5SPeter Maydell 
713ce4fa29fSPaolo Bonzini void glue(helper_cvtps2dq, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
714fcf5ef2aSThomas Huth {
7151e8a98b5SPeter Maydell     d->ZMM_L(0) = x86_float32_to_int32(s->ZMM_S(0), &env->sse_status);
7161e8a98b5SPeter Maydell     d->ZMM_L(1) = x86_float32_to_int32(s->ZMM_S(1), &env->sse_status);
7171e8a98b5SPeter Maydell     d->ZMM_L(2) = x86_float32_to_int32(s->ZMM_S(2), &env->sse_status);
7181e8a98b5SPeter Maydell     d->ZMM_L(3) = x86_float32_to_int32(s->ZMM_S(3), &env->sse_status);
719fcf5ef2aSThomas Huth }
720fcf5ef2aSThomas Huth 
721ce4fa29fSPaolo Bonzini void glue(helper_cvtpd2dq, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
722fcf5ef2aSThomas Huth {
7231e8a98b5SPeter Maydell     d->ZMM_L(0) = x86_float64_to_int32(s->ZMM_D(0), &env->sse_status);
7241e8a98b5SPeter Maydell     d->ZMM_L(1) = x86_float64_to_int32(s->ZMM_D(1), &env->sse_status);
725fcf5ef2aSThomas Huth     d->ZMM_Q(1) = 0;
726fcf5ef2aSThomas Huth }
727fcf5ef2aSThomas Huth 
728fcf5ef2aSThomas Huth void helper_cvtps2pi(CPUX86State *env, MMXReg *d, ZMMReg *s)
729fcf5ef2aSThomas Huth {
7301e8a98b5SPeter Maydell     d->MMX_L(0) = x86_float32_to_int32(s->ZMM_S(0), &env->sse_status);
7311e8a98b5SPeter Maydell     d->MMX_L(1) = x86_float32_to_int32(s->ZMM_S(1), &env->sse_status);
732fcf5ef2aSThomas Huth }
733fcf5ef2aSThomas Huth 
734fcf5ef2aSThomas Huth void helper_cvtpd2pi(CPUX86State *env, MMXReg *d, ZMMReg *s)
735fcf5ef2aSThomas Huth {
7361e8a98b5SPeter Maydell     d->MMX_L(0) = x86_float64_to_int32(s->ZMM_D(0), &env->sse_status);
7371e8a98b5SPeter Maydell     d->MMX_L(1) = x86_float64_to_int32(s->ZMM_D(1), &env->sse_status);
738fcf5ef2aSThomas Huth }
739fcf5ef2aSThomas Huth 
740fcf5ef2aSThomas Huth int32_t helper_cvtss2si(CPUX86State *env, ZMMReg *s)
741fcf5ef2aSThomas Huth {
7421e8a98b5SPeter Maydell     return x86_float32_to_int32(s->ZMM_S(0), &env->sse_status);
743fcf5ef2aSThomas Huth }
744fcf5ef2aSThomas Huth 
745fcf5ef2aSThomas Huth int32_t helper_cvtsd2si(CPUX86State *env, ZMMReg *s)
746fcf5ef2aSThomas Huth {
7471e8a98b5SPeter Maydell     return x86_float64_to_int32(s->ZMM_D(0), &env->sse_status);
748fcf5ef2aSThomas Huth }
749fcf5ef2aSThomas Huth 
750fcf5ef2aSThomas Huth #ifdef TARGET_X86_64
751fcf5ef2aSThomas Huth int64_t helper_cvtss2sq(CPUX86State *env, ZMMReg *s)
752fcf5ef2aSThomas Huth {
7531e8a98b5SPeter Maydell     return x86_float32_to_int64(s->ZMM_S(0), &env->sse_status);
754fcf5ef2aSThomas Huth }
755fcf5ef2aSThomas Huth 
756fcf5ef2aSThomas Huth int64_t helper_cvtsd2sq(CPUX86State *env, ZMMReg *s)
757fcf5ef2aSThomas Huth {
7581e8a98b5SPeter Maydell     return x86_float64_to_int64(s->ZMM_D(0), &env->sse_status);
759fcf5ef2aSThomas Huth }
760fcf5ef2aSThomas Huth #endif
761fcf5ef2aSThomas Huth 
762fcf5ef2aSThomas Huth /* float to integer truncated */
763ce4fa29fSPaolo Bonzini void glue(helper_cvttps2dq, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
764fcf5ef2aSThomas Huth {
7651e8a98b5SPeter Maydell     d->ZMM_L(0) = x86_float32_to_int32_round_to_zero(s->ZMM_S(0), &env->sse_status);
7661e8a98b5SPeter Maydell     d->ZMM_L(1) = x86_float32_to_int32_round_to_zero(s->ZMM_S(1), &env->sse_status);
7671e8a98b5SPeter Maydell     d->ZMM_L(2) = x86_float32_to_int32_round_to_zero(s->ZMM_S(2), &env->sse_status);
7681e8a98b5SPeter Maydell     d->ZMM_L(3) = x86_float32_to_int32_round_to_zero(s->ZMM_S(3), &env->sse_status);
769fcf5ef2aSThomas Huth }
770fcf5ef2aSThomas Huth 
771ce4fa29fSPaolo Bonzini void glue(helper_cvttpd2dq, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
772fcf5ef2aSThomas Huth {
7731e8a98b5SPeter Maydell     d->ZMM_L(0) = x86_float64_to_int32_round_to_zero(s->ZMM_D(0), &env->sse_status);
7741e8a98b5SPeter Maydell     d->ZMM_L(1) = x86_float64_to_int32_round_to_zero(s->ZMM_D(1), &env->sse_status);
775fcf5ef2aSThomas Huth     d->ZMM_Q(1) = 0;
776fcf5ef2aSThomas Huth }
777fcf5ef2aSThomas Huth 
778fcf5ef2aSThomas Huth void helper_cvttps2pi(CPUX86State *env, MMXReg *d, ZMMReg *s)
779fcf5ef2aSThomas Huth {
7801e8a98b5SPeter Maydell     d->MMX_L(0) = x86_float32_to_int32_round_to_zero(s->ZMM_S(0), &env->sse_status);
7811e8a98b5SPeter Maydell     d->MMX_L(1) = x86_float32_to_int32_round_to_zero(s->ZMM_S(1), &env->sse_status);
782fcf5ef2aSThomas Huth }
783fcf5ef2aSThomas Huth 
784fcf5ef2aSThomas Huth void helper_cvttpd2pi(CPUX86State *env, MMXReg *d, ZMMReg *s)
785fcf5ef2aSThomas Huth {
7861e8a98b5SPeter Maydell     d->MMX_L(0) = x86_float64_to_int32_round_to_zero(s->ZMM_D(0), &env->sse_status);
7871e8a98b5SPeter Maydell     d->MMX_L(1) = x86_float64_to_int32_round_to_zero(s->ZMM_D(1), &env->sse_status);
788fcf5ef2aSThomas Huth }
789fcf5ef2aSThomas Huth 
790fcf5ef2aSThomas Huth int32_t helper_cvttss2si(CPUX86State *env, ZMMReg *s)
791fcf5ef2aSThomas Huth {
7921e8a98b5SPeter Maydell     return x86_float32_to_int32_round_to_zero(s->ZMM_S(0), &env->sse_status);
793fcf5ef2aSThomas Huth }
794fcf5ef2aSThomas Huth 
795fcf5ef2aSThomas Huth int32_t helper_cvttsd2si(CPUX86State *env, ZMMReg *s)
796fcf5ef2aSThomas Huth {
7971e8a98b5SPeter Maydell     return x86_float64_to_int32_round_to_zero(s->ZMM_D(0), &env->sse_status);
798fcf5ef2aSThomas Huth }
799fcf5ef2aSThomas Huth 
800fcf5ef2aSThomas Huth #ifdef TARGET_X86_64
801fcf5ef2aSThomas Huth int64_t helper_cvttss2sq(CPUX86State *env, ZMMReg *s)
802fcf5ef2aSThomas Huth {
8031e8a98b5SPeter Maydell     return x86_float32_to_int64_round_to_zero(s->ZMM_S(0), &env->sse_status);
804fcf5ef2aSThomas Huth }
805fcf5ef2aSThomas Huth 
806fcf5ef2aSThomas Huth int64_t helper_cvttsd2sq(CPUX86State *env, ZMMReg *s)
807fcf5ef2aSThomas Huth {
8081e8a98b5SPeter Maydell     return x86_float64_to_int64_round_to_zero(s->ZMM_D(0), &env->sse_status);
809fcf5ef2aSThomas Huth }
810fcf5ef2aSThomas Huth #endif
811fcf5ef2aSThomas Huth 
812ce4fa29fSPaolo Bonzini void glue(helper_rsqrtps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
813fcf5ef2aSThomas Huth {
814418b0f93SJoseph Myers     uint8_t old_flags = get_float_exception_flags(&env->sse_status);
815fcf5ef2aSThomas Huth     d->ZMM_S(0) = float32_div(float32_one,
816fcf5ef2aSThomas Huth                               float32_sqrt(s->ZMM_S(0), &env->sse_status),
817fcf5ef2aSThomas Huth                               &env->sse_status);
818fcf5ef2aSThomas Huth     d->ZMM_S(1) = float32_div(float32_one,
819fcf5ef2aSThomas Huth                               float32_sqrt(s->ZMM_S(1), &env->sse_status),
820fcf5ef2aSThomas Huth                               &env->sse_status);
821fcf5ef2aSThomas Huth     d->ZMM_S(2) = float32_div(float32_one,
822fcf5ef2aSThomas Huth                               float32_sqrt(s->ZMM_S(2), &env->sse_status),
823fcf5ef2aSThomas Huth                               &env->sse_status);
824fcf5ef2aSThomas Huth     d->ZMM_S(3) = float32_div(float32_one,
825fcf5ef2aSThomas Huth                               float32_sqrt(s->ZMM_S(3), &env->sse_status),
826fcf5ef2aSThomas Huth                               &env->sse_status);
827418b0f93SJoseph Myers     set_float_exception_flags(old_flags, &env->sse_status);
828fcf5ef2aSThomas Huth }
829fcf5ef2aSThomas Huth 
830fcf5ef2aSThomas Huth void helper_rsqrtss(CPUX86State *env, ZMMReg *d, ZMMReg *s)
831fcf5ef2aSThomas Huth {
832418b0f93SJoseph Myers     uint8_t old_flags = get_float_exception_flags(&env->sse_status);
833fcf5ef2aSThomas Huth     d->ZMM_S(0) = float32_div(float32_one,
834fcf5ef2aSThomas Huth                               float32_sqrt(s->ZMM_S(0), &env->sse_status),
835fcf5ef2aSThomas Huth                               &env->sse_status);
836418b0f93SJoseph Myers     set_float_exception_flags(old_flags, &env->sse_status);
837fcf5ef2aSThomas Huth }
838fcf5ef2aSThomas Huth 
839ce4fa29fSPaolo Bonzini void glue(helper_rcpps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
840fcf5ef2aSThomas Huth {
841418b0f93SJoseph Myers     uint8_t old_flags = get_float_exception_flags(&env->sse_status);
842fcf5ef2aSThomas Huth     d->ZMM_S(0) = float32_div(float32_one, s->ZMM_S(0), &env->sse_status);
843fcf5ef2aSThomas Huth     d->ZMM_S(1) = float32_div(float32_one, s->ZMM_S(1), &env->sse_status);
844fcf5ef2aSThomas Huth     d->ZMM_S(2) = float32_div(float32_one, s->ZMM_S(2), &env->sse_status);
845fcf5ef2aSThomas Huth     d->ZMM_S(3) = float32_div(float32_one, s->ZMM_S(3), &env->sse_status);
846418b0f93SJoseph Myers     set_float_exception_flags(old_flags, &env->sse_status);
847fcf5ef2aSThomas Huth }
848fcf5ef2aSThomas Huth 
849fcf5ef2aSThomas Huth void helper_rcpss(CPUX86State *env, ZMMReg *d, ZMMReg *s)
850fcf5ef2aSThomas Huth {
851418b0f93SJoseph Myers     uint8_t old_flags = get_float_exception_flags(&env->sse_status);
852fcf5ef2aSThomas Huth     d->ZMM_S(0) = float32_div(float32_one, s->ZMM_S(0), &env->sse_status);
853418b0f93SJoseph Myers     set_float_exception_flags(old_flags, &env->sse_status);
854fcf5ef2aSThomas Huth }
855fcf5ef2aSThomas Huth 
/*
 * Bit-field extract used by the EXTRQ helpers: return LEN bits of SRC
 * starting at bit SHIFT, right-justified.
 *
 * Only the low six bits of SHIFT and LEN are significant (the SSE4a field
 * width); a resulting LEN of 0 selects all 64 bits.  Masking here matters
 * because the register form feeds in raw bytes (0..255), and shifting a
 * 64-bit value by 64 or more is undefined behaviour in C.
 */
static inline uint64_t helper_extrq(uint64_t src, int shift, int len)
{
    uint64_t mask;

    shift &= 63;
    len &= 63;

    if (len == 0) {
        mask = ~0ULL;
    } else {
        mask = (1ULL << len) - 1;
    }
    return (src >> shift) & mask;
}
867fcf5ef2aSThomas Huth 
868fcf5ef2aSThomas Huth void helper_extrq_r(CPUX86State *env, ZMMReg *d, ZMMReg *s)
869fcf5ef2aSThomas Huth {
870fcf5ef2aSThomas Huth     d->ZMM_Q(0) = helper_extrq(d->ZMM_Q(0), s->ZMM_B(1), s->ZMM_B(0));
871fcf5ef2aSThomas Huth }
872fcf5ef2aSThomas Huth 
873fcf5ef2aSThomas Huth void helper_extrq_i(CPUX86State *env, ZMMReg *d, int index, int length)
874fcf5ef2aSThomas Huth {
875fcf5ef2aSThomas Huth     d->ZMM_Q(0) = helper_extrq(d->ZMM_Q(0), index, length);
876fcf5ef2aSThomas Huth }
877fcf5ef2aSThomas Huth 
878fcf5ef2aSThomas Huth static inline uint64_t helper_insertq(uint64_t src, int shift, int len)
879fcf5ef2aSThomas Huth {
880fcf5ef2aSThomas Huth     uint64_t mask;
881fcf5ef2aSThomas Huth 
882fcf5ef2aSThomas Huth     if (len == 0) {
883fcf5ef2aSThomas Huth         mask = ~0ULL;
884fcf5ef2aSThomas Huth     } else {
885fcf5ef2aSThomas Huth         mask = (1ULL << len) - 1;
886fcf5ef2aSThomas Huth     }
887fcf5ef2aSThomas Huth     return (src & ~(mask << shift)) | ((src & mask) << shift);
888fcf5ef2aSThomas Huth }
889fcf5ef2aSThomas Huth 
890fcf5ef2aSThomas Huth void helper_insertq_r(CPUX86State *env, ZMMReg *d, ZMMReg *s)
891fcf5ef2aSThomas Huth {
892fcf5ef2aSThomas Huth     d->ZMM_Q(0) = helper_insertq(s->ZMM_Q(0), s->ZMM_B(9), s->ZMM_B(8));
893fcf5ef2aSThomas Huth }
894fcf5ef2aSThomas Huth 
895fcf5ef2aSThomas Huth void helper_insertq_i(CPUX86State *env, ZMMReg *d, int index, int length)
896fcf5ef2aSThomas Huth {
897fcf5ef2aSThomas Huth     d->ZMM_Q(0) = helper_insertq(d->ZMM_Q(0), index, length);
898fcf5ef2aSThomas Huth }
899fcf5ef2aSThomas Huth 
900ce4fa29fSPaolo Bonzini void glue(helper_haddps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
901fcf5ef2aSThomas Huth {
902fcf5ef2aSThomas Huth     ZMMReg r;
903fcf5ef2aSThomas Huth 
904fcf5ef2aSThomas Huth     r.ZMM_S(0) = float32_add(d->ZMM_S(0), d->ZMM_S(1), &env->sse_status);
905fcf5ef2aSThomas Huth     r.ZMM_S(1) = float32_add(d->ZMM_S(2), d->ZMM_S(3), &env->sse_status);
906fcf5ef2aSThomas Huth     r.ZMM_S(2) = float32_add(s->ZMM_S(0), s->ZMM_S(1), &env->sse_status);
907fcf5ef2aSThomas Huth     r.ZMM_S(3) = float32_add(s->ZMM_S(2), s->ZMM_S(3), &env->sse_status);
908d22697ddSPaolo Bonzini     MOVE(*d, r);
909fcf5ef2aSThomas Huth }
910fcf5ef2aSThomas Huth 
911ce4fa29fSPaolo Bonzini void glue(helper_haddpd, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
912fcf5ef2aSThomas Huth {
913fcf5ef2aSThomas Huth     ZMMReg r;
914fcf5ef2aSThomas Huth 
915fcf5ef2aSThomas Huth     r.ZMM_D(0) = float64_add(d->ZMM_D(0), d->ZMM_D(1), &env->sse_status);
916fcf5ef2aSThomas Huth     r.ZMM_D(1) = float64_add(s->ZMM_D(0), s->ZMM_D(1), &env->sse_status);
917d22697ddSPaolo Bonzini     MOVE(*d, r);
918fcf5ef2aSThomas Huth }
919fcf5ef2aSThomas Huth 
920ce4fa29fSPaolo Bonzini void glue(helper_hsubps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
921fcf5ef2aSThomas Huth {
922fcf5ef2aSThomas Huth     ZMMReg r;
923fcf5ef2aSThomas Huth 
924fcf5ef2aSThomas Huth     r.ZMM_S(0) = float32_sub(d->ZMM_S(0), d->ZMM_S(1), &env->sse_status);
925fcf5ef2aSThomas Huth     r.ZMM_S(1) = float32_sub(d->ZMM_S(2), d->ZMM_S(3), &env->sse_status);
926fcf5ef2aSThomas Huth     r.ZMM_S(2) = float32_sub(s->ZMM_S(0), s->ZMM_S(1), &env->sse_status);
927fcf5ef2aSThomas Huth     r.ZMM_S(3) = float32_sub(s->ZMM_S(2), s->ZMM_S(3), &env->sse_status);
928d22697ddSPaolo Bonzini     MOVE(*d, r);
929fcf5ef2aSThomas Huth }
930fcf5ef2aSThomas Huth 
931ce4fa29fSPaolo Bonzini void glue(helper_hsubpd, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
932fcf5ef2aSThomas Huth {
933fcf5ef2aSThomas Huth     ZMMReg r;
934fcf5ef2aSThomas Huth 
935fcf5ef2aSThomas Huth     r.ZMM_D(0) = float64_sub(d->ZMM_D(0), d->ZMM_D(1), &env->sse_status);
936fcf5ef2aSThomas Huth     r.ZMM_D(1) = float64_sub(s->ZMM_D(0), s->ZMM_D(1), &env->sse_status);
937d22697ddSPaolo Bonzini     MOVE(*d, r);
938fcf5ef2aSThomas Huth }
939fcf5ef2aSThomas Huth 
940ce4fa29fSPaolo Bonzini void glue(helper_addsubps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
941fcf5ef2aSThomas Huth {
942fcf5ef2aSThomas Huth     d->ZMM_S(0) = float32_sub(d->ZMM_S(0), s->ZMM_S(0), &env->sse_status);
943fcf5ef2aSThomas Huth     d->ZMM_S(1) = float32_add(d->ZMM_S(1), s->ZMM_S(1), &env->sse_status);
944fcf5ef2aSThomas Huth     d->ZMM_S(2) = float32_sub(d->ZMM_S(2), s->ZMM_S(2), &env->sse_status);
945fcf5ef2aSThomas Huth     d->ZMM_S(3) = float32_add(d->ZMM_S(3), s->ZMM_S(3), &env->sse_status);
946fcf5ef2aSThomas Huth }
947fcf5ef2aSThomas Huth 
948ce4fa29fSPaolo Bonzini void glue(helper_addsubpd, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
949fcf5ef2aSThomas Huth {
950fcf5ef2aSThomas Huth     d->ZMM_D(0) = float64_sub(d->ZMM_D(0), s->ZMM_D(0), &env->sse_status);
951fcf5ef2aSThomas Huth     d->ZMM_D(1) = float64_add(d->ZMM_D(1), s->ZMM_D(1), &env->sse_status);
952fcf5ef2aSThomas Huth }
953fcf5ef2aSThomas Huth 
/*
 * SSE_HELPER_CMP(name, F) defines the four compare helpers for one
 * predicate: packed single (ps), scalar single (ss), packed double (pd)
 * and scalar double (sd).  F(size, a, b) is evaluated with d's element as
 * a and s's element as b, and its value is stored over the corresponding
 * integer element of d.  The scalar forms touch element 0 only.
 *
 * XXX: unordered
 */
#define SSE_HELPER_CMP(name, F)                                         \
    void glue(helper_ ## name ## ps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)\
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < 4; i++) {                                       \
            d->ZMM_L(i) = F(32, d->ZMM_S(i), s->ZMM_S(i));              \
        }                                                               \
    }                                                                   \
                                                                        \
    void helper_ ## name ## ss(CPUX86State *env, Reg *d, Reg *s)        \
    {                                                                   \
        d->ZMM_L(0) = F(32, d->ZMM_S(0), s->ZMM_S(0));                  \
    }                                                                   \
                                                                        \
    void glue(helper_ ## name ## pd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)\
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < 2; i++) {                                       \
            d->ZMM_Q(i) = F(64, d->ZMM_D(i), s->ZMM_D(i));              \
        }                                                               \
    }                                                                   \
                                                                        \
    void helper_ ## name ## sd(CPUX86State *env, Reg *d, Reg *s)        \
    {                                                                   \
        d->ZMM_Q(0) = F(64, d->ZMM_D(0), s->ZMM_D(0));                  \
    }
979fcf5ef2aSThomas Huth 
/*
 * Compare predicates for SSE_HELPER_CMP.  Each expands to -1 (all bits set)
 * when the predicate holds and 0 otherwise.  The N-prefixed forms and
 * FPU_CMPORD use the same softfloat test as their counterpart with the two
 * results swapped.  EQ/NEQ and UNORD/ORD go through the *_quiet softfloat
 * entry points; LT/LE and their negations use the plain ones.
 */
#define FPU_CMPEQ(bits, x, y)                                           \
    (float ## bits ## _eq_quiet((x), (y), &env->sse_status) ? -1 : 0)
#define FPU_CMPLT(bits, x, y)                                           \
    (float ## bits ## _lt((x), (y), &env->sse_status) ? -1 : 0)
#define FPU_CMPLE(bits, x, y)                                           \
    (float ## bits ## _le((x), (y), &env->sse_status) ? -1 : 0)
#define FPU_CMPUNORD(bits, x, y)                                        \
    (float ## bits ## _unordered_quiet((x), (y), &env->sse_status) ? -1 : 0)
#define FPU_CMPNEQ(bits, x, y)                                          \
    (float ## bits ## _eq_quiet((x), (y), &env->sse_status) ? 0 : -1)
#define FPU_CMPNLT(bits, x, y)                                          \
    (float ## bits ## _lt((x), (y), &env->sse_status) ? 0 : -1)
#define FPU_CMPNLE(bits, x, y)                                          \
    (float ## bits ## _le((x), (y), &env->sse_status) ? 0 : -1)
#define FPU_CMPORD(bits, x, y)                                          \
    (float ## bits ## _unordered_quiet((x), (y), &env->sse_status) ? 0 : -1)
996fcf5ef2aSThomas Huth 
/*
 * Instantiate the ps/ss/pd/sd compare helpers for each of the eight
 * predicates, pairing the helper-name stem with its FPU_CMP* macro.
 */
997fcf5ef2aSThomas Huth SSE_HELPER_CMP(cmpeq, FPU_CMPEQ)
998fcf5ef2aSThomas Huth SSE_HELPER_CMP(cmplt, FPU_CMPLT)
999fcf5ef2aSThomas Huth SSE_HELPER_CMP(cmple, FPU_CMPLE)
1000fcf5ef2aSThomas Huth SSE_HELPER_CMP(cmpunord, FPU_CMPUNORD)
1001fcf5ef2aSThomas Huth SSE_HELPER_CMP(cmpneq, FPU_CMPNEQ)
1002fcf5ef2aSThomas Huth SSE_HELPER_CMP(cmpnlt, FPU_CMPNLT)
1003fcf5ef2aSThomas Huth SSE_HELPER_CMP(cmpnle, FPU_CMPNLE)
1004fcf5ef2aSThomas Huth SSE_HELPER_CMP(cmpord, FPU_CMPORD)
1005fcf5ef2aSThomas Huth 
1006fcf5ef2aSThomas Huth static const int comis_eflags[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
1007fcf5ef2aSThomas Huth 
1008fcf5ef2aSThomas Huth void helper_ucomiss(CPUX86State *env, Reg *d, Reg *s)
1009fcf5ef2aSThomas Huth {
101071bfd65cSRichard Henderson     FloatRelation ret;
1011fcf5ef2aSThomas Huth     float32 s0, s1;
1012fcf5ef2aSThomas Huth 
1013fcf5ef2aSThomas Huth     s0 = d->ZMM_S(0);
1014fcf5ef2aSThomas Huth     s1 = s->ZMM_S(0);
1015fcf5ef2aSThomas Huth     ret = float32_compare_quiet(s0, s1, &env->sse_status);
1016fcf5ef2aSThomas Huth     CC_SRC = comis_eflags[ret + 1];
1017fcf5ef2aSThomas Huth }
1018fcf5ef2aSThomas Huth 
1019fcf5ef2aSThomas Huth void helper_comiss(CPUX86State *env, Reg *d, Reg *s)
1020fcf5ef2aSThomas Huth {
102171bfd65cSRichard Henderson     FloatRelation ret;
1022fcf5ef2aSThomas Huth     float32 s0, s1;
1023fcf5ef2aSThomas Huth 
1024fcf5ef2aSThomas Huth     s0 = d->ZMM_S(0);
1025fcf5ef2aSThomas Huth     s1 = s->ZMM_S(0);
1026fcf5ef2aSThomas Huth     ret = float32_compare(s0, s1, &env->sse_status);
1027fcf5ef2aSThomas Huth     CC_SRC = comis_eflags[ret + 1];
1028fcf5ef2aSThomas Huth }
1029fcf5ef2aSThomas Huth 
1030fcf5ef2aSThomas Huth void helper_ucomisd(CPUX86State *env, Reg *d, Reg *s)
1031fcf5ef2aSThomas Huth {
103271bfd65cSRichard Henderson     FloatRelation ret;
1033fcf5ef2aSThomas Huth     float64 d0, d1;
1034fcf5ef2aSThomas Huth 
1035fcf5ef2aSThomas Huth     d0 = d->ZMM_D(0);
1036fcf5ef2aSThomas Huth     d1 = s->ZMM_D(0);
1037fcf5ef2aSThomas Huth     ret = float64_compare_quiet(d0, d1, &env->sse_status);
1038fcf5ef2aSThomas Huth     CC_SRC = comis_eflags[ret + 1];
1039fcf5ef2aSThomas Huth }
1040fcf5ef2aSThomas Huth 
1041fcf5ef2aSThomas Huth void helper_comisd(CPUX86State *env, Reg *d, Reg *s)
1042fcf5ef2aSThomas Huth {
104371bfd65cSRichard Henderson     FloatRelation ret;
1044fcf5ef2aSThomas Huth     float64 d0, d1;
1045fcf5ef2aSThomas Huth 
1046fcf5ef2aSThomas Huth     d0 = d->ZMM_D(0);
1047fcf5ef2aSThomas Huth     d1 = s->ZMM_D(0);
1048fcf5ef2aSThomas Huth     ret = float64_compare(d0, d1, &env->sse_status);
1049fcf5ef2aSThomas Huth     CC_SRC = comis_eflags[ret + 1];
1050fcf5ef2aSThomas Huth }
1051fcf5ef2aSThomas Huth 
1052ce4fa29fSPaolo Bonzini uint32_t glue(helper_movmskps, SUFFIX)(CPUX86State *env, Reg *s)
1053fcf5ef2aSThomas Huth {
1054fcf5ef2aSThomas Huth     int b0, b1, b2, b3;
1055fcf5ef2aSThomas Huth 
1056fcf5ef2aSThomas Huth     b0 = s->ZMM_L(0) >> 31;
1057fcf5ef2aSThomas Huth     b1 = s->ZMM_L(1) >> 31;
1058fcf5ef2aSThomas Huth     b2 = s->ZMM_L(2) >> 31;
1059fcf5ef2aSThomas Huth     b3 = s->ZMM_L(3) >> 31;
1060fcf5ef2aSThomas Huth     return b0 | (b1 << 1) | (b2 << 2) | (b3 << 3);
1061fcf5ef2aSThomas Huth }
1062fcf5ef2aSThomas Huth 
1063ce4fa29fSPaolo Bonzini uint32_t glue(helper_movmskpd, SUFFIX)(CPUX86State *env, Reg *s)
1064fcf5ef2aSThomas Huth {
1065fcf5ef2aSThomas Huth     int b0, b1;
1066fcf5ef2aSThomas Huth 
1067fcf5ef2aSThomas Huth     b0 = s->ZMM_L(1) >> 31;
1068fcf5ef2aSThomas Huth     b1 = s->ZMM_L(3) >> 31;
1069fcf5ef2aSThomas Huth     return b0 | (b1 << 1);
1070fcf5ef2aSThomas Huth }
1071fcf5ef2aSThomas Huth 
1072fcf5ef2aSThomas Huth #endif
1073fcf5ef2aSThomas Huth 
1074fcf5ef2aSThomas Huth uint32_t glue(helper_pmovmskb, SUFFIX)(CPUX86State *env, Reg *s)
1075fcf5ef2aSThomas Huth {
1076fcf5ef2aSThomas Huth     uint32_t val;
1077fcf5ef2aSThomas Huth 
1078fcf5ef2aSThomas Huth     val = 0;
1079fcf5ef2aSThomas Huth     val |= (s->B(0) >> 7);
1080fcf5ef2aSThomas Huth     val |= (s->B(1) >> 6) & 0x02;
1081fcf5ef2aSThomas Huth     val |= (s->B(2) >> 5) & 0x04;
1082fcf5ef2aSThomas Huth     val |= (s->B(3) >> 4) & 0x08;
1083fcf5ef2aSThomas Huth     val |= (s->B(4) >> 3) & 0x10;
1084fcf5ef2aSThomas Huth     val |= (s->B(5) >> 2) & 0x20;
1085fcf5ef2aSThomas Huth     val |= (s->B(6) >> 1) & 0x40;
1086fcf5ef2aSThomas Huth     val |= (s->B(7)) & 0x80;
1087fcf5ef2aSThomas Huth #if SHIFT == 1
1088fcf5ef2aSThomas Huth     val |= (s->B(8) << 1) & 0x0100;
1089fcf5ef2aSThomas Huth     val |= (s->B(9) << 2) & 0x0200;
1090fcf5ef2aSThomas Huth     val |= (s->B(10) << 3) & 0x0400;
1091fcf5ef2aSThomas Huth     val |= (s->B(11) << 4) & 0x0800;
1092fcf5ef2aSThomas Huth     val |= (s->B(12) << 5) & 0x1000;
1093fcf5ef2aSThomas Huth     val |= (s->B(13) << 6) & 0x2000;
1094fcf5ef2aSThomas Huth     val |= (s->B(14) << 7) & 0x4000;
1095fcf5ef2aSThomas Huth     val |= (s->B(15) << 8) & 0x8000;
1096fcf5ef2aSThomas Huth #endif
1097fcf5ef2aSThomas Huth     return val;
1098fcf5ef2aSThomas Huth }
1099fcf5ef2aSThomas Huth 
1100fcf5ef2aSThomas Huth void glue(helper_packsswb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
1101fcf5ef2aSThomas Huth {
1102fcf5ef2aSThomas Huth     Reg r;
1103fcf5ef2aSThomas Huth 
1104fcf5ef2aSThomas Huth     r.B(0) = satsb((int16_t)d->W(0));
1105fcf5ef2aSThomas Huth     r.B(1) = satsb((int16_t)d->W(1));
1106fcf5ef2aSThomas Huth     r.B(2) = satsb((int16_t)d->W(2));
1107fcf5ef2aSThomas Huth     r.B(3) = satsb((int16_t)d->W(3));
1108fcf5ef2aSThomas Huth #if SHIFT == 1
1109fcf5ef2aSThomas Huth     r.B(4) = satsb((int16_t)d->W(4));
1110fcf5ef2aSThomas Huth     r.B(5) = satsb((int16_t)d->W(5));
1111fcf5ef2aSThomas Huth     r.B(6) = satsb((int16_t)d->W(6));
1112fcf5ef2aSThomas Huth     r.B(7) = satsb((int16_t)d->W(7));
1113fcf5ef2aSThomas Huth #endif
1114fcf5ef2aSThomas Huth     r.B((4 << SHIFT) + 0) = satsb((int16_t)s->W(0));
1115fcf5ef2aSThomas Huth     r.B((4 << SHIFT) + 1) = satsb((int16_t)s->W(1));
1116fcf5ef2aSThomas Huth     r.B((4 << SHIFT) + 2) = satsb((int16_t)s->W(2));
1117fcf5ef2aSThomas Huth     r.B((4 << SHIFT) + 3) = satsb((int16_t)s->W(3));
1118fcf5ef2aSThomas Huth #if SHIFT == 1
1119fcf5ef2aSThomas Huth     r.B(12) = satsb((int16_t)s->W(4));
1120fcf5ef2aSThomas Huth     r.B(13) = satsb((int16_t)s->W(5));
1121fcf5ef2aSThomas Huth     r.B(14) = satsb((int16_t)s->W(6));
1122fcf5ef2aSThomas Huth     r.B(15) = satsb((int16_t)s->W(7));
1123fcf5ef2aSThomas Huth #endif
1124d22697ddSPaolo Bonzini     MOVE(*d, r);
1125fcf5ef2aSThomas Huth }
1126fcf5ef2aSThomas Huth 
1127fcf5ef2aSThomas Huth void glue(helper_packuswb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
1128fcf5ef2aSThomas Huth {
1129fcf5ef2aSThomas Huth     Reg r;
1130fcf5ef2aSThomas Huth 
1131fcf5ef2aSThomas Huth     r.B(0) = satub((int16_t)d->W(0));
1132fcf5ef2aSThomas Huth     r.B(1) = satub((int16_t)d->W(1));
1133fcf5ef2aSThomas Huth     r.B(2) = satub((int16_t)d->W(2));
1134fcf5ef2aSThomas Huth     r.B(3) = satub((int16_t)d->W(3));
1135fcf5ef2aSThomas Huth #if SHIFT == 1
1136fcf5ef2aSThomas Huth     r.B(4) = satub((int16_t)d->W(4));
1137fcf5ef2aSThomas Huth     r.B(5) = satub((int16_t)d->W(5));
1138fcf5ef2aSThomas Huth     r.B(6) = satub((int16_t)d->W(6));
1139fcf5ef2aSThomas Huth     r.B(7) = satub((int16_t)d->W(7));
1140fcf5ef2aSThomas Huth #endif
1141fcf5ef2aSThomas Huth     r.B((4 << SHIFT) + 0) = satub((int16_t)s->W(0));
1142fcf5ef2aSThomas Huth     r.B((4 << SHIFT) + 1) = satub((int16_t)s->W(1));
1143fcf5ef2aSThomas Huth     r.B((4 << SHIFT) + 2) = satub((int16_t)s->W(2));
1144fcf5ef2aSThomas Huth     r.B((4 << SHIFT) + 3) = satub((int16_t)s->W(3));
1145fcf5ef2aSThomas Huth #if SHIFT == 1
1146fcf5ef2aSThomas Huth     r.B(12) = satub((int16_t)s->W(4));
1147fcf5ef2aSThomas Huth     r.B(13) = satub((int16_t)s->W(5));
1148fcf5ef2aSThomas Huth     r.B(14) = satub((int16_t)s->W(6));
1149fcf5ef2aSThomas Huth     r.B(15) = satub((int16_t)s->W(7));
1150fcf5ef2aSThomas Huth #endif
1151d22697ddSPaolo Bonzini     MOVE(*d, r);
1152fcf5ef2aSThomas Huth }
1153fcf5ef2aSThomas Huth 
1154fcf5ef2aSThomas Huth void glue(helper_packssdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
1155fcf5ef2aSThomas Huth {
1156fcf5ef2aSThomas Huth     Reg r;
1157fcf5ef2aSThomas Huth 
1158fcf5ef2aSThomas Huth     r.W(0) = satsw(d->L(0));
1159fcf5ef2aSThomas Huth     r.W(1) = satsw(d->L(1));
1160fcf5ef2aSThomas Huth #if SHIFT == 1
1161fcf5ef2aSThomas Huth     r.W(2) = satsw(d->L(2));
1162fcf5ef2aSThomas Huth     r.W(3) = satsw(d->L(3));
1163fcf5ef2aSThomas Huth #endif
1164fcf5ef2aSThomas Huth     r.W((2 << SHIFT) + 0) = satsw(s->L(0));
1165fcf5ef2aSThomas Huth     r.W((2 << SHIFT) + 1) = satsw(s->L(1));
1166fcf5ef2aSThomas Huth #if SHIFT == 1
1167fcf5ef2aSThomas Huth     r.W(6) = satsw(s->L(2));
1168fcf5ef2aSThomas Huth     r.W(7) = satsw(s->L(3));
1169fcf5ef2aSThomas Huth #endif
1170d22697ddSPaolo Bonzini     MOVE(*d, r);
1171fcf5ef2aSThomas Huth }
1172fcf5ef2aSThomas Huth 
1173fcf5ef2aSThomas Huth #define UNPCK_OP(base_name, base)                                       \
1174fcf5ef2aSThomas Huth                                                                         \
1175fcf5ef2aSThomas Huth     void glue(helper_punpck ## base_name ## bw, SUFFIX)(CPUX86State *env,\
1176fcf5ef2aSThomas Huth                                                         Reg *d, Reg *s) \
1177fcf5ef2aSThomas Huth     {                                                                   \
1178fcf5ef2aSThomas Huth         Reg r;                                                          \
1179fcf5ef2aSThomas Huth                                                                         \
1180fcf5ef2aSThomas Huth         r.B(0) = d->B((base << (SHIFT + 2)) + 0);                       \
1181fcf5ef2aSThomas Huth         r.B(1) = s->B((base << (SHIFT + 2)) + 0);                       \
1182fcf5ef2aSThomas Huth         r.B(2) = d->B((base << (SHIFT + 2)) + 1);                       \
1183fcf5ef2aSThomas Huth         r.B(3) = s->B((base << (SHIFT + 2)) + 1);                       \
1184fcf5ef2aSThomas Huth         r.B(4) = d->B((base << (SHIFT + 2)) + 2);                       \
1185fcf5ef2aSThomas Huth         r.B(5) = s->B((base << (SHIFT + 2)) + 2);                       \
1186fcf5ef2aSThomas Huth         r.B(6) = d->B((base << (SHIFT + 2)) + 3);                       \
1187fcf5ef2aSThomas Huth         r.B(7) = s->B((base << (SHIFT + 2)) + 3);                       \
1188fcf5ef2aSThomas Huth         XMM_ONLY(                                                       \
1189fcf5ef2aSThomas Huth                  r.B(8) = d->B((base << (SHIFT + 2)) + 4);              \
1190fcf5ef2aSThomas Huth                  r.B(9) = s->B((base << (SHIFT + 2)) + 4);              \
1191fcf5ef2aSThomas Huth                  r.B(10) = d->B((base << (SHIFT + 2)) + 5);             \
1192fcf5ef2aSThomas Huth                  r.B(11) = s->B((base << (SHIFT + 2)) + 5);             \
1193fcf5ef2aSThomas Huth                  r.B(12) = d->B((base << (SHIFT + 2)) + 6);             \
1194fcf5ef2aSThomas Huth                  r.B(13) = s->B((base << (SHIFT + 2)) + 6);             \
1195fcf5ef2aSThomas Huth                  r.B(14) = d->B((base << (SHIFT + 2)) + 7);             \
1196fcf5ef2aSThomas Huth                  r.B(15) = s->B((base << (SHIFT + 2)) + 7);             \
1197fcf5ef2aSThomas Huth                                                                       ) \
1198d22697ddSPaolo Bonzini         MOVE(*d, r);                                                    \
1199fcf5ef2aSThomas Huth     }                                                                   \
1200fcf5ef2aSThomas Huth                                                                         \
1201fcf5ef2aSThomas Huth     void glue(helper_punpck ## base_name ## wd, SUFFIX)(CPUX86State *env,\
1202fcf5ef2aSThomas Huth                                                         Reg *d, Reg *s) \
1203fcf5ef2aSThomas Huth     {                                                                   \
1204fcf5ef2aSThomas Huth         Reg r;                                                          \
1205fcf5ef2aSThomas Huth                                                                         \
1206fcf5ef2aSThomas Huth         r.W(0) = d->W((base << (SHIFT + 1)) + 0);                       \
1207fcf5ef2aSThomas Huth         r.W(1) = s->W((base << (SHIFT + 1)) + 0);                       \
1208fcf5ef2aSThomas Huth         r.W(2) = d->W((base << (SHIFT + 1)) + 1);                       \
1209fcf5ef2aSThomas Huth         r.W(3) = s->W((base << (SHIFT + 1)) + 1);                       \
1210fcf5ef2aSThomas Huth         XMM_ONLY(                                                       \
1211fcf5ef2aSThomas Huth                  r.W(4) = d->W((base << (SHIFT + 1)) + 2);              \
1212fcf5ef2aSThomas Huth                  r.W(5) = s->W((base << (SHIFT + 1)) + 2);              \
1213fcf5ef2aSThomas Huth                  r.W(6) = d->W((base << (SHIFT + 1)) + 3);              \
1214fcf5ef2aSThomas Huth                  r.W(7) = s->W((base << (SHIFT + 1)) + 3);              \
1215fcf5ef2aSThomas Huth                                                                       ) \
1216d22697ddSPaolo Bonzini             MOVE(*d, r);                                                \
1217fcf5ef2aSThomas Huth     }                                                                   \
1218fcf5ef2aSThomas Huth                                                                         \
1219fcf5ef2aSThomas Huth     void glue(helper_punpck ## base_name ## dq, SUFFIX)(CPUX86State *env,\
1220fcf5ef2aSThomas Huth                                                         Reg *d, Reg *s) \
1221fcf5ef2aSThomas Huth     {                                                                   \
1222fcf5ef2aSThomas Huth         Reg r;                                                          \
1223fcf5ef2aSThomas Huth                                                                         \
1224fcf5ef2aSThomas Huth         r.L(0) = d->L((base << SHIFT) + 0);                             \
1225fcf5ef2aSThomas Huth         r.L(1) = s->L((base << SHIFT) + 0);                             \
1226fcf5ef2aSThomas Huth         XMM_ONLY(                                                       \
1227fcf5ef2aSThomas Huth                  r.L(2) = d->L((base << SHIFT) + 1);                    \
1228fcf5ef2aSThomas Huth                  r.L(3) = s->L((base << SHIFT) + 1);                    \
1229fcf5ef2aSThomas Huth                                                                       ) \
1230d22697ddSPaolo Bonzini             MOVE(*d, r);                                                \
1231fcf5ef2aSThomas Huth     }                                                                   \
1232fcf5ef2aSThomas Huth                                                                         \
1233fcf5ef2aSThomas Huth     XMM_ONLY(                                                           \
1234fcf5ef2aSThomas Huth              void glue(helper_punpck ## base_name ## qdq, SUFFIX)(CPUX86State \
1235fcf5ef2aSThomas Huth                                                                   *env, \
1236fcf5ef2aSThomas Huth                                                                   Reg *d, \
1237fcf5ef2aSThomas Huth                                                                   Reg *s) \
1238fcf5ef2aSThomas Huth              {                                                          \
1239fcf5ef2aSThomas Huth                  Reg r;                                                 \
1240fcf5ef2aSThomas Huth                                                                         \
1241fcf5ef2aSThomas Huth                  r.Q(0) = d->Q(base);                                   \
1242fcf5ef2aSThomas Huth                  r.Q(1) = s->Q(base);                                   \
1243d22697ddSPaolo Bonzini                  MOVE(*d, r);                                           \
1244fcf5ef2aSThomas Huth              }                                                          \
1245fcf5ef2aSThomas Huth                                                                         )
1246fcf5ef2aSThomas Huth 
1247fcf5ef2aSThomas Huth UNPCK_OP(l, 0)
1248fcf5ef2aSThomas Huth UNPCK_OP(h, 1)
1249fcf5ef2aSThomas Huth 
1250fcf5ef2aSThomas Huth /* 3DNow! float ops */
1251fcf5ef2aSThomas Huth #if SHIFT == 0
1252fcf5ef2aSThomas Huth void helper_pi2fd(CPUX86State *env, MMXReg *d, MMXReg *s)
1253fcf5ef2aSThomas Huth {
1254fcf5ef2aSThomas Huth     d->MMX_S(0) = int32_to_float32(s->MMX_L(0), &env->mmx_status);
1255fcf5ef2aSThomas Huth     d->MMX_S(1) = int32_to_float32(s->MMX_L(1), &env->mmx_status);
1256fcf5ef2aSThomas Huth }
1257fcf5ef2aSThomas Huth 
1258fcf5ef2aSThomas Huth void helper_pi2fw(CPUX86State *env, MMXReg *d, MMXReg *s)
1259fcf5ef2aSThomas Huth {
1260fcf5ef2aSThomas Huth     d->MMX_S(0) = int32_to_float32((int16_t)s->MMX_W(0), &env->mmx_status);
1261fcf5ef2aSThomas Huth     d->MMX_S(1) = int32_to_float32((int16_t)s->MMX_W(2), &env->mmx_status);
1262fcf5ef2aSThomas Huth }
1263fcf5ef2aSThomas Huth 
1264fcf5ef2aSThomas Huth void helper_pf2id(CPUX86State *env, MMXReg *d, MMXReg *s)
1265fcf5ef2aSThomas Huth {
1266fcf5ef2aSThomas Huth     d->MMX_L(0) = float32_to_int32_round_to_zero(s->MMX_S(0), &env->mmx_status);
1267fcf5ef2aSThomas Huth     d->MMX_L(1) = float32_to_int32_round_to_zero(s->MMX_S(1), &env->mmx_status);
1268fcf5ef2aSThomas Huth }
1269fcf5ef2aSThomas Huth 
1270fcf5ef2aSThomas Huth void helper_pf2iw(CPUX86State *env, MMXReg *d, MMXReg *s)
1271fcf5ef2aSThomas Huth {
1272fcf5ef2aSThomas Huth     d->MMX_L(0) = satsw(float32_to_int32_round_to_zero(s->MMX_S(0),
1273fcf5ef2aSThomas Huth                                                        &env->mmx_status));
1274fcf5ef2aSThomas Huth     d->MMX_L(1) = satsw(float32_to_int32_round_to_zero(s->MMX_S(1),
1275fcf5ef2aSThomas Huth                                                        &env->mmx_status));
1276fcf5ef2aSThomas Huth }
1277fcf5ef2aSThomas Huth 
1278fcf5ef2aSThomas Huth void helper_pfacc(CPUX86State *env, MMXReg *d, MMXReg *s)
1279fcf5ef2aSThomas Huth {
128025bdec79SPaolo Bonzini     float32 r;
1281fcf5ef2aSThomas Huth 
128225bdec79SPaolo Bonzini     r = float32_add(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
128325bdec79SPaolo Bonzini     d->MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
128425bdec79SPaolo Bonzini     d->MMX_S(0) = r;
1285fcf5ef2aSThomas Huth }
1286fcf5ef2aSThomas Huth 
1287fcf5ef2aSThomas Huth void helper_pfadd(CPUX86State *env, MMXReg *d, MMXReg *s)
1288fcf5ef2aSThomas Huth {
1289fcf5ef2aSThomas Huth     d->MMX_S(0) = float32_add(d->MMX_S(0), s->MMX_S(0), &env->mmx_status);
1290fcf5ef2aSThomas Huth     d->MMX_S(1) = float32_add(d->MMX_S(1), s->MMX_S(1), &env->mmx_status);
1291fcf5ef2aSThomas Huth }
1292fcf5ef2aSThomas Huth 
1293fcf5ef2aSThomas Huth void helper_pfcmpeq(CPUX86State *env, MMXReg *d, MMXReg *s)
1294fcf5ef2aSThomas Huth {
1295fcf5ef2aSThomas Huth     d->MMX_L(0) = float32_eq_quiet(d->MMX_S(0), s->MMX_S(0),
1296fcf5ef2aSThomas Huth                                    &env->mmx_status) ? -1 : 0;
1297fcf5ef2aSThomas Huth     d->MMX_L(1) = float32_eq_quiet(d->MMX_S(1), s->MMX_S(1),
1298fcf5ef2aSThomas Huth                                    &env->mmx_status) ? -1 : 0;
1299fcf5ef2aSThomas Huth }
1300fcf5ef2aSThomas Huth 
1301fcf5ef2aSThomas Huth void helper_pfcmpge(CPUX86State *env, MMXReg *d, MMXReg *s)
1302fcf5ef2aSThomas Huth {
1303fcf5ef2aSThomas Huth     d->MMX_L(0) = float32_le(s->MMX_S(0), d->MMX_S(0),
1304fcf5ef2aSThomas Huth                              &env->mmx_status) ? -1 : 0;
1305fcf5ef2aSThomas Huth     d->MMX_L(1) = float32_le(s->MMX_S(1), d->MMX_S(1),
1306fcf5ef2aSThomas Huth                              &env->mmx_status) ? -1 : 0;
1307fcf5ef2aSThomas Huth }
1308fcf5ef2aSThomas Huth 
1309fcf5ef2aSThomas Huth void helper_pfcmpgt(CPUX86State *env, MMXReg *d, MMXReg *s)
1310fcf5ef2aSThomas Huth {
1311fcf5ef2aSThomas Huth     d->MMX_L(0) = float32_lt(s->MMX_S(0), d->MMX_S(0),
1312fcf5ef2aSThomas Huth                              &env->mmx_status) ? -1 : 0;
1313fcf5ef2aSThomas Huth     d->MMX_L(1) = float32_lt(s->MMX_S(1), d->MMX_S(1),
1314fcf5ef2aSThomas Huth                              &env->mmx_status) ? -1 : 0;
1315fcf5ef2aSThomas Huth }
1316fcf5ef2aSThomas Huth 
1317fcf5ef2aSThomas Huth void helper_pfmax(CPUX86State *env, MMXReg *d, MMXReg *s)
1318fcf5ef2aSThomas Huth {
1319fcf5ef2aSThomas Huth     if (float32_lt(d->MMX_S(0), s->MMX_S(0), &env->mmx_status)) {
1320fcf5ef2aSThomas Huth         d->MMX_S(0) = s->MMX_S(0);
1321fcf5ef2aSThomas Huth     }
1322fcf5ef2aSThomas Huth     if (float32_lt(d->MMX_S(1), s->MMX_S(1), &env->mmx_status)) {
1323fcf5ef2aSThomas Huth         d->MMX_S(1) = s->MMX_S(1);
1324fcf5ef2aSThomas Huth     }
1325fcf5ef2aSThomas Huth }
1326fcf5ef2aSThomas Huth 
1327fcf5ef2aSThomas Huth void helper_pfmin(CPUX86State *env, MMXReg *d, MMXReg *s)
1328fcf5ef2aSThomas Huth {
1329fcf5ef2aSThomas Huth     if (float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status)) {
1330fcf5ef2aSThomas Huth         d->MMX_S(0) = s->MMX_S(0);
1331fcf5ef2aSThomas Huth     }
1332fcf5ef2aSThomas Huth     if (float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status)) {
1333fcf5ef2aSThomas Huth         d->MMX_S(1) = s->MMX_S(1);
1334fcf5ef2aSThomas Huth     }
1335fcf5ef2aSThomas Huth }
1336fcf5ef2aSThomas Huth 
1337fcf5ef2aSThomas Huth void helper_pfmul(CPUX86State *env, MMXReg *d, MMXReg *s)
1338fcf5ef2aSThomas Huth {
1339fcf5ef2aSThomas Huth     d->MMX_S(0) = float32_mul(d->MMX_S(0), s->MMX_S(0), &env->mmx_status);
1340fcf5ef2aSThomas Huth     d->MMX_S(1) = float32_mul(d->MMX_S(1), s->MMX_S(1), &env->mmx_status);
1341fcf5ef2aSThomas Huth }
1342fcf5ef2aSThomas Huth 
1343fcf5ef2aSThomas Huth void helper_pfnacc(CPUX86State *env, MMXReg *d, MMXReg *s)
1344fcf5ef2aSThomas Huth {
134525bdec79SPaolo Bonzini     float32 r;
1346fcf5ef2aSThomas Huth 
134725bdec79SPaolo Bonzini     r = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
134825bdec79SPaolo Bonzini     d->MMX_S(1) = float32_sub(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
134925bdec79SPaolo Bonzini     d->MMX_S(0) = r;
1350fcf5ef2aSThomas Huth }
1351fcf5ef2aSThomas Huth 
1352fcf5ef2aSThomas Huth void helper_pfpnacc(CPUX86State *env, MMXReg *d, MMXReg *s)
1353fcf5ef2aSThomas Huth {
135425bdec79SPaolo Bonzini     float32 r;
1355fcf5ef2aSThomas Huth 
135625bdec79SPaolo Bonzini     r = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
135725bdec79SPaolo Bonzini     d->MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
135825bdec79SPaolo Bonzini     d->MMX_S(0) = r;
1359fcf5ef2aSThomas Huth }
1360fcf5ef2aSThomas Huth 
1361fcf5ef2aSThomas Huth void helper_pfrcp(CPUX86State *env, MMXReg *d, MMXReg *s)
1362fcf5ef2aSThomas Huth {
1363fcf5ef2aSThomas Huth     d->MMX_S(0) = float32_div(float32_one, s->MMX_S(0), &env->mmx_status);
1364fcf5ef2aSThomas Huth     d->MMX_S(1) = d->MMX_S(0);
1365fcf5ef2aSThomas Huth }
1366fcf5ef2aSThomas Huth 
1367fcf5ef2aSThomas Huth void helper_pfrsqrt(CPUX86State *env, MMXReg *d, MMXReg *s)
1368fcf5ef2aSThomas Huth {
1369fcf5ef2aSThomas Huth     d->MMX_L(1) = s->MMX_L(0) & 0x7fffffff;
1370fcf5ef2aSThomas Huth     d->MMX_S(1) = float32_div(float32_one,
1371fcf5ef2aSThomas Huth                               float32_sqrt(d->MMX_S(1), &env->mmx_status),
1372fcf5ef2aSThomas Huth                               &env->mmx_status);
1373fcf5ef2aSThomas Huth     d->MMX_L(1) |= s->MMX_L(0) & 0x80000000;
1374fcf5ef2aSThomas Huth     d->MMX_L(0) = d->MMX_L(1);
1375fcf5ef2aSThomas Huth }
1376fcf5ef2aSThomas Huth 
1377fcf5ef2aSThomas Huth void helper_pfsub(CPUX86State *env, MMXReg *d, MMXReg *s)
1378fcf5ef2aSThomas Huth {
1379fcf5ef2aSThomas Huth     d->MMX_S(0) = float32_sub(d->MMX_S(0), s->MMX_S(0), &env->mmx_status);
1380fcf5ef2aSThomas Huth     d->MMX_S(1) = float32_sub(d->MMX_S(1), s->MMX_S(1), &env->mmx_status);
1381fcf5ef2aSThomas Huth }
1382fcf5ef2aSThomas Huth 
1383fcf5ef2aSThomas Huth void helper_pfsubr(CPUX86State *env, MMXReg *d, MMXReg *s)
1384fcf5ef2aSThomas Huth {
1385fcf5ef2aSThomas Huth     d->MMX_S(0) = float32_sub(s->MMX_S(0), d->MMX_S(0), &env->mmx_status);
1386fcf5ef2aSThomas Huth     d->MMX_S(1) = float32_sub(s->MMX_S(1), d->MMX_S(1), &env->mmx_status);
1387fcf5ef2aSThomas Huth }
1388fcf5ef2aSThomas Huth 
1389fcf5ef2aSThomas Huth void helper_pswapd(CPUX86State *env, MMXReg *d, MMXReg *s)
1390fcf5ef2aSThomas Huth {
139125bdec79SPaolo Bonzini     uint32_t r;
1392fcf5ef2aSThomas Huth 
139325bdec79SPaolo Bonzini     r = s->MMX_L(0);
139425bdec79SPaolo Bonzini     d->MMX_L(0) = s->MMX_L(1);
139525bdec79SPaolo Bonzini     d->MMX_L(1) = r;
1396fcf5ef2aSThomas Huth }
1397fcf5ef2aSThomas Huth #endif
1398fcf5ef2aSThomas Huth 
1399fcf5ef2aSThomas Huth /* SSSE3 op helpers */
1400fcf5ef2aSThomas Huth void glue(helper_pshufb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
1401fcf5ef2aSThomas Huth {
1402fcf5ef2aSThomas Huth     int i;
1403fcf5ef2aSThomas Huth     Reg r;
1404fcf5ef2aSThomas Huth 
1405fcf5ef2aSThomas Huth     for (i = 0; i < (8 << SHIFT); i++) {
1406fcf5ef2aSThomas Huth         r.B(i) = (s->B(i) & 0x80) ? 0 : (d->B(s->B(i) & ((8 << SHIFT) - 1)));
1407fcf5ef2aSThomas Huth     }
1408fcf5ef2aSThomas Huth 
1409d22697ddSPaolo Bonzini     MOVE(*d, r);
1410fcf5ef2aSThomas Huth }
1411fcf5ef2aSThomas Huth 
1412fcf5ef2aSThomas Huth void glue(helper_phaddw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
1413fcf5ef2aSThomas Huth {
14142dfbea1aSJanne Grunau 
14152dfbea1aSJanne Grunau     Reg r;
14162dfbea1aSJanne Grunau 
14172dfbea1aSJanne Grunau     r.W(0) = (int16_t)d->W(0) + (int16_t)d->W(1);
14182dfbea1aSJanne Grunau     r.W(1) = (int16_t)d->W(2) + (int16_t)d->W(3);
14192dfbea1aSJanne Grunau     XMM_ONLY(r.W(2) = (int16_t)d->W(4) + (int16_t)d->W(5));
14202dfbea1aSJanne Grunau     XMM_ONLY(r.W(3) = (int16_t)d->W(6) + (int16_t)d->W(7));
14212dfbea1aSJanne Grunau     r.W((2 << SHIFT) + 0) = (int16_t)s->W(0) + (int16_t)s->W(1);
14222dfbea1aSJanne Grunau     r.W((2 << SHIFT) + 1) = (int16_t)s->W(2) + (int16_t)s->W(3);
14232dfbea1aSJanne Grunau     XMM_ONLY(r.W(6) = (int16_t)s->W(4) + (int16_t)s->W(5));
14242dfbea1aSJanne Grunau     XMM_ONLY(r.W(7) = (int16_t)s->W(6) + (int16_t)s->W(7));
14252dfbea1aSJanne Grunau 
1426d22697ddSPaolo Bonzini     MOVE(*d, r);
1427fcf5ef2aSThomas Huth }
1428fcf5ef2aSThomas Huth 
1429fcf5ef2aSThomas Huth void glue(helper_phaddd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
1430fcf5ef2aSThomas Huth {
14312dfbea1aSJanne Grunau     Reg r;
14322dfbea1aSJanne Grunau 
14332dfbea1aSJanne Grunau     r.L(0) = (int32_t)d->L(0) + (int32_t)d->L(1);
14342dfbea1aSJanne Grunau     XMM_ONLY(r.L(1) = (int32_t)d->L(2) + (int32_t)d->L(3));
14352dfbea1aSJanne Grunau     r.L((1 << SHIFT) + 0) = (int32_t)s->L(0) + (int32_t)s->L(1);
14362dfbea1aSJanne Grunau     XMM_ONLY(r.L(3) = (int32_t)s->L(2) + (int32_t)s->L(3));
14372dfbea1aSJanne Grunau 
1438d22697ddSPaolo Bonzini     MOVE(*d, r);
1439fcf5ef2aSThomas Huth }
1440fcf5ef2aSThomas Huth 
1441fcf5ef2aSThomas Huth void glue(helper_phaddsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
1442fcf5ef2aSThomas Huth {
14432dfbea1aSJanne Grunau     Reg r;
14442dfbea1aSJanne Grunau 
14452dfbea1aSJanne Grunau     r.W(0) = satsw((int16_t)d->W(0) + (int16_t)d->W(1));
14462dfbea1aSJanne Grunau     r.W(1) = satsw((int16_t)d->W(2) + (int16_t)d->W(3));
14472dfbea1aSJanne Grunau     XMM_ONLY(r.W(2) = satsw((int16_t)d->W(4) + (int16_t)d->W(5)));
14482dfbea1aSJanne Grunau     XMM_ONLY(r.W(3) = satsw((int16_t)d->W(6) + (int16_t)d->W(7)));
14492dfbea1aSJanne Grunau     r.W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) + (int16_t)s->W(1));
14502dfbea1aSJanne Grunau     r.W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) + (int16_t)s->W(3));
14512dfbea1aSJanne Grunau     XMM_ONLY(r.W(6) = satsw((int16_t)s->W(4) + (int16_t)s->W(5)));
14522dfbea1aSJanne Grunau     XMM_ONLY(r.W(7) = satsw((int16_t)s->W(6) + (int16_t)s->W(7)));
14532dfbea1aSJanne Grunau 
1454d22697ddSPaolo Bonzini     MOVE(*d, r);
1455fcf5ef2aSThomas Huth }
1456fcf5ef2aSThomas Huth 
1457fcf5ef2aSThomas Huth void glue(helper_pmaddubsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
1458fcf5ef2aSThomas Huth {
1459fcf5ef2aSThomas Huth     d->W(0) = satsw((int8_t)s->B(0) * (uint8_t)d->B(0) +
1460fcf5ef2aSThomas Huth                     (int8_t)s->B(1) * (uint8_t)d->B(1));
1461fcf5ef2aSThomas Huth     d->W(1) = satsw((int8_t)s->B(2) * (uint8_t)d->B(2) +
1462fcf5ef2aSThomas Huth                     (int8_t)s->B(3) * (uint8_t)d->B(3));
1463fcf5ef2aSThomas Huth     d->W(2) = satsw((int8_t)s->B(4) * (uint8_t)d->B(4) +
1464fcf5ef2aSThomas Huth                     (int8_t)s->B(5) * (uint8_t)d->B(5));
1465fcf5ef2aSThomas Huth     d->W(3) = satsw((int8_t)s->B(6) * (uint8_t)d->B(6) +
1466fcf5ef2aSThomas Huth                     (int8_t)s->B(7) * (uint8_t)d->B(7));
1467fcf5ef2aSThomas Huth #if SHIFT == 1
1468fcf5ef2aSThomas Huth     d->W(4) = satsw((int8_t)s->B(8) * (uint8_t)d->B(8) +
1469fcf5ef2aSThomas Huth                     (int8_t)s->B(9) * (uint8_t)d->B(9));
1470fcf5ef2aSThomas Huth     d->W(5) = satsw((int8_t)s->B(10) * (uint8_t)d->B(10) +
1471fcf5ef2aSThomas Huth                     (int8_t)s->B(11) * (uint8_t)d->B(11));
1472fcf5ef2aSThomas Huth     d->W(6) = satsw((int8_t)s->B(12) * (uint8_t)d->B(12) +
1473fcf5ef2aSThomas Huth                     (int8_t)s->B(13) * (uint8_t)d->B(13));
1474fcf5ef2aSThomas Huth     d->W(7) = satsw((int8_t)s->B(14) * (uint8_t)d->B(14) +
1475fcf5ef2aSThomas Huth                     (int8_t)s->B(15) * (uint8_t)d->B(15));
1476fcf5ef2aSThomas Huth #endif
1477fcf5ef2aSThomas Huth }
1478fcf5ef2aSThomas Huth 
1479fcf5ef2aSThomas Huth void glue(helper_phsubw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
1480fcf5ef2aSThomas Huth {
148175046ad7SPaolo Bonzini     Reg r;
148275046ad7SPaolo Bonzini 
148375046ad7SPaolo Bonzini     r.W(0) = (int16_t)d->W(0) - (int16_t)d->W(1);
148475046ad7SPaolo Bonzini     r.W(1) = (int16_t)d->W(2) - (int16_t)d->W(3);
148575046ad7SPaolo Bonzini     XMM_ONLY(r.W(2) = (int16_t)d->W(4) - (int16_t)d->W(5));
148675046ad7SPaolo Bonzini     XMM_ONLY(r.W(3) = (int16_t)d->W(6) - (int16_t)d->W(7));
148775046ad7SPaolo Bonzini     r.W((2 << SHIFT) + 0) = (int16_t)s->W(0) - (int16_t)s->W(1);
148875046ad7SPaolo Bonzini     r.W((2 << SHIFT) + 1) = (int16_t)s->W(2) - (int16_t)s->W(3);
148975046ad7SPaolo Bonzini     XMM_ONLY(r.W(6) = (int16_t)s->W(4) - (int16_t)s->W(5));
149075046ad7SPaolo Bonzini     XMM_ONLY(r.W(7) = (int16_t)s->W(6) - (int16_t)s->W(7));
149175046ad7SPaolo Bonzini     MOVE(*d, r);
1492fcf5ef2aSThomas Huth }
1493fcf5ef2aSThomas Huth 
1494fcf5ef2aSThomas Huth void glue(helper_phsubd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
1495fcf5ef2aSThomas Huth {
149675046ad7SPaolo Bonzini     Reg r;
149775046ad7SPaolo Bonzini 
149875046ad7SPaolo Bonzini     r.L(0) = (int32_t)d->L(0) - (int32_t)d->L(1);
149975046ad7SPaolo Bonzini     XMM_ONLY(r.L(1) = (int32_t)d->L(2) - (int32_t)d->L(3));
150075046ad7SPaolo Bonzini     r.L((1 << SHIFT) + 0) = (int32_t)s->L(0) - (int32_t)s->L(1);
150175046ad7SPaolo Bonzini     XMM_ONLY(r.L(3) = (int32_t)s->L(2) - (int32_t)s->L(3));
150275046ad7SPaolo Bonzini     MOVE(*d, r);
1503fcf5ef2aSThomas Huth }
1504fcf5ef2aSThomas Huth 
1505fcf5ef2aSThomas Huth void glue(helper_phsubsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
1506fcf5ef2aSThomas Huth {
150775046ad7SPaolo Bonzini     Reg r;
150875046ad7SPaolo Bonzini 
150975046ad7SPaolo Bonzini     r.W(0) = satsw((int16_t)d->W(0) - (int16_t)d->W(1));
151075046ad7SPaolo Bonzini     r.W(1) = satsw((int16_t)d->W(2) - (int16_t)d->W(3));
151175046ad7SPaolo Bonzini     XMM_ONLY(r.W(2) = satsw((int16_t)d->W(4) - (int16_t)d->W(5)));
151275046ad7SPaolo Bonzini     XMM_ONLY(r.W(3) = satsw((int16_t)d->W(6) - (int16_t)d->W(7)));
151375046ad7SPaolo Bonzini     r.W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) - (int16_t)s->W(1));
151475046ad7SPaolo Bonzini     r.W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) - (int16_t)s->W(3));
151575046ad7SPaolo Bonzini     XMM_ONLY(r.W(6) = satsw((int16_t)s->W(4) - (int16_t)s->W(5)));
151675046ad7SPaolo Bonzini     XMM_ONLY(r.W(7) = satsw((int16_t)s->W(6) - (int16_t)s->W(7)));
151775046ad7SPaolo Bonzini     MOVE(*d, r);
1518fcf5ef2aSThomas Huth }
1519fcf5ef2aSThomas Huth 
1520*ee04a3c8SPaul Brook #define FABSB(x) (x > INT8_MAX  ? -(int8_t)x : x)
1521*ee04a3c8SPaul Brook #define FABSW(x) (x > INT16_MAX ? -(int16_t)x : x)
1522*ee04a3c8SPaul Brook #define FABSL(x) (x > INT32_MAX ? -(int32_t)x : x)
1523*ee04a3c8SPaul Brook SSE_HELPER_1(helper_pabsb, B, 8 << SHIFT, FABSB)
1524*ee04a3c8SPaul Brook SSE_HELPER_1(helper_pabsw, W, 4 << SHIFT, FABSW)
1525*ee04a3c8SPaul Brook SSE_HELPER_1(helper_pabsd, L, 2 << SHIFT, FABSL)
1526fcf5ef2aSThomas Huth 
1527fcf5ef2aSThomas Huth #define FMULHRSW(d, s) (((int16_t) d * (int16_t)s + 0x4000) >> 15)
1528fcf5ef2aSThomas Huth SSE_HELPER_W(helper_pmulhrsw, FMULHRSW)
1529fcf5ef2aSThomas Huth 
1530fcf5ef2aSThomas Huth #define FSIGNB(d, s) (s <= INT8_MAX  ? s ? d : 0 : -(int8_t)d)
1531fcf5ef2aSThomas Huth #define FSIGNW(d, s) (s <= INT16_MAX ? s ? d : 0 : -(int16_t)d)
1532fcf5ef2aSThomas Huth #define FSIGNL(d, s) (s <= INT32_MAX ? s ? d : 0 : -(int32_t)d)
1533fcf5ef2aSThomas Huth SSE_HELPER_B(helper_psignb, FSIGNB)
1534fcf5ef2aSThomas Huth SSE_HELPER_W(helper_psignw, FSIGNW)
1535fcf5ef2aSThomas Huth SSE_HELPER_L(helper_psignd, FSIGNL)
1536fcf5ef2aSThomas Huth 
1537fcf5ef2aSThomas Huth void glue(helper_palignr, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
1538fcf5ef2aSThomas Huth                                   int32_t shift)
1539fcf5ef2aSThomas Huth {
1540fcf5ef2aSThomas Huth     Reg r;
1541fcf5ef2aSThomas Huth 
1542fcf5ef2aSThomas Huth     /* XXX could be checked during translation */
1543fcf5ef2aSThomas Huth     if (shift >= (16 << SHIFT)) {
1544fcf5ef2aSThomas Huth         r.Q(0) = 0;
1545fcf5ef2aSThomas Huth         XMM_ONLY(r.Q(1) = 0);
1546fcf5ef2aSThomas Huth     } else {
1547fcf5ef2aSThomas Huth         shift <<= 3;
1548fcf5ef2aSThomas Huth #define SHR(v, i) (i < 64 && i > -64 ? i > 0 ? v >> (i) : (v << -(i)) : 0)
1549fcf5ef2aSThomas Huth #if SHIFT == 0
1550fcf5ef2aSThomas Huth         r.Q(0) = SHR(s->Q(0), shift - 0) |
1551fcf5ef2aSThomas Huth             SHR(d->Q(0), shift -  64);
1552fcf5ef2aSThomas Huth #else
1553fcf5ef2aSThomas Huth         r.Q(0) = SHR(s->Q(0), shift - 0) |
1554fcf5ef2aSThomas Huth             SHR(s->Q(1), shift -  64) |
1555fcf5ef2aSThomas Huth             SHR(d->Q(0), shift - 128) |
1556fcf5ef2aSThomas Huth             SHR(d->Q(1), shift - 192);
1557fcf5ef2aSThomas Huth         r.Q(1) = SHR(s->Q(0), shift + 64) |
1558fcf5ef2aSThomas Huth             SHR(s->Q(1), shift -   0) |
1559fcf5ef2aSThomas Huth             SHR(d->Q(0), shift -  64) |
1560fcf5ef2aSThomas Huth             SHR(d->Q(1), shift - 128);
1561fcf5ef2aSThomas Huth #endif
1562fcf5ef2aSThomas Huth #undef SHR
1563fcf5ef2aSThomas Huth     }
1564fcf5ef2aSThomas Huth 
1565d22697ddSPaolo Bonzini     MOVE(*d, r);
1566fcf5ef2aSThomas Huth }
1567fcf5ef2aSThomas Huth 
1568fcf5ef2aSThomas Huth #define XMM0 (env->xmm_regs[0])
1569fcf5ef2aSThomas Huth 
1570fcf5ef2aSThomas Huth #if SHIFT == 1
1571fcf5ef2aSThomas Huth #define SSE_HELPER_V(name, elem, num, F)                                \
1572fcf5ef2aSThomas Huth     void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)           \
1573fcf5ef2aSThomas Huth     {                                                                   \
1574fcf5ef2aSThomas Huth         d->elem(0) = F(d->elem(0), s->elem(0), XMM0.elem(0));           \
1575fcf5ef2aSThomas Huth         d->elem(1) = F(d->elem(1), s->elem(1), XMM0.elem(1));           \
1576fcf5ef2aSThomas Huth         if (num > 2) {                                                  \
1577fcf5ef2aSThomas Huth             d->elem(2) = F(d->elem(2), s->elem(2), XMM0.elem(2));       \
1578fcf5ef2aSThomas Huth             d->elem(3) = F(d->elem(3), s->elem(3), XMM0.elem(3));       \
1579fcf5ef2aSThomas Huth             if (num > 4) {                                              \
1580fcf5ef2aSThomas Huth                 d->elem(4) = F(d->elem(4), s->elem(4), XMM0.elem(4));   \
1581fcf5ef2aSThomas Huth                 d->elem(5) = F(d->elem(5), s->elem(5), XMM0.elem(5));   \
1582fcf5ef2aSThomas Huth                 d->elem(6) = F(d->elem(6), s->elem(6), XMM0.elem(6));   \
1583fcf5ef2aSThomas Huth                 d->elem(7) = F(d->elem(7), s->elem(7), XMM0.elem(7));   \
1584fcf5ef2aSThomas Huth                 if (num > 8) {                                          \
1585fcf5ef2aSThomas Huth                     d->elem(8) = F(d->elem(8), s->elem(8), XMM0.elem(8)); \
1586fcf5ef2aSThomas Huth                     d->elem(9) = F(d->elem(9), s->elem(9), XMM0.elem(9)); \
1587fcf5ef2aSThomas Huth                     d->elem(10) = F(d->elem(10), s->elem(10), XMM0.elem(10)); \
1588fcf5ef2aSThomas Huth                     d->elem(11) = F(d->elem(11), s->elem(11), XMM0.elem(11)); \
1589fcf5ef2aSThomas Huth                     d->elem(12) = F(d->elem(12), s->elem(12), XMM0.elem(12)); \
1590fcf5ef2aSThomas Huth                     d->elem(13) = F(d->elem(13), s->elem(13), XMM0.elem(13)); \
1591fcf5ef2aSThomas Huth                     d->elem(14) = F(d->elem(14), s->elem(14), XMM0.elem(14)); \
1592fcf5ef2aSThomas Huth                     d->elem(15) = F(d->elem(15), s->elem(15), XMM0.elem(15)); \
1593fcf5ef2aSThomas Huth                 }                                                       \
1594fcf5ef2aSThomas Huth             }                                                           \
1595fcf5ef2aSThomas Huth         }                                                               \
1596fcf5ef2aSThomas Huth     }
1597fcf5ef2aSThomas Huth 
/*
 * Generate a helper that updates d element by element with
 * F(d[i], s[i], bit i of imm).
 *
 * The number of elements processed is 2, 4, 8 or 16: the smallest of
 * those values that is >= num (capped at 16).
 */
#define SSE_HELPER_I(name, elem, num, F)                                \
    void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t imm) \
    {                                                                   \
        const int lanes = (num) > 8 ? 16 : (num) > 4 ? 8 :              \
                          (num) > 2 ? 4 : 2;                            \
        int i;                                                          \
                                                                        \
        for (i = 0; i < lanes; i++) {                                   \
            d->elem(i) = F(d->elem(i), s->elem(i), ((imm >> i) & 1));   \
        }                                                               \
    }
1630fcf5ef2aSThomas Huth 
1631fcf5ef2aSThomas Huth /* SSE4.1 op helpers */
1632fcf5ef2aSThomas Huth #define FBLENDVB(d, s, m) ((m & 0x80) ? s : d)
1633fcf5ef2aSThomas Huth #define FBLENDVPS(d, s, m) ((m & 0x80000000) ? s : d)
1634fcf5ef2aSThomas Huth #define FBLENDVPD(d, s, m) ((m & 0x8000000000000000LL) ? s : d)
1635fcf5ef2aSThomas Huth SSE_HELPER_V(helper_pblendvb, B, 16, FBLENDVB)
1636fcf5ef2aSThomas Huth SSE_HELPER_V(helper_blendvps, L, 4, FBLENDVPS)
1637fcf5ef2aSThomas Huth SSE_HELPER_V(helper_blendvpd, Q, 2, FBLENDVPD)
1638fcf5ef2aSThomas Huth 
1639fcf5ef2aSThomas Huth void glue(helper_ptest, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
1640fcf5ef2aSThomas Huth {
1641fcf5ef2aSThomas Huth     uint64_t zf = (s->Q(0) &  d->Q(0)) | (s->Q(1) &  d->Q(1));
1642fcf5ef2aSThomas Huth     uint64_t cf = (s->Q(0) & ~d->Q(0)) | (s->Q(1) & ~d->Q(1));
1643fcf5ef2aSThomas Huth 
1644fcf5ef2aSThomas Huth     CC_SRC = (zf ? 0 : CC_Z) | (cf ? 0 : CC_C);
1645fcf5ef2aSThomas Huth }
1646fcf5ef2aSThomas Huth 
1647fcf5ef2aSThomas Huth #define SSE_HELPER_F(name, elem, num, F)        \
1648fcf5ef2aSThomas Huth     void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)     \
1649fcf5ef2aSThomas Huth     {                                           \
1650fcf5ef2aSThomas Huth         if (num > 2) {                          \
1651fcf5ef2aSThomas Huth             if (num > 4) {                      \
1652fcf5ef2aSThomas Huth                 d->elem(7) = F(7);              \
1653c6a56c8eSJoseph Myers                 d->elem(6) = F(6);              \
1654c6a56c8eSJoseph Myers                 d->elem(5) = F(5);              \
1655c6a56c8eSJoseph Myers                 d->elem(4) = F(4);              \
1656fcf5ef2aSThomas Huth             }                                   \
1657c6a56c8eSJoseph Myers             d->elem(3) = F(3);                  \
1658c6a56c8eSJoseph Myers             d->elem(2) = F(2);                  \
1659fcf5ef2aSThomas Huth         }                                       \
1660c6a56c8eSJoseph Myers         d->elem(1) = F(1);                      \
1661c6a56c8eSJoseph Myers         d->elem(0) = F(0);                      \
1662fcf5ef2aSThomas Huth     }
1663fcf5ef2aSThomas Huth 
1664fcf5ef2aSThomas Huth SSE_HELPER_F(helper_pmovsxbw, W, 8, (int8_t) s->B)
1665fcf5ef2aSThomas Huth SSE_HELPER_F(helper_pmovsxbd, L, 4, (int8_t) s->B)
1666fcf5ef2aSThomas Huth SSE_HELPER_F(helper_pmovsxbq, Q, 2, (int8_t) s->B)
1667fcf5ef2aSThomas Huth SSE_HELPER_F(helper_pmovsxwd, L, 4, (int16_t) s->W)
1668fcf5ef2aSThomas Huth SSE_HELPER_F(helper_pmovsxwq, Q, 2, (int16_t) s->W)
1669fcf5ef2aSThomas Huth SSE_HELPER_F(helper_pmovsxdq, Q, 2, (int32_t) s->L)
1670fcf5ef2aSThomas Huth SSE_HELPER_F(helper_pmovzxbw, W, 8, s->B)
1671fcf5ef2aSThomas Huth SSE_HELPER_F(helper_pmovzxbd, L, 4, s->B)
1672fcf5ef2aSThomas Huth SSE_HELPER_F(helper_pmovzxbq, Q, 2, s->B)
1673fcf5ef2aSThomas Huth SSE_HELPER_F(helper_pmovzxwd, L, 4, s->W)
1674fcf5ef2aSThomas Huth SSE_HELPER_F(helper_pmovzxwq, Q, 2, s->W)
1675fcf5ef2aSThomas Huth SSE_HELPER_F(helper_pmovzxdq, Q, 2, s->L)
1676fcf5ef2aSThomas Huth 
1677fcf5ef2aSThomas Huth void glue(helper_pmuldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
1678fcf5ef2aSThomas Huth {
1679fcf5ef2aSThomas Huth     d->Q(0) = (int64_t)(int32_t) d->L(0) * (int32_t) s->L(0);
1680fcf5ef2aSThomas Huth     d->Q(1) = (int64_t)(int32_t) d->L(2) * (int32_t) s->L(2);
1681fcf5ef2aSThomas Huth }
1682fcf5ef2aSThomas Huth 
1683fcf5ef2aSThomas Huth #define FCMPEQQ(d, s) (d == s ? -1 : 0)
1684fcf5ef2aSThomas Huth SSE_HELPER_Q(helper_pcmpeqq, FCMPEQQ)
1685fcf5ef2aSThomas Huth 
1686fcf5ef2aSThomas Huth void glue(helper_packusdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
1687fcf5ef2aSThomas Huth {
168880e19606SJoseph Myers     Reg r;
168980e19606SJoseph Myers 
169080e19606SJoseph Myers     r.W(0) = satuw((int32_t) d->L(0));
169180e19606SJoseph Myers     r.W(1) = satuw((int32_t) d->L(1));
169280e19606SJoseph Myers     r.W(2) = satuw((int32_t) d->L(2));
169380e19606SJoseph Myers     r.W(3) = satuw((int32_t) d->L(3));
169480e19606SJoseph Myers     r.W(4) = satuw((int32_t) s->L(0));
169580e19606SJoseph Myers     r.W(5) = satuw((int32_t) s->L(1));
169680e19606SJoseph Myers     r.W(6) = satuw((int32_t) s->L(2));
169780e19606SJoseph Myers     r.W(7) = satuw((int32_t) s->L(3));
1698d22697ddSPaolo Bonzini     MOVE(*d, r);
1699fcf5ef2aSThomas Huth }
1700fcf5ef2aSThomas Huth 
1701fcf5ef2aSThomas Huth #define FMINSB(d, s) MIN((int8_t)d, (int8_t)s)
1702fcf5ef2aSThomas Huth #define FMINSD(d, s) MIN((int32_t)d, (int32_t)s)
1703fcf5ef2aSThomas Huth #define FMAXSB(d, s) MAX((int8_t)d, (int8_t)s)
1704fcf5ef2aSThomas Huth #define FMAXSD(d, s) MAX((int32_t)d, (int32_t)s)
1705fcf5ef2aSThomas Huth SSE_HELPER_B(helper_pminsb, FMINSB)
1706fcf5ef2aSThomas Huth SSE_HELPER_L(helper_pminsd, FMINSD)
1707fcf5ef2aSThomas Huth SSE_HELPER_W(helper_pminuw, MIN)
1708fcf5ef2aSThomas Huth SSE_HELPER_L(helper_pminud, MIN)
1709fcf5ef2aSThomas Huth SSE_HELPER_B(helper_pmaxsb, FMAXSB)
1710fcf5ef2aSThomas Huth SSE_HELPER_L(helper_pmaxsd, FMAXSD)
1711fcf5ef2aSThomas Huth SSE_HELPER_W(helper_pmaxuw, MAX)
1712fcf5ef2aSThomas Huth SSE_HELPER_L(helper_pmaxud, MAX)
1713fcf5ef2aSThomas Huth 
1714fcf5ef2aSThomas Huth #define FMULLD(d, s) ((int32_t)d * (int32_t)s)
1715fcf5ef2aSThomas Huth SSE_HELPER_L(helper_pmulld, FMULLD)
1716fcf5ef2aSThomas Huth 
1717fcf5ef2aSThomas Huth void glue(helper_phminposuw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
1718fcf5ef2aSThomas Huth {
1719fcf5ef2aSThomas Huth     int idx = 0;
1720fcf5ef2aSThomas Huth 
1721fcf5ef2aSThomas Huth     if (s->W(1) < s->W(idx)) {
1722fcf5ef2aSThomas Huth         idx = 1;
1723fcf5ef2aSThomas Huth     }
1724fcf5ef2aSThomas Huth     if (s->W(2) < s->W(idx)) {
1725fcf5ef2aSThomas Huth         idx = 2;
1726fcf5ef2aSThomas Huth     }
1727fcf5ef2aSThomas Huth     if (s->W(3) < s->W(idx)) {
1728fcf5ef2aSThomas Huth         idx = 3;
1729fcf5ef2aSThomas Huth     }
1730fcf5ef2aSThomas Huth     if (s->W(4) < s->W(idx)) {
1731fcf5ef2aSThomas Huth         idx = 4;
1732fcf5ef2aSThomas Huth     }
1733fcf5ef2aSThomas Huth     if (s->W(5) < s->W(idx)) {
1734fcf5ef2aSThomas Huth         idx = 5;
1735fcf5ef2aSThomas Huth     }
1736fcf5ef2aSThomas Huth     if (s->W(6) < s->W(idx)) {
1737fcf5ef2aSThomas Huth         idx = 6;
1738fcf5ef2aSThomas Huth     }
1739fcf5ef2aSThomas Huth     if (s->W(7) < s->W(idx)) {
1740fcf5ef2aSThomas Huth         idx = 7;
1741fcf5ef2aSThomas Huth     }
1742fcf5ef2aSThomas Huth 
1743fcf5ef2aSThomas Huth     d->W(0) = s->W(idx);
1744aa406feaSJoseph Myers     d->W(1) = idx;
1745aa406feaSJoseph Myers     d->L(1) = 0;
1746aa406feaSJoseph Myers     d->Q(1) = 0;
1747fcf5ef2aSThomas Huth }
1748fcf5ef2aSThomas Huth 
1749fcf5ef2aSThomas Huth void glue(helper_roundps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
1750fcf5ef2aSThomas Huth                                   uint32_t mode)
1751fcf5ef2aSThomas Huth {
1752418b0f93SJoseph Myers     uint8_t old_flags = get_float_exception_flags(&env->sse_status);
1753fcf5ef2aSThomas Huth     signed char prev_rounding_mode;
1754fcf5ef2aSThomas Huth 
1755fcf5ef2aSThomas Huth     prev_rounding_mode = env->sse_status.float_rounding_mode;
1756fcf5ef2aSThomas Huth     if (!(mode & (1 << 2))) {
1757fcf5ef2aSThomas Huth         switch (mode & 3) {
1758fcf5ef2aSThomas Huth         case 0:
1759fcf5ef2aSThomas Huth             set_float_rounding_mode(float_round_nearest_even, &env->sse_status);
1760fcf5ef2aSThomas Huth             break;
1761fcf5ef2aSThomas Huth         case 1:
1762fcf5ef2aSThomas Huth             set_float_rounding_mode(float_round_down, &env->sse_status);
1763fcf5ef2aSThomas Huth             break;
1764fcf5ef2aSThomas Huth         case 2:
1765fcf5ef2aSThomas Huth             set_float_rounding_mode(float_round_up, &env->sse_status);
1766fcf5ef2aSThomas Huth             break;
1767fcf5ef2aSThomas Huth         case 3:
1768fcf5ef2aSThomas Huth             set_float_rounding_mode(float_round_to_zero, &env->sse_status);
1769fcf5ef2aSThomas Huth             break;
1770fcf5ef2aSThomas Huth         }
1771fcf5ef2aSThomas Huth     }
1772fcf5ef2aSThomas Huth 
1773fcf5ef2aSThomas Huth     d->ZMM_S(0) = float32_round_to_int(s->ZMM_S(0), &env->sse_status);
1774fcf5ef2aSThomas Huth     d->ZMM_S(1) = float32_round_to_int(s->ZMM_S(1), &env->sse_status);
1775fcf5ef2aSThomas Huth     d->ZMM_S(2) = float32_round_to_int(s->ZMM_S(2), &env->sse_status);
1776fcf5ef2aSThomas Huth     d->ZMM_S(3) = float32_round_to_int(s->ZMM_S(3), &env->sse_status);
1777fcf5ef2aSThomas Huth 
1778418b0f93SJoseph Myers     if (mode & (1 << 3) && !(old_flags & float_flag_inexact)) {
1779fcf5ef2aSThomas Huth         set_float_exception_flags(get_float_exception_flags(&env->sse_status) &
1780fcf5ef2aSThomas Huth                                   ~float_flag_inexact,
1781fcf5ef2aSThomas Huth                                   &env->sse_status);
1782fcf5ef2aSThomas Huth     }
1783fcf5ef2aSThomas Huth     env->sse_status.float_rounding_mode = prev_rounding_mode;
1784fcf5ef2aSThomas Huth }
1785fcf5ef2aSThomas Huth 
1786fcf5ef2aSThomas Huth void glue(helper_roundpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
1787fcf5ef2aSThomas Huth                                   uint32_t mode)
1788fcf5ef2aSThomas Huth {
1789418b0f93SJoseph Myers     uint8_t old_flags = get_float_exception_flags(&env->sse_status);
1790fcf5ef2aSThomas Huth     signed char prev_rounding_mode;
1791fcf5ef2aSThomas Huth 
1792fcf5ef2aSThomas Huth     prev_rounding_mode = env->sse_status.float_rounding_mode;
1793fcf5ef2aSThomas Huth     if (!(mode & (1 << 2))) {
1794fcf5ef2aSThomas Huth         switch (mode & 3) {
1795fcf5ef2aSThomas Huth         case 0:
1796fcf5ef2aSThomas Huth             set_float_rounding_mode(float_round_nearest_even, &env->sse_status);
1797fcf5ef2aSThomas Huth             break;
1798fcf5ef2aSThomas Huth         case 1:
1799fcf5ef2aSThomas Huth             set_float_rounding_mode(float_round_down, &env->sse_status);
1800fcf5ef2aSThomas Huth             break;
1801fcf5ef2aSThomas Huth         case 2:
1802fcf5ef2aSThomas Huth             set_float_rounding_mode(float_round_up, &env->sse_status);
1803fcf5ef2aSThomas Huth             break;
1804fcf5ef2aSThomas Huth         case 3:
1805fcf5ef2aSThomas Huth             set_float_rounding_mode(float_round_to_zero, &env->sse_status);
1806fcf5ef2aSThomas Huth             break;
1807fcf5ef2aSThomas Huth         }
1808fcf5ef2aSThomas Huth     }
1809fcf5ef2aSThomas Huth 
1810fcf5ef2aSThomas Huth     d->ZMM_D(0) = float64_round_to_int(s->ZMM_D(0), &env->sse_status);
1811fcf5ef2aSThomas Huth     d->ZMM_D(1) = float64_round_to_int(s->ZMM_D(1), &env->sse_status);
1812fcf5ef2aSThomas Huth 
1813418b0f93SJoseph Myers     if (mode & (1 << 3) && !(old_flags & float_flag_inexact)) {
1814fcf5ef2aSThomas Huth         set_float_exception_flags(get_float_exception_flags(&env->sse_status) &
1815fcf5ef2aSThomas Huth                                   ~float_flag_inexact,
1816fcf5ef2aSThomas Huth                                   &env->sse_status);
1817fcf5ef2aSThomas Huth     }
1818fcf5ef2aSThomas Huth     env->sse_status.float_rounding_mode = prev_rounding_mode;
1819fcf5ef2aSThomas Huth }
1820fcf5ef2aSThomas Huth 
1821fcf5ef2aSThomas Huth void glue(helper_roundss, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
1822fcf5ef2aSThomas Huth                                   uint32_t mode)
1823fcf5ef2aSThomas Huth {
1824418b0f93SJoseph Myers     uint8_t old_flags = get_float_exception_flags(&env->sse_status);
1825fcf5ef2aSThomas Huth     signed char prev_rounding_mode;
1826fcf5ef2aSThomas Huth 
1827fcf5ef2aSThomas Huth     prev_rounding_mode = env->sse_status.float_rounding_mode;
1828fcf5ef2aSThomas Huth     if (!(mode & (1 << 2))) {
1829fcf5ef2aSThomas Huth         switch (mode & 3) {
1830fcf5ef2aSThomas Huth         case 0:
1831fcf5ef2aSThomas Huth             set_float_rounding_mode(float_round_nearest_even, &env->sse_status);
1832fcf5ef2aSThomas Huth             break;
1833fcf5ef2aSThomas Huth         case 1:
1834fcf5ef2aSThomas Huth             set_float_rounding_mode(float_round_down, &env->sse_status);
1835fcf5ef2aSThomas Huth             break;
1836fcf5ef2aSThomas Huth         case 2:
1837fcf5ef2aSThomas Huth             set_float_rounding_mode(float_round_up, &env->sse_status);
1838fcf5ef2aSThomas Huth             break;
1839fcf5ef2aSThomas Huth         case 3:
1840fcf5ef2aSThomas Huth             set_float_rounding_mode(float_round_to_zero, &env->sse_status);
1841fcf5ef2aSThomas Huth             break;
1842fcf5ef2aSThomas Huth         }
1843fcf5ef2aSThomas Huth     }
1844fcf5ef2aSThomas Huth 
1845fcf5ef2aSThomas Huth     d->ZMM_S(0) = float32_round_to_int(s->ZMM_S(0), &env->sse_status);
1846fcf5ef2aSThomas Huth 
1847418b0f93SJoseph Myers     if (mode & (1 << 3) && !(old_flags & float_flag_inexact)) {
1848fcf5ef2aSThomas Huth         set_float_exception_flags(get_float_exception_flags(&env->sse_status) &
1849fcf5ef2aSThomas Huth                                   ~float_flag_inexact,
1850fcf5ef2aSThomas Huth                                   &env->sse_status);
1851fcf5ef2aSThomas Huth     }
1852fcf5ef2aSThomas Huth     env->sse_status.float_rounding_mode = prev_rounding_mode;
1853fcf5ef2aSThomas Huth }
1854fcf5ef2aSThomas Huth 
1855fcf5ef2aSThomas Huth void glue(helper_roundsd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
1856fcf5ef2aSThomas Huth                                   uint32_t mode)
1857fcf5ef2aSThomas Huth {
1858418b0f93SJoseph Myers     uint8_t old_flags = get_float_exception_flags(&env->sse_status);
1859fcf5ef2aSThomas Huth     signed char prev_rounding_mode;
1860fcf5ef2aSThomas Huth 
1861fcf5ef2aSThomas Huth     prev_rounding_mode = env->sse_status.float_rounding_mode;
1862fcf5ef2aSThomas Huth     if (!(mode & (1 << 2))) {
1863fcf5ef2aSThomas Huth         switch (mode & 3) {
1864fcf5ef2aSThomas Huth         case 0:
1865fcf5ef2aSThomas Huth             set_float_rounding_mode(float_round_nearest_even, &env->sse_status);
1866fcf5ef2aSThomas Huth             break;
1867fcf5ef2aSThomas Huth         case 1:
1868fcf5ef2aSThomas Huth             set_float_rounding_mode(float_round_down, &env->sse_status);
1869fcf5ef2aSThomas Huth             break;
1870fcf5ef2aSThomas Huth         case 2:
1871fcf5ef2aSThomas Huth             set_float_rounding_mode(float_round_up, &env->sse_status);
1872fcf5ef2aSThomas Huth             break;
1873fcf5ef2aSThomas Huth         case 3:
1874fcf5ef2aSThomas Huth             set_float_rounding_mode(float_round_to_zero, &env->sse_status);
1875fcf5ef2aSThomas Huth             break;
1876fcf5ef2aSThomas Huth         }
1877fcf5ef2aSThomas Huth     }
1878fcf5ef2aSThomas Huth 
1879fcf5ef2aSThomas Huth     d->ZMM_D(0) = float64_round_to_int(s->ZMM_D(0), &env->sse_status);
1880fcf5ef2aSThomas Huth 
1881418b0f93SJoseph Myers     if (mode & (1 << 3) && !(old_flags & float_flag_inexact)) {
1882fcf5ef2aSThomas Huth         set_float_exception_flags(get_float_exception_flags(&env->sse_status) &
1883fcf5ef2aSThomas Huth                                   ~float_flag_inexact,
1884fcf5ef2aSThomas Huth                                   &env->sse_status);
1885fcf5ef2aSThomas Huth     }
1886fcf5ef2aSThomas Huth     env->sse_status.float_rounding_mode = prev_rounding_mode;
1887fcf5ef2aSThomas Huth }
1888fcf5ef2aSThomas Huth 
1889fcf5ef2aSThomas Huth #define FBLENDP(d, s, m) (m ? s : d)
1890fcf5ef2aSThomas Huth SSE_HELPER_I(helper_blendps, L, 4, FBLENDP)
1891fcf5ef2aSThomas Huth SSE_HELPER_I(helper_blendpd, Q, 2, FBLENDP)
1892fcf5ef2aSThomas Huth SSE_HELPER_I(helper_pblendw, W, 8, FBLENDP)
1893fcf5ef2aSThomas Huth 
1894fcf5ef2aSThomas Huth void glue(helper_dpps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mask)
1895fcf5ef2aSThomas Huth {
1896bf30ad8cSPaolo Bonzini     float32 prod1, prod2, temp2, temp3, temp4;
1897fcf5ef2aSThomas Huth 
1898bf30ad8cSPaolo Bonzini     /*
1899bf30ad8cSPaolo Bonzini      * We must evaluate (A+B)+(C+D), not ((A+B)+C)+D
1900bf30ad8cSPaolo Bonzini      * to correctly round the intermediate results
1901bf30ad8cSPaolo Bonzini      */
1902fcf5ef2aSThomas Huth     if (mask & (1 << 4)) {
1903bf30ad8cSPaolo Bonzini         prod1 = float32_mul(d->ZMM_S(0), s->ZMM_S(0), &env->sse_status);
1904bf30ad8cSPaolo Bonzini     } else {
1905bf30ad8cSPaolo Bonzini         prod1 = float32_zero;
1906fcf5ef2aSThomas Huth     }
1907fcf5ef2aSThomas Huth     if (mask & (1 << 5)) {
1908bf30ad8cSPaolo Bonzini         prod2 = float32_mul(d->ZMM_S(1), s->ZMM_S(1), &env->sse_status);
1909bf30ad8cSPaolo Bonzini     } else {
1910bf30ad8cSPaolo Bonzini         prod2 = float32_zero;
1911fcf5ef2aSThomas Huth     }
1912bf30ad8cSPaolo Bonzini     temp2 = float32_add(prod1, prod2, &env->sse_status);
1913fcf5ef2aSThomas Huth     if (mask & (1 << 6)) {
1914bf30ad8cSPaolo Bonzini         prod1 = float32_mul(d->ZMM_S(2), s->ZMM_S(2), &env->sse_status);
1915bf30ad8cSPaolo Bonzini     } else {
1916bf30ad8cSPaolo Bonzini         prod1 = float32_zero;
1917fcf5ef2aSThomas Huth     }
1918fcf5ef2aSThomas Huth     if (mask & (1 << 7)) {
1919bf30ad8cSPaolo Bonzini         prod2 = float32_mul(d->ZMM_S(3), s->ZMM_S(3), &env->sse_status);
1920bf30ad8cSPaolo Bonzini     } else {
1921bf30ad8cSPaolo Bonzini         prod2 = float32_zero;
1922fcf5ef2aSThomas Huth     }
1923bf30ad8cSPaolo Bonzini     temp3 = float32_add(prod1, prod2, &env->sse_status);
1924bf30ad8cSPaolo Bonzini     temp4 = float32_add(temp2, temp3, &env->sse_status);
1925bf30ad8cSPaolo Bonzini 
1926bf30ad8cSPaolo Bonzini     d->ZMM_S(0) = (mask & (1 << 0)) ? temp4 : float32_zero;
1927bf30ad8cSPaolo Bonzini     d->ZMM_S(1) = (mask & (1 << 1)) ? temp4 : float32_zero;
1928bf30ad8cSPaolo Bonzini     d->ZMM_S(2) = (mask & (1 << 2)) ? temp4 : float32_zero;
1929bf30ad8cSPaolo Bonzini     d->ZMM_S(3) = (mask & (1 << 3)) ? temp4 : float32_zero;
1930fcf5ef2aSThomas Huth }
1931fcf5ef2aSThomas Huth 
1932fcf5ef2aSThomas Huth void glue(helper_dppd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mask)
1933fcf5ef2aSThomas Huth {
1934bf30ad8cSPaolo Bonzini     float64 prod1, prod2, temp2;
1935fcf5ef2aSThomas Huth 
1936fcf5ef2aSThomas Huth     if (mask & (1 << 4)) {
1937bf30ad8cSPaolo Bonzini         prod1 = float64_mul(d->ZMM_D(0), s->ZMM_D(0), &env->sse_status);
1938bf30ad8cSPaolo Bonzini     } else {
1939bf30ad8cSPaolo Bonzini         prod1 = float64_zero;
1940fcf5ef2aSThomas Huth     }
1941fcf5ef2aSThomas Huth     if (mask & (1 << 5)) {
1942bf30ad8cSPaolo Bonzini         prod2 = float64_mul(d->ZMM_D(1), s->ZMM_D(1), &env->sse_status);
1943bf30ad8cSPaolo Bonzini     } else {
1944bf30ad8cSPaolo Bonzini         prod2 = float64_zero;
1945fcf5ef2aSThomas Huth     }
1946bf30ad8cSPaolo Bonzini     temp2 = float64_add(prod1, prod2, &env->sse_status);
1947bf30ad8cSPaolo Bonzini     d->ZMM_D(0) = (mask & (1 << 0)) ? temp2 : float64_zero;
1948bf30ad8cSPaolo Bonzini     d->ZMM_D(1) = (mask & (1 << 1)) ? temp2 : float64_zero;
1949fcf5ef2aSThomas Huth }
1950fcf5ef2aSThomas Huth 
1951fcf5ef2aSThomas Huth void glue(helper_mpsadbw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
1952fcf5ef2aSThomas Huth                                   uint32_t offset)
1953fcf5ef2aSThomas Huth {
1954fcf5ef2aSThomas Huth     int s0 = (offset & 3) << 2;
1955fcf5ef2aSThomas Huth     int d0 = (offset & 4) << 0;
1956fcf5ef2aSThomas Huth     int i;
1957fcf5ef2aSThomas Huth     Reg r;
1958fcf5ef2aSThomas Huth 
1959fcf5ef2aSThomas Huth     for (i = 0; i < 8; i++, d0++) {
1960fcf5ef2aSThomas Huth         r.W(i) = 0;
1961fcf5ef2aSThomas Huth         r.W(i) += abs1(d->B(d0 + 0) - s->B(s0 + 0));
1962fcf5ef2aSThomas Huth         r.W(i) += abs1(d->B(d0 + 1) - s->B(s0 + 1));
1963fcf5ef2aSThomas Huth         r.W(i) += abs1(d->B(d0 + 2) - s->B(s0 + 2));
1964fcf5ef2aSThomas Huth         r.W(i) += abs1(d->B(d0 + 3) - s->B(s0 + 3));
1965fcf5ef2aSThomas Huth     }
1966fcf5ef2aSThomas Huth 
1967d22697ddSPaolo Bonzini     MOVE(*d, r);
1968fcf5ef2aSThomas Huth }
1969fcf5ef2aSThomas Huth 
1970fcf5ef2aSThomas Huth /* SSE4.2 op helpers */
1971fcf5ef2aSThomas Huth #define FCMPGTQ(d, s) ((int64_t)d > (int64_t)s ? -1 : 0)
1972fcf5ef2aSThomas Huth SSE_HELPER_Q(helper_pcmpgtq, FCMPGTQ)
1973fcf5ef2aSThomas Huth 
1974fcf5ef2aSThomas Huth static inline int pcmp_elen(CPUX86State *env, int reg, uint32_t ctrl)
1975fcf5ef2aSThomas Huth {
1976d1da229fSPaul Brook     target_long val, limit;
1977fcf5ef2aSThomas Huth 
1978fcf5ef2aSThomas Huth     /* Presence of REX.W is indicated by a bit higher than 7 set */
1979fcf5ef2aSThomas Huth     if (ctrl >> 8) {
1980d1da229fSPaul Brook         val = (target_long)env->regs[reg];
1981fcf5ef2aSThomas Huth     } else {
1982d1da229fSPaul Brook         val = (int32_t)env->regs[reg];
1983fcf5ef2aSThomas Huth     }
1984fcf5ef2aSThomas Huth     if (ctrl & 1) {
1985d1da229fSPaul Brook         limit = 8;
1986fcf5ef2aSThomas Huth     } else {
1987d1da229fSPaul Brook         limit = 16;
1988fcf5ef2aSThomas Huth     }
1989d1da229fSPaul Brook     if ((val > limit) || (val < -limit)) {
1990d1da229fSPaul Brook         return limit;
1991fcf5ef2aSThomas Huth     }
1992d1da229fSPaul Brook     return abs1(val);
1993fcf5ef2aSThomas Huth }
1994fcf5ef2aSThomas Huth 
1995fcf5ef2aSThomas Huth static inline int pcmp_ilen(Reg *r, uint8_t ctrl)
1996fcf5ef2aSThomas Huth {
1997fcf5ef2aSThomas Huth     int val = 0;
1998fcf5ef2aSThomas Huth 
1999fcf5ef2aSThomas Huth     if (ctrl & 1) {
2000fcf5ef2aSThomas Huth         while (val < 8 && r->W(val)) {
2001fcf5ef2aSThomas Huth             val++;
2002fcf5ef2aSThomas Huth         }
2003fcf5ef2aSThomas Huth     } else {
2004fcf5ef2aSThomas Huth         while (val < 16 && r->B(val)) {
2005fcf5ef2aSThomas Huth             val++;
2006fcf5ef2aSThomas Huth         }
2007fcf5ef2aSThomas Huth     }
2008fcf5ef2aSThomas Huth 
2009fcf5ef2aSThomas Huth     return val;
2010fcf5ef2aSThomas Huth }
2011fcf5ef2aSThomas Huth 
2012fcf5ef2aSThomas Huth static inline int pcmp_val(Reg *r, uint8_t ctrl, int i)
2013fcf5ef2aSThomas Huth {
2014fcf5ef2aSThomas Huth     switch ((ctrl >> 0) & 3) {
2015fcf5ef2aSThomas Huth     case 0:
2016fcf5ef2aSThomas Huth         return r->B(i);
2017fcf5ef2aSThomas Huth     case 1:
2018fcf5ef2aSThomas Huth         return r->W(i);
2019fcf5ef2aSThomas Huth     case 2:
2020fcf5ef2aSThomas Huth         return (int8_t)r->B(i);
2021fcf5ef2aSThomas Huth     case 3:
2022fcf5ef2aSThomas Huth     default:
2023fcf5ef2aSThomas Huth         return (int16_t)r->W(i);
2024fcf5ef2aSThomas Huth     }
2025fcf5ef2aSThomas Huth }
2026fcf5ef2aSThomas Huth 
2027fcf5ef2aSThomas Huth static inline unsigned pcmpxstrx(CPUX86State *env, Reg *d, Reg *s,
2028fcf5ef2aSThomas Huth                                  int8_t ctrl, int valids, int validd)
2029fcf5ef2aSThomas Huth {
2030fcf5ef2aSThomas Huth     unsigned int res = 0;
2031fcf5ef2aSThomas Huth     int v;
2032fcf5ef2aSThomas Huth     int j, i;
2033fcf5ef2aSThomas Huth     int upper = (ctrl & 1) ? 7 : 15;
2034fcf5ef2aSThomas Huth 
2035fcf5ef2aSThomas Huth     valids--;
2036fcf5ef2aSThomas Huth     validd--;
2037fcf5ef2aSThomas Huth 
2038fcf5ef2aSThomas Huth     CC_SRC = (valids < upper ? CC_Z : 0) | (validd < upper ? CC_S : 0);
2039fcf5ef2aSThomas Huth 
2040fcf5ef2aSThomas Huth     switch ((ctrl >> 2) & 3) {
2041fcf5ef2aSThomas Huth     case 0:
2042fcf5ef2aSThomas Huth         for (j = valids; j >= 0; j--) {
2043fcf5ef2aSThomas Huth             res <<= 1;
2044fcf5ef2aSThomas Huth             v = pcmp_val(s, ctrl, j);
2045fcf5ef2aSThomas Huth             for (i = validd; i >= 0; i--) {
2046fcf5ef2aSThomas Huth                 res |= (v == pcmp_val(d, ctrl, i));
2047fcf5ef2aSThomas Huth             }
2048fcf5ef2aSThomas Huth         }
2049fcf5ef2aSThomas Huth         break;
2050fcf5ef2aSThomas Huth     case 1:
2051fcf5ef2aSThomas Huth         for (j = valids; j >= 0; j--) {
2052fcf5ef2aSThomas Huth             res <<= 1;
2053fcf5ef2aSThomas Huth             v = pcmp_val(s, ctrl, j);
2054fcf5ef2aSThomas Huth             for (i = ((validd - 1) | 1); i >= 0; i -= 2) {
2055fcf5ef2aSThomas Huth                 res |= (pcmp_val(d, ctrl, i - 0) >= v &&
2056fcf5ef2aSThomas Huth                         pcmp_val(d, ctrl, i - 1) <= v);
2057fcf5ef2aSThomas Huth             }
2058fcf5ef2aSThomas Huth         }
2059fcf5ef2aSThomas Huth         break;
2060fcf5ef2aSThomas Huth     case 2:
2061fcf5ef2aSThomas Huth         res = (1 << (upper - MAX(valids, validd))) - 1;
2062fcf5ef2aSThomas Huth         res <<= MAX(valids, validd) - MIN(valids, validd);
2063fcf5ef2aSThomas Huth         for (i = MIN(valids, validd); i >= 0; i--) {
2064fcf5ef2aSThomas Huth             res <<= 1;
2065fcf5ef2aSThomas Huth             v = pcmp_val(s, ctrl, i);
2066fcf5ef2aSThomas Huth             res |= (v == pcmp_val(d, ctrl, i));
2067fcf5ef2aSThomas Huth         }
2068fcf5ef2aSThomas Huth         break;
2069fcf5ef2aSThomas Huth     case 3:
2070ae35eea7SJoseph Myers         if (validd == -1) {
2071ae35eea7SJoseph Myers             res = (2 << upper) - 1;
2072ae35eea7SJoseph Myers             break;
2073ae35eea7SJoseph Myers         }
2074bc921b27SJoseph Myers         for (j = valids == upper ? valids : valids - validd; j >= 0; j--) {
2075fcf5ef2aSThomas Huth             res <<= 1;
2076fcf5ef2aSThomas Huth             v = 1;
2077bc921b27SJoseph Myers             for (i = MIN(valids - j, validd); i >= 0; i--) {
2078fcf5ef2aSThomas Huth                 v &= (pcmp_val(s, ctrl, i + j) == pcmp_val(d, ctrl, i));
2079fcf5ef2aSThomas Huth             }
2080fcf5ef2aSThomas Huth             res |= v;
2081fcf5ef2aSThomas Huth         }
2082fcf5ef2aSThomas Huth         break;
2083fcf5ef2aSThomas Huth     }
2084fcf5ef2aSThomas Huth 
2085fcf5ef2aSThomas Huth     switch ((ctrl >> 4) & 3) {
2086fcf5ef2aSThomas Huth     case 1:
2087fcf5ef2aSThomas Huth         res ^= (2 << upper) - 1;
2088fcf5ef2aSThomas Huth         break;
2089fcf5ef2aSThomas Huth     case 3:
2090fcf5ef2aSThomas Huth         res ^= (1 << (valids + 1)) - 1;
2091fcf5ef2aSThomas Huth         break;
2092fcf5ef2aSThomas Huth     }
2093fcf5ef2aSThomas Huth 
2094fcf5ef2aSThomas Huth     if (res) {
2095fcf5ef2aSThomas Huth         CC_SRC |= CC_C;
2096fcf5ef2aSThomas Huth     }
2097fcf5ef2aSThomas Huth     if (res & 1) {
2098fcf5ef2aSThomas Huth         CC_SRC |= CC_O;
2099fcf5ef2aSThomas Huth     }
2100fcf5ef2aSThomas Huth 
2101fcf5ef2aSThomas Huth     return res;
2102fcf5ef2aSThomas Huth }
2103fcf5ef2aSThomas Huth 
2104fcf5ef2aSThomas Huth void glue(helper_pcmpestri, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
2105fcf5ef2aSThomas Huth                                     uint32_t ctrl)
2106fcf5ef2aSThomas Huth {
2107fcf5ef2aSThomas Huth     unsigned int res = pcmpxstrx(env, d, s, ctrl,
2108fcf5ef2aSThomas Huth                                  pcmp_elen(env, R_EDX, ctrl),
2109fcf5ef2aSThomas Huth                                  pcmp_elen(env, R_EAX, ctrl));
2110fcf5ef2aSThomas Huth 
2111fcf5ef2aSThomas Huth     if (res) {
2112fcf5ef2aSThomas Huth         env->regs[R_ECX] = (ctrl & (1 << 6)) ? 31 - clz32(res) : ctz32(res);
2113fcf5ef2aSThomas Huth     } else {
2114fcf5ef2aSThomas Huth         env->regs[R_ECX] = 16 >> (ctrl & (1 << 0));
2115fcf5ef2aSThomas Huth     }
2116fcf5ef2aSThomas Huth }
2117fcf5ef2aSThomas Huth 
2118fcf5ef2aSThomas Huth void glue(helper_pcmpestrm, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
2119fcf5ef2aSThomas Huth                                     uint32_t ctrl)
2120fcf5ef2aSThomas Huth {
2121fcf5ef2aSThomas Huth     int i;
2122fcf5ef2aSThomas Huth     unsigned int res = pcmpxstrx(env, d, s, ctrl,
2123fcf5ef2aSThomas Huth                                  pcmp_elen(env, R_EDX, ctrl),
2124fcf5ef2aSThomas Huth                                  pcmp_elen(env, R_EAX, ctrl));
2125fcf5ef2aSThomas Huth 
2126fcf5ef2aSThomas Huth     if ((ctrl >> 6) & 1) {
2127fcf5ef2aSThomas Huth         if (ctrl & 1) {
2128fcf5ef2aSThomas Huth             for (i = 0; i < 8; i++, res >>= 1) {
2129fcf5ef2aSThomas Huth                 env->xmm_regs[0].W(i) = (res & 1) ? ~0 : 0;
2130fcf5ef2aSThomas Huth             }
2131fcf5ef2aSThomas Huth         } else {
2132fcf5ef2aSThomas Huth             for (i = 0; i < 16; i++, res >>= 1) {
2133fcf5ef2aSThomas Huth                 env->xmm_regs[0].B(i) = (res & 1) ? ~0 : 0;
2134fcf5ef2aSThomas Huth             }
2135fcf5ef2aSThomas Huth         }
2136fcf5ef2aSThomas Huth     } else {
2137fcf5ef2aSThomas Huth         env->xmm_regs[0].Q(1) = 0;
2138fcf5ef2aSThomas Huth         env->xmm_regs[0].Q(0) = res;
2139fcf5ef2aSThomas Huth     }
2140fcf5ef2aSThomas Huth }
2141fcf5ef2aSThomas Huth 
2142fcf5ef2aSThomas Huth void glue(helper_pcmpistri, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
2143fcf5ef2aSThomas Huth                                     uint32_t ctrl)
2144fcf5ef2aSThomas Huth {
2145fcf5ef2aSThomas Huth     unsigned int res = pcmpxstrx(env, d, s, ctrl,
2146fcf5ef2aSThomas Huth                                  pcmp_ilen(s, ctrl),
2147fcf5ef2aSThomas Huth                                  pcmp_ilen(d, ctrl));
2148fcf5ef2aSThomas Huth 
2149fcf5ef2aSThomas Huth     if (res) {
2150fcf5ef2aSThomas Huth         env->regs[R_ECX] = (ctrl & (1 << 6)) ? 31 - clz32(res) : ctz32(res);
2151fcf5ef2aSThomas Huth     } else {
2152fcf5ef2aSThomas Huth         env->regs[R_ECX] = 16 >> (ctrl & (1 << 0));
2153fcf5ef2aSThomas Huth     }
2154fcf5ef2aSThomas Huth }
2155fcf5ef2aSThomas Huth 
2156fcf5ef2aSThomas Huth void glue(helper_pcmpistrm, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
2157fcf5ef2aSThomas Huth                                     uint32_t ctrl)
2158fcf5ef2aSThomas Huth {
2159fcf5ef2aSThomas Huth     int i;
2160fcf5ef2aSThomas Huth     unsigned int res = pcmpxstrx(env, d, s, ctrl,
2161fcf5ef2aSThomas Huth                                  pcmp_ilen(s, ctrl),
2162fcf5ef2aSThomas Huth                                  pcmp_ilen(d, ctrl));
2163fcf5ef2aSThomas Huth 
2164fcf5ef2aSThomas Huth     if ((ctrl >> 6) & 1) {
2165fcf5ef2aSThomas Huth         if (ctrl & 1) {
2166fcf5ef2aSThomas Huth             for (i = 0; i < 8; i++, res >>= 1) {
2167fcf5ef2aSThomas Huth                 env->xmm_regs[0].W(i) = (res & 1) ? ~0 : 0;
2168fcf5ef2aSThomas Huth             }
2169fcf5ef2aSThomas Huth         } else {
2170fcf5ef2aSThomas Huth             for (i = 0; i < 16; i++, res >>= 1) {
2171fcf5ef2aSThomas Huth                 env->xmm_regs[0].B(i) = (res & 1) ? ~0 : 0;
2172fcf5ef2aSThomas Huth             }
2173fcf5ef2aSThomas Huth         }
2174fcf5ef2aSThomas Huth     } else {
2175fcf5ef2aSThomas Huth         env->xmm_regs[0].Q(1) = 0;
2176fcf5ef2aSThomas Huth         env->xmm_regs[0].Q(0) = res;
2177fcf5ef2aSThomas Huth     }
2178fcf5ef2aSThomas Huth }
2179fcf5ef2aSThomas Huth 
2180fcf5ef2aSThomas Huth #define CRCPOLY        0x1edc6f41
2181fcf5ef2aSThomas Huth #define CRCPOLY_BITREV 0x82f63b78
2182fcf5ef2aSThomas Huth target_ulong helper_crc32(uint32_t crc1, target_ulong msg, uint32_t len)
2183fcf5ef2aSThomas Huth {
2184fcf5ef2aSThomas Huth     target_ulong crc = (msg & ((target_ulong) -1 >>
2185fcf5ef2aSThomas Huth                                (TARGET_LONG_BITS - len))) ^ crc1;
2186fcf5ef2aSThomas Huth 
2187fcf5ef2aSThomas Huth     while (len--) {
2188fcf5ef2aSThomas Huth         crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_BITREV : 0);
2189fcf5ef2aSThomas Huth     }
2190fcf5ef2aSThomas Huth 
2191fcf5ef2aSThomas Huth     return crc;
2192fcf5ef2aSThomas Huth }
2193fcf5ef2aSThomas Huth 
2194fcf5ef2aSThomas Huth void glue(helper_pclmulqdq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
2195fcf5ef2aSThomas Huth                                     uint32_t ctrl)
2196fcf5ef2aSThomas Huth {
2197fcf5ef2aSThomas Huth     uint64_t ah, al, b, resh, resl;
2198fcf5ef2aSThomas Huth 
2199fcf5ef2aSThomas Huth     ah = 0;
2200fcf5ef2aSThomas Huth     al = d->Q((ctrl & 1) != 0);
2201fcf5ef2aSThomas Huth     b = s->Q((ctrl & 16) != 0);
2202fcf5ef2aSThomas Huth     resh = resl = 0;
2203fcf5ef2aSThomas Huth 
2204fcf5ef2aSThomas Huth     while (b) {
2205fcf5ef2aSThomas Huth         if (b & 1) {
2206fcf5ef2aSThomas Huth             resl ^= al;
2207fcf5ef2aSThomas Huth             resh ^= ah;
2208fcf5ef2aSThomas Huth         }
2209fcf5ef2aSThomas Huth         ah = (ah << 1) | (al >> 63);
2210fcf5ef2aSThomas Huth         al <<= 1;
2211fcf5ef2aSThomas Huth         b >>= 1;
2212fcf5ef2aSThomas Huth     }
2213fcf5ef2aSThomas Huth 
2214fcf5ef2aSThomas Huth     d->Q(0) = resl;
2215fcf5ef2aSThomas Huth     d->Q(1) = resh;
2216fcf5ef2aSThomas Huth }
2217fcf5ef2aSThomas Huth 
2218fcf5ef2aSThomas Huth void glue(helper_aesdec, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
2219fcf5ef2aSThomas Huth {
2220fcf5ef2aSThomas Huth     int i;
2221fcf5ef2aSThomas Huth     Reg st = *d;
2222fcf5ef2aSThomas Huth     Reg rk = *s;
2223fcf5ef2aSThomas Huth 
2224fcf5ef2aSThomas Huth     for (i = 0 ; i < 4 ; i++) {
2225fcf5ef2aSThomas Huth         d->L(i) = rk.L(i) ^ bswap32(AES_Td0[st.B(AES_ishifts[4*i+0])] ^
2226fcf5ef2aSThomas Huth                                     AES_Td1[st.B(AES_ishifts[4*i+1])] ^
2227fcf5ef2aSThomas Huth                                     AES_Td2[st.B(AES_ishifts[4*i+2])] ^
2228fcf5ef2aSThomas Huth                                     AES_Td3[st.B(AES_ishifts[4*i+3])]);
2229fcf5ef2aSThomas Huth     }
2230fcf5ef2aSThomas Huth }
2231fcf5ef2aSThomas Huth 
2232fcf5ef2aSThomas Huth void glue(helper_aesdeclast, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
2233fcf5ef2aSThomas Huth {
2234fcf5ef2aSThomas Huth     int i;
2235fcf5ef2aSThomas Huth     Reg st = *d;
2236fcf5ef2aSThomas Huth     Reg rk = *s;
2237fcf5ef2aSThomas Huth 
2238fcf5ef2aSThomas Huth     for (i = 0; i < 16; i++) {
2239fcf5ef2aSThomas Huth         d->B(i) = rk.B(i) ^ (AES_isbox[st.B(AES_ishifts[i])]);
2240fcf5ef2aSThomas Huth     }
2241fcf5ef2aSThomas Huth }
2242fcf5ef2aSThomas Huth 
2243fcf5ef2aSThomas Huth void glue(helper_aesenc, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
2244fcf5ef2aSThomas Huth {
2245fcf5ef2aSThomas Huth     int i;
2246fcf5ef2aSThomas Huth     Reg st = *d;
2247fcf5ef2aSThomas Huth     Reg rk = *s;
2248fcf5ef2aSThomas Huth 
2249fcf5ef2aSThomas Huth     for (i = 0 ; i < 4 ; i++) {
2250fcf5ef2aSThomas Huth         d->L(i) = rk.L(i) ^ bswap32(AES_Te0[st.B(AES_shifts[4*i+0])] ^
2251fcf5ef2aSThomas Huth                                     AES_Te1[st.B(AES_shifts[4*i+1])] ^
2252fcf5ef2aSThomas Huth                                     AES_Te2[st.B(AES_shifts[4*i+2])] ^
2253fcf5ef2aSThomas Huth                                     AES_Te3[st.B(AES_shifts[4*i+3])]);
2254fcf5ef2aSThomas Huth     }
2255fcf5ef2aSThomas Huth }
2256fcf5ef2aSThomas Huth 
2257fcf5ef2aSThomas Huth void glue(helper_aesenclast, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
2258fcf5ef2aSThomas Huth {
2259fcf5ef2aSThomas Huth     int i;
2260fcf5ef2aSThomas Huth     Reg st = *d;
2261fcf5ef2aSThomas Huth     Reg rk = *s;
2262fcf5ef2aSThomas Huth 
2263fcf5ef2aSThomas Huth     for (i = 0; i < 16; i++) {
2264fcf5ef2aSThomas Huth         d->B(i) = rk.B(i) ^ (AES_sbox[st.B(AES_shifts[i])]);
2265fcf5ef2aSThomas Huth     }
2266fcf5ef2aSThomas Huth 
2267fcf5ef2aSThomas Huth }
2268fcf5ef2aSThomas Huth 
2269fcf5ef2aSThomas Huth void glue(helper_aesimc, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
2270fcf5ef2aSThomas Huth {
2271fcf5ef2aSThomas Huth     int i;
2272fcf5ef2aSThomas Huth     Reg tmp = *s;
2273fcf5ef2aSThomas Huth 
2274fcf5ef2aSThomas Huth     for (i = 0 ; i < 4 ; i++) {
2275fcf5ef2aSThomas Huth         d->L(i) = bswap32(AES_imc[tmp.B(4*i+0)][0] ^
2276fcf5ef2aSThomas Huth                           AES_imc[tmp.B(4*i+1)][1] ^
2277fcf5ef2aSThomas Huth                           AES_imc[tmp.B(4*i+2)][2] ^
2278fcf5ef2aSThomas Huth                           AES_imc[tmp.B(4*i+3)][3]);
2279fcf5ef2aSThomas Huth     }
2280fcf5ef2aSThomas Huth }
2281fcf5ef2aSThomas Huth 
2282fcf5ef2aSThomas Huth void glue(helper_aeskeygenassist, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
2283fcf5ef2aSThomas Huth                                           uint32_t ctrl)
2284fcf5ef2aSThomas Huth {
2285fcf5ef2aSThomas Huth     int i;
2286fcf5ef2aSThomas Huth     Reg tmp = *s;
2287fcf5ef2aSThomas Huth 
2288fcf5ef2aSThomas Huth     for (i = 0 ; i < 4 ; i++) {
2289fcf5ef2aSThomas Huth         d->B(i) = AES_sbox[tmp.B(i + 4)];
2290fcf5ef2aSThomas Huth         d->B(i + 8) = AES_sbox[tmp.B(i + 12)];
2291fcf5ef2aSThomas Huth     }
2292fcf5ef2aSThomas Huth     d->L(1) = (d->L(0) << 24 | d->L(0) >> 8) ^ ctrl;
2293fcf5ef2aSThomas Huth     d->L(3) = (d->L(2) << 24 | d->L(2) >> 8) ^ ctrl;
2294fcf5ef2aSThomas Huth }
2295fcf5ef2aSThomas Huth #endif
2296fcf5ef2aSThomas Huth 
/*
 * Drop the SHIFT-dependent helper macros set up at the top of this file
 * (presumably so the includer can define SHIFT again and re-include it).
 */
#undef SHIFT
#undef SUFFIX
#undef SIZE
#undef Reg
#undef XMM_ONLY
#undef B
#undef W
#undef L
#undef Q
2306