xref: /openbmc/qemu/target/mips/tcg/lmmi_helper.c (revision 1be5a765c08cee3a9587c8a8d3fc2ea247b13f9c)
1a2b0a27dSPhilippe Mathieu-Daudé /*
2a2b0a27dSPhilippe Mathieu-Daudé  *  Loongson Multimedia Instruction emulation helpers for QEMU.
3a2b0a27dSPhilippe Mathieu-Daudé  *
4a2b0a27dSPhilippe Mathieu-Daudé  *  Copyright (c) 2011  Richard Henderson <rth@twiddle.net>
5a2b0a27dSPhilippe Mathieu-Daudé  *
6a2b0a27dSPhilippe Mathieu-Daudé  * This library is free software; you can redistribute it and/or
7a2b0a27dSPhilippe Mathieu-Daudé  * modify it under the terms of the GNU Lesser General Public
8a2b0a27dSPhilippe Mathieu-Daudé  * License as published by the Free Software Foundation; either
9a2b0a27dSPhilippe Mathieu-Daudé  * version 2.1 of the License, or (at your option) any later version.
10a2b0a27dSPhilippe Mathieu-Daudé  *
11a2b0a27dSPhilippe Mathieu-Daudé  * This library is distributed in the hope that it will be useful,
12a2b0a27dSPhilippe Mathieu-Daudé  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13a2b0a27dSPhilippe Mathieu-Daudé  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14a2b0a27dSPhilippe Mathieu-Daudé  * Lesser General Public License for more details.
15a2b0a27dSPhilippe Mathieu-Daudé  *
16a2b0a27dSPhilippe Mathieu-Daudé  * You should have received a copy of the GNU Lesser General Public
17a2b0a27dSPhilippe Mathieu-Daudé  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18a2b0a27dSPhilippe Mathieu-Daudé  */
19a2b0a27dSPhilippe Mathieu-Daudé 
20a2b0a27dSPhilippe Mathieu-Daudé #include "qemu/osdep.h"
21a2b0a27dSPhilippe Mathieu-Daudé #include "cpu.h"
22a2b0a27dSPhilippe Mathieu-Daudé #include "exec/helper-proto.h"
23a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * One 64-bit value viewed as packed lanes of different widths.  All
 * members overlay the same storage, so which array index corresponds to
 * which bits of 'd' depends on the host byte order.  Helpers that treat
 * every lane identically can index the arrays directly; helpers that
 * care about lane position must compensate for the host byte order.
 */
typedef union LMIValue {
    uint8_t  ub[8];     /* eight unsigned bytes */
    int8_t   sb[8];     /* eight signed bytes */
    uint16_t uh[4];     /* four unsigned halfwords */
    int16_t  sh[4];     /* four signed halfwords */
    uint32_t uw[2];     /* two unsigned words */
    int32_t  sw[2];     /* two signed words */
    uint64_t d;         /* the whole doubleword */
} LMIValue;
38a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Lane-index fixup for the host byte order: expands to its argument on
 * big-endian hosts and to 0 on little-endian hosts, so that XORing it
 * into an array index selects the same logical lane on either host.
 * The argument is parenthesized so that compound expressions expand
 * safely.
 */
#if HOST_BIG_ENDIAN
# define BYTE_ORDER_XOR(N) (N)
#else
# define BYTE_ORDER_XOR(N) 0
#endif
45a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Saturation helpers.  The signed variants clamp to both ends of the
 * target range; the unsigned variants clamp only the upper bound, so
 * callers must make sure the value is not negative.  Arguments are
 * parenthesized so compound expressions expand correctly; note that
 * each argument is still evaluated more than once.
 */
#define SATSB(x)  ((x) < -0x80 ? -0x80 : (x) > 0x7f ? 0x7f : (x))
#define SATUB(x)  ((x) > 0xff ? 0xff : (x))

#define SATSH(x)  ((x) < -0x8000 ? -0x8000 : (x) > 0x7fff ? 0x7fff : (x))
#define SATUH(x)  ((x) > 0xffff ? 0xffff : (x))

#define SATSW(x) \
    ((x) < -0x80000000ll ? -0x80000000ll : (x) > 0x7fffffff ? 0x7fffffff : (x))
#define SATUW(x)  ((x) > 0xffffffffull ? 0xffffffffull : (x))
55a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Add the eight signed byte lanes of fs and ft, clamping each sum to the
 * int8_t range.  Returns the eight results packed in lane order.
 */
uint64_t helper_paddsb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int bit;

    for (bit = 0; bit < 64; bit += 8) {
        int sum = (int8_t)(fs >> bit) + (int8_t)(ft >> bit);

        if (sum > INT8_MAX) {
            sum = INT8_MAX;
        } else if (sum < INT8_MIN) {
            sum = INT8_MIN;
        }
        fd |= (uint64_t)(uint8_t)sum << bit;
    }
    return fd;
}
69a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Add the eight unsigned byte lanes of fs and ft, clamping each sum to
 * 0xff.  Returns the eight results packed in lane order.
 */
uint64_t helper_paddusb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int bit;

    for (bit = 0; bit < 64; bit += 8) {
        unsigned int sum = (unsigned int)((fs >> bit) & 0xff)
                         + (unsigned int)((ft >> bit) & 0xff);

        if (sum > UINT8_MAX) {
            sum = UINT8_MAX;
        }
        fd |= (uint64_t)sum << bit;
    }
    return fd;
}
83a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Add the four signed halfword lanes of fs and ft, clamping each sum to
 * the int16_t range.  Returns the four results packed in lane order.
 */
uint64_t helper_paddsh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int bit;

    for (bit = 0; bit < 64; bit += 16) {
        int sum = (int16_t)(fs >> bit) + (int16_t)(ft >> bit);

        if (sum > INT16_MAX) {
            sum = INT16_MAX;
        } else if (sum < INT16_MIN) {
            sum = INT16_MIN;
        }
        fd |= (uint64_t)(uint16_t)sum << bit;
    }
    return fd;
}
97a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Add the four unsigned halfword lanes of fs and ft, clamping each sum
 * to 0xffff.  Returns the four results packed in lane order.
 */
uint64_t helper_paddush(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int bit;

    for (bit = 0; bit < 64; bit += 16) {
        uint32_t sum = (uint32_t)((fs >> bit) & 0xffff)
                     + (uint32_t)((ft >> bit) & 0xffff);

        if (sum > UINT16_MAX) {
            sum = UINT16_MAX;
        }
        fd |= (uint64_t)sum << bit;
    }
    return fd;
}
111a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Add the eight byte lanes of fs and ft with wrap-around (modulo 256);
 * no carry propagates between lanes.
 */
uint64_t helper_paddb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int bit;

    for (bit = 0; bit < 64; bit += 8) {
        /* Truncating to 8 bits discards anything carried in from above. */
        uint8_t sum = (uint8_t)((fs >> bit) + (ft >> bit));

        fd |= (uint64_t)sum << bit;
    }
    return fd;
}
124a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Add the four halfword lanes of fs and ft with wrap-around (modulo
 * 65536); no carry propagates between lanes.
 */
uint64_t helper_paddh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int bit;

    for (bit = 0; bit < 64; bit += 16) {
        uint16_t sum = (uint16_t)((fs >> bit) + (ft >> bit));

        fd |= (uint64_t)sum << bit;
    }
    return fd;
}
137a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Add the two 32-bit word lanes of fs and ft with wrap-around; the low
 * word's carry does not reach the high word.
 */
uint64_t helper_paddw(uint64_t fs, uint64_t ft)
{
    uint32_t lo = (uint32_t)fs + (uint32_t)ft;
    uint32_t hi = (uint32_t)(fs >> 32) + (uint32_t)(ft >> 32);

    return ((uint64_t)hi << 32) | lo;
}
150a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Subtract each signed byte lane of ft from the matching lane of fs,
 * clamping each difference to the int8_t range.
 */
uint64_t helper_psubsb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int bit;

    for (bit = 0; bit < 64; bit += 8) {
        int diff = (int8_t)(fs >> bit) - (int8_t)(ft >> bit);

        if (diff > INT8_MAX) {
            diff = INT8_MAX;
        } else if (diff < INT8_MIN) {
            diff = INT8_MIN;
        }
        fd |= (uint64_t)(uint8_t)diff << bit;
    }
    return fd;
}
164a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Subtract each unsigned byte lane of ft from the matching lane of fs
 * with unsigned saturation: a lane whose difference would be negative
 * yields 0 instead of wrapping around.
 */
uint64_t helper_psubusb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int bit;

    for (bit = 0; bit < 64; bit += 8) {
        uint64_t a = (fs >> bit) & 0xff;
        uint64_t b = (ft >> bit) & 0xff;

        /* The difference never exceeds 0xff, so only the lower bound
         * needs clamping; lanes with a <= b simply stay zero. */
        if (a > b) {
            fd |= (a - b) << bit;
        }
    }
    return fd;
}
178a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Subtract each signed halfword lane of ft from the matching lane of fs,
 * clamping each difference to the int16_t range.
 */
uint64_t helper_psubsh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int bit;

    for (bit = 0; bit < 64; bit += 16) {
        int diff = (int16_t)(fs >> bit) - (int16_t)(ft >> bit);

        if (diff > INT16_MAX) {
            diff = INT16_MAX;
        } else if (diff < INT16_MIN) {
            diff = INT16_MIN;
        }
        fd |= (uint64_t)(uint16_t)diff << bit;
    }
    return fd;
}
192a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Subtract each unsigned halfword lane of ft from the matching lane of
 * fs with unsigned saturation: a lane whose difference would be negative
 * yields 0 instead of wrapping around.
 */
uint64_t helper_psubush(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int bit;

    for (bit = 0; bit < 64; bit += 16) {
        uint64_t a = (fs >> bit) & 0xffff;
        uint64_t b = (ft >> bit) & 0xffff;

        /* The difference never exceeds 0xffff, so only the lower bound
         * needs clamping; lanes with a <= b simply stay zero. */
        if (a > b) {
            fd |= (a - b) << bit;
        }
    }
    return fd;
}
206a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Subtract each byte lane of ft from the matching lane of fs with
 * wrap-around (modulo 256); no borrow propagates between lanes.
 */
uint64_t helper_psubb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int bit;

    for (bit = 0; bit < 64; bit += 8) {
        /* Truncating to 8 bits discards any borrow from higher bits. */
        uint8_t diff = (uint8_t)((fs >> bit) - (ft >> bit));

        fd |= (uint64_t)diff << bit;
    }
    return fd;
}
219a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Subtract each halfword lane of ft from the matching lane of fs with
 * wrap-around (modulo 65536); no borrow propagates between lanes.
 */
uint64_t helper_psubh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int bit;

    for (bit = 0; bit < 64; bit += 16) {
        uint16_t diff = (uint16_t)((fs >> bit) - (ft >> bit));

        fd |= (uint64_t)diff << bit;
    }
    return fd;
}
232a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Subtract each 32-bit word lane of ft from the matching lane of fs with
 * wrap-around; the low word's borrow does not reach the high word.
 */
uint64_t helper_psubw(uint64_t fs, uint64_t ft)
{
    uint32_t lo = (uint32_t)fs - (uint32_t)ft;
    uint32_t hi = (uint32_t)(fs >> 32) - (uint32_t)(ft >> 32);

    return ((uint64_t)hi << 32) | lo;
}
245a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Shuffle the four halfword lanes of fs.  Result lane i is a copy of the
 * fs lane selected by bits [2i+1:2i] of ft; only the low eight bits of
 * ft are consulted.
 */
uint64_t helper_pshufh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int lane;

    for (lane = 0; lane < 4; lane++) {
        unsigned int sel = (ft >> (lane * 2)) & 3;
        uint64_t half = (fs >> (sel * 16)) & 0xffff;

        fd |= half << (lane * 16);
    }
    return fd;
}
259a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Narrow four signed 32-bit words to signed halfwords, clamping each to
 * the int16_t range.  The two words of fs fill result lanes 0 and 1, the
 * two words of ft fill lanes 2 and 3.
 */
uint64_t helper_packsswh(uint64_t fs, uint64_t ft)
{
    const uint64_t src[2] = { fs, ft };
    uint64_t fd = 0;
    unsigned int lane;

    for (lane = 0; lane < 4; lane++) {
        int32_t word = (int32_t)(src[lane / 2] >> ((lane % 2) * 32));

        if (word > INT16_MAX) {
            word = INT16_MAX;
        } else if (word < INT16_MIN) {
            word = INT16_MIN;
        }
        /* Shift as an unsigned value so lane 3 never moves a set bit
         * into the sign bit of a signed type. */
        fd |= (uint64_t)(uint16_t)word << (lane * 16);
    }
    return fd;
}
283a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Narrow eight signed halfwords to signed bytes, clamping each to the
 * int8_t range.  The four halfwords of fs fill result bytes 0-3, the
 * four halfwords of ft fill bytes 4-7.
 */
uint64_t helper_packsshb(uint64_t fs, uint64_t ft)
{
    const uint64_t src[2] = { fs, ft };
    uint64_t fd = 0;
    unsigned int lane;

    for (lane = 0; lane < 8; lane++) {
        int half = (int16_t)(src[lane / 4] >> ((lane % 4) * 16));

        if (half > INT8_MAX) {
            half = INT8_MAX;
        } else if (half < INT8_MIN) {
            half = INT8_MIN;
        }
        fd |= (uint64_t)(uint8_t)half << (lane * 8);
    }
    return fd;
}
302a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Narrow eight signed halfwords to unsigned bytes with unsigned
 * saturation: values above 0xff become 0xff and negative values become
 * 0.  The four halfwords of fs fill result bytes 0-3, the four halfwords
 * of ft fill bytes 4-7.
 *
 * NOTE(review): the lower clamp assumes the source lanes are signed (as
 * the int16_t temporaries of the previous implementation suggest) -
 * confirm against the instruction set manual.
 */
uint64_t helper_packushb(uint64_t fs, uint64_t ft)
{
    const uint64_t src[2] = { fs, ft };
    uint64_t fd = 0;
    unsigned int lane;

    for (lane = 0; lane < 8; lane++) {
        int half = (int16_t)(src[lane / 4] >> ((lane % 4) * 16));

        if (half > UINT8_MAX) {
            half = UINT8_MAX;
        } else if (half < 0) {
            half = 0;
        }
        fd |= (uint64_t)half << (lane * 8);
    }
    return fd;
}
321a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Combine the low 32-bit words of both operands: the low word of fs
 * becomes the low word of the result, the low word of ft the high word.
 */
uint64_t helper_punpcklwd(uint64_t fs, uint64_t ft)
{
    uint64_t lo = (uint32_t)fs;
    uint64_t hi = (uint32_t)ft;

    return (hi << 32) | lo;
}
326a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Combine the high 32-bit words of both operands: the high word of fs
 * becomes the low word of the result, the high word of ft stays high.
 */
uint64_t helper_punpckhwd(uint64_t fs, uint64_t ft)
{
    uint64_t lo = fs >> 32;
    uint64_t hi = ft >> 32;

    return (hi << 32) | lo;
}
331a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Interleave the two low halfwords of fs and ft.  Result lanes, from
 * least significant: fs[0], ft[0], fs[1], ft[1].
 */
uint64_t helper_punpcklhw(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int i;

    for (i = 0; i < 2; i++) {
        fd |= ((fs >> (i * 16)) & 0xffff) << (i * 32);
        fd |= ((ft >> (i * 16)) & 0xffff) << (i * 32 + 16);
    }
    return fd;
}
346a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Interleave the two high halfwords of fs and ft.  Result lanes, from
 * least significant: fs[2], ft[2], fs[3], ft[3].
 */
uint64_t helper_punpckhhw(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int i;

    for (i = 0; i < 2; i++) {
        fd |= ((fs >> (32 + i * 16)) & 0xffff) << (i * 32);
        fd |= ((ft >> (32 + i * 16)) & 0xffff) << (i * 32 + 16);
    }
    return fd;
}
361a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Interleave the four low bytes of fs and ft.  Result bytes, from least
 * significant: fs[0], ft[0], fs[1], ft[1], fs[2], ft[2], fs[3], ft[3].
 */
uint64_t helper_punpcklbh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int i;

    for (i = 0; i < 4; i++) {
        fd |= ((fs >> (i * 8)) & 0xff) << (i * 16);
        fd |= ((ft >> (i * 8)) & 0xff) << (i * 16 + 8);
    }
    return fd;
}
380a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Interleave the four high bytes of fs and ft.  Result bytes, from least
 * significant: fs[4], ft[4], fs[5], ft[5], fs[6], ft[6], fs[7], ft[7].
 */
uint64_t helper_punpckhbh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int i;

    for (i = 0; i < 4; i++) {
        fd |= ((fs >> (32 + i * 8)) & 0xff) << (i * 16);
        fd |= ((ft >> (32 + i * 8)) & 0xff) << (i * 16 + 8);
    }
    return fd;
}
399a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Average the four unsigned halfword lanes of fs and ft, rounding up:
 * each result lane is (a + b + 1) >> 1.
 */
uint64_t helper_pavgh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int bit;

    for (bit = 0; bit < 64; bit += 16) {
        uint32_t a = (uint32_t)((fs >> bit) & 0xffff);
        uint32_t b = (uint32_t)((ft >> bit) & 0xffff);

        /* The rounded average of two 16-bit values fits in 16 bits. */
        fd |= (uint64_t)((a + b + 1) >> 1) << bit;
    }
    return fd;
}
412a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Average the eight unsigned byte lanes of fs and ft, rounding up: each
 * result lane is (a + b + 1) >> 1.
 */
uint64_t helper_pavgb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int bit;

    for (bit = 0; bit < 64; bit += 8) {
        unsigned int a = (unsigned int)((fs >> bit) & 0xff);
        unsigned int b = (unsigned int)((ft >> bit) & 0xff);

        /* The rounded average of two 8-bit values fits in 8 bits. */
        fd |= (uint64_t)((a + b + 1) >> 1) << bit;
    }
    return fd;
}
425a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Select, for each of the four signed halfword lanes, the larger of the
 * fs and ft values.
 */
uint64_t helper_pmaxsh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int bit;

    for (bit = 0; bit < 64; bit += 16) {
        int16_t a = (int16_t)(fs >> bit);
        int16_t b = (int16_t)(ft >> bit);
        int16_t larger = a < b ? b : a;

        fd |= (uint64_t)(uint16_t)larger << bit;
    }
    return fd;
}
438a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Select, for each of the four signed halfword lanes, the smaller of the
 * fs and ft values.
 */
uint64_t helper_pminsh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int bit;

    for (bit = 0; bit < 64; bit += 16) {
        int16_t a = (int16_t)(fs >> bit);
        int16_t b = (int16_t)(ft >> bit);
        int16_t smaller = a > b ? b : a;

        fd |= (uint64_t)(uint16_t)smaller << bit;
    }
    return fd;
}
451a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Select, for each of the eight unsigned byte lanes, the larger of the
 * fs and ft values.  All eight lanes of the 64-bit operands are
 * processed.
 */
uint64_t helper_pmaxub(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int bit;

    for (bit = 0; bit < 64; bit += 8) {
        uint64_t a = (fs >> bit) & 0xff;
        uint64_t b = (ft >> bit) & 0xff;

        fd |= (a >= b ? a : b) << bit;
    }
    return fd;
}
464a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Select, for each of the eight unsigned byte lanes, the smaller of the
 * fs and ft values.  All eight lanes of the 64-bit operands are
 * processed.
 */
uint64_t helper_pminub(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int bit;

    for (bit = 0; bit < 64; bit += 8) {
        uint64_t a = (fs >> bit) & 0xff;
        uint64_t b = (ft >> bit) & 0xff;

        fd |= (a <= b ? a : b) << bit;
    }
    return fd;
}
477a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Compare the two 32-bit word lanes of fs and ft for equality.  A lane
 * becomes all ones when the words match and all zeros otherwise.
 */
uint64_t helper_pcmpeqw(uint64_t fs, uint64_t ft)
{
    const uint64_t diff = fs ^ ft;
    uint64_t fd = 0;

    if ((uint32_t)diff == 0) {
        fd |= 0xffffffffull;
    }
    if ((diff >> 32) == 0) {
        fd |= 0xffffffffull << 32;
    }
    return fd;
}
490a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Compare the two 32-bit word lanes of fs and ft.  A lane becomes all
 * ones when the fs word is greater than the ft word and all zeros
 * otherwise.  The lanes are compared as unsigned values.
 *
 * NOTE(review): confirm against the instruction set manual whether this
 * comparison is meant to be signed.
 */
uint64_t helper_pcmpgtw(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;

    if ((uint32_t)fs > (uint32_t)ft) {
        fd |= 0xffffffffull;
    }
    if ((fs >> 32) > (ft >> 32)) {
        fd |= 0xffffffffull << 32;
    }
    return fd;
}
503a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Compare the four halfword lanes of fs and ft for equality.  A lane
 * becomes all ones when the halfwords match and all zeros otherwise.
 */
uint64_t helper_pcmpeqh(uint64_t fs, uint64_t ft)
{
    const uint64_t diff = fs ^ ft;
    uint64_t fd = 0;
    unsigned int bit;

    for (bit = 0; bit < 64; bit += 16) {
        if (((diff >> bit) & 0xffff) == 0) {
            fd |= (uint64_t)0xffff << bit;
        }
    }
    return fd;
}
516a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Compare the four halfword lanes of fs and ft.  A lane becomes all ones
 * when the fs halfword is greater than the ft halfword and all zeros
 * otherwise.  The lanes are compared as unsigned values.
 *
 * NOTE(review): confirm against the instruction set manual whether this
 * comparison is meant to be signed.
 */
uint64_t helper_pcmpgth(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int bit;

    for (bit = 0; bit < 64; bit += 16) {
        if (((fs >> bit) & 0xffff) > ((ft >> bit) & 0xffff)) {
            fd |= (uint64_t)0xffff << bit;
        }
    }
    return fd;
}
529a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Compare the eight byte lanes of fs and ft for equality.  A lane
 * becomes 0xff when the bytes match and 0x00 otherwise.
 */
uint64_t helper_pcmpeqb(uint64_t fs, uint64_t ft)
{
    const uint64_t diff = fs ^ ft;
    uint64_t fd = 0;
    unsigned int bit;

    for (bit = 0; bit < 64; bit += 8) {
        if (((diff >> bit) & 0xff) == 0) {
            fd |= (uint64_t)0xff << bit;
        }
    }
    return fd;
}
542a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Compare the eight byte lanes of fs and ft.  A lane becomes 0xff when
 * the fs byte is greater than the ft byte and 0x00 otherwise.  The lanes
 * are compared as unsigned values.
 *
 * NOTE(review): confirm against the instruction set manual whether this
 * comparison is meant to be signed.
 */
uint64_t helper_pcmpgtb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int bit;

    for (bit = 0; bit < 64; bit += 8) {
        if (((fs >> bit) & 0xff) > ((ft >> bit) & 0xff)) {
            fd |= (uint64_t)0xff << bit;
        }
    }
    return fd;
}
555a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Shift both 32-bit word lanes of fs left by the same amount.  The shift
 * count is the low seven bits of ft; a count above 31 produces 0.
 */
uint64_t helper_psllw(uint64_t fs, uint64_t ft)
{
    const unsigned int count = ft & 0x7f;
    uint32_t lo, hi;

    if (count >= 32) {
        return 0;
    }
    lo = (uint32_t)fs << count;
    hi = (uint32_t)(fs >> 32) << count;
    return ((uint64_t)hi << 32) | lo;
}
571a2b0a27dSPhilippe Mathieu-Daudé 
helper_psrlw(uint64_t fs,uint64_t ft)572a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_psrlw(uint64_t fs, uint64_t ft)
573a2b0a27dSPhilippe Mathieu-Daudé {
574a2b0a27dSPhilippe Mathieu-Daudé     LMIValue vs;
575a2b0a27dSPhilippe Mathieu-Daudé     unsigned i;
576a2b0a27dSPhilippe Mathieu-Daudé 
577a2b0a27dSPhilippe Mathieu-Daudé     ft &= 0x7f;
578a2b0a27dSPhilippe Mathieu-Daudé     if (ft > 31) {
579a2b0a27dSPhilippe Mathieu-Daudé         return 0;
580a2b0a27dSPhilippe Mathieu-Daudé     }
581a2b0a27dSPhilippe Mathieu-Daudé     vs.d = fs;
582a2b0a27dSPhilippe Mathieu-Daudé     for (i = 0; i < 2; ++i) {
583a2b0a27dSPhilippe Mathieu-Daudé         vs.uw[i] >>= ft;
584a2b0a27dSPhilippe Mathieu-Daudé     }
585a2b0a27dSPhilippe Mathieu-Daudé     return vs.d;
586a2b0a27dSPhilippe Mathieu-Daudé }
587a2b0a27dSPhilippe Mathieu-Daudé 
helper_psraw(uint64_t fs,uint64_t ft)588a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_psraw(uint64_t fs, uint64_t ft)
589a2b0a27dSPhilippe Mathieu-Daudé {
590a2b0a27dSPhilippe Mathieu-Daudé     LMIValue vs;
591a2b0a27dSPhilippe Mathieu-Daudé     unsigned i;
592a2b0a27dSPhilippe Mathieu-Daudé 
593a2b0a27dSPhilippe Mathieu-Daudé     ft &= 0x7f;
594a2b0a27dSPhilippe Mathieu-Daudé     if (ft > 31) {
595a2b0a27dSPhilippe Mathieu-Daudé         ft = 31;
596a2b0a27dSPhilippe Mathieu-Daudé     }
597a2b0a27dSPhilippe Mathieu-Daudé     vs.d = fs;
598a2b0a27dSPhilippe Mathieu-Daudé     for (i = 0; i < 2; ++i) {
599a2b0a27dSPhilippe Mathieu-Daudé         vs.sw[i] >>= ft;
600a2b0a27dSPhilippe Mathieu-Daudé     }
601a2b0a27dSPhilippe Mathieu-Daudé     return vs.d;
602a2b0a27dSPhilippe Mathieu-Daudé }
603a2b0a27dSPhilippe Mathieu-Daudé 
helper_psllh(uint64_t fs,uint64_t ft)604a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_psllh(uint64_t fs, uint64_t ft)
605a2b0a27dSPhilippe Mathieu-Daudé {
606a2b0a27dSPhilippe Mathieu-Daudé     LMIValue vs;
607a2b0a27dSPhilippe Mathieu-Daudé     unsigned i;
608a2b0a27dSPhilippe Mathieu-Daudé 
609a2b0a27dSPhilippe Mathieu-Daudé     ft &= 0x7f;
610a2b0a27dSPhilippe Mathieu-Daudé     if (ft > 15) {
611a2b0a27dSPhilippe Mathieu-Daudé         return 0;
612a2b0a27dSPhilippe Mathieu-Daudé     }
613a2b0a27dSPhilippe Mathieu-Daudé     vs.d = fs;
614a2b0a27dSPhilippe Mathieu-Daudé     for (i = 0; i < 4; ++i) {
615a2b0a27dSPhilippe Mathieu-Daudé         vs.uh[i] <<= ft;
616a2b0a27dSPhilippe Mathieu-Daudé     }
617a2b0a27dSPhilippe Mathieu-Daudé     return vs.d;
618a2b0a27dSPhilippe Mathieu-Daudé }
619a2b0a27dSPhilippe Mathieu-Daudé 
helper_psrlh(uint64_t fs,uint64_t ft)620a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_psrlh(uint64_t fs, uint64_t ft)
621a2b0a27dSPhilippe Mathieu-Daudé {
622a2b0a27dSPhilippe Mathieu-Daudé     LMIValue vs;
623a2b0a27dSPhilippe Mathieu-Daudé     unsigned i;
624a2b0a27dSPhilippe Mathieu-Daudé 
625a2b0a27dSPhilippe Mathieu-Daudé     ft &= 0x7f;
626a2b0a27dSPhilippe Mathieu-Daudé     if (ft > 15) {
627a2b0a27dSPhilippe Mathieu-Daudé         return 0;
628a2b0a27dSPhilippe Mathieu-Daudé     }
629a2b0a27dSPhilippe Mathieu-Daudé     vs.d = fs;
630a2b0a27dSPhilippe Mathieu-Daudé     for (i = 0; i < 4; ++i) {
631a2b0a27dSPhilippe Mathieu-Daudé         vs.uh[i] >>= ft;
632a2b0a27dSPhilippe Mathieu-Daudé     }
633a2b0a27dSPhilippe Mathieu-Daudé     return vs.d;
634a2b0a27dSPhilippe Mathieu-Daudé }
635a2b0a27dSPhilippe Mathieu-Daudé 
helper_psrah(uint64_t fs,uint64_t ft)636a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_psrah(uint64_t fs, uint64_t ft)
637a2b0a27dSPhilippe Mathieu-Daudé {
638a2b0a27dSPhilippe Mathieu-Daudé     LMIValue vs;
639a2b0a27dSPhilippe Mathieu-Daudé     unsigned i;
640a2b0a27dSPhilippe Mathieu-Daudé 
641a2b0a27dSPhilippe Mathieu-Daudé     ft &= 0x7f;
642a2b0a27dSPhilippe Mathieu-Daudé     if (ft > 15) {
643a2b0a27dSPhilippe Mathieu-Daudé         ft = 15;
644a2b0a27dSPhilippe Mathieu-Daudé     }
645a2b0a27dSPhilippe Mathieu-Daudé     vs.d = fs;
646a2b0a27dSPhilippe Mathieu-Daudé     for (i = 0; i < 4; ++i) {
647a2b0a27dSPhilippe Mathieu-Daudé         vs.sh[i] >>= ft;
648a2b0a27dSPhilippe Mathieu-Daudé     }
649a2b0a27dSPhilippe Mathieu-Daudé     return vs.d;
650a2b0a27dSPhilippe Mathieu-Daudé }
651a2b0a27dSPhilippe Mathieu-Daudé 
helper_pmullh(uint64_t fs,uint64_t ft)652a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_pmullh(uint64_t fs, uint64_t ft)
653a2b0a27dSPhilippe Mathieu-Daudé {
654a2b0a27dSPhilippe Mathieu-Daudé     LMIValue vs, vt;
655a2b0a27dSPhilippe Mathieu-Daudé     unsigned i;
656a2b0a27dSPhilippe Mathieu-Daudé 
657a2b0a27dSPhilippe Mathieu-Daudé     vs.d = fs;
658a2b0a27dSPhilippe Mathieu-Daudé     vt.d = ft;
659a2b0a27dSPhilippe Mathieu-Daudé     for (i = 0; i < 4; ++i) {
660a2b0a27dSPhilippe Mathieu-Daudé         vs.sh[i] *= vt.sh[i];
661a2b0a27dSPhilippe Mathieu-Daudé     }
662a2b0a27dSPhilippe Mathieu-Daudé     return vs.d;
663a2b0a27dSPhilippe Mathieu-Daudé }
664a2b0a27dSPhilippe Mathieu-Daudé 
helper_pmulhh(uint64_t fs,uint64_t ft)665a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_pmulhh(uint64_t fs, uint64_t ft)
666a2b0a27dSPhilippe Mathieu-Daudé {
667a2b0a27dSPhilippe Mathieu-Daudé     LMIValue vs, vt;
668a2b0a27dSPhilippe Mathieu-Daudé     unsigned i;
669a2b0a27dSPhilippe Mathieu-Daudé 
670a2b0a27dSPhilippe Mathieu-Daudé     vs.d = fs;
671a2b0a27dSPhilippe Mathieu-Daudé     vt.d = ft;
672a2b0a27dSPhilippe Mathieu-Daudé     for (i = 0; i < 4; ++i) {
673a2b0a27dSPhilippe Mathieu-Daudé         int32_t r = vs.sh[i] * vt.sh[i];
674a2b0a27dSPhilippe Mathieu-Daudé         vs.sh[i] = r >> 16;
675a2b0a27dSPhilippe Mathieu-Daudé     }
676a2b0a27dSPhilippe Mathieu-Daudé     return vs.d;
677a2b0a27dSPhilippe Mathieu-Daudé }
678a2b0a27dSPhilippe Mathieu-Daudé 
helper_pmulhuh(uint64_t fs,uint64_t ft)679a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_pmulhuh(uint64_t fs, uint64_t ft)
680a2b0a27dSPhilippe Mathieu-Daudé {
681a2b0a27dSPhilippe Mathieu-Daudé     LMIValue vs, vt;
682a2b0a27dSPhilippe Mathieu-Daudé     unsigned i;
683a2b0a27dSPhilippe Mathieu-Daudé 
684a2b0a27dSPhilippe Mathieu-Daudé     vs.d = fs;
685a2b0a27dSPhilippe Mathieu-Daudé     vt.d = ft;
686a2b0a27dSPhilippe Mathieu-Daudé     for (i = 0; i < 4; ++i) {
687a2b0a27dSPhilippe Mathieu-Daudé         uint32_t r = vs.uh[i] * vt.uh[i];
688a2b0a27dSPhilippe Mathieu-Daudé         vs.uh[i] = r >> 16;
689a2b0a27dSPhilippe Mathieu-Daudé     }
690a2b0a27dSPhilippe Mathieu-Daudé     return vs.d;
691a2b0a27dSPhilippe Mathieu-Daudé }
692a2b0a27dSPhilippe Mathieu-Daudé 
helper_pmaddhw(uint64_t fs,uint64_t ft)693a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_pmaddhw(uint64_t fs, uint64_t ft)
694a2b0a27dSPhilippe Mathieu-Daudé {
695a2b0a27dSPhilippe Mathieu-Daudé     unsigned host = BYTE_ORDER_XOR(3);
696a2b0a27dSPhilippe Mathieu-Daudé     LMIValue vs, vt;
697a2b0a27dSPhilippe Mathieu-Daudé     uint32_t p0, p1;
698a2b0a27dSPhilippe Mathieu-Daudé 
699a2b0a27dSPhilippe Mathieu-Daudé     vs.d = fs;
700a2b0a27dSPhilippe Mathieu-Daudé     vt.d = ft;
701a2b0a27dSPhilippe Mathieu-Daudé     p0  = vs.sh[0 ^ host] * vt.sh[0 ^ host];
702a2b0a27dSPhilippe Mathieu-Daudé     p0 += vs.sh[1 ^ host] * vt.sh[1 ^ host];
703a2b0a27dSPhilippe Mathieu-Daudé     p1  = vs.sh[2 ^ host] * vt.sh[2 ^ host];
704a2b0a27dSPhilippe Mathieu-Daudé     p1 += vs.sh[3 ^ host] * vt.sh[3 ^ host];
705a2b0a27dSPhilippe Mathieu-Daudé 
706a2b0a27dSPhilippe Mathieu-Daudé     return ((uint64_t)p1 << 32) | p0;
707a2b0a27dSPhilippe Mathieu-Daudé }
708a2b0a27dSPhilippe Mathieu-Daudé 
helper_pasubub(uint64_t fs,uint64_t ft)709a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_pasubub(uint64_t fs, uint64_t ft)
710a2b0a27dSPhilippe Mathieu-Daudé {
711a2b0a27dSPhilippe Mathieu-Daudé     LMIValue vs, vt;
712a2b0a27dSPhilippe Mathieu-Daudé     unsigned i;
713a2b0a27dSPhilippe Mathieu-Daudé 
714a2b0a27dSPhilippe Mathieu-Daudé     vs.d = fs;
715a2b0a27dSPhilippe Mathieu-Daudé     vt.d = ft;
716a2b0a27dSPhilippe Mathieu-Daudé     for (i = 0; i < 8; ++i) {
717a2b0a27dSPhilippe Mathieu-Daudé         int r = vs.ub[i] - vt.ub[i];
718a2b0a27dSPhilippe Mathieu-Daudé         vs.ub[i] = (r < 0 ? -r : r);
719a2b0a27dSPhilippe Mathieu-Daudé     }
720a2b0a27dSPhilippe Mathieu-Daudé     return vs.d;
721a2b0a27dSPhilippe Mathieu-Daudé }
722a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Sum the eight bytes of fs.
 *
 * Returns the sum masked to its low 16 bits; the upper bits of the 64-bit
 * result are zero.
 */
uint64_t helper_biadd(uint64_t fs)
{
    unsigned fd = 0;
    unsigned i;

    for (i = 0; i < 8; ++i) {
        /* Extract byte i (bits 8*i .. 8*i+7) and accumulate it. */
        fd += (fs >> (i * 8)) & 0xff;
    }
    return fd & 0xffff;
}
732a2b0a27dSPhilippe Mathieu-Daudé 
/*
 * Gather the top bit of each of the eight bytes of fs into an 8-bit mask.
 *
 * Bit i of the result is bit 7 of byte i of fs (byte 0 being the least
 * significant).  The upper 56 bits of the result are zero.
 */
uint64_t helper_pmovmskb(uint64_t fs)
{
    unsigned fd = 0;
    unsigned i;

    for (i = 0; i < 8; ++i) {
        /* Bit (8 * i + 7) is the most significant bit of byte i. */
        fd |= ((fs >> (i * 8 + 7)) & 1) << i;
    }

    return fd & 0xff;
}
748