xref: /openbmc/qemu/target/mips/tcg/lmmi_helper.c (revision 1be5a765c08cee3a9587c8a8d3fc2ea247b13f9c)
1 /*
2  *  Loongson Multimedia Instruction emulation helpers for QEMU.
3  *
4  *  Copyright (c) 2011  Richard Henderson <rth@twiddle.net>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "exec/helper-proto.h"
23 
24 /*
25  * If the byte ordering doesn't matter, i.e. all columns are treated
26  * identically, then this union can be used directly.  If byte ordering
27  * does matter, we generally ignore dumping to memory.
28  */
29 typedef union {
30     uint8_t  ub[8];
31     int8_t   sb[8];
32     uint16_t uh[4];
33     int16_t  sh[4];
34     uint32_t uw[2];
35     int32_t  sw[2];
36     uint64_t d;
37 } LMIValue;
38 
39 /* Some byte ordering issues can be mitigated by XORing in the following.  */
40 #if HOST_BIG_ENDIAN
41 # define BYTE_ORDER_XOR(N) N
42 #else
43 # define BYTE_ORDER_XOR(N) 0
44 #endif
45 
46 #define SATSB(x)  (x < -0x80 ? -0x80 : x > 0x7f ? 0x7f : x)
47 #define SATUB(x)  (x > 0xff ? 0xff : x)
48 
49 #define SATSH(x)  (x < -0x8000 ? -0x8000 : x > 0x7fff ? 0x7fff : x)
50 #define SATUH(x)  (x > 0xffff ? 0xffff : x)
51 
52 #define SATSW(x) \
53     (x < -0x80000000ll ? -0x80000000ll : x > 0x7fffffff ? 0x7fffffff : x)
54 #define SATUW(x)  (x > 0xffffffffull ? 0xffffffffull : x)
55 
helper_paddsb(uint64_t fs,uint64_t ft)56 uint64_t helper_paddsb(uint64_t fs, uint64_t ft)
57 {
58     LMIValue vs, vt;
59     unsigned int i;
60 
61     vs.d = fs;
62     vt.d = ft;
63     for (i = 0; i < 8; ++i) {
64         int r = vs.sb[i] + vt.sb[i];
65         vs.sb[i] = SATSB(r);
66     }
67     return vs.d;
68 }
69 
helper_paddusb(uint64_t fs,uint64_t ft)70 uint64_t helper_paddusb(uint64_t fs, uint64_t ft)
71 {
72     LMIValue vs, vt;
73     unsigned int i;
74 
75     vs.d = fs;
76     vt.d = ft;
77     for (i = 0; i < 8; ++i) {
78         int r = vs.ub[i] + vt.ub[i];
79         vs.ub[i] = SATUB(r);
80     }
81     return vs.d;
82 }
83 
helper_paddsh(uint64_t fs,uint64_t ft)84 uint64_t helper_paddsh(uint64_t fs, uint64_t ft)
85 {
86     LMIValue vs, vt;
87     unsigned int i;
88 
89     vs.d = fs;
90     vt.d = ft;
91     for (i = 0; i < 4; ++i) {
92         int r = vs.sh[i] + vt.sh[i];
93         vs.sh[i] = SATSH(r);
94     }
95     return vs.d;
96 }
97 
helper_paddush(uint64_t fs,uint64_t ft)98 uint64_t helper_paddush(uint64_t fs, uint64_t ft)
99 {
100     LMIValue vs, vt;
101     unsigned int i;
102 
103     vs.d = fs;
104     vt.d = ft;
105     for (i = 0; i < 4; ++i) {
106         int r = vs.uh[i] + vt.uh[i];
107         vs.uh[i] = SATUH(r);
108     }
109     return vs.d;
110 }
111 
helper_paddb(uint64_t fs,uint64_t ft)112 uint64_t helper_paddb(uint64_t fs, uint64_t ft)
113 {
114     LMIValue vs, vt;
115     unsigned int i;
116 
117     vs.d = fs;
118     vt.d = ft;
119     for (i = 0; i < 8; ++i) {
120         vs.ub[i] += vt.ub[i];
121     }
122     return vs.d;
123 }
124 
helper_paddh(uint64_t fs,uint64_t ft)125 uint64_t helper_paddh(uint64_t fs, uint64_t ft)
126 {
127     LMIValue vs, vt;
128     unsigned int i;
129 
130     vs.d = fs;
131     vt.d = ft;
132     for (i = 0; i < 4; ++i) {
133         vs.uh[i] += vt.uh[i];
134     }
135     return vs.d;
136 }
137 
helper_paddw(uint64_t fs,uint64_t ft)138 uint64_t helper_paddw(uint64_t fs, uint64_t ft)
139 {
140     LMIValue vs, vt;
141     unsigned int i;
142 
143     vs.d = fs;
144     vt.d = ft;
145     for (i = 0; i < 2; ++i) {
146         vs.uw[i] += vt.uw[i];
147     }
148     return vs.d;
149 }
150 
helper_psubsb(uint64_t fs,uint64_t ft)151 uint64_t helper_psubsb(uint64_t fs, uint64_t ft)
152 {
153     LMIValue vs, vt;
154     unsigned int i;
155 
156     vs.d = fs;
157     vt.d = ft;
158     for (i = 0; i < 8; ++i) {
159         int r = vs.sb[i] - vt.sb[i];
160         vs.sb[i] = SATSB(r);
161     }
162     return vs.d;
163 }
164 
helper_psubusb(uint64_t fs,uint64_t ft)165 uint64_t helper_psubusb(uint64_t fs, uint64_t ft)
166 {
167     LMIValue vs, vt;
168     unsigned int i;
169 
170     vs.d = fs;
171     vt.d = ft;
172     for (i = 0; i < 8; ++i) {
173         int r = vs.ub[i] - vt.ub[i];
174         vs.ub[i] = SATUB(r);
175     }
176     return vs.d;
177 }
178 
helper_psubsh(uint64_t fs,uint64_t ft)179 uint64_t helper_psubsh(uint64_t fs, uint64_t ft)
180 {
181     LMIValue vs, vt;
182     unsigned int i;
183 
184     vs.d = fs;
185     vt.d = ft;
186     for (i = 0; i < 4; ++i) {
187         int r = vs.sh[i] - vt.sh[i];
188         vs.sh[i] = SATSH(r);
189     }
190     return vs.d;
191 }
192 
helper_psubush(uint64_t fs,uint64_t ft)193 uint64_t helper_psubush(uint64_t fs, uint64_t ft)
194 {
195     LMIValue vs, vt;
196     unsigned int i;
197 
198     vs.d = fs;
199     vt.d = ft;
200     for (i = 0; i < 4; ++i) {
201         int r = vs.uh[i] - vt.uh[i];
202         vs.uh[i] = SATUH(r);
203     }
204     return vs.d;
205 }
206 
helper_psubb(uint64_t fs,uint64_t ft)207 uint64_t helper_psubb(uint64_t fs, uint64_t ft)
208 {
209     LMIValue vs, vt;
210     unsigned int i;
211 
212     vs.d = fs;
213     vt.d = ft;
214     for (i = 0; i < 8; ++i) {
215         vs.ub[i] -= vt.ub[i];
216     }
217     return vs.d;
218 }
219 
helper_psubh(uint64_t fs,uint64_t ft)220 uint64_t helper_psubh(uint64_t fs, uint64_t ft)
221 {
222     LMIValue vs, vt;
223     unsigned int i;
224 
225     vs.d = fs;
226     vt.d = ft;
227     for (i = 0; i < 4; ++i) {
228         vs.uh[i] -= vt.uh[i];
229     }
230     return vs.d;
231 }
232 
helper_psubw(uint64_t fs,uint64_t ft)233 uint64_t helper_psubw(uint64_t fs, uint64_t ft)
234 {
235     LMIValue vs, vt;
236     unsigned int i;
237 
238     vs.d = fs;
239     vt.d = ft;
240     for (i = 0; i < 2; ++i) {
241         vs.uw[i] -= vt.uw[i];
242     }
243     return vs.d;
244 }
245 
helper_pshufh(uint64_t fs,uint64_t ft)246 uint64_t helper_pshufh(uint64_t fs, uint64_t ft)
247 {
248     unsigned host = BYTE_ORDER_XOR(3);
249     LMIValue vd, vs;
250     unsigned i;
251 
252     vs.d = fs;
253     vd.d = 0;
254     for (i = 0; i < 4; i++, ft >>= 2) {
255         vd.uh[i ^ host] = vs.uh[(ft & 3) ^ host];
256     }
257     return vd.d;
258 }
259 
helper_packsswh(uint64_t fs,uint64_t ft)260 uint64_t helper_packsswh(uint64_t fs, uint64_t ft)
261 {
262     uint64_t fd = 0;
263     int64_t tmp;
264 
265     tmp = (int32_t)(fs >> 0);
266     tmp = SATSH(tmp);
267     fd |= (tmp & 0xffff) << 0;
268 
269     tmp = (int32_t)(fs >> 32);
270     tmp = SATSH(tmp);
271     fd |= (tmp & 0xffff) << 16;
272 
273     tmp = (int32_t)(ft >> 0);
274     tmp = SATSH(tmp);
275     fd |= (tmp & 0xffff) << 32;
276 
277     tmp = (int32_t)(ft >> 32);
278     tmp = SATSH(tmp);
279     fd |= (tmp & 0xffff) << 48;
280 
281     return fd;
282 }
283 
helper_packsshb(uint64_t fs,uint64_t ft)284 uint64_t helper_packsshb(uint64_t fs, uint64_t ft)
285 {
286     uint64_t fd = 0;
287     unsigned int i;
288 
289     for (i = 0; i < 4; ++i) {
290         int16_t tmp = fs >> (i * 16);
291         tmp = SATSB(tmp);
292         fd |= (uint64_t)(tmp & 0xff) << (i * 8);
293     }
294     for (i = 0; i < 4; ++i) {
295         int16_t tmp = ft >> (i * 16);
296         tmp = SATSB(tmp);
297         fd |= (uint64_t)(tmp & 0xff) << (i * 8 + 32);
298     }
299 
300     return fd;
301 }
302 
helper_packushb(uint64_t fs,uint64_t ft)303 uint64_t helper_packushb(uint64_t fs, uint64_t ft)
304 {
305     uint64_t fd = 0;
306     unsigned int i;
307 
308     for (i = 0; i < 4; ++i) {
309         int16_t tmp = fs >> (i * 16);
310         tmp = SATUB(tmp);
311         fd |= (uint64_t)(tmp & 0xff) << (i * 8);
312     }
313     for (i = 0; i < 4; ++i) {
314         int16_t tmp = ft >> (i * 16);
315         tmp = SATUB(tmp);
316         fd |= (uint64_t)(tmp & 0xff) << (i * 8 + 32);
317     }
318 
319     return fd;
320 }
321 
helper_punpcklwd(uint64_t fs,uint64_t ft)322 uint64_t helper_punpcklwd(uint64_t fs, uint64_t ft)
323 {
324     return (fs & 0xffffffff) | (ft << 32);
325 }
326 
helper_punpckhwd(uint64_t fs,uint64_t ft)327 uint64_t helper_punpckhwd(uint64_t fs, uint64_t ft)
328 {
329     return (fs >> 32) | (ft & ~0xffffffffull);
330 }
331 
helper_punpcklhw(uint64_t fs,uint64_t ft)332 uint64_t helper_punpcklhw(uint64_t fs, uint64_t ft)
333 {
334     unsigned host = BYTE_ORDER_XOR(3);
335     LMIValue vd, vs, vt;
336 
337     vs.d = fs;
338     vt.d = ft;
339     vd.uh[0 ^ host] = vs.uh[0 ^ host];
340     vd.uh[1 ^ host] = vt.uh[0 ^ host];
341     vd.uh[2 ^ host] = vs.uh[1 ^ host];
342     vd.uh[3 ^ host] = vt.uh[1 ^ host];
343 
344     return vd.d;
345 }
346 
helper_punpckhhw(uint64_t fs,uint64_t ft)347 uint64_t helper_punpckhhw(uint64_t fs, uint64_t ft)
348 {
349     unsigned host = BYTE_ORDER_XOR(3);
350     LMIValue vd, vs, vt;
351 
352     vs.d = fs;
353     vt.d = ft;
354     vd.uh[0 ^ host] = vs.uh[2 ^ host];
355     vd.uh[1 ^ host] = vt.uh[2 ^ host];
356     vd.uh[2 ^ host] = vs.uh[3 ^ host];
357     vd.uh[3 ^ host] = vt.uh[3 ^ host];
358 
359     return vd.d;
360 }
361 
helper_punpcklbh(uint64_t fs,uint64_t ft)362 uint64_t helper_punpcklbh(uint64_t fs, uint64_t ft)
363 {
364     unsigned host = BYTE_ORDER_XOR(7);
365     LMIValue vd, vs, vt;
366 
367     vs.d = fs;
368     vt.d = ft;
369     vd.ub[0 ^ host] = vs.ub[0 ^ host];
370     vd.ub[1 ^ host] = vt.ub[0 ^ host];
371     vd.ub[2 ^ host] = vs.ub[1 ^ host];
372     vd.ub[3 ^ host] = vt.ub[1 ^ host];
373     vd.ub[4 ^ host] = vs.ub[2 ^ host];
374     vd.ub[5 ^ host] = vt.ub[2 ^ host];
375     vd.ub[6 ^ host] = vs.ub[3 ^ host];
376     vd.ub[7 ^ host] = vt.ub[3 ^ host];
377 
378     return vd.d;
379 }
380 
helper_punpckhbh(uint64_t fs,uint64_t ft)381 uint64_t helper_punpckhbh(uint64_t fs, uint64_t ft)
382 {
383     unsigned host = BYTE_ORDER_XOR(7);
384     LMIValue vd, vs, vt;
385 
386     vs.d = fs;
387     vt.d = ft;
388     vd.ub[0 ^ host] = vs.ub[4 ^ host];
389     vd.ub[1 ^ host] = vt.ub[4 ^ host];
390     vd.ub[2 ^ host] = vs.ub[5 ^ host];
391     vd.ub[3 ^ host] = vt.ub[5 ^ host];
392     vd.ub[4 ^ host] = vs.ub[6 ^ host];
393     vd.ub[5 ^ host] = vt.ub[6 ^ host];
394     vd.ub[6 ^ host] = vs.ub[7 ^ host];
395     vd.ub[7 ^ host] = vt.ub[7 ^ host];
396 
397     return vd.d;
398 }
399 
helper_pavgh(uint64_t fs,uint64_t ft)400 uint64_t helper_pavgh(uint64_t fs, uint64_t ft)
401 {
402     LMIValue vs, vt;
403     unsigned i;
404 
405     vs.d = fs;
406     vt.d = ft;
407     for (i = 0; i < 4; i++) {
408         vs.uh[i] = (vs.uh[i] + vt.uh[i] + 1) >> 1;
409     }
410     return vs.d;
411 }
412 
helper_pavgb(uint64_t fs,uint64_t ft)413 uint64_t helper_pavgb(uint64_t fs, uint64_t ft)
414 {
415     LMIValue vs, vt;
416     unsigned i;
417 
418     vs.d = fs;
419     vt.d = ft;
420     for (i = 0; i < 8; i++) {
421         vs.ub[i] = (vs.ub[i] + vt.ub[i] + 1) >> 1;
422     }
423     return vs.d;
424 }
425 
helper_pmaxsh(uint64_t fs,uint64_t ft)426 uint64_t helper_pmaxsh(uint64_t fs, uint64_t ft)
427 {
428     LMIValue vs, vt;
429     unsigned i;
430 
431     vs.d = fs;
432     vt.d = ft;
433     for (i = 0; i < 4; i++) {
434         vs.sh[i] = (vs.sh[i] >= vt.sh[i] ? vs.sh[i] : vt.sh[i]);
435     }
436     return vs.d;
437 }
438 
helper_pminsh(uint64_t fs,uint64_t ft)439 uint64_t helper_pminsh(uint64_t fs, uint64_t ft)
440 {
441     LMIValue vs, vt;
442     unsigned i;
443 
444     vs.d = fs;
445     vt.d = ft;
446     for (i = 0; i < 4; i++) {
447         vs.sh[i] = (vs.sh[i] <= vt.sh[i] ? vs.sh[i] : vt.sh[i]);
448     }
449     return vs.d;
450 }
451 
helper_pmaxub(uint64_t fs,uint64_t ft)452 uint64_t helper_pmaxub(uint64_t fs, uint64_t ft)
453 {
454     LMIValue vs, vt;
455     unsigned i;
456 
457     vs.d = fs;
458     vt.d = ft;
459     for (i = 0; i < 4; i++) {
460         vs.ub[i] = (vs.ub[i] >= vt.ub[i] ? vs.ub[i] : vt.ub[i]);
461     }
462     return vs.d;
463 }
464 
helper_pminub(uint64_t fs,uint64_t ft)465 uint64_t helper_pminub(uint64_t fs, uint64_t ft)
466 {
467     LMIValue vs, vt;
468     unsigned i;
469 
470     vs.d = fs;
471     vt.d = ft;
472     for (i = 0; i < 4; i++) {
473         vs.ub[i] = (vs.ub[i] <= vt.ub[i] ? vs.ub[i] : vt.ub[i]);
474     }
475     return vs.d;
476 }
477 
helper_pcmpeqw(uint64_t fs,uint64_t ft)478 uint64_t helper_pcmpeqw(uint64_t fs, uint64_t ft)
479 {
480     LMIValue vs, vt;
481     unsigned i;
482 
483     vs.d = fs;
484     vt.d = ft;
485     for (i = 0; i < 2; i++) {
486         vs.uw[i] = -(vs.uw[i] == vt.uw[i]);
487     }
488     return vs.d;
489 }
490 
helper_pcmpgtw(uint64_t fs,uint64_t ft)491 uint64_t helper_pcmpgtw(uint64_t fs, uint64_t ft)
492 {
493     LMIValue vs, vt;
494     unsigned i;
495 
496     vs.d = fs;
497     vt.d = ft;
498     for (i = 0; i < 2; i++) {
499         vs.uw[i] = -(vs.uw[i] > vt.uw[i]);
500     }
501     return vs.d;
502 }
503 
helper_pcmpeqh(uint64_t fs,uint64_t ft)504 uint64_t helper_pcmpeqh(uint64_t fs, uint64_t ft)
505 {
506     LMIValue vs, vt;
507     unsigned i;
508 
509     vs.d = fs;
510     vt.d = ft;
511     for (i = 0; i < 4; i++) {
512         vs.uh[i] = -(vs.uh[i] == vt.uh[i]);
513     }
514     return vs.d;
515 }
516 
helper_pcmpgth(uint64_t fs,uint64_t ft)517 uint64_t helper_pcmpgth(uint64_t fs, uint64_t ft)
518 {
519     LMIValue vs, vt;
520     unsigned i;
521 
522     vs.d = fs;
523     vt.d = ft;
524     for (i = 0; i < 4; i++) {
525         vs.uh[i] = -(vs.uh[i] > vt.uh[i]);
526     }
527     return vs.d;
528 }
529 
helper_pcmpeqb(uint64_t fs,uint64_t ft)530 uint64_t helper_pcmpeqb(uint64_t fs, uint64_t ft)
531 {
532     LMIValue vs, vt;
533     unsigned i;
534 
535     vs.d = fs;
536     vt.d = ft;
537     for (i = 0; i < 8; i++) {
538         vs.ub[i] = -(vs.ub[i] == vt.ub[i]);
539     }
540     return vs.d;
541 }
542 
helper_pcmpgtb(uint64_t fs,uint64_t ft)543 uint64_t helper_pcmpgtb(uint64_t fs, uint64_t ft)
544 {
545     LMIValue vs, vt;
546     unsigned i;
547 
548     vs.d = fs;
549     vt.d = ft;
550     for (i = 0; i < 8; i++) {
551         vs.ub[i] = -(vs.ub[i] > vt.ub[i]);
552     }
553     return vs.d;
554 }
555 
helper_psllw(uint64_t fs,uint64_t ft)556 uint64_t helper_psllw(uint64_t fs, uint64_t ft)
557 {
558     LMIValue vs;
559     unsigned i;
560 
561     ft &= 0x7f;
562     if (ft > 31) {
563         return 0;
564     }
565     vs.d = fs;
566     for (i = 0; i < 2; ++i) {
567         vs.uw[i] <<= ft;
568     }
569     return vs.d;
570 }
571 
helper_psrlw(uint64_t fs,uint64_t ft)572 uint64_t helper_psrlw(uint64_t fs, uint64_t ft)
573 {
574     LMIValue vs;
575     unsigned i;
576 
577     ft &= 0x7f;
578     if (ft > 31) {
579         return 0;
580     }
581     vs.d = fs;
582     for (i = 0; i < 2; ++i) {
583         vs.uw[i] >>= ft;
584     }
585     return vs.d;
586 }
587 
helper_psraw(uint64_t fs,uint64_t ft)588 uint64_t helper_psraw(uint64_t fs, uint64_t ft)
589 {
590     LMIValue vs;
591     unsigned i;
592 
593     ft &= 0x7f;
594     if (ft > 31) {
595         ft = 31;
596     }
597     vs.d = fs;
598     for (i = 0; i < 2; ++i) {
599         vs.sw[i] >>= ft;
600     }
601     return vs.d;
602 }
603 
helper_psllh(uint64_t fs,uint64_t ft)604 uint64_t helper_psllh(uint64_t fs, uint64_t ft)
605 {
606     LMIValue vs;
607     unsigned i;
608 
609     ft &= 0x7f;
610     if (ft > 15) {
611         return 0;
612     }
613     vs.d = fs;
614     for (i = 0; i < 4; ++i) {
615         vs.uh[i] <<= ft;
616     }
617     return vs.d;
618 }
619 
helper_psrlh(uint64_t fs,uint64_t ft)620 uint64_t helper_psrlh(uint64_t fs, uint64_t ft)
621 {
622     LMIValue vs;
623     unsigned i;
624 
625     ft &= 0x7f;
626     if (ft > 15) {
627         return 0;
628     }
629     vs.d = fs;
630     for (i = 0; i < 4; ++i) {
631         vs.uh[i] >>= ft;
632     }
633     return vs.d;
634 }
635 
helper_psrah(uint64_t fs,uint64_t ft)636 uint64_t helper_psrah(uint64_t fs, uint64_t ft)
637 {
638     LMIValue vs;
639     unsigned i;
640 
641     ft &= 0x7f;
642     if (ft > 15) {
643         ft = 15;
644     }
645     vs.d = fs;
646     for (i = 0; i < 4; ++i) {
647         vs.sh[i] >>= ft;
648     }
649     return vs.d;
650 }
651 
helper_pmullh(uint64_t fs,uint64_t ft)652 uint64_t helper_pmullh(uint64_t fs, uint64_t ft)
653 {
654     LMIValue vs, vt;
655     unsigned i;
656 
657     vs.d = fs;
658     vt.d = ft;
659     for (i = 0; i < 4; ++i) {
660         vs.sh[i] *= vt.sh[i];
661     }
662     return vs.d;
663 }
664 
helper_pmulhh(uint64_t fs,uint64_t ft)665 uint64_t helper_pmulhh(uint64_t fs, uint64_t ft)
666 {
667     LMIValue vs, vt;
668     unsigned i;
669 
670     vs.d = fs;
671     vt.d = ft;
672     for (i = 0; i < 4; ++i) {
673         int32_t r = vs.sh[i] * vt.sh[i];
674         vs.sh[i] = r >> 16;
675     }
676     return vs.d;
677 }
678 
helper_pmulhuh(uint64_t fs,uint64_t ft)679 uint64_t helper_pmulhuh(uint64_t fs, uint64_t ft)
680 {
681     LMIValue vs, vt;
682     unsigned i;
683 
684     vs.d = fs;
685     vt.d = ft;
686     for (i = 0; i < 4; ++i) {
687         uint32_t r = vs.uh[i] * vt.uh[i];
688         vs.uh[i] = r >> 16;
689     }
690     return vs.d;
691 }
692 
helper_pmaddhw(uint64_t fs,uint64_t ft)693 uint64_t helper_pmaddhw(uint64_t fs, uint64_t ft)
694 {
695     unsigned host = BYTE_ORDER_XOR(3);
696     LMIValue vs, vt;
697     uint32_t p0, p1;
698 
699     vs.d = fs;
700     vt.d = ft;
701     p0  = vs.sh[0 ^ host] * vt.sh[0 ^ host];
702     p0 += vs.sh[1 ^ host] * vt.sh[1 ^ host];
703     p1  = vs.sh[2 ^ host] * vt.sh[2 ^ host];
704     p1 += vs.sh[3 ^ host] * vt.sh[3 ^ host];
705 
706     return ((uint64_t)p1 << 32) | p0;
707 }
708 
helper_pasubub(uint64_t fs,uint64_t ft)709 uint64_t helper_pasubub(uint64_t fs, uint64_t ft)
710 {
711     LMIValue vs, vt;
712     unsigned i;
713 
714     vs.d = fs;
715     vt.d = ft;
716     for (i = 0; i < 8; ++i) {
717         int r = vs.ub[i] - vt.ub[i];
718         vs.ub[i] = (r < 0 ? -r : r);
719     }
720     return vs.d;
721 }
722 
helper_biadd(uint64_t fs)723 uint64_t helper_biadd(uint64_t fs)
724 {
725     unsigned i, fd;
726 
727     for (i = fd = 0; i < 8; ++i) {
728         fd += (fs >> (i * 8)) & 0xff;
729     }
730     return fd & 0xffff;
731 }
732 
helper_pmovmskb(uint64_t fs)733 uint64_t helper_pmovmskb(uint64_t fs)
734 {
735     unsigned fd = 0;
736 
737     fd |= ((fs >>  7) & 1) << 0;
738     fd |= ((fs >> 15) & 1) << 1;
739     fd |= ((fs >> 23) & 1) << 2;
740     fd |= ((fs >> 31) & 1) << 3;
741     fd |= ((fs >> 39) & 1) << 4;
742     fd |= ((fs >> 47) & 1) << 5;
743     fd |= ((fs >> 55) & 1) << 6;
744     fd |= ((fs >> 63) & 1) << 7;
745 
746     return fd & 0xff;
747 }
748