xref: /openbmc/qemu/target/mips/tcg/lmmi_helper.c (revision 1be5a765c08cee3a9587c8a8d3fc2ea247b13f9c)
1  /*
2   *  Loongson Multimedia Instruction emulation helpers for QEMU.
3   *
4   *  Copyright (c) 2011  Richard Henderson <rth@twiddle.net>
5   *
6   * This library is free software; you can redistribute it and/or
7   * modify it under the terms of the GNU Lesser General Public
8   * License as published by the Free Software Foundation; either
9   * version 2.1 of the License, or (at your option) any later version.
10   *
11   * This library is distributed in the hope that it will be useful,
12   * but WITHOUT ANY WARRANTY; without even the implied warranty of
13   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14   * Lesser General Public License for more details.
15   *
16   * You should have received a copy of the GNU Lesser General Public
17   * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18   */
19  
20  #include "qemu/osdep.h"
21  #include "cpu.h"
22  #include "exec/helper-proto.h"
23  
/*
 * A 64-bit value overlaid with byte, halfword and word lane views.
 *
 * Helpers that treat every lane identically can index the arrays
 * directly, because host byte order then has no effect on the result.
 * Helpers that care about a specific lane position must compensate for
 * host byte order themselves; how the value would look when dumped to
 * memory is generally ignored.
 */
typedef union {
    uint8_t  ub[8];     /* eight unsigned bytes */
    int8_t   sb[8];     /* eight signed bytes */
    uint16_t uh[4];     /* four unsigned halfwords */
    int16_t  sh[4];     /* four signed halfwords */
    uint32_t uw[2];     /* two unsigned words */
    int32_t  sw[2];     /* two signed words */
    uint64_t d;         /* the whole doubleword */
} LMIValue;
38  
/*
 * Some byte ordering issues can be mitigated by XORing a lane index with
 * the following: on a big-endian host it yields N (mirroring the index),
 * on a little-endian host it yields 0 (leaving the index unchanged).
 * The expansion is parenthesized so it is safe inside larger expressions.
 */
#if HOST_BIG_ENDIAN
# define BYTE_ORDER_XOR(N) (N)
#else
# define BYTE_ORDER_XOR(N) 0
#endif
45  
/*
 * Saturation helpers.  The signed variants (SATS*) clamp to both ends of
 * the target range; the unsigned variants (SATU*) only cap the upper
 * bound and pass smaller values (including negative ones) through.
 *
 * Every use of the argument is parenthesized so that expressions with
 * low-precedence operators (e.g. "c ? a : b") are handled correctly.
 * The argument is still evaluated more than once, so it must be free of
 * side effects.
 */
#define SATSB(x)  ((x) < -0x80 ? -0x80 : (x) > 0x7f ? 0x7f : (x))
#define SATUB(x)  ((x) > 0xff ? 0xff : (x))

#define SATSH(x)  ((x) < -0x8000 ? -0x8000 : (x) > 0x7fff ? 0x7fff : (x))
#define SATUH(x)  ((x) > 0xffff ? 0xffff : (x))

#define SATSW(x) \
    ((x) < -0x80000000ll ? -0x80000000ll : (x) > 0x7fffffff ? 0x7fffffff : (x))
#define SATUW(x)  ((x) > 0xffffffffull ? 0xffffffffull : (x))
55  
helper_paddsb(uint64_t fs,uint64_t ft)56  uint64_t helper_paddsb(uint64_t fs, uint64_t ft)
57  {
58      LMIValue vs, vt;
59      unsigned int i;
60  
61      vs.d = fs;
62      vt.d = ft;
63      for (i = 0; i < 8; ++i) {
64          int r = vs.sb[i] + vt.sb[i];
65          vs.sb[i] = SATSB(r);
66      }
67      return vs.d;
68  }
69  
helper_paddusb(uint64_t fs,uint64_t ft)70  uint64_t helper_paddusb(uint64_t fs, uint64_t ft)
71  {
72      LMIValue vs, vt;
73      unsigned int i;
74  
75      vs.d = fs;
76      vt.d = ft;
77      for (i = 0; i < 8; ++i) {
78          int r = vs.ub[i] + vt.ub[i];
79          vs.ub[i] = SATUB(r);
80      }
81      return vs.d;
82  }
83  
helper_paddsh(uint64_t fs,uint64_t ft)84  uint64_t helper_paddsh(uint64_t fs, uint64_t ft)
85  {
86      LMIValue vs, vt;
87      unsigned int i;
88  
89      vs.d = fs;
90      vt.d = ft;
91      for (i = 0; i < 4; ++i) {
92          int r = vs.sh[i] + vt.sh[i];
93          vs.sh[i] = SATSH(r);
94      }
95      return vs.d;
96  }
97  
helper_paddush(uint64_t fs,uint64_t ft)98  uint64_t helper_paddush(uint64_t fs, uint64_t ft)
99  {
100      LMIValue vs, vt;
101      unsigned int i;
102  
103      vs.d = fs;
104      vt.d = ft;
105      for (i = 0; i < 4; ++i) {
106          int r = vs.uh[i] + vt.uh[i];
107          vs.uh[i] = SATUH(r);
108      }
109      return vs.d;
110  }
111  
helper_paddb(uint64_t fs,uint64_t ft)112  uint64_t helper_paddb(uint64_t fs, uint64_t ft)
113  {
114      LMIValue vs, vt;
115      unsigned int i;
116  
117      vs.d = fs;
118      vt.d = ft;
119      for (i = 0; i < 8; ++i) {
120          vs.ub[i] += vt.ub[i];
121      }
122      return vs.d;
123  }
124  
helper_paddh(uint64_t fs,uint64_t ft)125  uint64_t helper_paddh(uint64_t fs, uint64_t ft)
126  {
127      LMIValue vs, vt;
128      unsigned int i;
129  
130      vs.d = fs;
131      vt.d = ft;
132      for (i = 0; i < 4; ++i) {
133          vs.uh[i] += vt.uh[i];
134      }
135      return vs.d;
136  }
137  
helper_paddw(uint64_t fs,uint64_t ft)138  uint64_t helper_paddw(uint64_t fs, uint64_t ft)
139  {
140      LMIValue vs, vt;
141      unsigned int i;
142  
143      vs.d = fs;
144      vt.d = ft;
145      for (i = 0; i < 2; ++i) {
146          vs.uw[i] += vt.uw[i];
147      }
148      return vs.d;
149  }
150  
helper_psubsb(uint64_t fs,uint64_t ft)151  uint64_t helper_psubsb(uint64_t fs, uint64_t ft)
152  {
153      LMIValue vs, vt;
154      unsigned int i;
155  
156      vs.d = fs;
157      vt.d = ft;
158      for (i = 0; i < 8; ++i) {
159          int r = vs.sb[i] - vt.sb[i];
160          vs.sb[i] = SATSB(r);
161      }
162      return vs.d;
163  }
164  
/*
 * Helper for psubusb: subtract the eight unsigned byte lanes of ft from
 * those of fs with unsigned saturation.
 *
 * The difference of two bytes can never exceed 0xff, so the only
 * saturation case is a negative difference, which is clamped to 0.
 * Lanes are extracted with shifts, so the result does not depend on
 * host byte order.
 *
 * Returns the eight saturated differences packed into one 64-bit value.
 */
uint64_t helper_psubusb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int i;

    for (i = 0; i < 8; ++i) {
        unsigned int shift = i * 8;
        unsigned int s = (fs >> shift) & 0xff;
        unsigned int t = (ft >> shift) & 0xff;
        /* Clamp at zero instead of letting the lane wrap around. */
        unsigned int r = s > t ? s - t : 0;

        fd |= (uint64_t)r << shift;
    }
    return fd;
}
178  
helper_psubsh(uint64_t fs,uint64_t ft)179  uint64_t helper_psubsh(uint64_t fs, uint64_t ft)
180  {
181      LMIValue vs, vt;
182      unsigned int i;
183  
184      vs.d = fs;
185      vt.d = ft;
186      for (i = 0; i < 4; ++i) {
187          int r = vs.sh[i] - vt.sh[i];
188          vs.sh[i] = SATSH(r);
189      }
190      return vs.d;
191  }
192  
/*
 * Helper for psubush: subtract the four unsigned halfword lanes of ft
 * from those of fs with unsigned saturation.
 *
 * The difference of two halfwords can never exceed 0xffff, so the only
 * saturation case is a negative difference, which is clamped to 0.
 * Lanes are extracted with shifts, so the result does not depend on
 * host byte order.
 *
 * Returns the four saturated differences packed into one 64-bit value.
 */
uint64_t helper_psubush(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int i;

    for (i = 0; i < 4; ++i) {
        unsigned int shift = i * 16;
        unsigned int s = (fs >> shift) & 0xffff;
        unsigned int t = (ft >> shift) & 0xffff;
        /* Clamp at zero instead of letting the lane wrap around. */
        unsigned int r = s > t ? s - t : 0;

        fd |= (uint64_t)r << shift;
    }
    return fd;
}
206  
helper_psubb(uint64_t fs,uint64_t ft)207  uint64_t helper_psubb(uint64_t fs, uint64_t ft)
208  {
209      LMIValue vs, vt;
210      unsigned int i;
211  
212      vs.d = fs;
213      vt.d = ft;
214      for (i = 0; i < 8; ++i) {
215          vs.ub[i] -= vt.ub[i];
216      }
217      return vs.d;
218  }
219  
helper_psubh(uint64_t fs,uint64_t ft)220  uint64_t helper_psubh(uint64_t fs, uint64_t ft)
221  {
222      LMIValue vs, vt;
223      unsigned int i;
224  
225      vs.d = fs;
226      vt.d = ft;
227      for (i = 0; i < 4; ++i) {
228          vs.uh[i] -= vt.uh[i];
229      }
230      return vs.d;
231  }
232  
helper_psubw(uint64_t fs,uint64_t ft)233  uint64_t helper_psubw(uint64_t fs, uint64_t ft)
234  {
235      LMIValue vs, vt;
236      unsigned int i;
237  
238      vs.d = fs;
239      vt.d = ft;
240      for (i = 0; i < 2; ++i) {
241          vs.uw[i] -= vt.uw[i];
242      }
243      return vs.d;
244  }
245  
helper_pshufh(uint64_t fs,uint64_t ft)246  uint64_t helper_pshufh(uint64_t fs, uint64_t ft)
247  {
248      unsigned host = BYTE_ORDER_XOR(3);
249      LMIValue vd, vs;
250      unsigned i;
251  
252      vs.d = fs;
253      vd.d = 0;
254      for (i = 0; i < 4; i++, ft >>= 2) {
255          vd.uh[i ^ host] = vs.uh[(ft & 3) ^ host];
256      }
257      return vd.d;
258  }
259  
helper_packsswh(uint64_t fs,uint64_t ft)260  uint64_t helper_packsswh(uint64_t fs, uint64_t ft)
261  {
262      uint64_t fd = 0;
263      int64_t tmp;
264  
265      tmp = (int32_t)(fs >> 0);
266      tmp = SATSH(tmp);
267      fd |= (tmp & 0xffff) << 0;
268  
269      tmp = (int32_t)(fs >> 32);
270      tmp = SATSH(tmp);
271      fd |= (tmp & 0xffff) << 16;
272  
273      tmp = (int32_t)(ft >> 0);
274      tmp = SATSH(tmp);
275      fd |= (tmp & 0xffff) << 32;
276  
277      tmp = (int32_t)(ft >> 32);
278      tmp = SATSH(tmp);
279      fd |= (tmp & 0xffff) << 48;
280  
281      return fd;
282  }
283  
/*
 * Helper for packsshb: narrow eight signed halfwords to bytes, clamping
 * each to [-0x80, 0x7f] via SATSB.  The four halfwords of fs fill result
 * bytes 0..3 and the four halfwords of ft fill result bytes 4..7
 * (byte 0 being the least significant).
 */
uint64_t helper_packsshb(uint64_t fs, uint64_t ft)
{
    const uint64_t src[2] = { fs, ft };
    uint64_t packed = 0;
    unsigned int n;

    for (n = 0; n < 8; n++) {
        /* Halfword (n % 4) of fs for n < 4, of ft otherwise. */
        int16_t half = src[n / 4] >> ((n % 4) * 16);

        half = SATSB(half);
        packed |= (uint64_t)(half & 0xff) << (n * 8);
    }
    return packed;
}
302  
/*
 * Helper for packushb: narrow eight halfwords (read as int16_t) to
 * bytes, capping values above 0xff at 0xff via SATUB.  The four
 * halfwords of fs fill result bytes 0..3 and the four halfwords of ft
 * fill result bytes 4..7 (byte 0 being the least significant).
 *
 * NOTE(review): SATUB only caps the upper bound, so a negative halfword
 * is passed through and truncated to its low byte rather than clamped
 * to 0 -- confirm this matches the intended unsigned saturation.
 */
uint64_t helper_packushb(uint64_t fs, uint64_t ft)
{
    const uint64_t src[2] = { fs, ft };
    uint64_t packed = 0;
    unsigned int n;

    for (n = 0; n < 8; n++) {
        /* Halfword (n % 4) of fs for n < 4, of ft otherwise. */
        int16_t half = src[n / 4] >> ((n % 4) * 16);

        half = SATUB(half);
        packed |= (uint64_t)(half & 0xff) << (n * 8);
    }
    return packed;
}
321  
/*
 * Helper for punpcklwd: combine the low 32-bit words of both operands.
 * The low word of fs becomes the low word of the result and the low
 * word of ft becomes the high word.
 */
uint64_t helper_punpcklwd(uint64_t fs, uint64_t ft)
{
    const uint64_t low = (uint32_t)fs;
    const uint64_t high = ft << 32;

    return high | low;
}
326  
/*
 * Helper for punpckhwd: combine the high 32-bit words of both operands.
 * The high word of fs becomes the low word of the result and the high
 * word of ft stays in the high word.
 */
uint64_t helper_punpckhwd(uint64_t fs, uint64_t ft)
{
    const uint64_t low = fs >> 32;
    const uint64_t high = ft & 0xffffffff00000000ull;

    return high | low;
}
331  
helper_punpcklhw(uint64_t fs,uint64_t ft)332  uint64_t helper_punpcklhw(uint64_t fs, uint64_t ft)
333  {
334      unsigned host = BYTE_ORDER_XOR(3);
335      LMIValue vd, vs, vt;
336  
337      vs.d = fs;
338      vt.d = ft;
339      vd.uh[0 ^ host] = vs.uh[0 ^ host];
340      vd.uh[1 ^ host] = vt.uh[0 ^ host];
341      vd.uh[2 ^ host] = vs.uh[1 ^ host];
342      vd.uh[3 ^ host] = vt.uh[1 ^ host];
343  
344      return vd.d;
345  }
346  
helper_punpckhhw(uint64_t fs,uint64_t ft)347  uint64_t helper_punpckhhw(uint64_t fs, uint64_t ft)
348  {
349      unsigned host = BYTE_ORDER_XOR(3);
350      LMIValue vd, vs, vt;
351  
352      vs.d = fs;
353      vt.d = ft;
354      vd.uh[0 ^ host] = vs.uh[2 ^ host];
355      vd.uh[1 ^ host] = vt.uh[2 ^ host];
356      vd.uh[2 ^ host] = vs.uh[3 ^ host];
357      vd.uh[3 ^ host] = vt.uh[3 ^ host];
358  
359      return vd.d;
360  }
361  
helper_punpcklbh(uint64_t fs,uint64_t ft)362  uint64_t helper_punpcklbh(uint64_t fs, uint64_t ft)
363  {
364      unsigned host = BYTE_ORDER_XOR(7);
365      LMIValue vd, vs, vt;
366  
367      vs.d = fs;
368      vt.d = ft;
369      vd.ub[0 ^ host] = vs.ub[0 ^ host];
370      vd.ub[1 ^ host] = vt.ub[0 ^ host];
371      vd.ub[2 ^ host] = vs.ub[1 ^ host];
372      vd.ub[3 ^ host] = vt.ub[1 ^ host];
373      vd.ub[4 ^ host] = vs.ub[2 ^ host];
374      vd.ub[5 ^ host] = vt.ub[2 ^ host];
375      vd.ub[6 ^ host] = vs.ub[3 ^ host];
376      vd.ub[7 ^ host] = vt.ub[3 ^ host];
377  
378      return vd.d;
379  }
380  
helper_punpckhbh(uint64_t fs,uint64_t ft)381  uint64_t helper_punpckhbh(uint64_t fs, uint64_t ft)
382  {
383      unsigned host = BYTE_ORDER_XOR(7);
384      LMIValue vd, vs, vt;
385  
386      vs.d = fs;
387      vt.d = ft;
388      vd.ub[0 ^ host] = vs.ub[4 ^ host];
389      vd.ub[1 ^ host] = vt.ub[4 ^ host];
390      vd.ub[2 ^ host] = vs.ub[5 ^ host];
391      vd.ub[3 ^ host] = vt.ub[5 ^ host];
392      vd.ub[4 ^ host] = vs.ub[6 ^ host];
393      vd.ub[5 ^ host] = vt.ub[6 ^ host];
394      vd.ub[6 ^ host] = vs.ub[7 ^ host];
395      vd.ub[7 ^ host] = vt.ub[7 ^ host];
396  
397      return vd.d;
398  }
399  
helper_pavgh(uint64_t fs,uint64_t ft)400  uint64_t helper_pavgh(uint64_t fs, uint64_t ft)
401  {
402      LMIValue vs, vt;
403      unsigned i;
404  
405      vs.d = fs;
406      vt.d = ft;
407      for (i = 0; i < 4; i++) {
408          vs.uh[i] = (vs.uh[i] + vt.uh[i] + 1) >> 1;
409      }
410      return vs.d;
411  }
412  
helper_pavgb(uint64_t fs,uint64_t ft)413  uint64_t helper_pavgb(uint64_t fs, uint64_t ft)
414  {
415      LMIValue vs, vt;
416      unsigned i;
417  
418      vs.d = fs;
419      vt.d = ft;
420      for (i = 0; i < 8; i++) {
421          vs.ub[i] = (vs.ub[i] + vt.ub[i] + 1) >> 1;
422      }
423      return vs.d;
424  }
425  
helper_pmaxsh(uint64_t fs,uint64_t ft)426  uint64_t helper_pmaxsh(uint64_t fs, uint64_t ft)
427  {
428      LMIValue vs, vt;
429      unsigned i;
430  
431      vs.d = fs;
432      vt.d = ft;
433      for (i = 0; i < 4; i++) {
434          vs.sh[i] = (vs.sh[i] >= vt.sh[i] ? vs.sh[i] : vt.sh[i]);
435      }
436      return vs.d;
437  }
438  
helper_pminsh(uint64_t fs,uint64_t ft)439  uint64_t helper_pminsh(uint64_t fs, uint64_t ft)
440  {
441      LMIValue vs, vt;
442      unsigned i;
443  
444      vs.d = fs;
445      vt.d = ft;
446      for (i = 0; i < 4; i++) {
447          vs.sh[i] = (vs.sh[i] <= vt.sh[i] ? vs.sh[i] : vt.sh[i]);
448      }
449      return vs.d;
450  }
451  
/*
 * Helper for pmaxub: per-lane maximum of the unsigned bytes of fs and
 * ft.
 *
 * All eight byte lanes of the 64-bit operands are processed.  Lanes are
 * extracted with shifts, so the result does not depend on host byte
 * order.
 *
 * Returns the eight maxima packed into one 64-bit value.
 */
uint64_t helper_pmaxub(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int i;

    for (i = 0; i < 8; i++) {
        unsigned int shift = i * 8;
        uint64_t s = (fs >> shift) & 0xff;
        uint64_t t = (ft >> shift) & 0xff;

        fd |= (s >= t ? s : t) << shift;
    }
    return fd;
}
464  
/*
 * Helper for pminub: per-lane minimum of the unsigned bytes of fs and
 * ft.
 *
 * All eight byte lanes of the 64-bit operands are processed.  Lanes are
 * extracted with shifts, so the result does not depend on host byte
 * order.
 *
 * Returns the eight minima packed into one 64-bit value.
 */
uint64_t helper_pminub(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int i;

    for (i = 0; i < 8; i++) {
        unsigned int shift = i * 8;
        uint64_t s = (fs >> shift) & 0xff;
        uint64_t t = (ft >> shift) & 0xff;

        fd |= (s <= t ? s : t) << shift;
    }
    return fd;
}
477  
helper_pcmpeqw(uint64_t fs,uint64_t ft)478  uint64_t helper_pcmpeqw(uint64_t fs, uint64_t ft)
479  {
480      LMIValue vs, vt;
481      unsigned i;
482  
483      vs.d = fs;
484      vt.d = ft;
485      for (i = 0; i < 2; i++) {
486          vs.uw[i] = -(vs.uw[i] == vt.uw[i]);
487      }
488      return vs.d;
489  }
490  
helper_pcmpgtw(uint64_t fs,uint64_t ft)491  uint64_t helper_pcmpgtw(uint64_t fs, uint64_t ft)
492  {
493      LMIValue vs, vt;
494      unsigned i;
495  
496      vs.d = fs;
497      vt.d = ft;
498      for (i = 0; i < 2; i++) {
499          vs.uw[i] = -(vs.uw[i] > vt.uw[i]);
500      }
501      return vs.d;
502  }
503  
helper_pcmpeqh(uint64_t fs,uint64_t ft)504  uint64_t helper_pcmpeqh(uint64_t fs, uint64_t ft)
505  {
506      LMIValue vs, vt;
507      unsigned i;
508  
509      vs.d = fs;
510      vt.d = ft;
511      for (i = 0; i < 4; i++) {
512          vs.uh[i] = -(vs.uh[i] == vt.uh[i]);
513      }
514      return vs.d;
515  }
516  
helper_pcmpgth(uint64_t fs,uint64_t ft)517  uint64_t helper_pcmpgth(uint64_t fs, uint64_t ft)
518  {
519      LMIValue vs, vt;
520      unsigned i;
521  
522      vs.d = fs;
523      vt.d = ft;
524      for (i = 0; i < 4; i++) {
525          vs.uh[i] = -(vs.uh[i] > vt.uh[i]);
526      }
527      return vs.d;
528  }
529  
helper_pcmpeqb(uint64_t fs,uint64_t ft)530  uint64_t helper_pcmpeqb(uint64_t fs, uint64_t ft)
531  {
532      LMIValue vs, vt;
533      unsigned i;
534  
535      vs.d = fs;
536      vt.d = ft;
537      for (i = 0; i < 8; i++) {
538          vs.ub[i] = -(vs.ub[i] == vt.ub[i]);
539      }
540      return vs.d;
541  }
542  
helper_pcmpgtb(uint64_t fs,uint64_t ft)543  uint64_t helper_pcmpgtb(uint64_t fs, uint64_t ft)
544  {
545      LMIValue vs, vt;
546      unsigned i;
547  
548      vs.d = fs;
549      vt.d = ft;
550      for (i = 0; i < 8; i++) {
551          vs.ub[i] = -(vs.ub[i] > vt.ub[i]);
552      }
553      return vs.d;
554  }
555  
helper_psllw(uint64_t fs,uint64_t ft)556  uint64_t helper_psllw(uint64_t fs, uint64_t ft)
557  {
558      LMIValue vs;
559      unsigned i;
560  
561      ft &= 0x7f;
562      if (ft > 31) {
563          return 0;
564      }
565      vs.d = fs;
566      for (i = 0; i < 2; ++i) {
567          vs.uw[i] <<= ft;
568      }
569      return vs.d;
570  }
571  
helper_psrlw(uint64_t fs,uint64_t ft)572  uint64_t helper_psrlw(uint64_t fs, uint64_t ft)
573  {
574      LMIValue vs;
575      unsigned i;
576  
577      ft &= 0x7f;
578      if (ft > 31) {
579          return 0;
580      }
581      vs.d = fs;
582      for (i = 0; i < 2; ++i) {
583          vs.uw[i] >>= ft;
584      }
585      return vs.d;
586  }
587  
helper_psraw(uint64_t fs,uint64_t ft)588  uint64_t helper_psraw(uint64_t fs, uint64_t ft)
589  {
590      LMIValue vs;
591      unsigned i;
592  
593      ft &= 0x7f;
594      if (ft > 31) {
595          ft = 31;
596      }
597      vs.d = fs;
598      for (i = 0; i < 2; ++i) {
599          vs.sw[i] >>= ft;
600      }
601      return vs.d;
602  }
603  
helper_psllh(uint64_t fs,uint64_t ft)604  uint64_t helper_psllh(uint64_t fs, uint64_t ft)
605  {
606      LMIValue vs;
607      unsigned i;
608  
609      ft &= 0x7f;
610      if (ft > 15) {
611          return 0;
612      }
613      vs.d = fs;
614      for (i = 0; i < 4; ++i) {
615          vs.uh[i] <<= ft;
616      }
617      return vs.d;
618  }
619  
helper_psrlh(uint64_t fs,uint64_t ft)620  uint64_t helper_psrlh(uint64_t fs, uint64_t ft)
621  {
622      LMIValue vs;
623      unsigned i;
624  
625      ft &= 0x7f;
626      if (ft > 15) {
627          return 0;
628      }
629      vs.d = fs;
630      for (i = 0; i < 4; ++i) {
631          vs.uh[i] >>= ft;
632      }
633      return vs.d;
634  }
635  
helper_psrah(uint64_t fs,uint64_t ft)636  uint64_t helper_psrah(uint64_t fs, uint64_t ft)
637  {
638      LMIValue vs;
639      unsigned i;
640  
641      ft &= 0x7f;
642      if (ft > 15) {
643          ft = 15;
644      }
645      vs.d = fs;
646      for (i = 0; i < 4; ++i) {
647          vs.sh[i] >>= ft;
648      }
649      return vs.d;
650  }
651  
helper_pmullh(uint64_t fs,uint64_t ft)652  uint64_t helper_pmullh(uint64_t fs, uint64_t ft)
653  {
654      LMIValue vs, vt;
655      unsigned i;
656  
657      vs.d = fs;
658      vt.d = ft;
659      for (i = 0; i < 4; ++i) {
660          vs.sh[i] *= vt.sh[i];
661      }
662      return vs.d;
663  }
664  
helper_pmulhh(uint64_t fs,uint64_t ft)665  uint64_t helper_pmulhh(uint64_t fs, uint64_t ft)
666  {
667      LMIValue vs, vt;
668      unsigned i;
669  
670      vs.d = fs;
671      vt.d = ft;
672      for (i = 0; i < 4; ++i) {
673          int32_t r = vs.sh[i] * vt.sh[i];
674          vs.sh[i] = r >> 16;
675      }
676      return vs.d;
677  }
678  
/*
 * Helper for pmulhuh: multiply the four unsigned halfword lanes of fs
 * and ft and keep the upper 16 bits of each 32-bit product.
 *
 * The operands are held in uint32_t before multiplying.  Multiplying
 * two uint16_t values directly would promote them to (signed) int, and
 * a product such as 0xffff * 0xffff does not fit in a 32-bit int, which
 * is undefined behaviour.  Lanes are extracted with shifts, so the
 * result does not depend on host byte order.
 *
 * Returns the four high halves packed into one 64-bit value.
 */
uint64_t helper_pmulhuh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int i;

    for (i = 0; i < 4; ++i) {
        unsigned int shift = i * 16;
        uint32_t s = (fs >> shift) & 0xffff;
        uint32_t t = (ft >> shift) & 0xffff;
        uint32_t r = s * t;

        fd |= (uint64_t)(r >> 16) << shift;
    }
    return fd;
}
692  
helper_pmaddhw(uint64_t fs,uint64_t ft)693  uint64_t helper_pmaddhw(uint64_t fs, uint64_t ft)
694  {
695      unsigned host = BYTE_ORDER_XOR(3);
696      LMIValue vs, vt;
697      uint32_t p0, p1;
698  
699      vs.d = fs;
700      vt.d = ft;
701      p0  = vs.sh[0 ^ host] * vt.sh[0 ^ host];
702      p0 += vs.sh[1 ^ host] * vt.sh[1 ^ host];
703      p1  = vs.sh[2 ^ host] * vt.sh[2 ^ host];
704      p1 += vs.sh[3 ^ host] * vt.sh[3 ^ host];
705  
706      return ((uint64_t)p1 << 32) | p0;
707  }
708  
helper_pasubub(uint64_t fs,uint64_t ft)709  uint64_t helper_pasubub(uint64_t fs, uint64_t ft)
710  {
711      LMIValue vs, vt;
712      unsigned i;
713  
714      vs.d = fs;
715      vt.d = ft;
716      for (i = 0; i < 8; ++i) {
717          int r = vs.ub[i] - vt.ub[i];
718          vs.ub[i] = (r < 0 ? -r : r);
719      }
720      return vs.d;
721  }
722  
/*
 * Helper for biadd: add up the eight bytes of fs and return the sum,
 * masked to its low 16 bits.
 */
uint64_t helper_biadd(uint64_t fs)
{
    unsigned total = 0;

    /* Peel off one byte per iteration until nothing is left. */
    while (fs != 0) {
        total += fs & 0xff;
        fs >>= 8;
    }
    return total & 0xffff;
}
732  
/*
 * Helper for pmovmskb: gather the most significant bit of each of the
 * eight bytes of fs into an 8-bit mask.  Bit i of the result is the top
 * bit of byte i, byte 0 being the least significant.
 */
uint64_t helper_pmovmskb(uint64_t fs)
{
    unsigned mask = 0;
    unsigned lane;

    for (lane = 0; lane < 8; lane++) {
        /* The top bit of byte 'lane' sits at bit position 8 * lane + 7. */
        mask |= ((fs >> (8 * lane + 7)) & 1) << lane;
    }
    return mask & 0xff;
}
748