1 /*
2 * Loongson Multimedia Instruction emulation helpers for QEMU.
3 *
4 * Copyright (c) 2011 Richard Henderson <rth@twiddle.net>
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "exec/helper-proto.h"
23
/*
 * One 64-bit multimedia operand viewed as packed lanes.
 *
 * When every lane is treated identically, byte ordering doesn't matter
 * and this union can be used directly.  When lane position does matter,
 * the helpers generally avoid relying on the in-memory layout: they
 * either shift the 64-bit value or adjust the lane index for the host.
 */
typedef union {
    /* Eight 8-bit lanes. */
    uint8_t  ub[8];
    int8_t   sb[8];
    /* Four 16-bit lanes. */
    uint16_t uh[4];
    int16_t  sh[4];
    /* Two 32-bit lanes. */
    uint32_t uw[2];
    int32_t  sw[2];
    /* The whole doubleword. */
    uint64_t d;
} LMIValue;
38
/*
 * Some byte ordering issues can be mitigated by XORing in the following.
 * BYTE_ORDER_XOR(N) expands to N on big-endian hosts and to 0 otherwise,
 * so XORing a lane index with it leaves the index unchanged on
 * little-endian hosts.  The argument is parenthesized so that compound
 * expressions can be passed safely.
 */
#if HOST_BIG_ENDIAN
# define BYTE_ORDER_XOR(N) (N)
#else
# define BYTE_ORDER_XOR(N) 0
#endif
45
/*
 * Saturation helpers.  The signed variants clamp to both ends of the
 * target range; the unsigned variants only clamp the upper bound and pass
 * smaller values (including negative ones) through unchanged.
 *
 * Arguments are parenthesized so compound expressions expand correctly.
 * Each macro still evaluates its argument more than once, so do not pass
 * expressions with side effects.
 */
#define SATSB(x)  ((x) < -0x80 ? -0x80 : (x) > 0x7f ? 0x7f : (x))
#define SATUB(x)  ((x) > 0xff ? 0xff : (x))

#define SATSH(x)  ((x) < -0x8000 ? -0x8000 : (x) > 0x7fff ? 0x7fff : (x))
#define SATUH(x)  ((x) > 0xffff ? 0xffff : (x))

#define SATSW(x) \
    ((x) < -0x80000000ll ? -0x80000000ll : (x) > 0x7fffffff ? 0x7fffffff : (x))
#define SATUW(x)  ((x) > 0xffffffffull ? 0xffffffffull : (x))
55
helper_paddsb(uint64_t fs,uint64_t ft)56 uint64_t helper_paddsb(uint64_t fs, uint64_t ft)
57 {
58 LMIValue vs, vt;
59 unsigned int i;
60
61 vs.d = fs;
62 vt.d = ft;
63 for (i = 0; i < 8; ++i) {
64 int r = vs.sb[i] + vt.sb[i];
65 vs.sb[i] = SATSB(r);
66 }
67 return vs.d;
68 }
69
helper_paddusb(uint64_t fs,uint64_t ft)70 uint64_t helper_paddusb(uint64_t fs, uint64_t ft)
71 {
72 LMIValue vs, vt;
73 unsigned int i;
74
75 vs.d = fs;
76 vt.d = ft;
77 for (i = 0; i < 8; ++i) {
78 int r = vs.ub[i] + vt.ub[i];
79 vs.ub[i] = SATUB(r);
80 }
81 return vs.d;
82 }
83
helper_paddsh(uint64_t fs,uint64_t ft)84 uint64_t helper_paddsh(uint64_t fs, uint64_t ft)
85 {
86 LMIValue vs, vt;
87 unsigned int i;
88
89 vs.d = fs;
90 vt.d = ft;
91 for (i = 0; i < 4; ++i) {
92 int r = vs.sh[i] + vt.sh[i];
93 vs.sh[i] = SATSH(r);
94 }
95 return vs.d;
96 }
97
helper_paddush(uint64_t fs,uint64_t ft)98 uint64_t helper_paddush(uint64_t fs, uint64_t ft)
99 {
100 LMIValue vs, vt;
101 unsigned int i;
102
103 vs.d = fs;
104 vt.d = ft;
105 for (i = 0; i < 4; ++i) {
106 int r = vs.uh[i] + vt.uh[i];
107 vs.uh[i] = SATUH(r);
108 }
109 return vs.d;
110 }
111
helper_paddb(uint64_t fs,uint64_t ft)112 uint64_t helper_paddb(uint64_t fs, uint64_t ft)
113 {
114 LMIValue vs, vt;
115 unsigned int i;
116
117 vs.d = fs;
118 vt.d = ft;
119 for (i = 0; i < 8; ++i) {
120 vs.ub[i] += vt.ub[i];
121 }
122 return vs.d;
123 }
124
helper_paddh(uint64_t fs,uint64_t ft)125 uint64_t helper_paddh(uint64_t fs, uint64_t ft)
126 {
127 LMIValue vs, vt;
128 unsigned int i;
129
130 vs.d = fs;
131 vt.d = ft;
132 for (i = 0; i < 4; ++i) {
133 vs.uh[i] += vt.uh[i];
134 }
135 return vs.d;
136 }
137
helper_paddw(uint64_t fs,uint64_t ft)138 uint64_t helper_paddw(uint64_t fs, uint64_t ft)
139 {
140 LMIValue vs, vt;
141 unsigned int i;
142
143 vs.d = fs;
144 vt.d = ft;
145 for (i = 0; i < 2; ++i) {
146 vs.uw[i] += vt.uw[i];
147 }
148 return vs.d;
149 }
150
helper_psubsb(uint64_t fs,uint64_t ft)151 uint64_t helper_psubsb(uint64_t fs, uint64_t ft)
152 {
153 LMIValue vs, vt;
154 unsigned int i;
155
156 vs.d = fs;
157 vt.d = ft;
158 for (i = 0; i < 8; ++i) {
159 int r = vs.sb[i] - vt.sb[i];
160 vs.sb[i] = SATSB(r);
161 }
162 return vs.d;
163 }
164
/*
 * psubusb helper: subtract the eight unsigned byte lanes of ft from those
 * of fs with unsigned saturation.
 *
 * A lane whose subtrahend is larger than its minuend yields 0.  The lanes
 * are extracted with shifts, so the result does not depend on host byte
 * order.
 *
 * Returns the eight result bytes packed in the same lane positions.
 */
uint64_t helper_psubusb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        uint64_t a = (fs >> shift) & 0xff;
        uint64_t b = (ft >> shift) & 0xff;

        /* Clamp at zero rather than letting the difference wrap. */
        fd |= (a > b ? a - b : 0) << shift;
    }
    return fd;
}
178
helper_psubsh(uint64_t fs,uint64_t ft)179 uint64_t helper_psubsh(uint64_t fs, uint64_t ft)
180 {
181 LMIValue vs, vt;
182 unsigned int i;
183
184 vs.d = fs;
185 vt.d = ft;
186 for (i = 0; i < 4; ++i) {
187 int r = vs.sh[i] - vt.sh[i];
188 vs.sh[i] = SATSH(r);
189 }
190 return vs.d;
191 }
192
/*
 * psubush helper: subtract the four unsigned halfword lanes of ft from
 * those of fs with unsigned saturation.
 *
 * A lane whose subtrahend is larger than its minuend yields 0.  The lanes
 * are extracted with shifts, so the result does not depend on host byte
 * order.
 *
 * Returns the four result halfwords packed in the same lane positions.
 */
uint64_t helper_psubush(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        uint64_t a = (fs >> shift) & 0xffff;
        uint64_t b = (ft >> shift) & 0xffff;

        /* Clamp at zero rather than letting the difference wrap. */
        fd |= (a > b ? a - b : 0) << shift;
    }
    return fd;
}
206
helper_psubb(uint64_t fs,uint64_t ft)207 uint64_t helper_psubb(uint64_t fs, uint64_t ft)
208 {
209 LMIValue vs, vt;
210 unsigned int i;
211
212 vs.d = fs;
213 vt.d = ft;
214 for (i = 0; i < 8; ++i) {
215 vs.ub[i] -= vt.ub[i];
216 }
217 return vs.d;
218 }
219
helper_psubh(uint64_t fs,uint64_t ft)220 uint64_t helper_psubh(uint64_t fs, uint64_t ft)
221 {
222 LMIValue vs, vt;
223 unsigned int i;
224
225 vs.d = fs;
226 vt.d = ft;
227 for (i = 0; i < 4; ++i) {
228 vs.uh[i] -= vt.uh[i];
229 }
230 return vs.d;
231 }
232
helper_psubw(uint64_t fs,uint64_t ft)233 uint64_t helper_psubw(uint64_t fs, uint64_t ft)
234 {
235 LMIValue vs, vt;
236 unsigned int i;
237
238 vs.d = fs;
239 vt.d = ft;
240 for (i = 0; i < 2; ++i) {
241 vs.uw[i] -= vt.uw[i];
242 }
243 return vs.d;
244 }
245
helper_pshufh(uint64_t fs,uint64_t ft)246 uint64_t helper_pshufh(uint64_t fs, uint64_t ft)
247 {
248 unsigned host = BYTE_ORDER_XOR(3);
249 LMIValue vd, vs;
250 unsigned i;
251
252 vs.d = fs;
253 vd.d = 0;
254 for (i = 0; i < 4; i++, ft >>= 2) {
255 vd.uh[i ^ host] = vs.uh[(ft & 3) ^ host];
256 }
257 return vd.d;
258 }
259
/*
 * packsswh helper: narrow the four signed words of fs and ft to signed
 * halfwords, clamping each to [-0x8000, 0x7fff] with SATSH.
 *
 * From least to most significant, the result halfwords come from the low
 * word of fs, the high word of fs, the low word of ft and the high word
 * of ft.
 */
uint64_t helper_packsswh(uint64_t fs, uint64_t ft)
{
    const uint64_t src[2] = { fs, ft };
    uint64_t fd = 0;

    for (unsigned int n = 0; n < 4; n++) {
        int64_t word = (int32_t)(src[n / 2] >> (32 * (n % 2)));

        word = SATSH(word);
        fd |= (word & 0xffff) << (16 * n);
    }
    return fd;
}
283
/*
 * packsshb helper: narrow the eight signed halfwords of fs and ft to
 * signed bytes, clamping each to [-0x80, 0x7f] with SATSB.
 *
 * The four halfwords of fs fill result bytes 0..3 and the four halfwords
 * of ft fill result bytes 4..7 (byte 0 being the least significant).
 */
uint64_t helper_packsshb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;

    for (unsigned int n = 0; n < 8; n++) {
        const uint64_t src = n < 4 ? fs : ft;
        int16_t half = src >> ((n % 4) * 16);

        half = SATSB(half);
        fd |= (uint64_t)(half & 0xff) << (n * 8);
    }
    return fd;
}
302
/*
 * packushb helper: narrow the eight halfwords of fs and ft to unsigned
 * bytes with unsigned saturation.
 *
 * Each halfword is interpreted as a signed 16-bit value and clamped to
 * [0, 0xff]: negative values become 0 and values above 0xff become 0xff.
 * The four halfwords of fs fill result bytes 0..3 and the four halfwords
 * of ft fill result bytes 4..7 (byte 0 being the least significant).
 */
uint64_t helper_packushb(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int i;

    for (i = 0; i < 8; ++i) {
        const uint64_t src = i < 4 ? fs : ft;
        const unsigned int raw = (src >> ((i & 3) * 16)) & 0xffff;
        uint64_t byte;

        if (raw & 0x8000) {
            /* Negative as a signed halfword: saturate to zero. */
            byte = 0;
        } else if (raw > 0xff) {
            byte = 0xff;
        } else {
            byte = raw;
        }
        fd |= byte << (i * 8);
    }
    return fd;
}
321
/*
 * punpcklwd helper: combine the low words of both operands.  The low
 * word of fs becomes the low word of the result and the low word of ft
 * becomes the high word.
 */
uint64_t helper_punpcklwd(uint64_t fs, uint64_t ft)
{
    const uint64_t fs_low = (uint32_t)fs;
    const uint64_t ft_low = (uint32_t)ft;

    return (ft_low << 32) | fs_low;
}
326
/*
 * punpckhwd helper: combine the high words of both operands.  The high
 * word of fs becomes the low word of the result and the high word of ft
 * stays in the high word.
 */
uint64_t helper_punpckhwd(uint64_t fs, uint64_t ft)
{
    const uint64_t fs_high = fs >> 32;
    const uint64_t ft_high = ft >> 32;

    return (ft_high << 32) | fs_high;
}
331
helper_punpcklhw(uint64_t fs,uint64_t ft)332 uint64_t helper_punpcklhw(uint64_t fs, uint64_t ft)
333 {
334 unsigned host = BYTE_ORDER_XOR(3);
335 LMIValue vd, vs, vt;
336
337 vs.d = fs;
338 vt.d = ft;
339 vd.uh[0 ^ host] = vs.uh[0 ^ host];
340 vd.uh[1 ^ host] = vt.uh[0 ^ host];
341 vd.uh[2 ^ host] = vs.uh[1 ^ host];
342 vd.uh[3 ^ host] = vt.uh[1 ^ host];
343
344 return vd.d;
345 }
346
helper_punpckhhw(uint64_t fs,uint64_t ft)347 uint64_t helper_punpckhhw(uint64_t fs, uint64_t ft)
348 {
349 unsigned host = BYTE_ORDER_XOR(3);
350 LMIValue vd, vs, vt;
351
352 vs.d = fs;
353 vt.d = ft;
354 vd.uh[0 ^ host] = vs.uh[2 ^ host];
355 vd.uh[1 ^ host] = vt.uh[2 ^ host];
356 vd.uh[2 ^ host] = vs.uh[3 ^ host];
357 vd.uh[3 ^ host] = vt.uh[3 ^ host];
358
359 return vd.d;
360 }
361
helper_punpcklbh(uint64_t fs,uint64_t ft)362 uint64_t helper_punpcklbh(uint64_t fs, uint64_t ft)
363 {
364 unsigned host = BYTE_ORDER_XOR(7);
365 LMIValue vd, vs, vt;
366
367 vs.d = fs;
368 vt.d = ft;
369 vd.ub[0 ^ host] = vs.ub[0 ^ host];
370 vd.ub[1 ^ host] = vt.ub[0 ^ host];
371 vd.ub[2 ^ host] = vs.ub[1 ^ host];
372 vd.ub[3 ^ host] = vt.ub[1 ^ host];
373 vd.ub[4 ^ host] = vs.ub[2 ^ host];
374 vd.ub[5 ^ host] = vt.ub[2 ^ host];
375 vd.ub[6 ^ host] = vs.ub[3 ^ host];
376 vd.ub[7 ^ host] = vt.ub[3 ^ host];
377
378 return vd.d;
379 }
380
helper_punpckhbh(uint64_t fs,uint64_t ft)381 uint64_t helper_punpckhbh(uint64_t fs, uint64_t ft)
382 {
383 unsigned host = BYTE_ORDER_XOR(7);
384 LMIValue vd, vs, vt;
385
386 vs.d = fs;
387 vt.d = ft;
388 vd.ub[0 ^ host] = vs.ub[4 ^ host];
389 vd.ub[1 ^ host] = vt.ub[4 ^ host];
390 vd.ub[2 ^ host] = vs.ub[5 ^ host];
391 vd.ub[3 ^ host] = vt.ub[5 ^ host];
392 vd.ub[4 ^ host] = vs.ub[6 ^ host];
393 vd.ub[5 ^ host] = vt.ub[6 ^ host];
394 vd.ub[6 ^ host] = vs.ub[7 ^ host];
395 vd.ub[7 ^ host] = vt.ub[7 ^ host];
396
397 return vd.d;
398 }
399
helper_pavgh(uint64_t fs,uint64_t ft)400 uint64_t helper_pavgh(uint64_t fs, uint64_t ft)
401 {
402 LMIValue vs, vt;
403 unsigned i;
404
405 vs.d = fs;
406 vt.d = ft;
407 for (i = 0; i < 4; i++) {
408 vs.uh[i] = (vs.uh[i] + vt.uh[i] + 1) >> 1;
409 }
410 return vs.d;
411 }
412
helper_pavgb(uint64_t fs,uint64_t ft)413 uint64_t helper_pavgb(uint64_t fs, uint64_t ft)
414 {
415 LMIValue vs, vt;
416 unsigned i;
417
418 vs.d = fs;
419 vt.d = ft;
420 for (i = 0; i < 8; i++) {
421 vs.ub[i] = (vs.ub[i] + vt.ub[i] + 1) >> 1;
422 }
423 return vs.d;
424 }
425
helper_pmaxsh(uint64_t fs,uint64_t ft)426 uint64_t helper_pmaxsh(uint64_t fs, uint64_t ft)
427 {
428 LMIValue vs, vt;
429 unsigned i;
430
431 vs.d = fs;
432 vt.d = ft;
433 for (i = 0; i < 4; i++) {
434 vs.sh[i] = (vs.sh[i] >= vt.sh[i] ? vs.sh[i] : vt.sh[i]);
435 }
436 return vs.d;
437 }
438
helper_pminsh(uint64_t fs,uint64_t ft)439 uint64_t helper_pminsh(uint64_t fs, uint64_t ft)
440 {
441 LMIValue vs, vt;
442 unsigned i;
443
444 vs.d = fs;
445 vt.d = ft;
446 for (i = 0; i < 4; i++) {
447 vs.sh[i] = (vs.sh[i] <= vt.sh[i] ? vs.sh[i] : vt.sh[i]);
448 }
449 return vs.d;
450 }
451
/*
 * pmaxub helper: lane-wise maximum of the eight unsigned bytes of fs
 * and ft.
 *
 * All eight byte lanes are processed.  The lanes are extracted with
 * shifts, so the result does not depend on host byte order.
 *
 * Returns the eight maxima packed in the same lane positions.
 */
uint64_t helper_pmaxub(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        uint64_t a = (fs >> shift) & 0xff;
        uint64_t b = (ft >> shift) & 0xff;

        fd |= (a >= b ? a : b) << shift;
    }
    return fd;
}
464
/*
 * pminub helper: lane-wise minimum of the eight unsigned bytes of fs
 * and ft.
 *
 * All eight byte lanes are processed.  The lanes are extracted with
 * shifts, so the result does not depend on host byte order.
 *
 * Returns the eight minima packed in the same lane positions.
 */
uint64_t helper_pminub(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        uint64_t a = (fs >> shift) & 0xff;
        uint64_t b = (ft >> shift) & 0xff;

        fd |= (a <= b ? a : b) << shift;
    }
    return fd;
}
477
helper_pcmpeqw(uint64_t fs,uint64_t ft)478 uint64_t helper_pcmpeqw(uint64_t fs, uint64_t ft)
479 {
480 LMIValue vs, vt;
481 unsigned i;
482
483 vs.d = fs;
484 vt.d = ft;
485 for (i = 0; i < 2; i++) {
486 vs.uw[i] = -(vs.uw[i] == vt.uw[i]);
487 }
488 return vs.d;
489 }
490
helper_pcmpgtw(uint64_t fs,uint64_t ft)491 uint64_t helper_pcmpgtw(uint64_t fs, uint64_t ft)
492 {
493 LMIValue vs, vt;
494 unsigned i;
495
496 vs.d = fs;
497 vt.d = ft;
498 for (i = 0; i < 2; i++) {
499 vs.uw[i] = -(vs.uw[i] > vt.uw[i]);
500 }
501 return vs.d;
502 }
503
helper_pcmpeqh(uint64_t fs,uint64_t ft)504 uint64_t helper_pcmpeqh(uint64_t fs, uint64_t ft)
505 {
506 LMIValue vs, vt;
507 unsigned i;
508
509 vs.d = fs;
510 vt.d = ft;
511 for (i = 0; i < 4; i++) {
512 vs.uh[i] = -(vs.uh[i] == vt.uh[i]);
513 }
514 return vs.d;
515 }
516
helper_pcmpgth(uint64_t fs,uint64_t ft)517 uint64_t helper_pcmpgth(uint64_t fs, uint64_t ft)
518 {
519 LMIValue vs, vt;
520 unsigned i;
521
522 vs.d = fs;
523 vt.d = ft;
524 for (i = 0; i < 4; i++) {
525 vs.uh[i] = -(vs.uh[i] > vt.uh[i]);
526 }
527 return vs.d;
528 }
529
helper_pcmpeqb(uint64_t fs,uint64_t ft)530 uint64_t helper_pcmpeqb(uint64_t fs, uint64_t ft)
531 {
532 LMIValue vs, vt;
533 unsigned i;
534
535 vs.d = fs;
536 vt.d = ft;
537 for (i = 0; i < 8; i++) {
538 vs.ub[i] = -(vs.ub[i] == vt.ub[i]);
539 }
540 return vs.d;
541 }
542
helper_pcmpgtb(uint64_t fs,uint64_t ft)543 uint64_t helper_pcmpgtb(uint64_t fs, uint64_t ft)
544 {
545 LMIValue vs, vt;
546 unsigned i;
547
548 vs.d = fs;
549 vt.d = ft;
550 for (i = 0; i < 8; i++) {
551 vs.ub[i] = -(vs.ub[i] > vt.ub[i]);
552 }
553 return vs.d;
554 }
555
helper_psllw(uint64_t fs,uint64_t ft)556 uint64_t helper_psllw(uint64_t fs, uint64_t ft)
557 {
558 LMIValue vs;
559 unsigned i;
560
561 ft &= 0x7f;
562 if (ft > 31) {
563 return 0;
564 }
565 vs.d = fs;
566 for (i = 0; i < 2; ++i) {
567 vs.uw[i] <<= ft;
568 }
569 return vs.d;
570 }
571
helper_psrlw(uint64_t fs,uint64_t ft)572 uint64_t helper_psrlw(uint64_t fs, uint64_t ft)
573 {
574 LMIValue vs;
575 unsigned i;
576
577 ft &= 0x7f;
578 if (ft > 31) {
579 return 0;
580 }
581 vs.d = fs;
582 for (i = 0; i < 2; ++i) {
583 vs.uw[i] >>= ft;
584 }
585 return vs.d;
586 }
587
helper_psraw(uint64_t fs,uint64_t ft)588 uint64_t helper_psraw(uint64_t fs, uint64_t ft)
589 {
590 LMIValue vs;
591 unsigned i;
592
593 ft &= 0x7f;
594 if (ft > 31) {
595 ft = 31;
596 }
597 vs.d = fs;
598 for (i = 0; i < 2; ++i) {
599 vs.sw[i] >>= ft;
600 }
601 return vs.d;
602 }
603
helper_psllh(uint64_t fs,uint64_t ft)604 uint64_t helper_psllh(uint64_t fs, uint64_t ft)
605 {
606 LMIValue vs;
607 unsigned i;
608
609 ft &= 0x7f;
610 if (ft > 15) {
611 return 0;
612 }
613 vs.d = fs;
614 for (i = 0; i < 4; ++i) {
615 vs.uh[i] <<= ft;
616 }
617 return vs.d;
618 }
619
helper_psrlh(uint64_t fs,uint64_t ft)620 uint64_t helper_psrlh(uint64_t fs, uint64_t ft)
621 {
622 LMIValue vs;
623 unsigned i;
624
625 ft &= 0x7f;
626 if (ft > 15) {
627 return 0;
628 }
629 vs.d = fs;
630 for (i = 0; i < 4; ++i) {
631 vs.uh[i] >>= ft;
632 }
633 return vs.d;
634 }
635
helper_psrah(uint64_t fs,uint64_t ft)636 uint64_t helper_psrah(uint64_t fs, uint64_t ft)
637 {
638 LMIValue vs;
639 unsigned i;
640
641 ft &= 0x7f;
642 if (ft > 15) {
643 ft = 15;
644 }
645 vs.d = fs;
646 for (i = 0; i < 4; ++i) {
647 vs.sh[i] >>= ft;
648 }
649 return vs.d;
650 }
651
helper_pmullh(uint64_t fs,uint64_t ft)652 uint64_t helper_pmullh(uint64_t fs, uint64_t ft)
653 {
654 LMIValue vs, vt;
655 unsigned i;
656
657 vs.d = fs;
658 vt.d = ft;
659 for (i = 0; i < 4; ++i) {
660 vs.sh[i] *= vt.sh[i];
661 }
662 return vs.d;
663 }
664
helper_pmulhh(uint64_t fs,uint64_t ft)665 uint64_t helper_pmulhh(uint64_t fs, uint64_t ft)
666 {
667 LMIValue vs, vt;
668 unsigned i;
669
670 vs.d = fs;
671 vt.d = ft;
672 for (i = 0; i < 4; ++i) {
673 int32_t r = vs.sh[i] * vt.sh[i];
674 vs.sh[i] = r >> 16;
675 }
676 return vs.d;
677 }
678
/*
 * pmulhuh helper: multiply the four unsigned halfword lanes of fs and ft,
 * keeping bits [31:16] of each 32-bit product.
 *
 * The operands are widened to uint32_t before multiplying.  Multiplying
 * two uint16_t values directly would promote them to (signed) int, and
 * 0xffff * 0xffff does not fit in a 32-bit int.  The lanes are extracted
 * with shifts, so the result does not depend on host byte order.
 *
 * Returns the four high halves packed in the same lane positions.
 */
uint64_t helper_pmulhuh(uint64_t fs, uint64_t ft)
{
    uint64_t fd = 0;
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 16) {
        uint32_t a = (fs >> shift) & 0xffff;
        uint32_t b = (ft >> shift) & 0xffff;
        uint32_t product = a * b;

        fd |= (uint64_t)(product >> 16) << shift;
    }
    return fd;
}
692
helper_pmaddhw(uint64_t fs,uint64_t ft)693 uint64_t helper_pmaddhw(uint64_t fs, uint64_t ft)
694 {
695 unsigned host = BYTE_ORDER_XOR(3);
696 LMIValue vs, vt;
697 uint32_t p0, p1;
698
699 vs.d = fs;
700 vt.d = ft;
701 p0 = vs.sh[0 ^ host] * vt.sh[0 ^ host];
702 p0 += vs.sh[1 ^ host] * vt.sh[1 ^ host];
703 p1 = vs.sh[2 ^ host] * vt.sh[2 ^ host];
704 p1 += vs.sh[3 ^ host] * vt.sh[3 ^ host];
705
706 return ((uint64_t)p1 << 32) | p0;
707 }
708
helper_pasubub(uint64_t fs,uint64_t ft)709 uint64_t helper_pasubub(uint64_t fs, uint64_t ft)
710 {
711 LMIValue vs, vt;
712 unsigned i;
713
714 vs.d = fs;
715 vt.d = ft;
716 for (i = 0; i < 8; ++i) {
717 int r = vs.ub[i] - vt.ub[i];
718 vs.ub[i] = (r < 0 ? -r : r);
719 }
720 return vs.d;
721 }
722
/*
 * biadd helper: sum the eight unsigned bytes of fs and return the total
 * masked to 16 bits.
 */
uint64_t helper_biadd(uint64_t fs)
{
    unsigned int total = 0;

    /* Consume one byte per iteration; remaining zero bytes add nothing. */
    while (fs != 0) {
        total += fs & 0xff;
        fs >>= 8;
    }
    return total & 0xffff;
}
732
/*
 * pmovmskb helper: gather the most significant bit of each of the eight
 * bytes of fs.  Bit n of the result is bit 7 of byte n (byte 0 being the
 * least significant); the upper result bits are zero.
 */
uint64_t helper_pmovmskb(uint64_t fs)
{
    unsigned int mask = 0;

    for (unsigned int n = 0; n < 8; n++) {
        mask |= ((fs >> (8 * n + 7)) & 1) << n;
    }
    return mask & 0xff;
}
748