1a2b0a27dSPhilippe Mathieu-Daudé /* 2a2b0a27dSPhilippe Mathieu-Daudé * Loongson Multimedia Instruction emulation helpers for QEMU. 3a2b0a27dSPhilippe Mathieu-Daudé * 4a2b0a27dSPhilippe Mathieu-Daudé * Copyright (c) 2011 Richard Henderson <rth@twiddle.net> 5a2b0a27dSPhilippe Mathieu-Daudé * 6a2b0a27dSPhilippe Mathieu-Daudé * This library is free software; you can redistribute it and/or 7a2b0a27dSPhilippe Mathieu-Daudé * modify it under the terms of the GNU Lesser General Public 8a2b0a27dSPhilippe Mathieu-Daudé * License as published by the Free Software Foundation; either 9a2b0a27dSPhilippe Mathieu-Daudé * version 2.1 of the License, or (at your option) any later version. 10a2b0a27dSPhilippe Mathieu-Daudé * 11a2b0a27dSPhilippe Mathieu-Daudé * This library is distributed in the hope that it will be useful, 12a2b0a27dSPhilippe Mathieu-Daudé * but WITHOUT ANY WARRANTY; without even the implied warranty of 13a2b0a27dSPhilippe Mathieu-Daudé * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14a2b0a27dSPhilippe Mathieu-Daudé * Lesser General Public License for more details. 15a2b0a27dSPhilippe Mathieu-Daudé * 16a2b0a27dSPhilippe Mathieu-Daudé * You should have received a copy of the GNU Lesser General Public 17a2b0a27dSPhilippe Mathieu-Daudé * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18a2b0a27dSPhilippe Mathieu-Daudé */ 19a2b0a27dSPhilippe Mathieu-Daudé 20a2b0a27dSPhilippe Mathieu-Daudé #include "qemu/osdep.h" 21a2b0a27dSPhilippe Mathieu-Daudé #include "cpu.h" 22a2b0a27dSPhilippe Mathieu-Daudé #include "exec/helper-proto.h" 23a2b0a27dSPhilippe Mathieu-Daudé 24a2b0a27dSPhilippe Mathieu-Daudé /* 25a2b0a27dSPhilippe Mathieu-Daudé * If the byte ordering doesn't matter, i.e. all columns are treated 26a2b0a27dSPhilippe Mathieu-Daudé * identically, then this union can be used directly. If byte ordering 27a2b0a27dSPhilippe Mathieu-Daudé * does matter, we generally ignore dumping to memory. 28a2b0a27dSPhilippe Mathieu-Daudé */ 29a2b0a27dSPhilippe Mathieu-Daudé typedef union { 30a2b0a27dSPhilippe Mathieu-Daudé uint8_t ub[8]; 31a2b0a27dSPhilippe Mathieu-Daudé int8_t sb[8]; 32a2b0a27dSPhilippe Mathieu-Daudé uint16_t uh[4]; 33a2b0a27dSPhilippe Mathieu-Daudé int16_t sh[4]; 34a2b0a27dSPhilippe Mathieu-Daudé uint32_t uw[2]; 35a2b0a27dSPhilippe Mathieu-Daudé int32_t sw[2]; 36a2b0a27dSPhilippe Mathieu-Daudé uint64_t d; 37a2b0a27dSPhilippe Mathieu-Daudé } LMIValue; 38a2b0a27dSPhilippe Mathieu-Daudé 39a2b0a27dSPhilippe Mathieu-Daudé /* Some byte ordering issues can be mitigated by XORing in the following. */ 40*e03b5686SMarc-André Lureau #if HOST_BIG_ENDIAN 41a2b0a27dSPhilippe Mathieu-Daudé # define BYTE_ORDER_XOR(N) N 42a2b0a27dSPhilippe Mathieu-Daudé #else 43a2b0a27dSPhilippe Mathieu-Daudé # define BYTE_ORDER_XOR(N) 0 44a2b0a27dSPhilippe Mathieu-Daudé #endif 45a2b0a27dSPhilippe Mathieu-Daudé 46a2b0a27dSPhilippe Mathieu-Daudé #define SATSB(x) (x < -0x80 ? -0x80 : x > 0x7f ? 0x7f : x) 47a2b0a27dSPhilippe Mathieu-Daudé #define SATUB(x) (x > 0xff ? 0xff : x) 48a2b0a27dSPhilippe Mathieu-Daudé 49a2b0a27dSPhilippe Mathieu-Daudé #define SATSH(x) (x < -0x8000 ? -0x8000 : x > 0x7fff ? 0x7fff : x) 50a2b0a27dSPhilippe Mathieu-Daudé #define SATUH(x) (x > 0xffff ? 0xffff : x) 51a2b0a27dSPhilippe Mathieu-Daudé 52a2b0a27dSPhilippe Mathieu-Daudé #define SATSW(x) \ 53a2b0a27dSPhilippe Mathieu-Daudé (x < -0x80000000ll ? -0x80000000ll : x > 0x7fffffff ? 0x7fffffff : x) 54a2b0a27dSPhilippe Mathieu-Daudé #define SATUW(x) (x > 0xffffffffull ? 0xffffffffull : x) 55a2b0a27dSPhilippe Mathieu-Daudé 56a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_paddsb(uint64_t fs, uint64_t ft) 57a2b0a27dSPhilippe Mathieu-Daudé { 58a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs, vt; 59a2b0a27dSPhilippe Mathieu-Daudé unsigned int i; 60a2b0a27dSPhilippe Mathieu-Daudé 61a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 62a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 63a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 8; ++i) { 64a2b0a27dSPhilippe Mathieu-Daudé int r = vs.sb[i] + vt.sb[i]; 65a2b0a27dSPhilippe Mathieu-Daudé vs.sb[i] = SATSB(r); 66a2b0a27dSPhilippe Mathieu-Daudé } 67a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 68a2b0a27dSPhilippe Mathieu-Daudé } 69a2b0a27dSPhilippe Mathieu-Daudé 70a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_paddusb(uint64_t fs, uint64_t ft) 71a2b0a27dSPhilippe Mathieu-Daudé { 72a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs, vt; 73a2b0a27dSPhilippe Mathieu-Daudé unsigned int i; 74a2b0a27dSPhilippe Mathieu-Daudé 75a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 76a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 77a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 8; ++i) { 78a2b0a27dSPhilippe Mathieu-Daudé int r = vs.ub[i] + vt.ub[i]; 79a2b0a27dSPhilippe Mathieu-Daudé vs.ub[i] = SATUB(r); 80a2b0a27dSPhilippe Mathieu-Daudé } 81a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 82a2b0a27dSPhilippe Mathieu-Daudé } 83a2b0a27dSPhilippe Mathieu-Daudé 84a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_paddsh(uint64_t fs, uint64_t ft) 85a2b0a27dSPhilippe Mathieu-Daudé { 86a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs, vt; 87a2b0a27dSPhilippe Mathieu-Daudé unsigned int i; 88a2b0a27dSPhilippe Mathieu-Daudé 89a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 90a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 91a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 4; ++i) { 92a2b0a27dSPhilippe Mathieu-Daudé int r = vs.sh[i] + vt.sh[i]; 93a2b0a27dSPhilippe Mathieu-Daudé vs.sh[i] = SATSH(r); 94a2b0a27dSPhilippe Mathieu-Daudé } 95a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 96a2b0a27dSPhilippe Mathieu-Daudé } 97a2b0a27dSPhilippe Mathieu-Daudé 98a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_paddush(uint64_t fs, uint64_t ft) 99a2b0a27dSPhilippe Mathieu-Daudé { 100a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs, vt; 101a2b0a27dSPhilippe Mathieu-Daudé unsigned int i; 102a2b0a27dSPhilippe Mathieu-Daudé 103a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 104a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 105a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 4; ++i) { 106a2b0a27dSPhilippe Mathieu-Daudé int r = vs.uh[i] + vt.uh[i]; 107a2b0a27dSPhilippe Mathieu-Daudé vs.uh[i] = SATUH(r); 108a2b0a27dSPhilippe Mathieu-Daudé } 109a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 110a2b0a27dSPhilippe Mathieu-Daudé } 111a2b0a27dSPhilippe Mathieu-Daudé 112a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_paddb(uint64_t fs, uint64_t ft) 113a2b0a27dSPhilippe Mathieu-Daudé { 114a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs, vt; 115a2b0a27dSPhilippe Mathieu-Daudé unsigned int i; 116a2b0a27dSPhilippe Mathieu-Daudé 117a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 118a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 119a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 8; ++i) { 120a2b0a27dSPhilippe Mathieu-Daudé vs.ub[i] += vt.ub[i]; 121a2b0a27dSPhilippe Mathieu-Daudé } 122a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 123a2b0a27dSPhilippe Mathieu-Daudé } 124a2b0a27dSPhilippe Mathieu-Daudé 125a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_paddh(uint64_t fs, uint64_t ft) 126a2b0a27dSPhilippe Mathieu-Daudé { 127a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs, vt; 128a2b0a27dSPhilippe Mathieu-Daudé unsigned int i; 129a2b0a27dSPhilippe Mathieu-Daudé 130a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 131a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 132a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 4; ++i) { 133a2b0a27dSPhilippe Mathieu-Daudé vs.uh[i] += vt.uh[i]; 134a2b0a27dSPhilippe Mathieu-Daudé } 135a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 136a2b0a27dSPhilippe Mathieu-Daudé } 137a2b0a27dSPhilippe Mathieu-Daudé 138a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_paddw(uint64_t fs, uint64_t ft) 139a2b0a27dSPhilippe Mathieu-Daudé { 140a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs, vt; 141a2b0a27dSPhilippe Mathieu-Daudé unsigned int i; 142a2b0a27dSPhilippe Mathieu-Daudé 143a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 144a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 145a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 2; ++i) { 146a2b0a27dSPhilippe Mathieu-Daudé vs.uw[i] += vt.uw[i]; 147a2b0a27dSPhilippe Mathieu-Daudé } 148a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 149a2b0a27dSPhilippe Mathieu-Daudé } 150a2b0a27dSPhilippe Mathieu-Daudé 151a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_psubsb(uint64_t fs, uint64_t ft) 152a2b0a27dSPhilippe Mathieu-Daudé { 153a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs, vt; 154a2b0a27dSPhilippe Mathieu-Daudé unsigned int i; 155a2b0a27dSPhilippe Mathieu-Daudé 156a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 157a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 158a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 8; ++i) { 159a2b0a27dSPhilippe Mathieu-Daudé int r = vs.sb[i] - vt.sb[i]; 160a2b0a27dSPhilippe Mathieu-Daudé vs.sb[i] = SATSB(r); 161a2b0a27dSPhilippe Mathieu-Daudé } 162a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 163a2b0a27dSPhilippe Mathieu-Daudé } 164a2b0a27dSPhilippe Mathieu-Daudé 165a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_psubusb(uint64_t fs, uint64_t ft) 166a2b0a27dSPhilippe Mathieu-Daudé { 167a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs, vt; 168a2b0a27dSPhilippe Mathieu-Daudé unsigned int i; 169a2b0a27dSPhilippe Mathieu-Daudé 170a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 171a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 172a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 8; ++i) { 173a2b0a27dSPhilippe Mathieu-Daudé int r = vs.ub[i] - vt.ub[i]; 174a2b0a27dSPhilippe Mathieu-Daudé vs.ub[i] = SATUB(r); 175a2b0a27dSPhilippe Mathieu-Daudé } 176a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 177a2b0a27dSPhilippe Mathieu-Daudé } 178a2b0a27dSPhilippe Mathieu-Daudé 179a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_psubsh(uint64_t fs, uint64_t ft) 180a2b0a27dSPhilippe Mathieu-Daudé { 181a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs, vt; 182a2b0a27dSPhilippe Mathieu-Daudé unsigned int i; 183a2b0a27dSPhilippe Mathieu-Daudé 184a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 185a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 186a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 4; ++i) { 187a2b0a27dSPhilippe Mathieu-Daudé int r = vs.sh[i] - vt.sh[i]; 188a2b0a27dSPhilippe Mathieu-Daudé vs.sh[i] = SATSH(r); 189a2b0a27dSPhilippe Mathieu-Daudé } 190a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 191a2b0a27dSPhilippe Mathieu-Daudé } 192a2b0a27dSPhilippe Mathieu-Daudé 193a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_psubush(uint64_t fs, uint64_t ft) 194a2b0a27dSPhilippe Mathieu-Daudé { 195a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs, vt; 196a2b0a27dSPhilippe Mathieu-Daudé unsigned int i; 197a2b0a27dSPhilippe Mathieu-Daudé 198a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 199a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 200a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 4; ++i) { 201a2b0a27dSPhilippe Mathieu-Daudé int r = vs.uh[i] - vt.uh[i]; 202a2b0a27dSPhilippe Mathieu-Daudé vs.uh[i] = SATUH(r); 203a2b0a27dSPhilippe Mathieu-Daudé } 204a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 205a2b0a27dSPhilippe Mathieu-Daudé } 206a2b0a27dSPhilippe Mathieu-Daudé 207a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_psubb(uint64_t fs, uint64_t ft) 208a2b0a27dSPhilippe Mathieu-Daudé { 209a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs, vt; 210a2b0a27dSPhilippe Mathieu-Daudé unsigned int i; 211a2b0a27dSPhilippe Mathieu-Daudé 212a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 213a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 214a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 8; ++i) { 215a2b0a27dSPhilippe Mathieu-Daudé vs.ub[i] -= vt.ub[i]; 216a2b0a27dSPhilippe Mathieu-Daudé } 217a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 218a2b0a27dSPhilippe Mathieu-Daudé } 219a2b0a27dSPhilippe Mathieu-Daudé 220a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_psubh(uint64_t fs, uint64_t ft) 221a2b0a27dSPhilippe Mathieu-Daudé { 222a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs, vt; 223a2b0a27dSPhilippe Mathieu-Daudé unsigned int i; 224a2b0a27dSPhilippe Mathieu-Daudé 225a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 226a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 227a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 4; ++i) { 228a2b0a27dSPhilippe Mathieu-Daudé vs.uh[i] -= vt.uh[i]; 229a2b0a27dSPhilippe Mathieu-Daudé } 230a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 231a2b0a27dSPhilippe Mathieu-Daudé } 232a2b0a27dSPhilippe Mathieu-Daudé 233a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_psubw(uint64_t fs, uint64_t ft) 234a2b0a27dSPhilippe Mathieu-Daudé { 235a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs, vt; 236a2b0a27dSPhilippe Mathieu-Daudé unsigned int i; 237a2b0a27dSPhilippe Mathieu-Daudé 238a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 239a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 240a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 2; ++i) { 241a2b0a27dSPhilippe Mathieu-Daudé vs.uw[i] -= vt.uw[i]; 242a2b0a27dSPhilippe Mathieu-Daudé } 243a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 244a2b0a27dSPhilippe Mathieu-Daudé } 245a2b0a27dSPhilippe Mathieu-Daudé 246a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_pshufh(uint64_t fs, uint64_t ft) 247a2b0a27dSPhilippe Mathieu-Daudé { 248a2b0a27dSPhilippe Mathieu-Daudé unsigned host = BYTE_ORDER_XOR(3); 249a2b0a27dSPhilippe Mathieu-Daudé LMIValue vd, vs; 250a2b0a27dSPhilippe Mathieu-Daudé unsigned i; 251a2b0a27dSPhilippe Mathieu-Daudé 252a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 253a2b0a27dSPhilippe Mathieu-Daudé vd.d = 0; 254a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 4; i++, ft >>= 2) { 255a2b0a27dSPhilippe Mathieu-Daudé vd.uh[i ^ host] = vs.uh[(ft & 3) ^ host]; 256a2b0a27dSPhilippe Mathieu-Daudé } 257a2b0a27dSPhilippe Mathieu-Daudé return vd.d; 258a2b0a27dSPhilippe Mathieu-Daudé } 259a2b0a27dSPhilippe Mathieu-Daudé 260a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_packsswh(uint64_t fs, uint64_t ft) 261a2b0a27dSPhilippe Mathieu-Daudé { 262a2b0a27dSPhilippe Mathieu-Daudé uint64_t fd = 0; 263a2b0a27dSPhilippe Mathieu-Daudé int64_t tmp; 264a2b0a27dSPhilippe Mathieu-Daudé 265a2b0a27dSPhilippe Mathieu-Daudé tmp = (int32_t)(fs >> 0); 266a2b0a27dSPhilippe Mathieu-Daudé tmp = SATSH(tmp); 267a2b0a27dSPhilippe Mathieu-Daudé fd |= (tmp & 0xffff) << 0; 268a2b0a27dSPhilippe Mathieu-Daudé 269a2b0a27dSPhilippe Mathieu-Daudé tmp = (int32_t)(fs >> 32); 270a2b0a27dSPhilippe Mathieu-Daudé tmp = SATSH(tmp); 271a2b0a27dSPhilippe Mathieu-Daudé fd |= (tmp & 0xffff) << 16; 272a2b0a27dSPhilippe Mathieu-Daudé 273a2b0a27dSPhilippe Mathieu-Daudé tmp = (int32_t)(ft >> 0); 274a2b0a27dSPhilippe Mathieu-Daudé tmp = SATSH(tmp); 275a2b0a27dSPhilippe Mathieu-Daudé fd |= (tmp & 0xffff) << 32; 276a2b0a27dSPhilippe Mathieu-Daudé 277a2b0a27dSPhilippe Mathieu-Daudé tmp = (int32_t)(ft >> 32); 278a2b0a27dSPhilippe Mathieu-Daudé tmp = SATSH(tmp); 279a2b0a27dSPhilippe Mathieu-Daudé fd |= (tmp & 0xffff) << 48; 280a2b0a27dSPhilippe Mathieu-Daudé 281a2b0a27dSPhilippe Mathieu-Daudé return fd; 282a2b0a27dSPhilippe Mathieu-Daudé } 283a2b0a27dSPhilippe Mathieu-Daudé 284a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_packsshb(uint64_t fs, uint64_t ft) 285a2b0a27dSPhilippe Mathieu-Daudé { 286a2b0a27dSPhilippe Mathieu-Daudé uint64_t fd = 0; 287a2b0a27dSPhilippe Mathieu-Daudé unsigned int i; 288a2b0a27dSPhilippe Mathieu-Daudé 289a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 4; ++i) { 290a2b0a27dSPhilippe Mathieu-Daudé int16_t tmp = fs >> (i * 16); 291a2b0a27dSPhilippe Mathieu-Daudé tmp = SATSB(tmp); 292a2b0a27dSPhilippe Mathieu-Daudé fd |= (uint64_t)(tmp & 0xff) << (i * 8); 293a2b0a27dSPhilippe Mathieu-Daudé } 294a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 4; ++i) { 295a2b0a27dSPhilippe Mathieu-Daudé int16_t tmp = ft >> (i * 16); 296a2b0a27dSPhilippe Mathieu-Daudé tmp = SATSB(tmp); 297a2b0a27dSPhilippe Mathieu-Daudé fd |= (uint64_t)(tmp & 0xff) << (i * 8 + 32); 298a2b0a27dSPhilippe Mathieu-Daudé } 299a2b0a27dSPhilippe Mathieu-Daudé 300a2b0a27dSPhilippe Mathieu-Daudé return fd; 301a2b0a27dSPhilippe Mathieu-Daudé } 302a2b0a27dSPhilippe Mathieu-Daudé 303a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_packushb(uint64_t fs, uint64_t ft) 304a2b0a27dSPhilippe Mathieu-Daudé { 305a2b0a27dSPhilippe Mathieu-Daudé uint64_t fd = 0; 306a2b0a27dSPhilippe Mathieu-Daudé unsigned int i; 307a2b0a27dSPhilippe Mathieu-Daudé 308a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 4; ++i) { 309a2b0a27dSPhilippe Mathieu-Daudé int16_t tmp = fs >> (i * 16); 310a2b0a27dSPhilippe Mathieu-Daudé tmp = SATUB(tmp); 311a2b0a27dSPhilippe Mathieu-Daudé fd |= (uint64_t)(tmp & 0xff) << (i * 8); 312a2b0a27dSPhilippe Mathieu-Daudé } 313a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 4; ++i) { 314a2b0a27dSPhilippe Mathieu-Daudé int16_t tmp = ft >> (i * 16); 315a2b0a27dSPhilippe Mathieu-Daudé tmp = SATUB(tmp); 316a2b0a27dSPhilippe Mathieu-Daudé fd |= (uint64_t)(tmp & 0xff) << (i * 8 + 32); 317a2b0a27dSPhilippe Mathieu-Daudé } 318a2b0a27dSPhilippe Mathieu-Daudé 319a2b0a27dSPhilippe Mathieu-Daudé return fd; 320a2b0a27dSPhilippe Mathieu-Daudé } 321a2b0a27dSPhilippe Mathieu-Daudé 322a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_punpcklwd(uint64_t fs, uint64_t ft) 323a2b0a27dSPhilippe Mathieu-Daudé { 324a2b0a27dSPhilippe Mathieu-Daudé return (fs & 0xffffffff) | (ft << 32); 325a2b0a27dSPhilippe Mathieu-Daudé } 326a2b0a27dSPhilippe Mathieu-Daudé 327a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_punpckhwd(uint64_t fs, uint64_t ft) 328a2b0a27dSPhilippe Mathieu-Daudé { 329a2b0a27dSPhilippe Mathieu-Daudé return (fs >> 32) | (ft & ~0xffffffffull); 330a2b0a27dSPhilippe Mathieu-Daudé } 331a2b0a27dSPhilippe Mathieu-Daudé 332a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_punpcklhw(uint64_t fs, uint64_t ft) 333a2b0a27dSPhilippe Mathieu-Daudé { 334a2b0a27dSPhilippe Mathieu-Daudé unsigned host = BYTE_ORDER_XOR(3); 335a2b0a27dSPhilippe Mathieu-Daudé LMIValue vd, vs, vt; 336a2b0a27dSPhilippe Mathieu-Daudé 337a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 338a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 339a2b0a27dSPhilippe Mathieu-Daudé vd.uh[0 ^ host] = vs.uh[0 ^ host]; 340a2b0a27dSPhilippe Mathieu-Daudé vd.uh[1 ^ host] = vt.uh[0 ^ host]; 341a2b0a27dSPhilippe Mathieu-Daudé vd.uh[2 ^ host] = vs.uh[1 ^ host]; 342a2b0a27dSPhilippe Mathieu-Daudé vd.uh[3 ^ host] = vt.uh[1 ^ host]; 343a2b0a27dSPhilippe Mathieu-Daudé 344a2b0a27dSPhilippe Mathieu-Daudé return vd.d; 345a2b0a27dSPhilippe Mathieu-Daudé } 346a2b0a27dSPhilippe Mathieu-Daudé 347a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_punpckhhw(uint64_t fs, uint64_t ft) 348a2b0a27dSPhilippe Mathieu-Daudé { 349a2b0a27dSPhilippe Mathieu-Daudé unsigned host = BYTE_ORDER_XOR(3); 350a2b0a27dSPhilippe Mathieu-Daudé LMIValue vd, vs, vt; 351a2b0a27dSPhilippe Mathieu-Daudé 352a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 353a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 354a2b0a27dSPhilippe Mathieu-Daudé vd.uh[0 ^ host] = vs.uh[2 ^ host]; 355a2b0a27dSPhilippe Mathieu-Daudé vd.uh[1 ^ host] = vt.uh[2 ^ host]; 356a2b0a27dSPhilippe Mathieu-Daudé vd.uh[2 ^ host] = vs.uh[3 ^ host]; 357a2b0a27dSPhilippe Mathieu-Daudé vd.uh[3 ^ host] = vt.uh[3 ^ host]; 358a2b0a27dSPhilippe Mathieu-Daudé 359a2b0a27dSPhilippe Mathieu-Daudé return vd.d; 360a2b0a27dSPhilippe Mathieu-Daudé } 361a2b0a27dSPhilippe Mathieu-Daudé 362a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_punpcklbh(uint64_t fs, uint64_t ft) 363a2b0a27dSPhilippe Mathieu-Daudé { 364a2b0a27dSPhilippe Mathieu-Daudé unsigned host = BYTE_ORDER_XOR(7); 365a2b0a27dSPhilippe Mathieu-Daudé LMIValue vd, vs, vt; 366a2b0a27dSPhilippe Mathieu-Daudé 367a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 368a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 369a2b0a27dSPhilippe Mathieu-Daudé vd.ub[0 ^ host] = vs.ub[0 ^ host]; 370a2b0a27dSPhilippe Mathieu-Daudé vd.ub[1 ^ host] = vt.ub[0 ^ host]; 371a2b0a27dSPhilippe Mathieu-Daudé vd.ub[2 ^ host] = vs.ub[1 ^ host]; 372a2b0a27dSPhilippe Mathieu-Daudé vd.ub[3 ^ host] = vt.ub[1 ^ host]; 373a2b0a27dSPhilippe Mathieu-Daudé vd.ub[4 ^ host] = vs.ub[2 ^ host]; 374a2b0a27dSPhilippe Mathieu-Daudé vd.ub[5 ^ host] = vt.ub[2 ^ host]; 375a2b0a27dSPhilippe Mathieu-Daudé vd.ub[6 ^ host] = vs.ub[3 ^ host]; 376a2b0a27dSPhilippe Mathieu-Daudé vd.ub[7 ^ host] = vt.ub[3 ^ host]; 377a2b0a27dSPhilippe Mathieu-Daudé 378a2b0a27dSPhilippe Mathieu-Daudé return vd.d; 379a2b0a27dSPhilippe Mathieu-Daudé } 380a2b0a27dSPhilippe Mathieu-Daudé 381a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_punpckhbh(uint64_t fs, uint64_t ft) 382a2b0a27dSPhilippe Mathieu-Daudé { 383a2b0a27dSPhilippe Mathieu-Daudé unsigned host = BYTE_ORDER_XOR(7); 384a2b0a27dSPhilippe Mathieu-Daudé LMIValue vd, vs, vt; 385a2b0a27dSPhilippe Mathieu-Daudé 386a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 387a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 388a2b0a27dSPhilippe Mathieu-Daudé vd.ub[0 ^ host] = vs.ub[4 ^ host]; 389a2b0a27dSPhilippe Mathieu-Daudé vd.ub[1 ^ host] = vt.ub[4 ^ host]; 390a2b0a27dSPhilippe Mathieu-Daudé vd.ub[2 ^ host] = vs.ub[5 ^ host]; 391a2b0a27dSPhilippe Mathieu-Daudé vd.ub[3 ^ host] = vt.ub[5 ^ host]; 392a2b0a27dSPhilippe Mathieu-Daudé vd.ub[4 ^ host] = vs.ub[6 ^ host]; 393a2b0a27dSPhilippe Mathieu-Daudé vd.ub[5 ^ host] = vt.ub[6 ^ host]; 394a2b0a27dSPhilippe Mathieu-Daudé vd.ub[6 ^ host] = vs.ub[7 ^ host]; 395a2b0a27dSPhilippe Mathieu-Daudé vd.ub[7 ^ host] = vt.ub[7 ^ host]; 396a2b0a27dSPhilippe Mathieu-Daudé 397a2b0a27dSPhilippe Mathieu-Daudé return vd.d; 398a2b0a27dSPhilippe Mathieu-Daudé } 399a2b0a27dSPhilippe Mathieu-Daudé 400a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_pavgh(uint64_t fs, uint64_t ft) 401a2b0a27dSPhilippe Mathieu-Daudé { 402a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs, vt; 403a2b0a27dSPhilippe Mathieu-Daudé unsigned i; 404a2b0a27dSPhilippe Mathieu-Daudé 405a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 406a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 407a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 4; i++) { 408a2b0a27dSPhilippe Mathieu-Daudé vs.uh[i] = (vs.uh[i] + vt.uh[i] + 1) >> 1; 409a2b0a27dSPhilippe Mathieu-Daudé } 410a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 411a2b0a27dSPhilippe Mathieu-Daudé } 412a2b0a27dSPhilippe Mathieu-Daudé 413a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_pavgb(uint64_t fs, uint64_t ft) 414a2b0a27dSPhilippe Mathieu-Daudé { 415a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs, vt; 416a2b0a27dSPhilippe Mathieu-Daudé unsigned i; 417a2b0a27dSPhilippe Mathieu-Daudé 418a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 419a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 420a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 8; i++) { 421a2b0a27dSPhilippe Mathieu-Daudé vs.ub[i] = (vs.ub[i] + vt.ub[i] + 1) >> 1; 422a2b0a27dSPhilippe Mathieu-Daudé } 423a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 424a2b0a27dSPhilippe Mathieu-Daudé } 425a2b0a27dSPhilippe Mathieu-Daudé 426a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_pmaxsh(uint64_t fs, uint64_t ft) 427a2b0a27dSPhilippe Mathieu-Daudé { 428a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs, vt; 429a2b0a27dSPhilippe Mathieu-Daudé unsigned i; 430a2b0a27dSPhilippe Mathieu-Daudé 431a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 432a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 433a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 4; i++) { 434a2b0a27dSPhilippe Mathieu-Daudé vs.sh[i] = (vs.sh[i] >= vt.sh[i] ? vs.sh[i] : vt.sh[i]); 435a2b0a27dSPhilippe Mathieu-Daudé } 436a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 437a2b0a27dSPhilippe Mathieu-Daudé } 438a2b0a27dSPhilippe Mathieu-Daudé 439a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_pminsh(uint64_t fs, uint64_t ft) 440a2b0a27dSPhilippe Mathieu-Daudé { 441a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs, vt; 442a2b0a27dSPhilippe Mathieu-Daudé unsigned i; 443a2b0a27dSPhilippe Mathieu-Daudé 444a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 445a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 446a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 4; i++) { 447a2b0a27dSPhilippe Mathieu-Daudé vs.sh[i] = (vs.sh[i] <= vt.sh[i] ? vs.sh[i] : vt.sh[i]); 448a2b0a27dSPhilippe Mathieu-Daudé } 449a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 450a2b0a27dSPhilippe Mathieu-Daudé } 451a2b0a27dSPhilippe Mathieu-Daudé 452a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_pmaxub(uint64_t fs, uint64_t ft) 453a2b0a27dSPhilippe Mathieu-Daudé { 454a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs, vt; 455a2b0a27dSPhilippe Mathieu-Daudé unsigned i; 456a2b0a27dSPhilippe Mathieu-Daudé 457a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 458a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 459a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 4; i++) { 460a2b0a27dSPhilippe Mathieu-Daudé vs.ub[i] = (vs.ub[i] >= vt.ub[i] ? vs.ub[i] : vt.ub[i]); 461a2b0a27dSPhilippe Mathieu-Daudé } 462a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 463a2b0a27dSPhilippe Mathieu-Daudé } 464a2b0a27dSPhilippe Mathieu-Daudé 465a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_pminub(uint64_t fs, uint64_t ft) 466a2b0a27dSPhilippe Mathieu-Daudé { 467a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs, vt; 468a2b0a27dSPhilippe Mathieu-Daudé unsigned i; 469a2b0a27dSPhilippe Mathieu-Daudé 470a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 471a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 472a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 4; i++) { 473a2b0a27dSPhilippe Mathieu-Daudé vs.ub[i] = (vs.ub[i] <= vt.ub[i] ? vs.ub[i] : vt.ub[i]); 474a2b0a27dSPhilippe Mathieu-Daudé } 475a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 476a2b0a27dSPhilippe Mathieu-Daudé } 477a2b0a27dSPhilippe Mathieu-Daudé 478a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_pcmpeqw(uint64_t fs, uint64_t ft) 479a2b0a27dSPhilippe Mathieu-Daudé { 480a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs, vt; 481a2b0a27dSPhilippe Mathieu-Daudé unsigned i; 482a2b0a27dSPhilippe Mathieu-Daudé 483a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 484a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 485a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 2; i++) { 486a2b0a27dSPhilippe Mathieu-Daudé vs.uw[i] = -(vs.uw[i] == vt.uw[i]); 487a2b0a27dSPhilippe Mathieu-Daudé } 488a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 489a2b0a27dSPhilippe Mathieu-Daudé } 490a2b0a27dSPhilippe Mathieu-Daudé 491a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_pcmpgtw(uint64_t fs, uint64_t ft) 492a2b0a27dSPhilippe Mathieu-Daudé { 493a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs, vt; 494a2b0a27dSPhilippe Mathieu-Daudé unsigned i; 495a2b0a27dSPhilippe Mathieu-Daudé 496a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 497a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 498a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 2; i++) { 499a2b0a27dSPhilippe Mathieu-Daudé vs.uw[i] = -(vs.uw[i] > vt.uw[i]); 500a2b0a27dSPhilippe Mathieu-Daudé } 501a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 502a2b0a27dSPhilippe Mathieu-Daudé } 503a2b0a27dSPhilippe Mathieu-Daudé 504a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_pcmpeqh(uint64_t fs, uint64_t ft) 505a2b0a27dSPhilippe Mathieu-Daudé { 506a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs, vt; 507a2b0a27dSPhilippe Mathieu-Daudé unsigned i; 508a2b0a27dSPhilippe Mathieu-Daudé 509a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 510a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 511a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 4; i++) { 512a2b0a27dSPhilippe Mathieu-Daudé vs.uh[i] = -(vs.uh[i] == vt.uh[i]); 513a2b0a27dSPhilippe Mathieu-Daudé } 514a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 515a2b0a27dSPhilippe Mathieu-Daudé } 516a2b0a27dSPhilippe Mathieu-Daudé 517a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_pcmpgth(uint64_t fs, uint64_t ft) 518a2b0a27dSPhilippe Mathieu-Daudé { 519a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs, vt; 520a2b0a27dSPhilippe Mathieu-Daudé unsigned i; 521a2b0a27dSPhilippe Mathieu-Daudé 522a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 523a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 524a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 4; i++) { 525a2b0a27dSPhilippe Mathieu-Daudé vs.uh[i] = -(vs.uh[i] > vt.uh[i]); 526a2b0a27dSPhilippe Mathieu-Daudé } 527a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 528a2b0a27dSPhilippe Mathieu-Daudé } 529a2b0a27dSPhilippe Mathieu-Daudé 530a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_pcmpeqb(uint64_t fs, uint64_t ft) 531a2b0a27dSPhilippe Mathieu-Daudé { 532a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs, vt; 533a2b0a27dSPhilippe Mathieu-Daudé unsigned i; 534a2b0a27dSPhilippe Mathieu-Daudé 535a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 536a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 537a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 8; i++) { 538a2b0a27dSPhilippe Mathieu-Daudé vs.ub[i] = -(vs.ub[i] == vt.ub[i]); 539a2b0a27dSPhilippe Mathieu-Daudé } 540a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 541a2b0a27dSPhilippe Mathieu-Daudé } 542a2b0a27dSPhilippe Mathieu-Daudé 543a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_pcmpgtb(uint64_t fs, uint64_t ft) 544a2b0a27dSPhilippe Mathieu-Daudé { 545a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs, vt; 546a2b0a27dSPhilippe Mathieu-Daudé unsigned i; 547a2b0a27dSPhilippe Mathieu-Daudé 548a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 549a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 550a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 8; i++) { 551a2b0a27dSPhilippe Mathieu-Daudé vs.ub[i] = -(vs.ub[i] > vt.ub[i]); 552a2b0a27dSPhilippe Mathieu-Daudé } 553a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 554a2b0a27dSPhilippe Mathieu-Daudé } 555a2b0a27dSPhilippe Mathieu-Daudé 556a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_psllw(uint64_t fs, uint64_t ft) 557a2b0a27dSPhilippe Mathieu-Daudé { 558a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs; 559a2b0a27dSPhilippe Mathieu-Daudé unsigned i; 560a2b0a27dSPhilippe Mathieu-Daudé 561a2b0a27dSPhilippe Mathieu-Daudé ft &= 0x7f; 562a2b0a27dSPhilippe Mathieu-Daudé if (ft > 31) { 563a2b0a27dSPhilippe Mathieu-Daudé return 0; 564a2b0a27dSPhilippe Mathieu-Daudé } 565a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 566a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 2; ++i) { 567a2b0a27dSPhilippe Mathieu-Daudé vs.uw[i] <<= ft; 568a2b0a27dSPhilippe Mathieu-Daudé } 569a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 570a2b0a27dSPhilippe Mathieu-Daudé } 571a2b0a27dSPhilippe Mathieu-Daudé 572a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_psrlw(uint64_t fs, uint64_t ft) 573a2b0a27dSPhilippe Mathieu-Daudé { 574a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs; 575a2b0a27dSPhilippe Mathieu-Daudé unsigned i; 576a2b0a27dSPhilippe Mathieu-Daudé 577a2b0a27dSPhilippe Mathieu-Daudé ft &= 0x7f; 578a2b0a27dSPhilippe Mathieu-Daudé if (ft > 31) { 579a2b0a27dSPhilippe Mathieu-Daudé return 0; 580a2b0a27dSPhilippe Mathieu-Daudé } 581a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 582a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 2; ++i) { 583a2b0a27dSPhilippe Mathieu-Daudé vs.uw[i] >>= ft; 584a2b0a27dSPhilippe Mathieu-Daudé } 585a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 586a2b0a27dSPhilippe Mathieu-Daudé } 587a2b0a27dSPhilippe Mathieu-Daudé 588a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_psraw(uint64_t fs, uint64_t ft) 589a2b0a27dSPhilippe Mathieu-Daudé { 590a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs; 591a2b0a27dSPhilippe Mathieu-Daudé unsigned i; 592a2b0a27dSPhilippe Mathieu-Daudé 593a2b0a27dSPhilippe Mathieu-Daudé ft &= 0x7f; 594a2b0a27dSPhilippe Mathieu-Daudé if (ft > 31) { 595a2b0a27dSPhilippe Mathieu-Daudé ft = 31; 596a2b0a27dSPhilippe Mathieu-Daudé } 597a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 598a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 2; ++i) { 599a2b0a27dSPhilippe Mathieu-Daudé vs.sw[i] >>= ft; 600a2b0a27dSPhilippe Mathieu-Daudé } 601a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 602a2b0a27dSPhilippe Mathieu-Daudé } 603a2b0a27dSPhilippe Mathieu-Daudé 604a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_psllh(uint64_t fs, uint64_t ft) 605a2b0a27dSPhilippe Mathieu-Daudé { 606a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs; 607a2b0a27dSPhilippe Mathieu-Daudé unsigned i; 608a2b0a27dSPhilippe Mathieu-Daudé 609a2b0a27dSPhilippe Mathieu-Daudé ft &= 0x7f; 610a2b0a27dSPhilippe Mathieu-Daudé if (ft > 15) { 611a2b0a27dSPhilippe Mathieu-Daudé return 0; 612a2b0a27dSPhilippe Mathieu-Daudé } 613a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 614a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 4; ++i) { 615a2b0a27dSPhilippe Mathieu-Daudé vs.uh[i] <<= ft; 616a2b0a27dSPhilippe Mathieu-Daudé } 617a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 618a2b0a27dSPhilippe Mathieu-Daudé } 619a2b0a27dSPhilippe Mathieu-Daudé 620a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_psrlh(uint64_t fs, uint64_t ft) 621a2b0a27dSPhilippe Mathieu-Daudé { 622a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs; 623a2b0a27dSPhilippe Mathieu-Daudé unsigned i; 624a2b0a27dSPhilippe Mathieu-Daudé 625a2b0a27dSPhilippe Mathieu-Daudé ft &= 0x7f; 626a2b0a27dSPhilippe Mathieu-Daudé if (ft > 15) { 627a2b0a27dSPhilippe Mathieu-Daudé return 0; 628a2b0a27dSPhilippe Mathieu-Daudé } 629a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 630a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 4; ++i) { 631a2b0a27dSPhilippe Mathieu-Daudé vs.uh[i] >>= ft; 632a2b0a27dSPhilippe Mathieu-Daudé } 633a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 634a2b0a27dSPhilippe Mathieu-Daudé } 635a2b0a27dSPhilippe Mathieu-Daudé 636a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_psrah(uint64_t fs, uint64_t ft) 637a2b0a27dSPhilippe Mathieu-Daudé { 638a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs; 639a2b0a27dSPhilippe Mathieu-Daudé unsigned i; 640a2b0a27dSPhilippe Mathieu-Daudé 641a2b0a27dSPhilippe Mathieu-Daudé ft &= 0x7f; 642a2b0a27dSPhilippe Mathieu-Daudé if (ft > 15) { 643a2b0a27dSPhilippe Mathieu-Daudé ft = 15; 644a2b0a27dSPhilippe Mathieu-Daudé } 645a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 646a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 4; ++i) { 647a2b0a27dSPhilippe Mathieu-Daudé vs.sh[i] >>= ft; 648a2b0a27dSPhilippe Mathieu-Daudé } 649a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 650a2b0a27dSPhilippe Mathieu-Daudé } 651a2b0a27dSPhilippe Mathieu-Daudé 652a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_pmullh(uint64_t fs, uint64_t ft) 653a2b0a27dSPhilippe Mathieu-Daudé { 654a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs, vt; 655a2b0a27dSPhilippe Mathieu-Daudé unsigned i; 656a2b0a27dSPhilippe Mathieu-Daudé 657a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 658a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 659a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 4; ++i) { 660a2b0a27dSPhilippe Mathieu-Daudé vs.sh[i] *= vt.sh[i]; 661a2b0a27dSPhilippe Mathieu-Daudé } 662a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 663a2b0a27dSPhilippe Mathieu-Daudé } 664a2b0a27dSPhilippe Mathieu-Daudé 665a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_pmulhh(uint64_t fs, uint64_t ft) 666a2b0a27dSPhilippe Mathieu-Daudé { 667a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs, vt; 668a2b0a27dSPhilippe Mathieu-Daudé unsigned i; 669a2b0a27dSPhilippe Mathieu-Daudé 670a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 671a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 672a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 4; ++i) { 673a2b0a27dSPhilippe Mathieu-Daudé int32_t r = vs.sh[i] * vt.sh[i]; 674a2b0a27dSPhilippe Mathieu-Daudé vs.sh[i] = r >> 16; 675a2b0a27dSPhilippe Mathieu-Daudé } 676a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 677a2b0a27dSPhilippe Mathieu-Daudé } 678a2b0a27dSPhilippe Mathieu-Daudé 679a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_pmulhuh(uint64_t fs, uint64_t ft) 680a2b0a27dSPhilippe Mathieu-Daudé { 681a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs, vt; 682a2b0a27dSPhilippe Mathieu-Daudé unsigned i; 683a2b0a27dSPhilippe Mathieu-Daudé 684a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 685a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 686a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 4; ++i) { 687a2b0a27dSPhilippe Mathieu-Daudé uint32_t r = vs.uh[i] * vt.uh[i]; 688a2b0a27dSPhilippe Mathieu-Daudé vs.uh[i] = r >> 16; 689a2b0a27dSPhilippe Mathieu-Daudé } 690a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 691a2b0a27dSPhilippe Mathieu-Daudé } 692a2b0a27dSPhilippe Mathieu-Daudé 693a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_pmaddhw(uint64_t fs, uint64_t ft) 694a2b0a27dSPhilippe Mathieu-Daudé { 695a2b0a27dSPhilippe Mathieu-Daudé unsigned host = BYTE_ORDER_XOR(3); 696a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs, vt; 697a2b0a27dSPhilippe Mathieu-Daudé uint32_t p0, p1; 698a2b0a27dSPhilippe Mathieu-Daudé 699a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 700a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 701a2b0a27dSPhilippe Mathieu-Daudé p0 = vs.sh[0 ^ host] * vt.sh[0 ^ host]; 702a2b0a27dSPhilippe Mathieu-Daudé p0 += vs.sh[1 ^ host] * vt.sh[1 ^ host]; 703a2b0a27dSPhilippe Mathieu-Daudé p1 = vs.sh[2 ^ host] * vt.sh[2 ^ host]; 704a2b0a27dSPhilippe Mathieu-Daudé p1 += vs.sh[3 ^ host] * vt.sh[3 ^ host]; 705a2b0a27dSPhilippe Mathieu-Daudé 706a2b0a27dSPhilippe Mathieu-Daudé return ((uint64_t)p1 << 32) | p0; 707a2b0a27dSPhilippe Mathieu-Daudé } 708a2b0a27dSPhilippe Mathieu-Daudé 709a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_pasubub(uint64_t fs, uint64_t ft) 710a2b0a27dSPhilippe Mathieu-Daudé { 711a2b0a27dSPhilippe Mathieu-Daudé LMIValue vs, vt; 712a2b0a27dSPhilippe Mathieu-Daudé unsigned i; 713a2b0a27dSPhilippe Mathieu-Daudé 714a2b0a27dSPhilippe Mathieu-Daudé vs.d = fs; 715a2b0a27dSPhilippe Mathieu-Daudé vt.d = ft; 716a2b0a27dSPhilippe Mathieu-Daudé for (i = 0; i < 8; ++i) { 717a2b0a27dSPhilippe Mathieu-Daudé int r = vs.ub[i] - vt.ub[i]; 718a2b0a27dSPhilippe Mathieu-Daudé vs.ub[i] = (r < 0 ? -r : r); 719a2b0a27dSPhilippe Mathieu-Daudé } 720a2b0a27dSPhilippe Mathieu-Daudé return vs.d; 721a2b0a27dSPhilippe Mathieu-Daudé } 722a2b0a27dSPhilippe Mathieu-Daudé 723a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_biadd(uint64_t fs) 724a2b0a27dSPhilippe Mathieu-Daudé { 725a2b0a27dSPhilippe Mathieu-Daudé unsigned i, fd; 726a2b0a27dSPhilippe Mathieu-Daudé 727a2b0a27dSPhilippe Mathieu-Daudé for (i = fd = 0; i < 8; ++i) { 728a2b0a27dSPhilippe Mathieu-Daudé fd += (fs >> (i * 8)) & 0xff; 729a2b0a27dSPhilippe Mathieu-Daudé } 730a2b0a27dSPhilippe Mathieu-Daudé return fd & 0xffff; 731a2b0a27dSPhilippe Mathieu-Daudé } 732a2b0a27dSPhilippe Mathieu-Daudé 733a2b0a27dSPhilippe Mathieu-Daudé uint64_t helper_pmovmskb(uint64_t fs) 734a2b0a27dSPhilippe Mathieu-Daudé { 735a2b0a27dSPhilippe Mathieu-Daudé unsigned fd = 0; 736a2b0a27dSPhilippe Mathieu-Daudé 737a2b0a27dSPhilippe Mathieu-Daudé fd |= ((fs >> 7) & 1) << 0; 738a2b0a27dSPhilippe Mathieu-Daudé fd |= ((fs >> 15) & 1) << 1; 739a2b0a27dSPhilippe Mathieu-Daudé fd |= ((fs >> 23) & 1) << 2; 740a2b0a27dSPhilippe Mathieu-Daudé fd |= ((fs >> 31) & 1) << 3; 741a2b0a27dSPhilippe Mathieu-Daudé fd |= ((fs >> 39) & 1) << 4; 742a2b0a27dSPhilippe Mathieu-Daudé fd |= ((fs >> 47) & 1) << 5; 743a2b0a27dSPhilippe Mathieu-Daudé fd |= ((fs >> 55) & 1) << 6; 744a2b0a27dSPhilippe Mathieu-Daudé fd |= ((fs >> 63) & 1) << 7; 745a2b0a27dSPhilippe Mathieu-Daudé 746a2b0a27dSPhilippe Mathieu-Daudé return fd & 0xff; 747a2b0a27dSPhilippe Mathieu-Daudé } 748