1*fcf5ef2aSThomas Huth /* 2*fcf5ef2aSThomas Huth * VIS op helpers 3*fcf5ef2aSThomas Huth * 4*fcf5ef2aSThomas Huth * Copyright (c) 2003-2005 Fabrice Bellard 5*fcf5ef2aSThomas Huth * 6*fcf5ef2aSThomas Huth * This library is free software; you can redistribute it and/or 7*fcf5ef2aSThomas Huth * modify it under the terms of the GNU Lesser General Public 8*fcf5ef2aSThomas Huth * License as published by the Free Software Foundation; either 9*fcf5ef2aSThomas Huth * version 2 of the License, or (at your option) any later version. 10*fcf5ef2aSThomas Huth * 11*fcf5ef2aSThomas Huth * This library is distributed in the hope that it will be useful, 12*fcf5ef2aSThomas Huth * but WITHOUT ANY WARRANTY; without even the implied warranty of 13*fcf5ef2aSThomas Huth * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14*fcf5ef2aSThomas Huth * Lesser General Public License for more details. 15*fcf5ef2aSThomas Huth * 16*fcf5ef2aSThomas Huth * You should have received a copy of the GNU Lesser General Public 17*fcf5ef2aSThomas Huth * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18*fcf5ef2aSThomas Huth */ 19*fcf5ef2aSThomas Huth 20*fcf5ef2aSThomas Huth #include "qemu/osdep.h" 21*fcf5ef2aSThomas Huth #include "cpu.h" 22*fcf5ef2aSThomas Huth #include "exec/helper-proto.h" 23*fcf5ef2aSThomas Huth 24*fcf5ef2aSThomas Huth /* This function uses non-native bit order */ 25*fcf5ef2aSThomas Huth #define GET_FIELD(X, FROM, TO) \ 26*fcf5ef2aSThomas Huth ((X) >> (63 - (TO)) & ((1ULL << ((TO) - (FROM) + 1)) - 1)) 27*fcf5ef2aSThomas Huth 28*fcf5ef2aSThomas Huth /* This function uses the order in the manuals, i.e. bit 0 is 2^0 */ 29*fcf5ef2aSThomas Huth #define GET_FIELD_SP(X, FROM, TO) \ 30*fcf5ef2aSThomas Huth GET_FIELD(X, 63 - (TO), 63 - (FROM)) 31*fcf5ef2aSThomas Huth 32*fcf5ef2aSThomas Huth target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize) 33*fcf5ef2aSThomas Huth { 34*fcf5ef2aSThomas Huth return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) | 35*fcf5ef2aSThomas Huth (GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) | 36*fcf5ef2aSThomas Huth (GET_FIELD_SP(pixel_addr, 17 + cubesize - 1, 17) << 17) | 37*fcf5ef2aSThomas Huth (GET_FIELD_SP(pixel_addr, 56, 59) << 13) | 38*fcf5ef2aSThomas Huth (GET_FIELD_SP(pixel_addr, 35, 38) << 9) | 39*fcf5ef2aSThomas Huth (GET_FIELD_SP(pixel_addr, 13, 16) << 5) | 40*fcf5ef2aSThomas Huth (((pixel_addr >> 55) & 1) << 4) | 41*fcf5ef2aSThomas Huth (GET_FIELD_SP(pixel_addr, 33, 34) << 2) | 42*fcf5ef2aSThomas Huth GET_FIELD_SP(pixel_addr, 11, 12); 43*fcf5ef2aSThomas Huth } 44*fcf5ef2aSThomas Huth 45*fcf5ef2aSThomas Huth #ifdef HOST_WORDS_BIGENDIAN 46*fcf5ef2aSThomas Huth #define VIS_B64(n) b[7 - (n)] 47*fcf5ef2aSThomas Huth #define VIS_W64(n) w[3 - (n)] 48*fcf5ef2aSThomas Huth #define VIS_SW64(n) sw[3 - (n)] 49*fcf5ef2aSThomas Huth #define VIS_L64(n) l[1 - (n)] 50*fcf5ef2aSThomas Huth #define VIS_B32(n) b[3 - (n)] 51*fcf5ef2aSThomas Huth #define VIS_W32(n) w[1 - (n)] 52*fcf5ef2aSThomas Huth #else 53*fcf5ef2aSThomas Huth #define VIS_B64(n) b[n] 54*fcf5ef2aSThomas Huth #define VIS_W64(n) w[n] 55*fcf5ef2aSThomas Huth #define VIS_SW64(n) sw[n] 56*fcf5ef2aSThomas Huth #define VIS_L64(n) l[n] 57*fcf5ef2aSThomas Huth #define VIS_B32(n) b[n] 58*fcf5ef2aSThomas Huth #define VIS_W32(n) w[n] 59*fcf5ef2aSThomas Huth #endif 60*fcf5ef2aSThomas Huth 61*fcf5ef2aSThomas Huth typedef union { 62*fcf5ef2aSThomas Huth uint8_t b[8]; 63*fcf5ef2aSThomas Huth uint16_t w[4]; 64*fcf5ef2aSThomas Huth int16_t sw[4]; 65*fcf5ef2aSThomas Huth uint32_t l[2]; 66*fcf5ef2aSThomas Huth uint64_t ll; 67*fcf5ef2aSThomas Huth float64 d; 68*fcf5ef2aSThomas Huth } VIS64; 69*fcf5ef2aSThomas Huth 70*fcf5ef2aSThomas Huth typedef union { 71*fcf5ef2aSThomas Huth uint8_t b[4]; 72*fcf5ef2aSThomas Huth uint16_t w[2]; 73*fcf5ef2aSThomas Huth uint32_t l; 74*fcf5ef2aSThomas Huth float32 f; 75*fcf5ef2aSThomas Huth } VIS32; 76*fcf5ef2aSThomas Huth 77*fcf5ef2aSThomas Huth uint64_t helper_fpmerge(uint64_t src1, uint64_t src2) 78*fcf5ef2aSThomas Huth { 79*fcf5ef2aSThomas Huth VIS64 s, d; 80*fcf5ef2aSThomas Huth 81*fcf5ef2aSThomas Huth s.ll = src1; 82*fcf5ef2aSThomas Huth d.ll = src2; 83*fcf5ef2aSThomas Huth 84*fcf5ef2aSThomas Huth /* Reverse calculation order to handle overlap */ 85*fcf5ef2aSThomas Huth d.VIS_B64(7) = s.VIS_B64(3); 86*fcf5ef2aSThomas Huth d.VIS_B64(6) = d.VIS_B64(3); 87*fcf5ef2aSThomas Huth d.VIS_B64(5) = s.VIS_B64(2); 88*fcf5ef2aSThomas Huth d.VIS_B64(4) = d.VIS_B64(2); 89*fcf5ef2aSThomas Huth d.VIS_B64(3) = s.VIS_B64(1); 90*fcf5ef2aSThomas Huth d.VIS_B64(2) = d.VIS_B64(1); 91*fcf5ef2aSThomas Huth d.VIS_B64(1) = s.VIS_B64(0); 92*fcf5ef2aSThomas Huth /* d.VIS_B64(0) = d.VIS_B64(0); */ 93*fcf5ef2aSThomas Huth 94*fcf5ef2aSThomas Huth return d.ll; 95*fcf5ef2aSThomas Huth } 96*fcf5ef2aSThomas Huth 97*fcf5ef2aSThomas Huth uint64_t helper_fmul8x16(uint64_t src1, uint64_t src2) 98*fcf5ef2aSThomas Huth { 99*fcf5ef2aSThomas Huth VIS64 s, d; 100*fcf5ef2aSThomas Huth uint32_t tmp; 101*fcf5ef2aSThomas Huth 102*fcf5ef2aSThomas Huth s.ll = src1; 103*fcf5ef2aSThomas Huth d.ll = src2; 104*fcf5ef2aSThomas Huth 105*fcf5ef2aSThomas Huth #define PMUL(r) \ 106*fcf5ef2aSThomas Huth tmp = (int32_t)d.VIS_SW64(r) * (int32_t)s.VIS_B64(r); \ 107*fcf5ef2aSThomas Huth if ((tmp & 0xff) > 0x7f) { \ 108*fcf5ef2aSThomas Huth tmp += 0x100; \ 109*fcf5ef2aSThomas Huth } \ 110*fcf5ef2aSThomas Huth d.VIS_W64(r) = tmp >> 8; 111*fcf5ef2aSThomas Huth 112*fcf5ef2aSThomas Huth PMUL(0); 113*fcf5ef2aSThomas Huth PMUL(1); 114*fcf5ef2aSThomas Huth PMUL(2); 115*fcf5ef2aSThomas Huth PMUL(3); 116*fcf5ef2aSThomas Huth #undef PMUL 117*fcf5ef2aSThomas Huth 118*fcf5ef2aSThomas Huth return d.ll; 119*fcf5ef2aSThomas Huth } 120*fcf5ef2aSThomas Huth 121*fcf5ef2aSThomas Huth uint64_t helper_fmul8x16al(uint64_t src1, uint64_t src2) 122*fcf5ef2aSThomas Huth { 123*fcf5ef2aSThomas Huth VIS64 s, d; 124*fcf5ef2aSThomas Huth uint32_t tmp; 125*fcf5ef2aSThomas Huth 126*fcf5ef2aSThomas Huth s.ll = src1; 127*fcf5ef2aSThomas Huth d.ll = src2; 128*fcf5ef2aSThomas Huth 129*fcf5ef2aSThomas Huth #define PMUL(r) \ 130*fcf5ef2aSThomas Huth tmp = (int32_t)d.VIS_SW64(1) * (int32_t)s.VIS_B64(r); \ 131*fcf5ef2aSThomas Huth if ((tmp & 0xff) > 0x7f) { \ 132*fcf5ef2aSThomas Huth tmp += 0x100; \ 133*fcf5ef2aSThomas Huth } \ 134*fcf5ef2aSThomas Huth d.VIS_W64(r) = tmp >> 8; 135*fcf5ef2aSThomas Huth 136*fcf5ef2aSThomas Huth PMUL(0); 137*fcf5ef2aSThomas Huth PMUL(1); 138*fcf5ef2aSThomas Huth PMUL(2); 139*fcf5ef2aSThomas Huth PMUL(3); 140*fcf5ef2aSThomas Huth #undef PMUL 141*fcf5ef2aSThomas Huth 142*fcf5ef2aSThomas Huth return d.ll; 143*fcf5ef2aSThomas Huth } 144*fcf5ef2aSThomas Huth 145*fcf5ef2aSThomas Huth uint64_t helper_fmul8x16au(uint64_t src1, uint64_t src2) 146*fcf5ef2aSThomas Huth { 147*fcf5ef2aSThomas Huth VIS64 s, d; 148*fcf5ef2aSThomas Huth uint32_t tmp; 149*fcf5ef2aSThomas Huth 150*fcf5ef2aSThomas Huth s.ll = src1; 151*fcf5ef2aSThomas Huth d.ll = src2; 152*fcf5ef2aSThomas Huth 153*fcf5ef2aSThomas Huth #define PMUL(r) \ 154*fcf5ef2aSThomas Huth tmp = (int32_t)d.VIS_SW64(0) * (int32_t)s.VIS_B64(r); \ 155*fcf5ef2aSThomas Huth if ((tmp & 0xff) > 0x7f) { \ 156*fcf5ef2aSThomas Huth tmp += 0x100; \ 157*fcf5ef2aSThomas Huth } \ 158*fcf5ef2aSThomas Huth d.VIS_W64(r) = tmp >> 8; 159*fcf5ef2aSThomas Huth 160*fcf5ef2aSThomas Huth PMUL(0); 161*fcf5ef2aSThomas Huth PMUL(1); 162*fcf5ef2aSThomas Huth PMUL(2); 163*fcf5ef2aSThomas Huth PMUL(3); 164*fcf5ef2aSThomas Huth #undef PMUL 165*fcf5ef2aSThomas Huth 166*fcf5ef2aSThomas Huth return d.ll; 167*fcf5ef2aSThomas Huth } 168*fcf5ef2aSThomas Huth 169*fcf5ef2aSThomas Huth uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2) 170*fcf5ef2aSThomas Huth { 171*fcf5ef2aSThomas Huth VIS64 s, d; 172*fcf5ef2aSThomas Huth uint32_t tmp; 173*fcf5ef2aSThomas Huth 174*fcf5ef2aSThomas Huth s.ll = src1; 175*fcf5ef2aSThomas Huth d.ll = src2; 176*fcf5ef2aSThomas Huth 177*fcf5ef2aSThomas Huth #define PMUL(r) \ 178*fcf5ef2aSThomas Huth tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8); \ 179*fcf5ef2aSThomas Huth if ((tmp & 0xff) > 0x7f) { \ 180*fcf5ef2aSThomas Huth tmp += 0x100; \ 181*fcf5ef2aSThomas Huth } \ 182*fcf5ef2aSThomas Huth d.VIS_W64(r) = tmp >> 8; 183*fcf5ef2aSThomas Huth 184*fcf5ef2aSThomas Huth PMUL(0); 185*fcf5ef2aSThomas Huth PMUL(1); 186*fcf5ef2aSThomas Huth PMUL(2); 187*fcf5ef2aSThomas Huth PMUL(3); 188*fcf5ef2aSThomas Huth #undef PMUL 189*fcf5ef2aSThomas Huth 190*fcf5ef2aSThomas Huth return d.ll; 191*fcf5ef2aSThomas Huth } 192*fcf5ef2aSThomas Huth 193*fcf5ef2aSThomas Huth uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2) 194*fcf5ef2aSThomas Huth { 195*fcf5ef2aSThomas Huth VIS64 s, d; 196*fcf5ef2aSThomas Huth uint32_t tmp; 197*fcf5ef2aSThomas Huth 198*fcf5ef2aSThomas Huth s.ll = src1; 199*fcf5ef2aSThomas Huth d.ll = src2; 200*fcf5ef2aSThomas Huth 201*fcf5ef2aSThomas Huth #define PMUL(r) \ 202*fcf5ef2aSThomas Huth tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2)); \ 203*fcf5ef2aSThomas Huth if ((tmp & 0xff) > 0x7f) { \ 204*fcf5ef2aSThomas Huth tmp += 0x100; \ 205*fcf5ef2aSThomas Huth } \ 206*fcf5ef2aSThomas Huth d.VIS_W64(r) = tmp >> 8; 207*fcf5ef2aSThomas Huth 208*fcf5ef2aSThomas Huth PMUL(0); 209*fcf5ef2aSThomas Huth PMUL(1); 210*fcf5ef2aSThomas Huth PMUL(2); 211*fcf5ef2aSThomas Huth PMUL(3); 212*fcf5ef2aSThomas Huth #undef PMUL 213*fcf5ef2aSThomas Huth 214*fcf5ef2aSThomas Huth return d.ll; 215*fcf5ef2aSThomas Huth } 216*fcf5ef2aSThomas Huth 217*fcf5ef2aSThomas Huth uint64_t helper_fmuld8sux16(uint64_t src1, uint64_t src2) 218*fcf5ef2aSThomas Huth { 219*fcf5ef2aSThomas Huth VIS64 s, d; 220*fcf5ef2aSThomas Huth uint32_t tmp; 221*fcf5ef2aSThomas Huth 222*fcf5ef2aSThomas Huth s.ll = src1; 223*fcf5ef2aSThomas Huth d.ll = src2; 224*fcf5ef2aSThomas Huth 225*fcf5ef2aSThomas Huth #define PMUL(r) \ 226*fcf5ef2aSThomas Huth tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8); \ 227*fcf5ef2aSThomas Huth if ((tmp & 0xff) > 0x7f) { \ 228*fcf5ef2aSThomas Huth tmp += 0x100; \ 229*fcf5ef2aSThomas Huth } \ 230*fcf5ef2aSThomas Huth d.VIS_L64(r) = tmp; 231*fcf5ef2aSThomas Huth 232*fcf5ef2aSThomas Huth /* Reverse calculation order to handle overlap */ 233*fcf5ef2aSThomas Huth PMUL(1); 234*fcf5ef2aSThomas Huth PMUL(0); 235*fcf5ef2aSThomas Huth #undef PMUL 236*fcf5ef2aSThomas Huth 237*fcf5ef2aSThomas Huth return d.ll; 238*fcf5ef2aSThomas Huth } 239*fcf5ef2aSThomas Huth 240*fcf5ef2aSThomas Huth uint64_t helper_fmuld8ulx16(uint64_t src1, uint64_t src2) 241*fcf5ef2aSThomas Huth { 242*fcf5ef2aSThomas Huth VIS64 s, d; 243*fcf5ef2aSThomas Huth uint32_t tmp; 244*fcf5ef2aSThomas Huth 245*fcf5ef2aSThomas Huth s.ll = src1; 246*fcf5ef2aSThomas Huth d.ll = src2; 247*fcf5ef2aSThomas Huth 248*fcf5ef2aSThomas Huth #define PMUL(r) \ 249*fcf5ef2aSThomas Huth tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2)); \ 250*fcf5ef2aSThomas Huth if ((tmp & 0xff) > 0x7f) { \ 251*fcf5ef2aSThomas Huth tmp += 0x100; \ 252*fcf5ef2aSThomas Huth } \ 253*fcf5ef2aSThomas Huth d.VIS_L64(r) = tmp; 254*fcf5ef2aSThomas Huth 255*fcf5ef2aSThomas Huth /* Reverse calculation order to handle overlap */ 256*fcf5ef2aSThomas Huth PMUL(1); 257*fcf5ef2aSThomas Huth PMUL(0); 258*fcf5ef2aSThomas Huth #undef PMUL 259*fcf5ef2aSThomas Huth 260*fcf5ef2aSThomas Huth return d.ll; 261*fcf5ef2aSThomas Huth } 262*fcf5ef2aSThomas Huth 263*fcf5ef2aSThomas Huth uint64_t helper_fexpand(uint64_t src1, uint64_t src2) 264*fcf5ef2aSThomas Huth { 265*fcf5ef2aSThomas Huth VIS32 s; 266*fcf5ef2aSThomas Huth VIS64 d; 267*fcf5ef2aSThomas Huth 268*fcf5ef2aSThomas Huth s.l = (uint32_t)src1; 269*fcf5ef2aSThomas Huth d.ll = src2; 270*fcf5ef2aSThomas Huth d.VIS_W64(0) = s.VIS_B32(0) << 4; 271*fcf5ef2aSThomas Huth d.VIS_W64(1) = s.VIS_B32(1) << 4; 272*fcf5ef2aSThomas Huth d.VIS_W64(2) = s.VIS_B32(2) << 4; 273*fcf5ef2aSThomas Huth d.VIS_W64(3) = s.VIS_B32(3) << 4; 274*fcf5ef2aSThomas Huth 275*fcf5ef2aSThomas Huth return d.ll; 276*fcf5ef2aSThomas Huth } 277*fcf5ef2aSThomas Huth 278*fcf5ef2aSThomas Huth #define VIS_HELPER(name, F) \ 279*fcf5ef2aSThomas Huth uint64_t name##16(uint64_t src1, uint64_t src2) \ 280*fcf5ef2aSThomas Huth { \ 281*fcf5ef2aSThomas Huth VIS64 s, d; \ 282*fcf5ef2aSThomas Huth \ 283*fcf5ef2aSThomas Huth s.ll = src1; \ 284*fcf5ef2aSThomas Huth d.ll = src2; \ 285*fcf5ef2aSThomas Huth \ 286*fcf5ef2aSThomas Huth d.VIS_W64(0) = F(d.VIS_W64(0), s.VIS_W64(0)); \ 287*fcf5ef2aSThomas Huth d.VIS_W64(1) = F(d.VIS_W64(1), s.VIS_W64(1)); \ 288*fcf5ef2aSThomas Huth d.VIS_W64(2) = F(d.VIS_W64(2), s.VIS_W64(2)); \ 289*fcf5ef2aSThomas Huth d.VIS_W64(3) = F(d.VIS_W64(3), s.VIS_W64(3)); \ 290*fcf5ef2aSThomas Huth \ 291*fcf5ef2aSThomas Huth return d.ll; \ 292*fcf5ef2aSThomas Huth } \ 293*fcf5ef2aSThomas Huth \ 294*fcf5ef2aSThomas Huth uint32_t name##16s(uint32_t src1, uint32_t src2) \ 295*fcf5ef2aSThomas Huth { \ 296*fcf5ef2aSThomas Huth VIS32 s, d; \ 297*fcf5ef2aSThomas Huth \ 298*fcf5ef2aSThomas Huth s.l = src1; \ 299*fcf5ef2aSThomas Huth d.l = src2; \ 300*fcf5ef2aSThomas Huth \ 301*fcf5ef2aSThomas Huth d.VIS_W32(0) = F(d.VIS_W32(0), s.VIS_W32(0)); \ 302*fcf5ef2aSThomas Huth d.VIS_W32(1) = F(d.VIS_W32(1), s.VIS_W32(1)); \ 303*fcf5ef2aSThomas Huth \ 304*fcf5ef2aSThomas Huth return d.l; \ 305*fcf5ef2aSThomas Huth } \ 306*fcf5ef2aSThomas Huth \ 307*fcf5ef2aSThomas Huth uint64_t name##32(uint64_t src1, uint64_t src2) \ 308*fcf5ef2aSThomas Huth { \ 309*fcf5ef2aSThomas Huth VIS64 s, d; \ 310*fcf5ef2aSThomas Huth \ 311*fcf5ef2aSThomas Huth s.ll = src1; \ 312*fcf5ef2aSThomas Huth d.ll = src2; \ 313*fcf5ef2aSThomas Huth \ 314*fcf5ef2aSThomas Huth d.VIS_L64(0) = F(d.VIS_L64(0), s.VIS_L64(0)); \ 315*fcf5ef2aSThomas Huth d.VIS_L64(1) = F(d.VIS_L64(1), s.VIS_L64(1)); \ 316*fcf5ef2aSThomas Huth \ 317*fcf5ef2aSThomas Huth return d.ll; \ 318*fcf5ef2aSThomas Huth } \ 319*fcf5ef2aSThomas Huth \ 320*fcf5ef2aSThomas Huth uint32_t name##32s(uint32_t src1, uint32_t src2) \ 321*fcf5ef2aSThomas Huth { \ 322*fcf5ef2aSThomas Huth VIS32 s, d; \ 323*fcf5ef2aSThomas Huth \ 324*fcf5ef2aSThomas Huth s.l = src1; \ 325*fcf5ef2aSThomas Huth d.l = src2; \ 326*fcf5ef2aSThomas Huth \ 327*fcf5ef2aSThomas Huth d.l = F(d.l, s.l); \ 328*fcf5ef2aSThomas Huth \ 329*fcf5ef2aSThomas Huth return d.l; \ 330*fcf5ef2aSThomas Huth } 331*fcf5ef2aSThomas Huth 332*fcf5ef2aSThomas Huth #define FADD(a, b) ((a) + (b)) 333*fcf5ef2aSThomas Huth #define FSUB(a, b) ((a) - (b)) 334*fcf5ef2aSThomas Huth VIS_HELPER(helper_fpadd, FADD) 335*fcf5ef2aSThomas Huth VIS_HELPER(helper_fpsub, FSUB) 336*fcf5ef2aSThomas Huth 337*fcf5ef2aSThomas Huth #define VIS_CMPHELPER(name, F) \ 338*fcf5ef2aSThomas Huth uint64_t name##16(uint64_t src1, uint64_t src2) \ 339*fcf5ef2aSThomas Huth { \ 340*fcf5ef2aSThomas Huth VIS64 s, d; \ 341*fcf5ef2aSThomas Huth \ 342*fcf5ef2aSThomas Huth s.ll = src1; \ 343*fcf5ef2aSThomas Huth d.ll = src2; \ 344*fcf5ef2aSThomas Huth \ 345*fcf5ef2aSThomas Huth d.VIS_W64(0) = F(s.VIS_W64(0), d.VIS_W64(0)) ? 1 : 0; \ 346*fcf5ef2aSThomas Huth d.VIS_W64(0) |= F(s.VIS_W64(1), d.VIS_W64(1)) ? 2 : 0; \ 347*fcf5ef2aSThomas Huth d.VIS_W64(0) |= F(s.VIS_W64(2), d.VIS_W64(2)) ? 4 : 0; \ 348*fcf5ef2aSThomas Huth d.VIS_W64(0) |= F(s.VIS_W64(3), d.VIS_W64(3)) ? 8 : 0; \ 349*fcf5ef2aSThomas Huth d.VIS_W64(1) = d.VIS_W64(2) = d.VIS_W64(3) = 0; \ 350*fcf5ef2aSThomas Huth \ 351*fcf5ef2aSThomas Huth return d.ll; \ 352*fcf5ef2aSThomas Huth } \ 353*fcf5ef2aSThomas Huth \ 354*fcf5ef2aSThomas Huth uint64_t name##32(uint64_t src1, uint64_t src2) \ 355*fcf5ef2aSThomas Huth { \ 356*fcf5ef2aSThomas Huth VIS64 s, d; \ 357*fcf5ef2aSThomas Huth \ 358*fcf5ef2aSThomas Huth s.ll = src1; \ 359*fcf5ef2aSThomas Huth d.ll = src2; \ 360*fcf5ef2aSThomas Huth \ 361*fcf5ef2aSThomas Huth d.VIS_L64(0) = F(s.VIS_L64(0), d.VIS_L64(0)) ? 1 : 0; \ 362*fcf5ef2aSThomas Huth d.VIS_L64(0) |= F(s.VIS_L64(1), d.VIS_L64(1)) ? 2 : 0; \ 363*fcf5ef2aSThomas Huth d.VIS_L64(1) = 0; \ 364*fcf5ef2aSThomas Huth \ 365*fcf5ef2aSThomas Huth return d.ll; \ 366*fcf5ef2aSThomas Huth } 367*fcf5ef2aSThomas Huth 368*fcf5ef2aSThomas Huth #define FCMPGT(a, b) ((a) > (b)) 369*fcf5ef2aSThomas Huth #define FCMPEQ(a, b) ((a) == (b)) 370*fcf5ef2aSThomas Huth #define FCMPLE(a, b) ((a) <= (b)) 371*fcf5ef2aSThomas Huth #define FCMPNE(a, b) ((a) != (b)) 372*fcf5ef2aSThomas Huth 373*fcf5ef2aSThomas Huth VIS_CMPHELPER(helper_fcmpgt, FCMPGT) 374*fcf5ef2aSThomas Huth VIS_CMPHELPER(helper_fcmpeq, FCMPEQ) 375*fcf5ef2aSThomas Huth VIS_CMPHELPER(helper_fcmple, FCMPLE) 376*fcf5ef2aSThomas Huth VIS_CMPHELPER(helper_fcmpne, FCMPNE) 377*fcf5ef2aSThomas Huth 378*fcf5ef2aSThomas Huth uint64_t helper_pdist(uint64_t sum, uint64_t src1, uint64_t src2) 379*fcf5ef2aSThomas Huth { 380*fcf5ef2aSThomas Huth int i; 381*fcf5ef2aSThomas Huth for (i = 0; i < 8; i++) { 382*fcf5ef2aSThomas Huth int s1, s2; 383*fcf5ef2aSThomas Huth 384*fcf5ef2aSThomas Huth s1 = (src1 >> (56 - (i * 8))) & 0xff; 385*fcf5ef2aSThomas Huth s2 = (src2 >> (56 - (i * 8))) & 0xff; 386*fcf5ef2aSThomas Huth 387*fcf5ef2aSThomas Huth /* Absolute value of difference. */ 388*fcf5ef2aSThomas Huth s1 -= s2; 389*fcf5ef2aSThomas Huth if (s1 < 0) { 390*fcf5ef2aSThomas Huth s1 = -s1; 391*fcf5ef2aSThomas Huth } 392*fcf5ef2aSThomas Huth 393*fcf5ef2aSThomas Huth sum += s1; 394*fcf5ef2aSThomas Huth } 395*fcf5ef2aSThomas Huth 396*fcf5ef2aSThomas Huth return sum; 397*fcf5ef2aSThomas Huth } 398*fcf5ef2aSThomas Huth 399*fcf5ef2aSThomas Huth uint32_t helper_fpack16(uint64_t gsr, uint64_t rs2) 400*fcf5ef2aSThomas Huth { 401*fcf5ef2aSThomas Huth int scale = (gsr >> 3) & 0xf; 402*fcf5ef2aSThomas Huth uint32_t ret = 0; 403*fcf5ef2aSThomas Huth int byte; 404*fcf5ef2aSThomas Huth 405*fcf5ef2aSThomas Huth for (byte = 0; byte < 4; byte++) { 406*fcf5ef2aSThomas Huth uint32_t val; 407*fcf5ef2aSThomas Huth int16_t src = rs2 >> (byte * 16); 408*fcf5ef2aSThomas Huth int32_t scaled = src << scale; 409*fcf5ef2aSThomas Huth int32_t from_fixed = scaled >> 7; 410*fcf5ef2aSThomas Huth 411*fcf5ef2aSThomas Huth val = (from_fixed < 0 ? 0 : 412*fcf5ef2aSThomas Huth from_fixed > 255 ? 255 : from_fixed); 413*fcf5ef2aSThomas Huth 414*fcf5ef2aSThomas Huth ret |= val << (8 * byte); 415*fcf5ef2aSThomas Huth } 416*fcf5ef2aSThomas Huth 417*fcf5ef2aSThomas Huth return ret; 418*fcf5ef2aSThomas Huth } 419*fcf5ef2aSThomas Huth 420*fcf5ef2aSThomas Huth uint64_t helper_fpack32(uint64_t gsr, uint64_t rs1, uint64_t rs2) 421*fcf5ef2aSThomas Huth { 422*fcf5ef2aSThomas Huth int scale = (gsr >> 3) & 0x1f; 423*fcf5ef2aSThomas Huth uint64_t ret = 0; 424*fcf5ef2aSThomas Huth int word; 425*fcf5ef2aSThomas Huth 426*fcf5ef2aSThomas Huth ret = (rs1 << 8) & ~(0x000000ff000000ffULL); 427*fcf5ef2aSThomas Huth for (word = 0; word < 2; word++) { 428*fcf5ef2aSThomas Huth uint64_t val; 429*fcf5ef2aSThomas Huth int32_t src = rs2 >> (word * 32); 430*fcf5ef2aSThomas Huth int64_t scaled = (int64_t)src << scale; 431*fcf5ef2aSThomas Huth int64_t from_fixed = scaled >> 23; 432*fcf5ef2aSThomas Huth 433*fcf5ef2aSThomas Huth val = (from_fixed < 0 ? 0 : 434*fcf5ef2aSThomas Huth (from_fixed > 255) ? 255 : from_fixed); 435*fcf5ef2aSThomas Huth 436*fcf5ef2aSThomas Huth ret |= val << (32 * word); 437*fcf5ef2aSThomas Huth } 438*fcf5ef2aSThomas Huth 439*fcf5ef2aSThomas Huth return ret; 440*fcf5ef2aSThomas Huth } 441*fcf5ef2aSThomas Huth 442*fcf5ef2aSThomas Huth uint32_t helper_fpackfix(uint64_t gsr, uint64_t rs2) 443*fcf5ef2aSThomas Huth { 444*fcf5ef2aSThomas Huth int scale = (gsr >> 3) & 0x1f; 445*fcf5ef2aSThomas Huth uint32_t ret = 0; 446*fcf5ef2aSThomas Huth int word; 447*fcf5ef2aSThomas Huth 448*fcf5ef2aSThomas Huth for (word = 0; word < 2; word++) { 449*fcf5ef2aSThomas Huth uint32_t val; 450*fcf5ef2aSThomas Huth int32_t src = rs2 >> (word * 32); 451*fcf5ef2aSThomas Huth int64_t scaled = (int64_t)src << scale; 452*fcf5ef2aSThomas Huth int64_t from_fixed = scaled >> 16; 453*fcf5ef2aSThomas Huth 454*fcf5ef2aSThomas Huth val = (from_fixed < -32768 ? -32768 : 455*fcf5ef2aSThomas Huth from_fixed > 32767 ? 32767 : from_fixed); 456*fcf5ef2aSThomas Huth 457*fcf5ef2aSThomas Huth ret |= (val & 0xffff) << (word * 16); 458*fcf5ef2aSThomas Huth } 459*fcf5ef2aSThomas Huth 460*fcf5ef2aSThomas Huth return ret; 461*fcf5ef2aSThomas Huth } 462*fcf5ef2aSThomas Huth 463*fcf5ef2aSThomas Huth uint64_t helper_bshuffle(uint64_t gsr, uint64_t src1, uint64_t src2) 464*fcf5ef2aSThomas Huth { 465*fcf5ef2aSThomas Huth union { 466*fcf5ef2aSThomas Huth uint64_t ll[2]; 467*fcf5ef2aSThomas Huth uint8_t b[16]; 468*fcf5ef2aSThomas Huth } s; 469*fcf5ef2aSThomas Huth VIS64 r; 470*fcf5ef2aSThomas Huth uint32_t i, mask, host; 471*fcf5ef2aSThomas Huth 472*fcf5ef2aSThomas Huth /* Set up S such that we can index across all of the bytes. */ 473*fcf5ef2aSThomas Huth #ifdef HOST_WORDS_BIGENDIAN 474*fcf5ef2aSThomas Huth s.ll[0] = src1; 475*fcf5ef2aSThomas Huth s.ll[1] = src2; 476*fcf5ef2aSThomas Huth host = 0; 477*fcf5ef2aSThomas Huth #else 478*fcf5ef2aSThomas Huth s.ll[1] = src1; 479*fcf5ef2aSThomas Huth s.ll[0] = src2; 480*fcf5ef2aSThomas Huth host = 15; 481*fcf5ef2aSThomas Huth #endif 482*fcf5ef2aSThomas Huth mask = gsr >> 32; 483*fcf5ef2aSThomas Huth 484*fcf5ef2aSThomas Huth for (i = 0; i < 8; ++i) { 485*fcf5ef2aSThomas Huth unsigned e = (mask >> (28 - i*4)) & 0xf; 486*fcf5ef2aSThomas Huth r.VIS_B64(i) = s.b[e ^ host]; 487*fcf5ef2aSThomas Huth } 488*fcf5ef2aSThomas Huth 489*fcf5ef2aSThomas Huth return r.ll; 490*fcf5ef2aSThomas Huth } 491