1 /* 2 * VIS op helpers 3 * 4 * Copyright (c) 2003-2005 Fabrice Bellard 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "cpu.h" 22 #include "exec/helper-proto.h" 23 24 /* This function uses non-native bit order */ 25 #define GET_FIELD(X, FROM, TO) \ 26 ((X) >> (63 - (TO)) & ((1ULL << ((TO) - (FROM) + 1)) - 1)) 27 28 /* This function uses the order in the manuals, i.e. bit 0 is 2^0 */ 29 #define GET_FIELD_SP(X, FROM, TO) \ 30 GET_FIELD(X, 63 - (TO), 63 - (FROM)) 31 32 target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize) 33 { 34 return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) | 35 (GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) | 36 (GET_FIELD_SP(pixel_addr, 17 + cubesize - 1, 17) << 17) | 37 (GET_FIELD_SP(pixel_addr, 56, 59) << 13) | 38 (GET_FIELD_SP(pixel_addr, 35, 38) << 9) | 39 (GET_FIELD_SP(pixel_addr, 13, 16) << 5) | 40 (((pixel_addr >> 55) & 1) << 4) | 41 (GET_FIELD_SP(pixel_addr, 33, 34) << 2) | 42 GET_FIELD_SP(pixel_addr, 11, 12); 43 } 44 45 #if HOST_BIG_ENDIAN 46 #define VIS_B64(n) b[7 - (n)] 47 #define VIS_W64(n) w[3 - (n)] 48 #define VIS_SW64(n) sw[3 - (n)] 49 #define VIS_L64(n) l[1 - (n)] 50 #define VIS_B32(n) b[3 - (n)] 51 #define VIS_W32(n) w[1 - (n)] 52 #else 53 #define VIS_B64(n) b[n] 54 #define VIS_W64(n) w[n] 55 #define VIS_SW64(n) sw[n] 56 #define VIS_L64(n) l[n] 57 #define VIS_B32(n) b[n] 58 #define VIS_W32(n) w[n] 59 #endif 60 61 typedef union { 62 uint8_t b[8]; 63 uint16_t w[4]; 64 int16_t sw[4]; 65 uint32_t l[2]; 66 uint64_t ll; 67 float64 d; 68 } VIS64; 69 70 typedef union { 71 uint8_t b[4]; 72 uint16_t w[2]; 73 uint32_t l; 74 float32 f; 75 } VIS32; 76 77 uint64_t helper_fpmerge(uint32_t src1, uint32_t src2) 78 { 79 VIS32 s1, s2; 80 VIS64 d; 81 82 s1.l = src1; 83 s2.l = src2; 84 d.ll = 0; 85 86 d.VIS_B64(7) = s1.VIS_B32(3); 87 d.VIS_B64(6) = s2.VIS_B32(3); 88 d.VIS_B64(5) = s1.VIS_B32(2); 89 d.VIS_B64(4) = s2.VIS_B32(2); 90 d.VIS_B64(3) = s1.VIS_B32(1); 91 d.VIS_B64(2) = s2.VIS_B32(1); 92 d.VIS_B64(1) = s1.VIS_B32(0); 93 d.VIS_B64(0) = s2.VIS_B32(0); 94 95 return d.ll; 96 } 97 98 uint64_t helper_fmul8x16(uint32_t src1, uint64_t src2) 99 { 100 VIS64 d; 101 VIS32 s; 102 uint32_t tmp; 103 104 s.l = src1; 105 d.ll = src2; 106 107 #define PMUL(r) \ 108 tmp = (int32_t)d.VIS_SW64(r) * (int32_t)s.VIS_B32(r); \ 109 if ((tmp & 0xff) > 0x7f) { \ 110 tmp += 0x100; \ 111 } \ 112 d.VIS_W64(r) = tmp >> 8; 113 114 PMUL(0); 115 PMUL(1); 116 PMUL(2); 117 PMUL(3); 118 #undef PMUL 119 120 return d.ll; 121 } 122 123 uint64_t helper_fmul8x16a(uint32_t src1, int32_t src2) 124 { 125 VIS32 s; 126 VIS64 d; 127 uint32_t tmp; 128 129 s.l = src1; 130 d.ll = 0; 131 132 #define PMUL(r) \ 133 do { \ 134 tmp = src2 * (int32_t)s.VIS_B32(r); \ 135 if ((tmp & 0xff) > 0x7f) { \ 136 tmp += 0x100; \ 137 } \ 138 d.VIS_W64(r) = tmp >> 8; \ 139 } while (0) 140 141 PMUL(0); 142 PMUL(1); 143 PMUL(2); 144 PMUL(3); 145 #undef PMUL 146 147 return d.ll; 148 } 149 150 uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2) 151 { 152 VIS64 s, d; 153 uint32_t tmp; 154 155 s.ll = src1; 156 d.ll = src2; 157 158 #define PMUL(r) \ 159 tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8); \ 160 if ((tmp & 0xff) > 0x7f) { \ 161 tmp += 0x100; \ 162 } \ 163 d.VIS_W64(r) = tmp >> 8; 164 165 PMUL(0); 166 PMUL(1); 167 PMUL(2); 168 PMUL(3); 169 #undef PMUL 170 171 return d.ll; 172 } 173 174 uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2) 175 { 176 VIS64 s, d; 177 uint32_t tmp; 178 179 s.ll = src1; 180 d.ll = src2; 181 182 #define PMUL(r) \ 183 tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2)); \ 184 if ((tmp & 0xff) > 0x7f) { \ 185 tmp += 0x100; \ 186 } \ 187 d.VIS_W64(r) = tmp >> 8; 188 189 PMUL(0); 190 PMUL(1); 191 PMUL(2); 192 PMUL(3); 193 #undef PMUL 194 195 return d.ll; 196 } 197 198 uint64_t helper_fexpand(uint32_t src2) 199 { 200 VIS32 s; 201 VIS64 d; 202 203 s.l = src2; 204 d.ll = 0; 205 d.VIS_W64(0) = s.VIS_B32(0) << 4; 206 d.VIS_W64(1) = s.VIS_B32(1) << 4; 207 d.VIS_W64(2) = s.VIS_B32(2) << 4; 208 d.VIS_W64(3) = s.VIS_B32(3) << 4; 209 210 return d.ll; 211 } 212 213 #define VIS_CMPHELPER(name, F) \ 214 uint64_t name##16(uint64_t src1, uint64_t src2) \ 215 { \ 216 VIS64 s, d; \ 217 \ 218 s.ll = src1; \ 219 d.ll = src2; \ 220 \ 221 d.VIS_W64(0) = F(s.VIS_W64(0), d.VIS_W64(0)) ? 1 : 0; \ 222 d.VIS_W64(0) |= F(s.VIS_W64(1), d.VIS_W64(1)) ? 2 : 0; \ 223 d.VIS_W64(0) |= F(s.VIS_W64(2), d.VIS_W64(2)) ? 4 : 0; \ 224 d.VIS_W64(0) |= F(s.VIS_W64(3), d.VIS_W64(3)) ? 8 : 0; \ 225 d.VIS_W64(1) = d.VIS_W64(2) = d.VIS_W64(3) = 0; \ 226 \ 227 return d.ll; \ 228 } \ 229 \ 230 uint64_t name##32(uint64_t src1, uint64_t src2) \ 231 { \ 232 VIS64 s, d; \ 233 \ 234 s.ll = src1; \ 235 d.ll = src2; \ 236 \ 237 d.VIS_L64(0) = F(s.VIS_L64(0), d.VIS_L64(0)) ? 1 : 0; \ 238 d.VIS_L64(0) |= F(s.VIS_L64(1), d.VIS_L64(1)) ? 2 : 0; \ 239 d.VIS_L64(1) = 0; \ 240 \ 241 return d.ll; \ 242 } 243 244 #define FCMPGT(a, b) ((a) > (b)) 245 #define FCMPEQ(a, b) ((a) == (b)) 246 #define FCMPLE(a, b) ((a) <= (b)) 247 #define FCMPNE(a, b) ((a) != (b)) 248 249 VIS_CMPHELPER(helper_fcmpgt, FCMPGT) 250 VIS_CMPHELPER(helper_fcmpeq, FCMPEQ) 251 VIS_CMPHELPER(helper_fcmple, FCMPLE) 252 VIS_CMPHELPER(helper_fcmpne, FCMPNE) 253 254 uint64_t helper_pdist(uint64_t sum, uint64_t src1, uint64_t src2) 255 { 256 int i; 257 for (i = 0; i < 8; i++) { 258 int s1, s2; 259 260 s1 = (src1 >> (56 - (i * 8))) & 0xff; 261 s2 = (src2 >> (56 - (i * 8))) & 0xff; 262 263 /* Absolute value of difference. */ 264 s1 -= s2; 265 if (s1 < 0) { 266 s1 = -s1; 267 } 268 269 sum += s1; 270 } 271 272 return sum; 273 } 274 275 uint32_t helper_fpack16(uint64_t gsr, uint64_t rs2) 276 { 277 int scale = (gsr >> 3) & 0xf; 278 uint32_t ret = 0; 279 int byte; 280 281 for (byte = 0; byte < 4; byte++) { 282 uint32_t val; 283 int16_t src = rs2 >> (byte * 16); 284 int32_t scaled = src << scale; 285 int32_t from_fixed = scaled >> 7; 286 287 val = (from_fixed < 0 ? 0 : 288 from_fixed > 255 ? 255 : from_fixed); 289 290 ret |= val << (8 * byte); 291 } 292 293 return ret; 294 } 295 296 uint64_t helper_fpack32(uint64_t gsr, uint64_t rs1, uint64_t rs2) 297 { 298 int scale = (gsr >> 3) & 0x1f; 299 uint64_t ret = 0; 300 int word; 301 302 ret = (rs1 << 8) & ~(0x000000ff000000ffULL); 303 for (word = 0; word < 2; word++) { 304 uint64_t val; 305 int32_t src = rs2 >> (word * 32); 306 int64_t scaled = (int64_t)src << scale; 307 int64_t from_fixed = scaled >> 23; 308 309 val = (from_fixed < 0 ? 0 : 310 (from_fixed > 255) ? 255 : from_fixed); 311 312 ret |= val << (32 * word); 313 } 314 315 return ret; 316 } 317 318 uint32_t helper_fpackfix(uint64_t gsr, uint64_t rs2) 319 { 320 int scale = (gsr >> 3) & 0x1f; 321 uint32_t ret = 0; 322 int word; 323 324 for (word = 0; word < 2; word++) { 325 uint32_t val; 326 int32_t src = rs2 >> (word * 32); 327 int64_t scaled = (int64_t)src << scale; 328 int64_t from_fixed = scaled >> 16; 329 330 val = (from_fixed < -32768 ? -32768 : 331 from_fixed > 32767 ? 32767 : from_fixed); 332 333 ret |= (val & 0xffff) << (word * 16); 334 } 335 336 return ret; 337 } 338 339 uint64_t helper_bshuffle(uint64_t gsr, uint64_t src1, uint64_t src2) 340 { 341 union { 342 uint64_t ll[2]; 343 uint8_t b[16]; 344 } s; 345 VIS64 r; 346 uint32_t i, mask, host; 347 348 /* Set up S such that we can index across all of the bytes. */ 349 #if HOST_BIG_ENDIAN 350 s.ll[0] = src1; 351 s.ll[1] = src2; 352 host = 0; 353 #else 354 s.ll[1] = src1; 355 s.ll[0] = src2; 356 host = 15; 357 #endif 358 mask = gsr >> 32; 359 360 for (i = 0; i < 8; ++i) { 361 unsigned e = (mask >> (28 - i*4)) & 0xf; 362 r.VIS_B64(i) = s.b[e ^ host]; 363 } 364 365 return r.ll; 366 } 367