/*
 * VIS op helpers
 *
 * Copyright (c) 2003-2005 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/helper-proto.h"

/* This macro uses non-native bit order */
#define GET_FIELD(X, FROM, TO)                                  \
    ((X) >> (63 - (TO)) & ((1ULL << ((TO) - (FROM) + 1)) - 1))

/* This macro uses the bit order of the manuals, i.e. bit 0 is 2^0 */
#define GET_FIELD_SP(X, FROM, TO)               \
    GET_FIELD(X, 63 - (TO), 63 - (FROM))

target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize)
{
    return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) |
        (GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) |
        (GET_FIELD_SP(pixel_addr, 17 + cubesize - 1, 17) << 17) |
        (GET_FIELD_SP(pixel_addr, 56, 59) << 13) |
        (GET_FIELD_SP(pixel_addr, 35, 38) << 9) |
        (GET_FIELD_SP(pixel_addr, 13, 16) << 5) |
        (((pixel_addr >> 55) & 1) << 4) |
        (GET_FIELD_SP(pixel_addr, 33, 34) << 2) |
        GET_FIELD_SP(pixel_addr, 11, 12);
}

#ifdef HOST_WORDS_BIGENDIAN
#define VIS_B64(n) b[7 - (n)]
#define VIS_W64(n) w[3 - (n)]
#define VIS_SW64(n) sw[3 - (n)]
#define VIS_L64(n) l[1 - (n)]
#define VIS_B32(n) b[3 - (n)]
#define VIS_W32(n) w[1 - (n)]
#else
#define VIS_B64(n) b[n]
#define VIS_W64(n) w[n]
#define VIS_SW64(n) sw[n]
#define VIS_L64(n) l[n]
#define VIS_B32(n) b[n]
#define VIS_W32(n) w[n]
#endif

typedef union {
    uint8_t b[8];
    uint16_t w[4];
    int16_t sw[4];
    uint32_t l[2];
    uint64_t ll;
    float64 d;
} VIS64;

typedef union {
    uint8_t b[4];
    uint16_t w[2];
    uint32_t l;
    float32 f;
} VIS32;

uint64_t helper_fpmerge(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;

    s.ll = src1;
    d.ll = src2;

    /* Reverse calculation order to handle overlap */
    d.VIS_B64(7) = s.VIS_B64(3);
    d.VIS_B64(6) = d.VIS_B64(3);
    d.VIS_B64(5) = s.VIS_B64(2);
    d.VIS_B64(4) = d.VIS_B64(2);
    d.VIS_B64(3) = s.VIS_B64(1);
    d.VIS_B64(2) = d.VIS_B64(1);
    d.VIS_B64(1) = s.VIS_B64(0);
    /* d.VIS_B64(0) = d.VIS_B64(0); */

    return d.ll;
}

uint64_t helper_fmul8x16(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                 \
    tmp = (int32_t)d.VIS_SW64(r) * (int32_t)s.VIS_B64(r);       \
    if ((tmp & 0xff) > 0x7f) {                                  \
        tmp += 0x100;                                           \
    }                                                           \
    d.VIS_W64(r) = tmp >> 8;

    PMUL(0);
    PMUL(1);
    PMUL(2);
    PMUL(3);
#undef PMUL

    return d.ll;
}
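
/*
 * The rounding step in PMUL above (and in the multiply helpers that
 * follow) rounds the discarded low byte of the 16x8-bit product to
 * nearest: a fraction of 0x80 (one half) or more adds one to the
 * result.  Illustrative values, not taken from the source: 0x0180 *
 * 0x01 = 0x0180; the low byte 0x80 exceeds 0x7f, so 0x100 is added
 * before the shift and the lane result is 0x0002 -- 1.5 rounds to 2.
 */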

uint64_t helper_fmul8x16al(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                 \
    tmp = (int32_t)d.VIS_SW64(1) * (int32_t)s.VIS_B64(r);       \
    if ((tmp & 0xff) > 0x7f) {                                  \
        tmp += 0x100;                                           \
    }                                                           \
    d.VIS_W64(r) = tmp >> 8;

    PMUL(0);
    PMUL(1);
    PMUL(2);
    PMUL(3);
#undef PMUL

    return d.ll;
}

uint64_t helper_fmul8x16au(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                 \
    tmp = (int32_t)d.VIS_SW64(0) * (int32_t)s.VIS_B64(r);       \
    if ((tmp & 0xff) > 0x7f) {                                  \
        tmp += 0x100;                                           \
    }                                                           \
    d.VIS_W64(r) = tmp >> 8;

    PMUL(0);
    PMUL(1);
    PMUL(2);
    PMUL(3);
#undef PMUL

    return d.ll;
}

uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                         \
    tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8);       \
    if ((tmp & 0xff) > 0x7f) {                                          \
        tmp += 0x100;                                                   \
    }                                                                   \
    d.VIS_W64(r) = tmp >> 8;

    PMUL(0);
    PMUL(1);
    PMUL(2);
    PMUL(3);
#undef PMUL

    return d.ll;
}

uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                         \
    tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2));        \
    if ((tmp & 0xff) > 0x7f) {                                          \
        tmp += 0x100;                                                   \
    }                                                                   \
    d.VIS_W64(r) = tmp >> 8;

    PMUL(0);
    PMUL(1);
    PMUL(2);
    PMUL(3);
#undef PMUL

    return d.ll;
}

uint64_t helper_fmuld8sux16(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                         \
    tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8);       \
    if ((tmp & 0xff) > 0x7f) {                                          \
        tmp += 0x100;                                                   \
    }                                                                   \
    d.VIS_L64(r) = tmp;

    /* Reverse calculation order to handle overlap */
    PMUL(1);
    PMUL(0);
#undef PMUL

    return d.ll;
}

uint64_t helper_fmuld8ulx16(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                         \
    tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2));        \
    if ((tmp & 0xff) > 0x7f) {                                          \
        tmp += 0x100;                                                   \
    }                                                                   \
    d.VIS_L64(r) = tmp;

    /* Reverse calculation order to handle overlap */
    PMUL(1);
    PMUL(0);
#undef PMUL

    return d.ll;
}

uint64_t helper_fexpand(uint64_t src1, uint64_t src2)
{
    VIS32 s;
    VIS64 d;

    s.l = (uint32_t)src1;
    d.ll = src2;
    d.VIS_W64(0) = s.VIS_B32(0) << 4;
    d.VIS_W64(1) = s.VIS_B32(1) << 4;
    d.VIS_W64(2) = s.VIS_B32(2) << 4;
    d.VIS_W64(3) = s.VIS_B32(3) << 4;

    return d.ll;
}
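
/*
 * A worked fexpand example (input value assumed for illustration):
 * src1 = 0x12345678 expands to 0x0120034005600780 -- each 8-bit pixel
 * is shifted left four bits into the middle of its 16-bit lane,
 * converting it to the fixed-point format consumed by the multiply
 * helpers above.
 */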

#define VIS_HELPER(name, F)                                 \
    uint64_t name##16(uint64_t src1, uint64_t src2)         \
    {                                                       \
        VIS64 s, d;                                         \
                                                            \
        s.ll = src1;                                        \
        d.ll = src2;                                        \
                                                            \
        d.VIS_W64(0) = F(d.VIS_W64(0), s.VIS_W64(0));       \
        d.VIS_W64(1) = F(d.VIS_W64(1), s.VIS_W64(1));       \
        d.VIS_W64(2) = F(d.VIS_W64(2), s.VIS_W64(2));       \
        d.VIS_W64(3) = F(d.VIS_W64(3), s.VIS_W64(3));       \
                                                            \
        return d.ll;                                        \
    }                                                       \
                                                            \
    uint32_t name##16s(uint32_t src1, uint32_t src2)        \
    {                                                       \
        VIS32 s, d;                                         \
                                                            \
        s.l = src1;                                         \
        d.l = src2;                                         \
                                                            \
        d.VIS_W32(0) = F(d.VIS_W32(0), s.VIS_W32(0));       \
        d.VIS_W32(1) = F(d.VIS_W32(1), s.VIS_W32(1));       \
                                                            \
        return d.l;                                         \
    }                                                       \
                                                            \
    uint64_t name##32(uint64_t src1, uint64_t src2)         \
    {                                                       \
        VIS64 s, d;                                         \
                                                            \
        s.ll = src1;                                        \
        d.ll = src2;                                        \
                                                            \
        d.VIS_L64(0) = F(d.VIS_L64(0), s.VIS_L64(0));       \
        d.VIS_L64(1) = F(d.VIS_L64(1), s.VIS_L64(1));       \
                                                            \
        return d.ll;                                        \
    }                                                       \
                                                            \
    uint32_t name##32s(uint32_t src1, uint32_t src2)        \
    {                                                       \
        VIS32 s, d;                                         \
                                                            \
        s.l = src1;                                         \
        d.l = src2;                                         \
                                                            \
        d.l = F(d.l, s.l);                                  \
                                                            \
        return d.l;                                         \
    }

#define FADD(a, b) ((a) + (b))
#define FSUB(a, b) ((a) - (b))
VIS_HELPER(helper_fpadd, FADD)
VIS_HELPER(helper_fpsub, FSUB)

#define VIS_CMPHELPER(name, F)                                    \
    uint64_t name##16(uint64_t src1, uint64_t src2)               \
    {                                                             \
        VIS64 s, d;                                               \
                                                                  \
        s.ll = src1;                                              \
        d.ll = src2;                                              \
                                                                  \
        d.VIS_W64(0) = F(s.VIS_W64(0), d.VIS_W64(0)) ? 1 : 0;     \
        d.VIS_W64(0) |= F(s.VIS_W64(1), d.VIS_W64(1)) ? 2 : 0;    \
        d.VIS_W64(0) |= F(s.VIS_W64(2), d.VIS_W64(2)) ? 4 : 0;    \
        d.VIS_W64(0) |= F(s.VIS_W64(3), d.VIS_W64(3)) ? 8 : 0;    \
        d.VIS_W64(1) = d.VIS_W64(2) = d.VIS_W64(3) = 0;           \
                                                                  \
        return d.ll;                                              \
    }                                                             \
                                                                  \
    uint64_t name##32(uint64_t src1, uint64_t src2)               \
    {                                                             \
        VIS64 s, d;                                               \
                                                                  \
        s.ll = src1;                                              \
        d.ll = src2;                                              \
                                                                  \
        d.VIS_L64(0) = F(s.VIS_L64(0), d.VIS_L64(0)) ? 1 : 0;     \
        d.VIS_L64(0) |= F(s.VIS_L64(1), d.VIS_L64(1)) ? 2 : 0;    \
        d.VIS_L64(1) = 0;                                         \
                                                                  \
        return d.ll;                                              \
    }

#define FCMPGT(a, b) ((a) > (b))
#define FCMPEQ(a, b) ((a) == (b))
#define FCMPLE(a, b) ((a) <= (b))
#define FCMPNE(a, b) ((a) != (b))

VIS_CMPHELPER(helper_fcmpgt, FCMPGT)
VIS_CMPHELPER(helper_fcmpeq, FCMPEQ)
VIS_CMPHELPER(helper_fcmple, FCMPLE)
VIS_CMPHELPER(helper_fcmpne, FCMPNE)

uint64_t helper_pdist(uint64_t sum, uint64_t src1, uint64_t src2)
{
    int i;
    for (i = 0; i < 8; i++) {
        int s1, s2;

        s1 = (src1 >> (56 - (i * 8))) & 0xff;
        s2 = (src2 >> (56 - (i * 8))) & 0xff;

        /* Absolute value of difference. */
        s1 -= s2;
        if (s1 < 0) {
            s1 = -s1;
        }

        sum += s1;
    }

    return sum;
}

uint32_t helper_fpack16(uint64_t gsr, uint64_t rs2)
{
    int scale = (gsr >> 3) & 0xf;
    uint32_t ret = 0;
    int byte;

    for (byte = 0; byte < 4; byte++) {
        uint32_t val;
        int16_t src = rs2 >> (byte * 16);
        int32_t scaled = src << scale;
        int32_t from_fixed = scaled >> 7;

        val = (from_fixed < 0 ? 0 :
               from_fixed > 255 ? 255 : from_fixed);

        ret |= val << (8 * byte);
    }

    return ret;
}

uint64_t helper_fpack32(uint64_t gsr, uint64_t rs1, uint64_t rs2)
{
    int scale = (gsr >> 3) & 0x1f;
    uint64_t ret = 0;
    int word;

    ret = (rs1 << 8) & ~(0x000000ff000000ffULL);
    for (word = 0; word < 2; word++) {
        uint64_t val;
        int32_t src = rs2 >> (word * 32);
        int64_t scaled = (int64_t)src << scale;
        int64_t from_fixed = scaled >> 23;

        val = (from_fixed < 0 ? 0 :
               (from_fixed > 255) ? 255 : from_fixed);

        ret |= val << (32 * word);
    }

    return ret;
}

uint32_t helper_fpackfix(uint64_t gsr, uint64_t rs2)
{
    int scale = (gsr >> 3) & 0x1f;
    uint32_t ret = 0;
    int word;

    for (word = 0; word < 2; word++) {
        uint32_t val;
        int32_t src = rs2 >> (word * 32);
        int64_t scaled = (int64_t)src << scale;
        int64_t from_fixed = scaled >> 16;

        val = (from_fixed < -32768 ? -32768 :
               from_fixed > 32767 ? 32767 : from_fixed);

        ret |= (val & 0xffff) << (word * 16);
    }

    return ret;
}
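
/*
 * Illustrative fpack16 arithmetic (lane value and GSR scale assumed):
 * with scale = 3, the 16-bit lane 0x1234 becomes 0x1234 << 3 = 0x91a0;
 * the arithmetic shift right by 7 yields 0x123 (291), which is then
 * clamped to the unsigned 8-bit maximum 0xff.  fpack32 and fpackfix
 * follow the same scale-shift-clamp pattern at different widths.
 */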

uint64_t helper_bshuffle(uint64_t gsr, uint64_t src1, uint64_t src2)
{
    union {
        uint64_t ll[2];
        uint8_t b[16];
    } s;
    VIS64 r;
    uint32_t i, mask, host;

    /* Set up S such that we can index across all of the bytes.  */
#ifdef HOST_WORDS_BIGENDIAN
    s.ll[0] = src1;
    s.ll[1] = src2;
    host = 0;
#else
    s.ll[1] = src1;
    s.ll[0] = src2;
    host = 15;
#endif
    mask = gsr >> 32;

    for (i = 0; i < 8; ++i) {
        unsigned e = (mask >> (28 - i * 4)) & 0xf;
        r.VIS_B64(i) = s.b[e ^ host];
    }

    return r.ll;
}
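
/*
 * bshuffle example (mask values assumed): each 4-bit field of
 * GSR.mask selects one byte of the 16-byte src1:src2 concatenation,
 * so with mask = 0x76543210 the helper returns src1 unchanged, and
 * with mask = 0xfedcba98 it returns src2.
 */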