1 /* 2 * VIS op helpers 3 * 4 * Copyright (c) 2003-2005 Fabrice Bellard 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "cpu.h" 22 #include "exec/helper-proto.h" 23 24 /* This function uses non-native bit order */ 25 #define GET_FIELD(X, FROM, TO) \ 26 ((X) >> (63 - (TO)) & ((1ULL << ((TO) - (FROM) + 1)) - 1)) 27 28 /* This function uses the order in the manuals, i.e. bit 0 is 2^0 */ 29 #define GET_FIELD_SP(X, FROM, TO) \ 30 GET_FIELD(X, 63 - (TO), 63 - (FROM)) 31 32 target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize) 33 { 34 return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) | 35 (GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) | 36 (GET_FIELD_SP(pixel_addr, 17 + cubesize - 1, 17) << 17) | 37 (GET_FIELD_SP(pixel_addr, 56, 59) << 13) | 38 (GET_FIELD_SP(pixel_addr, 35, 38) << 9) | 39 (GET_FIELD_SP(pixel_addr, 13, 16) << 5) | 40 (((pixel_addr >> 55) & 1) << 4) | 41 (GET_FIELD_SP(pixel_addr, 33, 34) << 2) | 42 GET_FIELD_SP(pixel_addr, 11, 12); 43 } 44 45 #if HOST_BIG_ENDIAN 46 #define VIS_B64(n) b[7 - (n)] 47 #define VIS_SB64(n) sb[7 - (n)] 48 #define VIS_W64(n) w[3 - (n)] 49 #define VIS_SW64(n) sw[3 - (n)] 50 #define VIS_L64(n) l[1 - (n)] 51 #define VIS_B32(n) b[3 - (n)] 52 #define VIS_W32(n) w[1 - (n)] 53 #else 54 #define VIS_B64(n) b[n] 55 #define VIS_SB64(n) sb[n] 56 #define VIS_W64(n) w[n] 57 #define VIS_SW64(n) sw[n] 58 #define VIS_L64(n) l[n] 59 #define VIS_B32(n) b[n] 60 #define VIS_W32(n) w[n] 61 #endif 62 63 typedef union { 64 uint8_t b[8]; 65 int8_t sb[8]; 66 uint16_t w[4]; 67 int16_t sw[4]; 68 uint32_t l[2]; 69 uint64_t ll; 70 float64 d; 71 } VIS64; 72 73 typedef union { 74 uint8_t b[4]; 75 uint16_t w[2]; 76 uint32_t l; 77 float32 f; 78 } VIS32; 79 80 uint64_t helper_fpmerge(uint32_t src1, uint32_t src2) 81 { 82 VIS32 s1, s2; 83 VIS64 d; 84 85 s1.l = src1; 86 s2.l = src2; 87 d.ll = 0; 88 89 d.VIS_B64(7) = s1.VIS_B32(3); 90 d.VIS_B64(6) = s2.VIS_B32(3); 91 d.VIS_B64(5) = s1.VIS_B32(2); 92 d.VIS_B64(4) = s2.VIS_B32(2); 93 d.VIS_B64(3) = s1.VIS_B32(1); 94 d.VIS_B64(2) = s2.VIS_B32(1); 95 d.VIS_B64(1) = s1.VIS_B32(0); 96 d.VIS_B64(0) = s2.VIS_B32(0); 97 98 return d.ll; 99 } 100 101 static inline int do_ms16b(int x, int y) 102 { 103 return ((x * y) + 0x80) >> 8; 104 } 105 106 uint64_t helper_fmul8x16(uint32_t src1, uint64_t src2) 107 { 108 VIS64 d; 109 VIS32 s; 110 111 s.l = src1; 112 d.ll = src2; 113 114 d.VIS_W64(0) = do_ms16b(s.VIS_B32(0), d.VIS_SW64(0)); 115 d.VIS_W64(1) = do_ms16b(s.VIS_B32(1), d.VIS_SW64(1)); 116 d.VIS_W64(2) = do_ms16b(s.VIS_B32(2), d.VIS_SW64(2)); 117 d.VIS_W64(3) = do_ms16b(s.VIS_B32(3), d.VIS_SW64(3)); 118 119 return d.ll; 120 } 121 122 uint64_t helper_fmul8x16a(uint32_t src1, int32_t src2) 123 { 124 VIS32 s; 125 VIS64 d; 126 127 s.l = src1; 128 d.ll = 0; 129 130 d.VIS_W64(0) = do_ms16b(s.VIS_B32(0), src2); 131 d.VIS_W64(1) = do_ms16b(s.VIS_B32(1), src2); 132 d.VIS_W64(2) = do_ms16b(s.VIS_B32(2), src2); 133 d.VIS_W64(3) = do_ms16b(s.VIS_B32(3), src2); 134 135 return d.ll; 136 } 137 138 uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2) 139 { 140 VIS64 s, d; 141 142 s.ll = src1; 143 d.ll = src2; 144 145 d.VIS_W64(0) = do_ms16b(s.VIS_SB64(1), d.VIS_SW64(0)); 146 d.VIS_W64(1) = do_ms16b(s.VIS_SB64(3), d.VIS_SW64(1)); 147 d.VIS_W64(2) = do_ms16b(s.VIS_SB64(5), d.VIS_SW64(2)); 148 d.VIS_W64(3) = do_ms16b(s.VIS_SB64(7), d.VIS_SW64(3)); 149 150 return d.ll; 151 } 152 153 uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2) 154 { 155 VIS64 s, d; 156 157 s.ll = src1; 158 d.ll = src2; 159 160 d.VIS_W64(0) = do_ms16b(s.VIS_B64(0), d.VIS_SW64(0)); 161 d.VIS_W64(1) = do_ms16b(s.VIS_B64(2), d.VIS_SW64(1)); 162 d.VIS_W64(2) = do_ms16b(s.VIS_B64(4), d.VIS_SW64(2)); 163 d.VIS_W64(3) = do_ms16b(s.VIS_B64(6), d.VIS_SW64(3)); 164 165 return d.ll; 166 } 167 168 uint64_t helper_fexpand(uint32_t src2) 169 { 170 VIS32 s; 171 VIS64 d; 172 173 s.l = src2; 174 d.ll = 0; 175 d.VIS_W64(0) = s.VIS_B32(0) << 4; 176 d.VIS_W64(1) = s.VIS_B32(1) << 4; 177 d.VIS_W64(2) = s.VIS_B32(2) << 4; 178 d.VIS_W64(3) = s.VIS_B32(3) << 4; 179 180 return d.ll; 181 } 182 183 #define VIS_CMPHELPER(name, F) \ 184 uint64_t name##16(uint64_t src1, uint64_t src2) \ 185 { \ 186 VIS64 s, d; \ 187 \ 188 s.ll = src1; \ 189 d.ll = src2; \ 190 \ 191 d.VIS_W64(0) = F(s.VIS_W64(0), d.VIS_W64(0)) ? 1 : 0; \ 192 d.VIS_W64(0) |= F(s.VIS_W64(1), d.VIS_W64(1)) ? 2 : 0; \ 193 d.VIS_W64(0) |= F(s.VIS_W64(2), d.VIS_W64(2)) ? 4 : 0; \ 194 d.VIS_W64(0) |= F(s.VIS_W64(3), d.VIS_W64(3)) ? 8 : 0; \ 195 d.VIS_W64(1) = d.VIS_W64(2) = d.VIS_W64(3) = 0; \ 196 \ 197 return d.ll; \ 198 } \ 199 \ 200 uint64_t name##32(uint64_t src1, uint64_t src2) \ 201 { \ 202 VIS64 s, d; \ 203 \ 204 s.ll = src1; \ 205 d.ll = src2; \ 206 \ 207 d.VIS_L64(0) = F(s.VIS_L64(0), d.VIS_L64(0)) ? 1 : 0; \ 208 d.VIS_L64(0) |= F(s.VIS_L64(1), d.VIS_L64(1)) ? 2 : 0; \ 209 d.VIS_L64(1) = 0; \ 210 \ 211 return d.ll; \ 212 } 213 214 #define FCMPGT(a, b) ((a) > (b)) 215 #define FCMPEQ(a, b) ((a) == (b)) 216 #define FCMPLE(a, b) ((a) <= (b)) 217 #define FCMPNE(a, b) ((a) != (b)) 218 219 VIS_CMPHELPER(helper_fcmpgt, FCMPGT) 220 VIS_CMPHELPER(helper_fcmpeq, FCMPEQ) 221 VIS_CMPHELPER(helper_fcmple, FCMPLE) 222 VIS_CMPHELPER(helper_fcmpne, FCMPNE) 223 224 uint64_t helper_pdist(uint64_t sum, uint64_t src1, uint64_t src2) 225 { 226 int i; 227 for (i = 0; i < 8; i++) { 228 int s1, s2; 229 230 s1 = (src1 >> (56 - (i * 8))) & 0xff; 231 s2 = (src2 >> (56 - (i * 8))) & 0xff; 232 233 /* Absolute value of difference. */ 234 s1 -= s2; 235 if (s1 < 0) { 236 s1 = -s1; 237 } 238 239 sum += s1; 240 } 241 242 return sum; 243 } 244 245 uint32_t helper_fpack16(uint64_t gsr, uint64_t rs2) 246 { 247 int scale = (gsr >> 3) & 0xf; 248 uint32_t ret = 0; 249 int byte; 250 251 for (byte = 0; byte < 4; byte++) { 252 uint32_t val; 253 int16_t src = rs2 >> (byte * 16); 254 int32_t scaled = src << scale; 255 int32_t from_fixed = scaled >> 7; 256 257 val = (from_fixed < 0 ? 0 : 258 from_fixed > 255 ? 255 : from_fixed); 259 260 ret |= val << (8 * byte); 261 } 262 263 return ret; 264 } 265 266 uint64_t helper_fpack32(uint64_t gsr, uint64_t rs1, uint64_t rs2) 267 { 268 int scale = (gsr >> 3) & 0x1f; 269 uint64_t ret = 0; 270 int word; 271 272 ret = (rs1 << 8) & ~(0x000000ff000000ffULL); 273 for (word = 0; word < 2; word++) { 274 uint64_t val; 275 int32_t src = rs2 >> (word * 32); 276 int64_t scaled = (int64_t)src << scale; 277 int64_t from_fixed = scaled >> 23; 278 279 val = (from_fixed < 0 ? 0 : 280 (from_fixed > 255) ? 255 : from_fixed); 281 282 ret |= val << (32 * word); 283 } 284 285 return ret; 286 } 287 288 uint32_t helper_fpackfix(uint64_t gsr, uint64_t rs2) 289 { 290 int scale = (gsr >> 3) & 0x1f; 291 uint32_t ret = 0; 292 int word; 293 294 for (word = 0; word < 2; word++) { 295 uint32_t val; 296 int32_t src = rs2 >> (word * 32); 297 int64_t scaled = (int64_t)src << scale; 298 int64_t from_fixed = scaled >> 16; 299 300 val = (from_fixed < -32768 ? -32768 : 301 from_fixed > 32767 ? 32767 : from_fixed); 302 303 ret |= (val & 0xffff) << (word * 16); 304 } 305 306 return ret; 307 } 308 309 uint64_t helper_bshuffle(uint64_t gsr, uint64_t src1, uint64_t src2) 310 { 311 union { 312 uint64_t ll[2]; 313 uint8_t b[16]; 314 } s; 315 VIS64 r; 316 uint32_t i, mask, host; 317 318 /* Set up S such that we can index across all of the bytes. */ 319 #if HOST_BIG_ENDIAN 320 s.ll[0] = src1; 321 s.ll[1] = src2; 322 host = 0; 323 #else 324 s.ll[1] = src1; 325 s.ll[0] = src2; 326 host = 15; 327 #endif 328 mask = gsr >> 32; 329 330 for (i = 0; i < 8; ++i) { 331 unsigned e = (mask >> (28 - i*4)) & 0xf; 332 r.VIS_B64(i) = s.b[e ^ host]; 333 } 334 335 return r.ll; 336 } 337