1 /* 2 * Ingenic XBurst Media eXtension Unit (MXU) translation routines. 3 * 4 * Copyright (c) 2004-2005 Jocelyn Mayer 5 * Copyright (c) 2006 Marius Groeger (FPU operations) 6 * Copyright (c) 2006 Thiemo Seufer (MIPS32R2 support) 7 * Copyright (c) 2009 CodeSourcery (MIPS16 and microMIPS support) 8 * Copyright (c) 2012 Jia Liu & Dongxue Zhang (MIPS ASE DSP support) 9 * 10 * SPDX-License-Identifier: LGPL-2.1-or-later 11 * 12 * Datasheet: 13 * 14 * "XBurst® Instruction Set Architecture MIPS eXtension/enhanced Unit 15 * Programming Manual", Ingenic Semiconductor Co, Ltd., revision June 2, 2017 16 */ 17 18 #include "qemu/osdep.h" 19 #include "translate.h" 20 21 /* 22 * 23 * AN OVERVIEW OF MXU EXTENSION INSTRUCTION SET 24 * ============================================ 25 * 26 * 27 * MXU (full name: MIPS eXtension/enhanced Unit) is a SIMD extension of MIPS32 28 * instructions set. It is designed to fit the needs of signal, graphical and 29 * video processing applications. MXU instruction set is used in Xburst family 30 * of microprocessors by Ingenic. 31 * 32 * MXU unit contains 17 registers called X0-X16. X0 is always zero, and X16 is 33 * the control register. 
34 * 35 * 36 * The notation used in MXU assembler mnemonics 37 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 38 * 39 * Register operands: 40 * 41 * XRa, XRb, XRc, XRd - MXU registers 42 * Rb, Rc, Rd, Rs, Rt - general purpose MIPS registers 43 * 44 * Non-register operands: 45 * 46 * aptn1 - 1-bit accumulate add/subtract pattern 47 * aptn2 - 2-bit accumulate add/subtract pattern 48 * eptn2 - 2-bit execute add/subtract pattern 49 * optn2 - 2-bit operand pattern 50 * optn3 - 3-bit operand pattern 51 * sft4 - 4-bit shift amount 52 * strd2 - 2-bit stride amount 53 * 54 * Prefixes: 55 * 56 * Level of parallelism: Operand size: 57 * S - single operation at a time 32 - word 58 * D - two operations in parallel 16 - half word 59 * Q - four operations in parallel 8 - byte 60 * 61 * Operations: 62 * 63 * ADD - Add or subtract 64 * ADDC - Add with carry-in 65 * ACC - Accumulate 66 * ASUM - Sum together then accumulate (add or subtract) 67 * ASUMC - Sum together then accumulate (add or subtract) with carry-in 68 * AVG - Average between 2 operands 69 * ABD - Absolute difference 70 * ALN - Align data 71 * AND - Logical bitwise 'and' operation 72 * CPS - Copy sign 73 * EXTR - Extract bits 74 * I2M - Move from GPR register to MXU register 75 * LDD - Load data from memory to XRF 76 * LDI - Load data from memory to XRF (and increase the address base) 77 * LUI - Load unsigned immediate 78 * MUL - Multiply 79 * MULU - Unsigned multiply 80 * MADD - 64-bit operand add 32x32 product 81 * MSUB - 64-bit operand subtract 32x32 product 82 * MAC - Multiply and accumulate (add or subtract) 83 * MAD - Multiply and add or subtract 84 * MAX - Maximum between 2 operands 85 * MIN - Minimum between 2 operands 86 * M2I - Move from MXU register to GPR register 87 * MOVZ - Move if zero 88 * MOVN - Move if non-zero 89 * NOR - Logical bitwise 'nor' operation 90 * OR - Logical bitwise 'or' operation 91 * STD - Store data from XRF to memory 92 * SDI - Store data from XRF to memory (and increase the address 
base) 93 * SLT - Set of less than comparison 94 * SAD - Sum of absolute differences 95 * SLL - Logical shift left 96 * SLR - Logical shift right 97 * SAR - Arithmetic shift right 98 * SAT - Saturation 99 * SFL - Shuffle 100 * SCOP - Calculate x’s scope (-1, means x<0; 0, means x==0; 1, means x>0) 101 * XOR - Logical bitwise 'exclusive or' operation 102 * 103 * Suffixes: 104 * 105 * E - Expand results 106 * F - Fixed point multiplication 107 * L - Low part result 108 * R - Doing rounding 109 * V - Variable instead of immediate 110 * W - Combine above L and V 111 * 112 * 113 * The list of MXU instructions grouped by functionality 114 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 115 * 116 * Load/Store instructions Multiplication instructions 117 * ----------------------- --------------------------- 118 * 119 * S32LDD XRa, Rb, s12 S32MADD XRa, XRd, Rs, Rt 120 * S32STD XRa, Rb, s12 S32MADDU XRa, XRd, Rs, Rt 121 * S32LDDV XRa, Rb, rc, strd2 S32MSUB XRa, XRd, Rs, Rt 122 * S32STDV XRa, Rb, rc, strd2 S32MSUBU XRa, XRd, Rs, Rt 123 * S32LDI XRa, Rb, s12 S32MUL XRa, XRd, Rs, Rt 124 * S32SDI XRa, Rb, s12 S32MULU XRa, XRd, Rs, Rt 125 * S32LDIV XRa, Rb, rc, strd2 D16MUL XRa, XRb, XRc, XRd, optn2 126 * S32SDIV XRa, Rb, rc, strd2 D16MULE XRa, XRb, XRc, optn2 127 * S32LDDR XRa, Rb, s12 D16MULF XRa, XRb, XRc, optn2 128 * S32STDR XRa, Rb, s12 D16MAC XRa, XRb, XRc, XRd, aptn2, optn2 129 * S32LDDVR XRa, Rb, rc, strd2 D16MACE XRa, XRb, XRc, XRd, aptn2, optn2 130 * S32STDVR XRa, Rb, rc, strd2 D16MACF XRa, XRb, XRc, XRd, aptn2, optn2 131 * S32LDIR XRa, Rb, s12 D16MADL XRa, XRb, XRc, XRd, aptn2, optn2 132 * S32SDIR XRa, Rb, s12 S16MAD XRa, XRb, XRc, XRd, aptn1, optn2 133 * S32LDIVR XRa, Rb, rc, strd2 Q8MUL XRa, XRb, XRc, XRd 134 * S32SDIVR XRa, Rb, rc, strd2 Q8MULSU XRa, XRb, XRc, XRd 135 * S16LDD XRa, Rb, s10, eptn2 Q8MAC XRa, XRb, XRc, XRd, aptn2 136 * S16STD XRa, Rb, s10, eptn2 Q8MACSU XRa, XRb, XRc, XRd, aptn2 137 * S16LDI XRa, Rb, s10, eptn2 Q8MADL XRa, XRb, XRc, XRd, aptn2 
138 * S16SDI XRa, Rb, s10, eptn2 139 * S8LDD XRa, Rb, s8, eptn3 140 * S8STD XRa, Rb, s8, eptn3 Addition and subtraction instructions 141 * S8LDI XRa, Rb, s8, eptn3 ------------------------------------- 142 * S8SDI XRa, Rb, s8, eptn3 143 * LXW Rd, Rs, Rt, strd2 D32ADD XRa, XRb, XRc, XRd, eptn2 144 * LXH Rd, Rs, Rt, strd2 D32ADDC XRa, XRb, XRc, XRd 145 * LXHU Rd, Rs, Rt, strd2 D32ACC XRa, XRb, XRc, XRd, eptn2 146 * LXB Rd, Rs, Rt, strd2 D32ACCM XRa, XRb, XRc, XRd, eptn2 147 * LXBU Rd, Rs, Rt, strd2 D32ASUM XRa, XRb, XRc, XRd, eptn2 148 * S32CPS XRa, XRb, XRc 149 * Q16ADD XRa, XRb, XRc, XRd, eptn2, optn2 150 * Comparison instructions Q16ACC XRa, XRb, XRc, XRd, eptn2 151 * ----------------------- Q16ACCM XRa, XRb, XRc, XRd, eptn2 152 * D16ASUM XRa, XRb, XRc, XRd, eptn2 153 * S32MAX XRa, XRb, XRc D16CPS XRa, XRb, 154 * S32MIN XRa, XRb, XRc D16AVG XRa, XRb, XRc 155 * S32SLT XRa, XRb, XRc D16AVGR XRa, XRb, XRc 156 * S32MOVZ XRa, XRb, XRc Q8ADD XRa, XRb, XRc, eptn2 157 * S32MOVN XRa, XRb, XRc Q8ADDE XRa, XRb, XRc, XRd, eptn2 158 * D16MAX XRa, XRb, XRc Q8ACCE XRa, XRb, XRc, XRd, eptn2 159 * D16MIN XRa, XRb, XRc Q8ABD XRa, XRb, XRc 160 * D16SLT XRa, XRb, XRc Q8SAD XRa, XRb, XRc, XRd 161 * D16MOVZ XRa, XRb, XRc Q8AVG XRa, XRb, XRc 162 * D16MOVN XRa, XRb, XRc Q8AVGR XRa, XRb, XRc 163 * Q8MAX XRa, XRb, XRc D8SUM XRa, XRb, XRc, XRd 164 * Q8MIN XRa, XRb, XRc D8SUMC XRa, XRb, XRc, XRd 165 * Q8SLT XRa, XRb, XRc 166 * Q8SLTU XRa, XRb, XRc 167 * Q8MOVZ XRa, XRb, XRc Shift instructions 168 * Q8MOVN XRa, XRb, XRc ------------------ 169 * 170 * D32SLL XRa, XRb, XRc, XRd, sft4 171 * Bitwise instructions D32SLR XRa, XRb, XRc, XRd, sft4 172 * -------------------- D32SAR XRa, XRb, XRc, XRd, sft4 173 * D32SARL XRa, XRb, XRc, sft4 174 * S32NOR XRa, XRb, XRc D32SLLV XRa, XRb, Rb 175 * S32AND XRa, XRb, XRc D32SLRV XRa, XRb, Rb 176 * S32XOR XRa, XRb, XRc D32SARV XRa, XRb, Rb 177 * S32OR XRa, XRb, XRc D32SARW XRa, XRb, XRc, Rb 178 * Q16SLL XRa, XRb, XRc, XRd, sft4 179 * Q16SLR XRa, XRb, XRc, XRd, 
sft4 180 * Miscellaneous instructions Q16SAR XRa, XRb, XRc, XRd, sft4 181 * ------------------------- Q16SLLV XRa, XRb, Rb 182 * Q16SLRV XRa, XRb, Rb 183 * S32SFL XRa, XRb, XRc, XRd, optn2 Q16SARV XRa, XRb, Rb 184 * S32ALN XRa, XRb, XRc, Rb 185 * S32ALNI XRa, XRb, XRc, s3 186 * S32LUI XRa, s8, optn3 Move instructions 187 * S32EXTR XRa, XRb, Rb, bits5 ----------------- 188 * S32EXTRV XRa, XRb, Rs, Rt 189 * Q16SCOP XRa, XRb, XRc, XRd S32M2I XRa, Rb 190 * Q16SAT XRa, XRb, XRc S32I2M XRa, Rb 191 * 192 * 193 * The opcode organization of MXU instructions 194 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 195 * 196 * The bits 31..26 of all MXU instructions are equal to 0x1C (also referred 197 * as opcode SPECIAL2 in the base MIPS ISA). The organization and meaning of 198 * other bits up to the instruction level is as follows: 199 * 200 * bits 201 * 05..00 202 * 203 * ┌─ 000000 ─ OPC_MXU_S32MADD 204 * ├─ 000001 ─ OPC_MXU_S32MADDU 205 * ├─ 000010 ─ <not assigned> (non-MXU OPC_MUL) 206 * │ 207 * │ 20..18 208 * ├─ 000011 ─ OPC_MXU__POOL00 ─┬─ 000 ─ OPC_MXU_S32MAX 209 * │ ├─ 001 ─ OPC_MXU_S32MIN 210 * │ ├─ 010 ─ OPC_MXU_D16MAX 211 * │ ├─ 011 ─ OPC_MXU_D16MIN 212 * │ ├─ 100 ─ OPC_MXU_Q8MAX 213 * │ ├─ 101 ─ OPC_MXU_Q8MIN 214 * │ ├─ 110 ─ OPC_MXU_Q8SLT 215 * │ └─ 111 ─ OPC_MXU_Q8SLTU 216 * ├─ 000100 ─ OPC_MXU_S32MSUB 217 * ├─ 000101 ─ OPC_MXU_S32MSUBU 20..18 218 * ├─ 000110 ─ OPC_MXU__POOL01 ─┬─ 000 ─ OPC_MXU_S32SLT 219 * │ ├─ 001 ─ OPC_MXU_D16SLT 220 * │ ├─ 010 ─ OPC_MXU_D16AVG 221 * │ ├─ 011 ─ OPC_MXU_D16AVGR 222 * │ ├─ 100 ─ OPC_MXU_Q8AVG 223 * │ ├─ 101 ─ OPC_MXU_Q8AVGR 224 * │ └─ 111 ─ OPC_MXU_Q8ADD 225 * │ 226 * │ 20..18 227 * ├─ 000111 ─ OPC_MXU__POOL02 ─┬─ 000 ─ OPC_MXU_S32CPS 228 * │ ├─ 010 ─ OPC_MXU_D16CPS 229 * │ ├─ 100 ─ OPC_MXU_Q8ABD 230 * │ └─ 110 ─ OPC_MXU_Q16SAT 231 * ├─ 001000 ─ OPC_MXU_D16MUL 232 * │ 25..24 233 * ├─ 001001 ─ OPC_MXU__POOL03 ─┬─ 00 ─ OPC_MXU_D16MULF 234 * │ └─ 01 ─ OPC_MXU_D16MULE 235 * ├─ 001010 ─ OPC_MXU_D16MAC 236 * ├─ 001011 ─ OPC_MXU_D16MACF 
237 * ├─ 001100 ─ OPC_MXU_D16MADL 238 * ├─ 001101 ─ OPC_MXU_S16MAD 239 * ├─ 001110 ─ OPC_MXU_Q16ADD 240 * ├─ 001111 ─ OPC_MXU_D16MACE 20 (13..10 don't care) 241 * │ ┌─ 0 ─ OPC_MXU_S32LDD 242 * ├─ 010000 ─ OPC_MXU__POOL04 ─┴─ 1 ─ OPC_MXU_S32LDDR 243 * │ 244 * │ 20 (13..10 don't care) 245 * ├─ 010001 ─ OPC_MXU__POOL05 ─┬─ 0 ─ OPC_MXU_S32STD 246 * │ └─ 1 ─ OPC_MXU_S32STDR 247 * │ 248 * │ 13..10 249 * ├─ 010010 ─ OPC_MXU__POOL06 ─┬─ 0000 ─ OPC_MXU_S32LDDV 250 * │ └─ 0001 ─ OPC_MXU_S32LDDVR 251 * │ 252 * │ 13..10 253 * ├─ 010011 ─ OPC_MXU__POOL07 ─┬─ 0000 ─ OPC_MXU_S32STDV 254 * │ └─ 0001 ─ OPC_MXU_S32STDVR 255 * │ 256 * │ 20 (13..10 don't care) 257 * ├─ 010100 ─ OPC_MXU__POOL08 ─┬─ 0 ─ OPC_MXU_S32LDI 258 * │ └─ 1 ─ OPC_MXU_S32LDIR 259 * │ 260 * │ 20 (13..10 don't care) 261 * ├─ 010101 ─ OPC_MXU__POOL09 ─┬─ 0 ─ OPC_MXU_S32SDI 262 * │ └─ 1 ─ OPC_MXU_S32SDIR 263 * │ 264 * │ 13..10 265 * ├─ 010110 ─ OPC_MXU__POOL10 ─┬─ 0000 ─ OPC_MXU_S32LDIV 266 * │ └─ 0001 ─ OPC_MXU_S32LDIVR 267 * │ 268 * │ 13..10 269 * ├─ 010111 ─ OPC_MXU__POOL11 ─┬─ 0000 ─ OPC_MXU_S32SDIV 270 * │ └─ 0001 ─ OPC_MXU_S32SDIVR 271 * ├─ 011000 ─ OPC_MXU_D32ADD (catches D32ADDC too) 272 * │ 23..22 273 * MXU ├─ 011001 ─ OPC_MXU__POOL12 ─┬─ 00 ─ OPC_MXU_D32ACC 274 * opcodes ─┤ ├─ 01 ─ OPC_MXU_D32ACCM 275 * │ └─ 10 ─ OPC_MXU_D32ASUM 276 * ├─ 011010 ─ <not assigned> 277 * │ 23..22 278 * ├─ 011011 ─ OPC_MXU__POOL13 ─┬─ 00 ─ OPC_MXU_Q16ACC 279 * │ ├─ 01 ─ OPC_MXU_Q16ACCM 280 * │ └─ 10 ─ OPC_MXU_D16ASUM 281 * │ 282 * │ 23..22 283 * ├─ 011100 ─ OPC_MXU__POOL14 ─┬─ 00 ─ OPC_MXU_Q8ADDE 284 * │ ├─ 01 ─ OPC_MXU_D8SUM 285 * ├─ 011101 ─ OPC_MXU_Q8ACCE └─ 10 ─ OPC_MXU_D8SUMC 286 * ├─ 011110 ─ <not assigned> 287 * ├─ 011111 ─ <not assigned> 288 * ├─ 100000 ─ <not assigned> (overlaps with CLZ) 289 * ├─ 100001 ─ <not assigned> (overlaps with CLO) 290 * ├─ 100010 ─ OPC_MXU_S8LDD 291 * ├─ 100011 ─ OPC_MXU_S8STD 15..14 292 * ├─ 100100 ─ OPC_MXU_S8LDI ┌─ 00 ─ OPC_MXU_S32MUL 293 * ├─ 100101 ─ OPC_MXU_S8SDI ├─ 01 ─ OPC_MXU_S32MULU 
294 * │ ├─ 10 ─ OPC_MXU_S32EXTR 295 * ├─ 100110 ─ OPC_MXU__POOL15 ─┴─ 11 ─ OPC_MXU_S32EXTRV 296 * │ 297 * │ 20..18 298 * ├─ 100111 ─ OPC_MXU__POOL16 ─┬─ 000 ─ OPC_MXU_D32SARW 299 * │ ├─ 001 ─ OPC_MXU_S32ALN 300 * │ ├─ 010 ─ OPC_MXU_S32ALNI 301 * │ ├─ 011 ─ OPC_MXU_S32LUI 302 * │ ├─ 100 ─ OPC_MXU_S32NOR 303 * │ ├─ 101 ─ OPC_MXU_S32AND 304 * │ ├─ 110 ─ OPC_MXU_S32OR 305 * │ └─ 111 ─ OPC_MXU_S32XOR 306 * │ 307 * │ 8..6 308 * ├─ 101000 ─ OPC_MXU__POOL17 ─┬─ 000 ─ OPC_MXU_LXB 309 * │ ├─ 001 ─ OPC_MXU_LXH 310 * ├─ 101001 ─ <not assigned> ├─ 011 ─ OPC_MXU_LXW 311 * ├─ 101010 ─ OPC_MXU_S16LDD ├─ 100 ─ OPC_MXU_LXBU 312 * ├─ 101011 ─ OPC_MXU_S16STD └─ 101 ─ OPC_MXU_LXHU 313 * ├─ 101100 ─ OPC_MXU_S16LDI 314 * ├─ 101101 ─ OPC_MXU_S16SDI 315 * ├─ 101110 ─ OPC_MXU_S32M2I 316 * ├─ 101111 ─ OPC_MXU_S32I2M 317 * ├─ 110000 ─ OPC_MXU_D32SLL 318 * ├─ 110001 ─ OPC_MXU_D32SLR 20..18 319 * ├─ 110010 ─ OPC_MXU_D32SARL ┌─ 000 ─ OPC_MXU_D32SLLV 320 * ├─ 110011 ─ OPC_MXU_D32SAR ├─ 001 ─ OPC_MXU_D32SLRV 321 * ├─ 110100 ─ OPC_MXU_Q16SLL ├─ 011 ─ OPC_MXU_D32SARV 322 * ├─ 110101 ─ OPC_MXU_Q16SLR ├─ 100 ─ OPC_MXU_Q16SLLV 323 * │ ├─ 101 ─ OPC_MXU_Q16SLRV 324 * ├─ 110110 ─ OPC_MXU__POOL18 ─┴─ 111 ─ OPC_MXU_Q16SARV 325 * │ 326 * ├─ 110111 ─ OPC_MXU_Q16SAR 327 * │ 23..22 328 * ├─ 111000 ─ OPC_MXU__POOL19 ─┬─ 00 ─ OPC_MXU_Q8MUL 329 * │ └─ 10 ─ OPC_MXU_Q8MULSU 330 * │ 331 * │ 20..18 332 * ├─ 111001 ─ OPC_MXU__POOL20 ─┬─ 000 ─ OPC_MXU_Q8MOVZ 333 * │ ├─ 001 ─ OPC_MXU_Q8MOVN 334 * │ ├─ 010 ─ OPC_MXU_D16MOVZ 335 * │ ├─ 011 ─ OPC_MXU_D16MOVN 336 * │ ├─ 100 ─ OPC_MXU_S32MOVZ 337 * │ └─ 101 ─ OPC_MXU_S32MOVN 338 * │ 339 * │ 23..22 340 * ├─ 111010 ─ OPC_MXU__POOL21 ─┬─ 00 ─ OPC_MXU_Q8MAC 341 * │ └─ 10 ─ OPC_MXU_Q8MACSU 342 * ├─ 111011 ─ OPC_MXU_Q16SCOP 343 * ├─ 111100 ─ OPC_MXU_Q8MADL 344 * ├─ 111101 ─ OPC_MXU_S32SFL 345 * ├─ 111110 ─ OPC_MXU_Q8SAD 346 * └─ 111111 ─ <not assigned> (overlaps with SDBBP) 347 * 348 * 349 * Compiled after: 350 * 351 * "XBurst® Instruction Set Architecture MIPS eXtension/enhanced 
Unit 352 * Programming Manual", Ingenic Semiconductor Co, Ltd., revision June 2, 2017 353 */ 354 355 enum { 356 OPC_MXU_S32MADD = 0x00, 357 OPC_MXU_S32MADDU = 0x01, 358 OPC_MXU__POOL00 = 0x03, 359 OPC_MXU_S32MSUB = 0x04, 360 OPC_MXU_S32MSUBU = 0x05, 361 OPC_MXU__POOL01 = 0x06, 362 OPC_MXU__POOL02 = 0x07, 363 OPC_MXU_D16MUL = 0x08, 364 OPC_MXU__POOL03 = 0x09, 365 OPC_MXU_D16MAC = 0x0A, 366 OPC_MXU_D16MACF = 0x0B, 367 OPC_MXU_D16MADL = 0x0C, 368 OPC_MXU_S16MAD = 0x0D, 369 OPC_MXU_Q16ADD = 0x0E, 370 OPC_MXU_D16MACE = 0x0F, 371 OPC_MXU__POOL04 = 0x10, 372 OPC_MXU__POOL05 = 0x11, 373 OPC_MXU__POOL06 = 0x12, 374 OPC_MXU__POOL07 = 0x13, 375 OPC_MXU__POOL08 = 0x14, 376 OPC_MXU__POOL09 = 0x15, 377 OPC_MXU__POOL10 = 0x16, 378 OPC_MXU__POOL11 = 0x17, 379 OPC_MXU_D32ADD = 0x18, 380 OPC_MXU__POOL12 = 0x19, 381 OPC_MXU__POOL13 = 0x1B, 382 OPC_MXU__POOL14 = 0x1C, 383 OPC_MXU_Q8ACCE = 0x1D, 384 OPC_MXU_S8LDD = 0x22, 385 OPC_MXU_S8STD = 0x23, 386 OPC_MXU_S8LDI = 0x24, 387 OPC_MXU_S8SDI = 0x25, 388 OPC_MXU__POOL15 = 0x26, 389 OPC_MXU__POOL16 = 0x27, 390 OPC_MXU__POOL17 = 0x28, 391 OPC_MXU_S16LDD = 0x2A, 392 OPC_MXU_S16STD = 0x2B, 393 OPC_MXU_S16LDI = 0x2C, 394 OPC_MXU_S16SDI = 0x2D, 395 OPC_MXU_S32M2I = 0x2E, 396 OPC_MXU_S32I2M = 0x2F, 397 OPC_MXU_D32SLL = 0x30, 398 OPC_MXU_D32SLR = 0x31, 399 OPC_MXU_D32SARL = 0x32, 400 OPC_MXU_D32SAR = 0x33, 401 OPC_MXU_Q16SLL = 0x34, 402 OPC_MXU_Q16SLR = 0x35, 403 OPC_MXU__POOL18 = 0x36, 404 OPC_MXU_Q16SAR = 0x37, 405 OPC_MXU__POOL19 = 0x38, 406 OPC_MXU__POOL20 = 0x39, 407 OPC_MXU__POOL21 = 0x3A, 408 OPC_MXU_Q16SCOP = 0x3B, 409 OPC_MXU_Q8MADL = 0x3C, 410 OPC_MXU_S32SFL = 0x3D, 411 OPC_MXU_Q8SAD = 0x3E, 412 }; 413 414 415 /* 416 * MXU pool 00 417 */ 418 enum { 419 OPC_MXU_S32MAX = 0x00, 420 OPC_MXU_S32MIN = 0x01, 421 OPC_MXU_D16MAX = 0x02, 422 OPC_MXU_D16MIN = 0x03, 423 OPC_MXU_Q8MAX = 0x04, 424 OPC_MXU_Q8MIN = 0x05, 425 OPC_MXU_Q8SLT = 0x06, 426 OPC_MXU_Q8SLTU = 0x07, 427 }; 428 429 /* 430 * MXU pool 01 431 */ 432 enum { 433 OPC_MXU_S32SLT = 0x00, 
434 OPC_MXU_D16SLT = 0x01, 435 OPC_MXU_D16AVG = 0x02, 436 OPC_MXU_D16AVGR = 0x03, 437 OPC_MXU_Q8AVG = 0x04, 438 OPC_MXU_Q8AVGR = 0x05, 439 OPC_MXU_Q8ADD = 0x07, 440 }; 441 442 /* 443 * MXU pool 02 444 */ 445 enum { 446 OPC_MXU_S32CPS = 0x00, 447 OPC_MXU_D16CPS = 0x02, 448 OPC_MXU_Q8ABD = 0x04, 449 OPC_MXU_Q16SAT = 0x06, 450 }; 451 452 /* 453 * MXU pool 03 454 */ 455 enum { 456 OPC_MXU_D16MULF = 0x00, 457 OPC_MXU_D16MULE = 0x01, 458 }; 459 460 /* 461 * MXU pool 04 05 06 07 08 09 10 11 462 */ 463 enum { 464 OPC_MXU_S32LDST = 0x00, 465 OPC_MXU_S32LDSTR = 0x01, 466 }; 467 468 /* 469 * MXU pool 12 470 */ 471 enum { 472 OPC_MXU_D32ACC = 0x00, 473 OPC_MXU_D32ACCM = 0x01, 474 OPC_MXU_D32ASUM = 0x02, 475 }; 476 477 /* 478 * MXU pool 13 479 */ 480 enum { 481 OPC_MXU_Q16ACC = 0x00, 482 OPC_MXU_Q16ACCM = 0x01, 483 OPC_MXU_D16ASUM = 0x02, 484 }; 485 486 /* 487 * MXU pool 14 488 */ 489 enum { 490 OPC_MXU_Q8ADDE = 0x00, 491 OPC_MXU_D8SUM = 0x01, 492 OPC_MXU_D8SUMC = 0x02, 493 }; 494 495 /* 496 * MXU pool 15 497 */ 498 enum { 499 OPC_MXU_S32MUL = 0x00, 500 OPC_MXU_S32MULU = 0x01, 501 OPC_MXU_S32EXTR = 0x02, 502 OPC_MXU_S32EXTRV = 0x03, 503 }; 504 505 /* 506 * MXU pool 16 507 */ 508 enum { 509 OPC_MXU_D32SARW = 0x00, 510 OPC_MXU_S32ALN = 0x01, 511 OPC_MXU_S32ALNI = 0x02, 512 OPC_MXU_S32LUI = 0x03, 513 OPC_MXU_S32NOR = 0x04, 514 OPC_MXU_S32AND = 0x05, 515 OPC_MXU_S32OR = 0x06, 516 OPC_MXU_S32XOR = 0x07, 517 }; 518 519 /* 520 * MXU pool 17 521 */ 522 enum { 523 OPC_MXU_LXB = 0x00, 524 OPC_MXU_LXH = 0x01, 525 OPC_MXU_LXW = 0x03, 526 OPC_MXU_LXBU = 0x04, 527 OPC_MXU_LXHU = 0x05, 528 }; 529 530 /* 531 * MXU pool 18 532 */ 533 enum { 534 OPC_MXU_D32SLLV = 0x00, 535 OPC_MXU_D32SLRV = 0x01, 536 OPC_MXU_D32SARV = 0x03, 537 OPC_MXU_Q16SLLV = 0x04, 538 OPC_MXU_Q16SLRV = 0x05, 539 OPC_MXU_Q16SARV = 0x07, 540 }; 541 542 /* 543 * MXU pool 19 544 */ 545 enum { 546 OPC_MXU_Q8MUL = 0x00, 547 OPC_MXU_Q8MULSU = 0x02, 548 }; 549 550 /* 551 * MXU pool 20 552 */ 553 enum { 554 OPC_MXU_Q8MOVZ = 0x00, 555 
OPC_MXU_Q8MOVN = 0x01, 556 OPC_MXU_D16MOVZ = 0x02, 557 OPC_MXU_D16MOVN = 0x03, 558 OPC_MXU_S32MOVZ = 0x04, 559 OPC_MXU_S32MOVN = 0x05, 560 }; 561 562 /* 563 * MXU pool 21 564 */ 565 enum { 566 OPC_MXU_Q8MAC = 0x00, 567 OPC_MXU_Q8MACSU = 0x02, 568 }; 569 570 571 /* MXU accumulate add/subtract 1-bit pattern 'aptn1' */ 572 #define MXU_APTN1_A 0 573 #define MXU_APTN1_S 1 574 575 /* MXU accumulate add/subtract 2-bit pattern 'aptn2' */ 576 #define MXU_APTN2_AA 0 577 #define MXU_APTN2_AS 1 578 #define MXU_APTN2_SA 2 579 #define MXU_APTN2_SS 3 580 581 /* MXU execute add/subtract 2-bit pattern 'eptn2' */ 582 #define MXU_EPTN2_AA 0 583 #define MXU_EPTN2_AS 1 584 #define MXU_EPTN2_SA 2 585 #define MXU_EPTN2_SS 3 586 587 /* MXU operand getting pattern 'optn2' */ 588 #define MXU_OPTN2_PTN0 0 589 #define MXU_OPTN2_PTN1 1 590 #define MXU_OPTN2_PTN2 2 591 #define MXU_OPTN2_PTN3 3 592 /* alternative naming scheme for 'optn2' */ 593 #define MXU_OPTN2_WW 0 594 #define MXU_OPTN2_LW 1 595 #define MXU_OPTN2_HW 2 596 #define MXU_OPTN2_XW 3 597 598 /* MXU operand getting pattern 'optn3' */ 599 #define MXU_OPTN3_PTN0 0 600 #define MXU_OPTN3_PTN1 1 601 #define MXU_OPTN3_PTN2 2 602 #define MXU_OPTN3_PTN3 3 603 #define MXU_OPTN3_PTN4 4 604 #define MXU_OPTN3_PTN5 5 605 #define MXU_OPTN3_PTN6 6 606 #define MXU_OPTN3_PTN7 7 607 608 /* MXU registers */ 609 static TCGv mxu_gpr[NUMBER_OF_MXU_REGISTERS - 1]; 610 static TCGv mxu_CR; 611 612 static const char mxuregnames[][4] = { 613 "XR1", "XR2", "XR3", "XR4", "XR5", "XR6", "XR7", "XR8", 614 "XR9", "XR10", "XR11", "XR12", "XR13", "XR14", "XR15", "XCR", 615 }; 616 617 void mxu_translate_init(void) 618 { 619 for (unsigned i = 0; i < NUMBER_OF_MXU_REGISTERS - 1; i++) { 620 mxu_gpr[i] = tcg_global_mem_new(cpu_env, 621 offsetof(CPUMIPSState, active_tc.mxu_gpr[i]), 622 mxuregnames[i]); 623 } 624 625 mxu_CR = tcg_global_mem_new(cpu_env, 626 offsetof(CPUMIPSState, active_tc.mxu_cr), 627 mxuregnames[NUMBER_OF_MXU_REGISTERS - 1]); 628 } 629 630 /* MXU General 
purpose registers moves. */
static inline void gen_load_mxu_gpr(TCGv t, unsigned int reg)
{
    if (reg == 0) {
        /* XR0 always reads as zero; it has no backing TCG global. */
        tcg_gen_movi_tl(t, 0);
    } else if (reg <= 15) {
        tcg_gen_mov_tl(t, mxu_gpr[reg - 1]);
    }
    /* For reg > 15 the destination is deliberately left unchanged. */
}

static inline void gen_store_mxu_gpr(TCGv t, unsigned int reg)
{
    /* Writes to XR0 (and out-of-range indices) are silently discarded. */
    if (reg > 0 && reg <= 15) {
        tcg_gen_mov_tl(mxu_gpr[reg - 1], t);
    }
}

/* MXU control register moves. */
static inline void gen_load_mxu_cr(TCGv t)
{
    tcg_gen_mov_tl(t, mxu_CR);
}

static inline void gen_store_mxu_cr(TCGv t)
{
    /* TODO: Add handling of RW rules for MXU_CR. */
    tcg_gen_mov_tl(mxu_CR, t);
}

/*
 * S32I2M XRa, rb - Register move from GRF to XRF
 */
static void gen_mxu_s32i2m(DisasContext *ctx)
{
    TCGv t0;
    uint32_t XRa, Rb;

    t0 = tcg_temp_new();

    /* XRa is a 5-bit field here so that 16 can address the XCR register. */
    XRa = extract32(ctx->opcode, 6, 5);
    Rb = extract32(ctx->opcode, 16, 5);

    gen_load_gpr(t0, Rb);
    if (XRa <= 15) {
        gen_store_mxu_gpr(t0, XRa);
    } else if (XRa == 16) {
        gen_store_mxu_cr(t0);
    }
    /* XRa values 17..31: no destination, the move is dropped. */
}

/*
 * S32M2I XRa, rb - Register move from XRF to GRF
 */
static void gen_mxu_s32m2i(DisasContext *ctx)
{
    TCGv t0;
    uint32_t XRa, Rb;

    t0 = tcg_temp_new();

    /* XRa is a 5-bit field here so that 16 can address the XCR register. */
    XRa = extract32(ctx->opcode, 6, 5);
    Rb = extract32(ctx->opcode, 16, 5);

    if (XRa <= 15) {
        gen_load_mxu_gpr(t0, XRa);
    } else if (XRa == 16) {
        gen_load_mxu_cr(t0);
    }

    gen_store_gpr(t0, Rb);
}

/*
 * S8LDD XRa, Rb, s8, optn3 - Load a byte from memory to XRF
 *
 * S8LDI XRa, Rb, s8, optn3 - Load a byte from memory to XRF,
 *   post modify address register
 *
 * The byte is fetched from GPR[Rb] + sign_extend(s8) and placed into XRa
 * according to the replication/insertion pattern optn3.  For the LDI form
 * (postmodify) the effective address is also written back to GPR[Rb]
 * before the load is issued.
 */
static void gen_mxu_s8ldd(DisasContext *ctx, bool postmodify)
{
    TCGv t0, t1;
    uint32_t XRa, Rb, s8, optn3;

    t0 = tcg_temp_new();
    t1 = tcg_temp_new();

    XRa = extract32(ctx->opcode, 6, 4);
    s8 = extract32(ctx->opcode, 10, 8);
    optn3 = extract32(ctx->opcode, 18, 3);
    Rb = extract32(ctx->opcode, 21, 5);

    gen_load_gpr(t0, Rb);
    /* The (int8_t) cast sign-extends the 8-bit offset field. */
    tcg_gen_addi_tl(t0, t0, (int8_t)s8);
    if (postmodify) {
        gen_store_gpr(t0, Rb);
    }

    switch (optn3) {
    /* XRa[7:0] = tmp8 */
    case MXU_OPTN3_PTN0:
        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
        gen_load_mxu_gpr(t0, XRa);
        tcg_gen_deposit_tl(t0, t0, t1, 0, 8);
        break;
    /* XRa[15:8] = tmp8 */
    case MXU_OPTN3_PTN1:
        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
        gen_load_mxu_gpr(t0, XRa);
        tcg_gen_deposit_tl(t0, t0, t1, 8, 8);
        break;
    /* XRa[23:16] = tmp8 */
    case MXU_OPTN3_PTN2:
        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
        gen_load_mxu_gpr(t0, XRa);
        tcg_gen_deposit_tl(t0, t0, t1, 16, 8);
        break;
    /* XRa[31:24] = tmp8 */
    case MXU_OPTN3_PTN3:
        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
        gen_load_mxu_gpr(t0, XRa);
        tcg_gen_deposit_tl(t0, t0, t1, 24, 8);
        break;
    /* XRa = {8'b0, tmp8, 8'b0, tmp8} */
    case MXU_OPTN3_PTN4:
        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
        /* t1 is zero-extended, so duplicating the low half suffices. */
        tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
        break;
    /* XRa = {tmp8, 8'b0, tmp8, 8'b0} */
    case MXU_OPTN3_PTN5:
        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
        tcg_gen_shli_tl(t1, t1, 8);
        tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
        break;
    /* XRa = {{8{sign of tmp8}}, tmp8, {8{sign of tmp8}}, tmp8} */
    case MXU_OPTN3_PTN6:
        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_SB);
        /* Clear bits 23..16 of the sign-extended byte, then OR in the
           byte (and its sign bits) shifted into the upper half. */
        tcg_gen_mov_tl(t0, t1);
        tcg_gen_andi_tl(t0, t0, 0xFF00FFFF);
        tcg_gen_shli_tl(t1, t1, 16);
        tcg_gen_or_tl(t0, t0, t1);
        break;
    /* XRa = {tmp8, tmp8, tmp8, tmp8} */
    case MXU_OPTN3_PTN7:
        tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
        tcg_gen_deposit_tl(t1, t1, t1, 8, 8);
        tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
        break;
    }

    gen_store_mxu_gpr(t0, XRa);
}

/*
 * S8STD XRa, Rb, s8, optn3 - Store a byte from XRF to memory
 *
* S8SDI XRa, Rb, s8, optn3 - Store a byte from XRF to memory, 786 * post modify address register 787 */ 788 static void gen_mxu_s8std(DisasContext *ctx, bool postmodify) 789 { 790 TCGv t0, t1; 791 uint32_t XRa, Rb, s8, optn3; 792 793 t0 = tcg_temp_new(); 794 t1 = tcg_temp_new(); 795 796 XRa = extract32(ctx->opcode, 6, 4); 797 s8 = extract32(ctx->opcode, 10, 8); 798 optn3 = extract32(ctx->opcode, 18, 3); 799 Rb = extract32(ctx->opcode, 21, 5); 800 801 if (optn3 > 3) { 802 /* reserved, do nothing */ 803 return; 804 } 805 806 gen_load_gpr(t0, Rb); 807 tcg_gen_addi_tl(t0, t0, (int8_t)s8); 808 if (postmodify) { 809 gen_store_gpr(t0, Rb); 810 } 811 gen_load_mxu_gpr(t1, XRa); 812 813 switch (optn3) { 814 /* XRa[7:0] => tmp8 */ 815 case MXU_OPTN3_PTN0: 816 tcg_gen_extract_tl(t1, t1, 0, 8); 817 break; 818 /* XRa[15:8] => tmp8 */ 819 case MXU_OPTN3_PTN1: 820 tcg_gen_extract_tl(t1, t1, 8, 8); 821 break; 822 /* XRa[23:16] => tmp8 */ 823 case MXU_OPTN3_PTN2: 824 tcg_gen_extract_tl(t1, t1, 16, 8); 825 break; 826 /* XRa[31:24] => tmp8 */ 827 case MXU_OPTN3_PTN3: 828 tcg_gen_extract_tl(t1, t1, 24, 8); 829 break; 830 } 831 832 tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_UB); 833 } 834 835 /* 836 * S16LDD XRa, Rb, s10, optn2 - Load a halfword from memory to XRF 837 * 838 * S16LDI XRa, Rb, s10, optn2 - Load a halfword from memory to XRF, 839 * post modify address register 840 */ 841 static void gen_mxu_s16ldd(DisasContext *ctx, bool postmodify) 842 { 843 TCGv t0, t1; 844 uint32_t XRa, Rb, optn2; 845 int32_t s10; 846 847 t0 = tcg_temp_new(); 848 t1 = tcg_temp_new(); 849 850 XRa = extract32(ctx->opcode, 6, 4); 851 s10 = sextract32(ctx->opcode, 10, 9) * 2; 852 optn2 = extract32(ctx->opcode, 19, 2); 853 Rb = extract32(ctx->opcode, 21, 5); 854 855 gen_load_gpr(t0, Rb); 856 tcg_gen_addi_tl(t0, t0, s10); 857 if (postmodify) { 858 gen_store_gpr(t0, Rb); 859 } 860 861 switch (optn2) { 862 /* XRa[15:0] = tmp16 */ 863 case MXU_OPTN2_PTN0: 864 tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW); 865 
gen_load_mxu_gpr(t0, XRa); 866 tcg_gen_deposit_tl(t0, t0, t1, 0, 16); 867 break; 868 /* XRa[31:16] = tmp16 */ 869 case MXU_OPTN2_PTN1: 870 tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW); 871 gen_load_mxu_gpr(t0, XRa); 872 tcg_gen_deposit_tl(t0, t0, t1, 16, 16); 873 break; 874 /* XRa = sign_extend(tmp16) */ 875 case MXU_OPTN2_PTN2: 876 tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_SW); 877 break; 878 /* XRa = {tmp16, tmp16} */ 879 case MXU_OPTN2_PTN3: 880 tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW); 881 tcg_gen_deposit_tl(t0, t1, t1, 0, 16); 882 tcg_gen_deposit_tl(t0, t1, t1, 16, 16); 883 break; 884 } 885 886 gen_store_mxu_gpr(t0, XRa); 887 } 888 889 /* 890 * S16STD XRa, Rb, s8, optn2 - Store a byte from XRF to memory 891 * 892 * S16SDI XRa, Rb, s8, optn2 - Store a byte from XRF to memory, 893 * post modify address register 894 */ 895 static void gen_mxu_s16std(DisasContext *ctx, bool postmodify) 896 { 897 TCGv t0, t1; 898 uint32_t XRa, Rb, optn2; 899 int32_t s10; 900 901 t0 = tcg_temp_new(); 902 t1 = tcg_temp_new(); 903 904 XRa = extract32(ctx->opcode, 6, 4); 905 s10 = sextract32(ctx->opcode, 10, 9) * 2; 906 optn2 = extract32(ctx->opcode, 19, 2); 907 Rb = extract32(ctx->opcode, 21, 5); 908 909 if (optn2 > 1) { 910 /* reserved, do nothing */ 911 return; 912 } 913 914 gen_load_gpr(t0, Rb); 915 tcg_gen_addi_tl(t0, t0, s10); 916 if (postmodify) { 917 gen_store_gpr(t0, Rb); 918 } 919 gen_load_mxu_gpr(t1, XRa); 920 921 switch (optn2) { 922 /* XRa[15:0] => tmp16 */ 923 case MXU_OPTN2_PTN0: 924 tcg_gen_extract_tl(t1, t1, 0, 16); 925 break; 926 /* XRa[31:16] => tmp16 */ 927 case MXU_OPTN2_PTN1: 928 tcg_gen_extract_tl(t1, t1, 16, 16); 929 break; 930 } 931 932 tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_UW); 933 } 934 935 /* 936 * S32MUL XRa, XRd, rs, rt - Signed 32x32=>64 bit multiplication 937 * of GPR's and stores result into pair of MXU registers. 938 * It strains HI and LO registers. 
 *
 * S32MULU XRa, XRd, rs, rt - Unsigned 32x32=>64 bit multiplication
 *   of GPR's and stores result into pair of MXU registers.
 *   It strains HI and LO registers.
 */
static void gen_mxu_s32mul(DisasContext *ctx, bool mulu)
{
    TCGv t0, t1;
    uint32_t XRa, XRd, rs, rt;

    t0 = tcg_temp_new();
    t1 = tcg_temp_new();

    XRa = extract32(ctx->opcode, 6, 4);
    XRd = extract32(ctx->opcode, 10, 4);
    rs = extract32(ctx->opcode, 16, 5);
    rt = extract32(ctx->opcode, 21, 5);

    if (unlikely(rs == 0 || rt == 0)) {
        /* Either operand is the always-zero $0: the product is 0. */
        tcg_gen_movi_tl(t0, 0);
        tcg_gen_movi_tl(t1, 0);
    } else {
        gen_load_gpr(t0, rs);
        gen_load_gpr(t1, rt);

        if (mulu) {
            tcg_gen_mulu2_tl(t0, t1, t0, t1);
        } else {
            tcg_gen_muls2_tl(t0, t1, t0, t1);
        }
    }
    /* The 64-bit product lands both in HI:LO and in XRa:XRd. */
    tcg_gen_mov_tl(cpu_HI[0], t1);
    tcg_gen_mov_tl(cpu_LO[0], t0);
    gen_store_mxu_gpr(t1, XRa);   /* high half */
    gen_store_mxu_gpr(t0, XRd);   /* low half */
}

/*
 * D16MUL  XRa, XRb, XRc, XRd, optn2 - Signed 16 bit pattern multiplication
 * D16MULF XRa, XRb, XRc, optn2 - Signed Q15 fraction pattern multiplication
 *   with rounding and packing result
 * D16MULE XRa, XRb, XRc, XRd, optn2 - Signed Q15 fraction pattern
 *   multiplication with rounding
 *
 * Throughout: t0/t1 hold the sign-extended low/high halves of XRb,
 * t2/t3 the low/high halves of XRc; after the optn2 switch, t3 is the
 * "left" product and t2 the "right" product.
 */
static void gen_mxu_d16mul(DisasContext *ctx, bool fractional,
                           bool packed_result)
{
    TCGv t0, t1, t2, t3;
    uint32_t XRa, XRb, XRc, XRd, optn2;

    t0 = tcg_temp_new();
    t1 = tcg_temp_new();
    t2 = tcg_temp_new();
    t3 = tcg_temp_new();

    XRa = extract32(ctx->opcode, 6, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRd = extract32(ctx->opcode, 18, 4);
    optn2 = extract32(ctx->opcode, 22, 2);

    /*
     * TODO: XRd field isn't used for D16MULF
     * There's no knowledge how this field affect
     * instruction decoding/behavior
     */

    gen_load_mxu_gpr(t1, XRb);
    tcg_gen_sextract_tl(t0, t1, 0, 16);
    tcg_gen_sextract_tl(t1, t1, 16, 16);
    gen_load_mxu_gpr(t3, XRc);
    tcg_gen_sextract_tl(t2, t3, 0, 16);
    tcg_gen_sextract_tl(t3, t3, 16, 16);

    switch (optn2) {
    case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
        tcg_gen_mul_tl(t3, t1, t3);
        tcg_gen_mul_tl(t2, t0, t2);
        break;
    case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
        tcg_gen_mul_tl(t3, t0, t3);
        tcg_gen_mul_tl(t2, t0, t2);
        break;
    case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
        tcg_gen_mul_tl(t3, t1, t3);
        tcg_gen_mul_tl(t2, t1, t2);
        break;
    case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
        tcg_gen_mul_tl(t3, t0, t3);
        tcg_gen_mul_tl(t2, t1, t2);
        break;
    }
    if (fractional) {
        TCGLabel *l_done = gen_new_label();
        TCGv rounding = tcg_temp_new();

        /* Q15 x Q15 product: shift left once to get a Q31 result. */
        tcg_gen_shli_tl(t3, t3, 1);
        tcg_gen_shli_tl(t2, t2, 1);
        /* Rounding is applied only when MXU_CR bit 1 is set. */
        tcg_gen_andi_tl(rounding, mxu_CR, 0x2);
        tcg_gen_brcondi_tl(TCG_COND_EQ, rounding, 0, l_done);
        if (packed_result) {
            TCGLabel *l_apply_bias_l = gen_new_label();
            TCGLabel *l_apply_bias_r = gen_new_label();
            TCGLabel *l_half_done = gen_new_label();
            TCGv bias = tcg_temp_new();

            /*
             * D16MULF supports unbiased rounding aka "bankers rounding",
             * "round to even", "convergent rounding".  When MXU_CR bit 2
             * is clear and the value is exactly halfway with an even
             * result bit (low 17 bits == 0x8000), the +0x8000 bias is
             * skipped.
             */
            tcg_gen_andi_tl(bias, mxu_CR, 0x4);
            tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_l);
            tcg_gen_andi_tl(t0, t3, 0x1ffff);
            tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_half_done);
            gen_set_label(l_apply_bias_l);
            tcg_gen_addi_tl(t3, t3, 0x8000);
            gen_set_label(l_half_done);
            tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_r);
            tcg_gen_andi_tl(t0, t2, 0x1ffff);
            tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_done);
            gen_set_label(l_apply_bias_r);
            tcg_gen_addi_tl(t2, t2, 0x8000);
        } else {
            /* D16MULE doesn't support unbiased rounding */
            tcg_gen_addi_tl(t3, t3, 0x8000);
            tcg_gen_addi_tl(t2, t2, 0x8000);
        }
        gen_set_label(l_done);
    }
    if (!packed_result) {
        gen_store_mxu_gpr(t3, XRa);
        gen_store_mxu_gpr(t2, XRd);
    } else {
        /* Pack the two high halves: XRa = {lop[31:16], rop[31:16]}. */
        tcg_gen_andi_tl(t3, t3, 0xffff0000);
        tcg_gen_shri_tl(t2, t2, 16);
        tcg_gen_or_tl(t3, t3, t2);
        gen_store_mxu_gpr(t3, XRa);
    }
}

/*
 * D16MAC XRa, XRb, XRc, XRd, aptn2, optn2
 *   Signed 16 bit pattern multiply and accumulate
 * D16MACF XRa, XRb, XRc, aptn2, optn2
 *   Signed Q15 fraction pattern multiply accumulate and pack
 * D16MACE XRa, XRb, XRc, XRd, aptn2, optn2
 *   Signed Q15 fraction pattern multiply and accumulate
 *
 * Same operand/product layout as gen_mxu_d16mul above; the products are
 * then added to / subtracted from XRa and XRd per aptn2 before optional
 * fractional rounding.
 */
static void gen_mxu_d16mac(DisasContext *ctx, bool fractional,
                           bool packed_result)
{
    TCGv t0, t1, t2, t3;
    uint32_t XRa, XRb, XRc, XRd, optn2, aptn2;

    t0 = tcg_temp_new();
    t1 = tcg_temp_new();
    t2 = tcg_temp_new();
    t3 = tcg_temp_new();

    XRa = extract32(ctx->opcode, 6, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRd = extract32(ctx->opcode, 18, 4);
    optn2 = extract32(ctx->opcode, 22, 2);
    aptn2 = extract32(ctx->opcode, 24, 2);

    gen_load_mxu_gpr(t1, XRb);
    tcg_gen_sextract_tl(t0, t1, 0, 16);
    tcg_gen_sextract_tl(t1, t1, 16, 16);

    gen_load_mxu_gpr(t3, XRc);
    tcg_gen_sextract_tl(t2, t3, 0, 16);
    tcg_gen_sextract_tl(t3, t3, 16, 16);

    switch (optn2) {
    case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
        tcg_gen_mul_tl(t3, t1, t3);
        tcg_gen_mul_tl(t2, t0, t2);
        break;
    case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
        tcg_gen_mul_tl(t3, t0, t3);
        tcg_gen_mul_tl(t2, t0, t2);
        break;
    case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
        tcg_gen_mul_tl(t3, t1, t3);
        tcg_gen_mul_tl(t2, t1, t2);
        break;
    case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
        tcg_gen_mul_tl(t3, t0, t3);
        tcg_gen_mul_tl(t2, t1, t2);
        break;
    }

    if (fractional) {
        /* Q15 x Q15 product: shift left once to get a Q31 result. */
        tcg_gen_shli_tl(t3, t3, 1);
        tcg_gen_shli_tl(t2, t2, 1);
    }
    gen_load_mxu_gpr(t0, XRa);
    gen_load_mxu_gpr(t1, XRd);

    switch (aptn2) {
    case MXU_APTN2_AA:
        tcg_gen_add_tl(t3, t0, t3);
        tcg_gen_add_tl(t2, t1, t2);
        break;
    case MXU_APTN2_AS:
        tcg_gen_add_tl(t3, t0, t3);
        tcg_gen_sub_tl(t2, t1, t2);
        break;
    case MXU_APTN2_SA:
        tcg_gen_sub_tl(t3, t0, t3);
        tcg_gen_add_tl(t2, t1, t2);
        break;
    case MXU_APTN2_SS:
        tcg_gen_sub_tl(t3, t0, t3);
        tcg_gen_sub_tl(t2, t1, t2);
        break;
    }

    if (fractional) {
        TCGLabel *l_done = gen_new_label();
        TCGv rounding = tcg_temp_new();

        /* Rounding is applied only when MXU_CR bit 1 is set. */
        tcg_gen_andi_tl(rounding, mxu_CR, 0x2);
        tcg_gen_brcondi_tl(TCG_COND_EQ, rounding, 0, l_done);
        if (packed_result) {
            TCGLabel *l_apply_bias_l = gen_new_label();
            TCGLabel *l_apply_bias_r = gen_new_label();
            TCGLabel *l_half_done = gen_new_label();
            TCGv bias = tcg_temp_new();

            /*
             * D16MACF supports unbiased rounding aka "bankers rounding",
             * "round to even", "convergent rounding".  When MXU_CR bit 2
             * is clear and the value is exactly halfway with an even
             * result bit (low 17 bits == 0x8000), the +0x8000 bias is
             * skipped.
             */
            tcg_gen_andi_tl(bias, mxu_CR, 0x4);
            tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_l);
            tcg_gen_andi_tl(t0, t3, 0x1ffff);
            tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_half_done);
            gen_set_label(l_apply_bias_l);
            tcg_gen_addi_tl(t3, t3, 0x8000);
            gen_set_label(l_half_done);
            tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_r);
            tcg_gen_andi_tl(t0, t2, 0x1ffff);
            tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_done);
            gen_set_label(l_apply_bias_r);
            tcg_gen_addi_tl(t2, t2, 0x8000);
        } else {
            /* D16MACE doesn't support unbiased rounding */
            tcg_gen_addi_tl(t3, t3, 0x8000);
            tcg_gen_addi_tl(t2, t2, 0x8000);
        }
        gen_set_label(l_done);
1192 } 1193 1194 if (!packed_result) { 1195 gen_store_mxu_gpr(t3, XRa); 1196 gen_store_mxu_gpr(t2, XRd); 1197 } else { 1198 tcg_gen_andi_tl(t3, t3, 0xffff0000); 1199 tcg_gen_shri_tl(t2, t2, 16); 1200 tcg_gen_or_tl(t3, t3, t2); 1201 gen_store_mxu_gpr(t3, XRa); 1202 } 1203 } 1204 1205 /* 1206 * D16MADL XRa, XRb, XRc, XRd, aptn2, optn2 - Double packed 1207 * unsigned 16 bit pattern multiply and add/subtract. 1208 */ 1209 static void gen_mxu_d16madl(DisasContext *ctx) 1210 { 1211 TCGv t0, t1, t2, t3; 1212 uint32_t XRa, XRb, XRc, XRd, optn2, aptn2; 1213 1214 t0 = tcg_temp_new(); 1215 t1 = tcg_temp_new(); 1216 t2 = tcg_temp_new(); 1217 t3 = tcg_temp_new(); 1218 1219 XRa = extract32(ctx->opcode, 6, 4); 1220 XRb = extract32(ctx->opcode, 10, 4); 1221 XRc = extract32(ctx->opcode, 14, 4); 1222 XRd = extract32(ctx->opcode, 18, 4); 1223 optn2 = extract32(ctx->opcode, 22, 2); 1224 aptn2 = extract32(ctx->opcode, 24, 2); 1225 1226 gen_load_mxu_gpr(t1, XRb); 1227 tcg_gen_sextract_tl(t0, t1, 0, 16); 1228 tcg_gen_sextract_tl(t1, t1, 16, 16); 1229 1230 gen_load_mxu_gpr(t3, XRc); 1231 tcg_gen_sextract_tl(t2, t3, 0, 16); 1232 tcg_gen_sextract_tl(t3, t3, 16, 16); 1233 1234 switch (optn2) { 1235 case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */ 1236 tcg_gen_mul_tl(t3, t1, t3); 1237 tcg_gen_mul_tl(t2, t0, t2); 1238 break; 1239 case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */ 1240 tcg_gen_mul_tl(t3, t0, t3); 1241 tcg_gen_mul_tl(t2, t0, t2); 1242 break; 1243 case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */ 1244 tcg_gen_mul_tl(t3, t1, t3); 1245 tcg_gen_mul_tl(t2, t1, t2); 1246 break; 1247 case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */ 1248 tcg_gen_mul_tl(t3, t0, t3); 1249 tcg_gen_mul_tl(t2, t1, t2); 1250 break; 1251 } 1252 tcg_gen_extract_tl(t2, t2, 0, 16); 1253 tcg_gen_extract_tl(t3, t3, 0, 16); 1254 1255 gen_load_mxu_gpr(t1, XRa); 1256 tcg_gen_extract_tl(t0, t1, 0, 16); 1257 tcg_gen_extract_tl(t1, t1, 16, 16); 1258 1259 switch 
(aptn2) { 1260 case MXU_APTN2_AA: 1261 tcg_gen_add_tl(t3, t1, t3); 1262 tcg_gen_add_tl(t2, t0, t2); 1263 break; 1264 case MXU_APTN2_AS: 1265 tcg_gen_add_tl(t3, t1, t3); 1266 tcg_gen_sub_tl(t2, t0, t2); 1267 break; 1268 case MXU_APTN2_SA: 1269 tcg_gen_sub_tl(t3, t1, t3); 1270 tcg_gen_add_tl(t2, t0, t2); 1271 break; 1272 case MXU_APTN2_SS: 1273 tcg_gen_sub_tl(t3, t1, t3); 1274 tcg_gen_sub_tl(t2, t0, t2); 1275 break; 1276 } 1277 1278 tcg_gen_andi_tl(t2, t2, 0xffff); 1279 tcg_gen_shli_tl(t3, t3, 16); 1280 tcg_gen_or_tl(mxu_gpr[XRd - 1], t3, t2); 1281 } 1282 1283 /* 1284 * S16MAD XRa, XRb, XRc, XRd, aptn2, optn2 - Single packed 1285 * signed 16 bit pattern multiply and 32-bit add/subtract. 1286 */ 1287 static void gen_mxu_s16mad(DisasContext *ctx) 1288 { 1289 TCGv t0, t1; 1290 uint32_t XRa, XRb, XRc, XRd, optn2, aptn1, pad; 1291 1292 t0 = tcg_temp_new(); 1293 t1 = tcg_temp_new(); 1294 1295 XRa = extract32(ctx->opcode, 6, 4); 1296 XRb = extract32(ctx->opcode, 10, 4); 1297 XRc = extract32(ctx->opcode, 14, 4); 1298 XRd = extract32(ctx->opcode, 18, 4); 1299 optn2 = extract32(ctx->opcode, 22, 2); 1300 aptn1 = extract32(ctx->opcode, 24, 1); 1301 pad = extract32(ctx->opcode, 25, 1); 1302 1303 if (pad) { 1304 /* FIXME check if it influence the result */ 1305 } 1306 1307 gen_load_mxu_gpr(t0, XRb); 1308 gen_load_mxu_gpr(t1, XRc); 1309 1310 switch (optn2) { 1311 case MXU_OPTN2_WW: /* XRB.H*XRC.H */ 1312 tcg_gen_sextract_tl(t0, t0, 16, 16); 1313 tcg_gen_sextract_tl(t1, t1, 16, 16); 1314 break; 1315 case MXU_OPTN2_LW: /* XRB.L*XRC.L */ 1316 tcg_gen_sextract_tl(t0, t0, 0, 16); 1317 tcg_gen_sextract_tl(t1, t1, 0, 16); 1318 break; 1319 case MXU_OPTN2_HW: /* XRB.H*XRC.L */ 1320 tcg_gen_sextract_tl(t0, t0, 16, 16); 1321 tcg_gen_sextract_tl(t1, t1, 0, 16); 1322 break; 1323 case MXU_OPTN2_XW: /* XRB.L*XRC.H */ 1324 tcg_gen_sextract_tl(t0, t0, 0, 16); 1325 tcg_gen_sextract_tl(t1, t1, 16, 16); 1326 break; 1327 } 1328 tcg_gen_mul_tl(t0, t0, t1); 1329 1330 gen_load_mxu_gpr(t1, XRa); 1331 1332 
switch (aptn1) { 1333 case MXU_APTN1_A: 1334 tcg_gen_add_tl(t1, t1, t0); 1335 break; 1336 case MXU_APTN1_S: 1337 tcg_gen_sub_tl(t1, t1, t0); 1338 break; 1339 } 1340 1341 gen_store_mxu_gpr(t1, XRd); 1342 } 1343 1344 /* 1345 * Q8MUL XRa, XRb, XRc, XRd - Parallel quad unsigned 8 bit multiply 1346 * Q8MULSU XRa, XRb, XRc, XRd - Parallel quad signed 8 bit multiply 1347 * Q8MAC XRa, XRb, XRc, XRd - Parallel quad unsigned 8 bit multiply 1348 * and accumulate 1349 * Q8MACSU XRa, XRb, XRc, XRd - Parallel quad signed 8 bit multiply 1350 * and accumulate 1351 */ 1352 static void gen_mxu_q8mul_mac(DisasContext *ctx, bool su, bool mac) 1353 { 1354 TCGv t0, t1, t2, t3, t4, t5, t6, t7; 1355 uint32_t XRa, XRb, XRc, XRd, aptn2; 1356 1357 t0 = tcg_temp_new(); 1358 t1 = tcg_temp_new(); 1359 t2 = tcg_temp_new(); 1360 t3 = tcg_temp_new(); 1361 t4 = tcg_temp_new(); 1362 t5 = tcg_temp_new(); 1363 t6 = tcg_temp_new(); 1364 t7 = tcg_temp_new(); 1365 1366 XRa = extract32(ctx->opcode, 6, 4); 1367 XRb = extract32(ctx->opcode, 10, 4); 1368 XRc = extract32(ctx->opcode, 14, 4); 1369 XRd = extract32(ctx->opcode, 18, 4); 1370 aptn2 = extract32(ctx->opcode, 24, 2); 1371 1372 gen_load_mxu_gpr(t3, XRb); 1373 gen_load_mxu_gpr(t7, XRc); 1374 1375 if (su) { 1376 /* Q8MULSU / Q8MACSU */ 1377 tcg_gen_sextract_tl(t0, t3, 0, 8); 1378 tcg_gen_sextract_tl(t1, t3, 8, 8); 1379 tcg_gen_sextract_tl(t2, t3, 16, 8); 1380 tcg_gen_sextract_tl(t3, t3, 24, 8); 1381 } else { 1382 /* Q8MUL / Q8MAC */ 1383 tcg_gen_extract_tl(t0, t3, 0, 8); 1384 tcg_gen_extract_tl(t1, t3, 8, 8); 1385 tcg_gen_extract_tl(t2, t3, 16, 8); 1386 tcg_gen_extract_tl(t3, t3, 24, 8); 1387 } 1388 1389 tcg_gen_extract_tl(t4, t7, 0, 8); 1390 tcg_gen_extract_tl(t5, t7, 8, 8); 1391 tcg_gen_extract_tl(t6, t7, 16, 8); 1392 tcg_gen_extract_tl(t7, t7, 24, 8); 1393 1394 tcg_gen_mul_tl(t0, t0, t4); 1395 tcg_gen_mul_tl(t1, t1, t5); 1396 tcg_gen_mul_tl(t2, t2, t6); 1397 tcg_gen_mul_tl(t3, t3, t7); 1398 1399 if (mac) { 1400 gen_load_mxu_gpr(t4, XRd); 1401 
gen_load_mxu_gpr(t5, XRa); 1402 tcg_gen_extract_tl(t6, t4, 0, 16); 1403 tcg_gen_extract_tl(t7, t4, 16, 16); 1404 if (aptn2 & 1) { 1405 tcg_gen_sub_tl(t0, t6, t0); 1406 tcg_gen_sub_tl(t1, t7, t1); 1407 } else { 1408 tcg_gen_add_tl(t0, t6, t0); 1409 tcg_gen_add_tl(t1, t7, t1); 1410 } 1411 tcg_gen_extract_tl(t6, t5, 0, 16); 1412 tcg_gen_extract_tl(t7, t5, 16, 16); 1413 if (aptn2 & 2) { 1414 tcg_gen_sub_tl(t2, t6, t2); 1415 tcg_gen_sub_tl(t3, t7, t3); 1416 } else { 1417 tcg_gen_add_tl(t2, t6, t2); 1418 tcg_gen_add_tl(t3, t7, t3); 1419 } 1420 } 1421 1422 tcg_gen_deposit_tl(t0, t0, t1, 16, 16); 1423 tcg_gen_deposit_tl(t1, t2, t3, 16, 16); 1424 1425 gen_store_mxu_gpr(t0, XRd); 1426 gen_store_mxu_gpr(t1, XRa); 1427 } 1428 1429 /* 1430 * Q8MADL XRd, XRa, XRb, XRc 1431 * Parallel quad unsigned 8 bit multiply and accumulate. 1432 * e.g. XRd[0..3] = XRa[0..3] + XRb[0..3] * XRc[0..3] 1433 */ 1434 static void gen_mxu_q8madl(DisasContext *ctx) 1435 { 1436 TCGv t0, t1, t2, t3, t4, t5, t6, t7; 1437 uint32_t XRa, XRb, XRc, XRd, aptn2; 1438 1439 t0 = tcg_temp_new(); 1440 t1 = tcg_temp_new(); 1441 t2 = tcg_temp_new(); 1442 t3 = tcg_temp_new(); 1443 t4 = tcg_temp_new(); 1444 t5 = tcg_temp_new(); 1445 t6 = tcg_temp_new(); 1446 t7 = tcg_temp_new(); 1447 1448 XRa = extract32(ctx->opcode, 6, 4); 1449 XRb = extract32(ctx->opcode, 10, 4); 1450 XRc = extract32(ctx->opcode, 14, 4); 1451 XRd = extract32(ctx->opcode, 18, 4); 1452 aptn2 = extract32(ctx->opcode, 24, 2); 1453 1454 gen_load_mxu_gpr(t3, XRb); 1455 gen_load_mxu_gpr(t7, XRc); 1456 1457 tcg_gen_extract_tl(t0, t3, 0, 8); 1458 tcg_gen_extract_tl(t1, t3, 8, 8); 1459 tcg_gen_extract_tl(t2, t3, 16, 8); 1460 tcg_gen_extract_tl(t3, t3, 24, 8); 1461 1462 tcg_gen_extract_tl(t4, t7, 0, 8); 1463 tcg_gen_extract_tl(t5, t7, 8, 8); 1464 tcg_gen_extract_tl(t6, t7, 16, 8); 1465 tcg_gen_extract_tl(t7, t7, 24, 8); 1466 1467 tcg_gen_mul_tl(t0, t0, t4); 1468 tcg_gen_mul_tl(t1, t1, t5); 1469 tcg_gen_mul_tl(t2, t2, t6); 1470 tcg_gen_mul_tl(t3, t3, t7); 1471 
1472 gen_load_mxu_gpr(t4, XRa); 1473 tcg_gen_extract_tl(t6, t4, 0, 8); 1474 tcg_gen_extract_tl(t7, t4, 8, 8); 1475 if (aptn2 & 1) { 1476 tcg_gen_sub_tl(t0, t6, t0); 1477 tcg_gen_sub_tl(t1, t7, t1); 1478 } else { 1479 tcg_gen_add_tl(t0, t6, t0); 1480 tcg_gen_add_tl(t1, t7, t1); 1481 } 1482 tcg_gen_extract_tl(t6, t4, 16, 8); 1483 tcg_gen_extract_tl(t7, t4, 24, 8); 1484 if (aptn2 & 2) { 1485 tcg_gen_sub_tl(t2, t6, t2); 1486 tcg_gen_sub_tl(t3, t7, t3); 1487 } else { 1488 tcg_gen_add_tl(t2, t6, t2); 1489 tcg_gen_add_tl(t3, t7, t3); 1490 } 1491 1492 tcg_gen_andi_tl(t5, t0, 0xff); 1493 tcg_gen_deposit_tl(t5, t5, t1, 8, 8); 1494 tcg_gen_deposit_tl(t5, t5, t2, 16, 8); 1495 tcg_gen_deposit_tl(t5, t5, t3, 24, 8); 1496 1497 gen_store_mxu_gpr(t5, XRd); 1498 } 1499 1500 /* 1501 * S32LDD XRa, Rb, S12 - Load a word from memory to XRF 1502 * S32LDDR XRa, Rb, S12 - Load a word from memory to XRF 1503 * in reversed byte seq. 1504 * S32LDI XRa, Rb, S12 - Load a word from memory to XRF, 1505 * post modify base address GPR. 1506 * S32LDIR XRa, Rb, S12 - Load a word from memory to XRF, 1507 * post modify base address GPR and load in reversed byte seq. 1508 */ 1509 static void gen_mxu_s32ldxx(DisasContext *ctx, bool reversed, bool postinc) 1510 { 1511 TCGv t0, t1; 1512 uint32_t XRa, Rb, s12; 1513 1514 t0 = tcg_temp_new(); 1515 t1 = tcg_temp_new(); 1516 1517 XRa = extract32(ctx->opcode, 6, 4); 1518 s12 = sextract32(ctx->opcode, 10, 10); 1519 Rb = extract32(ctx->opcode, 21, 5); 1520 1521 gen_load_gpr(t0, Rb); 1522 tcg_gen_movi_tl(t1, s12 * 4); 1523 tcg_gen_add_tl(t0, t0, t1); 1524 1525 tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, 1526 (MO_TESL ^ (reversed ? MO_BSWAP : 0)) | 1527 ctx->default_tcg_memop_mask); 1528 gen_store_mxu_gpr(t1, XRa); 1529 1530 if (postinc) { 1531 gen_store_gpr(t0, Rb); 1532 } 1533 } 1534 1535 /* 1536 * S32STD XRa, Rb, S12 - Store a word from XRF to memory 1537 * S32STDR XRa, Rb, S12 - Store a word from XRF to memory 1538 * in reversed byte seq. 
 * S32SDI  XRa, Rb, S12 - Store a word from XRF to memory,
 *                        post modify base address GPR.
 * S32SDIR XRa, Rb, S12 - Store a word from XRF to memory,
 *                        post modify base address GPR and store in reversed byte seq.
 */
static void gen_mxu_s32stxx(DisasContext *ctx, bool reversed, bool postinc)
{
    TCGv t0, t1;
    uint32_t XRa, Rb, s12;

    t0 = tcg_temp_new();
    t1 = tcg_temp_new();

    XRa = extract32(ctx->opcode, 6, 4);
    s12 = sextract32(ctx->opcode, 10, 10);
    Rb = extract32(ctx->opcode, 21, 5);

    /* Effective address = GPR[Rb] + (sign-extended s12) * 4 */
    gen_load_gpr(t0, Rb);
    tcg_gen_movi_tl(t1, s12 * 4);
    tcg_gen_add_tl(t0, t0, t1);

    gen_load_mxu_gpr(t1, XRa);
    tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
                       (MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
                        ctx->default_tcg_memop_mask);

    if (postinc) {
        /* Post-modify: write the computed address back to Rb. */
        gen_store_gpr(t0, Rb);
    }
}

/*
 * S32LDDV  XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
 * S32LDDVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
 *                               in reversed byte seq.
 * S32LDIV  XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
 *                               post modify base address GPR.
 * S32LDIVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
 *                               post modify base address GPR and load in reversed byte seq.
 */
static void gen_mxu_s32ldxvx(DisasContext *ctx, bool reversed,
                             bool postinc, uint32_t strd2)
{
    TCGv t0, t1;
    uint32_t XRa, Rb, Rc;

    t0 = tcg_temp_new();
    t1 = tcg_temp_new();

    XRa = extract32(ctx->opcode, 6, 4);
    Rc = extract32(ctx->opcode, 16, 5);
    Rb = extract32(ctx->opcode, 21, 5);

    /* Effective address = GPR[Rb] + (GPR[Rc] << strd2) */
    gen_load_gpr(t0, Rb);
    gen_load_gpr(t1, Rc);
    tcg_gen_shli_tl(t1, t1, strd2);
    tcg_gen_add_tl(t0, t0, t1);

    tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx,
                       (MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
                        ctx->default_tcg_memop_mask);
    gen_store_mxu_gpr(t1, XRa);

    if (postinc) {
        gen_store_gpr(t0, Rb);
    }
}

/*
 * LXW  Ra, Rb, Rc, STRD2 - Load a word from memory to GPR
 * LXB  Ra, Rb, Rc, STRD2 - Load a byte from memory to GPR,
 *                          sign extending to GPR size.
 * LXH  Ra, Rb, Rc, STRD2 - Load a halfword from memory to GPR,
 *                          sign extending to GPR size.
 * LXBU Ra, Rb, Rc, STRD2 - Load a byte from memory to GPR,
 *                          zero extending to GPR size.
 * LXHU Ra, Rb, Rc, STRD2 - Load a halfword from memory to GPR,
 *                          zero extending to GPR size.
 */
static void gen_mxu_lxx(DisasContext *ctx, uint32_t strd2, MemOp mop)
{
    TCGv t0, t1;
    uint32_t Ra, Rb, Rc;

    t0 = tcg_temp_new();
    t1 = tcg_temp_new();

    Ra = extract32(ctx->opcode, 11, 5);
    Rc = extract32(ctx->opcode, 16, 5);
    Rb = extract32(ctx->opcode, 21, 5);

    /* Effective address = GPR[Rb] + (GPR[Rc] << strd2) */
    gen_load_gpr(t0, Rb);
    gen_load_gpr(t1, Rc);
    tcg_gen_shli_tl(t1, t1, strd2);
    tcg_gen_add_tl(t0, t0, t1);

    /* 'mop' carries the access size and sign/zero extension. */
    tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, mop | ctx->default_tcg_memop_mask);
    gen_store_gpr(t1, Ra);
}

/*
 * S32STDV  XRa, Rb, Rc, STRD2 - Store a word from XRF to memory
 * S32STDVR XRa, Rb, Rc, STRD2 - Store a word from XRF to memory
 *                               in reversed byte seq.
 * S32SDIV  XRa, Rb, Rc, STRD2 - Store a word from XRF to memory,
 *                               post modify base address GPR.
 * S32SDIVR XRa, Rb, Rc, STRD2 - Store a word from XRF to memory,
 *                               post modify base address GPR and store in reversed byte seq.
 */
static void gen_mxu_s32stxvx(DisasContext *ctx, bool reversed,
                             bool postinc, uint32_t strd2)
{
    TCGv t0, t1;
    uint32_t XRa, Rb, Rc;

    t0 = tcg_temp_new();
    t1 = tcg_temp_new();

    XRa = extract32(ctx->opcode, 6, 4);
    Rc = extract32(ctx->opcode, 16, 5);
    Rb = extract32(ctx->opcode, 21, 5);

    /* Effective address = GPR[Rb] + (GPR[Rc] << strd2) */
    gen_load_gpr(t0, Rb);
    gen_load_gpr(t1, Rc);
    tcg_gen_shli_tl(t1, t1, strd2);
    tcg_gen_add_tl(t0, t0, t1);

    gen_load_mxu_gpr(t1, XRa);
    tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
                       (MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
                        ctx->default_tcg_memop_mask);

    if (postinc) {
        gen_store_gpr(t0, Rb);
    }
}

/*
 * MXU instruction category: logic
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *               S32NOR S32AND S32OR S32XOR
 */

/*
 * S32NOR XRa, XRb, XRc
 *   Update XRa with the result of logical bitwise 'nor' operation
 *   applied to the content of XRb and XRc.
 */
static void gen_mxu_S32NOR(DisasContext *ctx)
{
    uint32_t pad, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to all 1s */
        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0xFFFFFFFF);
    } else if (unlikely(XRb == 0)) {
        /* XRb zero register -> just set destination to the negation of XRc */
        tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
    } else if (unlikely(XRc == 0)) {
        /* XRc zero register -> just set destination to the negation of XRb */
        tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else if (unlikely(XRb == XRc)) {
        /* both operands same -> just set destination to the negation of XRb */
        tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else {
        /* the most general case */
        tcg_gen_nor_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
    }
}

/*
 * S32AND XRa, XRb, XRc
 *   Update XRa with the result of logical bitwise 'and' operation
 *   applied to the content of XRb and XRc.
 */
static void gen_mxu_S32AND(DisasContext *ctx)
{
    uint32_t pad, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) || (XRc == 0))) {
        /* one of operands zero register -> just set destination to all 0s */
        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
    } else if (unlikely(XRb == XRc)) {
        /* both operands same -> just set destination to one of them */
        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else {
        /* the most general case */
        tcg_gen_and_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
    }
}

/*
 * S32OR XRa, XRb, XRc
 *   Update XRa with the result of logical bitwise 'or' operation
 *   applied to the content of XRb and XRc.
 */
static void gen_mxu_S32OR(DisasContext *ctx)
{
    uint32_t pad, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to all 0s */
        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
    } else if (unlikely(XRb == 0)) {
        /* XRb zero register -> just set destination to the content of XRc */
        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
    } else if (unlikely(XRc == 0)) {
        /* XRc zero register -> just set destination to the content of XRb */
        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else if (unlikely(XRb == XRc)) {
        /* both operands same -> just set destination to one of them */
        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else {
        /* the most general case */
        tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
    }
}

/*
 * S32XOR XRa, XRb, XRc
 *   Update XRa with the result of logical bitwise 'xor' operation
 *   applied to the content of XRb and XRc.
 */
static void gen_mxu_S32XOR(DisasContext *ctx)
{
    uint32_t pad, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to all 0s */
        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
    } else if (unlikely(XRb == 0)) {
        /* XRb zero register -> just set destination to the content of XRc */
        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
    } else if (unlikely(XRc == 0)) {
        /* XRc zero register -> just set destination to the content of XRb */
        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else if (unlikely(XRb == XRc)) {
        /* x ^ x == 0 -> both operands same -> set destination to all 0s */
        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
    } else {
        /* the most general case */
        tcg_gen_xor_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
    }
}

/*
 * MXU instruction category: shift
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *               D32SLL D32SLR D32SAR D32SARL
 *               D32SLLV D32SLRV D32SARV D32SARW
 *               Q16SLL Q16SLR Q16SAR
 *               Q16SLLV Q16SLRV Q16SARV
 */

/*
 * D32SLL XRa, XRd, XRb, XRc, SFT4
 *   Dual 32-bit shift left from XRb and XRc to SFT4
 *   bits (0..15). Store to XRa and XRd respectively.
 * D32SLR XRa, XRd, XRb, XRc, SFT4
 *   Dual 32-bit shift logic right from XRb and XRc
 *   to SFT4 bits (0..15). Store to XRa and XRd respectively.
 * D32SAR XRa, XRd, XRb, XRc, SFT4
 *   Dual 32-bit shift arithmetic right from XRb and XRc
 *   to SFT4 bits (0..15). Store to XRa and XRd respectively.
 */
static void gen_mxu_d32sxx(DisasContext *ctx, bool right, bool arithmetic)
{
    uint32_t XRa, XRb, XRc, XRd, sft4;

    XRa = extract32(ctx->opcode, 6, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRd = extract32(ctx->opcode, 18, 4);
    sft4 = extract32(ctx->opcode, 22, 4);

    TCGv t0 = tcg_temp_new();
    TCGv t1 = tcg_temp_new();

    gen_load_mxu_gpr(t0, XRb);
    gen_load_mxu_gpr(t1, XRc);

    if (right) {
        if (arithmetic) {
            tcg_gen_sari_tl(t0, t0, sft4);
            tcg_gen_sari_tl(t1, t1, sft4);
        } else {
            tcg_gen_shri_tl(t0, t0, sft4);
            tcg_gen_shri_tl(t1, t1, sft4);
        }
    } else {
        tcg_gen_shli_tl(t0, t0, sft4);
        tcg_gen_shli_tl(t1, t1, sft4);
    }
    gen_store_mxu_gpr(t0, XRa);
    gen_store_mxu_gpr(t1, XRd);
}

/*
 * D32SLLV XRa, XRd, rs
 *   Dual 32-bit shift left from XRa and XRd to rs[3:0]
 *   bits. Store back to XRa and XRd respectively.
 * D32SLRV XRa, XRd, rs
 *   Dual 32-bit shift logic right from XRa and XRd to rs[3:0]
 *   bits. Store back to XRa and XRd respectively.
 * D32SARV XRa, XRd, rs
 *   Dual 32-bit shift arithmetic right from XRa and XRd to rs[3:0]
 *   bits. Store back to XRa and XRd respectively.
 */
static void gen_mxu_d32sxxv(DisasContext *ctx, bool right, bool arithmetic)
{
    uint32_t XRa, XRd, rs;

    XRa = extract32(ctx->opcode, 10, 4);
    XRd = extract32(ctx->opcode, 14, 4);
    rs = extract32(ctx->opcode, 21, 5);

    TCGv t0 = tcg_temp_new();
    TCGv t1 = tcg_temp_new();
    TCGv t2 = tcg_temp_new();

    gen_load_mxu_gpr(t0, XRa);
    gen_load_mxu_gpr(t1, XRd);
    gen_load_gpr(t2, rs);
    /* only rs[3:0] is used as the shift amount */
    tcg_gen_andi_tl(t2, t2, 0x0f);

    if (right) {
        if (arithmetic) {
            tcg_gen_sar_tl(t0, t0, t2);
            tcg_gen_sar_tl(t1, t1, t2);
        } else {
            tcg_gen_shr_tl(t0, t0, t2);
            tcg_gen_shr_tl(t1, t1, t2);
        }
    } else {
        tcg_gen_shl_tl(t0, t0, t2);
        tcg_gen_shl_tl(t1, t1, t2);
    }
    gen_store_mxu_gpr(t0, XRa);
    gen_store_mxu_gpr(t1, XRd);
}

/*
 * D32SARL XRa, XRb, XRc, SFT4
 *   Dual shift arithmetic right 32-bit integers in XRb and XRc
 *   to SFT4 bits (0..15). Pack 16 LSBs of each into XRa.
 *
 * D32SARW XRa, XRb, XRc, rb
 *   Dual shift arithmetic right 32-bit integers in XRb and XRc
 *   to rb[3:0] bits. Pack 16 LSBs of each into XRa.
 */
static void gen_mxu_d32sarl(DisasContext *ctx, bool sarw)
{
    uint32_t XRa, XRb, XRc, rb;

    XRa = extract32(ctx->opcode, 6, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    rb = extract32(ctx->opcode, 21, 5);

    if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else {
        TCGv t0 = tcg_temp_new();
        TCGv t1 = tcg_temp_new();
        TCGv t2 = tcg_temp_new();

        if (!sarw) {
            /* Make SFT4 from rb field */
            tcg_gen_movi_tl(t2, rb >> 1);
        } else {
            gen_load_gpr(t2, rb);
            tcg_gen_andi_tl(t2, t2, 0x0f);
        }
        gen_load_mxu_gpr(t0, XRb);
        gen_load_mxu_gpr(t1, XRc);
        tcg_gen_sar_tl(t0, t0, t2);
        tcg_gen_sar_tl(t1, t1, t2);
        /* XRa = (XRb_result & 0xffff) << 16 | (XRc_result & 0xffff) */
        tcg_gen_extract_tl(t2, t1, 0, 16);
        tcg_gen_deposit_tl(t2, t2, t0, 16, 16);
        gen_store_mxu_gpr(t2, XRa);
    }
}

/*
 * Q16SLL XRa, XRd, XRb, XRc, SFT4
 *   Quad 16-bit shift left from XRb and XRc to SFT4
 *   bits (0..15). Store to XRa and XRd respectively.
 * Q16SLR XRa, XRd, XRb, XRc, SFT4
 *   Quad 16-bit shift logic right from XRb and XRc
 *   to SFT4 bits (0..15). Store to XRa and XRd respectively.
 * Q16SAR XRa, XRd, XRb, XRc, SFT4
 *   Quad 16-bit shift arithmetic right from XRb and XRc
 *   to SFT4 bits (0..15). Store to XRa and XRd respectively.
 */
static void gen_mxu_q16sxx(DisasContext *ctx, bool right, bool arithmetic)
{
    uint32_t XRa, XRb, XRc, XRd, sft4;

    XRa = extract32(ctx->opcode, 6, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRd = extract32(ctx->opcode, 18, 4);
    sft4 = extract32(ctx->opcode, 22, 4);

    TCGv t0 = tcg_temp_new();
    TCGv t1 = tcg_temp_new();
    TCGv t2 = tcg_temp_new();
    TCGv t3 = tcg_temp_new();

    gen_load_mxu_gpr(t0, XRb);
    gen_load_mxu_gpr(t2, XRc);

    /* Split each word into half-words; sign-extend for arithmetic shift. */
    if (arithmetic) {
        tcg_gen_sextract_tl(t1, t0, 16, 16);
        tcg_gen_sextract_tl(t0, t0, 0, 16);
        tcg_gen_sextract_tl(t3, t2, 16, 16);
        tcg_gen_sextract_tl(t2, t2, 0, 16);
    } else {
        tcg_gen_extract_tl(t1, t0, 16, 16);
        tcg_gen_extract_tl(t0, t0, 0, 16);
        tcg_gen_extract_tl(t3, t2, 16, 16);
        tcg_gen_extract_tl(t2, t2, 0, 16);
    }

    if (right) {
        if (arithmetic) {
            tcg_gen_sari_tl(t0, t0, sft4);
            tcg_gen_sari_tl(t1, t1, sft4);
            tcg_gen_sari_tl(t2, t2, sft4);
            tcg_gen_sari_tl(t3, t3, sft4);
        } else {
            tcg_gen_shri_tl(t0, t0, sft4);
            tcg_gen_shri_tl(t1, t1, sft4);
            tcg_gen_shri_tl(t2, t2, sft4);
            tcg_gen_shri_tl(t3, t3, sft4);
        }
    } else {
        tcg_gen_shli_tl(t0, t0, sft4);
        tcg_gen_shli_tl(t1, t1, sft4);
        tcg_gen_shli_tl(t2, t2, sft4);
        tcg_gen_shli_tl(t3, t3, sft4);
    }
    tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
    tcg_gen_deposit_tl(t2, t2, t3, 16, 16);

    gen_store_mxu_gpr(t0, XRa);
    gen_store_mxu_gpr(t2, XRd);
}

/*
 * Q16SLLV XRa, XRd, rs
 *   Quad 16-bit shift left from XRa and XRd to rs[3:0]
 *   bits. Store to XRa and XRd respectively.
 * Q16SLRV XRa, XRd, rs
 *   Quad 16-bit shift logic right from XRa and XRd to rs[3:0]
 *   bits. Store to XRa and XRd respectively.
 * Q16SARV XRa, XRd, rs
 *   Quad 16-bit shift arithmetic right from XRa and XRd to rs[3:0]
 *   bits. Store to XRa and XRd respectively.
 */
static void gen_mxu_q16sxxv(DisasContext *ctx, bool right, bool arithmetic)
{
    uint32_t XRa, XRd, rs;

    XRa = extract32(ctx->opcode, 10, 4);
    XRd = extract32(ctx->opcode, 14, 4);
    rs = extract32(ctx->opcode, 21, 5);

    TCGv t0 = tcg_temp_new();
    TCGv t1 = tcg_temp_new();
    TCGv t2 = tcg_temp_new();
    TCGv t3 = tcg_temp_new();
    TCGv t5 = tcg_temp_new();

    gen_load_mxu_gpr(t0, XRa);
    gen_load_mxu_gpr(t2, XRd);
    gen_load_gpr(t5, rs);
    /* only rs[3:0] is used as the shift amount */
    tcg_gen_andi_tl(t5, t5, 0x0f);

    /* Split each word into half-words; sign-extend for arithmetic shift. */
    if (arithmetic) {
        tcg_gen_sextract_tl(t1, t0, 16, 16);
        tcg_gen_sextract_tl(t0, t0, 0, 16);
        tcg_gen_sextract_tl(t3, t2, 16, 16);
        tcg_gen_sextract_tl(t2, t2, 0, 16);
    } else {
        tcg_gen_extract_tl(t1, t0, 16, 16);
        tcg_gen_extract_tl(t0, t0, 0, 16);
        tcg_gen_extract_tl(t3, t2, 16, 16);
        tcg_gen_extract_tl(t2, t2, 0, 16);
    }

    if (right) {
        if (arithmetic) {
            tcg_gen_sar_tl(t0, t0, t5);
            tcg_gen_sar_tl(t1, t1, t5);
            tcg_gen_sar_tl(t2, t2, t5);
            tcg_gen_sar_tl(t3, t3, t5);
        } else {
            tcg_gen_shr_tl(t0, t0, t5);
            tcg_gen_shr_tl(t1, t1, t5);
            tcg_gen_shr_tl(t2, t2, t5);
            tcg_gen_shr_tl(t3, t3, t5);
        }
    } else {
        tcg_gen_shl_tl(t0, t0, t5);
        tcg_gen_shl_tl(t1, t1, t5);
        tcg_gen_shl_tl(t2, t2, t5);
        tcg_gen_shl_tl(t3, t3, t5);
    }
    tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
    tcg_gen_deposit_tl(t2, t2, t3, 16, 16);

    gen_store_mxu_gpr(t0, XRa);
    gen_store_mxu_gpr(t2, XRd);
}

/*
 * MXU instruction category max/min/avg
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *                     S32MAX  D16MAX  Q8MAX
 *                     S32MIN  D16MIN  Q8MIN
 *                     S32SLT  D16SLT  Q8SLT
 *                                     Q8SLTU
 *                             D16AVG  Q8AVG
 *                             D16AVGR Q8AVGR
 *                     S32MOVZ D16MOVZ Q8MOVZ
2105 * S32MOVN D16MOVN Q8MOVN 2106 */ 2107 2108 /* 2109 * S32MAX XRa, XRb, XRc 2110 * Update XRa with the maximum of signed 32-bit integers contained 2111 * in XRb and XRc. 2112 * 2113 * S32MIN XRa, XRb, XRc 2114 * Update XRa with the minimum of signed 32-bit integers contained 2115 * in XRb and XRc. 2116 */ 2117 static void gen_mxu_S32MAX_S32MIN(DisasContext *ctx) 2118 { 2119 uint32_t pad, opc, XRc, XRb, XRa; 2120 2121 pad = extract32(ctx->opcode, 21, 5); 2122 opc = extract32(ctx->opcode, 18, 3); 2123 XRc = extract32(ctx->opcode, 14, 4); 2124 XRb = extract32(ctx->opcode, 10, 4); 2125 XRa = extract32(ctx->opcode, 6, 4); 2126 2127 if (unlikely(pad != 0)) { 2128 /* opcode padding incorrect -> do nothing */ 2129 } else if (unlikely(XRa == 0)) { 2130 /* destination is zero register -> do nothing */ 2131 } else if (unlikely((XRb == 0) && (XRc == 0))) { 2132 /* both operands zero registers -> just set destination to zero */ 2133 tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0); 2134 } else if (unlikely((XRb == 0) || (XRc == 0))) { 2135 /* exactly one operand is zero register - find which one is not...*/ 2136 uint32_t XRx = XRb ? XRb : XRc; 2137 /* ...and do max/min operation with one operand 0 */ 2138 if (opc == OPC_MXU_S32MAX) { 2139 tcg_gen_smax_i32(mxu_gpr[XRa - 1], mxu_gpr[XRx - 1], 0); 2140 } else { 2141 tcg_gen_smin_i32(mxu_gpr[XRa - 1], mxu_gpr[XRx - 1], 0); 2142 } 2143 } else if (unlikely(XRb == XRc)) { 2144 /* both operands same -> just set destination to one of them */ 2145 tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]); 2146 } else { 2147 /* the most general case */ 2148 if (opc == OPC_MXU_S32MAX) { 2149 tcg_gen_smax_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], 2150 mxu_gpr[XRc - 1]); 2151 } else { 2152 tcg_gen_smin_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], 2153 mxu_gpr[XRc - 1]); 2154 } 2155 } 2156 } 2157 2158 /* 2159 * D16MAX 2160 * Update XRa with the 16-bit-wise maximums of signed integers 2161 * contained in XRb and XRc. 
 *
 *  D16MIN
 *    Update XRa with the 16-bit-wise minimums of signed integers
 *    contained in XRb and XRc.
 */
static void gen_mxu_D16MAX_D16MIN(DisasContext *ctx)
{
    uint32_t pad, opc, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    opc = extract32(ctx->opcode, 18, 3);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to zero */
        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
    } else if (unlikely((XRb == 0) || (XRc == 0))) {
        /* exactly one operand is zero register - find which one is not...*/
        uint32_t XRx = XRb ? XRb : XRc;
        /* ...and do half-word-wise max/min with one operand 0 */
        TCGv_i32 t0 = tcg_temp_new();
        TCGv_i32 t1 = tcg_constant_i32(0);
        TCGv_i32 t2 = tcg_temp_new();

        /*
         * the left half-word first: with the low 16 bits masked off the
         * masked word compares against 0 exactly as the signed high
         * half-word does, so no shifting is needed here
         */
        tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFFFF0000);
        if (opc == OPC_MXU_D16MAX) {
            tcg_gen_smax_i32(t2, t0, t1);
        } else {
            tcg_gen_smin_i32(t2, t0, t1);
        }

        /* the right half-word */
        tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0x0000FFFF);
        /* move half-words to the leftmost position */
        tcg_gen_shli_i32(t0, t0, 16);
        /* t0 will be max/min of t0 and t1 */
        if (opc == OPC_MXU_D16MAX) {
            tcg_gen_smax_i32(t0, t0, t1);
        } else {
            tcg_gen_smin_i32(t0, t0, t1);
        }
        /* return resulting half-words to its original position */
        tcg_gen_shri_i32(t0, t0, 16);
        /* finally update the destination */
        tcg_gen_or_i32(mxu_gpr[XRa - 1], t2, t0);
    } else if (unlikely(XRb == XRc)) {
        /* both operands same -> just set destination to one of them */
        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else {
        /* the most general case */
        TCGv_i32 t0 = tcg_temp_new();
        TCGv_i32 t1 = tcg_temp_new();
        TCGv_i32 t2 = tcg_temp_new();

        /* the left half-word first: compare with low halves masked off */
        tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFFFF0000);
        tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFF0000);
        if (opc == OPC_MXU_D16MAX) {
            tcg_gen_smax_i32(t2, t0, t1);
        } else {
            tcg_gen_smin_i32(t2, t0, t1);
        }

        /* the right half-word */
        tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x0000FFFF);
        tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0x0000FFFF);
        /* move half-words to the leftmost position so the sign is right */
        tcg_gen_shli_i32(t0, t0, 16);
        tcg_gen_shli_i32(t1, t1, 16);
        /* t0 will be max/min of t0 and t1 */
        if (opc == OPC_MXU_D16MAX) {
            tcg_gen_smax_i32(t0, t0, t1);
        } else {
            tcg_gen_smin_i32(t0, t0, t1);
        }
        /* return resulting half-words to its original position */
        tcg_gen_shri_i32(t0, t0, 16);
        /* finally update the destination */
        tcg_gen_or_i32(mxu_gpr[XRa - 1], t2, t0);
    }
}

/*
 *  Q8MAX
 *    Update XRa with the 8-bit-wise maximums of signed integers
 *    contained in XRb and XRc.
 *
 *  Q8MIN
 *    Update XRa with the 8-bit-wise minimums of signed integers
 *    contained in XRb and XRc.
 */
static void gen_mxu_Q8MAX_Q8MIN(DisasContext *ctx)
{
    uint32_t pad, opc, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    opc = extract32(ctx->opcode, 18, 3);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to zero */
        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
    } else if (unlikely((XRb == 0) || (XRc == 0))) {
        /* exactly one operand is zero register - make it be the first...*/
        uint32_t XRx = XRb ? XRb : XRc;
        /* ...and do byte-wise max/min with one operand 0 */
        TCGv_i32 t0 = tcg_temp_new();
        TCGv_i32 t1 = tcg_constant_i32(0);
        TCGv_i32 t2 = tcg_temp_new();
        int32_t i;

        /*
         * the leftmost byte (byte 3) first: masked in place it already
         * compares against 0 like the signed top byte, no shift needed
         */
        tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFF000000);
        if (opc == OPC_MXU_Q8MAX) {
            tcg_gen_smax_i32(t2, t0, t1);
        } else {
            tcg_gen_smin_i32(t2, t0, t1);
        }

        /* bytes 2, 1, 0 */
        for (i = 2; i >= 0; i--) {
            /* extract the byte */
            tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFF << (8 * i));
            /* move the byte to the leftmost position */
            tcg_gen_shli_i32(t0, t0, 8 * (3 - i));
            /* t0 will be max/min of t0 and t1 */
            if (opc == OPC_MXU_Q8MAX) {
                tcg_gen_smax_i32(t0, t0, t1);
            } else {
                tcg_gen_smin_i32(t0, t0, t1);
            }
            /* return resulting byte to its original position */
            tcg_gen_shri_i32(t0, t0, 8 * (3 - i));
            /* finally update the destination */
            tcg_gen_or_i32(t2, t2, t0);
        }
        gen_store_mxu_gpr(t2, XRa);
    } else if (unlikely(XRb == XRc)) {
        /* both operands same -> just set destination to one of them */
        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else {
        /* the most general case */
        TCGv_i32 t0 = tcg_temp_new();
        TCGv_i32 t1 = tcg_temp_new();
        TCGv_i32 t2 = tcg_temp_new();
        int32_t i;

        /* the leftmost bytes (bytes 3) first */
        tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFF000000);
        tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF000000);
        if (opc == OPC_MXU_Q8MAX) {
            tcg_gen_smax_i32(t2, t0, t1);
        } else {
            tcg_gen_smin_i32(t2, t0, t1);
        }

        /* bytes 2, 1, 0 */
        for (i = 2; i >= 0; i--) {
            /* extract corresponding bytes */
            tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFF << (8 * i));
            tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF << (8 * i));
            /* move the bytes to the leftmost position for signed compare */
            tcg_gen_shli_i32(t0, t0, 8 * (3 - i));
            tcg_gen_shli_i32(t1, t1, 8 * (3 - i));
            /* t0 will be max/min of t0 and t1 */
            if (opc == OPC_MXU_Q8MAX) {
                tcg_gen_smax_i32(t0, t0, t1);
            } else {
                tcg_gen_smin_i32(t0, t0, t1);
            }
            /* return resulting byte to its original position */
            tcg_gen_shri_i32(t0, t0, 8 * (3 - i));
            /* finally update the destination */
            tcg_gen_or_i32(t2, t2, t0);
        }
        gen_store_mxu_gpr(t2, XRa);
    }
}

/*
 *  Q8SLT
 *    Update XRa with the signed "set less than" comparison of XRb and XRc
 *    on per-byte basis.
 *    a.k.a. XRa[0..3] = XRb[0..3] < XRc[0..3] ? 1 : 0;
 *
 *  Q8SLTU
 *    Update XRa with the unsigned "set less than" comparison of XRb and XRc
 *    on per-byte basis.
 *    a.k.a. XRa[0..3] = XRb[0..3] < XRc[0..3] ?
1 : 0; 2364 */ 2365 static void gen_mxu_q8slt(DisasContext *ctx, bool sltu) 2366 { 2367 uint32_t pad, XRc, XRb, XRa; 2368 2369 pad = extract32(ctx->opcode, 21, 5); 2370 XRc = extract32(ctx->opcode, 14, 4); 2371 XRb = extract32(ctx->opcode, 10, 4); 2372 XRa = extract32(ctx->opcode, 6, 4); 2373 2374 if (unlikely(pad != 0)) { 2375 /* opcode padding incorrect -> do nothing */ 2376 } else if (unlikely(XRa == 0)) { 2377 /* destination is zero register -> do nothing */ 2378 } else if (unlikely((XRb == 0) && (XRc == 0))) { 2379 /* both operands zero registers -> just set destination to zero */ 2380 tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); 2381 } else if (unlikely(XRb == XRc)) { 2382 /* both operands same registers -> just set destination to zero */ 2383 tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); 2384 } else { 2385 /* the most general case */ 2386 TCGv t0 = tcg_temp_new(); 2387 TCGv t1 = tcg_temp_new(); 2388 TCGv t2 = tcg_temp_new(); 2389 TCGv t3 = tcg_temp_new(); 2390 TCGv t4 = tcg_temp_new(); 2391 2392 gen_load_mxu_gpr(t3, XRb); 2393 gen_load_mxu_gpr(t4, XRc); 2394 tcg_gen_movi_tl(t2, 0); 2395 2396 for (int i = 0; i < 4; i++) { 2397 if (sltu) { 2398 tcg_gen_extract_tl(t0, t3, 8 * i, 8); 2399 tcg_gen_extract_tl(t1, t4, 8 * i, 8); 2400 } else { 2401 tcg_gen_sextract_tl(t0, t3, 8 * i, 8); 2402 tcg_gen_sextract_tl(t1, t4, 8 * i, 8); 2403 } 2404 tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1); 2405 tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8); 2406 } 2407 gen_store_mxu_gpr(t2, XRa); 2408 } 2409 } 2410 2411 /* 2412 * S32SLT 2413 * Update XRa with the signed "set less than" comparison of XRb and XRc. 2414 * a.k.a. XRa = XRb < XRc ? 
1 : 0; 2415 */ 2416 static void gen_mxu_S32SLT(DisasContext *ctx) 2417 { 2418 uint32_t pad, XRc, XRb, XRa; 2419 2420 pad = extract32(ctx->opcode, 21, 5); 2421 XRc = extract32(ctx->opcode, 14, 4); 2422 XRb = extract32(ctx->opcode, 10, 4); 2423 XRa = extract32(ctx->opcode, 6, 4); 2424 2425 if (unlikely(pad != 0)) { 2426 /* opcode padding incorrect -> do nothing */ 2427 } else if (unlikely(XRa == 0)) { 2428 /* destination is zero register -> do nothing */ 2429 } else if (unlikely((XRb == 0) && (XRc == 0))) { 2430 /* both operands zero registers -> just set destination to zero */ 2431 tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); 2432 } else if (unlikely(XRb == XRc)) { 2433 /* both operands same registers -> just set destination to zero */ 2434 tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); 2435 } else { 2436 /* the most general case */ 2437 tcg_gen_setcond_tl(TCG_COND_LT, mxu_gpr[XRa - 1], 2438 mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]); 2439 } 2440 } 2441 2442 /* 2443 * D16SLT 2444 * Update XRa with the signed "set less than" comparison of XRb and XRc 2445 * on per-word basis. 2446 * a.k.a. XRa[0..1] = XRb[0..1] < XRc[0..1] ? 
1 : 0; 2447 */ 2448 static void gen_mxu_D16SLT(DisasContext *ctx) 2449 { 2450 uint32_t pad, XRc, XRb, XRa; 2451 2452 pad = extract32(ctx->opcode, 21, 5); 2453 XRc = extract32(ctx->opcode, 14, 4); 2454 XRb = extract32(ctx->opcode, 10, 4); 2455 XRa = extract32(ctx->opcode, 6, 4); 2456 2457 if (unlikely(pad != 0)) { 2458 /* opcode padding incorrect -> do nothing */ 2459 } else if (unlikely(XRa == 0)) { 2460 /* destination is zero register -> do nothing */ 2461 } else if (unlikely((XRb == 0) && (XRc == 0))) { 2462 /* both operands zero registers -> just set destination to zero */ 2463 tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); 2464 } else if (unlikely(XRb == XRc)) { 2465 /* both operands same registers -> just set destination to zero */ 2466 tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); 2467 } else { 2468 /* the most general case */ 2469 TCGv t0 = tcg_temp_new(); 2470 TCGv t1 = tcg_temp_new(); 2471 TCGv t2 = tcg_temp_new(); 2472 TCGv t3 = tcg_temp_new(); 2473 TCGv t4 = tcg_temp_new(); 2474 2475 gen_load_mxu_gpr(t3, XRb); 2476 gen_load_mxu_gpr(t4, XRc); 2477 tcg_gen_sextract_tl(t0, t3, 16, 16); 2478 tcg_gen_sextract_tl(t1, t4, 16, 16); 2479 tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1); 2480 tcg_gen_shli_tl(t2, t0, 16); 2481 tcg_gen_sextract_tl(t0, t3, 0, 16); 2482 tcg_gen_sextract_tl(t1, t4, 0, 16); 2483 tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1); 2484 tcg_gen_or_tl(mxu_gpr[XRa - 1], t2, t0); 2485 } 2486 } 2487 2488 /* 2489 * D16AVG 2490 * Update XRa with the signed average of XRb and XRc 2491 * on per-word basis, rounding down. 2492 * a.k.a. XRa[0..1] = (XRb[0..1] + XRc[0..1]) >> 1; 2493 * 2494 * D16AVGR 2495 * Update XRa with the signed average of XRb and XRc 2496 * on per-word basis, math rounding 4/5. 2497 * a.k.a. 
XRa[0..1] = (XRb[0..1] + XRc[0..1] + 1) >> 1;
 */
static void gen_mxu_d16avg(DisasContext *ctx, bool round45)
{
    uint32_t pad, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to zero */
        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
    } else if (unlikely(XRb == XRc)) {
        /* both operands same registers -> just set destination to same */
        /* (avg(x, x) == x for both rounding modes) */
        tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else {
        /* the most general case */
        TCGv t0 = tcg_temp_new();
        TCGv t1 = tcg_temp_new();
        TCGv t2 = tcg_temp_new();
        TCGv t3 = tcg_temp_new();
        TCGv t4 = tcg_temp_new();

        gen_load_mxu_gpr(t3, XRb);
        gen_load_mxu_gpr(t4, XRc);

        /* high half-words: sign-extend, add, optionally round */
        tcg_gen_sextract_tl(t0, t3, 16, 16);
        tcg_gen_sextract_tl(t1, t4, 16, 16);
        tcg_gen_add_tl(t0, t0, t1);
        if (round45) {
            tcg_gen_addi_tl(t0, t0, 1);
        }
        /*
         * shift left by 15 == (sum >> 1) << 16: places the halved sum
         * directly into the upper half-word in a single operation
         */
        tcg_gen_shli_tl(t2, t0, 15);
        tcg_gen_andi_tl(t2, t2, 0xffff0000);

        /* low half-words: same computation, result kept in bits [15:0] */
        tcg_gen_sextract_tl(t0, t3, 0, 16);
        tcg_gen_sextract_tl(t1, t4, 0, 16);
        tcg_gen_add_tl(t0, t0, t1);
        if (round45) {
            tcg_gen_addi_tl(t0, t0, 1);
        }
        tcg_gen_shri_tl(t0, t0, 1);
        tcg_gen_deposit_tl(t2, t2, t0, 0, 16);
        gen_store_mxu_gpr(t2, XRa);
    }
}

/*
 *  Q8AVG
 *    Update XRa with the signed average of XRb and XRc
 *    on per-byte basis, rounding down.
 *    a.k.a. XRa[0..3] = (XRb[0..3] + XRc[0..3]) >> 1;
 *
 *  Q8AVGR
 *    Update XRa with the signed average of XRb and XRc
 *    on per-word basis, math rounding 4/5.
 *    a.k.a.
XRa[0..3] = (XRb[0..3] + XRc[0..3] + 1) >> 1; 2558 */ 2559 static void gen_mxu_q8avg(DisasContext *ctx, bool round45) 2560 { 2561 uint32_t pad, XRc, XRb, XRa; 2562 2563 pad = extract32(ctx->opcode, 21, 5); 2564 XRc = extract32(ctx->opcode, 14, 4); 2565 XRb = extract32(ctx->opcode, 10, 4); 2566 XRa = extract32(ctx->opcode, 6, 4); 2567 2568 if (unlikely(pad != 0)) { 2569 /* opcode padding incorrect -> do nothing */ 2570 } else if (unlikely(XRa == 0)) { 2571 /* destination is zero register -> do nothing */ 2572 } else if (unlikely((XRb == 0) && (XRc == 0))) { 2573 /* both operands zero registers -> just set destination to zero */ 2574 tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); 2575 } else if (unlikely(XRb == XRc)) { 2576 /* both operands same registers -> just set destination to same */ 2577 tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]); 2578 } else { 2579 /* the most general case */ 2580 TCGv t0 = tcg_temp_new(); 2581 TCGv t1 = tcg_temp_new(); 2582 TCGv t2 = tcg_temp_new(); 2583 TCGv t3 = tcg_temp_new(); 2584 TCGv t4 = tcg_temp_new(); 2585 2586 gen_load_mxu_gpr(t3, XRb); 2587 gen_load_mxu_gpr(t4, XRc); 2588 tcg_gen_movi_tl(t2, 0); 2589 2590 for (int i = 0; i < 4; i++) { 2591 tcg_gen_extract_tl(t0, t3, 8 * i, 8); 2592 tcg_gen_extract_tl(t1, t4, 8 * i, 8); 2593 tcg_gen_add_tl(t0, t0, t1); 2594 if (round45) { 2595 tcg_gen_addi_tl(t0, t0, 1); 2596 } 2597 tcg_gen_shri_tl(t0, t0, 1); 2598 tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8); 2599 } 2600 gen_store_mxu_gpr(t2, XRa); 2601 } 2602 } 2603 2604 /* 2605 * Q8MOVZ 2606 * Quadruple 8-bit packed conditional move where 2607 * XRb contains conditions, XRc what to move and 2608 * XRa is the destination. 2609 * a.k.a. if (XRb[0..3] == 0) { XRa[0..3] = XRc[0..3] } 2610 * 2611 * Q8MOVN 2612 * Quadruple 8-bit packed conditional move where 2613 * XRb contains conditions, XRc what to move and 2614 * XRa is the destination. 2615 * a.k.a. 
if (XRb[0..3] != 0) { XRa[0..3] = XRc[0..3] } 2616 */ 2617 static void gen_mxu_q8movzn(DisasContext *ctx, TCGCond cond) 2618 { 2619 uint32_t XRc, XRb, XRa; 2620 2621 XRa = extract32(ctx->opcode, 6, 4); 2622 XRb = extract32(ctx->opcode, 10, 4); 2623 XRc = extract32(ctx->opcode, 14, 4); 2624 2625 TCGv t0 = tcg_temp_new(); 2626 TCGv t1 = tcg_temp_new(); 2627 TCGv t2 = tcg_temp_new(); 2628 TCGv t3 = tcg_temp_new(); 2629 TCGLabel *l_quarterdone = gen_new_label(); 2630 TCGLabel *l_halfdone = gen_new_label(); 2631 TCGLabel *l_quarterrest = gen_new_label(); 2632 TCGLabel *l_done = gen_new_label(); 2633 2634 gen_load_mxu_gpr(t0, XRc); 2635 gen_load_mxu_gpr(t1, XRb); 2636 gen_load_mxu_gpr(t2, XRa); 2637 2638 tcg_gen_extract_tl(t3, t1, 24, 8); 2639 tcg_gen_brcondi_tl(cond, t3, 0, l_quarterdone); 2640 tcg_gen_extract_tl(t3, t0, 24, 8); 2641 tcg_gen_deposit_tl(t2, t2, t3, 24, 8); 2642 2643 gen_set_label(l_quarterdone); 2644 tcg_gen_extract_tl(t3, t1, 16, 8); 2645 tcg_gen_brcondi_tl(cond, t3, 0, l_halfdone); 2646 tcg_gen_extract_tl(t3, t0, 16, 8); 2647 tcg_gen_deposit_tl(t2, t2, t3, 16, 8); 2648 2649 gen_set_label(l_halfdone); 2650 tcg_gen_extract_tl(t3, t1, 8, 8); 2651 tcg_gen_brcondi_tl(cond, t3, 0, l_quarterrest); 2652 tcg_gen_extract_tl(t3, t0, 8, 8); 2653 tcg_gen_deposit_tl(t2, t2, t3, 8, 8); 2654 2655 gen_set_label(l_quarterrest); 2656 tcg_gen_extract_tl(t3, t1, 0, 8); 2657 tcg_gen_brcondi_tl(cond, t3, 0, l_done); 2658 tcg_gen_extract_tl(t3, t0, 0, 8); 2659 tcg_gen_deposit_tl(t2, t2, t3, 0, 8); 2660 2661 gen_set_label(l_done); 2662 gen_store_mxu_gpr(t2, XRa); 2663 } 2664 2665 /* 2666 * D16MOVZ 2667 * Double 16-bit packed conditional move where 2668 * XRb contains conditions, XRc what to move and 2669 * XRa is the destination. 2670 * a.k.a. if (XRb[0..1] == 0) { XRa[0..1] = XRc[0..1] } 2671 * 2672 * D16MOVN 2673 * Double 16-bit packed conditional move where 2674 * XRb contains conditions, XRc what to move and 2675 * XRa is the destination. 2676 * a.k.a. 
if (XRb[0..3] != 0) { XRa[0..1] = XRc[0..1] } 2677 */ 2678 static void gen_mxu_d16movzn(DisasContext *ctx, TCGCond cond) 2679 { 2680 uint32_t XRc, XRb, XRa; 2681 2682 XRa = extract32(ctx->opcode, 6, 4); 2683 XRb = extract32(ctx->opcode, 10, 4); 2684 XRc = extract32(ctx->opcode, 14, 4); 2685 2686 TCGv t0 = tcg_temp_new(); 2687 TCGv t1 = tcg_temp_new(); 2688 TCGv t2 = tcg_temp_new(); 2689 TCGv t3 = tcg_temp_new(); 2690 TCGLabel *l_halfdone = gen_new_label(); 2691 TCGLabel *l_done = gen_new_label(); 2692 2693 gen_load_mxu_gpr(t0, XRc); 2694 gen_load_mxu_gpr(t1, XRb); 2695 gen_load_mxu_gpr(t2, XRa); 2696 2697 tcg_gen_extract_tl(t3, t1, 16, 16); 2698 tcg_gen_brcondi_tl(cond, t3, 0, l_halfdone); 2699 tcg_gen_extract_tl(t3, t0, 16, 16); 2700 tcg_gen_deposit_tl(t2, t2, t3, 16, 16); 2701 2702 gen_set_label(l_halfdone); 2703 tcg_gen_extract_tl(t3, t1, 0, 16); 2704 tcg_gen_brcondi_tl(cond, t3, 0, l_done); 2705 tcg_gen_extract_tl(t3, t0, 0, 16); 2706 tcg_gen_deposit_tl(t2, t2, t3, 0, 16); 2707 2708 gen_set_label(l_done); 2709 gen_store_mxu_gpr(t2, XRa); 2710 } 2711 2712 /* 2713 * S32MOVZ 2714 * Quadruple 32-bit conditional move where 2715 * XRb contains conditions, XRc what to move and 2716 * XRa is the destination. 2717 * a.k.a. if (XRb == 0) { XRa = XRc } 2718 * 2719 * S32MOVN 2720 * Single 32-bit conditional move where 2721 * XRb contains conditions, XRc what to move and 2722 * XRa is the destination. 2723 * a.k.a. 
if (XRb != 0) { XRa = XRc }
 */
static void gen_mxu_s32movzn(DisasContext *ctx, TCGCond cond)
{
    uint32_t XRc, XRb, XRa;

    XRa = extract32(ctx->opcode, 6, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRc = extract32(ctx->opcode, 14, 4);

    TCGv t0 = tcg_temp_new();
    TCGv t1 = tcg_temp_new();
    TCGLabel *l_done = gen_new_label();

    gen_load_mxu_gpr(t0, XRc);
    gen_load_mxu_gpr(t1, XRb);

    /*
     * branch over the store when (XRb 'cond' 0) holds, i.e. 'cond' is
     * the complement of the move condition
     */
    tcg_gen_brcondi_tl(cond, t1, 0, l_done);
    gen_store_mxu_gpr(t0, XRa);
    gen_set_label(l_done);
}

/*
 * MXU instruction category: Addition and subtraction
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *              S32CPS      D16CPS
 *                                       Q8ADD
 */

/*
 *  S32CPS
 *    Update XRa if XRc < 0 by value of 0 - XRb
 *    else XRa = XRb
 */
static void gen_mxu_S32CPS(DisasContext *ctx)
{
    uint32_t pad, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely(XRb == 0)) {
        /* XRc make no sense 0 - 0 = 0 -> just set destination to zero */
        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
    } else if (unlikely(XRc == 0)) {
        /* condition always false -> just move XRb to XRa */
        tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else {
        /* the most general case */
        TCGv t0 = tcg_temp_new();
        TCGLabel *l_not_less = gen_new_label();
        TCGLabel *l_done = gen_new_label();

        /* if XRc >= 0 the sign is kept, otherwise XRb is negated */
        tcg_gen_brcondi_tl(TCG_COND_GE, mxu_gpr[XRc - 1], 0, l_not_less);
        tcg_gen_neg_tl(t0, mxu_gpr[XRb - 1]);
        tcg_gen_br(l_done);
        gen_set_label(l_not_less);
        gen_load_mxu_gpr(t0, XRb);
        gen_set_label(l_done);
        gen_store_mxu_gpr(t0, XRa);
    }
}

/*
 *  D16CPS
 *    Update XRa[0..1] if XRc[0..1] < 0 by value of 0 - XRb[0..1]
 *    else XRa[0..1] = XRb[0..1]
 */
static void gen_mxu_D16CPS(DisasContext *ctx)
{
    uint32_t pad, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely(XRb == 0)) {
        /* XRc make no sense 0 - 0 = 0 -> just set destination to zero */
        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
    } else if (unlikely(XRc == 0)) {
        /* condition always false -> just move XRb to XRa */
        tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else {
        /* the most general case */
        TCGv t0 = tcg_temp_new();
        TCGv t1 = tcg_temp_new();
        TCGLabel *l_done_hi = gen_new_label();
        TCGLabel *l_not_less_lo = gen_new_label();
        TCGLabel *l_done_lo = gen_new_label();

        /* high half-word: t1 = XRb.hi, negated when XRc.hi < 0 */
        tcg_gen_sextract_tl(t0, mxu_gpr[XRc - 1], 16, 16);
        tcg_gen_sextract_tl(t1, mxu_gpr[XRb - 1], 16, 16);
        tcg_gen_brcondi_tl(TCG_COND_GE, t0, 0, l_done_hi);
        tcg_gen_subfi_tl(t1, 0, t1);

        gen_set_label(l_done_hi);
        /* move the high result to its final position */
        tcg_gen_shli_i32(t1, t1, 16);

        /* low half-word: t0 = XRb.lo, negated when XRc.lo < 0 */
        tcg_gen_sextract_tl(t0, mxu_gpr[XRc - 1], 0, 16);
        tcg_gen_brcondi_tl(TCG_COND_GE, t0, 0, l_not_less_lo);
        tcg_gen_sextract_tl(t0, mxu_gpr[XRb - 1], 0, 16);
        tcg_gen_subfi_tl(t0, 0, t0);
        tcg_gen_br(l_done_lo);

        gen_set_label(l_not_less_lo);
        tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 0, 16);

        gen_set_label(l_done_lo);
        /* combine the two half-word results into the destination */
        tcg_gen_deposit_tl(mxu_gpr[XRa - 1], t1, t0, 0, 16);
    }
}

/*
 *  Q8ABD XRa, XRb, XRc
 *  Gets absolute difference for quadruple of 8-bit
 *
packed in XRb to another one in XRc, 2851 * put the result in XRa. 2852 * a.k.a. XRa[0..3] = abs(XRb[0..3] - XRc[0..3]); 2853 */ 2854 static void gen_mxu_Q8ABD(DisasContext *ctx) 2855 { 2856 uint32_t pad, XRc, XRb, XRa; 2857 2858 pad = extract32(ctx->opcode, 21, 3); 2859 XRc = extract32(ctx->opcode, 14, 4); 2860 XRb = extract32(ctx->opcode, 10, 4); 2861 XRa = extract32(ctx->opcode, 6, 4); 2862 2863 if (unlikely(pad != 0)) { 2864 /* opcode padding incorrect -> do nothing */ 2865 } else if (unlikely(XRa == 0)) { 2866 /* destination is zero register -> do nothing */ 2867 } else if (unlikely((XRb == 0) && (XRc == 0))) { 2868 /* both operands zero registers -> just set destination to zero */ 2869 tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); 2870 } else { 2871 /* the most general case */ 2872 TCGv t0 = tcg_temp_new(); 2873 TCGv t1 = tcg_temp_new(); 2874 TCGv t2 = tcg_temp_new(); 2875 TCGv t3 = tcg_temp_new(); 2876 TCGv t4 = tcg_temp_new(); 2877 2878 gen_load_mxu_gpr(t3, XRb); 2879 gen_load_mxu_gpr(t4, XRc); 2880 tcg_gen_movi_tl(t2, 0); 2881 2882 for (int i = 0; i < 4; i++) { 2883 tcg_gen_extract_tl(t0, t3, 8 * i, 8); 2884 tcg_gen_extract_tl(t1, t4, 8 * i, 8); 2885 2886 tcg_gen_sub_tl(t0, t0, t1); 2887 tcg_gen_abs_tl(t0, t0); 2888 2889 tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8); 2890 } 2891 gen_store_mxu_gpr(t2, XRa); 2892 } 2893 } 2894 2895 /* 2896 * Q8ADD XRa, XRb, XRc, ptn2 2897 * Add/subtract quadruple of 8-bit packed in XRb 2898 * to another one in XRc, put the result in XRa. 
 */
static void gen_mxu_Q8ADD(DisasContext *ctx)
{
    uint32_t aptn2, pad, XRc, XRb, XRa;

    aptn2 = extract32(ctx->opcode, 24, 2);
    pad = extract32(ctx->opcode, 21, 3);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to zero */
        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
    } else {
        /* the most general case */
        TCGv t0 = tcg_temp_new();
        TCGv t1 = tcg_temp_new();
        TCGv t2 = tcg_temp_new();
        TCGv t3 = tcg_temp_new();
        TCGv t4 = tcg_temp_new();

        gen_load_mxu_gpr(t3, XRb);
        gen_load_mxu_gpr(t4, XRc);

        /*
         * t3/t4 are consumed in place: each iteration operates on the
         * current lowest byte, then shifts the sources right by 8
         */
        for (int i = 0; i < 4; i++) {
            tcg_gen_andi_tl(t0, t3, 0xff);
            tcg_gen_andi_tl(t1, t4, 0xff);

            /*
             * aptn2 bit 0 selects subtraction for the two low bytes,
             * bit 1 for the two high bytes
             */
            if (i < 2) {
                if (aptn2 & 0x01) {
                    tcg_gen_sub_tl(t0, t0, t1);
                } else {
                    tcg_gen_add_tl(t0, t0, t1);
                }
            } else {
                if (aptn2 & 0x02) {
                    tcg_gen_sub_tl(t0, t0, t1);
                } else {
                    tcg_gen_add_tl(t0, t0, t1);
                }
            }
            /* advance the sources to the next byte (not needed last time) */
            if (i < 3) {
                tcg_gen_shri_tl(t3, t3, 8);
                tcg_gen_shri_tl(t4, t4, 8);
            }
            /* accumulate into t2; the first byte initializes it */
            if (i > 0) {
                tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
            } else {
                tcg_gen_andi_tl(t0, t0, 0xff);
                tcg_gen_mov_tl(t2, t0);
            }
        }
        gen_store_mxu_gpr(t2, XRa);
    }
}

/*
 *  Q8ADDE XRa, XRb, XRc, XRd, aptn2
 *    Add/subtract quadruple of 8-bit packed in XRb
 *    to another one in XRc, with zero extending
 *    to 16-bit and put results as packed 16-bit data
 *    into XRa and XRd.
 *    aptn2 manages action add or subtract of pairs of data.
2967 * 2968 * Q8ACCE XRa, XRb, XRc, XRd, aptn2 2969 * Add/subtract quadruple of 8-bit packed in XRb 2970 * to another one in XRc, with zero extending 2971 * to 16-bit and accumulate results as packed 16-bit data 2972 * into XRa and XRd. 2973 * aptn2 manages action add or subract of pairs of data. 2974 */ 2975 static void gen_mxu_q8adde(DisasContext *ctx, bool accumulate) 2976 { 2977 uint32_t aptn2, XRd, XRc, XRb, XRa; 2978 2979 aptn2 = extract32(ctx->opcode, 24, 2); 2980 XRd = extract32(ctx->opcode, 18, 4); 2981 XRc = extract32(ctx->opcode, 14, 4); 2982 XRb = extract32(ctx->opcode, 10, 4); 2983 XRa = extract32(ctx->opcode, 6, 4); 2984 2985 if (unlikely((XRb == 0) && (XRc == 0))) { 2986 /* both operands zero registers -> just set destination to zero */ 2987 if (XRa != 0) { 2988 tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); 2989 } 2990 if (XRd != 0) { 2991 tcg_gen_movi_tl(mxu_gpr[XRd - 1], 0); 2992 } 2993 } else { 2994 /* the most general case */ 2995 TCGv t0 = tcg_temp_new(); 2996 TCGv t1 = tcg_temp_new(); 2997 TCGv t2 = tcg_temp_new(); 2998 TCGv t3 = tcg_temp_new(); 2999 TCGv t4 = tcg_temp_new(); 3000 TCGv t5 = tcg_temp_new(); 3001 3002 if (XRa != 0) { 3003 tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 16, 8); 3004 tcg_gen_extract_tl(t1, mxu_gpr[XRc - 1], 16, 8); 3005 tcg_gen_extract_tl(t2, mxu_gpr[XRb - 1], 24, 8); 3006 tcg_gen_extract_tl(t3, mxu_gpr[XRc - 1], 24, 8); 3007 if (aptn2 & 2) { 3008 tcg_gen_sub_tl(t0, t0, t1); 3009 tcg_gen_sub_tl(t2, t2, t3); 3010 } else { 3011 tcg_gen_add_tl(t0, t0, t1); 3012 tcg_gen_add_tl(t2, t2, t3); 3013 } 3014 if (accumulate) { 3015 gen_load_mxu_gpr(t5, XRa); 3016 tcg_gen_extract_tl(t1, t5, 0, 16); 3017 tcg_gen_extract_tl(t3, t5, 16, 16); 3018 tcg_gen_add_tl(t0, t0, t1); 3019 tcg_gen_add_tl(t2, t2, t3); 3020 } 3021 tcg_gen_shli_tl(t2, t2, 16); 3022 tcg_gen_extract_tl(t0, t0, 0, 16); 3023 tcg_gen_or_tl(t4, t2, t0); 3024 } 3025 if (XRd != 0) { 3026 tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 0, 8); 3027 tcg_gen_extract_tl(t1, mxu_gpr[XRc - 1], 0, 
8); 3028 tcg_gen_extract_tl(t2, mxu_gpr[XRb - 1], 8, 8); 3029 tcg_gen_extract_tl(t3, mxu_gpr[XRc - 1], 8, 8); 3030 if (aptn2 & 1) { 3031 tcg_gen_sub_tl(t0, t0, t1); 3032 tcg_gen_sub_tl(t2, t2, t3); 3033 } else { 3034 tcg_gen_add_tl(t0, t0, t1); 3035 tcg_gen_add_tl(t2, t2, t3); 3036 } 3037 if (accumulate) { 3038 gen_load_mxu_gpr(t5, XRd); 3039 tcg_gen_extract_tl(t1, t5, 0, 16); 3040 tcg_gen_extract_tl(t3, t5, 16, 16); 3041 tcg_gen_add_tl(t0, t0, t1); 3042 tcg_gen_add_tl(t2, t2, t3); 3043 } 3044 tcg_gen_shli_tl(t2, t2, 16); 3045 tcg_gen_extract_tl(t0, t0, 0, 16); 3046 tcg_gen_or_tl(t5, t2, t0); 3047 } 3048 3049 gen_store_mxu_gpr(t4, XRa); 3050 gen_store_mxu_gpr(t5, XRd); 3051 } 3052 } 3053 3054 /* 3055 * D8SUM XRa, XRb, XRc 3056 * Double parallel add of quadruple unsigned 8-bit together 3057 * with zero extending to 16-bit data. 3058 * D8SUMC XRa, XRb, XRc 3059 * Double parallel add of quadruple unsigned 8-bit together 3060 * with zero extending to 16-bit data and adding 2 to each 3061 * parallel result. 
3062 */ 3063 static void gen_mxu_d8sum(DisasContext *ctx, bool sumc) 3064 { 3065 uint32_t pad, pad2, XRc, XRb, XRa; 3066 3067 pad = extract32(ctx->opcode, 24, 2); 3068 pad2 = extract32(ctx->opcode, 18, 4); 3069 XRc = extract32(ctx->opcode, 14, 4); 3070 XRb = extract32(ctx->opcode, 10, 4); 3071 XRa = extract32(ctx->opcode, 6, 4); 3072 3073 if (unlikely(pad != 0 || pad2 != 0)) { 3074 /* opcode padding incorrect -> do nothing */ 3075 } else if (unlikely(XRa == 0)) { 3076 /* destination is zero register -> do nothing */ 3077 } else if (unlikely((XRb == 0) && (XRc == 0))) { 3078 /* both operands zero registers -> just set destination to zero */ 3079 tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); 3080 } else { 3081 /* the most general case */ 3082 TCGv t0 = tcg_temp_new(); 3083 TCGv t1 = tcg_temp_new(); 3084 TCGv t2 = tcg_temp_new(); 3085 TCGv t3 = tcg_temp_new(); 3086 TCGv t4 = tcg_temp_new(); 3087 TCGv t5 = tcg_temp_new(); 3088 3089 if (XRb != 0) { 3090 tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 0, 8); 3091 tcg_gen_extract_tl(t1, mxu_gpr[XRb - 1], 8, 8); 3092 tcg_gen_extract_tl(t2, mxu_gpr[XRb - 1], 16, 8); 3093 tcg_gen_extract_tl(t3, mxu_gpr[XRb - 1], 24, 8); 3094 tcg_gen_add_tl(t4, t0, t1); 3095 tcg_gen_add_tl(t4, t4, t2); 3096 tcg_gen_add_tl(t4, t4, t3); 3097 } else { 3098 tcg_gen_mov_tl(t4, 0); 3099 } 3100 if (XRc != 0) { 3101 tcg_gen_extract_tl(t0, mxu_gpr[XRc - 1], 0, 8); 3102 tcg_gen_extract_tl(t1, mxu_gpr[XRc - 1], 8, 8); 3103 tcg_gen_extract_tl(t2, mxu_gpr[XRc - 1], 16, 8); 3104 tcg_gen_extract_tl(t3, mxu_gpr[XRc - 1], 24, 8); 3105 tcg_gen_add_tl(t5, t0, t1); 3106 tcg_gen_add_tl(t5, t5, t2); 3107 tcg_gen_add_tl(t5, t5, t3); 3108 } else { 3109 tcg_gen_mov_tl(t5, 0); 3110 } 3111 3112 if (sumc) { 3113 tcg_gen_addi_tl(t4, t4, 2); 3114 tcg_gen_addi_tl(t5, t5, 2); 3115 } 3116 tcg_gen_shli_tl(t4, t4, 16); 3117 3118 tcg_gen_or_tl(mxu_gpr[XRa - 1], t4, t5); 3119 } 3120 } 3121 3122 /* 3123 * Q16ADD XRa, XRb, XRc, XRd, aptn2, optn2 - Quad packed 3124 * 16-bit pattern addition. 
3125 */ 3126 static void gen_mxu_q16add(DisasContext *ctx) 3127 { 3128 uint32_t aptn2, optn2, XRc, XRb, XRa, XRd; 3129 3130 aptn2 = extract32(ctx->opcode, 24, 2); 3131 optn2 = extract32(ctx->opcode, 22, 2); 3132 XRd = extract32(ctx->opcode, 18, 4); 3133 XRc = extract32(ctx->opcode, 14, 4); 3134 XRb = extract32(ctx->opcode, 10, 4); 3135 XRa = extract32(ctx->opcode, 6, 4); 3136 3137 TCGv t0 = tcg_temp_new(); 3138 TCGv t1 = tcg_temp_new(); 3139 TCGv t2 = tcg_temp_new(); 3140 TCGv t3 = tcg_temp_new(); 3141 TCGv t4 = tcg_temp_new(); 3142 TCGv t5 = tcg_temp_new(); 3143 3144 gen_load_mxu_gpr(t1, XRb); 3145 tcg_gen_extract_tl(t0, t1, 0, 16); 3146 tcg_gen_extract_tl(t1, t1, 16, 16); 3147 3148 gen_load_mxu_gpr(t3, XRc); 3149 tcg_gen_extract_tl(t2, t3, 0, 16); 3150 tcg_gen_extract_tl(t3, t3, 16, 16); 3151 3152 switch (optn2) { 3153 case MXU_OPTN2_WW: /* XRB.H+XRC.H == lop, XRB.L+XRC.L == rop */ 3154 tcg_gen_mov_tl(t4, t1); 3155 tcg_gen_mov_tl(t5, t0); 3156 break; 3157 case MXU_OPTN2_LW: /* XRB.L+XRC.H == lop, XRB.L+XRC.L == rop */ 3158 tcg_gen_mov_tl(t4, t0); 3159 tcg_gen_mov_tl(t5, t0); 3160 break; 3161 case MXU_OPTN2_HW: /* XRB.H+XRC.H == lop, XRB.H+XRC.L == rop */ 3162 tcg_gen_mov_tl(t4, t1); 3163 tcg_gen_mov_tl(t5, t1); 3164 break; 3165 case MXU_OPTN2_XW: /* XRB.L+XRC.H == lop, XRB.H+XRC.L == rop */ 3166 tcg_gen_mov_tl(t4, t0); 3167 tcg_gen_mov_tl(t5, t1); 3168 break; 3169 } 3170 3171 switch (aptn2) { 3172 case MXU_APTN2_AA: /* lop +, rop + */ 3173 tcg_gen_add_tl(t0, t4, t3); 3174 tcg_gen_add_tl(t1, t5, t2); 3175 tcg_gen_add_tl(t4, t4, t3); 3176 tcg_gen_add_tl(t5, t5, t2); 3177 break; 3178 case MXU_APTN2_AS: /* lop +, rop + */ 3179 tcg_gen_sub_tl(t0, t4, t3); 3180 tcg_gen_sub_tl(t1, t5, t2); 3181 tcg_gen_add_tl(t4, t4, t3); 3182 tcg_gen_add_tl(t5, t5, t2); 3183 break; 3184 case MXU_APTN2_SA: /* lop +, rop + */ 3185 tcg_gen_add_tl(t0, t4, t3); 3186 tcg_gen_add_tl(t1, t5, t2); 3187 tcg_gen_sub_tl(t4, t4, t3); 3188 tcg_gen_sub_tl(t5, t5, t2); 3189 break; 3190 case 
MXU_APTN2_SS: /* lop +, rop + */ 3191 tcg_gen_sub_tl(t0, t4, t3); 3192 tcg_gen_sub_tl(t1, t5, t2); 3193 tcg_gen_sub_tl(t4, t4, t3); 3194 tcg_gen_sub_tl(t5, t5, t2); 3195 break; 3196 } 3197 3198 tcg_gen_shli_tl(t0, t0, 16); 3199 tcg_gen_extract_tl(t1, t1, 0, 16); 3200 tcg_gen_shli_tl(t4, t4, 16); 3201 tcg_gen_extract_tl(t5, t5, 0, 16); 3202 3203 tcg_gen_or_tl(mxu_gpr[XRa - 1], t4, t5); 3204 tcg_gen_or_tl(mxu_gpr[XRd - 1], t0, t1); 3205 } 3206 3207 /* 3208 * Q16ACC XRa, XRb, XRc, XRd, aptn2 - Quad packed 3209 * 16-bit addition/subtraction with accumulate. 3210 */ 3211 static void gen_mxu_q16acc(DisasContext *ctx) 3212 { 3213 uint32_t aptn2, XRc, XRb, XRa, XRd; 3214 3215 aptn2 = extract32(ctx->opcode, 24, 2); 3216 XRd = extract32(ctx->opcode, 18, 4); 3217 XRc = extract32(ctx->opcode, 14, 4); 3218 XRb = extract32(ctx->opcode, 10, 4); 3219 XRa = extract32(ctx->opcode, 6, 4); 3220 3221 TCGv t0 = tcg_temp_new(); 3222 TCGv t1 = tcg_temp_new(); 3223 TCGv t2 = tcg_temp_new(); 3224 TCGv t3 = tcg_temp_new(); 3225 TCGv s3 = tcg_temp_new(); 3226 TCGv s2 = tcg_temp_new(); 3227 TCGv s1 = tcg_temp_new(); 3228 TCGv s0 = tcg_temp_new(); 3229 3230 gen_load_mxu_gpr(t1, XRb); 3231 tcg_gen_extract_tl(t0, t1, 0, 16); 3232 tcg_gen_extract_tl(t1, t1, 16, 16); 3233 3234 gen_load_mxu_gpr(t3, XRc); 3235 tcg_gen_extract_tl(t2, t3, 0, 16); 3236 tcg_gen_extract_tl(t3, t3, 16, 16); 3237 3238 switch (aptn2) { 3239 case MXU_APTN2_AA: /* lop +, rop + */ 3240 tcg_gen_add_tl(s3, t1, t3); 3241 tcg_gen_add_tl(s2, t0, t2); 3242 tcg_gen_add_tl(s1, t1, t3); 3243 tcg_gen_add_tl(s0, t0, t2); 3244 break; 3245 case MXU_APTN2_AS: /* lop +, rop - */ 3246 tcg_gen_sub_tl(s3, t1, t3); 3247 tcg_gen_sub_tl(s2, t0, t2); 3248 tcg_gen_add_tl(s1, t1, t3); 3249 tcg_gen_add_tl(s0, t0, t2); 3250 break; 3251 case MXU_APTN2_SA: /* lop -, rop + */ 3252 tcg_gen_add_tl(s3, t1, t3); 3253 tcg_gen_add_tl(s2, t0, t2); 3254 tcg_gen_sub_tl(s1, t1, t3); 3255 tcg_gen_sub_tl(s0, t0, t2); 3256 break; 3257 case MXU_APTN2_SS: /* lop -, rop - 
*/ 3258 tcg_gen_sub_tl(s3, t1, t3); 3259 tcg_gen_sub_tl(s2, t0, t2); 3260 tcg_gen_sub_tl(s1, t1, t3); 3261 tcg_gen_sub_tl(s0, t0, t2); 3262 break; 3263 } 3264 3265 if (XRa != 0) { 3266 tcg_gen_add_tl(t0, mxu_gpr[XRa - 1], s0); 3267 tcg_gen_extract_tl(t0, t0, 0, 16); 3268 tcg_gen_extract_tl(t1, mxu_gpr[XRa - 1], 16, 16); 3269 tcg_gen_add_tl(t1, t1, s1); 3270 tcg_gen_shli_tl(t1, t1, 16); 3271 tcg_gen_or_tl(mxu_gpr[XRa - 1], t1, t0); 3272 } 3273 3274 if (XRd != 0) { 3275 tcg_gen_add_tl(t0, mxu_gpr[XRd - 1], s2); 3276 tcg_gen_extract_tl(t0, t0, 0, 16); 3277 tcg_gen_extract_tl(t1, mxu_gpr[XRd - 1], 16, 16); 3278 tcg_gen_add_tl(t1, t1, s3); 3279 tcg_gen_shli_tl(t1, t1, 16); 3280 tcg_gen_or_tl(mxu_gpr[XRd - 1], t1, t0); 3281 } 3282 } 3283 3284 /* 3285 * Q16ACCM XRa, XRb, XRc, XRd, aptn2 - Quad packed 3286 * 16-bit accumulate. 3287 */ 3288 static void gen_mxu_q16accm(DisasContext *ctx) 3289 { 3290 uint32_t aptn2, XRc, XRb, XRa, XRd; 3291 3292 aptn2 = extract32(ctx->opcode, 24, 2); 3293 XRd = extract32(ctx->opcode, 18, 4); 3294 XRc = extract32(ctx->opcode, 14, 4); 3295 XRb = extract32(ctx->opcode, 10, 4); 3296 XRa = extract32(ctx->opcode, 6, 4); 3297 3298 TCGv t0 = tcg_temp_new(); 3299 TCGv t1 = tcg_temp_new(); 3300 TCGv t2 = tcg_temp_new(); 3301 TCGv t3 = tcg_temp_new(); 3302 3303 gen_load_mxu_gpr(t2, XRb); 3304 gen_load_mxu_gpr(t3, XRc); 3305 3306 if (XRa != 0) { 3307 TCGv a0 = tcg_temp_new(); 3308 TCGv a1 = tcg_temp_new(); 3309 3310 tcg_gen_extract_tl(t0, t2, 0, 16); 3311 tcg_gen_extract_tl(t1, t2, 16, 16); 3312 3313 gen_load_mxu_gpr(a1, XRa); 3314 tcg_gen_extract_tl(a0, a1, 0, 16); 3315 tcg_gen_extract_tl(a1, a1, 16, 16); 3316 3317 if (aptn2 & 2) { 3318 tcg_gen_sub_tl(a0, a0, t0); 3319 tcg_gen_sub_tl(a1, a1, t1); 3320 } else { 3321 tcg_gen_add_tl(a0, a0, t0); 3322 tcg_gen_add_tl(a1, a1, t1); 3323 } 3324 tcg_gen_extract_tl(a0, a0, 0, 16); 3325 tcg_gen_shli_tl(a1, a1, 16); 3326 tcg_gen_or_tl(mxu_gpr[XRa - 1], a1, a0); 3327 } 3328 3329 if (XRd != 0) { 3330 TCGv a0 = 
tcg_temp_new(); 3331 TCGv a1 = tcg_temp_new(); 3332 3333 tcg_gen_extract_tl(t0, t3, 0, 16); 3334 tcg_gen_extract_tl(t1, t3, 16, 16); 3335 3336 gen_load_mxu_gpr(a1, XRd); 3337 tcg_gen_extract_tl(a0, a1, 0, 16); 3338 tcg_gen_extract_tl(a1, a1, 16, 16); 3339 3340 if (aptn2 & 1) { 3341 tcg_gen_sub_tl(a0, a0, t0); 3342 tcg_gen_sub_tl(a1, a1, t1); 3343 } else { 3344 tcg_gen_add_tl(a0, a0, t0); 3345 tcg_gen_add_tl(a1, a1, t1); 3346 } 3347 tcg_gen_extract_tl(a0, a0, 0, 16); 3348 tcg_gen_shli_tl(a1, a1, 16); 3349 tcg_gen_or_tl(mxu_gpr[XRd - 1], a1, a0); 3350 } 3351 } 3352 3353 3354 /* 3355 * D16ASUM XRa, XRb, XRc, XRd, aptn2 - Double packed 3356 * 16-bit sign extended addition and accumulate. 3357 */ 3358 static void gen_mxu_d16asum(DisasContext *ctx) 3359 { 3360 uint32_t aptn2, XRc, XRb, XRa, XRd; 3361 3362 aptn2 = extract32(ctx->opcode, 24, 2); 3363 XRd = extract32(ctx->opcode, 18, 4); 3364 XRc = extract32(ctx->opcode, 14, 4); 3365 XRb = extract32(ctx->opcode, 10, 4); 3366 XRa = extract32(ctx->opcode, 6, 4); 3367 3368 TCGv t0 = tcg_temp_new(); 3369 TCGv t1 = tcg_temp_new(); 3370 TCGv t2 = tcg_temp_new(); 3371 TCGv t3 = tcg_temp_new(); 3372 3373 gen_load_mxu_gpr(t2, XRb); 3374 gen_load_mxu_gpr(t3, XRc); 3375 3376 if (XRa != 0) { 3377 tcg_gen_sextract_tl(t0, t2, 0, 16); 3378 tcg_gen_sextract_tl(t1, t2, 16, 16); 3379 tcg_gen_add_tl(t0, t0, t1); 3380 if (aptn2 & 2) { 3381 tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0); 3382 } else { 3383 tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0); 3384 } 3385 } 3386 3387 if (XRd != 0) { 3388 tcg_gen_sextract_tl(t0, t3, 0, 16); 3389 tcg_gen_sextract_tl(t1, t3, 16, 16); 3390 tcg_gen_add_tl(t0, t0, t1); 3391 if (aptn2 & 1) { 3392 tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t0); 3393 } else { 3394 tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t0); 3395 } 3396 } 3397 } 3398 3399 /* 3400 * D32ADD XRa, XRb, XRc, XRd, aptn2 - Double 3401 * 32 bit pattern addition/subtraction, set carry. 
 *
 * D32ADDC XRa, XRb, XRc, XRd, aptn2 - Double
 * 32 bit pattern addition/subtraction with carry.
 */
static void gen_mxu_d32add(DisasContext *ctx)
{
    uint32_t aptn2, addc, XRc, XRb, XRa, XRd;

    aptn2 = extract32(ctx->opcode, 24, 2);
    addc = extract32(ctx->opcode, 22, 2);
    XRd = extract32(ctx->opcode, 18, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    TCGv t0 = tcg_temp_new();
    TCGv t1 = tcg_temp_new();
    TCGv t2 = tcg_temp_new();
    TCGv cr = tcg_temp_new();

    if (unlikely(addc > 1)) {
        /* opcode incorrect -> do nothing */
    } else if (addc == 1) {
        /* D32ADDC: add the carry bits saved in MXU_CR by a prior D32ADD */
        if (unlikely(XRa == 0 && XRd == 0)) {
            /* destinations are zero register -> do nothing */
        } else {
            /* FIXME ??? What if XRa == XRd ??? */
            /* aptn2 is unused here */
            gen_load_mxu_gpr(t0, XRb);
            gen_load_mxu_gpr(t1, XRc);
            gen_load_mxu_cr(cr);
            if (XRa != 0) {
                /* XRa += XRb + MXU_CR[31] (carry produced for XRa) */
                tcg_gen_extract_tl(t2, cr, 31, 1);
                tcg_gen_add_tl(t0, t0, t2);
                tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
            }
            if (XRd != 0) {
                /* XRd += XRc + MXU_CR[30] (carry produced for XRd) */
                tcg_gen_extract_tl(t2, cr, 30, 1);
                tcg_gen_add_tl(t1, t1, t2);
                tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1);
            }
        }
    } else if (unlikely(XRa == 0 && XRd == 0)) {
        /* destinations are zero register -> do nothing */
    } else {
        /* common case: D32ADD computes results and latches carries in CR */
        /* FIXME ??? What if XRa == XRd ??? */
        TCGv carry = tcg_temp_new();

        gen_load_mxu_gpr(t0, XRb);
        gen_load_mxu_gpr(t1, XRc);
        gen_load_mxu_cr(cr);
        if (XRa != 0) {
            if (aptn2 & 2) {
                /*
                 * Subtract: carry = (XRb > XRc), i.e. strictly no borrow.
                 * NOTE(review): equal operands leave carry = 0 here —
                 * confirm against hardware whether GEU was intended.
                 */
                tcg_gen_sub_i32(t2, t0, t1);
                tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t1);
            } else {
                /* Add: carry = unsigned overflow (t0 > t0 + t1) */
                tcg_gen_add_i32(t2, t0, t1);
                tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t2);
            }
            /* store XRa's carry into MXU_CR bit 31 */
            tcg_gen_andi_tl(cr, cr, 0x7fffffff);
            tcg_gen_shli_tl(carry, carry, 31);
            tcg_gen_or_tl(cr, cr, carry);
            gen_store_mxu_gpr(t2, XRa);
        }
        if (XRd != 0) {
            if (aptn2 & 1) {
                tcg_gen_sub_i32(t2, t0, t1);
                tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t1);
            } else {
                tcg_gen_add_i32(t2, t0, t1);
                tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t2);
            }
            /* store XRd's carry into MXU_CR bit 30 */
            tcg_gen_andi_tl(cr, cr, 0xbfffffff);
            tcg_gen_shli_tl(carry, carry, 30);
            tcg_gen_or_tl(cr, cr, carry);
            gen_store_mxu_gpr(t2, XRd);
        }
        gen_store_mxu_cr(cr);
    }
}

/*
 * D32ACC XRa, XRb, XRc, XRd, aptn2 - Double
 * 32 bit pattern addition/subtraction and accumulate.
3487 */ 3488 static void gen_mxu_d32acc(DisasContext *ctx) 3489 { 3490 uint32_t aptn2, XRc, XRb, XRa, XRd; 3491 3492 aptn2 = extract32(ctx->opcode, 24, 2); 3493 XRd = extract32(ctx->opcode, 18, 4); 3494 XRc = extract32(ctx->opcode, 14, 4); 3495 XRb = extract32(ctx->opcode, 10, 4); 3496 XRa = extract32(ctx->opcode, 6, 4); 3497 3498 TCGv t0 = tcg_temp_new(); 3499 TCGv t1 = tcg_temp_new(); 3500 TCGv t2 = tcg_temp_new(); 3501 3502 if (unlikely(XRa == 0 && XRd == 0)) { 3503 /* destinations are zero register -> do nothing */ 3504 } else { 3505 /* common case */ 3506 gen_load_mxu_gpr(t0, XRb); 3507 gen_load_mxu_gpr(t1, XRc); 3508 if (XRa != 0) { 3509 if (aptn2 & 2) { 3510 tcg_gen_sub_tl(t2, t0, t1); 3511 } else { 3512 tcg_gen_add_tl(t2, t0, t1); 3513 } 3514 tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2); 3515 } 3516 if (XRd != 0) { 3517 if (aptn2 & 1) { 3518 tcg_gen_sub_tl(t2, t0, t1); 3519 } else { 3520 tcg_gen_add_tl(t2, t0, t1); 3521 } 3522 tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2); 3523 } 3524 } 3525 } 3526 3527 /* 3528 * D32ACCM XRa, XRb, XRc, XRd, aptn2 - Double 3529 * 32 bit pattern addition/subtraction and accumulate. 
 */
static void gen_mxu_d32accm(DisasContext *ctx)
{
    uint32_t aptn2, XRc, XRb, XRa, XRd;

    aptn2 = extract32(ctx->opcode, 24, 2);
    XRd = extract32(ctx->opcode, 18, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    TCGv t0 = tcg_temp_new();
    TCGv t1 = tcg_temp_new();
    TCGv t2 = tcg_temp_new();

    if (unlikely(XRa == 0 && XRd == 0)) {
        /* destinations are zero register -> do nothing */
    } else {
        /* common case */
        gen_load_mxu_gpr(t0, XRb);
        gen_load_mxu_gpr(t1, XRc);
        if (XRa != 0) {
            /*
             * Note the asymmetry: XRa accumulates the SUM (XRb + XRc)
             * while XRd below accumulates the DIFFERENCE (XRb - XRc).
             */
            tcg_gen_add_tl(t2, t0, t1);
            if (aptn2 & 2) {
                tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2);
            } else {
                tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2);
            }
        }
        if (XRd != 0) {
            tcg_gen_sub_tl(t2, t0, t1);
            if (aptn2 & 1) {
                tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2);
            } else {
                tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2);
            }
        }
    }
}

/*
 * D32ASUM XRa, XRb, XRc, XRd, aptn2 - Double
 * 32 bit pattern addition/subtraction.
3573 */ 3574 static void gen_mxu_d32asum(DisasContext *ctx) 3575 { 3576 uint32_t aptn2, XRc, XRb, XRa, XRd; 3577 3578 aptn2 = extract32(ctx->opcode, 24, 2); 3579 XRd = extract32(ctx->opcode, 18, 4); 3580 XRc = extract32(ctx->opcode, 14, 4); 3581 XRb = extract32(ctx->opcode, 10, 4); 3582 XRa = extract32(ctx->opcode, 6, 4); 3583 3584 TCGv t0 = tcg_temp_new(); 3585 TCGv t1 = tcg_temp_new(); 3586 3587 if (unlikely(XRa == 0 && XRd == 0)) { 3588 /* destinations are zero register -> do nothing */ 3589 } else { 3590 /* common case */ 3591 gen_load_mxu_gpr(t0, XRb); 3592 gen_load_mxu_gpr(t1, XRc); 3593 if (XRa != 0) { 3594 if (aptn2 & 2) { 3595 tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0); 3596 } else { 3597 tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0); 3598 } 3599 } 3600 if (XRd != 0) { 3601 if (aptn2 & 1) { 3602 tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1); 3603 } else { 3604 tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1); 3605 } 3606 } 3607 } 3608 } 3609 3610 /* 3611 * MXU instruction category: Miscellaneous 3612 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3613 * 3614 * S32EXTR S32LUI 3615 * S32EXTRV 3616 * Q16SAT 3617 * Q16SCOP 3618 */ 3619 3620 /* 3621 * S32EXTR XRa, XRd, rs, bits5 3622 * Extract bits5 bits from 64-bit pair {XRa:XRd} 3623 * starting from rs[4:0] offset and put to the XRa. 
 */
static void gen_mxu_s32extr(DisasContext *ctx)
{
    TCGv t0, t1, t2, t3;
    uint32_t XRa, XRd, rs, bits5;

    t0 = tcg_temp_new();
    t1 = tcg_temp_new();
    t2 = tcg_temp_new();
    t3 = tcg_temp_new();

    XRa = extract32(ctx->opcode, 6, 4);
    XRd = extract32(ctx->opcode, 10, 4);
    bits5 = extract32(ctx->opcode, 16, 5);
    rs = extract32(ctx->opcode, 21, 5);

    /* {tmp} = {XRa:XRd} >> (64 - rs - bits5); (comment fixed: was 'rt') */
    /* {XRa} = extract({tmp}, 0, bits5); */
    if (bits5 > 0) {
        TCGLabel *l_xra_only = gen_new_label();
        TCGLabel *l_done = gen_new_label();

        gen_load_mxu_gpr(t0, XRd);
        gen_load_mxu_gpr(t1, XRa);
        gen_load_gpr(t2, rs);
        tcg_gen_andi_tl(t2, t2, 0x1f);
        /* t2 = 32 - offset = number of XRa bits to the right of the field */
        tcg_gen_subfi_tl(t2, 32, t2);
        /* whole field inside XRa? then no XRd bits are needed */
        tcg_gen_brcondi_tl(TCG_COND_GE, t2, bits5, l_xra_only);
        /* field straddles the pair: take high part from XRa, low from XRd */
        tcg_gen_subfi_tl(t2, bits5, t2);
        tcg_gen_subfi_tl(t3, 32, t2);
        tcg_gen_shr_tl(t0, t0, t3);
        tcg_gen_shl_tl(t1, t1, t2);
        tcg_gen_or_tl(t0, t0, t1);
        tcg_gen_br(l_done);
        gen_set_label(l_xra_only);
        /* field entirely within XRa: plain right shift */
        tcg_gen_subi_tl(t2, t2, bits5);
        tcg_gen_shr_tl(t0, t1, t2);
        gen_set_label(l_done);
        /* keep only the requested low bits5 bits */
        tcg_gen_extract_tl(t0, t0, 0, bits5);
    } else {
        /* unspecified behavior but matches tests on real hardware*/
        tcg_gen_movi_tl(t0, 0);
    }
    gen_store_mxu_gpr(t0, XRa);
}

/*
 *  S32EXTRV XRa, XRd, rs, rt
 *  Extract rt[4:0] bits from 64-bit pair {XRa:XRd}
 *  starting from rs[4:0] offset and put to the XRa.
 */
static void gen_mxu_s32extrv(DisasContext *ctx)
{
    TCGv t0, t1, t2, t3, t4;
    uint32_t XRa, XRd, rs, rt;

    t0 = tcg_temp_new();
    t1 = tcg_temp_new();
    t2 = tcg_temp_new();
    t3 = tcg_temp_new();
    t4 = tcg_temp_new();
    TCGLabel *l_xra_only = gen_new_label();
    TCGLabel *l_done = gen_new_label();
    TCGLabel *l_zero = gen_new_label();
    TCGLabel *l_extract = gen_new_label();

    XRa = extract32(ctx->opcode, 6, 4);
    XRd = extract32(ctx->opcode, 10, 4);
    rt = extract32(ctx->opcode, 16, 5);
    rs = extract32(ctx->opcode, 21, 5);

    /* {tmp} = {XRa:XRd} >> (64 - rs - rt) */
    gen_load_mxu_gpr(t0, XRd);
    gen_load_mxu_gpr(t1, XRa);
    gen_load_gpr(t2, rs);   /* t2 = run-time bit offset */
    gen_load_gpr(t4, rt);   /* t4 = run-time field width */
    /* zero-width field -> hardware-observed result is 0 */
    tcg_gen_brcondi_tl(TCG_COND_EQ, t4, 0, l_zero);
    tcg_gen_andi_tl(t2, t2, 0x1f);
    /* t2 = 32 - offset = bits of XRa to the right of the field start */
    tcg_gen_subfi_tl(t2, 32, t2);
    /* field fits entirely within XRa? */
    tcg_gen_brcond_tl(TCG_COND_GE, t2, t4, l_xra_only);
    /* straddling case: combine XRa high part with XRd low part */
    tcg_gen_sub_tl(t2, t4, t2);
    tcg_gen_subfi_tl(t3, 32, t2);
    tcg_gen_shr_tl(t0, t0, t3);
    tcg_gen_shl_tl(t1, t1, t2);
    tcg_gen_or_tl(t0, t0, t1);
    tcg_gen_br(l_extract);

    gen_set_label(l_xra_only);
    tcg_gen_sub_tl(t2, t2, t4);
    tcg_gen_shr_tl(t0, t1, t2);
    tcg_gen_br(l_extract);

    /* unspecified behavior but matches tests on real hardware*/
    gen_set_label(l_zero);
    tcg_gen_movi_tl(t0, 0);
    tcg_gen_br(l_done);

    /* {XRa} = extract({tmp}, 0, rt): mask via shift-left then shift-right */
    gen_set_label(l_extract);
    tcg_gen_subfi_tl(t4, 32, t4);
    tcg_gen_shl_tl(t0, t0, t4);
    tcg_gen_shr_tl(t0, t0, t4);

    gen_set_label(l_done);
    gen_store_mxu_gpr(t0, XRa);
}

/*
 *  S32LUI XRa, S8, optn3
 *  Permutate the immediate S8 value to form a word
 *  to update XRa.
3735 */ 3736 static void gen_mxu_s32lui(DisasContext *ctx) 3737 { 3738 uint32_t XRa, s8, optn3, pad; 3739 3740 XRa = extract32(ctx->opcode, 6, 4); 3741 s8 = extract32(ctx->opcode, 10, 8); 3742 pad = extract32(ctx->opcode, 21, 2); 3743 optn3 = extract32(ctx->opcode, 23, 3); 3744 3745 if (unlikely(pad != 0)) { 3746 /* opcode padding incorrect -> do nothing */ 3747 } else if (unlikely(XRa == 0)) { 3748 /* destination is zero register -> do nothing */ 3749 } else { 3750 uint32_t s16; 3751 TCGv t0 = tcg_temp_new(); 3752 3753 switch (optn3) { 3754 case 0: 3755 tcg_gen_movi_tl(t0, s8); 3756 break; 3757 case 1: 3758 tcg_gen_movi_tl(t0, s8 << 8); 3759 break; 3760 case 2: 3761 tcg_gen_movi_tl(t0, s8 << 16); 3762 break; 3763 case 3: 3764 tcg_gen_movi_tl(t0, s8 << 24); 3765 break; 3766 case 4: 3767 tcg_gen_movi_tl(t0, (s8 << 16) | s8); 3768 break; 3769 case 5: 3770 tcg_gen_movi_tl(t0, (s8 << 24) | (s8 << 8)); 3771 break; 3772 case 6: 3773 s16 = (uint16_t)(int16_t)(int8_t)s8; 3774 tcg_gen_movi_tl(t0, (s16 << 16) | s16); 3775 break; 3776 case 7: 3777 tcg_gen_movi_tl(t0, (s8 << 24) | (s8 << 16) | (s8 << 8) | s8); 3778 break; 3779 } 3780 gen_store_mxu_gpr(t0, XRa); 3781 } 3782 } 3783 3784 /* 3785 * Q16SAT XRa, XRb, XRc 3786 * Packs four 16-bit signed integers in XRb and XRc to 3787 * four saturated unsigned 8-bit into XRa. 
 *
 */
static void gen_mxu_Q16SAT(DisasContext *ctx)
{
    uint32_t pad, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 3);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else {
        /* the most general case */
        TCGv t0 = tcg_temp_new();
        TCGv t1 = tcg_temp_new();
        TCGv t2 = tcg_temp_new();

        tcg_gen_movi_tl(t2, 0);
        if (XRb != 0) {
            /* XRb's halves clamp to [0, 255] and fill result bytes 3, 2 */
            TCGLabel *l_less_hi = gen_new_label();
            TCGLabel *l_less_lo = gen_new_label();
            TCGLabel *l_lo = gen_new_label();
            TCGLabel *l_greater_hi = gen_new_label();
            TCGLabel *l_greater_lo = gen_new_label();
            TCGLabel *l_done = gen_new_label();

            /* t0 = signed high half of XRb, saturated to [0, 255] */
            tcg_gen_sari_tl(t0, mxu_gpr[XRb - 1], 16);
            tcg_gen_brcondi_tl(TCG_COND_LT, t0, 0, l_less_hi);
            tcg_gen_brcondi_tl(TCG_COND_GT, t0, 255, l_greater_hi);
            tcg_gen_br(l_lo);
            gen_set_label(l_less_hi);
            tcg_gen_movi_tl(t0, 0);
            tcg_gen_br(l_lo);
            gen_set_label(l_greater_hi);
            tcg_gen_movi_tl(t0, 255);

            gen_set_label(l_lo);
            /* t1 = signed low half of XRb (shl/sar sign-extends it) */
            tcg_gen_shli_tl(t1, mxu_gpr[XRb - 1], 16);
            tcg_gen_sari_tl(t1, t1, 16);
            tcg_gen_brcondi_tl(TCG_COND_LT, t1, 0, l_less_lo);
            tcg_gen_brcondi_tl(TCG_COND_GT, t1, 255, l_greater_lo);
            tcg_gen_br(l_done);
            gen_set_label(l_less_lo);
            tcg_gen_movi_tl(t1, 0);
            tcg_gen_br(l_done);
            gen_set_label(l_greater_lo);
            tcg_gen_movi_tl(t1, 255);

            gen_set_label(l_done);
            tcg_gen_shli_tl(t2, t0, 24);
            tcg_gen_shli_tl(t1, t1, 16);
            tcg_gen_or_tl(t2, t2, t1);
        }

        if (XRc != 0) {
            /* XRc's halves clamp to [0, 255] and fill result bytes 1, 0 */
            TCGLabel *l_less_hi = gen_new_label();
            TCGLabel *l_less_lo = gen_new_label();
            TCGLabel *l_lo = gen_new_label();
            TCGLabel *l_greater_hi = gen_new_label();
            TCGLabel *l_greater_lo = gen_new_label();
            TCGLabel *l_done = gen_new_label();

            tcg_gen_sari_tl(t0, mxu_gpr[XRc - 1], 16);
            tcg_gen_brcondi_tl(TCG_COND_LT, t0, 0, l_less_hi);
            tcg_gen_brcondi_tl(TCG_COND_GT, t0, 255, l_greater_hi);
            tcg_gen_br(l_lo);
            gen_set_label(l_less_hi);
            tcg_gen_movi_tl(t0, 0);
            tcg_gen_br(l_lo);
            gen_set_label(l_greater_hi);
            tcg_gen_movi_tl(t0, 255);

            gen_set_label(l_lo);
            tcg_gen_shli_tl(t1, mxu_gpr[XRc - 1], 16);
            tcg_gen_sari_tl(t1, t1, 16);
            tcg_gen_brcondi_tl(TCG_COND_LT, t1, 0, l_less_lo);
            tcg_gen_brcondi_tl(TCG_COND_GT, t1, 255, l_greater_lo);
            tcg_gen_br(l_done);
            gen_set_label(l_less_lo);
            tcg_gen_movi_tl(t1, 0);
            tcg_gen_br(l_done);
            gen_set_label(l_greater_lo);
            tcg_gen_movi_tl(t1, 255);

            gen_set_label(l_done);
            tcg_gen_shli_tl(t0, t0, 8);
            tcg_gen_or_tl(t2, t2, t0);
            tcg_gen_or_tl(t2, t2, t1);
        }
        gen_store_mxu_gpr(t2, XRa);
    }
}

/*
 * Q16SCOP XRa, XRd, XRb, XRc
 * Determine sign of quad packed 16-bit signed values
 * in XRb and XRc put result in XRa and XRd respectively.
 */
static void gen_mxu_q16scop(DisasContext *ctx)
{
    uint32_t XRd, XRc, XRb, XRa;

    XRd = extract32(ctx->opcode, 18, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    TCGv t0 = tcg_temp_new();
    TCGv t1 = tcg_temp_new();
    TCGv t2 = tcg_temp_new();
    TCGv t3 = tcg_temp_new();
    TCGv t4 = tcg_temp_new();

    TCGLabel *l_b_hi_lt = gen_new_label();
    TCGLabel *l_b_hi_gt = gen_new_label();
    TCGLabel *l_b_lo = gen_new_label();
    TCGLabel *l_b_lo_lt = gen_new_label();
    TCGLabel *l_c_hi = gen_new_label();
    TCGLabel *l_c_hi_lt = gen_new_label();
    TCGLabel *l_c_hi_gt = gen_new_label();
    TCGLabel *l_c_lo = gen_new_label();
    TCGLabel *l_c_lo_lt = gen_new_label();
    TCGLabel *l_done = gen_new_label();

    gen_load_mxu_gpr(t0, XRb);
    gen_load_mxu_gpr(t1, XRc);

    /* each halfword maps to -1 / 0 / +1 per its sign; XRb -> t3 (XRa) */
    tcg_gen_sextract_tl(t2, t0, 16, 16);
    tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_b_hi_lt);
    tcg_gen_brcondi_tl(TCG_COND_GT, t2, 0, l_b_hi_gt);
    tcg_gen_movi_tl(t3, 0);
    tcg_gen_br(l_b_lo);
    gen_set_label(l_b_hi_lt);
    tcg_gen_movi_tl(t3, 0xffff0000);    /* high half = -1 */
    tcg_gen_br(l_b_lo);
    gen_set_label(l_b_hi_gt);
    tcg_gen_movi_tl(t3, 0x00010000);    /* high half = +1 */

    gen_set_label(l_b_lo);
    tcg_gen_sextract_tl(t2, t0, 0, 16);
    tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_c_hi);
    tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_b_lo_lt);
    tcg_gen_ori_tl(t3, t3, 0x00000001); /* low half = +1 */
    tcg_gen_br(l_c_hi);
    gen_set_label(l_b_lo_lt);
    tcg_gen_ori_tl(t3, t3, 0x0000ffff); /* low half = -1 */
    tcg_gen_br(l_c_hi);

    /* same classification for XRc -> t4 (goes to XRd) */
    gen_set_label(l_c_hi);
    tcg_gen_sextract_tl(t2, t1, 16, 16);
    tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_c_hi_lt);
    tcg_gen_brcondi_tl(TCG_COND_GT, t2, 0, l_c_hi_gt);
    tcg_gen_movi_tl(t4, 0);
    tcg_gen_br(l_c_lo);
    gen_set_label(l_c_hi_lt);
    tcg_gen_movi_tl(t4, 0xffff0000);
    tcg_gen_br(l_c_lo);
    gen_set_label(l_c_hi_gt);
    tcg_gen_movi_tl(t4, 0x00010000);

    gen_set_label(l_c_lo);
    tcg_gen_sextract_tl(t2, t1, 0, 16);
    tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_done);
    tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_c_lo_lt);
    tcg_gen_ori_tl(t4, t4, 0x00000001);
    tcg_gen_br(l_done);
    gen_set_label(l_c_lo_lt);
    tcg_gen_ori_tl(t4, t4, 0x0000ffff);

    gen_set_label(l_done);
    gen_store_mxu_gpr(t3, XRa);
    gen_store_mxu_gpr(t4, XRd);
}

/*
 * S32SFL XRa, XRd, XRb, XRc
 * Shuffle bytes according to one of four patterns.
 */
static void gen_mxu_s32sfl(DisasContext *ctx)
{
    uint32_t XRd, XRc, XRb, XRa, ptn2;

    XRd = extract32(ctx->opcode, 18, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);
    ptn2 = extract32(ctx->opcode, 24, 2);

    TCGv t0 = tcg_temp_new();
    TCGv t1 = tcg_temp_new();
    TCGv t2 = tcg_temp_new();
    TCGv t3 = tcg_temp_new();

    /* t0 = XRb bytes, t1 = XRc bytes; t2 -> XRa, t3 -> XRd */
    gen_load_mxu_gpr(t0, XRb);
    gen_load_mxu_gpr(t1, XRc);

    switch (ptn2) {
    case 0:
        /* byte-interleave pattern 0 (see ptn2 table in the manual) */
        tcg_gen_andi_tl(t2, t0, 0xff000000);
        tcg_gen_andi_tl(t3, t1, 0x000000ff);
        tcg_gen_deposit_tl(t3, t3, t0, 8, 8);
        tcg_gen_shri_tl(t0, t0, 8);
        tcg_gen_shri_tl(t1, t1, 8);
        tcg_gen_deposit_tl(t3, t3, t0, 24, 8);
        tcg_gen_deposit_tl(t3, t3, t1, 16, 8);
        tcg_gen_shri_tl(t0, t0, 8);
        tcg_gen_shri_tl(t1, t1, 8);
        tcg_gen_deposit_tl(t2, t2, t0, 8, 8);
        tcg_gen_deposit_tl(t2, t2, t1, 0, 8);
        tcg_gen_shri_tl(t1, t1, 8);
        tcg_gen_deposit_tl(t2, t2, t1, 16, 8);
        break;
    case 1:
        /* byte-interleave pattern 1 */
        tcg_gen_andi_tl(t2, t0, 0xff000000);
        tcg_gen_andi_tl(t3, t1, 0x000000ff);
        tcg_gen_deposit_tl(t3, t3, t0, 16, 8);
        tcg_gen_shri_tl(t0, t0, 8);
        tcg_gen_shri_tl(t1, t1, 8);
        tcg_gen_deposit_tl(t2, t2, t0, 16, 8);
        tcg_gen_deposit_tl(t2, t2, t1, 0, 8);
        tcg_gen_shri_tl(t0, t0, 8);
        tcg_gen_shri_tl(t1, t1, 8);
        tcg_gen_deposit_tl(t3, t3, t0, 24, 8);
        tcg_gen_deposit_tl(t3, t3, t1, 8, 8);
        tcg_gen_shri_tl(t1, t1, 8);
        tcg_gen_deposit_tl(t2, t2, t1, 8, 8);
        break;
    case 2:
        /* byte-interleave pattern 2 */
        tcg_gen_andi_tl(t2, t0, 0xff00ff00);
        tcg_gen_andi_tl(t3, t1, 0x00ff00ff);
        tcg_gen_deposit_tl(t3, t3, t0, 8, 8);
        tcg_gen_shri_tl(t0, t0, 16);
        tcg_gen_shri_tl(t1, t1, 8);
        tcg_gen_deposit_tl(t2, t2, t1, 0, 8);
        tcg_gen_deposit_tl(t3, t3, t0, 24, 8);
        tcg_gen_shri_tl(t1, t1, 16);
        tcg_gen_deposit_tl(t2, t2, t1, 16, 8);
        break;
    case 3:
        /* halfword swap: XRa = {XRb.H, XRc.H}, XRd = {XRb.L, XRc.L} */
        tcg_gen_andi_tl(t2, t0, 0xffff0000);
        tcg_gen_andi_tl(t3, t1, 0x0000ffff);
        tcg_gen_shri_tl(t1, t1, 16);
        tcg_gen_deposit_tl(t2, t2, t1, 0, 16);
        tcg_gen_deposit_tl(t3, t3, t0, 16, 16);
        break;
    }

    gen_store_mxu_gpr(t2, XRa);
    gen_store_mxu_gpr(t3, XRd);
}

/*
 * Q8SAD XRa, XRd, XRb, XRc
 * Typical SAD operation for motion estimation.
 */
static void gen_mxu_q8sad(DisasContext *ctx)
{
    uint32_t XRd, XRc, XRb, XRa;

    XRd = extract32(ctx->opcode, 18, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    TCGv t0 = tcg_temp_new();
    TCGv t1 = tcg_temp_new();
    TCGv t2 = tcg_temp_new();
    TCGv t3 = tcg_temp_new();
    TCGv t4 = tcg_temp_new();
    TCGv t5 = tcg_temp_new();

    gen_load_mxu_gpr(t2, XRb);
    gen_load_mxu_gpr(t3, XRc);
    gen_load_mxu_gpr(t5, XRd);
    tcg_gen_movi_tl(t4, 0);

    /* t4 = sum over four byte lanes of |XRb.byte[i] - XRc.byte[i]| */
    for (int i = 0; i < 4; i++) {
        tcg_gen_andi_tl(t0, t2, 0xff);
        tcg_gen_andi_tl(t1, t3, 0xff);
        tcg_gen_sub_tl(t0, t0, t1);
        tcg_gen_abs_tl(t0, t0);
        tcg_gen_add_tl(t4, t4, t0);
        if (i < 3) {
            /* advance both operands to the next byte lane */
            tcg_gen_shri_tl(t2, t2, 8);
            tcg_gen_shri_tl(t3, t3, 8);
        }
    }
    /* XRa = SAD, XRd accumulates the SAD */
    tcg_gen_add_tl(t5, t5, t4);
    gen_store_mxu_gpr(t4, XRa);
    gen_store_mxu_gpr(t5, XRd);
}

/*
 * MXU instruction category: align
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *                     S32ALN     S32ALNI
 */

/*
 *  S32ALNI XRc, XRb, XRa, optn3
 *  Arrange bytes from XRb and XRc according to one of five sets of
 *  rules determined by optn3, and place the result in XRa.
 */
static void gen_mxu_S32ALNI(DisasContext *ctx)
{
    uint32_t optn3, pad, XRc, XRb, XRa;

    optn3 = extract32(ctx->opcode, 23, 3);
    pad = extract32(ctx->opcode, 21, 2);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to all 0s */
        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
    } else if (unlikely(XRb == 0)) {
        /* XRb zero register -> just appropriately shift XRc into XRa */
        switch (optn3) {
        case MXU_OPTN3_PTN0:
            tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
            break;
        case MXU_OPTN3_PTN1:
        case MXU_OPTN3_PTN2:
        case MXU_OPTN3_PTN3:
            /* PTN n keeps XRc's top n bytes in the result's low bytes */
            tcg_gen_shri_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1],
                             8 * (4 - optn3));
            break;
        case MXU_OPTN3_PTN4:
            tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
            break;
        }
    } else if (unlikely(XRc == 0)) {
        /* XRc zero register -> just appropriately shift XRb into XRa */
        switch (optn3) {
        case MXU_OPTN3_PTN0:
            tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
            break;
        case MXU_OPTN3_PTN1:
        case MXU_OPTN3_PTN2:
        case MXU_OPTN3_PTN3:
            tcg_gen_shri_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], 8 * optn3);
            break;
        case MXU_OPTN3_PTN4:
            tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
            break;
        }
    } else if (unlikely(XRb == XRc)) {
        /* both operands same -> just rotation or moving from any of them */
        switch (optn3) {
        case MXU_OPTN3_PTN0:
        case MXU_OPTN3_PTN4:
            tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
            break;
        case MXU_OPTN3_PTN1:
        case MXU_OPTN3_PTN2:
        case MXU_OPTN3_PTN3:
            tcg_gen_rotli_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], 8 * optn3);
            break;
        }
    } else {
        /* the most general case */
        switch (optn3) {
        case MXU_OPTN3_PTN0:
            {
                /*
                 * XRb = ABCD, XRc = EFGH (byte-wise, MSB first):
                 * XRa = ABCD, i.e. a plain copy of XRb.
                 */
                tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
            }
            break;
        case MXU_OPTN3_PTN1:
            {
                /*
                 * Window slides one byte: XRa = BCDE,
                 * i.e. {XRb[23:0], XRc[31:24]}.
                 */
                TCGv_i32 t0 = tcg_temp_new();
                TCGv_i32 t1 = tcg_temp_new();

                tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x00FFFFFF);
                tcg_gen_shli_i32(t0, t0, 8);

                tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF000000);
                tcg_gen_shri_i32(t1, t1, 24);

                tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1);
            }
            break;
        case MXU_OPTN3_PTN2:
            {
                /*
                 * Window slides two bytes: XRa = CDEF,
                 * i.e. {XRb[15:0], XRc[31:16]}.
                 */
                TCGv_i32 t0 = tcg_temp_new();
                TCGv_i32 t1 = tcg_temp_new();

                tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x0000FFFF);
                tcg_gen_shli_i32(t0, t0, 16);

                tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFF0000);
                tcg_gen_shri_i32(t1, t1, 16);

                tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1);
            }
            break;
        case MXU_OPTN3_PTN3:
            {
                /*
                 * Window slides three bytes: XRa = DEFG,
                 * i.e. {XRb[7:0], XRc[31:8]}.
                 */
                TCGv_i32 t0 = tcg_temp_new();
                TCGv_i32 t1 = tcg_temp_new();

                tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x000000FF);
                tcg_gen_shli_i32(t0, t0, 24);

                tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFFFF00);
                tcg_gen_shri_i32(t1, t1, 8);

                tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1);
            }
            break;
        case MXU_OPTN3_PTN4:
            {
                /* XRa = EFGH, i.e. a plain copy of XRc. */
                tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
            }
            break;
        }
    }
}

/*
 *  S32ALN XRc, XRb, XRa, rs
 *  Arrange bytes from XRb and XRc according to one of five sets of
 *  rules determined by rs[2:0], and place the result in XRa.
 */
static void gen_mxu_S32ALN(DisasContext *ctx)
{
    uint32_t rs, XRc, XRb, XRa;

    rs = extract32(ctx->opcode, 21, 5);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to all 0s */
        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
    } else {
        /* the most general case */
        TCGv t0 = tcg_temp_new();
        TCGv t1 = tcg_temp_new();
        TCGv t2 = tcg_temp_new();
        TCGv t3 = tcg_temp_new();
        TCGLabel *l_exit = gen_new_label();
        TCGLabel *l_b_only = gen_new_label();
        TCGLabel *l_c_only = gen_new_label();

        gen_load_mxu_gpr(t0, XRb);
        gen_load_mxu_gpr(t1, XRc);
        gen_load_gpr(t2, rs);
        tcg_gen_andi_tl(t2, t2, 0x07);

        /* do nothing for undefined cases */
        tcg_gen_brcondi_tl(TCG_COND_GE, t2, 5, l_exit);

        /* pattern 0 = XRb only, pattern 4 = XRc only */
        tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_b_only);
        tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 4, l_c_only);

        /* patterns 1..3: XRa = {XRb << 8n, XRc >> (32 - 8n)} */
        tcg_gen_shli_tl(t2, t2, 3);
        tcg_gen_subfi_tl(t3, 32, t2);

        tcg_gen_shl_tl(t0, t0, t2);
        tcg_gen_shr_tl(t1, t1, t3);
        tcg_gen_or_tl(mxu_gpr[XRa - 1], t0, t1);
        tcg_gen_br(l_exit);

        gen_set_label(l_b_only);
        gen_store_mxu_gpr(t0, XRa);
        tcg_gen_br(l_exit);

        gen_set_label(l_c_only);
        gen_store_mxu_gpr(t1, XRa);

        gen_set_label(l_exit);
    }
}

/*
 *  S32MADD XRa, XRd, rb, rc
 *    32 to 64 bit signed multiply with subsequent add
 *    result stored in {XRa, XRd} pair, stain HI/LO.
 *  S32MADDU XRa, XRd, rb, rc
 *    32 to 64 bit unsigned multiply with subsequent add
 *    result stored in {XRa, XRd} pair, stain HI/LO.
 *  S32MSUB XRa, XRd, rb, rc
 *    32 to 64 bit signed multiply with subsequent subtract
 *    result stored in {XRa, XRd} pair, stain HI/LO.
 *  S32MSUBU XRa, XRd, rb, rc
 *    32 to 64 bit unsigned multiply with subsequent subtract
 *    result stored in {XRa, XRd} pair, stain HI/LO.
 */
static void gen_mxu_s32madd_sub(DisasContext *ctx, bool sub, bool uns)
{
    uint32_t XRa, XRd, Rb, Rc;

    XRa = extract32(ctx->opcode, 6, 4);
    XRd = extract32(ctx->opcode, 10, 4);
    Rb = extract32(ctx->opcode, 16, 5);
    Rc = extract32(ctx->opcode, 21, 5);

    if (unlikely(Rb == 0 || Rc == 0)) {
        /* do nothing because x + 0 * y => x */
    } else if (unlikely(XRa == 0 && XRd == 0)) {
        /* do nothing because result just dropped */
    } else {
        TCGv t0 = tcg_temp_new();
        TCGv t1 = tcg_temp_new();
        TCGv_i64 t2 = tcg_temp_new_i64();
        TCGv_i64 t3 = tcg_temp_new_i64();

        gen_load_gpr(t0, Rb);
        gen_load_gpr(t1, Rc);

        /* t2 = Rb * Rc, widened per signedness */
        if (uns) {
            tcg_gen_extu_tl_i64(t2, t0);
            tcg_gen_extu_tl_i64(t3, t1);
        } else {
            tcg_gen_ext_tl_i64(t2, t0);
            tcg_gen_ext_tl_i64(t3, t1);
        }
        tcg_gen_mul_i64(t2, t2, t3);

        gen_load_mxu_gpr(t0, XRa);
        gen_load_mxu_gpr(t1, XRd);

        /* 64-bit accumulator is the {XRa:XRd} pair (XRa = high word) */
        tcg_gen_concat_tl_i64(t3, t1, t0);
        if (sub) {
            tcg_gen_sub_i64(t3, t3, t2);
        } else {
            tcg_gen_add_i64(t3, t3, t2);
        }
        gen_move_low32(t1, t3);
        gen_move_high32(t0, t3);

        /* mirror the result into HI/LO ("stain" HI/LO as documented) */
        tcg_gen_mov_tl(cpu_HI[0], t0);
        tcg_gen_mov_tl(cpu_LO[0], t1);
4380 gen_store_mxu_gpr(t1, XRd); 4381 gen_store_mxu_gpr(t0, XRa); 4382 } 4383 } 4384 4385 /* 4386 * Decoding engine for MXU 4387 * ======================= 4388 */ 4389 4390 static void decode_opc_mxu__pool00(DisasContext *ctx) 4391 { 4392 uint32_t opcode = extract32(ctx->opcode, 18, 3); 4393 4394 switch (opcode) { 4395 case OPC_MXU_S32MAX: 4396 case OPC_MXU_S32MIN: 4397 gen_mxu_S32MAX_S32MIN(ctx); 4398 break; 4399 case OPC_MXU_D16MAX: 4400 case OPC_MXU_D16MIN: 4401 gen_mxu_D16MAX_D16MIN(ctx); 4402 break; 4403 case OPC_MXU_Q8MAX: 4404 case OPC_MXU_Q8MIN: 4405 gen_mxu_Q8MAX_Q8MIN(ctx); 4406 break; 4407 case OPC_MXU_Q8SLT: 4408 gen_mxu_q8slt(ctx, false); 4409 break; 4410 case OPC_MXU_Q8SLTU: 4411 gen_mxu_q8slt(ctx, true); 4412 break; 4413 default: 4414 MIPS_INVAL("decode_opc_mxu"); 4415 gen_reserved_instruction(ctx); 4416 break; 4417 } 4418 } 4419 4420 static bool decode_opc_mxu_s32madd_sub(DisasContext *ctx) 4421 { 4422 uint32_t opcode = extract32(ctx->opcode, 0, 6); 4423 uint32_t pad = extract32(ctx->opcode, 14, 2); 4424 4425 if (pad != 2) { 4426 /* MIPS32R1 MADD/MADDU/MSUB/MSUBU are on pad == 0 */ 4427 return false; 4428 } 4429 4430 switch (opcode) { 4431 case OPC_MXU_S32MADD: 4432 gen_mxu_s32madd_sub(ctx, false, false); 4433 break; 4434 case OPC_MXU_S32MADDU: 4435 gen_mxu_s32madd_sub(ctx, false, true); 4436 break; 4437 case OPC_MXU_S32MSUB: 4438 gen_mxu_s32madd_sub(ctx, true, false); 4439 break; 4440 case OPC_MXU_S32MSUBU: 4441 gen_mxu_s32madd_sub(ctx, true, true); 4442 break; 4443 default: 4444 return false; 4445 } 4446 return true; 4447 } 4448 4449 static void decode_opc_mxu__pool01(DisasContext *ctx) 4450 { 4451 uint32_t opcode = extract32(ctx->opcode, 18, 3); 4452 4453 switch (opcode) { 4454 case OPC_MXU_S32SLT: 4455 gen_mxu_S32SLT(ctx); 4456 break; 4457 case OPC_MXU_D16SLT: 4458 gen_mxu_D16SLT(ctx); 4459 break; 4460 case OPC_MXU_D16AVG: 4461 gen_mxu_d16avg(ctx, false); 4462 break; 4463 case OPC_MXU_D16AVGR: 4464 gen_mxu_d16avg(ctx, true); 4465 break; 4466 
case OPC_MXU_Q8AVG: 4467 gen_mxu_q8avg(ctx, false); 4468 break; 4469 case OPC_MXU_Q8AVGR: 4470 gen_mxu_q8avg(ctx, true); 4471 break; 4472 case OPC_MXU_Q8ADD: 4473 gen_mxu_Q8ADD(ctx); 4474 break; 4475 default: 4476 MIPS_INVAL("decode_opc_mxu"); 4477 gen_reserved_instruction(ctx); 4478 break; 4479 } 4480 } 4481 4482 static void decode_opc_mxu__pool02(DisasContext *ctx) 4483 { 4484 uint32_t opcode = extract32(ctx->opcode, 18, 3); 4485 4486 switch (opcode) { 4487 case OPC_MXU_S32CPS: 4488 gen_mxu_S32CPS(ctx); 4489 break; 4490 case OPC_MXU_D16CPS: 4491 gen_mxu_D16CPS(ctx); 4492 break; 4493 case OPC_MXU_Q8ABD: 4494 gen_mxu_Q8ABD(ctx); 4495 break; 4496 case OPC_MXU_Q16SAT: 4497 gen_mxu_Q16SAT(ctx); 4498 break; 4499 default: 4500 MIPS_INVAL("decode_opc_mxu"); 4501 gen_reserved_instruction(ctx); 4502 break; 4503 } 4504 } 4505 4506 static void decode_opc_mxu__pool03(DisasContext *ctx) 4507 { 4508 uint32_t opcode = extract32(ctx->opcode, 24, 2); 4509 4510 switch (opcode) { 4511 case OPC_MXU_D16MULF: 4512 gen_mxu_d16mul(ctx, true, true); 4513 break; 4514 case OPC_MXU_D16MULE: 4515 gen_mxu_d16mul(ctx, true, false); 4516 break; 4517 default: 4518 MIPS_INVAL("decode_opc_mxu"); 4519 gen_reserved_instruction(ctx); 4520 break; 4521 } 4522 } 4523 4524 static void decode_opc_mxu__pool04(DisasContext *ctx) 4525 { 4526 uint32_t reversed = extract32(ctx->opcode, 20, 1); 4527 uint32_t opcode = extract32(ctx->opcode, 10, 4); 4528 4529 /* Don't care about opcode bits as their meaning is unknown yet */ 4530 switch (opcode) { 4531 default: 4532 gen_mxu_s32ldxx(ctx, reversed, false); 4533 break; 4534 } 4535 } 4536 4537 static void decode_opc_mxu__pool05(DisasContext *ctx) 4538 { 4539 uint32_t reversed = extract32(ctx->opcode, 20, 1); 4540 uint32_t opcode = extract32(ctx->opcode, 10, 4); 4541 4542 /* Don't care about opcode bits as their meaning is unknown yet */ 4543 switch (opcode) { 4544 default: 4545 gen_mxu_s32stxx(ctx, reversed, false); 4546 break; 4547 } 4548 } 4549 4550 static void 
decode_opc_mxu__pool06(DisasContext *ctx) 4551 { 4552 uint32_t opcode = extract32(ctx->opcode, 10, 4); 4553 uint32_t strd2 = extract32(ctx->opcode, 14, 2); 4554 4555 switch (opcode) { 4556 case OPC_MXU_S32LDST: 4557 case OPC_MXU_S32LDSTR: 4558 if (strd2 <= 2) { 4559 gen_mxu_s32ldxvx(ctx, opcode, false, strd2); 4560 break; 4561 } 4562 /* fallthrough */ 4563 default: 4564 MIPS_INVAL("decode_opc_mxu"); 4565 gen_reserved_instruction(ctx); 4566 break; 4567 } 4568 } 4569 4570 static void decode_opc_mxu__pool07(DisasContext *ctx) 4571 { 4572 uint32_t opcode = extract32(ctx->opcode, 10, 4); 4573 uint32_t strd2 = extract32(ctx->opcode, 14, 2); 4574 4575 switch (opcode) { 4576 case OPC_MXU_S32LDST: 4577 case OPC_MXU_S32LDSTR: 4578 if (strd2 <= 2) { 4579 gen_mxu_s32stxvx(ctx, opcode, false, strd2); 4580 break; 4581 } 4582 /* fallthrough */ 4583 default: 4584 MIPS_INVAL("decode_opc_mxu"); 4585 gen_reserved_instruction(ctx); 4586 break; 4587 } 4588 } 4589 4590 static void decode_opc_mxu__pool08(DisasContext *ctx) 4591 { 4592 uint32_t reversed = extract32(ctx->opcode, 20, 1); 4593 uint32_t opcode = extract32(ctx->opcode, 10, 4); 4594 4595 /* Don't care about opcode bits as their meaning is unknown yet */ 4596 switch (opcode) { 4597 default: 4598 gen_mxu_s32ldxx(ctx, reversed, true); 4599 break; 4600 } 4601 } 4602 4603 static void decode_opc_mxu__pool09(DisasContext *ctx) 4604 { 4605 uint32_t reversed = extract32(ctx->opcode, 20, 1); 4606 uint32_t opcode = extract32(ctx->opcode, 10, 4); 4607 4608 /* Don't care about opcode bits as their meaning is unknown yet */ 4609 switch (opcode) { 4610 default: 4611 gen_mxu_s32stxx(ctx, reversed, true); 4612 break; 4613 } 4614 } 4615 4616 static void decode_opc_mxu__pool10(DisasContext *ctx) 4617 { 4618 uint32_t opcode = extract32(ctx->opcode, 10, 4); 4619 uint32_t strd2 = extract32(ctx->opcode, 14, 2); 4620 4621 switch (opcode) { 4622 case OPC_MXU_S32LDST: 4623 case OPC_MXU_S32LDSTR: 4624 if (strd2 <= 2) { 4625 gen_mxu_s32ldxvx(ctx, opcode, 
true, strd2); 4626 break; 4627 } 4628 /* fallthrough */ 4629 default: 4630 MIPS_INVAL("decode_opc_mxu"); 4631 gen_reserved_instruction(ctx); 4632 break; 4633 } 4634 } 4635 4636 static void decode_opc_mxu__pool11(DisasContext *ctx) 4637 { 4638 uint32_t opcode = extract32(ctx->opcode, 10, 4); 4639 uint32_t strd2 = extract32(ctx->opcode, 14, 2); 4640 4641 switch (opcode) { 4642 case OPC_MXU_S32LDST: 4643 case OPC_MXU_S32LDSTR: 4644 if (strd2 <= 2) { 4645 gen_mxu_s32stxvx(ctx, opcode, true, strd2); 4646 break; 4647 } 4648 /* fallthrough */ 4649 default: 4650 MIPS_INVAL("decode_opc_mxu"); 4651 gen_reserved_instruction(ctx); 4652 break; 4653 } 4654 } 4655 4656 static void decode_opc_mxu__pool12(DisasContext *ctx) 4657 { 4658 uint32_t opcode = extract32(ctx->opcode, 22, 2); 4659 4660 switch (opcode) { 4661 case OPC_MXU_D32ACC: 4662 gen_mxu_d32acc(ctx); 4663 break; 4664 case OPC_MXU_D32ACCM: 4665 gen_mxu_d32accm(ctx); 4666 break; 4667 case OPC_MXU_D32ASUM: 4668 gen_mxu_d32asum(ctx); 4669 break; 4670 default: 4671 MIPS_INVAL("decode_opc_mxu"); 4672 gen_reserved_instruction(ctx); 4673 break; 4674 } 4675 } 4676 4677 static void decode_opc_mxu__pool13(DisasContext *ctx) 4678 { 4679 uint32_t opcode = extract32(ctx->opcode, 22, 2); 4680 4681 switch (opcode) { 4682 case OPC_MXU_Q16ACC: 4683 gen_mxu_q16acc(ctx); 4684 break; 4685 case OPC_MXU_Q16ACCM: 4686 gen_mxu_q16accm(ctx); 4687 break; 4688 case OPC_MXU_D16ASUM: 4689 gen_mxu_d16asum(ctx); 4690 break; 4691 default: 4692 MIPS_INVAL("decode_opc_mxu"); 4693 gen_reserved_instruction(ctx); 4694 break; 4695 } 4696 } 4697 4698 static void decode_opc_mxu__pool14(DisasContext *ctx) 4699 { 4700 uint32_t opcode = extract32(ctx->opcode, 22, 2); 4701 4702 switch (opcode) { 4703 case OPC_MXU_Q8ADDE: 4704 gen_mxu_q8adde(ctx, false); 4705 break; 4706 case OPC_MXU_D8SUM: 4707 gen_mxu_d8sum(ctx, false); 4708 break; 4709 case OPC_MXU_D8SUMC: 4710 gen_mxu_d8sum(ctx, true); 4711 break; 4712 default: 4713 MIPS_INVAL("decode_opc_mxu"); 4714 
gen_reserved_instruction(ctx); 4715 break; 4716 } 4717 } 4718 4719 static void decode_opc_mxu__pool15(DisasContext *ctx) 4720 { 4721 uint32_t opcode = extract32(ctx->opcode, 14, 2); 4722 4723 switch (opcode) { 4724 case OPC_MXU_S32MUL: 4725 gen_mxu_s32mul(ctx, false); 4726 break; 4727 case OPC_MXU_S32MULU: 4728 gen_mxu_s32mul(ctx, true); 4729 break; 4730 case OPC_MXU_S32EXTR: 4731 gen_mxu_s32extr(ctx); 4732 break; 4733 case OPC_MXU_S32EXTRV: 4734 gen_mxu_s32extrv(ctx); 4735 break; 4736 default: 4737 MIPS_INVAL("decode_opc_mxu"); 4738 gen_reserved_instruction(ctx); 4739 break; 4740 } 4741 } 4742 4743 static void decode_opc_mxu__pool16(DisasContext *ctx) 4744 { 4745 uint32_t opcode = extract32(ctx->opcode, 18, 3); 4746 4747 switch (opcode) { 4748 case OPC_MXU_D32SARW: 4749 gen_mxu_d32sarl(ctx, true); 4750 break; 4751 case OPC_MXU_S32ALN: 4752 gen_mxu_S32ALN(ctx); 4753 break; 4754 case OPC_MXU_S32ALNI: 4755 gen_mxu_S32ALNI(ctx); 4756 break; 4757 case OPC_MXU_S32LUI: 4758 gen_mxu_s32lui(ctx); 4759 break; 4760 case OPC_MXU_S32NOR: 4761 gen_mxu_S32NOR(ctx); 4762 break; 4763 case OPC_MXU_S32AND: 4764 gen_mxu_S32AND(ctx); 4765 break; 4766 case OPC_MXU_S32OR: 4767 gen_mxu_S32OR(ctx); 4768 break; 4769 case OPC_MXU_S32XOR: 4770 gen_mxu_S32XOR(ctx); 4771 break; 4772 default: 4773 MIPS_INVAL("decode_opc_mxu"); 4774 gen_reserved_instruction(ctx); 4775 break; 4776 } 4777 } 4778 4779 static void decode_opc_mxu__pool17(DisasContext *ctx) 4780 { 4781 uint32_t opcode = extract32(ctx->opcode, 6, 3); 4782 uint32_t strd2 = extract32(ctx->opcode, 9, 2); 4783 4784 if (strd2 > 2) { 4785 MIPS_INVAL("decode_opc_mxu"); 4786 gen_reserved_instruction(ctx); 4787 return; 4788 } 4789 4790 switch (opcode) { 4791 case OPC_MXU_LXW: 4792 gen_mxu_lxx(ctx, strd2, MO_TE | MO_UL); 4793 break; 4794 case OPC_MXU_LXB: 4795 gen_mxu_lxx(ctx, strd2, MO_TE | MO_SB); 4796 break; 4797 case OPC_MXU_LXH: 4798 gen_mxu_lxx(ctx, strd2, MO_TE | MO_SW); 4799 break; 4800 case OPC_MXU_LXBU: 4801 gen_mxu_lxx(ctx, strd2, 
MO_TE | MO_UB); 4802 break; 4803 case OPC_MXU_LXHU: 4804 gen_mxu_lxx(ctx, strd2, MO_TE | MO_UW); 4805 break; 4806 default: 4807 MIPS_INVAL("decode_opc_mxu"); 4808 gen_reserved_instruction(ctx); 4809 break; 4810 } 4811 } 4812 4813 static void decode_opc_mxu__pool18(DisasContext *ctx) 4814 { 4815 uint32_t opcode = extract32(ctx->opcode, 18, 3); 4816 4817 switch (opcode) { 4818 case OPC_MXU_D32SLLV: 4819 gen_mxu_d32sxxv(ctx, false, false); 4820 break; 4821 case OPC_MXU_D32SLRV: 4822 gen_mxu_d32sxxv(ctx, true, false); 4823 break; 4824 case OPC_MXU_D32SARV: 4825 gen_mxu_d32sxxv(ctx, true, true); 4826 break; 4827 case OPC_MXU_Q16SLLV: 4828 gen_mxu_q16sxxv(ctx, false, false); 4829 break; 4830 case OPC_MXU_Q16SLRV: 4831 gen_mxu_q16sxxv(ctx, true, false); 4832 break; 4833 case OPC_MXU_Q16SARV: 4834 gen_mxu_q16sxxv(ctx, true, true); 4835 break; 4836 default: 4837 MIPS_INVAL("decode_opc_mxu"); 4838 gen_reserved_instruction(ctx); 4839 break; 4840 } 4841 } 4842 4843 static void decode_opc_mxu__pool19(DisasContext *ctx) 4844 { 4845 uint32_t opcode = extract32(ctx->opcode, 22, 4); 4846 4847 switch (opcode) { 4848 case OPC_MXU_Q8MUL: 4849 gen_mxu_q8mul_mac(ctx, false, false); 4850 break; 4851 case OPC_MXU_Q8MULSU: 4852 gen_mxu_q8mul_mac(ctx, true, false); 4853 break; 4854 default: 4855 MIPS_INVAL("decode_opc_mxu"); 4856 gen_reserved_instruction(ctx); 4857 break; 4858 } 4859 } 4860 4861 static void decode_opc_mxu__pool20(DisasContext *ctx) 4862 { 4863 uint32_t opcode = extract32(ctx->opcode, 18, 3); 4864 4865 switch (opcode) { 4866 case OPC_MXU_Q8MOVZ: 4867 gen_mxu_q8movzn(ctx, TCG_COND_NE); 4868 break; 4869 case OPC_MXU_Q8MOVN: 4870 gen_mxu_q8movzn(ctx, TCG_COND_EQ); 4871 break; 4872 case OPC_MXU_D16MOVZ: 4873 gen_mxu_d16movzn(ctx, TCG_COND_NE); 4874 break; 4875 case OPC_MXU_D16MOVN: 4876 gen_mxu_d16movzn(ctx, TCG_COND_EQ); 4877 break; 4878 case OPC_MXU_S32MOVZ: 4879 gen_mxu_s32movzn(ctx, TCG_COND_NE); 4880 break; 4881 case OPC_MXU_S32MOVN: 4882 gen_mxu_s32movzn(ctx, TCG_COND_EQ); 
4883 break; 4884 default: 4885 MIPS_INVAL("decode_opc_mxu"); 4886 gen_reserved_instruction(ctx); 4887 break; 4888 } 4889 } 4890 4891 static void decode_opc_mxu__pool21(DisasContext *ctx) 4892 { 4893 uint32_t opcode = extract32(ctx->opcode, 22, 2); 4894 4895 switch (opcode) { 4896 case OPC_MXU_Q8MAC: 4897 gen_mxu_q8mul_mac(ctx, false, true); 4898 break; 4899 case OPC_MXU_Q8MACSU: 4900 gen_mxu_q8mul_mac(ctx, true, true); 4901 break; 4902 default: 4903 MIPS_INVAL("decode_opc_mxu"); 4904 gen_reserved_instruction(ctx); 4905 break; 4906 } 4907 } 4908 4909 4910 bool decode_ase_mxu(DisasContext *ctx, uint32_t insn) 4911 { 4912 uint32_t opcode = extract32(insn, 0, 6); 4913 4914 if (opcode == OPC_MXU_S32M2I) { 4915 gen_mxu_s32m2i(ctx); 4916 return true; 4917 } 4918 4919 if (opcode == OPC_MXU_S32I2M) { 4920 gen_mxu_s32i2m(ctx); 4921 return true; 4922 } 4923 4924 { 4925 TCGv t_mxu_cr = tcg_temp_new(); 4926 TCGLabel *l_exit = gen_new_label(); 4927 4928 gen_load_mxu_cr(t_mxu_cr); 4929 tcg_gen_andi_tl(t_mxu_cr, t_mxu_cr, MXU_CR_MXU_EN); 4930 tcg_gen_brcondi_tl(TCG_COND_NE, t_mxu_cr, MXU_CR_MXU_EN, l_exit); 4931 4932 switch (opcode) { 4933 case OPC_MXU_S32MADD: 4934 case OPC_MXU_S32MADDU: 4935 case OPC_MXU_S32MSUB: 4936 case OPC_MXU_S32MSUBU: 4937 return decode_opc_mxu_s32madd_sub(ctx); 4938 case OPC_MXU__POOL00: 4939 decode_opc_mxu__pool00(ctx); 4940 break; 4941 case OPC_MXU_D16MUL: 4942 gen_mxu_d16mul(ctx, false, false); 4943 break; 4944 case OPC_MXU_D16MAC: 4945 gen_mxu_d16mac(ctx, false, false); 4946 break; 4947 case OPC_MXU_D16MACF: 4948 gen_mxu_d16mac(ctx, true, true); 4949 break; 4950 case OPC_MXU_D16MADL: 4951 gen_mxu_d16madl(ctx); 4952 break; 4953 case OPC_MXU_S16MAD: 4954 gen_mxu_s16mad(ctx); 4955 break; 4956 case OPC_MXU_Q16ADD: 4957 gen_mxu_q16add(ctx); 4958 break; 4959 case OPC_MXU_D16MACE: 4960 gen_mxu_d16mac(ctx, true, false); 4961 break; 4962 case OPC_MXU__POOL01: 4963 decode_opc_mxu__pool01(ctx); 4964 break; 4965 case OPC_MXU__POOL02: 4966 
decode_opc_mxu__pool02(ctx); 4967 break; 4968 case OPC_MXU__POOL03: 4969 decode_opc_mxu__pool03(ctx); 4970 break; 4971 case OPC_MXU__POOL04: 4972 decode_opc_mxu__pool04(ctx); 4973 break; 4974 case OPC_MXU__POOL05: 4975 decode_opc_mxu__pool05(ctx); 4976 break; 4977 case OPC_MXU__POOL06: 4978 decode_opc_mxu__pool06(ctx); 4979 break; 4980 case OPC_MXU__POOL07: 4981 decode_opc_mxu__pool07(ctx); 4982 break; 4983 case OPC_MXU__POOL08: 4984 decode_opc_mxu__pool08(ctx); 4985 break; 4986 case OPC_MXU__POOL09: 4987 decode_opc_mxu__pool09(ctx); 4988 break; 4989 case OPC_MXU__POOL10: 4990 decode_opc_mxu__pool10(ctx); 4991 break; 4992 case OPC_MXU__POOL11: 4993 decode_opc_mxu__pool11(ctx); 4994 break; 4995 case OPC_MXU_D32ADD: 4996 gen_mxu_d32add(ctx); 4997 break; 4998 case OPC_MXU__POOL12: 4999 decode_opc_mxu__pool12(ctx); 5000 break; 5001 case OPC_MXU__POOL13: 5002 decode_opc_mxu__pool13(ctx); 5003 break; 5004 case OPC_MXU__POOL14: 5005 decode_opc_mxu__pool14(ctx); 5006 break; 5007 case OPC_MXU_Q8ACCE: 5008 gen_mxu_q8adde(ctx, true); 5009 break; 5010 case OPC_MXU_S8LDD: 5011 gen_mxu_s8ldd(ctx, false); 5012 break; 5013 case OPC_MXU_S8STD: 5014 gen_mxu_s8std(ctx, false); 5015 break; 5016 case OPC_MXU_S8LDI: 5017 gen_mxu_s8ldd(ctx, true); 5018 break; 5019 case OPC_MXU_S8SDI: 5020 gen_mxu_s8std(ctx, true); 5021 break; 5022 case OPC_MXU__POOL15: 5023 decode_opc_mxu__pool15(ctx); 5024 break; 5025 case OPC_MXU__POOL16: 5026 decode_opc_mxu__pool16(ctx); 5027 break; 5028 case OPC_MXU__POOL17: 5029 decode_opc_mxu__pool17(ctx); 5030 break; 5031 case OPC_MXU_S16LDD: 5032 gen_mxu_s16ldd(ctx, false); 5033 break; 5034 case OPC_MXU_S16STD: 5035 gen_mxu_s16std(ctx, false); 5036 break; 5037 case OPC_MXU_S16LDI: 5038 gen_mxu_s16ldd(ctx, true); 5039 break; 5040 case OPC_MXU_S16SDI: 5041 gen_mxu_s16std(ctx, true); 5042 break; 5043 case OPC_MXU_D32SLL: 5044 gen_mxu_d32sxx(ctx, false, false); 5045 break; 5046 case OPC_MXU_D32SLR: 5047 gen_mxu_d32sxx(ctx, true, false); 5048 break; 5049 case 
OPC_MXU_D32SARL: 5050 gen_mxu_d32sarl(ctx, false); 5051 break; 5052 case OPC_MXU_D32SAR: 5053 gen_mxu_d32sxx(ctx, true, true); 5054 break; 5055 case OPC_MXU_Q16SLL: 5056 gen_mxu_q16sxx(ctx, false, false); 5057 break; 5058 case OPC_MXU__POOL18: 5059 decode_opc_mxu__pool18(ctx); 5060 break; 5061 case OPC_MXU_Q16SLR: 5062 gen_mxu_q16sxx(ctx, true, false); 5063 break; 5064 case OPC_MXU_Q16SAR: 5065 gen_mxu_q16sxx(ctx, true, true); 5066 break; 5067 case OPC_MXU__POOL19: 5068 decode_opc_mxu__pool19(ctx); 5069 break; 5070 case OPC_MXU__POOL20: 5071 decode_opc_mxu__pool20(ctx); 5072 break; 5073 case OPC_MXU__POOL21: 5074 decode_opc_mxu__pool21(ctx); 5075 break; 5076 case OPC_MXU_Q16SCOP: 5077 gen_mxu_q16scop(ctx); 5078 break; 5079 case OPC_MXU_Q8MADL: 5080 gen_mxu_q8madl(ctx); 5081 break; 5082 case OPC_MXU_S32SFL: 5083 gen_mxu_s32sfl(ctx); 5084 break; 5085 case OPC_MXU_Q8SAD: 5086 gen_mxu_q8sad(ctx); 5087 break; 5088 default: 5089 return false; 5090 } 5091 5092 gen_set_label(l_exit); 5093 } 5094 5095 return true; 5096 } 5097