1 /* 2 * Ingenic XBurst Media eXtension Unit (MXU) translation routines. 3 * 4 * Copyright (c) 2004-2005 Jocelyn Mayer 5 * Copyright (c) 2006 Marius Groeger (FPU operations) 6 * Copyright (c) 2006 Thiemo Seufer (MIPS32R2 support) 7 * Copyright (c) 2009 CodeSourcery (MIPS16 and microMIPS support) 8 * Copyright (c) 2012 Jia Liu & Dongxue Zhang (MIPS ASE DSP support) 9 * 10 * SPDX-License-Identifier: LGPL-2.1-or-later 11 * 12 * Datasheet: 13 * 14 * "XBurst® Instruction Set Architecture MIPS eXtension/enhanced Unit 15 * Programming Manual", Ingenic Semiconductor Co, Ltd., revision June 2, 2017 16 */ 17 18 #include "qemu/osdep.h" 19 #include "translate.h" 20 21 /* 22 * 23 * AN OVERVIEW OF MXU EXTENSION INSTRUCTION SET 24 * ============================================ 25 * 26 * 27 * MXU (full name: MIPS eXtension/enhanced Unit) is a SIMD extension of MIPS32 28 * instructions set. It is designed to fit the needs of signal, graphical and 29 * video processing applications. MXU instruction set is used in Xburst family 30 * of microprocessors by Ingenic. 31 * 32 * MXU unit contains 17 registers called X0-X16. X0 is always zero, and X16 is 33 * the control register. 
34 * 35 * 36 * The notation used in MXU assembler mnemonics 37 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 38 * 39 * Register operands: 40 * 41 * XRa, XRb, XRc, XRd - MXU registers 42 * Rb, Rc, Rd, Rs, Rt - general purpose MIPS registers 43 * 44 * Non-register operands: 45 * 46 * aptn1 - 1-bit accumulate add/subtract pattern 47 * aptn2 - 2-bit accumulate add/subtract pattern 48 * eptn2 - 2-bit execute add/subtract pattern 49 * optn2 - 2-bit operand pattern 50 * optn3 - 3-bit operand pattern 51 * sft4 - 4-bit shift amount 52 * strd2 - 2-bit stride amount 53 * 54 * Prefixes: 55 * 56 * Level of parallelism: Operand size: 57 * S - single operation at a time 32 - word 58 * D - two operations in parallel 16 - half word 59 * Q - four operations in parallel 8 - byte 60 * 61 * Operations: 62 * 63 * ADD - Add or subtract 64 * ADDC - Add with carry-in 65 * ACC - Accumulate 66 * ASUM - Sum together then accumulate (add or subtract) 67 * ASUMC - Sum together then accumulate (add or subtract) with carry-in 68 * AVG - Average between 2 operands 69 * ABD - Absolute difference 70 * ALN - Align data 71 * AND - Logical bitwise 'and' operation 72 * CPS - Copy sign 73 * EXTR - Extract bits 74 * I2M - Move from GPR register to MXU register 75 * LDD - Load data from memory to XRF 76 * LDI - Load data from memory to XRF (and increase the address base) 77 * LUI - Load unsigned immediate 78 * MUL - Multiply 79 * MULU - Unsigned multiply 80 * MADD - 64-bit operand add 32x32 product 81 * MSUB - 64-bit operand subtract 32x32 product 82 * MAC - Multiply and accumulate (add or subtract) 83 * MAD - Multiply and add or subtract 84 * MAX - Maximum between 2 operands 85 * MIN - Minimum between 2 operands 86 * M2I - Move from MXU register to GPR register 87 * MOVZ - Move if zero 88 * MOVN - Move if non-zero 89 * NOR - Logical bitwise 'nor' operation 90 * OR - Logical bitwise 'or' operation 91 * STD - Store data from XRF to memory 92 * SDI - Store data from XRF to memory (and increase the address 
base) 93 * SLT - Set of less than comparison 94 * SAD - Sum of absolute differences 95 * SLL - Logical shift left 96 * SLR - Logical shift right 97 * SAR - Arithmetic shift right 98 * SAT - Saturation 99 * SFL - Shuffle 100 * SCOP - Calculate x’s scope (-1, means x<0; 0, means x==0; 1, means x>0) 101 * XOR - Logical bitwise 'exclusive or' operation 102 * 103 * Suffixes: 104 * 105 * E - Expand results 106 * F - Fixed point multiplication 107 * L - Low part result 108 * R - Doing rounding 109 * V - Variable instead of immediate 110 * W - Combine above L and V 111 * 112 * 113 * The list of MXU instructions grouped by functionality 114 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 115 * 116 * Load/Store instructions Multiplication instructions 117 * ----------------------- --------------------------- 118 * 119 * S32LDD XRa, Rb, s12 S32MADD XRa, XRd, Rs, Rt 120 * S32STD XRa, Rb, s12 S32MADDU XRa, XRd, Rs, Rt 121 * S32LDDV XRa, Rb, rc, strd2 S32MSUB XRa, XRd, Rs, Rt 122 * S32STDV XRa, Rb, rc, strd2 S32MSUBU XRa, XRd, Rs, Rt 123 * S32LDI XRa, Rb, s12 S32MUL XRa, XRd, Rs, Rt 124 * S32SDI XRa, Rb, s12 S32MULU XRa, XRd, Rs, Rt 125 * S32LDIV XRa, Rb, rc, strd2 D16MUL XRa, XRb, XRc, XRd, optn2 126 * S32SDIV XRa, Rb, rc, strd2 D16MULE XRa, XRb, XRc, optn2 127 * S32LDDR XRa, Rb, s12 D16MULF XRa, XRb, XRc, optn2 128 * S32STDR XRa, Rb, s12 D16MAC XRa, XRb, XRc, XRd, aptn2, optn2 129 * S32LDDVR XRa, Rb, rc, strd2 D16MACE XRa, XRb, XRc, XRd, aptn2, optn2 130 * S32STDVR XRa, Rb, rc, strd2 D16MACF XRa, XRb, XRc, XRd, aptn2, optn2 131 * S32LDIR XRa, Rb, s12 D16MADL XRa, XRb, XRc, XRd, aptn2, optn2 132 * S32SDIR XRa, Rb, s12 S16MAD XRa, XRb, XRc, XRd, aptn1, optn2 133 * S32LDIVR XRa, Rb, rc, strd2 Q8MUL XRa, XRb, XRc, XRd 134 * S32SDIVR XRa, Rb, rc, strd2 Q8MULSU XRa, XRb, XRc, XRd 135 * S16LDD XRa, Rb, s10, eptn2 Q8MAC XRa, XRb, XRc, XRd, aptn2 136 * S16STD XRa, Rb, s10, eptn2 Q8MACSU XRa, XRb, XRc, XRd, aptn2 137 * S16LDI XRa, Rb, s10, eptn2 Q8MADL XRa, XRb, XRc, XRd, aptn2 
138 * S16SDI XRa, Rb, s10, eptn2 139 * S8LDD XRa, Rb, s8, eptn3 140 * S8STD XRa, Rb, s8, eptn3 Addition and subtraction instructions 141 * S8LDI XRa, Rb, s8, eptn3 ------------------------------------- 142 * S8SDI XRa, Rb, s8, eptn3 143 * LXW Rd, Rs, Rt, strd2 D32ADD XRa, XRb, XRc, XRd, eptn2 144 * LXH Rd, Rs, Rt, strd2 D32ADDC XRa, XRb, XRc, XRd 145 * LXHU Rd, Rs, Rt, strd2 D32ACC XRa, XRb, XRc, XRd, eptn2 146 * LXB Rd, Rs, Rt, strd2 D32ACCM XRa, XRb, XRc, XRd, eptn2 147 * LXBU Rd, Rs, Rt, strd2 D32ASUM XRa, XRb, XRc, XRd, eptn2 148 * S32CPS XRa, XRb, XRc 149 * Q16ADD XRa, XRb, XRc, XRd, eptn2, optn2 150 * Comparison instructions Q16ACC XRa, XRb, XRc, XRd, eptn2 151 * ----------------------- Q16ACCM XRa, XRb, XRc, XRd, eptn2 152 * D16ASUM XRa, XRb, XRc, XRd, eptn2 153 * S32MAX XRa, XRb, XRc D16CPS XRa, XRb, 154 * S32MIN XRa, XRb, XRc D16AVG XRa, XRb, XRc 155 * S32SLT XRa, XRb, XRc D16AVGR XRa, XRb, XRc 156 * S32MOVZ XRa, XRb, XRc Q8ADD XRa, XRb, XRc, eptn2 157 * S32MOVN XRa, XRb, XRc Q8ADDE XRa, XRb, XRc, XRd, eptn2 158 * D16MAX XRa, XRb, XRc Q8ACCE XRa, XRb, XRc, XRd, eptn2 159 * D16MIN XRa, XRb, XRc Q8ABD XRa, XRb, XRc 160 * D16SLT XRa, XRb, XRc Q8SAD XRa, XRb, XRc, XRd 161 * D16MOVZ XRa, XRb, XRc Q8AVG XRa, XRb, XRc 162 * D16MOVN XRa, XRb, XRc Q8AVGR XRa, XRb, XRc 163 * Q8MAX XRa, XRb, XRc D8SUM XRa, XRb, XRc, XRd 164 * Q8MIN XRa, XRb, XRc D8SUMC XRa, XRb, XRc, XRd 165 * Q8SLT XRa, XRb, XRc 166 * Q8SLTU XRa, XRb, XRc 167 * Q8MOVZ XRa, XRb, XRc Shift instructions 168 * Q8MOVN XRa, XRb, XRc ------------------ 169 * 170 * D32SLL XRa, XRb, XRc, XRd, sft4 171 * Bitwise instructions D32SLR XRa, XRb, XRc, XRd, sft4 172 * -------------------- D32SAR XRa, XRb, XRc, XRd, sft4 173 * D32SARL XRa, XRb, XRc, sft4 174 * S32NOR XRa, XRb, XRc D32SLLV XRa, XRb, Rb 175 * S32AND XRa, XRb, XRc D32SLRV XRa, XRb, Rb 176 * S32XOR XRa, XRb, XRc D32SARV XRa, XRb, Rb 177 * S32OR XRa, XRb, XRc D32SARW XRa, XRb, XRc, Rb 178 * Q16SLL XRa, XRb, XRc, XRd, sft4 179 * Q16SLR XRa, XRb, XRc, XRd, 
sft4 180 * Miscellaneous instructions Q16SAR XRa, XRb, XRc, XRd, sft4 181 * ------------------------- Q16SLLV XRa, XRb, Rb 182 * Q16SLRV XRa, XRb, Rb 183 * S32SFL XRa, XRb, XRc, XRd, optn2 Q16SARV XRa, XRb, Rb 184 * S32ALN XRa, XRb, XRc, Rb 185 * S32ALNI XRa, XRb, XRc, s3 186 * S32LUI XRa, s8, optn3 Move instructions 187 * S32EXTR XRa, XRb, Rb, bits5 ----------------- 188 * S32EXTRV XRa, XRb, Rs, Rt 189 * Q16SCOP XRa, XRb, XRc, XRd S32M2I XRa, Rb 190 * Q16SAT XRa, XRb, XRc S32I2M XRa, Rb 191 * 192 * 193 * The opcode organization of MXU instructions 194 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 195 * 196 * The bits 31..26 of all MXU instructions are equal to 0x1C (also referred 197 * as opcode SPECIAL2 in the base MIPS ISA). The organization and meaning of 198 * other bits up to the instruction level is as follows: 199 * 200 * bits 201 * 05..00 202 * 203 * ┌─ 000000 ─ OPC_MXU_S32MADD 204 * ├─ 000001 ─ OPC_MXU_S32MADDU 205 * ├─ 000010 ─ <not assigned> (non-MXU OPC_MUL) 206 * │ 207 * │ 20..18 208 * ├─ 000011 ─ OPC_MXU__POOL00 ─┬─ 000 ─ OPC_MXU_S32MAX 209 * │ ├─ 001 ─ OPC_MXU_S32MIN 210 * │ ├─ 010 ─ OPC_MXU_D16MAX 211 * │ ├─ 011 ─ OPC_MXU_D16MIN 212 * │ ├─ 100 ─ OPC_MXU_Q8MAX 213 * │ ├─ 101 ─ OPC_MXU_Q8MIN 214 * │ ├─ 110 ─ OPC_MXU_Q8SLT 215 * │ └─ 111 ─ OPC_MXU_Q8SLTU 216 * ├─ 000100 ─ OPC_MXU_S32MSUB 217 * ├─ 000101 ─ OPC_MXU_S32MSUBU 20..18 218 * ├─ 000110 ─ OPC_MXU__POOL01 ─┬─ 000 ─ OPC_MXU_S32SLT 219 * │ ├─ 001 ─ OPC_MXU_D16SLT 220 * │ ├─ 010 ─ OPC_MXU_D16AVG 221 * │ ├─ 011 ─ OPC_MXU_D16AVGR 222 * │ ├─ 100 ─ OPC_MXU_Q8AVG 223 * │ ├─ 101 ─ OPC_MXU_Q8AVGR 224 * │ └─ 111 ─ OPC_MXU_Q8ADD 225 * │ 226 * │ 20..18 227 * ├─ 000111 ─ OPC_MXU__POOL02 ─┬─ 000 ─ OPC_MXU_S32CPS 228 * │ ├─ 010 ─ OPC_MXU_D16CPS 229 * │ ├─ 100 ─ OPC_MXU_Q8ABD 230 * │ └─ 110 ─ OPC_MXU_Q16SAT 231 * ├─ 001000 ─ OPC_MXU_D16MUL 232 * │ 25..24 233 * ├─ 001001 ─ OPC_MXU__POOL03 ─┬─ 00 ─ OPC_MXU_D16MULF 234 * │ └─ 01 ─ OPC_MXU_D16MULE 235 * ├─ 001010 ─ OPC_MXU_D16MAC 236 * ├─ 001011 ─ OPC_MXU_D16MACF 
237 * ├─ 001100 ─ OPC_MXU_D16MADL 238 * ├─ 001101 ─ OPC_MXU_S16MAD 239 * ├─ 001110 ─ OPC_MXU_Q16ADD 240 * ├─ 001111 ─ OPC_MXU_D16MACE 20 (13..10 don't care) 241 * │ ┌─ 0 ─ OPC_MXU_S32LDD 242 * ├─ 010000 ─ OPC_MXU__POOL04 ─┴─ 1 ─ OPC_MXU_S32LDDR 243 * │ 244 * │ 20 (13..10 don't care) 245 * ├─ 010001 ─ OPC_MXU__POOL05 ─┬─ 0 ─ OPC_MXU_S32STD 246 * │ └─ 1 ─ OPC_MXU_S32STDR 247 * │ 248 * │ 13..10 249 * ├─ 010010 ─ OPC_MXU__POOL06 ─┬─ 0000 ─ OPC_MXU_S32LDDV 250 * │ └─ 0001 ─ OPC_MXU_S32LDDVR 251 * │ 252 * │ 13..10 253 * ├─ 010011 ─ OPC_MXU__POOL07 ─┬─ 0000 ─ OPC_MXU_S32STDV 254 * │ └─ 0001 ─ OPC_MXU_S32STDVR 255 * │ 256 * │ 20 (13..10 don't care) 257 * ├─ 010100 ─ OPC_MXU__POOL08 ─┬─ 0 ─ OPC_MXU_S32LDI 258 * │ └─ 1 ─ OPC_MXU_S32LDIR 259 * │ 260 * │ 20 (13..10 don't care) 261 * ├─ 010101 ─ OPC_MXU__POOL09 ─┬─ 0 ─ OPC_MXU_S32SDI 262 * │ └─ 1 ─ OPC_MXU_S32SDIR 263 * │ 264 * │ 13..10 265 * ├─ 010110 ─ OPC_MXU__POOL10 ─┬─ 0000 ─ OPC_MXU_S32LDIV 266 * │ └─ 0001 ─ OPC_MXU_S32LDIVR 267 * │ 268 * │ 13..10 269 * ├─ 010111 ─ OPC_MXU__POOL11 ─┬─ 0000 ─ OPC_MXU_S32SDIV 270 * │ └─ 0001 ─ OPC_MXU_S32SDIVR 271 * ├─ 011000 ─ OPC_MXU_D32ADD (catches D32ADDC too) 272 * │ 23..22 273 * MXU ├─ 011001 ─ OPC_MXU__POOL12 ─┬─ 00 ─ OPC_MXU_D32ACC 274 * opcodes ─┤ ├─ 01 ─ OPC_MXU_D32ACCM 275 * │ └─ 10 ─ OPC_MXU_D32ASUM 276 * ├─ 011010 ─ <not assigned> 277 * │ 23..22 278 * ├─ 011011 ─ OPC_MXU__POOL13 ─┬─ 00 ─ OPC_MXU_Q16ACC 279 * │ ├─ 01 ─ OPC_MXU_Q16ACCM 280 * │ └─ 10 ─ OPC_MXU_D16ASUM 281 * │ 282 * │ 23..22 283 * ├─ 011100 ─ OPC_MXU__POOL14 ─┬─ 00 ─ OPC_MXU_Q8ADDE 284 * │ ├─ 01 ─ OPC_MXU_D8SUM 285 * ├─ 011101 ─ OPC_MXU_Q8ACCE └─ 10 ─ OPC_MXU_D8SUMC 286 * ├─ 011110 ─ <not assigned> 287 * ├─ 011111 ─ <not assigned> 288 * ├─ 100000 ─ <not assigned> (overlaps with CLZ) 289 * ├─ 100001 ─ <not assigned> (overlaps with CLO) 290 * ├─ 100010 ─ OPC_MXU_S8LDD 291 * ├─ 100011 ─ OPC_MXU_S8STD 15..14 292 * ├─ 100100 ─ OPC_MXU_S8LDI ┌─ 00 ─ OPC_MXU_S32MUL 293 * ├─ 100101 ─ OPC_MXU_S8SDI ├─ 01 ─ OPC_MXU_S32MULU 
294 * │ ├─ 10 ─ OPC_MXU_S32EXTR 295 * ├─ 100110 ─ OPC_MXU__POOL15 ─┴─ 11 ─ OPC_MXU_S32EXTRV 296 * │ 297 * │ 20..18 298 * ├─ 100111 ─ OPC_MXU__POOL16 ─┬─ 000 ─ OPC_MXU_D32SARW 299 * │ ├─ 001 ─ OPC_MXU_S32ALN 300 * │ ├─ 010 ─ OPC_MXU_S32ALNI 301 * │ ├─ 011 ─ OPC_MXU_S32LUI 302 * │ ├─ 100 ─ OPC_MXU_S32NOR 303 * │ ├─ 101 ─ OPC_MXU_S32AND 304 * │ ├─ 110 ─ OPC_MXU_S32OR 305 * │ └─ 111 ─ OPC_MXU_S32XOR 306 * │ 307 * │ 8..6 308 * ├─ 101000 ─ OPC_MXU__POOL17 ─┬─ 000 ─ OPC_MXU_LXB 309 * │ ├─ 001 ─ OPC_MXU_LXH 310 * ├─ 101001 ─ <not assigned> ├─ 011 ─ OPC_MXU_LXW 311 * ├─ 101010 ─ OPC_MXU_S16LDD ├─ 100 ─ OPC_MXU_LXBU 312 * ├─ 101011 ─ OPC_MXU_S16STD └─ 101 ─ OPC_MXU_LXHU 313 * ├─ 101100 ─ OPC_MXU_S16LDI 314 * ├─ 101101 ─ OPC_MXU_S16SDI 315 * ├─ 101110 ─ OPC_MXU_S32M2I 316 * ├─ 101111 ─ OPC_MXU_S32I2M 317 * ├─ 110000 ─ OPC_MXU_D32SLL 318 * ├─ 110001 ─ OPC_MXU_D32SLR 20..18 319 * ├─ 110010 ─ OPC_MXU_D32SARL ┌─ 000 ─ OPC_MXU_D32SLLV 320 * ├─ 110011 ─ OPC_MXU_D32SAR ├─ 001 ─ OPC_MXU_D32SLRV 321 * ├─ 110100 ─ OPC_MXU_Q16SLL ├─ 011 ─ OPC_MXU_D32SARV 322 * ├─ 110101 ─ OPC_MXU_Q16SLR ├─ 100 ─ OPC_MXU_Q16SLLV 323 * │ ├─ 101 ─ OPC_MXU_Q16SLRV 324 * ├─ 110110 ─ OPC_MXU__POOL18 ─┴─ 111 ─ OPC_MXU_Q16SARV 325 * │ 326 * ├─ 110111 ─ OPC_MXU_Q16SAR 327 * │ 23..22 328 * ├─ 111000 ─ OPC_MXU__POOL19 ─┬─ 00 ─ OPC_MXU_Q8MUL 329 * │ └─ 10 ─ OPC_MXU_Q8MULSU 330 * │ 331 * │ 20..18 332 * ├─ 111001 ─ OPC_MXU__POOL20 ─┬─ 000 ─ OPC_MXU_Q8MOVZ 333 * │ ├─ 001 ─ OPC_MXU_Q8MOVN 334 * │ ├─ 010 ─ OPC_MXU_D16MOVZ 335 * │ ├─ 011 ─ OPC_MXU_D16MOVN 336 * │ ├─ 100 ─ OPC_MXU_S32MOVZ 337 * │ └─ 101 ─ OPC_MXU_S32MOVN 338 * │ 339 * │ 23..22 340 * ├─ 111010 ─ OPC_MXU__POOL21 ─┬─ 00 ─ OPC_MXU_Q8MAC 341 * │ └─ 10 ─ OPC_MXU_Q8MACSU 342 * ├─ 111011 ─ OPC_MXU_Q16SCOP 343 * ├─ 111100 ─ OPC_MXU_Q8MADL 344 * ├─ 111101 ─ OPC_MXU_S32SFL 345 * ├─ 111110 ─ OPC_MXU_Q8SAD 346 * └─ 111111 ─ <not assigned> (overlaps with SDBBP) 347 * 348 * 349 * Compiled after: 350 * 351 * "XBurst® Instruction Set Architecture MIPS eXtension/enhanced 
Unit 352 * Programming Manual", Ingenic Semiconductor Co, Ltd., revision June 2, 2017 353 */ 354 355 enum { 356 OPC_MXU_S32MADD = 0x00, 357 OPC_MXU_S32MADDU = 0x01, 358 OPC_MXU__POOL00 = 0x03, 359 OPC_MXU_S32MSUB = 0x04, 360 OPC_MXU_S32MSUBU = 0x05, 361 OPC_MXU__POOL01 = 0x06, 362 OPC_MXU__POOL02 = 0x07, 363 OPC_MXU_D16MUL = 0x08, 364 OPC_MXU__POOL03 = 0x09, 365 OPC_MXU_D16MAC = 0x0A, 366 OPC_MXU_D16MACF = 0x0B, 367 OPC_MXU_D16MADL = 0x0C, 368 OPC_MXU_S16MAD = 0x0D, 369 OPC_MXU_Q16ADD = 0x0E, 370 OPC_MXU_D16MACE = 0x0F, 371 OPC_MXU__POOL04 = 0x10, 372 OPC_MXU__POOL05 = 0x11, 373 OPC_MXU__POOL06 = 0x12, 374 OPC_MXU__POOL07 = 0x13, 375 OPC_MXU__POOL08 = 0x14, 376 OPC_MXU__POOL09 = 0x15, 377 OPC_MXU__POOL10 = 0x16, 378 OPC_MXU__POOL11 = 0x17, 379 OPC_MXU_D32ADD = 0x18, 380 OPC_MXU__POOL12 = 0x19, 381 OPC_MXU__POOL13 = 0x1B, 382 OPC_MXU__POOL14 = 0x1C, 383 OPC_MXU_Q8ACCE = 0x1D, 384 OPC_MXU_S8LDD = 0x22, 385 OPC_MXU_S8STD = 0x23, 386 OPC_MXU_S8LDI = 0x24, 387 OPC_MXU_S8SDI = 0x25, 388 OPC_MXU__POOL15 = 0x26, 389 OPC_MXU__POOL16 = 0x27, 390 OPC_MXU__POOL17 = 0x28, 391 OPC_MXU_S16LDD = 0x2A, 392 OPC_MXU_S16STD = 0x2B, 393 OPC_MXU_S16LDI = 0x2C, 394 OPC_MXU_S16SDI = 0x2D, 395 OPC_MXU_S32M2I = 0x2E, 396 OPC_MXU_S32I2M = 0x2F, 397 OPC_MXU_D32SLL = 0x30, 398 OPC_MXU_D32SLR = 0x31, 399 OPC_MXU_D32SARL = 0x32, 400 OPC_MXU_D32SAR = 0x33, 401 OPC_MXU_Q16SLL = 0x34, 402 OPC_MXU_Q16SLR = 0x35, 403 OPC_MXU__POOL18 = 0x36, 404 OPC_MXU_Q16SAR = 0x37, 405 OPC_MXU__POOL19 = 0x38, 406 OPC_MXU__POOL20 = 0x39, 407 OPC_MXU__POOL21 = 0x3A, 408 OPC_MXU_Q16SCOP = 0x3B, 409 }; 410 411 412 /* 413 * MXU pool 00 414 */ 415 enum { 416 OPC_MXU_S32MAX = 0x00, 417 OPC_MXU_S32MIN = 0x01, 418 OPC_MXU_D16MAX = 0x02, 419 OPC_MXU_D16MIN = 0x03, 420 OPC_MXU_Q8MAX = 0x04, 421 OPC_MXU_Q8MIN = 0x05, 422 OPC_MXU_Q8SLT = 0x06, 423 OPC_MXU_Q8SLTU = 0x07, 424 }; 425 426 /* 427 * MXU pool 01 428 */ 429 enum { 430 OPC_MXU_S32SLT = 0x00, 431 OPC_MXU_D16SLT = 0x01, 432 OPC_MXU_D16AVG = 0x02, 433 OPC_MXU_D16AVGR = 
0x03, 434 OPC_MXU_Q8AVG = 0x04, 435 OPC_MXU_Q8AVGR = 0x05, 436 OPC_MXU_Q8ADD = 0x07, 437 }; 438 439 /* 440 * MXU pool 02 441 */ 442 enum { 443 OPC_MXU_S32CPS = 0x00, 444 OPC_MXU_D16CPS = 0x02, 445 OPC_MXU_Q8ABD = 0x04, 446 OPC_MXU_Q16SAT = 0x06, 447 }; 448 449 /* 450 * MXU pool 03 451 */ 452 enum { 453 OPC_MXU_D16MULF = 0x00, 454 OPC_MXU_D16MULE = 0x01, 455 }; 456 457 /* 458 * MXU pool 04 05 06 07 08 09 10 11 459 */ 460 enum { 461 OPC_MXU_S32LDST = 0x00, 462 OPC_MXU_S32LDSTR = 0x01, 463 }; 464 465 /* 466 * MXU pool 12 467 */ 468 enum { 469 OPC_MXU_D32ACC = 0x00, 470 OPC_MXU_D32ACCM = 0x01, 471 OPC_MXU_D32ASUM = 0x02, 472 }; 473 474 /* 475 * MXU pool 13 476 */ 477 enum { 478 OPC_MXU_Q16ACC = 0x00, 479 OPC_MXU_Q16ACCM = 0x01, 480 OPC_MXU_D16ASUM = 0x02, 481 }; 482 483 /* 484 * MXU pool 14 485 */ 486 enum { 487 OPC_MXU_Q8ADDE = 0x00, 488 OPC_MXU_D8SUM = 0x01, 489 OPC_MXU_D8SUMC = 0x02, 490 }; 491 492 /* 493 * MXU pool 15 494 */ 495 enum { 496 OPC_MXU_S32MUL = 0x00, 497 OPC_MXU_S32MULU = 0x01, 498 OPC_MXU_S32EXTR = 0x02, 499 OPC_MXU_S32EXTRV = 0x03, 500 }; 501 502 /* 503 * MXU pool 16 504 */ 505 enum { 506 OPC_MXU_D32SARW = 0x00, 507 OPC_MXU_S32ALN = 0x01, 508 OPC_MXU_S32ALNI = 0x02, 509 OPC_MXU_S32LUI = 0x03, 510 OPC_MXU_S32NOR = 0x04, 511 OPC_MXU_S32AND = 0x05, 512 OPC_MXU_S32OR = 0x06, 513 OPC_MXU_S32XOR = 0x07, 514 }; 515 516 /* 517 * MXU pool 17 518 */ 519 enum { 520 OPC_MXU_LXB = 0x00, 521 OPC_MXU_LXH = 0x01, 522 OPC_MXU_LXW = 0x03, 523 OPC_MXU_LXBU = 0x04, 524 OPC_MXU_LXHU = 0x05, 525 }; 526 527 /* 528 * MXU pool 18 529 */ 530 enum { 531 OPC_MXU_D32SLLV = 0x00, 532 OPC_MXU_D32SLRV = 0x01, 533 OPC_MXU_D32SARV = 0x03, 534 OPC_MXU_Q16SLLV = 0x04, 535 OPC_MXU_Q16SLRV = 0x05, 536 OPC_MXU_Q16SARV = 0x07, 537 }; 538 539 /* 540 * MXU pool 19 541 */ 542 enum { 543 OPC_MXU_Q8MUL = 0x00, 544 OPC_MXU_Q8MULSU = 0x02, 545 }; 546 547 /* 548 * MXU pool 20 549 */ 550 enum { 551 OPC_MXU_Q8MOVZ = 0x00, 552 OPC_MXU_Q8MOVN = 0x01, 553 OPC_MXU_D16MOVZ = 0x02, 554 OPC_MXU_D16MOVN = 
0x03, 555 OPC_MXU_S32MOVZ = 0x04, 556 OPC_MXU_S32MOVN = 0x05, 557 }; 558 559 /* 560 * MXU pool 21 561 */ 562 enum { 563 OPC_MXU_Q8MAC = 0x00, 564 OPC_MXU_Q8MACSU = 0x02, 565 }; 566 567 568 /* MXU accumulate add/subtract 1-bit pattern 'aptn1' */ 569 #define MXU_APTN1_A 0 570 #define MXU_APTN1_S 1 571 572 /* MXU accumulate add/subtract 2-bit pattern 'aptn2' */ 573 #define MXU_APTN2_AA 0 574 #define MXU_APTN2_AS 1 575 #define MXU_APTN2_SA 2 576 #define MXU_APTN2_SS 3 577 578 /* MXU execute add/subtract 2-bit pattern 'eptn2' */ 579 #define MXU_EPTN2_AA 0 580 #define MXU_EPTN2_AS 1 581 #define MXU_EPTN2_SA 2 582 #define MXU_EPTN2_SS 3 583 584 /* MXU operand getting pattern 'optn2' */ 585 #define MXU_OPTN2_PTN0 0 586 #define MXU_OPTN2_PTN1 1 587 #define MXU_OPTN2_PTN2 2 588 #define MXU_OPTN2_PTN3 3 589 /* alternative naming scheme for 'optn2' */ 590 #define MXU_OPTN2_WW 0 591 #define MXU_OPTN2_LW 1 592 #define MXU_OPTN2_HW 2 593 #define MXU_OPTN2_XW 3 594 595 /* MXU operand getting pattern 'optn3' */ 596 #define MXU_OPTN3_PTN0 0 597 #define MXU_OPTN3_PTN1 1 598 #define MXU_OPTN3_PTN2 2 599 #define MXU_OPTN3_PTN3 3 600 #define MXU_OPTN3_PTN4 4 601 #define MXU_OPTN3_PTN5 5 602 #define MXU_OPTN3_PTN6 6 603 #define MXU_OPTN3_PTN7 7 604 605 /* MXU registers */ 606 static TCGv mxu_gpr[NUMBER_OF_MXU_REGISTERS - 1]; 607 static TCGv mxu_CR; 608 609 static const char mxuregnames[][4] = { 610 "XR1", "XR2", "XR3", "XR4", "XR5", "XR6", "XR7", "XR8", 611 "XR9", "XR10", "XR11", "XR12", "XR13", "XR14", "XR15", "XCR", 612 }; 613 614 void mxu_translate_init(void) 615 { 616 for (unsigned i = 0; i < NUMBER_OF_MXU_REGISTERS - 1; i++) { 617 mxu_gpr[i] = tcg_global_mem_new(cpu_env, 618 offsetof(CPUMIPSState, active_tc.mxu_gpr[i]), 619 mxuregnames[i]); 620 } 621 622 mxu_CR = tcg_global_mem_new(cpu_env, 623 offsetof(CPUMIPSState, active_tc.mxu_cr), 624 mxuregnames[NUMBER_OF_MXU_REGISTERS - 1]); 625 } 626 627 /* MXU General purpose registers moves. 
*/ 628 static inline void gen_load_mxu_gpr(TCGv t, unsigned int reg) 629 { 630 if (reg == 0) { 631 tcg_gen_movi_tl(t, 0); 632 } else if (reg <= 15) { 633 tcg_gen_mov_tl(t, mxu_gpr[reg - 1]); 634 } 635 } 636 637 static inline void gen_store_mxu_gpr(TCGv t, unsigned int reg) 638 { 639 if (reg > 0 && reg <= 15) { 640 tcg_gen_mov_tl(mxu_gpr[reg - 1], t); 641 } 642 } 643 644 /* MXU control register moves. */ 645 static inline void gen_load_mxu_cr(TCGv t) 646 { 647 tcg_gen_mov_tl(t, mxu_CR); 648 } 649 650 static inline void gen_store_mxu_cr(TCGv t) 651 { 652 /* TODO: Add handling of RW rules for MXU_CR. */ 653 tcg_gen_mov_tl(mxu_CR, t); 654 } 655 656 /* 657 * S32I2M XRa, rb - Register move from GRF to XRF 658 */ 659 static void gen_mxu_s32i2m(DisasContext *ctx) 660 { 661 TCGv t0; 662 uint32_t XRa, Rb; 663 664 t0 = tcg_temp_new(); 665 666 XRa = extract32(ctx->opcode, 6, 5); 667 Rb = extract32(ctx->opcode, 16, 5); 668 669 gen_load_gpr(t0, Rb); 670 if (XRa <= 15) { 671 gen_store_mxu_gpr(t0, XRa); 672 } else if (XRa == 16) { 673 gen_store_mxu_cr(t0); 674 } 675 } 676 677 /* 678 * S32M2I XRa, rb - Register move from XRF to GRF 679 */ 680 static void gen_mxu_s32m2i(DisasContext *ctx) 681 { 682 TCGv t0; 683 uint32_t XRa, Rb; 684 685 t0 = tcg_temp_new(); 686 687 XRa = extract32(ctx->opcode, 6, 5); 688 Rb = extract32(ctx->opcode, 16, 5); 689 690 if (XRa <= 15) { 691 gen_load_mxu_gpr(t0, XRa); 692 } else if (XRa == 16) { 693 gen_load_mxu_cr(t0); 694 } 695 696 gen_store_gpr(t0, Rb); 697 } 698 699 /* 700 * S8LDD XRa, Rb, s8, optn3 - Load a byte from memory to XRF 701 * 702 * S8LDI XRa, Rb, s8, optn3 - Load a byte from memory to XRF, 703 * post modify address register 704 */ 705 static void gen_mxu_s8ldd(DisasContext *ctx, bool postmodify) 706 { 707 TCGv t0, t1; 708 uint32_t XRa, Rb, s8, optn3; 709 710 t0 = tcg_temp_new(); 711 t1 = tcg_temp_new(); 712 713 XRa = extract32(ctx->opcode, 6, 4); 714 s8 = extract32(ctx->opcode, 10, 8); 715 optn3 = extract32(ctx->opcode, 18, 3); 716 Rb = 
extract32(ctx->opcode, 21, 5); 717 718 gen_load_gpr(t0, Rb); 719 tcg_gen_addi_tl(t0, t0, (int8_t)s8); 720 if (postmodify) { 721 gen_store_gpr(t0, Rb); 722 } 723 724 switch (optn3) { 725 /* XRa[7:0] = tmp8 */ 726 case MXU_OPTN3_PTN0: 727 tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB); 728 gen_load_mxu_gpr(t0, XRa); 729 tcg_gen_deposit_tl(t0, t0, t1, 0, 8); 730 break; 731 /* XRa[15:8] = tmp8 */ 732 case MXU_OPTN3_PTN1: 733 tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB); 734 gen_load_mxu_gpr(t0, XRa); 735 tcg_gen_deposit_tl(t0, t0, t1, 8, 8); 736 break; 737 /* XRa[23:16] = tmp8 */ 738 case MXU_OPTN3_PTN2: 739 tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB); 740 gen_load_mxu_gpr(t0, XRa); 741 tcg_gen_deposit_tl(t0, t0, t1, 16, 8); 742 break; 743 /* XRa[31:24] = tmp8 */ 744 case MXU_OPTN3_PTN3: 745 tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB); 746 gen_load_mxu_gpr(t0, XRa); 747 tcg_gen_deposit_tl(t0, t0, t1, 24, 8); 748 break; 749 /* XRa = {8'b0, tmp8, 8'b0, tmp8} */ 750 case MXU_OPTN3_PTN4: 751 tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB); 752 tcg_gen_deposit_tl(t0, t1, t1, 16, 16); 753 break; 754 /* XRa = {tmp8, 8'b0, tmp8, 8'b0} */ 755 case MXU_OPTN3_PTN5: 756 tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB); 757 tcg_gen_shli_tl(t1, t1, 8); 758 tcg_gen_deposit_tl(t0, t1, t1, 16, 16); 759 break; 760 /* XRa = {{8{sign of tmp8}}, tmp8, {8{sign of tmp8}}, tmp8} */ 761 case MXU_OPTN3_PTN6: 762 tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_SB); 763 tcg_gen_mov_tl(t0, t1); 764 tcg_gen_andi_tl(t0, t0, 0xFF00FFFF); 765 tcg_gen_shli_tl(t1, t1, 16); 766 tcg_gen_or_tl(t0, t0, t1); 767 break; 768 /* XRa = {tmp8, tmp8, tmp8, tmp8} */ 769 case MXU_OPTN3_PTN7: 770 tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB); 771 tcg_gen_deposit_tl(t1, t1, t1, 8, 8); 772 tcg_gen_deposit_tl(t0, t1, t1, 16, 16); 773 break; 774 } 775 776 gen_store_mxu_gpr(t0, XRa); 777 } 778 779 /* 780 * S8STD XRa, Rb, s8, optn3 - Store a byte from XRF to memory 781 * 782 * S8SDI XRa, Rb, s8, optn3 - Store a 
byte from XRF to memory, 783 * post modify address register 784 */ 785 static void gen_mxu_s8std(DisasContext *ctx, bool postmodify) 786 { 787 TCGv t0, t1; 788 uint32_t XRa, Rb, s8, optn3; 789 790 t0 = tcg_temp_new(); 791 t1 = tcg_temp_new(); 792 793 XRa = extract32(ctx->opcode, 6, 4); 794 s8 = extract32(ctx->opcode, 10, 8); 795 optn3 = extract32(ctx->opcode, 18, 3); 796 Rb = extract32(ctx->opcode, 21, 5); 797 798 if (optn3 > 3) { 799 /* reserved, do nothing */ 800 return; 801 } 802 803 gen_load_gpr(t0, Rb); 804 tcg_gen_addi_tl(t0, t0, (int8_t)s8); 805 if (postmodify) { 806 gen_store_gpr(t0, Rb); 807 } 808 gen_load_mxu_gpr(t1, XRa); 809 810 switch (optn3) { 811 /* XRa[7:0] => tmp8 */ 812 case MXU_OPTN3_PTN0: 813 tcg_gen_extract_tl(t1, t1, 0, 8); 814 break; 815 /* XRa[15:8] => tmp8 */ 816 case MXU_OPTN3_PTN1: 817 tcg_gen_extract_tl(t1, t1, 8, 8); 818 break; 819 /* XRa[23:16] => tmp8 */ 820 case MXU_OPTN3_PTN2: 821 tcg_gen_extract_tl(t1, t1, 16, 8); 822 break; 823 /* XRa[31:24] => tmp8 */ 824 case MXU_OPTN3_PTN3: 825 tcg_gen_extract_tl(t1, t1, 24, 8); 826 break; 827 } 828 829 tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_UB); 830 } 831 832 /* 833 * S16LDD XRa, Rb, s10, optn2 - Load a halfword from memory to XRF 834 * 835 * S16LDI XRa, Rb, s10, optn2 - Load a halfword from memory to XRF, 836 * post modify address register 837 */ 838 static void gen_mxu_s16ldd(DisasContext *ctx, bool postmodify) 839 { 840 TCGv t0, t1; 841 uint32_t XRa, Rb, optn2; 842 int32_t s10; 843 844 t0 = tcg_temp_new(); 845 t1 = tcg_temp_new(); 846 847 XRa = extract32(ctx->opcode, 6, 4); 848 s10 = sextract32(ctx->opcode, 10, 9) * 2; 849 optn2 = extract32(ctx->opcode, 19, 2); 850 Rb = extract32(ctx->opcode, 21, 5); 851 852 gen_load_gpr(t0, Rb); 853 tcg_gen_addi_tl(t0, t0, s10); 854 if (postmodify) { 855 gen_store_gpr(t0, Rb); 856 } 857 858 switch (optn2) { 859 /* XRa[15:0] = tmp16 */ 860 case MXU_OPTN2_PTN0: 861 tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW); 862 gen_load_mxu_gpr(t0, XRa); 863 
tcg_gen_deposit_tl(t0, t0, t1, 0, 16); 864 break; 865 /* XRa[31:16] = tmp16 */ 866 case MXU_OPTN2_PTN1: 867 tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW); 868 gen_load_mxu_gpr(t0, XRa); 869 tcg_gen_deposit_tl(t0, t0, t1, 16, 16); 870 break; 871 /* XRa = sign_extend(tmp16) */ 872 case MXU_OPTN2_PTN2: 873 tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_SW); 874 break; 875 /* XRa = {tmp16, tmp16} */ 876 case MXU_OPTN2_PTN3: 877 tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW); 878 tcg_gen_deposit_tl(t0, t1, t1, 0, 16); 879 tcg_gen_deposit_tl(t0, t1, t1, 16, 16); 880 break; 881 } 882 883 gen_store_mxu_gpr(t0, XRa); 884 } 885 886 /* 887 * S16STD XRa, Rb, s8, optn2 - Store a byte from XRF to memory 888 * 889 * S16SDI XRa, Rb, s8, optn2 - Store a byte from XRF to memory, 890 * post modify address register 891 */ 892 static void gen_mxu_s16std(DisasContext *ctx, bool postmodify) 893 { 894 TCGv t0, t1; 895 uint32_t XRa, Rb, optn2; 896 int32_t s10; 897 898 t0 = tcg_temp_new(); 899 t1 = tcg_temp_new(); 900 901 XRa = extract32(ctx->opcode, 6, 4); 902 s10 = sextract32(ctx->opcode, 10, 9) * 2; 903 optn2 = extract32(ctx->opcode, 19, 2); 904 Rb = extract32(ctx->opcode, 21, 5); 905 906 if (optn2 > 1) { 907 /* reserved, do nothing */ 908 return; 909 } 910 911 gen_load_gpr(t0, Rb); 912 tcg_gen_addi_tl(t0, t0, s10); 913 if (postmodify) { 914 gen_store_gpr(t0, Rb); 915 } 916 gen_load_mxu_gpr(t1, XRa); 917 918 switch (optn2) { 919 /* XRa[15:0] => tmp16 */ 920 case MXU_OPTN2_PTN0: 921 tcg_gen_extract_tl(t1, t1, 0, 16); 922 break; 923 /* XRa[31:16] => tmp16 */ 924 case MXU_OPTN2_PTN1: 925 tcg_gen_extract_tl(t1, t1, 16, 16); 926 break; 927 } 928 929 tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_UW); 930 } 931 932 /* 933 * S32MUL XRa, XRd, rs, rt - Signed 32x32=>64 bit multiplication 934 * of GPR's and stores result into pair of MXU registers. 935 * It strains HI and LO registers. 
936 * 937 * S32MULU XRa, XRd, rs, rt - Unsigned 32x32=>64 bit multiplication 938 * of GPR's and stores result into pair of MXU registers. 939 * It strains HI and LO registers. 940 */ 941 static void gen_mxu_s32mul(DisasContext *ctx, bool mulu) 942 { 943 TCGv t0, t1; 944 uint32_t XRa, XRd, rs, rt; 945 946 t0 = tcg_temp_new(); 947 t1 = tcg_temp_new(); 948 949 XRa = extract32(ctx->opcode, 6, 4); 950 XRd = extract32(ctx->opcode, 10, 4); 951 rs = extract32(ctx->opcode, 16, 5); 952 rt = extract32(ctx->opcode, 21, 5); 953 954 if (unlikely(rs == 0 || rt == 0)) { 955 tcg_gen_movi_tl(t0, 0); 956 tcg_gen_movi_tl(t1, 0); 957 } else { 958 gen_load_gpr(t0, rs); 959 gen_load_gpr(t1, rt); 960 961 if (mulu) { 962 tcg_gen_mulu2_tl(t0, t1, t0, t1); 963 } else { 964 tcg_gen_muls2_tl(t0, t1, t0, t1); 965 } 966 } 967 tcg_gen_mov_tl(cpu_HI[0], t1); 968 tcg_gen_mov_tl(cpu_LO[0], t0); 969 gen_store_mxu_gpr(t1, XRa); 970 gen_store_mxu_gpr(t0, XRd); 971 } 972 973 /* 974 * D16MUL XRa, XRb, XRc, XRd, optn2 - Signed 16 bit pattern multiplication 975 * D16MULF XRa, XRb, XRc, optn2 - Signed Q15 fraction pattern multiplication 976 * with rounding and packing result 977 * D16MULE XRa, XRb, XRc, XRd, optn2 - Signed Q15 fraction pattern 978 * multiplication with rounding 979 */ 980 static void gen_mxu_d16mul(DisasContext *ctx, bool fractional, 981 bool packed_result) 982 { 983 TCGv t0, t1, t2, t3; 984 uint32_t XRa, XRb, XRc, XRd, optn2; 985 986 t0 = tcg_temp_new(); 987 t1 = tcg_temp_new(); 988 t2 = tcg_temp_new(); 989 t3 = tcg_temp_new(); 990 991 XRa = extract32(ctx->opcode, 6, 4); 992 XRb = extract32(ctx->opcode, 10, 4); 993 XRc = extract32(ctx->opcode, 14, 4); 994 XRd = extract32(ctx->opcode, 18, 4); 995 optn2 = extract32(ctx->opcode, 22, 2); 996 997 /* 998 * TODO: XRd field isn't used for D16MULF 999 * There's no knowledge how this field affect 1000 * instruction decoding/behavior 1001 */ 1002 1003 gen_load_mxu_gpr(t1, XRb); 1004 tcg_gen_sextract_tl(t0, t1, 0, 16); 1005 tcg_gen_sextract_tl(t1, t1, 
16, 16); 1006 gen_load_mxu_gpr(t3, XRc); 1007 tcg_gen_sextract_tl(t2, t3, 0, 16); 1008 tcg_gen_sextract_tl(t3, t3, 16, 16); 1009 1010 switch (optn2) { 1011 case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */ 1012 tcg_gen_mul_tl(t3, t1, t3); 1013 tcg_gen_mul_tl(t2, t0, t2); 1014 break; 1015 case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */ 1016 tcg_gen_mul_tl(t3, t0, t3); 1017 tcg_gen_mul_tl(t2, t0, t2); 1018 break; 1019 case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */ 1020 tcg_gen_mul_tl(t3, t1, t3); 1021 tcg_gen_mul_tl(t2, t1, t2); 1022 break; 1023 case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */ 1024 tcg_gen_mul_tl(t3, t0, t3); 1025 tcg_gen_mul_tl(t2, t1, t2); 1026 break; 1027 } 1028 if (fractional) { 1029 TCGLabel *l_done = gen_new_label(); 1030 TCGv rounding = tcg_temp_new(); 1031 1032 tcg_gen_shli_tl(t3, t3, 1); 1033 tcg_gen_shli_tl(t2, t2, 1); 1034 tcg_gen_andi_tl(rounding, mxu_CR, 0x2); 1035 tcg_gen_brcondi_tl(TCG_COND_EQ, rounding, 0, l_done); 1036 if (packed_result) { 1037 TCGLabel *l_apply_bias_l = gen_new_label(); 1038 TCGLabel *l_apply_bias_r = gen_new_label(); 1039 TCGLabel *l_half_done = gen_new_label(); 1040 TCGv bias = tcg_temp_new(); 1041 1042 /* 1043 * D16MULF supports unbiased rounding aka "bankers rounding", 1044 * "round to even", "convergent rounding" 1045 */ 1046 tcg_gen_andi_tl(bias, mxu_CR, 0x4); 1047 tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_l); 1048 tcg_gen_andi_tl(t0, t3, 0x1ffff); 1049 tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_half_done); 1050 gen_set_label(l_apply_bias_l); 1051 tcg_gen_addi_tl(t3, t3, 0x8000); 1052 gen_set_label(l_half_done); 1053 tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_r); 1054 tcg_gen_andi_tl(t0, t2, 0x1ffff); 1055 tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_done); 1056 gen_set_label(l_apply_bias_r); 1057 tcg_gen_addi_tl(t2, t2, 0x8000); 1058 } else { 1059 /* D16MULE doesn't support unbiased rounding */ 1060 tcg_gen_addi_tl(t3, 
t3, 0x8000); 1061 tcg_gen_addi_tl(t2, t2, 0x8000); 1062 } 1063 gen_set_label(l_done); 1064 } 1065 if (!packed_result) { 1066 gen_store_mxu_gpr(t3, XRa); 1067 gen_store_mxu_gpr(t2, XRd); 1068 } else { 1069 tcg_gen_andi_tl(t3, t3, 0xffff0000); 1070 tcg_gen_shri_tl(t2, t2, 16); 1071 tcg_gen_or_tl(t3, t3, t2); 1072 gen_store_mxu_gpr(t3, XRa); 1073 } 1074 } 1075 1076 /* 1077 * D16MAC XRa, XRb, XRc, XRd, aptn2, optn2 1078 * Signed 16 bit pattern multiply and accumulate 1079 * D16MACF XRa, XRb, XRc, aptn2, optn2 1080 * Signed Q15 fraction pattern multiply accumulate and pack 1081 * D16MACE XRa, XRb, XRc, XRd, aptn2, optn2 1082 * Signed Q15 fraction pattern multiply and accumulate 1083 */ 1084 static void gen_mxu_d16mac(DisasContext *ctx, bool fractional, 1085 bool packed_result) 1086 { 1087 TCGv t0, t1, t2, t3; 1088 uint32_t XRa, XRb, XRc, XRd, optn2, aptn2; 1089 1090 t0 = tcg_temp_new(); 1091 t1 = tcg_temp_new(); 1092 t2 = tcg_temp_new(); 1093 t3 = tcg_temp_new(); 1094 1095 XRa = extract32(ctx->opcode, 6, 4); 1096 XRb = extract32(ctx->opcode, 10, 4); 1097 XRc = extract32(ctx->opcode, 14, 4); 1098 XRd = extract32(ctx->opcode, 18, 4); 1099 optn2 = extract32(ctx->opcode, 22, 2); 1100 aptn2 = extract32(ctx->opcode, 24, 2); 1101 1102 gen_load_mxu_gpr(t1, XRb); 1103 tcg_gen_sextract_tl(t0, t1, 0, 16); 1104 tcg_gen_sextract_tl(t1, t1, 16, 16); 1105 1106 gen_load_mxu_gpr(t3, XRc); 1107 tcg_gen_sextract_tl(t2, t3, 0, 16); 1108 tcg_gen_sextract_tl(t3, t3, 16, 16); 1109 1110 switch (optn2) { 1111 case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */ 1112 tcg_gen_mul_tl(t3, t1, t3); 1113 tcg_gen_mul_tl(t2, t0, t2); 1114 break; 1115 case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */ 1116 tcg_gen_mul_tl(t3, t0, t3); 1117 tcg_gen_mul_tl(t2, t0, t2); 1118 break; 1119 case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */ 1120 tcg_gen_mul_tl(t3, t1, t3); 1121 tcg_gen_mul_tl(t2, t1, t2); 1122 break; 1123 case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L 
== rop */ 1124 tcg_gen_mul_tl(t3, t0, t3); 1125 tcg_gen_mul_tl(t2, t1, t2); 1126 break; 1127 } 1128 1129 if (fractional) { 1130 tcg_gen_shli_tl(t3, t3, 1); 1131 tcg_gen_shli_tl(t2, t2, 1); 1132 } 1133 gen_load_mxu_gpr(t0, XRa); 1134 gen_load_mxu_gpr(t1, XRd); 1135 1136 switch (aptn2) { 1137 case MXU_APTN2_AA: 1138 tcg_gen_add_tl(t3, t0, t3); 1139 tcg_gen_add_tl(t2, t1, t2); 1140 break; 1141 case MXU_APTN2_AS: 1142 tcg_gen_add_tl(t3, t0, t3); 1143 tcg_gen_sub_tl(t2, t1, t2); 1144 break; 1145 case MXU_APTN2_SA: 1146 tcg_gen_sub_tl(t3, t0, t3); 1147 tcg_gen_add_tl(t2, t1, t2); 1148 break; 1149 case MXU_APTN2_SS: 1150 tcg_gen_sub_tl(t3, t0, t3); 1151 tcg_gen_sub_tl(t2, t1, t2); 1152 break; 1153 } 1154 1155 if (fractional) { 1156 TCGLabel *l_done = gen_new_label(); 1157 TCGv rounding = tcg_temp_new(); 1158 1159 tcg_gen_andi_tl(rounding, mxu_CR, 0x2); 1160 tcg_gen_brcondi_tl(TCG_COND_EQ, rounding, 0, l_done); 1161 if (packed_result) { 1162 TCGLabel *l_apply_bias_l = gen_new_label(); 1163 TCGLabel *l_apply_bias_r = gen_new_label(); 1164 TCGLabel *l_half_done = gen_new_label(); 1165 TCGv bias = tcg_temp_new(); 1166 1167 /* 1168 * D16MACF supports unbiased rounding aka "bankers rounding", 1169 * "round to even", "convergent rounding" 1170 */ 1171 tcg_gen_andi_tl(bias, mxu_CR, 0x4); 1172 tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_l); 1173 tcg_gen_andi_tl(t0, t3, 0x1ffff); 1174 tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_half_done); 1175 gen_set_label(l_apply_bias_l); 1176 tcg_gen_addi_tl(t3, t3, 0x8000); 1177 gen_set_label(l_half_done); 1178 tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_r); 1179 tcg_gen_andi_tl(t0, t2, 0x1ffff); 1180 tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_done); 1181 gen_set_label(l_apply_bias_r); 1182 tcg_gen_addi_tl(t2, t2, 0x8000); 1183 } else { 1184 /* D16MACE doesn't support unbiased rounding */ 1185 tcg_gen_addi_tl(t3, t3, 0x8000); 1186 tcg_gen_addi_tl(t2, t2, 0x8000); 1187 } 1188 gen_set_label(l_done); 1189 } 1190 1191 if 
(!packed_result) { 1192 gen_store_mxu_gpr(t3, XRa); 1193 gen_store_mxu_gpr(t2, XRd); 1194 } else { 1195 tcg_gen_andi_tl(t3, t3, 0xffff0000); 1196 tcg_gen_shri_tl(t2, t2, 16); 1197 tcg_gen_or_tl(t3, t3, t2); 1198 gen_store_mxu_gpr(t3, XRa); 1199 } 1200 } 1201 1202 /* 1203 * D16MADL XRa, XRb, XRc, XRd, aptn2, optn2 - Double packed 1204 * unsigned 16 bit pattern multiply and add/subtract. 1205 */ 1206 static void gen_mxu_d16madl(DisasContext *ctx) 1207 { 1208 TCGv t0, t1, t2, t3; 1209 uint32_t XRa, XRb, XRc, XRd, optn2, aptn2; 1210 1211 t0 = tcg_temp_new(); 1212 t1 = tcg_temp_new(); 1213 t2 = tcg_temp_new(); 1214 t3 = tcg_temp_new(); 1215 1216 XRa = extract32(ctx->opcode, 6, 4); 1217 XRb = extract32(ctx->opcode, 10, 4); 1218 XRc = extract32(ctx->opcode, 14, 4); 1219 XRd = extract32(ctx->opcode, 18, 4); 1220 optn2 = extract32(ctx->opcode, 22, 2); 1221 aptn2 = extract32(ctx->opcode, 24, 2); 1222 1223 gen_load_mxu_gpr(t1, XRb); 1224 tcg_gen_sextract_tl(t0, t1, 0, 16); 1225 tcg_gen_sextract_tl(t1, t1, 16, 16); 1226 1227 gen_load_mxu_gpr(t3, XRc); 1228 tcg_gen_sextract_tl(t2, t3, 0, 16); 1229 tcg_gen_sextract_tl(t3, t3, 16, 16); 1230 1231 switch (optn2) { 1232 case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */ 1233 tcg_gen_mul_tl(t3, t1, t3); 1234 tcg_gen_mul_tl(t2, t0, t2); 1235 break; 1236 case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */ 1237 tcg_gen_mul_tl(t3, t0, t3); 1238 tcg_gen_mul_tl(t2, t0, t2); 1239 break; 1240 case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */ 1241 tcg_gen_mul_tl(t3, t1, t3); 1242 tcg_gen_mul_tl(t2, t1, t2); 1243 break; 1244 case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */ 1245 tcg_gen_mul_tl(t3, t0, t3); 1246 tcg_gen_mul_tl(t2, t1, t2); 1247 break; 1248 } 1249 tcg_gen_extract_tl(t2, t2, 0, 16); 1250 tcg_gen_extract_tl(t3, t3, 0, 16); 1251 1252 gen_load_mxu_gpr(t1, XRa); 1253 tcg_gen_extract_tl(t0, t1, 0, 16); 1254 tcg_gen_extract_tl(t1, t1, 16, 16); 1255 1256 switch (aptn2) { 1257 case 
MXU_APTN2_AA: 1258 tcg_gen_add_tl(t3, t1, t3); 1259 tcg_gen_add_tl(t2, t0, t2); 1260 break; 1261 case MXU_APTN2_AS: 1262 tcg_gen_add_tl(t3, t1, t3); 1263 tcg_gen_sub_tl(t2, t0, t2); 1264 break; 1265 case MXU_APTN2_SA: 1266 tcg_gen_sub_tl(t3, t1, t3); 1267 tcg_gen_add_tl(t2, t0, t2); 1268 break; 1269 case MXU_APTN2_SS: 1270 tcg_gen_sub_tl(t3, t1, t3); 1271 tcg_gen_sub_tl(t2, t0, t2); 1272 break; 1273 } 1274 1275 tcg_gen_andi_tl(t2, t2, 0xffff); 1276 tcg_gen_shli_tl(t3, t3, 16); 1277 tcg_gen_or_tl(mxu_gpr[XRd - 1], t3, t2); 1278 } 1279 1280 /* 1281 * S16MAD XRa, XRb, XRc, XRd, aptn2, optn2 - Single packed 1282 * signed 16 bit pattern multiply and 32-bit add/subtract. 1283 */ 1284 static void gen_mxu_s16mad(DisasContext *ctx) 1285 { 1286 TCGv t0, t1; 1287 uint32_t XRa, XRb, XRc, XRd, optn2, aptn1, pad; 1288 1289 t0 = tcg_temp_new(); 1290 t1 = tcg_temp_new(); 1291 1292 XRa = extract32(ctx->opcode, 6, 4); 1293 XRb = extract32(ctx->opcode, 10, 4); 1294 XRc = extract32(ctx->opcode, 14, 4); 1295 XRd = extract32(ctx->opcode, 18, 4); 1296 optn2 = extract32(ctx->opcode, 22, 2); 1297 aptn1 = extract32(ctx->opcode, 24, 1); 1298 pad = extract32(ctx->opcode, 25, 1); 1299 1300 if (pad) { 1301 /* FIXME check if it influence the result */ 1302 } 1303 1304 gen_load_mxu_gpr(t0, XRb); 1305 gen_load_mxu_gpr(t1, XRc); 1306 1307 switch (optn2) { 1308 case MXU_OPTN2_WW: /* XRB.H*XRC.H */ 1309 tcg_gen_sextract_tl(t0, t0, 16, 16); 1310 tcg_gen_sextract_tl(t1, t1, 16, 16); 1311 break; 1312 case MXU_OPTN2_LW: /* XRB.L*XRC.L */ 1313 tcg_gen_sextract_tl(t0, t0, 0, 16); 1314 tcg_gen_sextract_tl(t1, t1, 0, 16); 1315 break; 1316 case MXU_OPTN2_HW: /* XRB.H*XRC.L */ 1317 tcg_gen_sextract_tl(t0, t0, 16, 16); 1318 tcg_gen_sextract_tl(t1, t1, 0, 16); 1319 break; 1320 case MXU_OPTN2_XW: /* XRB.L*XRC.H */ 1321 tcg_gen_sextract_tl(t0, t0, 0, 16); 1322 tcg_gen_sextract_tl(t1, t1, 16, 16); 1323 break; 1324 } 1325 tcg_gen_mul_tl(t0, t0, t1); 1326 1327 gen_load_mxu_gpr(t1, XRa); 1328 1329 switch (aptn1) { 
1330 case MXU_APTN1_A: 1331 tcg_gen_add_tl(t1, t1, t0); 1332 break; 1333 case MXU_APTN1_S: 1334 tcg_gen_sub_tl(t1, t1, t0); 1335 break; 1336 } 1337 1338 gen_store_mxu_gpr(t1, XRd); 1339 } 1340 1341 /* 1342 * Q8MUL XRa, XRb, XRc, XRd - Parallel quad unsigned 8 bit multiply 1343 * Q8MULSU XRa, XRb, XRc, XRd - Parallel quad signed 8 bit multiply 1344 * Q8MAC XRa, XRb, XRc, XRd - Parallel quad unsigned 8 bit multiply 1345 * and accumulate 1346 * Q8MACSU XRa, XRb, XRc, XRd - Parallel quad signed 8 bit multiply 1347 * and accumulate 1348 */ 1349 static void gen_mxu_q8mul_mac(DisasContext *ctx, bool su, bool mac) 1350 { 1351 TCGv t0, t1, t2, t3, t4, t5, t6, t7; 1352 uint32_t XRa, XRb, XRc, XRd, aptn2; 1353 1354 t0 = tcg_temp_new(); 1355 t1 = tcg_temp_new(); 1356 t2 = tcg_temp_new(); 1357 t3 = tcg_temp_new(); 1358 t4 = tcg_temp_new(); 1359 t5 = tcg_temp_new(); 1360 t6 = tcg_temp_new(); 1361 t7 = tcg_temp_new(); 1362 1363 XRa = extract32(ctx->opcode, 6, 4); 1364 XRb = extract32(ctx->opcode, 10, 4); 1365 XRc = extract32(ctx->opcode, 14, 4); 1366 XRd = extract32(ctx->opcode, 18, 4); 1367 aptn2 = extract32(ctx->opcode, 24, 2); 1368 1369 gen_load_mxu_gpr(t3, XRb); 1370 gen_load_mxu_gpr(t7, XRc); 1371 1372 if (su) { 1373 /* Q8MULSU / Q8MACSU */ 1374 tcg_gen_sextract_tl(t0, t3, 0, 8); 1375 tcg_gen_sextract_tl(t1, t3, 8, 8); 1376 tcg_gen_sextract_tl(t2, t3, 16, 8); 1377 tcg_gen_sextract_tl(t3, t3, 24, 8); 1378 } else { 1379 /* Q8MUL / Q8MAC */ 1380 tcg_gen_extract_tl(t0, t3, 0, 8); 1381 tcg_gen_extract_tl(t1, t3, 8, 8); 1382 tcg_gen_extract_tl(t2, t3, 16, 8); 1383 tcg_gen_extract_tl(t3, t3, 24, 8); 1384 } 1385 1386 tcg_gen_extract_tl(t4, t7, 0, 8); 1387 tcg_gen_extract_tl(t5, t7, 8, 8); 1388 tcg_gen_extract_tl(t6, t7, 16, 8); 1389 tcg_gen_extract_tl(t7, t7, 24, 8); 1390 1391 tcg_gen_mul_tl(t0, t0, t4); 1392 tcg_gen_mul_tl(t1, t1, t5); 1393 tcg_gen_mul_tl(t2, t2, t6); 1394 tcg_gen_mul_tl(t3, t3, t7); 1395 1396 if (mac) { 1397 gen_load_mxu_gpr(t4, XRd); 1398 gen_load_mxu_gpr(t5, 
XRa); 1399 tcg_gen_extract_tl(t6, t4, 0, 16); 1400 tcg_gen_extract_tl(t7, t4, 16, 16); 1401 if (aptn2 & 1) { 1402 tcg_gen_sub_tl(t0, t6, t0); 1403 tcg_gen_sub_tl(t1, t7, t1); 1404 } else { 1405 tcg_gen_add_tl(t0, t6, t0); 1406 tcg_gen_add_tl(t1, t7, t1); 1407 } 1408 tcg_gen_extract_tl(t6, t5, 0, 16); 1409 tcg_gen_extract_tl(t7, t5, 16, 16); 1410 if (aptn2 & 2) { 1411 tcg_gen_sub_tl(t2, t6, t2); 1412 tcg_gen_sub_tl(t3, t7, t3); 1413 } else { 1414 tcg_gen_add_tl(t2, t6, t2); 1415 tcg_gen_add_tl(t3, t7, t3); 1416 } 1417 } 1418 1419 tcg_gen_deposit_tl(t0, t0, t1, 16, 16); 1420 tcg_gen_deposit_tl(t1, t2, t3, 16, 16); 1421 1422 gen_store_mxu_gpr(t0, XRd); 1423 gen_store_mxu_gpr(t1, XRa); 1424 } 1425 1426 /* 1427 * S32LDD XRa, Rb, S12 - Load a word from memory to XRF 1428 * S32LDDR XRa, Rb, S12 - Load a word from memory to XRF 1429 * in reversed byte seq. 1430 * S32LDI XRa, Rb, S12 - Load a word from memory to XRF, 1431 * post modify base address GPR. 1432 * S32LDIR XRa, Rb, S12 - Load a word from memory to XRF, 1433 * post modify base address GPR and load in reversed byte seq. 1434 */ 1435 static void gen_mxu_s32ldxx(DisasContext *ctx, bool reversed, bool postinc) 1436 { 1437 TCGv t0, t1; 1438 uint32_t XRa, Rb, s12; 1439 1440 t0 = tcg_temp_new(); 1441 t1 = tcg_temp_new(); 1442 1443 XRa = extract32(ctx->opcode, 6, 4); 1444 s12 = sextract32(ctx->opcode, 10, 10); 1445 Rb = extract32(ctx->opcode, 21, 5); 1446 1447 gen_load_gpr(t0, Rb); 1448 tcg_gen_movi_tl(t1, s12 * 4); 1449 tcg_gen_add_tl(t0, t0, t1); 1450 1451 tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, 1452 (MO_TESL ^ (reversed ? MO_BSWAP : 0)) | 1453 ctx->default_tcg_memop_mask); 1454 gen_store_mxu_gpr(t1, XRa); 1455 1456 if (postinc) { 1457 gen_store_gpr(t0, Rb); 1458 } 1459 } 1460 1461 /* 1462 * S32STD XRa, Rb, S12 - Store a word from XRF to memory 1463 * S32STDR XRa, Rb, S12 - Store a word from XRF to memory 1464 * in reversed byte seq. 
 * S32SDI XRa, Rb, S12 - Store a word from XRF to memory,
 *   post modify base address GPR.
 * S32SDIR XRa, Rb, S12 - Store a word from XRF to memory,
 *   post modify base address GPR and store in reversed byte seq.
 */
static void gen_mxu_s32stxx(DisasContext *ctx, bool reversed, bool postinc)
{
    TCGv t0, t1;
    uint32_t XRa, Rb, s12;

    t0 = tcg_temp_new();
    t1 = tcg_temp_new();

    XRa = extract32(ctx->opcode, 6, 4);
    s12 = sextract32(ctx->opcode, 10, 10);
    Rb = extract32(ctx->opcode, 21, 5);

    /* address = GPR[Rb] + S12 * 4 (word-scaled signed offset) */
    gen_load_gpr(t0, Rb);
    tcg_gen_movi_tl(t1, s12 * 4);
    tcg_gen_add_tl(t0, t0, t1);

    gen_load_mxu_gpr(t1, XRa);
    tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
                       (MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
                       ctx->default_tcg_memop_mask);

    if (postinc) {
        /* post-modify: write the computed address back to Rb */
        gen_store_gpr(t0, Rb);
    }
}

/*
 * S32LDDV XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
 * S32LDDVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
 *   in reversed byte seq.
 * S32LDIV XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
 *   post modify base address GPR.
 * S32LDIVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
 *   post modify base address GPR and load in reversed byte seq.
 */
static void gen_mxu_s32ldxvx(DisasContext *ctx, bool reversed,
                             bool postinc, uint32_t strd2)
{
    TCGv t0, t1;
    uint32_t XRa, Rb, Rc;

    t0 = tcg_temp_new();
    t1 = tcg_temp_new();

    XRa = extract32(ctx->opcode, 6, 4);
    Rc = extract32(ctx->opcode, 16, 5);
    Rb = extract32(ctx->opcode, 21, 5);

    /* address = GPR[Rb] + (GPR[Rc] << strd2) */
    gen_load_gpr(t0, Rb);
    gen_load_gpr(t1, Rc);
    tcg_gen_shli_tl(t1, t1, strd2);
    tcg_gen_add_tl(t0, t0, t1);

    tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx,
                       (MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
                       ctx->default_tcg_memop_mask);
    gen_store_mxu_gpr(t1, XRa);

    if (postinc) {
        /* post-modify: write the computed address back to Rb */
        gen_store_gpr(t0, Rb);
    }
}

/*
 * LXW Ra, Rb, Rc, STRD2 - Load a word from memory to GPR
 * LXB Ra, Rb, Rc, STRD2 - Load a byte from memory to GPR,
 *   sign extending to GPR size.
 * LXH Ra, Rb, Rc, STRD2 - Load a halfword from memory to GPR,
 *   sign extending to GPR size.
 * LXBU Ra, Rb, Rc, STRD2 - Load a byte from memory to GPR,
 *   zero extending to GPR size.
 * LXHU Ra, Rb, Rc, STRD2 - Load a halfword from memory to GPR,
 *   zero extending to GPR size.
 */
static void gen_mxu_lxx(DisasContext *ctx, uint32_t strd2, MemOp mop)
{
    TCGv t0, t1;
    uint32_t Ra, Rb, Rc;

    t0 = tcg_temp_new();
    t1 = tcg_temp_new();

    Ra = extract32(ctx->opcode, 11, 5);
    Rc = extract32(ctx->opcode, 16, 5);
    Rb = extract32(ctx->opcode, 21, 5);

    /* address = GPR[Rb] + (GPR[Rc] << strd2); width/extension come from mop */
    gen_load_gpr(t0, Rb);
    gen_load_gpr(t1, Rc);
    tcg_gen_shli_tl(t1, t1, strd2);
    tcg_gen_add_tl(t0, t0, t1);

    tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, mop | ctx->default_tcg_memop_mask);
    gen_store_gpr(t1, Ra);
}

/*
 * S32STDV XRa, Rb, Rc, STRD2 - Store a word from XRF to memory
 * S32STDVR XRa, Rb, Rc, STRD2 - Store a word from XRF to memory
 *   in reversed byte seq.
 * S32SDIV XRa, Rb, Rc, STRD2 - Store a word from XRF to memory,
 *   post modify base address GPR.
 * S32SDIVR XRa, Rb, Rc, STRD2 - Store a word from XRF to memory,
 *   post modify base address GPR and store in reversed byte seq.
1573 */ 1574 static void gen_mxu_s32stxvx(DisasContext *ctx, bool reversed, 1575 bool postinc, uint32_t strd2) 1576 { 1577 TCGv t0, t1; 1578 uint32_t XRa, Rb, Rc; 1579 1580 t0 = tcg_temp_new(); 1581 t1 = tcg_temp_new(); 1582 1583 XRa = extract32(ctx->opcode, 6, 4); 1584 Rc = extract32(ctx->opcode, 16, 5); 1585 Rb = extract32(ctx->opcode, 21, 5); 1586 1587 gen_load_gpr(t0, Rb); 1588 gen_load_gpr(t1, Rc); 1589 tcg_gen_shli_tl(t1, t1, strd2); 1590 tcg_gen_add_tl(t0, t0, t1); 1591 1592 gen_load_mxu_gpr(t1, XRa); 1593 tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, 1594 (MO_TESL ^ (reversed ? MO_BSWAP : 0)) | 1595 ctx->default_tcg_memop_mask); 1596 1597 if (postinc) { 1598 gen_store_gpr(t0, Rb); 1599 } 1600 } 1601 1602 /* 1603 * MXU instruction category: logic 1604 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1605 * 1606 * S32NOR S32AND S32OR S32XOR 1607 */ 1608 1609 /* 1610 * S32NOR XRa, XRb, XRc 1611 * Update XRa with the result of logical bitwise 'nor' operation 1612 * applied to the content of XRb and XRc. 
1613 */ 1614 static void gen_mxu_S32NOR(DisasContext *ctx) 1615 { 1616 uint32_t pad, XRc, XRb, XRa; 1617 1618 pad = extract32(ctx->opcode, 21, 5); 1619 XRc = extract32(ctx->opcode, 14, 4); 1620 XRb = extract32(ctx->opcode, 10, 4); 1621 XRa = extract32(ctx->opcode, 6, 4); 1622 1623 if (unlikely(pad != 0)) { 1624 /* opcode padding incorrect -> do nothing */ 1625 } else if (unlikely(XRa == 0)) { 1626 /* destination is zero register -> do nothing */ 1627 } else if (unlikely((XRb == 0) && (XRc == 0))) { 1628 /* both operands zero registers -> just set destination to all 1s */ 1629 tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0xFFFFFFFF); 1630 } else if (unlikely(XRb == 0)) { 1631 /* XRb zero register -> just set destination to the negation of XRc */ 1632 tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]); 1633 } else if (unlikely(XRc == 0)) { 1634 /* XRa zero register -> just set destination to the negation of XRb */ 1635 tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]); 1636 } else if (unlikely(XRb == XRc)) { 1637 /* both operands same -> just set destination to the negation of XRb */ 1638 tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]); 1639 } else { 1640 /* the most general case */ 1641 tcg_gen_nor_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]); 1642 } 1643 } 1644 1645 /* 1646 * S32AND XRa, XRb, XRc 1647 * Update XRa with the result of logical bitwise 'and' operation 1648 * applied to the content of XRb and XRc. 
1649 */ 1650 static void gen_mxu_S32AND(DisasContext *ctx) 1651 { 1652 uint32_t pad, XRc, XRb, XRa; 1653 1654 pad = extract32(ctx->opcode, 21, 5); 1655 XRc = extract32(ctx->opcode, 14, 4); 1656 XRb = extract32(ctx->opcode, 10, 4); 1657 XRa = extract32(ctx->opcode, 6, 4); 1658 1659 if (unlikely(pad != 0)) { 1660 /* opcode padding incorrect -> do nothing */ 1661 } else if (unlikely(XRa == 0)) { 1662 /* destination is zero register -> do nothing */ 1663 } else if (unlikely((XRb == 0) || (XRc == 0))) { 1664 /* one of operands zero register -> just set destination to all 0s */ 1665 tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0); 1666 } else if (unlikely(XRb == XRc)) { 1667 /* both operands same -> just set destination to one of them */ 1668 tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]); 1669 } else { 1670 /* the most general case */ 1671 tcg_gen_and_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]); 1672 } 1673 } 1674 1675 /* 1676 * S32OR XRa, XRb, XRc 1677 * Update XRa with the result of logical bitwise 'or' operation 1678 * applied to the content of XRb and XRc. 
1679 */ 1680 static void gen_mxu_S32OR(DisasContext *ctx) 1681 { 1682 uint32_t pad, XRc, XRb, XRa; 1683 1684 pad = extract32(ctx->opcode, 21, 5); 1685 XRc = extract32(ctx->opcode, 14, 4); 1686 XRb = extract32(ctx->opcode, 10, 4); 1687 XRa = extract32(ctx->opcode, 6, 4); 1688 1689 if (unlikely(pad != 0)) { 1690 /* opcode padding incorrect -> do nothing */ 1691 } else if (unlikely(XRa == 0)) { 1692 /* destination is zero register -> do nothing */ 1693 } else if (unlikely((XRb == 0) && (XRc == 0))) { 1694 /* both operands zero registers -> just set destination to all 0s */ 1695 tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0); 1696 } else if (unlikely(XRb == 0)) { 1697 /* XRb zero register -> just set destination to the content of XRc */ 1698 tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]); 1699 } else if (unlikely(XRc == 0)) { 1700 /* XRc zero register -> just set destination to the content of XRb */ 1701 tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]); 1702 } else if (unlikely(XRb == XRc)) { 1703 /* both operands same -> just set destination to one of them */ 1704 tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]); 1705 } else { 1706 /* the most general case */ 1707 tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]); 1708 } 1709 } 1710 1711 /* 1712 * S32XOR XRa, XRb, XRc 1713 * Update XRa with the result of logical bitwise 'xor' operation 1714 * applied to the content of XRb and XRc. 
1715 */ 1716 static void gen_mxu_S32XOR(DisasContext *ctx) 1717 { 1718 uint32_t pad, XRc, XRb, XRa; 1719 1720 pad = extract32(ctx->opcode, 21, 5); 1721 XRc = extract32(ctx->opcode, 14, 4); 1722 XRb = extract32(ctx->opcode, 10, 4); 1723 XRa = extract32(ctx->opcode, 6, 4); 1724 1725 if (unlikely(pad != 0)) { 1726 /* opcode padding incorrect -> do nothing */ 1727 } else if (unlikely(XRa == 0)) { 1728 /* destination is zero register -> do nothing */ 1729 } else if (unlikely((XRb == 0) && (XRc == 0))) { 1730 /* both operands zero registers -> just set destination to all 0s */ 1731 tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0); 1732 } else if (unlikely(XRb == 0)) { 1733 /* XRb zero register -> just set destination to the content of XRc */ 1734 tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]); 1735 } else if (unlikely(XRc == 0)) { 1736 /* XRc zero register -> just set destination to the content of XRb */ 1737 tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]); 1738 } else if (unlikely(XRb == XRc)) { 1739 /* both operands same -> just set destination to all 0s */ 1740 tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0); 1741 } else { 1742 /* the most general case */ 1743 tcg_gen_xor_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]); 1744 } 1745 } 1746 1747 /* 1748 * MXU instruction category: shift 1749 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1750 * 1751 * D32SLL D32SLR D32SAR D32SARL 1752 * D32SLLV D32SLRV D32SARV D32SARW 1753 * Q16SLL Q16SLR Q16SAR 1754 * Q16SLLV Q16SLRV Q16SARV 1755 */ 1756 1757 /* 1758 * D32SLL XRa, XRd, XRb, XRc, SFT4 1759 * Dual 32-bit shift left from XRb and XRc to SFT4 1760 * bits (0..15). Store to XRa and XRd respectively. 1761 * D32SLR XRa, XRd, XRb, XRc, SFT4 1762 * Dual 32-bit shift logic right from XRb and XRc 1763 * to SFT4 bits (0..15). Store to XRa and XRd respectively. 1764 * D32SAR XRa, XRd, XRb, XRc, SFT4 1765 * Dual 32-bit shift arithmetic right from XRb and XRc 1766 * to SFT4 bits (0..15). Store to XRa and XRd respectively. 
1767 */ 1768 static void gen_mxu_d32sxx(DisasContext *ctx, bool right, bool arithmetic) 1769 { 1770 uint32_t XRa, XRb, XRc, XRd, sft4; 1771 1772 XRa = extract32(ctx->opcode, 6, 4); 1773 XRb = extract32(ctx->opcode, 10, 4); 1774 XRc = extract32(ctx->opcode, 14, 4); 1775 XRd = extract32(ctx->opcode, 18, 4); 1776 sft4 = extract32(ctx->opcode, 22, 4); 1777 1778 TCGv t0 = tcg_temp_new(); 1779 TCGv t1 = tcg_temp_new(); 1780 1781 gen_load_mxu_gpr(t0, XRb); 1782 gen_load_mxu_gpr(t1, XRc); 1783 1784 if (right) { 1785 if (arithmetic) { 1786 tcg_gen_sari_tl(t0, t0, sft4); 1787 tcg_gen_sari_tl(t1, t1, sft4); 1788 } else { 1789 tcg_gen_shri_tl(t0, t0, sft4); 1790 tcg_gen_shri_tl(t1, t1, sft4); 1791 } 1792 } else { 1793 tcg_gen_shli_tl(t0, t0, sft4); 1794 tcg_gen_shli_tl(t1, t1, sft4); 1795 } 1796 gen_store_mxu_gpr(t0, XRa); 1797 gen_store_mxu_gpr(t1, XRd); 1798 } 1799 1800 /* 1801 * D32SLLV XRa, XRd, rs 1802 * Dual 32-bit shift left from XRa and XRd to rs[3:0] 1803 * bits. Store back to XRa and XRd respectively. 1804 * D32SLRV XRa, XRd, rs 1805 * Dual 32-bit shift logic right from XRa and XRd to rs[3:0] 1806 * bits. Store back to XRa and XRd respectively. 1807 * D32SARV XRa, XRd, rs 1808 * Dual 32-bit shift arithmetic right from XRa and XRd to rs[3:0] 1809 * bits. Store back to XRa and XRd respectively. 
1810 */ 1811 static void gen_mxu_d32sxxv(DisasContext *ctx, bool right, bool arithmetic) 1812 { 1813 uint32_t XRa, XRd, rs; 1814 1815 XRa = extract32(ctx->opcode, 10, 4); 1816 XRd = extract32(ctx->opcode, 14, 4); 1817 rs = extract32(ctx->opcode, 21, 5); 1818 1819 TCGv t0 = tcg_temp_new(); 1820 TCGv t1 = tcg_temp_new(); 1821 TCGv t2 = tcg_temp_new(); 1822 1823 gen_load_mxu_gpr(t0, XRa); 1824 gen_load_mxu_gpr(t1, XRd); 1825 gen_load_gpr(t2, rs); 1826 tcg_gen_andi_tl(t2, t2, 0x0f); 1827 1828 if (right) { 1829 if (arithmetic) { 1830 tcg_gen_sar_tl(t0, t0, t2); 1831 tcg_gen_sar_tl(t1, t1, t2); 1832 } else { 1833 tcg_gen_shr_tl(t0, t0, t2); 1834 tcg_gen_shr_tl(t1, t1, t2); 1835 } 1836 } else { 1837 tcg_gen_shl_tl(t0, t0, t2); 1838 tcg_gen_shl_tl(t1, t1, t2); 1839 } 1840 gen_store_mxu_gpr(t0, XRa); 1841 gen_store_mxu_gpr(t1, XRd); 1842 } 1843 1844 /* 1845 * D32SARL XRa, XRb, XRc, SFT4 1846 * Dual shift arithmetic right 32-bit integers in XRb and XRc 1847 * to SFT4 bits (0..15). Pack 16 LSBs of each into XRa. 1848 * 1849 * D32SARW XRa, XRb, XRc, rb 1850 * Dual shift arithmetic right 32-bit integers in XRb and XRc 1851 * to rb[3:0] bits. Pack 16 LSBs of each into XRa. 
1852 */ 1853 static void gen_mxu_d32sarl(DisasContext *ctx, bool sarw) 1854 { 1855 uint32_t XRa, XRb, XRc, rb; 1856 1857 XRa = extract32(ctx->opcode, 6, 4); 1858 XRb = extract32(ctx->opcode, 10, 4); 1859 XRc = extract32(ctx->opcode, 14, 4); 1860 rb = extract32(ctx->opcode, 21, 5); 1861 1862 if (unlikely(XRa == 0)) { 1863 /* destination is zero register -> do nothing */ 1864 } else { 1865 TCGv t0 = tcg_temp_new(); 1866 TCGv t1 = tcg_temp_new(); 1867 TCGv t2 = tcg_temp_new(); 1868 1869 if (!sarw) { 1870 /* Make SFT4 from rb field */ 1871 tcg_gen_movi_tl(t2, rb >> 1); 1872 } else { 1873 gen_load_gpr(t2, rb); 1874 tcg_gen_andi_tl(t2, t2, 0x0f); 1875 } 1876 gen_load_mxu_gpr(t0, XRb); 1877 gen_load_mxu_gpr(t1, XRc); 1878 tcg_gen_sar_tl(t0, t0, t2); 1879 tcg_gen_sar_tl(t1, t1, t2); 1880 tcg_gen_extract_tl(t2, t1, 0, 16); 1881 tcg_gen_deposit_tl(t2, t2, t0, 16, 16); 1882 gen_store_mxu_gpr(t2, XRa); 1883 } 1884 } 1885 1886 /* 1887 * Q16SLL XRa, XRd, XRb, XRc, SFT4 1888 * Quad 16-bit shift left from XRb and XRc to SFT4 1889 * bits (0..15). Store to XRa and XRd respectively. 1890 * Q16SLR XRa, XRd, XRb, XRc, SFT4 1891 * Quad 16-bit shift logic right from XRb and XRc 1892 * to SFT4 bits (0..15). Store to XRa and XRd respectively. 1893 * Q16SAR XRa, XRd, XRb, XRc, SFT4 1894 * Quad 16-bit shift arithmetic right from XRb and XRc 1895 * to SFT4 bits (0..15). Store to XRa and XRd respectively. 
1896 */ 1897 static void gen_mxu_q16sxx(DisasContext *ctx, bool right, bool arithmetic) 1898 { 1899 uint32_t XRa, XRb, XRc, XRd, sft4; 1900 1901 XRa = extract32(ctx->opcode, 6, 4); 1902 XRb = extract32(ctx->opcode, 10, 4); 1903 XRc = extract32(ctx->opcode, 14, 4); 1904 XRd = extract32(ctx->opcode, 18, 4); 1905 sft4 = extract32(ctx->opcode, 22, 4); 1906 1907 TCGv t0 = tcg_temp_new(); 1908 TCGv t1 = tcg_temp_new(); 1909 TCGv t2 = tcg_temp_new(); 1910 TCGv t3 = tcg_temp_new(); 1911 1912 gen_load_mxu_gpr(t0, XRb); 1913 gen_load_mxu_gpr(t2, XRc); 1914 1915 if (arithmetic) { 1916 tcg_gen_sextract_tl(t1, t0, 16, 16); 1917 tcg_gen_sextract_tl(t0, t0, 0, 16); 1918 tcg_gen_sextract_tl(t3, t2, 16, 16); 1919 tcg_gen_sextract_tl(t2, t2, 0, 16); 1920 } else { 1921 tcg_gen_extract_tl(t1, t0, 16, 16); 1922 tcg_gen_extract_tl(t0, t0, 0, 16); 1923 tcg_gen_extract_tl(t3, t2, 16, 16); 1924 tcg_gen_extract_tl(t2, t2, 0, 16); 1925 } 1926 1927 if (right) { 1928 if (arithmetic) { 1929 tcg_gen_sari_tl(t0, t0, sft4); 1930 tcg_gen_sari_tl(t1, t1, sft4); 1931 tcg_gen_sari_tl(t2, t2, sft4); 1932 tcg_gen_sari_tl(t3, t3, sft4); 1933 } else { 1934 tcg_gen_shri_tl(t0, t0, sft4); 1935 tcg_gen_shri_tl(t1, t1, sft4); 1936 tcg_gen_shri_tl(t2, t2, sft4); 1937 tcg_gen_shri_tl(t3, t3, sft4); 1938 } 1939 } else { 1940 tcg_gen_shli_tl(t0, t0, sft4); 1941 tcg_gen_shli_tl(t1, t1, sft4); 1942 tcg_gen_shli_tl(t2, t2, sft4); 1943 tcg_gen_shli_tl(t3, t3, sft4); 1944 } 1945 tcg_gen_deposit_tl(t0, t0, t1, 16, 16); 1946 tcg_gen_deposit_tl(t2, t2, t3, 16, 16); 1947 1948 gen_store_mxu_gpr(t0, XRa); 1949 gen_store_mxu_gpr(t2, XRd); 1950 } 1951 1952 /* 1953 * Q16SLLV XRa, XRd, rs 1954 * Quad 16-bit shift left from XRa and XRd to rs[3:0] 1955 * bits. Store to XRa and XRd respectively. 1956 * Q16SLRV XRa, XRd, rs 1957 * Quad 16-bit shift logic right from XRa and XRd to rs[3:0] 1958 * bits. Store to XRa and XRd respectively. 
1959 * Q16SARV XRa, XRd, rs 1960 * Quad 16-bit shift arithmetic right from XRa and XRd to rs[3:0] 1961 * bits. Store to XRa and XRd respectively. 1962 */ 1963 static void gen_mxu_q16sxxv(DisasContext *ctx, bool right, bool arithmetic) 1964 { 1965 uint32_t XRa, XRd, rs; 1966 1967 XRa = extract32(ctx->opcode, 10, 4); 1968 XRd = extract32(ctx->opcode, 14, 4); 1969 rs = extract32(ctx->opcode, 21, 5); 1970 1971 TCGv t0 = tcg_temp_new(); 1972 TCGv t1 = tcg_temp_new(); 1973 TCGv t2 = tcg_temp_new(); 1974 TCGv t3 = tcg_temp_new(); 1975 TCGv t5 = tcg_temp_new(); 1976 1977 gen_load_mxu_gpr(t0, XRa); 1978 gen_load_mxu_gpr(t2, XRd); 1979 gen_load_gpr(t5, rs); 1980 tcg_gen_andi_tl(t5, t5, 0x0f); 1981 1982 1983 if (arithmetic) { 1984 tcg_gen_sextract_tl(t1, t0, 16, 16); 1985 tcg_gen_sextract_tl(t0, t0, 0, 16); 1986 tcg_gen_sextract_tl(t3, t2, 16, 16); 1987 tcg_gen_sextract_tl(t2, t2, 0, 16); 1988 } else { 1989 tcg_gen_extract_tl(t1, t0, 16, 16); 1990 tcg_gen_extract_tl(t0, t0, 0, 16); 1991 tcg_gen_extract_tl(t3, t2, 16, 16); 1992 tcg_gen_extract_tl(t2, t2, 0, 16); 1993 } 1994 1995 if (right) { 1996 if (arithmetic) { 1997 tcg_gen_sar_tl(t0, t0, t5); 1998 tcg_gen_sar_tl(t1, t1, t5); 1999 tcg_gen_sar_tl(t2, t2, t5); 2000 tcg_gen_sar_tl(t3, t3, t5); 2001 } else { 2002 tcg_gen_shr_tl(t0, t0, t5); 2003 tcg_gen_shr_tl(t1, t1, t5); 2004 tcg_gen_shr_tl(t2, t2, t5); 2005 tcg_gen_shr_tl(t3, t3, t5); 2006 } 2007 } else { 2008 tcg_gen_shl_tl(t0, t0, t5); 2009 tcg_gen_shl_tl(t1, t1, t5); 2010 tcg_gen_shl_tl(t2, t2, t5); 2011 tcg_gen_shl_tl(t3, t3, t5); 2012 } 2013 tcg_gen_deposit_tl(t0, t0, t1, 16, 16); 2014 tcg_gen_deposit_tl(t2, t2, t3, 16, 16); 2015 2016 gen_store_mxu_gpr(t0, XRa); 2017 gen_store_mxu_gpr(t2, XRd); 2018 } 2019 2020 /* 2021 * MXU instruction category max/min/avg 2022 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2023 * 2024 * S32MAX D16MAX Q8MAX 2025 * S32MIN D16MIN Q8MIN 2026 * S32SLT D16SLT Q8SLT 2027 * Q8SLTU 2028 * D16AVG Q8AVG 2029 * D16AVGR Q8AVGR 2030 * S32MOVZ D16MOVZ Q8MOVZ 
2031 * S32MOVN D16MOVN Q8MOVN 2032 */ 2033 2034 /* 2035 * S32MAX XRa, XRb, XRc 2036 * Update XRa with the maximum of signed 32-bit integers contained 2037 * in XRb and XRc. 2038 * 2039 * S32MIN XRa, XRb, XRc 2040 * Update XRa with the minimum of signed 32-bit integers contained 2041 * in XRb and XRc. 2042 */ 2043 static void gen_mxu_S32MAX_S32MIN(DisasContext *ctx) 2044 { 2045 uint32_t pad, opc, XRc, XRb, XRa; 2046 2047 pad = extract32(ctx->opcode, 21, 5); 2048 opc = extract32(ctx->opcode, 18, 3); 2049 XRc = extract32(ctx->opcode, 14, 4); 2050 XRb = extract32(ctx->opcode, 10, 4); 2051 XRa = extract32(ctx->opcode, 6, 4); 2052 2053 if (unlikely(pad != 0)) { 2054 /* opcode padding incorrect -> do nothing */ 2055 } else if (unlikely(XRa == 0)) { 2056 /* destination is zero register -> do nothing */ 2057 } else if (unlikely((XRb == 0) && (XRc == 0))) { 2058 /* both operands zero registers -> just set destination to zero */ 2059 tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0); 2060 } else if (unlikely((XRb == 0) || (XRc == 0))) { 2061 /* exactly one operand is zero register - find which one is not...*/ 2062 uint32_t XRx = XRb ? XRb : XRc; 2063 /* ...and do max/min operation with one operand 0 */ 2064 if (opc == OPC_MXU_S32MAX) { 2065 tcg_gen_smax_i32(mxu_gpr[XRa - 1], mxu_gpr[XRx - 1], 0); 2066 } else { 2067 tcg_gen_smin_i32(mxu_gpr[XRa - 1], mxu_gpr[XRx - 1], 0); 2068 } 2069 } else if (unlikely(XRb == XRc)) { 2070 /* both operands same -> just set destination to one of them */ 2071 tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]); 2072 } else { 2073 /* the most general case */ 2074 if (opc == OPC_MXU_S32MAX) { 2075 tcg_gen_smax_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], 2076 mxu_gpr[XRc - 1]); 2077 } else { 2078 tcg_gen_smin_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], 2079 mxu_gpr[XRc - 1]); 2080 } 2081 } 2082 } 2083 2084 /* 2085 * D16MAX 2086 * Update XRa with the 16-bit-wise maximums of signed integers 2087 * contained in XRb and XRc. 
 *
 * D16MIN
 *   Update XRa with the 16-bit-wise minimums of signed integers
 *   contained in XRb and XRc.
 */
static void gen_mxu_D16MAX_D16MIN(DisasContext *ctx)
{
    uint32_t pad, opc, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    opc = extract32(ctx->opcode, 18, 3);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to zero */
        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
    } else if (unlikely((XRb == 0) || (XRc == 0))) {
        /* exactly one operand is zero register - find which one is not...*/
        uint32_t XRx = XRb ? XRb : XRc;
        /* ...and do half-word-wise max/min with one operand 0 */
        TCGv_i32 t0 = tcg_temp_new();
        TCGv_i32 t1 = tcg_constant_i32(0);
        TCGv_i32 t2 = tcg_temp_new();

        /*
         * The left half-word first: with the low half masked off,
         * a full-width signed max/min compares just the top half.
         */
        tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFFFF0000);
        if (opc == OPC_MXU_D16MAX) {
            tcg_gen_smax_i32(t2, t0, t1);
        } else {
            tcg_gen_smin_i32(t2, t0, t1);
        }

        /* the right half-word */
        tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0x0000FFFF);
        /* move half-words to the leftmost position */
        tcg_gen_shli_i32(t0, t0, 16);
        /* t0 will be max/min of t0 and t1 */
        if (opc == OPC_MXU_D16MAX) {
            tcg_gen_smax_i32(t0, t0, t1);
        } else {
            tcg_gen_smin_i32(t0, t0, t1);
        }
        /* return resulting half-words to its original position */
        tcg_gen_shri_i32(t0, t0, 16);
        /* finally update the destination */
        tcg_gen_or_i32(mxu_gpr[XRa - 1], t2, t0);
    } else if (unlikely(XRb == XRc)) {
        /* both operands same -> just set destination to one of them */
        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else {
        /* the most general case */
        TCGv_i32 t0 = tcg_temp_new();
        TCGv_i32 t1 = tcg_temp_new();
        TCGv_i32 t2 = tcg_temp_new();

        /*
         * The left half-word first: with the low halves masked off,
         * a full-width signed max/min compares just the top halves.
         */
        tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFFFF0000);
        tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFF0000);
        if (opc == OPC_MXU_D16MAX) {
            tcg_gen_smax_i32(t2, t0, t1);
        } else {
            tcg_gen_smin_i32(t2, t0, t1);
        }

        /* the right half-word */
        tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x0000FFFF);
        tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0x0000FFFF);
        /* move half-words to the leftmost position so the sign bit counts */
        tcg_gen_shli_i32(t0, t0, 16);
        tcg_gen_shli_i32(t1, t1, 16);
        /* t0 will be max/min of t0 and t1 */
        if (opc == OPC_MXU_D16MAX) {
            tcg_gen_smax_i32(t0, t0, t1);
        } else {
            tcg_gen_smin_i32(t0, t0, t1);
        }
        /* return resulting half-words to its original position */
        tcg_gen_shri_i32(t0, t0, 16);
        /* finally update the destination */
        tcg_gen_or_i32(mxu_gpr[XRa - 1], t2, t0);
    }
}

/*
 * Q8MAX
 *   Update XRa with the 8-bit-wise maximums of signed integers
 *   contained in XRb and XRc.
 *
 * Q8MIN
 *   Update XRa with the 8-bit-wise minimums of signed integers
 *   contained in XRb and XRc.
 */
static void gen_mxu_Q8MAX_Q8MIN(DisasContext *ctx)
{
    uint32_t pad, opc, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    opc = extract32(ctx->opcode, 18, 3);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to zero */
        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
    } else if (unlikely((XRb == 0) || (XRc == 0))) {
        /* exactly one operand is zero register - make it be the first...*/
        uint32_t XRx = XRb ? XRb : XRc;
        /* ...and do byte-wise max/min with one operand 0 */
        TCGv_i32 t0 = tcg_temp_new();
        TCGv_i32 t1 = tcg_constant_i32(0);
        TCGv_i32 t2 = tcg_temp_new();
        int32_t i;

        /*
         * the leftmost byte (byte 3) first; it is handled outside the
         * loop with an explicit 0xFF000000 mask (0xFF << 24 would
         * overflow a signed int)
         */
        tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFF000000);
        if (opc == OPC_MXU_Q8MAX) {
            tcg_gen_smax_i32(t2, t0, t1);
        } else {
            tcg_gen_smin_i32(t2, t0, t1);
        }

        /* bytes 2, 1, 0 */
        for (i = 2; i >= 0; i--) {
            /* extract the byte */
            tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFF << (8 * i));
            /* move the byte to the leftmost position */
            tcg_gen_shli_i32(t0, t0, 8 * (3 - i));
            /* t0 will be max/min of t0 and t1 */
            if (opc == OPC_MXU_Q8MAX) {
                tcg_gen_smax_i32(t0, t0, t1);
            } else {
                tcg_gen_smin_i32(t0, t0, t1);
            }
            /* return resulting byte to its original position */
            tcg_gen_shri_i32(t0, t0, 8 * (3 - i));
            /* finally update the destination */
            tcg_gen_or_i32(t2, t2, t0);
        }
        gen_store_mxu_gpr(t2, XRa);
    } else if (unlikely(XRb == XRc)) {
        /* both operands same -> just set destination to one of them */
        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else {
        /* the most general case */
        TCGv_i32 t0 = tcg_temp_new();
        TCGv_i32 t1 = tcg_temp_new();
        TCGv_i32 t2 = tcg_temp_new();
        int32_t i;

        /* the leftmost bytes (bytes 3) first, see mask note above */
        tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFF000000);
        tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF000000);
        if (opc == OPC_MXU_Q8MAX) {
            tcg_gen_smax_i32(t2, t0, t1);
        } else {
            tcg_gen_smin_i32(t2, t0, t1);
        }

        /* bytes 2, 1, 0 */
        for (i = 2; i >= 0; i--) {
            /* extract corresponding bytes */
            tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFF << (8 * i));
            tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF << (8 * i));
            /* move the bytes to the leftmost position */
            tcg_gen_shli_i32(t0, t0, 8 * (3 - i));
            tcg_gen_shli_i32(t1, t1, 8 * (3 - i));
            /* t0 will be max/min of t0 and t1 */
            if (opc == OPC_MXU_Q8MAX) {
                tcg_gen_smax_i32(t0, t0, t1);
            } else {
                tcg_gen_smin_i32(t0, t0, t1);
            }
            /* return resulting byte to its original position */
            tcg_gen_shri_i32(t0, t0, 8 * (3 - i));
            /* finally update the destination */
            tcg_gen_or_i32(t2, t2, t0);
        }
        gen_store_mxu_gpr(t2, XRa);
    }
}

/*
 * Q8SLT
 *   Update XRa with the signed "set less than" comparison of XRb and XRc
 *   on per-byte basis.
 *   a.k.a. XRa[0..3] = XRb[0..3] < XRc[0..3] ?
 *          1 : 0;
 */
static void gen_mxu_q8slt(DisasContext *ctx, bool sltu)
{
    uint32_t pad, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to zero */
        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
    } else if (unlikely(XRb == XRc)) {
        /* both operands same registers -> just set destination to zero */
        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
    } else {
        /* the most general case */
        TCGv t0 = tcg_temp_new();
        TCGv t1 = tcg_temp_new();
        TCGv t2 = tcg_temp_new();
        TCGv t3 = tcg_temp_new();
        TCGv t4 = tcg_temp_new();

        gen_load_mxu_gpr(t3, XRb);
        gen_load_mxu_gpr(t4, XRc);
        tcg_gen_movi_tl(t2, 0);

        /* compare the four byte lanes independently */
        for (int i = 0; i < 4; i++) {
            /* sltu selects zero- vs sign-extension of each byte lane */
            if (sltu) {
                tcg_gen_extract_tl(t0, t3, 8 * i, 8);
                tcg_gen_extract_tl(t1, t4, 8 * i, 8);
            } else {
                tcg_gen_sextract_tl(t0, t3, 8 * i, 8);
                tcg_gen_sextract_tl(t1, t4, 8 * i, 8);
            }
            /* each result lane holds the 0/1 outcome of the comparison */
            tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1);
            tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
        }
        gen_store_mxu_gpr(t2, XRa);
    }
}

/*
 * S32SLT
 *   Update XRa with the signed "set less than" comparison of XRb and XRc.
 *   a.k.a. XRa = XRb < XRc ?
 *          1 : 0;
 */
static void gen_mxu_S32SLT(DisasContext *ctx)
{
    uint32_t pad, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to zero */
        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
    } else if (unlikely(XRb == XRc)) {
        /* both operands same registers -> just set destination to zero */
        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
    } else {
        /* the most general case: a single signed 32-bit comparison */
        tcg_gen_setcond_tl(TCG_COND_LT, mxu_gpr[XRa - 1],
                           mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
    }
}

/*
 * D16SLT
 *   Update XRa with the signed "set less than" comparison of XRb and XRc
 *   on per half-word basis.
 *   a.k.a. XRa[0..1] = XRb[0..1] < XRc[0..1] ?
 *          1 : 0;
 */
static void gen_mxu_D16SLT(DisasContext *ctx)
{
    uint32_t pad, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to zero */
        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
    } else if (unlikely(XRb == XRc)) {
        /* both operands same registers -> just set destination to zero */
        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
    } else {
        /* the most general case */
        TCGv t0 = tcg_temp_new();
        TCGv t1 = tcg_temp_new();
        TCGv t2 = tcg_temp_new();
        TCGv t3 = tcg_temp_new();
        TCGv t4 = tcg_temp_new();

        gen_load_mxu_gpr(t3, XRb);
        gen_load_mxu_gpr(t4, XRc);
        /* high half-words: 0/1 outcome shifted into the high half of t2 */
        tcg_gen_sextract_tl(t0, t3, 16, 16);
        tcg_gen_sextract_tl(t1, t4, 16, 16);
        tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1);
        tcg_gen_shli_tl(t2, t0, 16);
        /* low half-words: 0/1 outcome kept in the low half */
        tcg_gen_sextract_tl(t0, t3, 0, 16);
        tcg_gen_sextract_tl(t1, t4, 0, 16);
        tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1);
        tcg_gen_or_tl(mxu_gpr[XRa - 1], t2, t0);
    }
}

/*
 * D16AVG
 *   Update XRa with the signed average of XRb and XRc
 *   on per half-word basis, rounding down.
 *   a.k.a. XRa[0..1] = (XRb[0..1] + XRc[0..1]) >> 1;
 *
 * D16AVGR
 *   Update XRa with the signed average of XRb and XRc
 *   on per half-word basis, math rounding 4/5.
 *   a.k.a.
 *          XRa[0..1] = (XRb[0..1] + XRc[0..1] + 1) >> 1;
 */
static void gen_mxu_d16avg(DisasContext *ctx, bool round45)
{
    uint32_t pad, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to zero */
        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
    } else if (unlikely(XRb == XRc)) {
        /* both operands same registers -> just set destination to same */
        tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else {
        /* the most general case */
        TCGv t0 = tcg_temp_new();
        TCGv t1 = tcg_temp_new();
        TCGv t2 = tcg_temp_new();
        TCGv t3 = tcg_temp_new();
        TCGv t4 = tcg_temp_new();

        gen_load_mxu_gpr(t3, XRb);
        gen_load_mxu_gpr(t4, XRc);
        /* high half-words */
        tcg_gen_sextract_tl(t0, t3, 16, 16);
        tcg_gen_sextract_tl(t1, t4, 16, 16);
        tcg_gen_add_tl(t0, t0, t1);
        if (round45) {
            /* AVGR variant: 4/5 math rounding via +1 before halving */
            tcg_gen_addi_tl(t0, t0, 1);
        }
        /*
         * (sum << 15) & 0xffff0000 == ((sum >> 1) << 16): halves the sum
         * and places it in the destination's high half-word in one shift
         */
        tcg_gen_shli_tl(t2, t0, 15);
        tcg_gen_andi_tl(t2, t2, 0xffff0000);
        /* low half-words */
        tcg_gen_sextract_tl(t0, t3, 0, 16);
        tcg_gen_sextract_tl(t1, t4, 0, 16);
        tcg_gen_add_tl(t0, t0, t1);
        if (round45) {
            tcg_gen_addi_tl(t0, t0, 1);
        }
        tcg_gen_shri_tl(t0, t0, 1);
        tcg_gen_deposit_tl(t2, t2, t0, 0, 16);
        gen_store_mxu_gpr(t2, XRa);
    }
}

/*
 * Q8AVG
 *   Update XRa with the signed average of XRb and XRc
 *   on per-byte basis, rounding down.
 *   a.k.a. XRa[0..3] = (XRb[0..3] + XRc[0..3]) >> 1;
 *
 * Q8AVGR
 *   Update XRa with the signed average of XRb and XRc
 *   on per-byte basis, math rounding 4/5.
 *   a.k.a.
 *          XRa[0..3] = (XRb[0..3] + XRc[0..3] + 1) >> 1;
 */
static void gen_mxu_q8avg(DisasContext *ctx, bool round45)
{
    uint32_t pad, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to zero */
        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
    } else if (unlikely(XRb == XRc)) {
        /* both operands same registers -> just set destination to same */
        tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else {
        /* the most general case */
        TCGv t0 = tcg_temp_new();
        TCGv t1 = tcg_temp_new();
        TCGv t2 = tcg_temp_new();
        TCGv t3 = tcg_temp_new();
        TCGv t4 = tcg_temp_new();

        gen_load_mxu_gpr(t3, XRb);
        gen_load_mxu_gpr(t4, XRc);
        tcg_gen_movi_tl(t2, 0);

        /* average each of the four byte lanes independently */
        for (int i = 0; i < 4; i++) {
            tcg_gen_extract_tl(t0, t3, 8 * i, 8);
            tcg_gen_extract_tl(t1, t4, 8 * i, 8);
            tcg_gen_add_tl(t0, t0, t1);
            if (round45) {
                /* AVGR variant: 4/5 math rounding via +1 before halving */
                tcg_gen_addi_tl(t0, t0, 1);
            }
            tcg_gen_shri_tl(t0, t0, 1);
            tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
        }
        gen_store_mxu_gpr(t2, XRa);
    }
}

/*
 * Q8MOVZ
 *   Quadruple 8-bit packed conditional move where
 *   XRb contains conditions, XRc what to move and
 *   XRa is the destination.
 *   a.k.a. if (XRb[0..3] == 0) { XRa[0..3] = XRc[0..3] }
 *
 * Q8MOVN
 *   Quadruple 8-bit packed conditional move where
 *   XRb contains conditions, XRc what to move and
 *   XRa is the destination.
 *   a.k.a.
 *          if (XRb[0..3] != 0) { XRa[0..3] = XRc[0..3] }
 */
static void gen_mxu_q8movzn(DisasContext *ctx, TCGCond cond)
{
    uint32_t XRc, XRb, XRa;

    XRa = extract32(ctx->opcode, 6, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRc = extract32(ctx->opcode, 14, 4);

    TCGv t0 = tcg_temp_new();
    TCGv t1 = tcg_temp_new();
    TCGv t2 = tcg_temp_new();
    TCGv t3 = tcg_temp_new();
    TCGLabel *l_quarterdone = gen_new_label();
    TCGLabel *l_halfdone = gen_new_label();
    TCGLabel *l_quarterrest = gen_new_label();
    TCGLabel *l_done = gen_new_label();

    /* t0 = source (XRc), t1 = conditions (XRb), t2 = destination (XRa) */
    gen_load_mxu_gpr(t0, XRc);
    gen_load_mxu_gpr(t1, XRb);
    gen_load_mxu_gpr(t2, XRa);

    /*
     * For each byte lane: if `cond` holds for the XRb lane (vs 0), the
     * branch skips the deposit and the XRa lane is left unchanged;
     * otherwise the lane is copied from XRc.  So `cond` is the negation
     * of the move condition.
     */
    tcg_gen_extract_tl(t3, t1, 24, 8);
    tcg_gen_brcondi_tl(cond, t3, 0, l_quarterdone);
    tcg_gen_extract_tl(t3, t0, 24, 8);
    tcg_gen_deposit_tl(t2, t2, t3, 24, 8);

    gen_set_label(l_quarterdone);
    tcg_gen_extract_tl(t3, t1, 16, 8);
    tcg_gen_brcondi_tl(cond, t3, 0, l_halfdone);
    tcg_gen_extract_tl(t3, t0, 16, 8);
    tcg_gen_deposit_tl(t2, t2, t3, 16, 8);

    gen_set_label(l_halfdone);
    tcg_gen_extract_tl(t3, t1, 8, 8);
    tcg_gen_brcondi_tl(cond, t3, 0, l_quarterrest);
    tcg_gen_extract_tl(t3, t0, 8, 8);
    tcg_gen_deposit_tl(t2, t2, t3, 8, 8);

    gen_set_label(l_quarterrest);
    tcg_gen_extract_tl(t3, t1, 0, 8);
    tcg_gen_brcondi_tl(cond, t3, 0, l_done);
    tcg_gen_extract_tl(t3, t0, 0, 8);
    tcg_gen_deposit_tl(t2, t2, t3, 0, 8);

    gen_set_label(l_done);
    gen_store_mxu_gpr(t2, XRa);
}

/*
 * D16MOVZ
 *   Double 16-bit packed conditional move where
 *   XRb contains conditions, XRc what to move and
 *   XRa is the destination.
 *   a.k.a. if (XRb[0..1] == 0) { XRa[0..1] = XRc[0..1] }
 *
 * D16MOVN
 *   Double 16-bit packed conditional move where
 *   XRb contains conditions, XRc what to move and
 *   XRa is the destination.
 *   a.k.a.
 *          if (XRb[0..1] != 0) { XRa[0..1] = XRc[0..1] }
 */
static void gen_mxu_d16movzn(DisasContext *ctx, TCGCond cond)
{
    uint32_t XRc, XRb, XRa;

    XRa = extract32(ctx->opcode, 6, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRc = extract32(ctx->opcode, 14, 4);

    TCGv t0 = tcg_temp_new();
    TCGv t1 = tcg_temp_new();
    TCGv t2 = tcg_temp_new();
    TCGv t3 = tcg_temp_new();
    TCGLabel *l_halfdone = gen_new_label();
    TCGLabel *l_done = gen_new_label();

    /* t0 = source (XRc), t1 = conditions (XRb), t2 = destination (XRa) */
    gen_load_mxu_gpr(t0, XRc);
    gen_load_mxu_gpr(t1, XRb);
    gen_load_mxu_gpr(t2, XRa);

    /*
     * For each half-word lane: if `cond` holds for the XRb lane (vs 0),
     * the branch skips the deposit and the XRa lane stays unchanged;
     * otherwise the lane is copied from XRc.
     */
    tcg_gen_extract_tl(t3, t1, 16, 16);
    tcg_gen_brcondi_tl(cond, t3, 0, l_halfdone);
    tcg_gen_extract_tl(t3, t0, 16, 16);
    tcg_gen_deposit_tl(t2, t2, t3, 16, 16);

    gen_set_label(l_halfdone);
    tcg_gen_extract_tl(t3, t1, 0, 16);
    tcg_gen_brcondi_tl(cond, t3, 0, l_done);
    tcg_gen_extract_tl(t3, t0, 0, 16);
    tcg_gen_deposit_tl(t2, t2, t3, 0, 16);

    gen_set_label(l_done);
    gen_store_mxu_gpr(t2, XRa);
}

/*
 * S32MOVZ
 *   Single 32-bit conditional move where
 *   XRb contains conditions, XRc what to move and
 *   XRa is the destination.
 *   a.k.a. if (XRb == 0) { XRa = XRc }
 *
 * S32MOVN
 *   Single 32-bit conditional move where
 *   XRb contains conditions, XRc what to move and
 *   XRa is the destination.
 *   a.k.a.
 *          if (XRb != 0) { XRa = XRc }
 */
static void gen_mxu_s32movzn(DisasContext *ctx, TCGCond cond)
{
    uint32_t XRc, XRb, XRa;

    XRa = extract32(ctx->opcode, 6, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRc = extract32(ctx->opcode, 14, 4);

    TCGv t0 = tcg_temp_new();
    TCGv t1 = tcg_temp_new();
    TCGLabel *l_done = gen_new_label();

    gen_load_mxu_gpr(t0, XRc);
    gen_load_mxu_gpr(t1, XRb);

    /* if `cond` holds for XRb (vs 0), skip the move; else XRa = XRc */
    tcg_gen_brcondi_tl(cond, t1, 0, l_done);
    gen_store_mxu_gpr(t0, XRa);
    gen_set_label(l_done);
}

/*
 * MXU instruction category: Addition and subtraction
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *              S32CPS      D16CPS
 *                          Q8ADD
 */

/*
 * S32CPS
 *   Update XRa if XRc < 0 by value of 0 - XRb
 *   else XRa = XRb
 */
static void gen_mxu_S32CPS(DisasContext *ctx)
{
    uint32_t pad, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely(XRb == 0)) {
        /* XRc make no sense 0 - 0 = 0 -> just set destination to zero */
        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
    } else if (unlikely(XRc == 0)) {
        /* condition always false -> just move XRb to XRa */
        tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else {
        /* the most general case */
        TCGv t0 = tcg_temp_new();
        TCGLabel *l_not_less = gen_new_label();
        TCGLabel *l_done = gen_new_label();

        /* t0 = (XRc < 0) ? -XRb : XRb */
        tcg_gen_brcondi_tl(TCG_COND_GE, mxu_gpr[XRc - 1], 0, l_not_less);
        tcg_gen_neg_tl(t0, mxu_gpr[XRb - 1]);
        tcg_gen_br(l_done);
        gen_set_label(l_not_less);
        gen_load_mxu_gpr(t0, XRb);
        gen_set_label(l_done);
        gen_store_mxu_gpr(t0, XRa);
    }
}

/*
 * D16CPS
 *   Update XRa[0..1] if XRc[0..1] < 0 by value of 0 - XRb[0..1]
 *   else XRa[0..1] = XRb[0..1]
 */
static void gen_mxu_D16CPS(DisasContext *ctx)
{
    uint32_t pad, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely(XRb == 0)) {
        /* XRc make no sense 0 - 0 = 0 -> just set destination to zero */
        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
    } else if (unlikely(XRc == 0)) {
        /* condition always false -> just move XRb to XRa */
        tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else {
        /* the most general case */
        TCGv t0 = tcg_temp_new();
        TCGv t1 = tcg_temp_new();
        TCGLabel *l_done_hi = gen_new_label();
        TCGLabel *l_not_less_lo = gen_new_label();
        TCGLabel *l_done_lo = gen_new_label();

        /* high half: t1 = XRb.H, negated when XRc.H < 0 */
        tcg_gen_sextract_tl(t0, mxu_gpr[XRc - 1], 16, 16);
        tcg_gen_sextract_tl(t1, mxu_gpr[XRb - 1], 16, 16);
        tcg_gen_brcondi_tl(TCG_COND_GE, t0, 0, l_done_hi);
        tcg_gen_subfi_tl(t1, 0, t1);

        gen_set_label(l_done_hi);
        tcg_gen_shli_i32(t1, t1, 16);

        /* low half: t0 = XRb.L, negated when XRc.L < 0 */
        tcg_gen_sextract_tl(t0, mxu_gpr[XRc - 1], 0, 16);
        tcg_gen_brcondi_tl(TCG_COND_GE, t0, 0, l_not_less_lo);
        tcg_gen_sextract_tl(t0, mxu_gpr[XRb - 1], 0, 16);
        tcg_gen_subfi_tl(t0, 0, t0);
        tcg_gen_br(l_done_lo);

        gen_set_label(l_not_less_lo);
        tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 0, 16);

        gen_set_label(l_done_lo);
        /* combine: high half from t1, low half-word from t0 */
        tcg_gen_deposit_tl(mxu_gpr[XRa - 1], t1, t0, 0, 16);
    }
}

/*
 * Q8ABD XRa, XRb, XRc
 *   Gets absolute difference for quadruple of 8-bit
 *   packed in XRb to another one in XRc,
 *   put the result in XRa.
 *   a.k.a. XRa[0..3] = abs(XRb[0..3] - XRc[0..3]);
 */
static void gen_mxu_Q8ABD(DisasContext *ctx)
{
    uint32_t pad, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 3);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to zero */
        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
    } else {
        /* the most general case */
        TCGv t0 = tcg_temp_new();
        TCGv t1 = tcg_temp_new();
        TCGv t2 = tcg_temp_new();
        TCGv t3 = tcg_temp_new();
        TCGv t4 = tcg_temp_new();

        gen_load_mxu_gpr(t3, XRb);
        gen_load_mxu_gpr(t4, XRc);
        tcg_gen_movi_tl(t2, 0);

        /* per byte lane: |XRb lane - XRc lane| (zero-extended operands) */
        for (int i = 0; i < 4; i++) {
            tcg_gen_extract_tl(t0, t3, 8 * i, 8);
            tcg_gen_extract_tl(t1, t4, 8 * i, 8);

            tcg_gen_sub_tl(t0, t0, t1);
            tcg_gen_abs_tl(t0, t0);

            tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
        }
        gen_store_mxu_gpr(t2, XRa);
    }
}

/*
 * Q8ADD XRa, XRb, XRc, ptn2
 *   Add/subtract quadruple of 8-bit packed in XRb
 *   to another one in XRc, put the result in XRa.
 */
static void gen_mxu_Q8ADD(DisasContext *ctx)
{
    uint32_t aptn2, pad, XRc, XRb, XRa;

    aptn2 = extract32(ctx->opcode, 24, 2);
    pad = extract32(ctx->opcode, 21, 3);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to zero */
        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
    } else {
        /* the most general case */
        TCGv t0 = tcg_temp_new();
        TCGv t1 = tcg_temp_new();
        TCGv t2 = tcg_temp_new();
        TCGv t3 = tcg_temp_new();
        TCGv t4 = tcg_temp_new();

        gen_load_mxu_gpr(t3, XRb);
        gen_load_mxu_gpr(t4, XRc);

        /*
         * process byte lanes 0..3; operands are shifted right by 8
         * each iteration so the current lane is always the low byte.
         * aptn2 bit 0 selects add/sub for lanes 0-1, bit 1 for lanes 2-3.
         */
        for (int i = 0; i < 4; i++) {
            tcg_gen_andi_tl(t0, t3, 0xff);
            tcg_gen_andi_tl(t1, t4, 0xff);

            if (i < 2) {
                if (aptn2 & 0x01) {
                    tcg_gen_sub_tl(t0, t0, t1);
                } else {
                    tcg_gen_add_tl(t0, t0, t1);
                }
            } else {
                if (aptn2 & 0x02) {
                    tcg_gen_sub_tl(t0, t0, t1);
                } else {
                    tcg_gen_add_tl(t0, t0, t1);
                }
            }
            if (i < 3) {
                tcg_gen_shri_tl(t3, t3, 8);
                tcg_gen_shri_tl(t4, t4, 8);
            }
            if (i > 0) {
                tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
            } else {
                /* first lane initializes the accumulator */
                tcg_gen_andi_tl(t0, t0, 0xff);
                tcg_gen_mov_tl(t2, t0);
            }
        }
        gen_store_mxu_gpr(t2, XRa);
    }
}

/*
 * Q8ADDE XRa, XRb, XRc, XRd, aptn2
 *   Add/subtract quadruple of 8-bit packed in XRb
 *   to another one in XRc, with zero extending
 *   to 16-bit and put results as packed 16-bit data
 *   into XRa and XRd.
 *   aptn2 manages action add or subtract of pairs of data.
 *
 * Q8ACCE XRa, XRb, XRc, XRd, aptn2
 *   Add/subtract quadruple of 8-bit packed in XRb
 *   to another one in XRc, with zero extending
 *   to 16-bit and accumulate results as packed 16-bit data
 *   into XRa and XRd.
 *   aptn2 manages action add or subtract of pairs of data.
 */
static void gen_mxu_q8adde(DisasContext *ctx, bool accumulate)
{
    uint32_t aptn2, XRd, XRc, XRb, XRa;

    aptn2 = extract32(ctx->opcode, 24, 2);
    XRd = extract32(ctx->opcode, 18, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to zero */
        if (XRa != 0) {
            tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
        }
        if (XRd != 0) {
            tcg_gen_movi_tl(mxu_gpr[XRd - 1], 0);
        }
    } else {
        /* the most general case */
        TCGv t0 = tcg_temp_new();
        TCGv t1 = tcg_temp_new();
        TCGv t2 = tcg_temp_new();
        TCGv t3 = tcg_temp_new();
        TCGv t4 = tcg_temp_new();
        TCGv t5 = tcg_temp_new();

        /*
         * XRa receives the results for the two high byte lanes (aptn2
         * bit 1), XRd the two low byte lanes (aptn2 bit 0); each result
         * is a zero-extended-to-16-bit sum/difference.
         */
        if (XRa != 0) {
            tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 16, 8);
            tcg_gen_extract_tl(t1, mxu_gpr[XRc - 1], 16, 8);
            tcg_gen_extract_tl(t2, mxu_gpr[XRb - 1], 24, 8);
            tcg_gen_extract_tl(t3, mxu_gpr[XRc - 1], 24, 8);
            if (aptn2 & 2) {
                tcg_gen_sub_tl(t0, t0, t1);
                tcg_gen_sub_tl(t2, t2, t3);
            } else {
                tcg_gen_add_tl(t0, t0, t1);
                tcg_gen_add_tl(t2, t2, t3);
            }
            if (accumulate) {
                /* Q8ACCE: add the previous XRa half-words */
                gen_load_mxu_gpr(t5, XRa);
                tcg_gen_extract_tl(t1, t5, 0, 16);
                tcg_gen_extract_tl(t3, t5, 16, 16);
                tcg_gen_add_tl(t0, t0, t1);
                tcg_gen_add_tl(t2, t2, t3);
            }
            tcg_gen_shli_tl(t2, t2, 16);
            tcg_gen_extract_tl(t0, t0, 0, 16);
            tcg_gen_or_tl(t4, t2, t0);
        }
        if (XRd != 0) {
            tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 0, 8);
            tcg_gen_extract_tl(t1, mxu_gpr[XRc - 1], 0, 8);
            tcg_gen_extract_tl(t2, mxu_gpr[XRb - 1], 8, 8);
            tcg_gen_extract_tl(t3, mxu_gpr[XRc - 1], 8, 8);
            if (aptn2 & 1) {
                tcg_gen_sub_tl(t0, t0, t1);
                tcg_gen_sub_tl(t2, t2, t3);
            } else {
                tcg_gen_add_tl(t0, t0, t1);
                tcg_gen_add_tl(t2, t2, t3);
            }
            if (accumulate) {
                /* Q8ACCE: add the previous XRd half-words */
                gen_load_mxu_gpr(t5, XRd);
                tcg_gen_extract_tl(t1, t5, 0, 16);
                tcg_gen_extract_tl(t3, t5, 16, 16);
                tcg_gen_add_tl(t0, t0, t1);
                tcg_gen_add_tl(t2, t2, t3);
            }
            tcg_gen_shli_tl(t2, t2, 16);
            tcg_gen_extract_tl(t0, t0, 0, 16);
            tcg_gen_or_tl(t5, t2, t0);
        }

        /*
         * NOTE(review): if XRa (or XRd) is 0, t4 (or t5) is stored
         * without having been written above - presumably
         * gen_store_mxu_gpr() is a no-op for register 0; confirm.
         */
        gen_store_mxu_gpr(t4, XRa);
        gen_store_mxu_gpr(t5, XRd);
    }
}

/*
 * D8SUM XRa, XRb, XRc
 *   Double parallel add of quadruple unsigned 8-bit together
 *   with zero extending to 16-bit data.
 * D8SUMC XRa, XRb, XRc
 *   Double parallel add of quadruple unsigned 8-bit together
 *   with zero extending to 16-bit data and adding 2 to each
 *   parallel result.
2988 */ 2989 static void gen_mxu_d8sum(DisasContext *ctx, bool sumc) 2990 { 2991 uint32_t pad, pad2, XRc, XRb, XRa; 2992 2993 pad = extract32(ctx->opcode, 24, 2); 2994 pad2 = extract32(ctx->opcode, 18, 4); 2995 XRc = extract32(ctx->opcode, 14, 4); 2996 XRb = extract32(ctx->opcode, 10, 4); 2997 XRa = extract32(ctx->opcode, 6, 4); 2998 2999 if (unlikely(pad != 0 || pad2 != 0)) { 3000 /* opcode padding incorrect -> do nothing */ 3001 } else if (unlikely(XRa == 0)) { 3002 /* destination is zero register -> do nothing */ 3003 } else if (unlikely((XRb == 0) && (XRc == 0))) { 3004 /* both operands zero registers -> just set destination to zero */ 3005 tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); 3006 } else { 3007 /* the most general case */ 3008 TCGv t0 = tcg_temp_new(); 3009 TCGv t1 = tcg_temp_new(); 3010 TCGv t2 = tcg_temp_new(); 3011 TCGv t3 = tcg_temp_new(); 3012 TCGv t4 = tcg_temp_new(); 3013 TCGv t5 = tcg_temp_new(); 3014 3015 if (XRb != 0) { 3016 tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 0, 8); 3017 tcg_gen_extract_tl(t1, mxu_gpr[XRb - 1], 8, 8); 3018 tcg_gen_extract_tl(t2, mxu_gpr[XRb - 1], 16, 8); 3019 tcg_gen_extract_tl(t3, mxu_gpr[XRb - 1], 24, 8); 3020 tcg_gen_add_tl(t4, t0, t1); 3021 tcg_gen_add_tl(t4, t4, t2); 3022 tcg_gen_add_tl(t4, t4, t3); 3023 } else { 3024 tcg_gen_mov_tl(t4, 0); 3025 } 3026 if (XRc != 0) { 3027 tcg_gen_extract_tl(t0, mxu_gpr[XRc - 1], 0, 8); 3028 tcg_gen_extract_tl(t1, mxu_gpr[XRc - 1], 8, 8); 3029 tcg_gen_extract_tl(t2, mxu_gpr[XRc - 1], 16, 8); 3030 tcg_gen_extract_tl(t3, mxu_gpr[XRc - 1], 24, 8); 3031 tcg_gen_add_tl(t5, t0, t1); 3032 tcg_gen_add_tl(t5, t5, t2); 3033 tcg_gen_add_tl(t5, t5, t3); 3034 } else { 3035 tcg_gen_mov_tl(t5, 0); 3036 } 3037 3038 if (sumc) { 3039 tcg_gen_addi_tl(t4, t4, 2); 3040 tcg_gen_addi_tl(t5, t5, 2); 3041 } 3042 tcg_gen_shli_tl(t4, t4, 16); 3043 3044 tcg_gen_or_tl(mxu_gpr[XRa - 1], t4, t5); 3045 } 3046 } 3047 3048 /* 3049 * Q16ADD XRa, XRb, XRc, XRd, aptn2, optn2 - Quad packed 3050 * 16-bit pattern addition. 
 */
static void gen_mxu_q16add(DisasContext *ctx)
{
    uint32_t aptn2, optn2, XRc, XRb, XRa, XRd;

    aptn2 = extract32(ctx->opcode, 24, 2);
    optn2 = extract32(ctx->opcode, 22, 2);
    XRd = extract32(ctx->opcode, 18, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    TCGv t0 = tcg_temp_new();
    TCGv t1 = tcg_temp_new();
    TCGv t2 = tcg_temp_new();
    TCGv t3 = tcg_temp_new();
    TCGv t4 = tcg_temp_new();
    TCGv t5 = tcg_temp_new();

    /* t0 = XRb.L, t1 = XRb.H */
    gen_load_mxu_gpr(t1, XRb);
    tcg_gen_extract_tl(t0, t1, 0, 16);
    tcg_gen_extract_tl(t1, t1, 16, 16);

    /* t2 = XRc.L, t3 = XRc.H */
    gen_load_mxu_gpr(t3, XRc);
    tcg_gen_extract_tl(t2, t3, 0, 16);
    tcg_gen_extract_tl(t3, t3, 16, 16);

    /* optn2 selects which XRb half feeds each operation */
    switch (optn2) {
    case MXU_OPTN2_WW: /* XRB.H+XRC.H == lop, XRB.L+XRC.L == rop */
        tcg_gen_mov_tl(t4, t1);
        tcg_gen_mov_tl(t5, t0);
        break;
    case MXU_OPTN2_LW: /* XRB.L+XRC.H == lop, XRB.L+XRC.L == rop */
        tcg_gen_mov_tl(t4, t0);
        tcg_gen_mov_tl(t5, t0);
        break;
    case MXU_OPTN2_HW: /* XRB.H+XRC.H == lop, XRB.H+XRC.L == rop */
        tcg_gen_mov_tl(t4, t1);
        tcg_gen_mov_tl(t5, t1);
        break;
    case MXU_OPTN2_XW: /* XRB.L+XRC.H == lop, XRB.H+XRC.L == rop */
        tcg_gen_mov_tl(t4, t0);
        tcg_gen_mov_tl(t5, t1);
        break;
    }

    /*
     * t4/t5 end up in XRa, t0/t1 in XRd; the aptn2 comments follow the
     * same lop/rop convention as gen_mxu_q16acc() below (original
     * comments wrongly read "lop +, rop +" for all four cases).
     */
    switch (aptn2) {
    case MXU_APTN2_AA: /* lop +, rop + */
        tcg_gen_add_tl(t0, t4, t3);
        tcg_gen_add_tl(t1, t5, t2);
        tcg_gen_add_tl(t4, t4, t3);
        tcg_gen_add_tl(t5, t5, t2);
        break;
    case MXU_APTN2_AS: /* lop +, rop - */
        tcg_gen_sub_tl(t0, t4, t3);
        tcg_gen_sub_tl(t1, t5, t2);
        tcg_gen_add_tl(t4, t4, t3);
        tcg_gen_add_tl(t5, t5, t2);
        break;
    case MXU_APTN2_SA: /* lop -, rop + */
        tcg_gen_add_tl(t0, t4, t3);
        tcg_gen_add_tl(t1, t5, t2);
        tcg_gen_sub_tl(t4, t4, t3);
        tcg_gen_sub_tl(t5, t5, t2);
        break;
    case MXU_APTN2_SS: /* lop -, rop - */
        tcg_gen_sub_tl(t0, t4, t3);
        tcg_gen_sub_tl(t1, t5, t2);
        tcg_gen_sub_tl(t4, t4, t3);
        tcg_gen_sub_tl(t5, t5, t2);
        break;
    }

    /* repack: high-half results shifted up, low halves masked */
    tcg_gen_shli_tl(t0, t0, 16);
    tcg_gen_extract_tl(t1, t1, 0, 16);
    tcg_gen_shli_tl(t4, t4, 16);
    tcg_gen_extract_tl(t5, t5, 0, 16);

    /*
     * NOTE(review): unlike gen_mxu_q16acc() below, XRa/XRd are not
     * checked for 0 before indexing mxu_gpr[] - confirm the decoder
     * guarantees nonzero destinations here.
     */
    tcg_gen_or_tl(mxu_gpr[XRa - 1], t4, t5);
    tcg_gen_or_tl(mxu_gpr[XRd - 1], t0, t1);
}

/*
 * Q16ACC XRa, XRb, XRc, XRd, aptn2 - Quad packed
 *   16-bit addition/subtraction with accumulate.
 */
static void gen_mxu_q16acc(DisasContext *ctx)
{
    uint32_t aptn2, XRc, XRb, XRa, XRd;

    aptn2 = extract32(ctx->opcode, 24, 2);
    XRd = extract32(ctx->opcode, 18, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    TCGv t0 = tcg_temp_new();
    TCGv t1 = tcg_temp_new();
    TCGv t2 = tcg_temp_new();
    TCGv t3 = tcg_temp_new();
    TCGv s3 = tcg_temp_new();
    TCGv s2 = tcg_temp_new();
    TCGv s1 = tcg_temp_new();
    TCGv s0 = tcg_temp_new();

    /* t0 = XRb.L, t1 = XRb.H */
    gen_load_mxu_gpr(t1, XRb);
    tcg_gen_extract_tl(t0, t1, 0, 16);
    tcg_gen_extract_tl(t1, t1, 16, 16);

    /* t2 = XRc.L, t3 = XRc.H */
    gen_load_mxu_gpr(t3, XRc);
    tcg_gen_extract_tl(t2, t3, 0, 16);
    tcg_gen_extract_tl(t3, t3, 16, 16);

    /* s1/s0 are accumulated into XRa, s3/s2 into XRd */
    switch (aptn2) {
    case MXU_APTN2_AA: /* lop +, rop + */
        tcg_gen_add_tl(s3, t1, t3);
        tcg_gen_add_tl(s2, t0, t2);
        tcg_gen_add_tl(s1, t1, t3);
        tcg_gen_add_tl(s0, t0, t2);
        break;
    case MXU_APTN2_AS: /* lop +, rop - */
        tcg_gen_sub_tl(s3, t1, t3);
        tcg_gen_sub_tl(s2, t0, t2);
        tcg_gen_add_tl(s1, t1, t3);
        tcg_gen_add_tl(s0, t0, t2);
        break;
    case MXU_APTN2_SA: /* lop -, rop + */
        tcg_gen_add_tl(s3, t1, t3);
        tcg_gen_add_tl(s2, t0, t2);
        tcg_gen_sub_tl(s1, t1, t3);
        tcg_gen_sub_tl(s0, t0, t2);
        break;
    case MXU_APTN2_SS: /* lop -, rop - */
        tcg_gen_sub_tl(s3, t1, t3);
        tcg_gen_sub_tl(s2, t0, t2);
        tcg_gen_sub_tl(s1, t1, t3);
        tcg_gen_sub_tl(s0, t0, t2);
        break;
    }

    if (XRa != 0) {
        /* XRa.L += s0; XRa.H += s1 (each half-word wraps independently) */
        tcg_gen_add_tl(t0, mxu_gpr[XRa - 1], s0);
        tcg_gen_extract_tl(t0, t0, 0, 16);
        tcg_gen_extract_tl(t1, mxu_gpr[XRa - 1], 16, 16);
        tcg_gen_add_tl(t1, t1, s1);
        tcg_gen_shli_tl(t1, t1, 16);
        tcg_gen_or_tl(mxu_gpr[XRa - 1], t1, t0);
    }

    if (XRd != 0) {
        /* XRd.L += s2; XRd.H += s3 */
        tcg_gen_add_tl(t0, mxu_gpr[XRd - 1], s2);
        tcg_gen_extract_tl(t0, t0, 0, 16);
        tcg_gen_extract_tl(t1, mxu_gpr[XRd - 1], 16, 16);
        tcg_gen_add_tl(t1, t1, s3);
        tcg_gen_shli_tl(t1, t1, 16);
        tcg_gen_or_tl(mxu_gpr[XRd - 1], t1, t0);
    }
}

/*
 * Q16ACCM XRa, XRb, XRc, XRd, aptn2 - Quad packed
 *   16-bit accumulate.
 */
static void gen_mxu_q16accm(DisasContext *ctx)
{
    uint32_t aptn2, XRc, XRb, XRa, XRd;

    aptn2 = extract32(ctx->opcode, 24, 2);
    XRd = extract32(ctx->opcode, 18, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    TCGv t0 = tcg_temp_new();
    TCGv t1 = tcg_temp_new();
    TCGv t2 = tcg_temp_new();
    TCGv t3 = tcg_temp_new();

    gen_load_mxu_gpr(t2, XRb);
    gen_load_mxu_gpr(t3, XRc);

    /* XRa +/-= half-words of XRb (aptn2 bit 1 selects subtraction) */
    if (XRa != 0) {
        TCGv a0 = tcg_temp_new();
        TCGv a1 = tcg_temp_new();

        tcg_gen_extract_tl(t0, t2, 0, 16);
        tcg_gen_extract_tl(t1, t2, 16, 16);

        gen_load_mxu_gpr(a1, XRa);
        tcg_gen_extract_tl(a0, a1, 0, 16);
        tcg_gen_extract_tl(a1, a1, 16, 16);

        if (aptn2 & 2) {
            tcg_gen_sub_tl(a0, a0, t0);
            tcg_gen_sub_tl(a1, a1, t1);
        } else {
            tcg_gen_add_tl(a0, a0, t0);
            tcg_gen_add_tl(a1, a1, t1);
        }
        tcg_gen_extract_tl(a0, a0, 0, 16);
        tcg_gen_shli_tl(a1, a1, 16);
        tcg_gen_or_tl(mxu_gpr[XRa - 1], a1, a0);
    }

    /* XRd +/-= half-words of XRc (aptn2 bit 0 selects subtraction) */
    if (XRd != 0) {
        TCGv a0 = tcg_temp_new();
        TCGv a1 = tcg_temp_new();

        tcg_gen_extract_tl(t0, t3, 0, 16);
        tcg_gen_extract_tl(t1, t3, 16, 16);

        gen_load_mxu_gpr(a1, XRd);
        tcg_gen_extract_tl(a0, a1, 0, 16);
        tcg_gen_extract_tl(a1, a1, 16, 16);

        if (aptn2 & 1) {
            tcg_gen_sub_tl(a0, a0, t0);
            tcg_gen_sub_tl(a1, a1, t1);
        } else {
            tcg_gen_add_tl(a0, a0, t0);
            tcg_gen_add_tl(a1, a1, t1);
        }
        tcg_gen_extract_tl(a0, a0, 0, 16);
        tcg_gen_shli_tl(a1, a1, 16);
        tcg_gen_or_tl(mxu_gpr[XRd - 1], a1, a0);
    }
}


/*
 * D16ASUM XRa, XRb, XRc, XRd, aptn2 - Double packed
 *   16-bit sign extended addition and accumulate.
 */
static void gen_mxu_d16asum(DisasContext *ctx)
{
    uint32_t aptn2, XRc, XRb, XRa, XRd;

    aptn2 = extract32(ctx->opcode, 24, 2);
    XRd = extract32(ctx->opcode, 18, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    TCGv t0 = tcg_temp_new();
    TCGv t1 = tcg_temp_new();
    TCGv t2 = tcg_temp_new();
    TCGv t3 = tcg_temp_new();

    gen_load_mxu_gpr(t2, XRb);
    gen_load_mxu_gpr(t3, XRc);

    /* XRa +/-= (sign-extended XRb.L + XRb.H); aptn2 bit 1 = subtract */
    if (XRa != 0) {
        tcg_gen_sextract_tl(t0, t2, 0, 16);
        tcg_gen_sextract_tl(t1, t2, 16, 16);
        tcg_gen_add_tl(t0, t0, t1);
        if (aptn2 & 2) {
            tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
        } else {
            tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
        }
    }

    /* XRd +/-= (sign-extended XRc.L + XRc.H); aptn2 bit 0 = subtract */
    if (XRd != 0) {
        tcg_gen_sextract_tl(t0, t3, 0, 16);
        tcg_gen_sextract_tl(t1, t3, 16, 16);
        tcg_gen_add_tl(t0, t0, t1);
        if (aptn2 & 1) {
            tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t0);
        } else {
            tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t0);
        }
    }
}

/*
 * D32ADD XRa, XRb, XRc, XRd, aptn2 - Double
 *   32 bit pattern addition/subtraction, set carry.
 *
 *  D32ADDC XRa, XRb, XRc, XRd, aptn2 - Double
 *    32 bit pattern addition/subtraction with carry.
 */
static void gen_mxu_d32add(DisasContext *ctx)
{
    uint32_t aptn2, addc, XRc, XRb, XRa, XRd;

    aptn2 = extract32(ctx->opcode, 24, 2);
    addc = extract32(ctx->opcode, 22, 2);
    XRd = extract32(ctx->opcode, 18, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    TCGv t0 = tcg_temp_new();
    TCGv t1 = tcg_temp_new();
    TCGv t2 = tcg_temp_new();
    TCGv cr = tcg_temp_new();

    if (unlikely(addc > 1)) {
        /* opcode incorrect -> do nothing */
    } else if (addc == 1) {
        /* D32ADDC: add previously recorded carries from the MXU CR. */
        if (unlikely(XRa == 0 && XRd == 0)) {
            /* destinations are zero register -> do nothing */
        } else {
            /* FIXME ??? What if XRa == XRd ??? */
            /* aptn2 is unused here */
            gen_load_mxu_gpr(t0, XRb);
            gen_load_mxu_gpr(t1, XRc);
            gen_load_mxu_cr(cr);
            if (XRa != 0) {
                /* XRa += XRb + CR bit 31 (carry of previous XRa op). */
                tcg_gen_extract_tl(t2, cr, 31, 1);
                tcg_gen_add_tl(t0, t0, t2);
                tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
            }
            if (XRd != 0) {
                /* XRd += XRc + CR bit 30 (carry of previous XRd op). */
                tcg_gen_extract_tl(t2, cr, 30, 1);
                tcg_gen_add_tl(t1, t1, t2);
                tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1);
            }
        }
    } else if (unlikely(XRa == 0 && XRd == 0)) {
        /* destinations are zero register -> do nothing */
    } else {
        /* common case: D32ADD, records carries into CR bits 31/30 */
        /* FIXME ??? What if XRa == XRd ??? */
        TCGv carry = tcg_temp_new();

        gen_load_mxu_gpr(t0, XRb);
        gen_load_mxu_gpr(t1, XRc);
        gen_load_mxu_cr(cr);
        if (XRa != 0) {
            if (aptn2 & 2) {
                /* XRa = XRb - XRc; carry = (XRb > XRc) unsigned. */
                tcg_gen_sub_i32(t2, t0, t1);
                tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t1);
            } else {
                /* XRa = XRb + XRc; carry = unsigned overflow of the add. */
                tcg_gen_add_i32(t2, t0, t1);
                tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t2);
            }
            /* Deposit carry into CR bit 31. */
            tcg_gen_andi_tl(cr, cr, 0x7fffffff);
            tcg_gen_shli_tl(carry, carry, 31);
            tcg_gen_or_tl(cr, cr, carry);
            gen_store_mxu_gpr(t2, XRa);
        }
        if (XRd != 0) {
            if (aptn2 & 1) {
                /* XRd = XRb - XRc; carry = (XRb > XRc) unsigned. */
                tcg_gen_sub_i32(t2, t0, t1);
                tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t1);
            } else {
                /* XRd = XRb + XRc; carry = unsigned overflow of the add. */
                tcg_gen_add_i32(t2, t0, t1);
                tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t2);
            }
            /* Deposit carry into CR bit 30. */
            tcg_gen_andi_tl(cr, cr, 0xbfffffff);
            tcg_gen_shli_tl(carry, carry, 30);
            tcg_gen_or_tl(cr, cr, carry);
            gen_store_mxu_gpr(t2, XRd);
        }
        gen_store_mxu_cr(cr);
    }
}

/*
 *  D32ACC XRa, XRb, XRc, XRd, aptn2 - Double
 *    32 bit pattern addition/subtraction and accumulate.
 */
static void gen_mxu_d32acc(DisasContext *ctx)
{
    uint32_t aptn2, XRc, XRb, XRa, XRd;

    aptn2 = extract32(ctx->opcode, 24, 2);
    XRd = extract32(ctx->opcode, 18, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    TCGv t0 = tcg_temp_new();
    TCGv t1 = tcg_temp_new();
    TCGv t2 = tcg_temp_new();

    if (unlikely(XRa == 0 && XRd == 0)) {
        /* destinations are zero register -> do nothing */
    } else {
        /* common case */
        gen_load_mxu_gpr(t0, XRb);
        gen_load_mxu_gpr(t1, XRc);
        if (XRa != 0) {
            /* XRa += (XRb - XRc) if aptn2 bit 1, else XRa += (XRb + XRc). */
            if (aptn2 & 2) {
                tcg_gen_sub_tl(t2, t0, t1);
            } else {
                tcg_gen_add_tl(t2, t0, t1);
            }
            tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2);
        }
        if (XRd != 0) {
            /* XRd += (XRb - XRc) if aptn2 bit 0, else XRd += (XRb + XRc). */
            if (aptn2 & 1) {
                tcg_gen_sub_tl(t2, t0, t1);
            } else {
                tcg_gen_add_tl(t2, t0, t1);
            }
            tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2);
        }
    }
}

/*
 *  D32ACCM XRa, XRb, XRc, XRd, aptn2 - Double
 *    32 bit pattern addition/subtraction and accumulate.
 */
static void gen_mxu_d32accm(DisasContext *ctx)
{
    uint32_t aptn2, XRc, XRb, XRa, XRd;

    aptn2 = extract32(ctx->opcode, 24, 2);
    XRd = extract32(ctx->opcode, 18, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    TCGv t0 = tcg_temp_new();
    TCGv t1 = tcg_temp_new();
    TCGv t2 = tcg_temp_new();

    if (unlikely(XRa == 0 && XRd == 0)) {
        /* destinations are zero register -> do nothing */
    } else {
        /* common case */
        gen_load_mxu_gpr(t0, XRb);
        gen_load_mxu_gpr(t1, XRc);
        if (XRa != 0) {
            /* XRa accumulates the sum XRb + XRc (sign from aptn2 bit 1). */
            tcg_gen_add_tl(t2, t0, t1);
            if (aptn2 & 2) {
                tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2);
            } else {
                tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2);
            }
        }
        if (XRd != 0) {
            /* XRd accumulates the difference XRb - XRc (sign from bit 0). */
            tcg_gen_sub_tl(t2, t0, t1);
            if (aptn2 & 1) {
                tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2);
            } else {
                tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2);
            }
        }
    }
}

/*
 *  D32ASUM XRa, XRb, XRc, XRd, aptn2 - Double
 *    32 bit pattern addition/subtraction.
 */
static void gen_mxu_d32asum(DisasContext *ctx)
{
    uint32_t aptn2, XRc, XRb, XRa, XRd;

    aptn2 = extract32(ctx->opcode, 24, 2);
    XRd = extract32(ctx->opcode, 18, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    TCGv t0 = tcg_temp_new();
    TCGv t1 = tcg_temp_new();

    if (unlikely(XRa == 0 && XRd == 0)) {
        /* destinations are zero register -> do nothing */
    } else {
        /* common case */
        gen_load_mxu_gpr(t0, XRb);
        gen_load_mxu_gpr(t1, XRc);
        if (XRa != 0) {
            /* XRa +/-= XRb, direction selected by aptn2 bit 1. */
            if (aptn2 & 2) {
                tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
            } else {
                tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
            }
        }
        if (XRd != 0) {
            /* XRd +/-= XRc, direction selected by aptn2 bit 0. */
            if (aptn2 & 1) {
                tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1);
            } else {
                tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1);
            }
        }
    }
}

/*
 *                 MXU instruction category: Miscellaneous
 *                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *               S32EXTR      S32LUI
 *               S32EXTRV
 *                            Q16SAT
 *                            Q16SCOP
 */

/*
 *  S32EXTR XRa, XRd, rs, bits5
 *    Extract bits5 bits from 64-bit pair {XRa:XRd}
 *    starting from rs[4:0] offset and put to the XRa.
 */
static void gen_mxu_s32extr(DisasContext *ctx)
{
    TCGv t0, t1, t2, t3;
    uint32_t XRa, XRd, rs, bits5;

    t0 = tcg_temp_new();
    t1 = tcg_temp_new();
    t2 = tcg_temp_new();
    t3 = tcg_temp_new();

    XRa = extract32(ctx->opcode, 6, 4);
    XRd = extract32(ctx->opcode, 10, 4);
    bits5 = extract32(ctx->opcode, 16, 5);
    rs = extract32(ctx->opcode, 21, 5);

    /* {tmp} = {XRa:XRd} >> (64 - rs - bits5); */
    /* {XRa} = extract({tmp}, 0, bits5); */
    if (bits5 > 0) {
        TCGLabel *l_xra_only = gen_new_label();
        TCGLabel *l_done = gen_new_label();

        gen_load_mxu_gpr(t0, XRd);
        gen_load_mxu_gpr(t1, XRa);
        gen_load_gpr(t2, rs);
        tcg_gen_andi_tl(t2, t2, 0x1f);
        /* t2 = 32 - offset: bits of XRa above the extraction start. */
        tcg_gen_subfi_tl(t2, 32, t2);
        /* If the whole field fits inside XRa, skip the XRd part. */
        tcg_gen_brcondi_tl(TCG_COND_GE, t2, bits5, l_xra_only);
        /* Field straddles the pair: combine high bits of XRd with XRa. */
        tcg_gen_subfi_tl(t2, bits5, t2);
        tcg_gen_subfi_tl(t3, 32, t2);
        tcg_gen_shr_tl(t0, t0, t3);
        tcg_gen_shl_tl(t1, t1, t2);
        tcg_gen_or_tl(t0, t0, t1);
        tcg_gen_br(l_done);
        gen_set_label(l_xra_only);
        /* Field lies entirely within XRa. */
        tcg_gen_subi_tl(t2, t2, bits5);
        tcg_gen_shr_tl(t0, t1, t2);
        gen_set_label(l_done);
        tcg_gen_extract_tl(t0, t0, 0, bits5);
    } else {
        /* unspecified behavior but matches tests on real hardware*/
        tcg_gen_movi_tl(t0, 0);
    }
    gen_store_mxu_gpr(t0, XRa);
}

/*
 *  S32EXTRV XRa, XRd, rs, rt
 *    Extract rt[4:0] bits from 64-bit pair {XRa:XRd}
 *    starting from rs[4:0] offset and put to the XRa.
 */
static void gen_mxu_s32extrv(DisasContext *ctx)
{
    TCGv t0, t1, t2, t3, t4;
    uint32_t XRa, XRd, rs, rt;

    t0 = tcg_temp_new();
    t1 = tcg_temp_new();
    t2 = tcg_temp_new();
    t3 = tcg_temp_new();
    t4 = tcg_temp_new();
    TCGLabel *l_xra_only = gen_new_label();
    TCGLabel *l_done = gen_new_label();
    TCGLabel *l_zero = gen_new_label();
    TCGLabel *l_extract = gen_new_label();

    XRa = extract32(ctx->opcode, 6, 4);
    XRd = extract32(ctx->opcode, 10, 4);
    rt = extract32(ctx->opcode, 16, 5);
    rs = extract32(ctx->opcode, 21, 5);

    /* {tmp} = {XRa:XRd} >> (64 - rs - rt) */
    gen_load_mxu_gpr(t0, XRd);
    gen_load_mxu_gpr(t1, XRa);
    gen_load_gpr(t2, rs);
    gen_load_gpr(t4, rt);
    /* Zero-width extraction is handled separately below. */
    tcg_gen_brcondi_tl(TCG_COND_EQ, t4, 0, l_zero);
    tcg_gen_andi_tl(t2, t2, 0x1f);
    /* t2 = 32 - offset: bits of XRa above the extraction start. */
    tcg_gen_subfi_tl(t2, 32, t2);
    /* If the whole field fits inside XRa, skip the XRd part. */
    tcg_gen_brcond_tl(TCG_COND_GE, t2, t4, l_xra_only);
    /* Field straddles the pair: combine high bits of XRd with XRa. */
    tcg_gen_sub_tl(t2, t4, t2);
    tcg_gen_subfi_tl(t3, 32, t2);
    tcg_gen_shr_tl(t0, t0, t3);
    tcg_gen_shl_tl(t1, t1, t2);
    tcg_gen_or_tl(t0, t0, t1);
    tcg_gen_br(l_extract);

    gen_set_label(l_xra_only);
    /* Field lies entirely within XRa. */
    tcg_gen_sub_tl(t2, t2, t4);
    tcg_gen_shr_tl(t0, t1, t2);
    tcg_gen_br(l_extract);

    /* unspecified behavior but matches tests on real hardware*/
    gen_set_label(l_zero);
    tcg_gen_movi_tl(t0, 0);
    tcg_gen_br(l_done);

    /* {XRa} = extract({tmp}, 0, rt) — mask by shifting up then down. */
    gen_set_label(l_extract);
    tcg_gen_subfi_tl(t4, 32, t4);
    tcg_gen_shl_tl(t0, t0, t4);
    tcg_gen_shr_tl(t0, t0, t4);

    gen_set_label(l_done);
    gen_store_mxu_gpr(t0, XRa);
}

/*
 *  S32LUI XRa, S8, optn3
 *    Permutate the immediate S8 value to form a word
 *    to update XRa.
 */
static void gen_mxu_s32lui(DisasContext *ctx)
{
    uint32_t XRa, s8, optn3, pad;

    XRa = extract32(ctx->opcode, 6, 4);
    s8 = extract32(ctx->opcode, 10, 8);
    pad = extract32(ctx->opcode, 21, 2);
    optn3 = extract32(ctx->opcode, 23, 3);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else {
        uint32_t s16;
        TCGv t0 = tcg_temp_new();

        /* optn3 selects which byte lane(s) of XRa receive s8. */
        switch (optn3) {
        case 0:
            tcg_gen_movi_tl(t0, s8);
            break;
        case 1:
            tcg_gen_movi_tl(t0, s8 << 8);
            break;
        case 2:
            tcg_gen_movi_tl(t0, s8 << 16);
            break;
        case 3:
            tcg_gen_movi_tl(t0, s8 << 24);
            break;
        case 4:
            tcg_gen_movi_tl(t0, (s8 << 16) | s8);
            break;
        case 5:
            tcg_gen_movi_tl(t0, (s8 << 24) | (s8 << 8));
            break;
        case 6:
            /* Sign-extend s8 to 16 bits, replicate in both halves. */
            s16 = (uint16_t)(int16_t)(int8_t)s8;
            tcg_gen_movi_tl(t0, (s16 << 16) | s16);
            break;
        case 7:
            /* Replicate s8 in all four bytes. */
            tcg_gen_movi_tl(t0, (s8 << 24) | (s8 << 16) | (s8 << 8) | s8);
            break;
        }
        gen_store_mxu_gpr(t0, XRa);
    }
}

/*
 *  Q16SAT XRa, XRb, XRc
 *  Packs four 16-bit signed integers in XRb and XRc to
 *  four saturated unsigned 8-bit into XRa.
 *
 */
static void gen_mxu_Q16SAT(DisasContext *ctx)
{
    uint32_t pad, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 3);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else {
        /* the most general case */
        TCGv t0 = tcg_temp_new();
        TCGv t1 = tcg_temp_new();
        TCGv t2 = tcg_temp_new();

        /* t2 accumulates the packed result; bytes default to zero. */
        tcg_gen_movi_tl(t2, 0);
        if (XRb != 0) {
            TCGLabel *l_less_hi = gen_new_label();
            TCGLabel *l_less_lo = gen_new_label();
            TCGLabel *l_lo = gen_new_label();
            TCGLabel *l_greater_hi = gen_new_label();
            TCGLabel *l_greater_lo = gen_new_label();
            TCGLabel *l_done = gen_new_label();

            /* Clamp XRb's high half to [0, 255]. */
            tcg_gen_sari_tl(t0, mxu_gpr[XRb - 1], 16);
            tcg_gen_brcondi_tl(TCG_COND_LT, t0, 0, l_less_hi);
            tcg_gen_brcondi_tl(TCG_COND_GT, t0, 255, l_greater_hi);
            tcg_gen_br(l_lo);
            gen_set_label(l_less_hi);
            tcg_gen_movi_tl(t0, 0);
            tcg_gen_br(l_lo);
            gen_set_label(l_greater_hi);
            tcg_gen_movi_tl(t0, 255);

            gen_set_label(l_lo);
            /* Clamp XRb's (sign-extended) low half to [0, 255]. */
            tcg_gen_shli_tl(t1, mxu_gpr[XRb - 1], 16);
            tcg_gen_sari_tl(t1, t1, 16);
            tcg_gen_brcondi_tl(TCG_COND_LT, t1, 0, l_less_lo);
            tcg_gen_brcondi_tl(TCG_COND_GT, t1, 255, l_greater_lo);
            tcg_gen_br(l_done);
            gen_set_label(l_less_lo);
            tcg_gen_movi_tl(t1, 0);
            tcg_gen_br(l_done);
            gen_set_label(l_greater_lo);
            tcg_gen_movi_tl(t1, 255);

            gen_set_label(l_done);
            /* XRb's halves become result bytes 3 and 2. */
            tcg_gen_shli_tl(t2, t0, 24);
            tcg_gen_shli_tl(t1, t1, 16);
            tcg_gen_or_tl(t2, t2, t1);
        }

        if (XRc != 0) {
            TCGLabel *l_less_hi = gen_new_label();
            TCGLabel *l_less_lo = gen_new_label();
            TCGLabel *l_lo = gen_new_label();
            TCGLabel *l_greater_hi = gen_new_label();
            TCGLabel *l_greater_lo = gen_new_label();
            TCGLabel *l_done = gen_new_label();

            /* Clamp XRc's high half to [0, 255]. */
            tcg_gen_sari_tl(t0, mxu_gpr[XRc - 1], 16);
            tcg_gen_brcondi_tl(TCG_COND_LT, t0, 0, l_less_hi);
            tcg_gen_brcondi_tl(TCG_COND_GT, t0, 255, l_greater_hi);
            tcg_gen_br(l_lo);
            gen_set_label(l_less_hi);
            tcg_gen_movi_tl(t0, 0);
            tcg_gen_br(l_lo);
            gen_set_label(l_greater_hi);
            tcg_gen_movi_tl(t0, 255);

            gen_set_label(l_lo);
            /* Clamp XRc's (sign-extended) low half to [0, 255]. */
            tcg_gen_shli_tl(t1, mxu_gpr[XRc - 1], 16);
            tcg_gen_sari_tl(t1, t1, 16);
            tcg_gen_brcondi_tl(TCG_COND_LT, t1, 0, l_less_lo);
            tcg_gen_brcondi_tl(TCG_COND_GT, t1, 255, l_greater_lo);
            tcg_gen_br(l_done);
            gen_set_label(l_less_lo);
            tcg_gen_movi_tl(t1, 0);
            tcg_gen_br(l_done);
            gen_set_label(l_greater_lo);
            tcg_gen_movi_tl(t1, 255);

            gen_set_label(l_done);
            /* XRc's halves become result bytes 1 and 0. */
            tcg_gen_shli_tl(t0, t0, 8);
            tcg_gen_or_tl(t2, t2, t0);
            tcg_gen_or_tl(t2, t2, t1);
        }
        gen_store_mxu_gpr(t2, XRa);
    }
}

/*
 *  Q16SCOP XRa, XRd, XRb, XRc
 *    Determine sign of quad packed 16-bit signed values
 *    in XRb and XRc put result in XRa and XRd respectively.
 */
static void gen_mxu_q16scop(DisasContext *ctx)
{
    uint32_t XRd, XRc, XRb, XRa;

    XRd = extract32(ctx->opcode, 18, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    TCGv t0 = tcg_temp_new();
    TCGv t1 = tcg_temp_new();
    TCGv t2 = tcg_temp_new();
    TCGv t3 = tcg_temp_new();
    TCGv t4 = tcg_temp_new();

    TCGLabel *l_b_hi_lt = gen_new_label();
    TCGLabel *l_b_hi_gt = gen_new_label();
    TCGLabel *l_b_lo = gen_new_label();
    TCGLabel *l_b_lo_lt = gen_new_label();
    TCGLabel *l_c_hi = gen_new_label();
    TCGLabel *l_c_hi_lt = gen_new_label();
    TCGLabel *l_c_hi_gt = gen_new_label();
    TCGLabel *l_c_lo = gen_new_label();
    TCGLabel *l_c_lo_lt = gen_new_label();
    TCGLabel *l_done = gen_new_label();

    gen_load_mxu_gpr(t0, XRb);
    gen_load_mxu_gpr(t1, XRc);

    /* High half of XRb: t3.hi = -1, 0 or +1 per its sign. */
    tcg_gen_sextract_tl(t2, t0, 16, 16);
    tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_b_hi_lt);
    tcg_gen_brcondi_tl(TCG_COND_GT, t2, 0, l_b_hi_gt);
    tcg_gen_movi_tl(t3, 0);
    tcg_gen_br(l_b_lo);
    gen_set_label(l_b_hi_lt);
    tcg_gen_movi_tl(t3, 0xffff0000);
    tcg_gen_br(l_b_lo);
    gen_set_label(l_b_hi_gt);
    tcg_gen_movi_tl(t3, 0x00010000);

    gen_set_label(l_b_lo);
    /* Low half of XRb: t3.lo = -1, 0 or +1 per its sign. */
    tcg_gen_sextract_tl(t2, t0, 0, 16);
    tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_c_hi);
    tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_b_lo_lt);
    tcg_gen_ori_tl(t3, t3, 0x00000001);
    tcg_gen_br(l_c_hi);
    gen_set_label(l_b_lo_lt);
    tcg_gen_ori_tl(t3, t3, 0x0000ffff);
    tcg_gen_br(l_c_hi);

    gen_set_label(l_c_hi);
    /* High half of XRc: t4.hi = -1, 0 or +1 per its sign. */
    tcg_gen_sextract_tl(t2, t1, 16, 16);
    tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_c_hi_lt);
    tcg_gen_brcondi_tl(TCG_COND_GT, t2, 0, l_c_hi_gt);
    tcg_gen_movi_tl(t4, 0);
    tcg_gen_br(l_c_lo);
    gen_set_label(l_c_hi_lt);
    tcg_gen_movi_tl(t4, 0xffff0000);
    tcg_gen_br(l_c_lo);

    gen_set_label(l_c_hi_gt);
    tcg_gen_movi_tl(t4, 0x00010000);

    gen_set_label(l_c_lo);
    /* Low half of XRc: t4.lo = -1, 0 or +1 per its sign. */
    tcg_gen_sextract_tl(t2, t1, 0, 16);
    tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_done);
    tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_c_lo_lt);
    tcg_gen_ori_tl(t4, t4, 0x00000001);
    tcg_gen_br(l_done);
    gen_set_label(l_c_lo_lt);
    tcg_gen_ori_tl(t4, t4, 0x0000ffff);

    gen_set_label(l_done);
    gen_store_mxu_gpr(t3, XRa);
    gen_store_mxu_gpr(t4, XRd);
}

/*
 *                 MXU instruction category: align
 *                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *               S32ALN     S32ALNI
 */

/*
 *  S32ALNI XRc, XRb, XRa, optn3
 *    Arrange bytes from XRb and XRc according to one of five sets of
 *    rules determined by optn3, and place the result in XRa.
 */
static void gen_mxu_S32ALNI(DisasContext *ctx)
{
    uint32_t optn3, pad, XRc, XRb, XRa;

    optn3 = extract32(ctx->opcode, 23, 3);
    pad = extract32(ctx->opcode, 21, 2);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to all 0s */
        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
    } else if (unlikely(XRb == 0)) {
        /* XRb zero register -> just appropriatelly shift XRc into XRa */
        switch (optn3) {
        case MXU_OPTN3_PTN0:
            tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
            break;
        case MXU_OPTN3_PTN1:
        case MXU_OPTN3_PTN2:
        case MXU_OPTN3_PTN3:
            tcg_gen_shri_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1],
                             8 * (4 - optn3));
            break;
        case MXU_OPTN3_PTN4:
            tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
            break;
        }
    } else if (unlikely(XRc == 0)) {
        /* XRc zero register -> just appropriatelly shift XRb into XRa */
        switch (optn3) {
        case MXU_OPTN3_PTN0:
            tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
            break;
        case MXU_OPTN3_PTN1:
        case MXU_OPTN3_PTN2:
        case MXU_OPTN3_PTN3:
            tcg_gen_shri_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], 8 * optn3);
            break;
        case MXU_OPTN3_PTN4:
            tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
            break;
        }
    } else if (unlikely(XRb == XRc)) {
        /* both operands same -> just rotation or moving from any of them */
        switch (optn3) {
        case MXU_OPTN3_PTN0:
        case MXU_OPTN3_PTN4:
            tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
            break;
        case MXU_OPTN3_PTN1:
        case MXU_OPTN3_PTN2:
        case MXU_OPTN3_PTN3:
            tcg_gen_rotli_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], 8 * optn3);
            break;
        }
    } else {
        /* the most general case */
        switch (optn3) {
        case MXU_OPTN3_PTN0:
            {
                /*                                         */
                /*         XRb                XRc          */
                /*  +---------------+                      */
                /*  | A   B   C   D |  E   F   G   H       */
                /*  +-------+-------+                      */
                /*          |                              */
                /*         XRa                             */
                /*                                         */

                tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
            }
            break;
        case MXU_OPTN3_PTN1:
            {
                /*                                         */
                /*         XRb                 XRc         */
                /*      +-------------------+              */
                /*  A   | B   C   D     E   | F   G   H    */
                /*      +---------+---------+              */
                /*                |                        */
                /*               XRa                       */
                /*                                         */

                TCGv_i32 t0 = tcg_temp_new();
                TCGv_i32 t1 = tcg_temp_new();

                tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x00FFFFFF);
                tcg_gen_shli_i32(t0, t0, 8);

                tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF000000);
                tcg_gen_shri_i32(t1, t1, 24);

                tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1);
            }
            break;
        case MXU_OPTN3_PTN2:
            {
                /*                                         */
                /*         XRb                 XRc         */
                /*          +-------------------+          */
                /*  A   B   | C   D     E   F   | G   H    */
                /*          +---------+---------+          */
                /*                    |                    */
                /*                   XRa                   */
                /*                                         */

                TCGv_i32 t0 = tcg_temp_new();
                TCGv_i32 t1 = tcg_temp_new();

                tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x0000FFFF);
                tcg_gen_shli_i32(t0, t0, 16);

                tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFF0000);
                tcg_gen_shri_i32(t1, t1, 16);

                tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1);
            }
            break;
        case MXU_OPTN3_PTN3:
            {
                /*                                         */
                /*         XRb                 XRc         */
                /*              +-------------------+      */
                /*  A   B   C   | D     E   F   G   | H    */
                /*              +---------+---------+      */
                /*                        |                */
                /*                       XRa               */
                /*                                         */

                TCGv_i32 t0 = tcg_temp_new();
                TCGv_i32 t1 = tcg_temp_new();

                tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x000000FF);
                tcg_gen_shli_i32(t0, t0, 24);

                tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFFFF00);
                tcg_gen_shri_i32(t1, t1, 8);

                tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1);
            }
            break;
        case MXU_OPTN3_PTN4:
            {
                /*                                         */
                /*         XRb                XRc          */
                /*                  +---------------+      */
                /*  A   B   C   D   | E   F   G   H |      */
                /*                  +-------+-------+      */
                /*                          |              */
                /*                         XRa             */
                /*                                         */

                tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
            }
            break;
        }
    }
}

/*
 *  S32ALN XRc, XRb, XRa, rs
 *    Arrange bytes from XRb and XRc according to one of five sets of
 *    rules determined by rs[2:0], and place the result in XRa.
 */
static void gen_mxu_S32ALN(DisasContext *ctx)
{
    uint32_t rs, XRc, XRb, XRa;

    rs = extract32(ctx->opcode, 21, 5);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to all 0s */
        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
    } else {
        /* the most general case */
        TCGv t0 = tcg_temp_new();
        TCGv t1 = tcg_temp_new();
        TCGv t2 = tcg_temp_new();
        TCGv t3 = tcg_temp_new();
        TCGLabel *l_exit = gen_new_label();
        TCGLabel *l_b_only = gen_new_label();
        TCGLabel *l_c_only = gen_new_label();

        gen_load_mxu_gpr(t0, XRb);
        gen_load_mxu_gpr(t1, XRc);
        gen_load_gpr(t2, rs);
        tcg_gen_andi_tl(t2, t2, 0x07);

        /* do nothing for undefined cases */
        tcg_gen_brcondi_tl(TCG_COND_GE, t2, 5, l_exit);

        /* Pattern 0 selects XRb whole, pattern 4 selects XRc whole. */
        tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_b_only);
        tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 4, l_c_only);

        /* Patterns 1..3: take (4 - n) low bytes of XRb, n high of XRc. */
        tcg_gen_shli_tl(t2, t2, 3);
        tcg_gen_subfi_tl(t3, 32, t2);

        tcg_gen_shl_tl(t0, t0, t2);
        tcg_gen_shr_tl(t1, t1, t3);
        tcg_gen_or_tl(mxu_gpr[XRa - 1], t0, t1);
        tcg_gen_br(l_exit);

        gen_set_label(l_b_only);
        gen_store_mxu_gpr(t0, XRa);
        tcg_gen_br(l_exit);

        gen_set_label(l_c_only);
        gen_store_mxu_gpr(t1, XRa);

        gen_set_label(l_exit);
    }
}

/*
 *  S32MADD XRa, XRd, rb, rc
 *    32 to 64 bit signed multiply with subsequent add
 *    result stored in {XRa, XRd} pair, stain HI/LO.
 *  S32MADDU XRa, XRd, rb, rc
 *    32 to 64 bit unsigned multiply with subsequent add
 *    result stored in {XRa, XRd} pair, stain HI/LO.
 *  S32MSUB XRa, XRd, rb, rc
 *    32 to 64 bit signed multiply with subsequent subtract
 *    result stored in {XRa, XRd} pair, stain HI/LO.
 *  S32MSUBU XRa, XRd, rb, rc
 *    32 to 64 bit unsigned multiply with subsequent subtract
 *    result stored in {XRa, XRd} pair, stain HI/LO.
 */
static void gen_mxu_s32madd_sub(DisasContext *ctx, bool sub, bool uns)
{
    uint32_t XRa, XRd, Rb, Rc;

    XRa = extract32(ctx->opcode, 6, 4);
    XRd = extract32(ctx->opcode, 10, 4);
    Rb = extract32(ctx->opcode, 16, 5);
    Rc = extract32(ctx->opcode, 21, 5);

    if (unlikely(Rb == 0 || Rc == 0)) {
        /* do nothing because x + 0 * y => x */
    } else if (unlikely(XRa == 0 && XRd == 0)) {
        /* do nothing because result just dropped */
    } else {
        TCGv t0 = tcg_temp_new();
        TCGv t1 = tcg_temp_new();
        TCGv_i64 t2 = tcg_temp_new_i64();
        TCGv_i64 t3 = tcg_temp_new_i64();

        gen_load_gpr(t0, Rb);
        gen_load_gpr(t1, Rc);

        /* t2 = Rb * Rc widened to 64 bits, signed or unsigned per 'uns'. */
        if (uns) {
            tcg_gen_extu_tl_i64(t2, t0);
            tcg_gen_extu_tl_i64(t3, t1);
        } else {
            tcg_gen_ext_tl_i64(t2, t0);
            tcg_gen_ext_tl_i64(t3, t1);
        }
        tcg_gen_mul_i64(t2, t2, t3);

        gen_load_mxu_gpr(t0, XRa);
        gen_load_mxu_gpr(t1, XRd);

        /* {XRa:XRd} = {XRa:XRd} +/- product. */
        tcg_gen_concat_tl_i64(t3, t1, t0);
        if (sub) {
            tcg_gen_sub_i64(t3, t3, t2);
        } else {
            tcg_gen_add_i64(t3, t3, t2);
        }
        gen_move_low32(t1, t3);
        gen_move_high32(t0, t3);

        /* The result also stains the architectural HI/LO pair. */
        tcg_gen_mov_tl(cpu_HI[0], t0);
        tcg_gen_mov_tl(cpu_LO[0], t1);

        gen_store_mxu_gpr(t1, XRd);
        gen_store_mxu_gpr(t0, XRa);
    }
}

/*
 * Decoding engine for MXU
 * =======================
 */

/* Decode opcode bits [20:18] of the pool00 group. */
static void decode_opc_mxu__pool00(DisasContext *ctx)
{
    uint32_t opcode = extract32(ctx->opcode, 18, 3);

    switch (opcode) {
    case OPC_MXU_S32MAX:
    case OPC_MXU_S32MIN:
        gen_mxu_S32MAX_S32MIN(ctx);
        break;
    case OPC_MXU_D16MAX:
    case OPC_MXU_D16MIN:
        gen_mxu_D16MAX_D16MIN(ctx);
        break;
    case OPC_MXU_Q8MAX:
    case OPC_MXU_Q8MIN:
        gen_mxu_Q8MAX_Q8MIN(ctx);
        break;
    case OPC_MXU_Q8SLT:
        gen_mxu_q8slt(ctx, false);
        break;
    case OPC_MXU_Q8SLTU:
        gen_mxu_q8slt(ctx, true);
        break;
    default:
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
        break;
    }
}

/*
 * Decode S32MADD/S32MADDU/S32MSUB/S32MSUBU.
 * Returns true if the opcode was handled as an MXU instruction.
 */
static bool decode_opc_mxu_s32madd_sub(DisasContext *ctx)
{
    uint32_t opcode = extract32(ctx->opcode, 0, 6);
    uint32_t pad = extract32(ctx->opcode, 14, 2);

    if (pad != 2) {
        /* MIPS32R1 MADD/MADDU/MSUB/MSUBU are on pad == 0 */
        return false;
    }

    switch (opcode) {
    case OPC_MXU_S32MADD:
        gen_mxu_s32madd_sub(ctx, false, false);
        break;
    case OPC_MXU_S32MADDU:
        gen_mxu_s32madd_sub(ctx, false, true);
        break;
    case OPC_MXU_S32MSUB:
        gen_mxu_s32madd_sub(ctx, true, false);
        break;
    case OPC_MXU_S32MSUBU:
        gen_mxu_s32madd_sub(ctx, true, true);
        break;
    default:
        return false;
    }
    return true;
}

/* Decode opcode bits [20:18] of the pool01 group. */
static void decode_opc_mxu__pool01(DisasContext *ctx)
{
    uint32_t opcode = extract32(ctx->opcode, 18, 3);

    switch (opcode) {
    case OPC_MXU_S32SLT:
        gen_mxu_S32SLT(ctx);
        break;
    case OPC_MXU_D16SLT:
        gen_mxu_D16SLT(ctx);
        break;
    case OPC_MXU_D16AVG:
        gen_mxu_d16avg(ctx, false);
        break;
    case OPC_MXU_D16AVGR:
        gen_mxu_d16avg(ctx, true);
        break;
    case OPC_MXU_Q8AVG:
        gen_mxu_q8avg(ctx, false);
        break;
    case OPC_MXU_Q8AVGR:
        gen_mxu_q8avg(ctx, true);
        break;
    case OPC_MXU_Q8ADD:
        gen_mxu_Q8ADD(ctx);
        break;
    default:
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
        break;
    }
}

/* Decode opcode bits [20:18] of the pool02 group. */
static void decode_opc_mxu__pool02(DisasContext *ctx)
{
    uint32_t opcode = extract32(ctx->opcode, 18, 3);

    switch (opcode) {
    case OPC_MXU_S32CPS:
        gen_mxu_S32CPS(ctx);
        break;
    case OPC_MXU_D16CPS:
        gen_mxu_D16CPS(ctx);
        break;
    case OPC_MXU_Q8ABD:
        gen_mxu_Q8ABD(ctx);
        break;
    case OPC_MXU_Q16SAT:
        gen_mxu_Q16SAT(ctx);
        break;
    default:
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
        break;
    }
}

/* Decode opcode bits [25:24] of the pool03 group (fractional muls). */
static void decode_opc_mxu__pool03(DisasContext *ctx)
{
    uint32_t opcode = extract32(ctx->opcode, 24, 2);

    switch (opcode) {
    case OPC_MXU_D16MULF:
        gen_mxu_d16mul(ctx, true, true);
        break;
    case OPC_MXU_D16MULE:
        gen_mxu_d16mul(ctx, true, false);
        break;
    default:
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
        break;
    }
}

/* Pool04: word loads; the opcode bits have no known meaning yet. */
static void decode_opc_mxu__pool04(DisasContext *ctx)
{
    uint32_t reversed = extract32(ctx->opcode, 20, 1);
    uint32_t opcode = extract32(ctx->opcode, 10, 4);

    /* Don't care about opcode bits as their meaning is unknown yet */
    switch (opcode) {
    default:
        gen_mxu_s32ldxx(ctx, reversed, false);
        break;
    }
}

/* Pool05: word stores; the opcode bits have no known meaning yet. */
static void decode_opc_mxu__pool05(DisasContext *ctx)
{
    uint32_t reversed = extract32(ctx->opcode, 20, 1);
    uint32_t opcode = extract32(ctx->opcode, 10, 4);

    /* Don't care about opcode bits as their meaning is unknown yet */
    switch (opcode) {
    default:
        gen_mxu_s32stxx(ctx, reversed, false);
        break;
    }
}

/* Pool06: indexed loads with stride (strd2 must be 0..2). */
static void decode_opc_mxu__pool06(DisasContext *ctx)
{
    uint32_t opcode = extract32(ctx->opcode, 10, 4);
    uint32_t strd2 = extract32(ctx->opcode, 14, 2);

    switch (opcode) {
    case OPC_MXU_S32LDST:
    case OPC_MXU_S32LDSTR:
        if (strd2 <= 2) {
            gen_mxu_s32ldxvx(ctx, opcode, false, strd2);
            break;
        }
        /* fallthrough */
    default:
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
        break;
    }
}

/* Pool07: indexed stores with stride (strd2 must be 0..2). */
static void decode_opc_mxu__pool07(DisasContext *ctx)
{
    uint32_t opcode = extract32(ctx->opcode, 10, 4);
    uint32_t strd2 = extract32(ctx->opcode, 14, 2);

    switch (opcode) {
    case OPC_MXU_S32LDST:
    case OPC_MXU_S32LDSTR:
        if (strd2 <= 2) {
            gen_mxu_s32stxvx(ctx, opcode, false, strd2);
            break;
        }
        /* fallthrough */
    default:
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
        break;
    }
}

/* Pool08: post-modify variant of pool04 loads. */
static void decode_opc_mxu__pool08(DisasContext *ctx)
{
    uint32_t reversed = extract32(ctx->opcode, 20, 1);
    uint32_t opcode = extract32(ctx->opcode, 10, 4);

    /* Don't care about opcode bits as their meaning is unknown yet */
    switch (opcode) {
    default:
        gen_mxu_s32ldxx(ctx, reversed, true);
        break;
    }
}

/* Pool09: post-modify variant of pool05 stores. */
static void decode_opc_mxu__pool09(DisasContext *ctx)
{
    uint32_t reversed = extract32(ctx->opcode, 20, 1);
    uint32_t opcode = extract32(ctx->opcode, 10, 4);

    /* Don't care about opcode bits as their meaning is unknown yet */
    switch (opcode) {
    default:
        gen_mxu_s32stxx(ctx, reversed, true);
        break;
    }
}

/* Pool10: post-modify variant of pool06 loads. */
static void decode_opc_mxu__pool10(DisasContext *ctx)
{
    uint32_t opcode = extract32(ctx->opcode, 10, 4);
    uint32_t strd2 = extract32(ctx->opcode, 14, 2);

    switch (opcode) {
    case OPC_MXU_S32LDST:
    case OPC_MXU_S32LDSTR:
        if (strd2 <= 2) {
            gen_mxu_s32ldxvx(ctx, opcode, true, strd2);
            break;
        }
        /* fallthrough */
    default:
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
        break;
    }
}

/* Pool11: post-modify variant of pool07 stores. */
static void decode_opc_mxu__pool11(DisasContext *ctx)
{
    uint32_t opcode = extract32(ctx->opcode, 10, 4);
    uint32_t strd2 = extract32(ctx->opcode, 14, 2);

    switch (opcode) {
    case OPC_MXU_S32LDST:
    case OPC_MXU_S32LDSTR:
        if (strd2 <= 2) {
            gen_mxu_s32stxvx(ctx, opcode, true, strd2);
            break;
        }
        /* fallthrough */
    default:
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
        break;
    }
}

/* Decode opcode bits [23:22] of the pool12 group (double 32-bit acc). */
static void decode_opc_mxu__pool12(DisasContext *ctx)
{
    uint32_t opcode = extract32(ctx->opcode, 22, 2);

    switch (opcode) {
    case OPC_MXU_D32ACC:
        gen_mxu_d32acc(ctx);
        break;
    case OPC_MXU_D32ACCM:
        gen_mxu_d32accm(ctx);
        break;
    case OPC_MXU_D32ASUM:
        gen_mxu_d32asum(ctx);
        break;
    default:
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
        break;
    }
}

/* Decode opcode bits [23:22] of the pool13 group (quad 16-bit acc). */
static void decode_opc_mxu__pool13(DisasContext *ctx)
{
    uint32_t opcode = extract32(ctx->opcode, 22, 2);

    switch (opcode) {
    case OPC_MXU_Q16ACC:
        gen_mxu_q16acc(ctx);
        break;
    case OPC_MXU_Q16ACCM:
        gen_mxu_q16accm(ctx);
        break;
    case OPC_MXU_D16ASUM:
        gen_mxu_d16asum(ctx);
        break;
    default:
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
        break;
    }
}

/* Decode opcode bits [23:22] of the pool14 group (byte sums). */
static void decode_opc_mxu__pool14(DisasContext *ctx)
{
    uint32_t opcode = extract32(ctx->opcode, 22, 2);

    switch (opcode) {
    case OPC_MXU_Q8ADDE:
        gen_mxu_q8adde(ctx, false);
        break;
    case OPC_MXU_D8SUM:
        gen_mxu_d8sum(ctx, false);
        break;
    case OPC_MXU_D8SUMC:
        gen_mxu_d8sum(ctx, true);
        break;
    default:
        MIPS_INVAL("decode_opc_mxu");
        gen_reserved_instruction(ctx);
        break;
    }
}

/* Decode opcode bits [15:14] of the pool15 group (muls and extract). */
static void decode_opc_mxu__pool15(DisasContext *ctx)
{
    uint32_t opcode = extract32(ctx->opcode, 14, 2);

    switch (opcode) {
    case OPC_MXU_S32MUL:
        gen_mxu_s32mul(ctx, false);
        break;
    case OPC_MXU_S32MULU:
        gen_mxu_s32mul(ctx, true);
        break;
    case OPC_MXU_S32EXTR:
        gen_mxu_s32extr(ctx);
break; 4541 case OPC_MXU_S32EXTRV: 4542 gen_mxu_s32extrv(ctx); 4543 break; 4544 default: 4545 MIPS_INVAL("decode_opc_mxu"); 4546 gen_reserved_instruction(ctx); 4547 break; 4548 } 4549 } 4550 4551 static void decode_opc_mxu__pool16(DisasContext *ctx) 4552 { 4553 uint32_t opcode = extract32(ctx->opcode, 18, 3); 4554 4555 switch (opcode) { 4556 case OPC_MXU_D32SARW: 4557 gen_mxu_d32sarl(ctx, true); 4558 break; 4559 case OPC_MXU_S32ALN: 4560 gen_mxu_S32ALN(ctx); 4561 break; 4562 case OPC_MXU_S32ALNI: 4563 gen_mxu_S32ALNI(ctx); 4564 break; 4565 case OPC_MXU_S32LUI: 4566 gen_mxu_s32lui(ctx); 4567 break; 4568 case OPC_MXU_S32NOR: 4569 gen_mxu_S32NOR(ctx); 4570 break; 4571 case OPC_MXU_S32AND: 4572 gen_mxu_S32AND(ctx); 4573 break; 4574 case OPC_MXU_S32OR: 4575 gen_mxu_S32OR(ctx); 4576 break; 4577 case OPC_MXU_S32XOR: 4578 gen_mxu_S32XOR(ctx); 4579 break; 4580 default: 4581 MIPS_INVAL("decode_opc_mxu"); 4582 gen_reserved_instruction(ctx); 4583 break; 4584 } 4585 } 4586 4587 static void decode_opc_mxu__pool17(DisasContext *ctx) 4588 { 4589 uint32_t opcode = extract32(ctx->opcode, 6, 3); 4590 uint32_t strd2 = extract32(ctx->opcode, 9, 2); 4591 4592 if (strd2 > 2) { 4593 MIPS_INVAL("decode_opc_mxu"); 4594 gen_reserved_instruction(ctx); 4595 return; 4596 } 4597 4598 switch (opcode) { 4599 case OPC_MXU_LXW: 4600 gen_mxu_lxx(ctx, strd2, MO_TE | MO_UL); 4601 break; 4602 case OPC_MXU_LXB: 4603 gen_mxu_lxx(ctx, strd2, MO_TE | MO_SB); 4604 break; 4605 case OPC_MXU_LXH: 4606 gen_mxu_lxx(ctx, strd2, MO_TE | MO_SW); 4607 break; 4608 case OPC_MXU_LXBU: 4609 gen_mxu_lxx(ctx, strd2, MO_TE | MO_UB); 4610 break; 4611 case OPC_MXU_LXHU: 4612 gen_mxu_lxx(ctx, strd2, MO_TE | MO_UW); 4613 break; 4614 default: 4615 MIPS_INVAL("decode_opc_mxu"); 4616 gen_reserved_instruction(ctx); 4617 break; 4618 } 4619 } 4620 4621 static void decode_opc_mxu__pool18(DisasContext *ctx) 4622 { 4623 uint32_t opcode = extract32(ctx->opcode, 18, 3); 4624 4625 switch (opcode) { 4626 case OPC_MXU_D32SLLV: 4627 
gen_mxu_d32sxxv(ctx, false, false); 4628 break; 4629 case OPC_MXU_D32SLRV: 4630 gen_mxu_d32sxxv(ctx, true, false); 4631 break; 4632 case OPC_MXU_D32SARV: 4633 gen_mxu_d32sxxv(ctx, true, true); 4634 break; 4635 case OPC_MXU_Q16SLLV: 4636 gen_mxu_q16sxxv(ctx, false, false); 4637 break; 4638 case OPC_MXU_Q16SLRV: 4639 gen_mxu_q16sxxv(ctx, true, false); 4640 break; 4641 case OPC_MXU_Q16SARV: 4642 gen_mxu_q16sxxv(ctx, true, true); 4643 break; 4644 default: 4645 MIPS_INVAL("decode_opc_mxu"); 4646 gen_reserved_instruction(ctx); 4647 break; 4648 } 4649 } 4650 4651 static void decode_opc_mxu__pool19(DisasContext *ctx) 4652 { 4653 uint32_t opcode = extract32(ctx->opcode, 22, 4); 4654 4655 switch (opcode) { 4656 case OPC_MXU_Q8MUL: 4657 gen_mxu_q8mul_mac(ctx, false, false); 4658 break; 4659 case OPC_MXU_Q8MULSU: 4660 gen_mxu_q8mul_mac(ctx, true, false); 4661 break; 4662 default: 4663 MIPS_INVAL("decode_opc_mxu"); 4664 gen_reserved_instruction(ctx); 4665 break; 4666 } 4667 } 4668 4669 static void decode_opc_mxu__pool20(DisasContext *ctx) 4670 { 4671 uint32_t opcode = extract32(ctx->opcode, 18, 3); 4672 4673 switch (opcode) { 4674 case OPC_MXU_Q8MOVZ: 4675 gen_mxu_q8movzn(ctx, TCG_COND_NE); 4676 break; 4677 case OPC_MXU_Q8MOVN: 4678 gen_mxu_q8movzn(ctx, TCG_COND_EQ); 4679 break; 4680 case OPC_MXU_D16MOVZ: 4681 gen_mxu_d16movzn(ctx, TCG_COND_NE); 4682 break; 4683 case OPC_MXU_D16MOVN: 4684 gen_mxu_d16movzn(ctx, TCG_COND_EQ); 4685 break; 4686 case OPC_MXU_S32MOVZ: 4687 gen_mxu_s32movzn(ctx, TCG_COND_NE); 4688 break; 4689 case OPC_MXU_S32MOVN: 4690 gen_mxu_s32movzn(ctx, TCG_COND_EQ); 4691 break; 4692 default: 4693 MIPS_INVAL("decode_opc_mxu"); 4694 gen_reserved_instruction(ctx); 4695 break; 4696 } 4697 } 4698 4699 static void decode_opc_mxu__pool21(DisasContext *ctx) 4700 { 4701 uint32_t opcode = extract32(ctx->opcode, 22, 2); 4702 4703 switch (opcode) { 4704 case OPC_MXU_Q8MAC: 4705 gen_mxu_q8mul_mac(ctx, false, true); 4706 break; 4707 case OPC_MXU_Q8MACSU: 4708 
gen_mxu_q8mul_mac(ctx, true, true); 4709 break; 4710 default: 4711 MIPS_INVAL("decode_opc_mxu"); 4712 gen_reserved_instruction(ctx); 4713 break; 4714 } 4715 } 4716 4717 4718 bool decode_ase_mxu(DisasContext *ctx, uint32_t insn) 4719 { 4720 uint32_t opcode = extract32(insn, 0, 6); 4721 4722 if (opcode == OPC_MXU_S32M2I) { 4723 gen_mxu_s32m2i(ctx); 4724 return true; 4725 } 4726 4727 if (opcode == OPC_MXU_S32I2M) { 4728 gen_mxu_s32i2m(ctx); 4729 return true; 4730 } 4731 4732 { 4733 TCGv t_mxu_cr = tcg_temp_new(); 4734 TCGLabel *l_exit = gen_new_label(); 4735 4736 gen_load_mxu_cr(t_mxu_cr); 4737 tcg_gen_andi_tl(t_mxu_cr, t_mxu_cr, MXU_CR_MXU_EN); 4738 tcg_gen_brcondi_tl(TCG_COND_NE, t_mxu_cr, MXU_CR_MXU_EN, l_exit); 4739 4740 switch (opcode) { 4741 case OPC_MXU_S32MADD: 4742 case OPC_MXU_S32MADDU: 4743 case OPC_MXU_S32MSUB: 4744 case OPC_MXU_S32MSUBU: 4745 return decode_opc_mxu_s32madd_sub(ctx); 4746 case OPC_MXU__POOL00: 4747 decode_opc_mxu__pool00(ctx); 4748 break; 4749 case OPC_MXU_D16MUL: 4750 gen_mxu_d16mul(ctx, false, false); 4751 break; 4752 case OPC_MXU_D16MAC: 4753 gen_mxu_d16mac(ctx, false, false); 4754 break; 4755 case OPC_MXU_D16MACF: 4756 gen_mxu_d16mac(ctx, true, true); 4757 break; 4758 case OPC_MXU_D16MADL: 4759 gen_mxu_d16madl(ctx); 4760 break; 4761 case OPC_MXU_S16MAD: 4762 gen_mxu_s16mad(ctx); 4763 break; 4764 case OPC_MXU_Q16ADD: 4765 gen_mxu_q16add(ctx); 4766 break; 4767 case OPC_MXU_D16MACE: 4768 gen_mxu_d16mac(ctx, true, false); 4769 break; 4770 case OPC_MXU__POOL01: 4771 decode_opc_mxu__pool01(ctx); 4772 break; 4773 case OPC_MXU__POOL02: 4774 decode_opc_mxu__pool02(ctx); 4775 break; 4776 case OPC_MXU__POOL03: 4777 decode_opc_mxu__pool03(ctx); 4778 break; 4779 case OPC_MXU__POOL04: 4780 decode_opc_mxu__pool04(ctx); 4781 break; 4782 case OPC_MXU__POOL05: 4783 decode_opc_mxu__pool05(ctx); 4784 break; 4785 case OPC_MXU__POOL06: 4786 decode_opc_mxu__pool06(ctx); 4787 break; 4788 case OPC_MXU__POOL07: 4789 decode_opc_mxu__pool07(ctx); 4790 break; 4791 
case OPC_MXU__POOL08: 4792 decode_opc_mxu__pool08(ctx); 4793 break; 4794 case OPC_MXU__POOL09: 4795 decode_opc_mxu__pool09(ctx); 4796 break; 4797 case OPC_MXU__POOL10: 4798 decode_opc_mxu__pool10(ctx); 4799 break; 4800 case OPC_MXU__POOL11: 4801 decode_opc_mxu__pool11(ctx); 4802 break; 4803 case OPC_MXU_D32ADD: 4804 gen_mxu_d32add(ctx); 4805 break; 4806 case OPC_MXU__POOL12: 4807 decode_opc_mxu__pool12(ctx); 4808 break; 4809 case OPC_MXU__POOL13: 4810 decode_opc_mxu__pool13(ctx); 4811 break; 4812 case OPC_MXU__POOL14: 4813 decode_opc_mxu__pool14(ctx); 4814 break; 4815 case OPC_MXU_Q8ACCE: 4816 gen_mxu_q8adde(ctx, true); 4817 break; 4818 case OPC_MXU_S8LDD: 4819 gen_mxu_s8ldd(ctx, false); 4820 break; 4821 case OPC_MXU_S8STD: 4822 gen_mxu_s8std(ctx, false); 4823 break; 4824 case OPC_MXU_S8LDI: 4825 gen_mxu_s8ldd(ctx, true); 4826 break; 4827 case OPC_MXU_S8SDI: 4828 gen_mxu_s8std(ctx, true); 4829 break; 4830 case OPC_MXU__POOL15: 4831 decode_opc_mxu__pool15(ctx); 4832 break; 4833 case OPC_MXU__POOL16: 4834 decode_opc_mxu__pool16(ctx); 4835 break; 4836 case OPC_MXU__POOL17: 4837 decode_opc_mxu__pool17(ctx); 4838 break; 4839 case OPC_MXU_S16LDD: 4840 gen_mxu_s16ldd(ctx, false); 4841 break; 4842 case OPC_MXU_S16STD: 4843 gen_mxu_s16std(ctx, false); 4844 break; 4845 case OPC_MXU_S16LDI: 4846 gen_mxu_s16ldd(ctx, true); 4847 break; 4848 case OPC_MXU_S16SDI: 4849 gen_mxu_s16std(ctx, true); 4850 break; 4851 case OPC_MXU_D32SLL: 4852 gen_mxu_d32sxx(ctx, false, false); 4853 break; 4854 case OPC_MXU_D32SLR: 4855 gen_mxu_d32sxx(ctx, true, false); 4856 break; 4857 case OPC_MXU_D32SARL: 4858 gen_mxu_d32sarl(ctx, false); 4859 break; 4860 case OPC_MXU_D32SAR: 4861 gen_mxu_d32sxx(ctx, true, true); 4862 break; 4863 case OPC_MXU_Q16SLL: 4864 gen_mxu_q16sxx(ctx, false, false); 4865 break; 4866 case OPC_MXU__POOL18: 4867 decode_opc_mxu__pool18(ctx); 4868 break; 4869 case OPC_MXU_Q16SLR: 4870 gen_mxu_q16sxx(ctx, true, false); 4871 break; 4872 case OPC_MXU_Q16SAR: 4873 gen_mxu_q16sxx(ctx, 
true, true); 4874 break; 4875 case OPC_MXU__POOL19: 4876 decode_opc_mxu__pool19(ctx); 4877 break; 4878 case OPC_MXU__POOL20: 4879 decode_opc_mxu__pool20(ctx); 4880 break; 4881 case OPC_MXU__POOL21: 4882 decode_opc_mxu__pool21(ctx); 4883 break; 4884 case OPC_MXU_Q16SCOP: 4885 gen_mxu_q16scop(ctx); 4886 break; 4887 default: 4888 return false; 4889 } 4890 4891 gen_set_label(l_exit); 4892 } 4893 4894 return true; 4895 } 4896