1 /* 2 * Ingenic XBurst Media eXtension Unit (MXU) translation routines. 3 * 4 * Copyright (c) 2004-2005 Jocelyn Mayer 5 * Copyright (c) 2006 Marius Groeger (FPU operations) 6 * Copyright (c) 2006 Thiemo Seufer (MIPS32R2 support) 7 * Copyright (c) 2009 CodeSourcery (MIPS16 and microMIPS support) 8 * Copyright (c) 2012 Jia Liu & Dongxue Zhang (MIPS ASE DSP support) 9 * 10 * SPDX-License-Identifier: LGPL-2.1-or-later 11 * 12 * Datasheet: 13 * 14 * "XBurst® Instruction Set Architecture MIPS eXtension/enhanced Unit 15 * Programming Manual", Ingenic Semiconductor Co, Ltd., revision June 2, 2017 16 */ 17 18 #include "qemu/osdep.h" 19 #include "translate.h" 20 21 /* 22 * 23 * AN OVERVIEW OF MXU EXTENSION INSTRUCTION SET 24 * ============================================ 25 * 26 * 27 * MXU (full name: MIPS eXtension/enhanced Unit) is a SIMD extension of MIPS32 28 * instructions set. It is designed to fit the needs of signal, graphical and 29 * video processing applications. MXU instruction set is used in Xburst family 30 * of microprocessors by Ingenic. 31 * 32 * MXU unit contains 17 registers called X0-X16. X0 is always zero, and X16 is 33 * the control register. 34 * 35 * 36 * The notation used in MXU assembler mnemonics 37 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 38 * 39 * Register operands: 40 * 41 * XRa, XRb, XRc, XRd - MXU registers 42 * Rb, Rc, Rd, Rs, Rt - general purpose MIPS registers 43 * 44 * Non-register operands: 45 * 46 * aptn1 - 1-bit accumulate add/subtract pattern 47 * aptn2 - 2-bit accumulate add/subtract pattern 48 * eptn2 - 2-bit execute add/subtract pattern 49 * optn2 - 2-bit operand pattern 50 * optn3 - 3-bit operand pattern 51 * sft4 - 4-bit shift amount 52 * strd2 - 2-bit stride amount 53 * 54 * Prefixes: 55 * 56 * Level of parallelism: Operand size: 57 * S - single operation at a time 32 - word 58 * D - two operations in parallel 16 - half word 59 * Q - four operations in parallel 8 - byte 60 * 61 * Operations: 62 * 63 * ADD - Add or subtract 64 * ADDC - Add with carry-in 65 * ACC - Accumulate 66 * ASUM - Sum together then accumulate (add or subtract) 67 * ASUMC - Sum together then accumulate (add or subtract) with carry-in 68 * AVG - Average between 2 operands 69 * ABD - Absolute difference 70 * ALN - Align data 71 * AND - Logical bitwise 'and' operation 72 * CPS - Copy sign 73 * EXTR - Extract bits 74 * I2M - Move from GPR register to MXU register 75 * LDD - Load data from memory to XRF 76 * LDI - Load data from memory to XRF (and increase the address base) 77 * LUI - Load unsigned immediate 78 * MUL - Multiply 79 * MULU - Unsigned multiply 80 * MADD - 64-bit operand add 32x32 product 81 * MSUB - 64-bit operand subtract 32x32 product 82 * MAC - Multiply and accumulate (add or subtract) 83 * MAD - Multiply and add or subtract 84 * MAX - Maximum between 2 operands 85 * MIN - Minimum between 2 operands 86 * M2I - Move from MXU register to GPR register 87 * MOVZ - Move if zero 88 * MOVN - Move if non-zero 89 * NOR - Logical bitwise 'nor' operation 90 * OR - Logical bitwise 'or' operation 91 * STD - Store data from XRF to memory 92 * SDI - Store data from XRF to memory (and increase the address base) 93 * SLT - Set of less than comparison 94 * SAD - Sum of absolute differences 95 * SLL - Logical shift left 96 * SLR - Logical shift right 97 * SAR - Arithmetic shift right 98 * SAT - Saturation 99 * SFL - Shuffle 100 * SCOP - Calculate x’s scope (-1, means x<0; 0, means x==0; 1, means x>0) 101 * XOR - Logical bitwise 'exclusive or' operation 102 * 103 * Suffixes: 104 * 105 
* E - Expand results 106 * F - Fixed point multiplication 107 * L - Low part result 108 * R - Doing rounding 109 * V - Variable instead of immediate 110 * W - Combine above L and V 111 * 112 * 113 * The list of MXU instructions grouped by functionality 114 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 115 * 116 * Load/Store instructions Multiplication instructions 117 * ----------------------- --------------------------- 118 * 119 * S32LDD XRa, Rb, s12 S32MADD XRa, XRd, Rs, Rt 120 * S32STD XRa, Rb, s12 S32MADDU XRa, XRd, Rs, Rt 121 * S32LDDV XRa, Rb, rc, strd2 S32MSUB XRa, XRd, Rs, Rt 122 * S32STDV XRa, Rb, rc, strd2 S32MSUBU XRa, XRd, Rs, Rt 123 * S32LDI XRa, Rb, s12 S32MUL XRa, XRd, Rs, Rt 124 * S32SDI XRa, Rb, s12 S32MULU XRa, XRd, Rs, Rt 125 * S32LDIV XRa, Rb, rc, strd2 D16MUL XRa, XRb, XRc, XRd, optn2 126 * S32SDIV XRa, Rb, rc, strd2 D16MULE XRa, XRb, XRc, optn2 127 * S32LDDR XRa, Rb, s12 D16MULF XRa, XRb, XRc, optn2 128 * S32STDR XRa, Rb, s12 D16MAC XRa, XRb, XRc, XRd, aptn2, optn2 129 * S32LDDVR XRa, Rb, rc, strd2 D16MACE XRa, XRb, XRc, XRd, aptn2, optn2 130 * S32STDVR XRa, Rb, rc, strd2 D16MACF XRa, XRb, XRc, XRd, aptn2, optn2 131 * S32LDIR XRa, Rb, s12 D16MADL XRa, XRb, XRc, XRd, aptn2, optn2 132 * S32SDIR XRa, Rb, s12 S16MAD XRa, XRb, XRc, XRd, aptn1, optn2 133 * S32LDIVR XRa, Rb, rc, strd2 Q8MUL XRa, XRb, XRc, XRd 134 * S32SDIVR XRa, Rb, rc, strd2 Q8MULSU XRa, XRb, XRc, XRd 135 * S16LDD XRa, Rb, s10, eptn2 Q8MAC XRa, XRb, XRc, XRd, aptn2 136 * S16STD XRa, Rb, s10, eptn2 Q8MACSU XRa, XRb, XRc, XRd, aptn2 137 * S16LDI XRa, Rb, s10, eptn2 Q8MADL XRa, XRb, XRc, XRd, aptn2 138 * S16SDI XRa, Rb, s10, eptn2 139 * S8LDD XRa, Rb, s8, eptn3 140 * S8STD XRa, Rb, s8, eptn3 Addition and subtraction instructions 141 * S8LDI XRa, Rb, s8, eptn3 ------------------------------------- 142 * S8SDI XRa, Rb, s8, eptn3 143 * LXW Rd, Rs, Rt, strd2 D32ADD XRa, XRb, XRc, XRd, eptn2 144 * LXH Rd, Rs, Rt, strd2 D32ADDC XRa, XRb, XRc, XRd 145 * LXHU Rd, Rs, Rt, strd2 D32ACC XRa, XRb, XRc, XRd, eptn2 146 * LXB Rd, Rs, Rt, strd2 D32ACCM XRa, XRb, XRc, XRd, eptn2 147 * LXBU Rd, Rs, Rt, strd2 D32ASUM XRa, XRb, XRc, XRd, eptn2 148 * S32CPS XRa, XRb, XRc 149 * Q16ADD XRa, XRb, XRc, XRd, eptn2, optn2 150 * Comparison instructions Q16ACC XRa, XRb, XRc, XRd, eptn2 151 * ----------------------- Q16ACCM XRa, XRb, XRc, XRd, eptn2 152 * D16ASUM XRa, XRb, XRc, XRd, eptn2 153 * S32MAX XRa, XRb, XRc D16CPS XRa, XRb, 154 * S32MIN XRa, XRb, XRc D16AVG XRa, XRb, XRc 155 * S32SLT XRa, XRb, XRc D16AVGR XRa, XRb, XRc 156 * S32MOVZ XRa, XRb, XRc Q8ADD XRa, XRb, XRc, eptn2 157 * S32MOVN XRa, XRb, XRc Q8ADDE XRa, XRb, XRc, XRd, eptn2 158 * D16MAX XRa, XRb, XRc Q8ACCE XRa, XRb, XRc, XRd, eptn2 159 * D16MIN XRa, XRb, XRc Q8ABD XRa, XRb, XRc 160 * D16SLT XRa, XRb, XRc Q8SAD XRa, XRb, XRc, XRd 161 * D16MOVZ XRa, XRb, XRc Q8AVG XRa, XRb, XRc 162 * D16MOVN XRa, XRb, XRc Q8AVGR XRa, XRb, XRc 163 * Q8MAX XRa, XRb, XRc D8SUM XRa, XRb, XRc, XRd 164 * Q8MIN XRa, XRb, XRc D8SUMC XRa, XRb, XRc, XRd 165 * Q8SLT XRa, XRb, XRc 166 * Q8SLTU XRa, XRb, XRc 167 * Q8MOVZ XRa, XRb, XRc Shift instructions 168 * Q8MOVN XRa, XRb, XRc ------------------ 169 * 170 * D32SLL XRa, XRb, XRc, XRd, sft4 171 * Bitwise instructions D32SLR XRa, XRb, XRc, XRd, sft4 172 * -------------------- D32SAR XRa, XRb, XRc, XRd, sft4 173 * D32SARL XRa, XRb, XRc, sft4 174 * S32NOR XRa, XRb, XRc D32SLLV XRa, XRb, Rb 175 * S32AND XRa, XRb, XRc D32SLRV XRa, XRb, Rb 176 * S32XOR XRa, XRb, XRc D32SARV XRa, XRb, Rb 177 * S32OR XRa, XRb, XRc D32SARW XRa, XRb, XRc, Rb 178 * 
Q16SLL XRa, XRb, XRc, XRd, sft4 179 * Q16SLR XRa, XRb, XRc, XRd, sft4 180 * Miscellaneous instructions Q16SAR XRa, XRb, XRc, XRd, sft4 181 * ------------------------- Q16SLLV XRa, XRb, Rb 182 * Q16SLRV XRa, XRb, Rb 183 * S32SFL XRa, XRb, XRc, XRd, optn2 Q16SARV XRa, XRb, Rb 184 * S32ALN XRa, XRb, XRc, Rb 185 * S32ALNI XRa, XRb, XRc, s3 186 * S32LUI XRa, s8, optn3 Move instructions 187 * S32EXTR XRa, XRb, Rb, bits5 ----------------- 188 * S32EXTRV XRa, XRb, Rs, Rt 189 * Q16SCOP XRa, XRb, XRc, XRd S32M2I XRa, Rb 190 * Q16SAT XRa, XRb, XRc S32I2M XRa, Rb 191 * 192 * 193 * The opcode organization of MXU instructions 194 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 195 * 196 * The bits 31..26 of all MXU instructions are equal to 0x1C (also referred 197 * as opcode SPECIAL2 in the base MIPS ISA). The organization and meaning of 198 * other bits up to the instruction level is as follows: 199 * 200 * bits 201 * 05..00 202 * 203 * ┌─ 000000 ─ OPC_MXU_S32MADD 204 * ├─ 000001 ─ OPC_MXU_S32MADDU 205 * ├─ 000010 ─ <not assigned> (non-MXU OPC_MUL) 206 * │ 207 * │ 20..18 208 * ├─ 000011 ─ OPC_MXU__POOL00 ─┬─ 000 ─ OPC_MXU_S32MAX 209 * │ ├─ 001 ─ OPC_MXU_S32MIN 210 * │ ├─ 010 ─ OPC_MXU_D16MAX 211 * │ ├─ 011 ─ OPC_MXU_D16MIN 212 * │ ├─ 100 ─ OPC_MXU_Q8MAX 213 * │ ├─ 101 ─ OPC_MXU_Q8MIN 214 * │ ├─ 110 ─ OPC_MXU_Q8SLT 215 * │ └─ 111 ─ OPC_MXU_Q8SLTU 216 * ├─ 000100 ─ OPC_MXU_S32MSUB 217 * ├─ 000101 ─ OPC_MXU_S32MSUBU 20..18 218 * ├─ 000110 ─ OPC_MXU__POOL01 ─┬─ 000 ─ OPC_MXU_S32SLT 219 * │ ├─ 001 ─ OPC_MXU_D16SLT 220 * │ ├─ 010 ─ OPC_MXU_D16AVG 221 * │ ├─ 011 ─ OPC_MXU_D16AVGR 222 * │ ├─ 100 ─ OPC_MXU_Q8AVG 223 * │ ├─ 101 ─ OPC_MXU_Q8AVGR 224 * │ └─ 111 ─ OPC_MXU_Q8ADD 225 * │ 226 * │ 20..18 227 * ├─ 000111 ─ OPC_MXU__POOL02 ─┬─ 000 ─ OPC_MXU_S32CPS 228 * │ ├─ 010 ─ OPC_MXU_D16CPS 229 * │ ├─ 100 ─ OPC_MXU_Q8ABD 230 * │ └─ 110 ─ OPC_MXU_Q16SAT 231 * ├─ 001000 ─ OPC_MXU_D16MUL 232 * │ 25..24 233 * ├─ 001001 ─ OPC_MXU__POOL03 ─┬─ 00 ─ OPC_MXU_D16MULF 234 * │ └─ 01 ─ OPC_MXU_D16MULE 235 * ├─ 001010 ─ OPC_MXU_D16MAC 236 * ├─ 001011 ─ OPC_MXU_D16MACF 237 * ├─ 001100 ─ OPC_MXU_D16MADL 238 * ├─ 001101 ─ OPC_MXU_S16MAD 239 * ├─ 001110 ─ OPC_MXU_Q16ADD 240 * ├─ 001111 ─ OPC_MXU_D16MACE 20 (13..10 don't care) 241 * │ ┌─ 0 ─ OPC_MXU_S32LDD 242 * ├─ 010000 ─ OPC_MXU__POOL04 ─┴─ 1 ─ OPC_MXU_S32LDDR 243 * │ 244 * │ 20 (13..10 don't care) 245 * ├─ 010001 ─ OPC_MXU__POOL05 ─┬─ 0 ─ OPC_MXU_S32STD 246 * │ └─ 1 ─ OPC_MXU_S32STDR 247 * │ 248 * │ 13..10 249 * ├─ 010010 ─ OPC_MXU__POOL06 ─┬─ 0000 ─ OPC_MXU_S32LDDV 250 * │ └─ 0001 ─ OPC_MXU_S32LDDVR 251 * │ 252 * │ 13..10 253 * ├─ 010011 ─ OPC_MXU__POOL07 ─┬─ 0000 ─ OPC_MXU_S32STDV 254 * │ └─ 0001 ─ OPC_MXU_S32STDVR 255 * │ 256 * │ 20 (13..10 don't care) 257 * ├─ 010100 ─ OPC_MXU__POOL08 ─┬─ 0 ─ OPC_MXU_S32LDI 258 * │ └─ 1 ─ OPC_MXU_S32LDIR 259 * │ 260 * │ 20 (13..10 don't care) 261 * ├─ 010101 ─ OPC_MXU__POOL09 ─┬─ 0 ─ OPC_MXU_S32SDI 262 * │ └─ 1 ─ OPC_MXU_S32SDIR 263 * │ 264 * │ 13..10 265 * ├─ 010110 ─ OPC_MXU__POOL10 ─┬─ 0000 ─ OPC_MXU_S32LDIV 266 * │ └─ 0001 ─ OPC_MXU_S32LDIVR 267 * │ 268 * │ 13..10 269 * ├─ 010111 ─ OPC_MXU__POOL11 ─┬─ 0000 ─ OPC_MXU_S32SDIV 270 * │ └─ 0001 ─ OPC_MXU_S32SDIVR 271 * ├─ 011000 ─ OPC_MXU_D32ADD (catches D32ADDC too) 272 * │ 23..22 273 * MXU ├─ 011001 ─ OPC_MXU__POOL12 ─┬─ 00 ─ OPC_MXU_D32ACC 274 * opcodes ─┤ ├─ 01 ─ OPC_MXU_D32ACCM 275 * │ └─ 10 ─ OPC_MXU_D32ASUM 276 * ├─ 011010 ─ <not assigned> 277 * │ 23..22 278 * ├─ 011011 ─ OPC_MXU__POOL13 ─┬─ 00 ─ OPC_MXU_Q16ACC 279 * │ ├─ 01 ─ OPC_MXU_Q16ACCM 280 * │ └─ 10 ─ OPC_MXU_D16ASUM 281 * │ 282 * 
│ 23..22 283 * ├─ 011100 ─ OPC_MXU__POOL14 ─┬─ 00 ─ OPC_MXU_Q8ADDE 284 * │ ├─ 01 ─ OPC_MXU_D8SUM 285 * ├─ 011101 ─ OPC_MXU_Q8ACCE └─ 10 ─ OPC_MXU_D8SUMC 286 * ├─ 011110 ─ <not assigned> 287 * ├─ 011111 ─ <not assigned> 288 * ├─ 100000 ─ <not assigned> (overlaps with CLZ) 289 * ├─ 100001 ─ <not assigned> (overlaps with CLO) 290 * ├─ 100010 ─ OPC_MXU_S8LDD 291 * ├─ 100011 ─ OPC_MXU_S8STD 15..14 292 * ├─ 100100 ─ OPC_MXU_S8LDI ┌─ 00 ─ OPC_MXU_S32MUL 293 * ├─ 100101 ─ OPC_MXU_S8SDI ├─ 01 ─ OPC_MXU_S32MULU 294 * │ ├─ 10 ─ OPC_MXU_S32EXTR 295 * ├─ 100110 ─ OPC_MXU__POOL15 ─┴─ 11 ─ OPC_MXU_S32EXTRV 296 * │ 297 * │ 20..18 298 * ├─ 100111 ─ OPC_MXU__POOL16 ─┬─ 000 ─ OPC_MXU_D32SARW 299 * │ ├─ 001 ─ OPC_MXU_S32ALN 300 * │ ├─ 010 ─ OPC_MXU_S32ALNI 301 * │ ├─ 011 ─ OPC_MXU_S32LUI 302 * │ ├─ 100 ─ OPC_MXU_S32NOR 303 * │ ├─ 101 ─ OPC_MXU_S32AND 304 * │ ├─ 110 ─ OPC_MXU_S32OR 305 * │ └─ 111 ─ OPC_MXU_S32XOR 306 * │ 307 * │ 8..6 308 * ├─ 101000 ─ OPC_MXU__POOL17 ─┬─ 000 ─ OPC_MXU_LXB 309 * │ ├─ 001 ─ OPC_MXU_LXH 310 * ├─ 101001 ─ <not assigned> ├─ 011 ─ OPC_MXU_LXW 311 * ├─ 101010 ─ OPC_MXU_S16LDD ├─ 100 ─ OPC_MXU_LXBU 312 * ├─ 101011 ─ OPC_MXU_S16STD └─ 101 ─ OPC_MXU_LXHU 313 * ├─ 101100 ─ OPC_MXU_S16LDI 314 * ├─ 101101 ─ OPC_MXU_S16SDI 315 * ├─ 101110 ─ OPC_MXU_S32M2I 316 * ├─ 101111 ─ OPC_MXU_S32I2M 317 * ├─ 110000 ─ OPC_MXU_D32SLL 318 * ├─ 110001 ─ OPC_MXU_D32SLR 20..18 319 * ├─ 110010 ─ OPC_MXU_D32SARL ┌─ 000 ─ OPC_MXU_D32SLLV 320 * ├─ 110011 ─ OPC_MXU_D32SAR ├─ 001 ─ OPC_MXU_D32SLRV 321 * ├─ 110100 ─ OPC_MXU_Q16SLL ├─ 011 ─ OPC_MXU_D32SARV 322 * ├─ 110101 ─ OPC_MXU_Q16SLR ├─ 100 ─ OPC_MXU_Q16SLLV 323 * │ ├─ 101 ─ OPC_MXU_Q16SLRV 324 * ├─ 110110 ─ OPC_MXU__POOL18 ─┴─ 111 ─ OPC_MXU_Q16SARV 325 * │ 326 * ├─ 110111 ─ OPC_MXU_Q16SAR 327 * │ 23..22 328 * ├─ 111000 ─ OPC_MXU__POOL19 ─┬─ 00 ─ OPC_MXU_Q8MUL 329 * │ └─ 10 ─ OPC_MXU_Q8MULSU 330 * │ 331 * │ 20..18 332 * ├─ 111001 ─ OPC_MXU__POOL20 ─┬─ 000 ─ OPC_MXU_Q8MOVZ 333 * │ ├─ 001 ─ OPC_MXU_Q8MOVN 334 * │ ├─ 010 ─ OPC_MXU_D16MOVZ 335 * │ ├─ 011 ─ OPC_MXU_D16MOVN 336 * │ ├─ 100 ─ OPC_MXU_S32MOVZ 337 * │ └─ 101 ─ OPC_MXU_S32MOVN 338 * │ 339 * │ 23..22 340 * ├─ 111010 ─ OPC_MXU__POOL21 ─┬─ 00 ─ OPC_MXU_Q8MAC 341 * │ └─ 10 ─ OPC_MXU_Q8MACSU 342 * ├─ 111011 ─ OPC_MXU_Q16SCOP 343 * ├─ 111100 ─ OPC_MXU_Q8MADL 344 * ├─ 111101 ─ OPC_MXU_S32SFL 345 * ├─ 111110 ─ OPC_MXU_Q8SAD 346 * └─ 111111 ─ <not assigned> (overlaps with SDBBP) 347 * 348 * 349 * Compiled after: 350 * 351 * "XBurst® Instruction Set Architecture MIPS eXtension/enhanced Unit 352 * Programming Manual", Ingenic Semiconductor Co, Ltd., revision June 2, 2017 353 */ 354 355 enum { 356 OPC_MXU_S32MADD = 0x00, 357 OPC_MXU_S32MADDU = 0x01, 358 OPC_MXU__POOL00 = 0x03, 359 OPC_MXU_S32MSUB = 0x04, 360 OPC_MXU_S32MSUBU = 0x05, 361 OPC_MXU__POOL01 = 0x06, 362 OPC_MXU__POOL02 = 0x07, 363 OPC_MXU_D16MUL = 0x08, 364 OPC_MXU__POOL03 = 0x09, 365 OPC_MXU_D16MAC = 0x0A, 366 OPC_MXU_D16MACF = 0x0B, 367 OPC_MXU_D16MADL = 0x0C, 368 OPC_MXU_S16MAD = 0x0D, 369 OPC_MXU_Q16ADD = 0x0E, 370 OPC_MXU_D16MACE = 0x0F, 371 OPC_MXU__POOL04 = 0x10, 372 OPC_MXU__POOL05 = 0x11, 373 OPC_MXU__POOL06 = 0x12, 374 OPC_MXU__POOL07 = 0x13, 375 OPC_MXU__POOL08 = 0x14, 376 OPC_MXU__POOL09 = 0x15, 377 OPC_MXU__POOL10 = 0x16, 378 OPC_MXU__POOL11 = 0x17, 379 OPC_MXU_D32ADD = 0x18, 380 OPC_MXU__POOL12 = 0x19, 381 OPC_MXU__POOL13 = 0x1B, 382 OPC_MXU__POOL14 = 0x1C, 383 OPC_MXU_Q8ACCE = 0x1D, 384 OPC_MXU_S8LDD = 0x22, 385 OPC_MXU_S8STD = 0x23, 386 OPC_MXU_S8LDI = 0x24, 387 OPC_MXU_S8SDI = 0x25, 388 OPC_MXU__POOL15 = 0x26, 389 OPC_MXU__POOL16 = 0x27, 
390 OPC_MXU__POOL17 = 0x28, 391 OPC_MXU_S16LDD = 0x2A, 392 OPC_MXU_S16STD = 0x2B, 393 OPC_MXU_S16LDI = 0x2C, 394 OPC_MXU_S16SDI = 0x2D, 395 OPC_MXU_S32M2I = 0x2E, 396 OPC_MXU_S32I2M = 0x2F, 397 OPC_MXU_D32SLL = 0x30, 398 OPC_MXU_D32SLR = 0x31, 399 OPC_MXU_D32SARL = 0x32, 400 OPC_MXU_D32SAR = 0x33, 401 OPC_MXU_Q16SLL = 0x34, 402 OPC_MXU_Q16SLR = 0x35, 403 OPC_MXU__POOL18 = 0x36, 404 OPC_MXU_Q16SAR = 0x37, 405 OPC_MXU__POOL19 = 0x38, 406 OPC_MXU__POOL20 = 0x39, 407 OPC_MXU__POOL21 = 0x3A, 408 OPC_MXU_Q16SCOP = 0x3B, 409 OPC_MXU_Q8MADL = 0x3C, 410 OPC_MXU_S32SFL = 0x3D, 411 OPC_MXU_Q8SAD = 0x3E, 412 }; 413 414 415 /* 416 * MXU pool 00 417 */ 418 enum { 419 OPC_MXU_S32MAX = 0x00, 420 OPC_MXU_S32MIN = 0x01, 421 OPC_MXU_D16MAX = 0x02, 422 OPC_MXU_D16MIN = 0x03, 423 OPC_MXU_Q8MAX = 0x04, 424 OPC_MXU_Q8MIN = 0x05, 425 OPC_MXU_Q8SLT = 0x06, 426 OPC_MXU_Q8SLTU = 0x07, 427 }; 428 429 /* 430 * MXU pool 01 431 */ 432 enum { 433 OPC_MXU_S32SLT = 0x00, 434 OPC_MXU_D16SLT = 0x01, 435 OPC_MXU_D16AVG = 0x02, 436 OPC_MXU_D16AVGR = 0x03, 437 OPC_MXU_Q8AVG = 0x04, 438 OPC_MXU_Q8AVGR = 0x05, 439 OPC_MXU_Q8ADD = 0x07, 440 }; 441 442 /* 443 * MXU pool 02 444 */ 445 enum { 446 OPC_MXU_S32CPS = 0x00, 447 OPC_MXU_D16CPS = 0x02, 448 OPC_MXU_Q8ABD = 0x04, 449 OPC_MXU_Q16SAT = 0x06, 450 }; 451 452 /* 453 * MXU pool 03 454 */ 455 enum { 456 OPC_MXU_D16MULF = 0x00, 457 OPC_MXU_D16MULE = 0x01, 458 }; 459 460 /* 461 * MXU pool 04 05 06 07 08 09 10 11 462 */ 463 enum { 464 OPC_MXU_S32LDST = 0x00, 465 OPC_MXU_S32LDSTR = 0x01, 466 }; 467 468 /* 469 * MXU pool 12 470 */ 471 enum { 472 OPC_MXU_D32ACC = 0x00, 473 OPC_MXU_D32ACCM = 0x01, 474 OPC_MXU_D32ASUM = 0x02, 475 }; 476 477 /* 478 * MXU pool 13 479 */ 480 enum { 481 OPC_MXU_Q16ACC = 0x00, 482 OPC_MXU_Q16ACCM = 0x01, 483 OPC_MXU_D16ASUM = 0x02, 484 }; 485 486 /* 487 * MXU pool 14 488 */ 489 enum { 490 OPC_MXU_Q8ADDE = 0x00, 491 OPC_MXU_D8SUM = 0x01, 492 OPC_MXU_D8SUMC = 0x02, 493 }; 494 495 /* 496 * MXU pool 15 497 */ 498 enum { 499 OPC_MXU_S32MUL = 0x00, 500 OPC_MXU_S32MULU = 0x01, 501 OPC_MXU_S32EXTR = 0x02, 502 OPC_MXU_S32EXTRV = 0x03, 503 }; 504 505 /* 506 * MXU pool 16 507 */ 508 enum { 509 OPC_MXU_D32SARW = 0x00, 510 OPC_MXU_S32ALN = 0x01, 511 OPC_MXU_S32ALNI = 0x02, 512 OPC_MXU_S32LUI = 0x03, 513 OPC_MXU_S32NOR = 0x04, 514 OPC_MXU_S32AND = 0x05, 515 OPC_MXU_S32OR = 0x06, 516 OPC_MXU_S32XOR = 0x07, 517 }; 518 519 /* 520 * MXU pool 17 521 */ 522 enum { 523 OPC_MXU_LXB = 0x00, 524 OPC_MXU_LXH = 0x01, 525 OPC_MXU_LXW = 0x03, 526 OPC_MXU_LXBU = 0x04, 527 OPC_MXU_LXHU = 0x05, 528 }; 529 530 /* 531 * MXU pool 18 532 */ 533 enum { 534 OPC_MXU_D32SLLV = 0x00, 535 OPC_MXU_D32SLRV = 0x01, 536 OPC_MXU_D32SARV = 0x03, 537 OPC_MXU_Q16SLLV = 0x04, 538 OPC_MXU_Q16SLRV = 0x05, 539 OPC_MXU_Q16SARV = 0x07, 540 }; 541 542 /* 543 * MXU pool 19 544 */ 545 enum { 546 OPC_MXU_Q8MUL = 0x00, 547 OPC_MXU_Q8MULSU = 0x02, 548 }; 549 550 /* 551 * MXU pool 20 552 */ 553 enum { 554 OPC_MXU_Q8MOVZ = 0x00, 555 OPC_MXU_Q8MOVN = 0x01, 556 OPC_MXU_D16MOVZ = 0x02, 557 OPC_MXU_D16MOVN = 0x03, 558 OPC_MXU_S32MOVZ = 0x04, 559 OPC_MXU_S32MOVN = 0x05, 560 }; 561 562 /* 563 * MXU pool 21 564 */ 565 enum { 566 OPC_MXU_Q8MAC = 0x00, 567 OPC_MXU_Q8MACSU = 0x02, 568 }; 569 570 571 /* MXU accumulate add/subtract 1-bit pattern 'aptn1' */ 572 #define MXU_APTN1_A 0 573 #define MXU_APTN1_S 1 574 575 /* MXU accumulate add/subtract 2-bit pattern 'aptn2' */ 576 #define MXU_APTN2_AA 0 577 #define MXU_APTN2_AS 1 578 #define MXU_APTN2_SA 2 579 #define MXU_APTN2_SS 3 580 581 /* MXU execute add/subtract 2-bit pattern 'eptn2' */ 
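/*
 * A brief sketch of how the translators below use these 2-bit patterns
 * (aptn2 for accumulation, eptn2 for execution): each letter selects add
 * ('A') or subtract ('S') for one of the two results an instruction
 * produces, first letter first. For example, in D16MAC MXU_APTN2_AS adds
 * the first product into XRa and subtracts the second one from XRd.
 */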
582 #define MXU_EPTN2_AA 0 583 #define MXU_EPTN2_AS 1 584 #define MXU_EPTN2_SA 2 585 #define MXU_EPTN2_SS 3 586 587 /* MXU operand getting pattern 'optn2' */ 588 #define MXU_OPTN2_PTN0 0 589 #define MXU_OPTN2_PTN1 1 590 #define MXU_OPTN2_PTN2 2 591 #define MXU_OPTN2_PTN3 3 592 /* alternative naming scheme for 'optn2' */ 593 #define MXU_OPTN2_WW 0 594 #define MXU_OPTN2_LW 1 595 #define MXU_OPTN2_HW 2 596 #define MXU_OPTN2_XW 3 597 598 /* MXU operand getting pattern 'optn3' */ 599 #define MXU_OPTN3_PTN0 0 600 #define MXU_OPTN3_PTN1 1 601 #define MXU_OPTN3_PTN2 2 602 #define MXU_OPTN3_PTN3 3 603 #define MXU_OPTN3_PTN4 4 604 #define MXU_OPTN3_PTN5 5 605 #define MXU_OPTN3_PTN6 6 606 #define MXU_OPTN3_PTN7 7 607 608 /* MXU registers */ 609 static TCGv mxu_gpr[NUMBER_OF_MXU_REGISTERS - 1]; 610 static TCGv mxu_CR; 611 612 static const char mxuregnames[NUMBER_OF_MXU_REGISTERS][4] = { 613 "XR1", "XR2", "XR3", "XR4", "XR5", "XR6", "XR7", "XR8", 614 "XR9", "XR10", "XR11", "XR12", "XR13", "XR14", "XR15", "XCR", 615 }; 616 617 void mxu_translate_init(void) 618 { 619 for (unsigned i = 0; i < NUMBER_OF_MXU_REGISTERS - 1; i++) { 620 mxu_gpr[i] = tcg_global_mem_new(cpu_env, 621 offsetof(CPUMIPSState, active_tc.mxu_gpr[i]), 622 mxuregnames[i]); 623 } 624 625 mxu_CR = tcg_global_mem_new(cpu_env, 626 offsetof(CPUMIPSState, active_tc.mxu_cr), 627 mxuregnames[NUMBER_OF_MXU_REGISTERS - 1]); 628 } 629 630 /* MXU General purpose registers moves. */ 631 static inline void gen_load_mxu_gpr(TCGv t, unsigned int reg) 632 { 633 if (reg == 0) { 634 tcg_gen_movi_tl(t, 0); 635 } else if (reg <= 15) { 636 tcg_gen_mov_tl(t, mxu_gpr[reg - 1]); 637 } 638 } 639 640 static inline void gen_store_mxu_gpr(TCGv t, unsigned int reg) 641 { 642 if (reg > 0 && reg <= 15) { 643 tcg_gen_mov_tl(mxu_gpr[reg - 1], t); 644 } 645 } 646 647 static inline void gen_extract_mxu_gpr(TCGv t, unsigned int reg, 648 unsigned int ofs, unsigned int len) 649 { 650 if (reg == 0) { 651 tcg_gen_movi_tl(t, 0); 652 } else if (reg <= 15) { 653 tcg_gen_extract_tl(t, mxu_gpr[reg - 1], ofs, len); 654 } 655 } 656 657 /* MXU control register moves. */ 658 static inline void gen_load_mxu_cr(TCGv t) 659 { 660 tcg_gen_mov_tl(t, mxu_CR); 661 } 662 663 static inline void gen_store_mxu_cr(TCGv t) 664 { 665 /* TODO: Add handling of RW rules for MXU_CR. 
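     * For now the value written by S32I2M is copied in unmodified; the only
     * CR bits this file itself tests are 0x2 (used as a rounding on/off
     * flag) and 0x4 (used as a rounding bias flag) by the D16MUL/D16MAC
     * fractional helpers below.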
*/ 666 tcg_gen_mov_tl(mxu_CR, t); 667 } 668 669 /* 670 * S32I2M XRa, rb - Register move from GRF to XRF 671 */ 672 static void gen_mxu_s32i2m(DisasContext *ctx) 673 { 674 TCGv t0; 675 uint32_t XRa, Rb; 676 677 t0 = tcg_temp_new(); 678 679 XRa = extract32(ctx->opcode, 6, 5); 680 Rb = extract32(ctx->opcode, 16, 5); 681 682 gen_load_gpr(t0, Rb); 683 if (XRa <= 15) { 684 gen_store_mxu_gpr(t0, XRa); 685 } else if (XRa == 16) { 686 gen_store_mxu_cr(t0); 687 } 688 } 689 690 /* 691 * S32M2I XRa, rb - Register move from XRF to GRF 692 */ 693 static void gen_mxu_s32m2i(DisasContext *ctx) 694 { 695 TCGv t0; 696 uint32_t XRa, Rb; 697 698 t0 = tcg_temp_new(); 699 700 XRa = extract32(ctx->opcode, 6, 5); 701 Rb = extract32(ctx->opcode, 16, 5); 702 703 if (XRa <= 15) { 704 gen_load_mxu_gpr(t0, XRa); 705 } else if (XRa == 16) { 706 gen_load_mxu_cr(t0); 707 } 708 709 gen_store_gpr(t0, Rb); 710 } 711 712 /* 713 * S8LDD XRa, Rb, s8, optn3 - Load a byte from memory to XRF 714 * 715 * S8LDI XRa, Rb, s8, optn3 - Load a byte from memory to XRF, 716 * post modify address register 717 */ 718 static void gen_mxu_s8ldd(DisasContext *ctx, bool postmodify) 719 { 720 TCGv t0, t1; 721 uint32_t XRa, Rb, s8, optn3; 722 723 t0 = tcg_temp_new(); 724 t1 = tcg_temp_new(); 725 726 XRa = extract32(ctx->opcode, 6, 4); 727 s8 = extract32(ctx->opcode, 10, 8); 728 optn3 = extract32(ctx->opcode, 18, 3); 729 Rb = extract32(ctx->opcode, 21, 5); 730 731 gen_load_gpr(t0, Rb); 732 tcg_gen_addi_tl(t0, t0, (int8_t)s8); 733 if (postmodify) { 734 gen_store_gpr(t0, Rb); 735 } 736 737 switch (optn3) { 738 /* XRa[7:0] = tmp8 */ 739 case MXU_OPTN3_PTN0: 740 tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB); 741 gen_load_mxu_gpr(t0, XRa); 742 tcg_gen_deposit_tl(t0, t0, t1, 0, 8); 743 break; 744 /* XRa[15:8] = tmp8 */ 745 case MXU_OPTN3_PTN1: 746 tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB); 747 gen_load_mxu_gpr(t0, XRa); 748 tcg_gen_deposit_tl(t0, t0, t1, 8, 8); 749 break; 750 /* XRa[23:16] = tmp8 */ 751 case MXU_OPTN3_PTN2: 752 tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB); 753 gen_load_mxu_gpr(t0, XRa); 754 tcg_gen_deposit_tl(t0, t0, t1, 16, 8); 755 break; 756 /* XRa[31:24] = tmp8 */ 757 case MXU_OPTN3_PTN3: 758 tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB); 759 gen_load_mxu_gpr(t0, XRa); 760 tcg_gen_deposit_tl(t0, t0, t1, 24, 8); 761 break; 762 /* XRa = {8'b0, tmp8, 8'b0, tmp8} */ 763 case MXU_OPTN3_PTN4: 764 tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB); 765 tcg_gen_deposit_tl(t0, t1, t1, 16, 16); 766 break; 767 /* XRa = {tmp8, 8'b0, tmp8, 8'b0} */ 768 case MXU_OPTN3_PTN5: 769 tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB); 770 tcg_gen_shli_tl(t1, t1, 8); 771 tcg_gen_deposit_tl(t0, t1, t1, 16, 16); 772 break; 773 /* XRa = {{8{sign of tmp8}}, tmp8, {8{sign of tmp8}}, tmp8} */ 774 case MXU_OPTN3_PTN6: 775 tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_SB); 776 tcg_gen_mov_tl(t0, t1); 777 tcg_gen_andi_tl(t0, t0, 0xFF00FFFF); 778 tcg_gen_shli_tl(t1, t1, 16); 779 tcg_gen_or_tl(t0, t0, t1); 780 break; 781 /* XRa = {tmp8, tmp8, tmp8, tmp8} */ 782 case MXU_OPTN3_PTN7: 783 tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB); 784 tcg_gen_deposit_tl(t1, t1, t1, 8, 8); 785 tcg_gen_deposit_tl(t0, t1, t1, 16, 16); 786 break; 787 } 788 789 gen_store_mxu_gpr(t0, XRa); 790 } 791 792 /* 793 * S8STD XRa, Rb, s8, optn3 - Store a byte from XRF to memory 794 * 795 * S8SDI XRa, Rb, s8, optn3 - Store a byte from XRF to memory, 796 * post modify address register 797 */ 798 static void gen_mxu_s8std(DisasContext *ctx, bool postmodify) 799 { 800 TCGv t0, t1; 801 
uint32_t XRa, Rb, s8, optn3; 802 803 t0 = tcg_temp_new(); 804 t1 = tcg_temp_new(); 805 806 XRa = extract32(ctx->opcode, 6, 4); 807 s8 = extract32(ctx->opcode, 10, 8); 808 optn3 = extract32(ctx->opcode, 18, 3); 809 Rb = extract32(ctx->opcode, 21, 5); 810 811 if (optn3 > 3) { 812 /* reserved, do nothing */ 813 return; 814 } 815 816 gen_load_gpr(t0, Rb); 817 tcg_gen_addi_tl(t0, t0, (int8_t)s8); 818 if (postmodify) { 819 gen_store_gpr(t0, Rb); 820 } 821 gen_load_mxu_gpr(t1, XRa); 822 823 switch (optn3) { 824 /* XRa[7:0] => tmp8 */ 825 case MXU_OPTN3_PTN0: 826 tcg_gen_extract_tl(t1, t1, 0, 8); 827 break; 828 /* XRa[15:8] => tmp8 */ 829 case MXU_OPTN3_PTN1: 830 tcg_gen_extract_tl(t1, t1, 8, 8); 831 break; 832 /* XRa[23:16] => tmp8 */ 833 case MXU_OPTN3_PTN2: 834 tcg_gen_extract_tl(t1, t1, 16, 8); 835 break; 836 /* XRa[31:24] => tmp8 */ 837 case MXU_OPTN3_PTN3: 838 tcg_gen_extract_tl(t1, t1, 24, 8); 839 break; 840 } 841 842 tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_UB); 843 } 844 845 /* 846 * S16LDD XRa, Rb, s10, optn2 - Load a halfword from memory to XRF 847 * 848 * S16LDI XRa, Rb, s10, optn2 - Load a halfword from memory to XRF, 849 * post modify address register 850 */ 851 static void gen_mxu_s16ldd(DisasContext *ctx, bool postmodify) 852 { 853 TCGv t0, t1; 854 uint32_t XRa, Rb, optn2; 855 int32_t s10; 856 857 t0 = tcg_temp_new(); 858 t1 = tcg_temp_new(); 859 860 XRa = extract32(ctx->opcode, 6, 4); 861 s10 = sextract32(ctx->opcode, 10, 9) * 2; 862 optn2 = extract32(ctx->opcode, 19, 2); 863 Rb = extract32(ctx->opcode, 21, 5); 864 865 gen_load_gpr(t0, Rb); 866 tcg_gen_addi_tl(t0, t0, s10); 867 if (postmodify) { 868 gen_store_gpr(t0, Rb); 869 } 870 871 switch (optn2) { 872 /* XRa[15:0] = tmp16 */ 873 case MXU_OPTN2_PTN0: 874 tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW); 875 gen_load_mxu_gpr(t0, XRa); 876 tcg_gen_deposit_tl(t0, t0, t1, 0, 16); 877 break; 878 /* XRa[31:16] = tmp16 */ 879 case MXU_OPTN2_PTN1: 880 tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW); 881 gen_load_mxu_gpr(t0, XRa); 882 tcg_gen_deposit_tl(t0, t0, t1, 16, 16); 883 break; 884 /* XRa = sign_extend(tmp16) */ 885 case MXU_OPTN2_PTN2: 886 tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_SW); 887 break; 888 /* XRa = {tmp16, tmp16} */ 889 case MXU_OPTN2_PTN3: 890 tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW); 891 tcg_gen_deposit_tl(t0, t1, t1, 0, 16); 892 tcg_gen_deposit_tl(t0, t1, t1, 16, 16); 893 break; 894 } 895 896 gen_store_mxu_gpr(t0, XRa); 897 } 898 899 /* 900 * S16STD XRa, Rb, s8, optn2 - Store a byte from XRF to memory 901 * 902 * S16SDI XRa, Rb, s8, optn2 - Store a byte from XRF to memory, 903 * post modify address register 904 */ 905 static void gen_mxu_s16std(DisasContext *ctx, bool postmodify) 906 { 907 TCGv t0, t1; 908 uint32_t XRa, Rb, optn2; 909 int32_t s10; 910 911 t0 = tcg_temp_new(); 912 t1 = tcg_temp_new(); 913 914 XRa = extract32(ctx->opcode, 6, 4); 915 s10 = sextract32(ctx->opcode, 10, 9) * 2; 916 optn2 = extract32(ctx->opcode, 19, 2); 917 Rb = extract32(ctx->opcode, 21, 5); 918 919 if (optn2 > 1) { 920 /* reserved, do nothing */ 921 return; 922 } 923 924 gen_load_gpr(t0, Rb); 925 tcg_gen_addi_tl(t0, t0, s10); 926 if (postmodify) { 927 gen_store_gpr(t0, Rb); 928 } 929 gen_load_mxu_gpr(t1, XRa); 930 931 switch (optn2) { 932 /* XRa[15:0] => tmp16 */ 933 case MXU_OPTN2_PTN0: 934 tcg_gen_extract_tl(t1, t1, 0, 16); 935 break; 936 /* XRa[31:16] => tmp16 */ 937 case MXU_OPTN2_PTN1: 938 tcg_gen_extract_tl(t1, t1, 16, 16); 939 break; 940 } 941 942 tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_UW); 943 } 944 945 /* 946 
* S32MUL XRa, XRd, rs, rt - Signed 32x32=>64 bit multiplication 947 * of GPR's and stores result into pair of MXU registers. 948 * It strains HI and LO registers. 949 * 950 * S32MULU XRa, XRd, rs, rt - Unsigned 32x32=>64 bit multiplication 951 * of GPR's and stores result into pair of MXU registers. 952 * It strains HI and LO registers. 953 */ 954 static void gen_mxu_s32mul(DisasContext *ctx, bool mulu) 955 { 956 TCGv t0, t1; 957 uint32_t XRa, XRd, rs, rt; 958 959 t0 = tcg_temp_new(); 960 t1 = tcg_temp_new(); 961 962 XRa = extract32(ctx->opcode, 6, 4); 963 XRd = extract32(ctx->opcode, 10, 4); 964 rs = extract32(ctx->opcode, 16, 5); 965 rt = extract32(ctx->opcode, 21, 5); 966 967 if (unlikely(rs == 0 || rt == 0)) { 968 tcg_gen_movi_tl(t0, 0); 969 tcg_gen_movi_tl(t1, 0); 970 } else { 971 gen_load_gpr(t0, rs); 972 gen_load_gpr(t1, rt); 973 974 if (mulu) { 975 tcg_gen_mulu2_tl(t0, t1, t0, t1); 976 } else { 977 tcg_gen_muls2_tl(t0, t1, t0, t1); 978 } 979 } 980 tcg_gen_mov_tl(cpu_HI[0], t1); 981 tcg_gen_mov_tl(cpu_LO[0], t0); 982 gen_store_mxu_gpr(t1, XRa); 983 gen_store_mxu_gpr(t0, XRd); 984 } 985 986 /* 987 * D16MUL XRa, XRb, XRc, XRd, optn2 - Signed 16 bit pattern multiplication 988 * D16MULF XRa, XRb, XRc, optn2 - Signed Q15 fraction pattern multiplication 989 * with rounding and packing result 990 * D16MULE XRa, XRb, XRc, XRd, optn2 - Signed Q15 fraction pattern 991 * multiplication with rounding 992 */ 993 static void gen_mxu_d16mul(DisasContext *ctx, bool fractional, 994 bool packed_result) 995 { 996 TCGv t0, t1, t2, t3; 997 uint32_t XRa, XRb, XRc, XRd, optn2; 998 999 t0 = tcg_temp_new(); 1000 t1 = tcg_temp_new(); 1001 t2 = tcg_temp_new(); 1002 t3 = tcg_temp_new(); 1003 1004 XRa = extract32(ctx->opcode, 6, 4); 1005 XRb = extract32(ctx->opcode, 10, 4); 1006 XRc = extract32(ctx->opcode, 14, 4); 1007 XRd = extract32(ctx->opcode, 18, 4); 1008 optn2 = extract32(ctx->opcode, 22, 2); 1009 1010 /* 1011 * TODO: XRd field isn't used for D16MULF 1012 * There's no knowledge how this field affect 1013 * instruction decoding/behavior 1014 */ 1015 1016 gen_load_mxu_gpr(t1, XRb); 1017 tcg_gen_sextract_tl(t0, t1, 0, 16); 1018 tcg_gen_sextract_tl(t1, t1, 16, 16); 1019 gen_load_mxu_gpr(t3, XRc); 1020 tcg_gen_sextract_tl(t2, t3, 0, 16); 1021 tcg_gen_sextract_tl(t3, t3, 16, 16); 1022 1023 switch (optn2) { 1024 case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */ 1025 tcg_gen_mul_tl(t3, t1, t3); 1026 tcg_gen_mul_tl(t2, t0, t2); 1027 break; 1028 case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */ 1029 tcg_gen_mul_tl(t3, t0, t3); 1030 tcg_gen_mul_tl(t2, t0, t2); 1031 break; 1032 case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */ 1033 tcg_gen_mul_tl(t3, t1, t3); 1034 tcg_gen_mul_tl(t2, t1, t2); 1035 break; 1036 case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */ 1037 tcg_gen_mul_tl(t3, t0, t3); 1038 tcg_gen_mul_tl(t2, t1, t2); 1039 break; 1040 } 1041 if (fractional) { 1042 TCGLabel *l_done = gen_new_label(); 1043 TCGv rounding = tcg_temp_new(); 1044 1045 tcg_gen_shli_tl(t3, t3, 1); 1046 tcg_gen_shli_tl(t2, t2, 1); 1047 tcg_gen_andi_tl(rounding, mxu_CR, 0x2); 1048 tcg_gen_brcondi_tl(TCG_COND_EQ, rounding, 0, l_done); 1049 if (packed_result) { 1050 TCGLabel *l_apply_bias_l = gen_new_label(); 1051 TCGLabel *l_apply_bias_r = gen_new_label(); 1052 TCGLabel *l_half_done = gen_new_label(); 1053 TCGv bias = tcg_temp_new(); 1054 1055 /* 1056 * D16MULF supports unbiased rounding aka "bankers rounding", 1057 * "round to even", "convergent rounding" 1058 */ 1059 
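            /*
             * Sketch of the check below: the products have already been
             * doubled, so bit 16 is the LSB of the packed Q15 result and
             * bits 15..0 are the discarded fraction. When the bias flag
             * (CR bit 0x4) is clear and the low 17 bits equal 0x8000, the
             * value lies exactly halfway with an even LSB, so the +0x8000
             * rounding term is skipped ("round half to even"); in every
             * other case 0x8000 is added before the high halves are packed.
             */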
tcg_gen_andi_tl(bias, mxu_CR, 0x4); 1060 tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_l); 1061 tcg_gen_andi_tl(t0, t3, 0x1ffff); 1062 tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_half_done); 1063 gen_set_label(l_apply_bias_l); 1064 tcg_gen_addi_tl(t3, t3, 0x8000); 1065 gen_set_label(l_half_done); 1066 tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_r); 1067 tcg_gen_andi_tl(t0, t2, 0x1ffff); 1068 tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_done); 1069 gen_set_label(l_apply_bias_r); 1070 tcg_gen_addi_tl(t2, t2, 0x8000); 1071 } else { 1072 /* D16MULE doesn't support unbiased rounding */ 1073 tcg_gen_addi_tl(t3, t3, 0x8000); 1074 tcg_gen_addi_tl(t2, t2, 0x8000); 1075 } 1076 gen_set_label(l_done); 1077 } 1078 if (!packed_result) { 1079 gen_store_mxu_gpr(t3, XRa); 1080 gen_store_mxu_gpr(t2, XRd); 1081 } else { 1082 tcg_gen_andi_tl(t3, t3, 0xffff0000); 1083 tcg_gen_shri_tl(t2, t2, 16); 1084 tcg_gen_or_tl(t3, t3, t2); 1085 gen_store_mxu_gpr(t3, XRa); 1086 } 1087 } 1088 1089 /* 1090 * D16MAC XRa, XRb, XRc, XRd, aptn2, optn2 1091 * Signed 16 bit pattern multiply and accumulate 1092 * D16MACF XRa, XRb, XRc, aptn2, optn2 1093 * Signed Q15 fraction pattern multiply accumulate and pack 1094 * D16MACE XRa, XRb, XRc, XRd, aptn2, optn2 1095 * Signed Q15 fraction pattern multiply and accumulate 1096 */ 1097 static void gen_mxu_d16mac(DisasContext *ctx, bool fractional, 1098 bool packed_result) 1099 { 1100 TCGv t0, t1, t2, t3; 1101 uint32_t XRa, XRb, XRc, XRd, optn2, aptn2; 1102 1103 t0 = tcg_temp_new(); 1104 t1 = tcg_temp_new(); 1105 t2 = tcg_temp_new(); 1106 t3 = tcg_temp_new(); 1107 1108 XRa = extract32(ctx->opcode, 6, 4); 1109 XRb = extract32(ctx->opcode, 10, 4); 1110 XRc = extract32(ctx->opcode, 14, 4); 1111 XRd = extract32(ctx->opcode, 18, 4); 1112 optn2 = extract32(ctx->opcode, 22, 2); 1113 aptn2 = extract32(ctx->opcode, 24, 2); 1114 1115 gen_load_mxu_gpr(t1, XRb); 1116 tcg_gen_sextract_tl(t0, t1, 0, 16); 1117 tcg_gen_sextract_tl(t1, t1, 16, 16); 1118 1119 gen_load_mxu_gpr(t3, XRc); 1120 tcg_gen_sextract_tl(t2, t3, 0, 16); 1121 tcg_gen_sextract_tl(t3, t3, 16, 16); 1122 1123 switch (optn2) { 1124 case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */ 1125 tcg_gen_mul_tl(t3, t1, t3); 1126 tcg_gen_mul_tl(t2, t0, t2); 1127 break; 1128 case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */ 1129 tcg_gen_mul_tl(t3, t0, t3); 1130 tcg_gen_mul_tl(t2, t0, t2); 1131 break; 1132 case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */ 1133 tcg_gen_mul_tl(t3, t1, t3); 1134 tcg_gen_mul_tl(t2, t1, t2); 1135 break; 1136 case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */ 1137 tcg_gen_mul_tl(t3, t0, t3); 1138 tcg_gen_mul_tl(t2, t1, t2); 1139 break; 1140 } 1141 1142 if (fractional) { 1143 tcg_gen_shli_tl(t3, t3, 1); 1144 tcg_gen_shli_tl(t2, t2, 1); 1145 } 1146 gen_load_mxu_gpr(t0, XRa); 1147 gen_load_mxu_gpr(t1, XRd); 1148 1149 switch (aptn2) { 1150 case MXU_APTN2_AA: 1151 tcg_gen_add_tl(t3, t0, t3); 1152 tcg_gen_add_tl(t2, t1, t2); 1153 break; 1154 case MXU_APTN2_AS: 1155 tcg_gen_add_tl(t3, t0, t3); 1156 tcg_gen_sub_tl(t2, t1, t2); 1157 break; 1158 case MXU_APTN2_SA: 1159 tcg_gen_sub_tl(t3, t0, t3); 1160 tcg_gen_add_tl(t2, t1, t2); 1161 break; 1162 case MXU_APTN2_SS: 1163 tcg_gen_sub_tl(t3, t0, t3); 1164 tcg_gen_sub_tl(t2, t1, t2); 1165 break; 1166 } 1167 1168 if (fractional) { 1169 TCGLabel *l_done = gen_new_label(); 1170 TCGv rounding = tcg_temp_new(); 1171 1172 tcg_gen_andi_tl(rounding, mxu_CR, 0x2); 1173 tcg_gen_brcondi_tl(TCG_COND_EQ, rounding, 0, l_done); 
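        /*
         * The whole rounding step is skipped when CR bit 0x2 is clear;
         * what follows mirrors the fractional rounding in gen_mxu_d16mul()
         * above, applied after accumulation instead of before it.
         */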
1174 if (packed_result) { 1175 TCGLabel *l_apply_bias_l = gen_new_label(); 1176 TCGLabel *l_apply_bias_r = gen_new_label(); 1177 TCGLabel *l_half_done = gen_new_label(); 1178 TCGv bias = tcg_temp_new(); 1179 1180 /* 1181 * D16MACF supports unbiased rounding aka "bankers rounding", 1182 * "round to even", "convergent rounding" 1183 */ 1184 tcg_gen_andi_tl(bias, mxu_CR, 0x4); 1185 tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_l); 1186 tcg_gen_andi_tl(t0, t3, 0x1ffff); 1187 tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_half_done); 1188 gen_set_label(l_apply_bias_l); 1189 tcg_gen_addi_tl(t3, t3, 0x8000); 1190 gen_set_label(l_half_done); 1191 tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_r); 1192 tcg_gen_andi_tl(t0, t2, 0x1ffff); 1193 tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_done); 1194 gen_set_label(l_apply_bias_r); 1195 tcg_gen_addi_tl(t2, t2, 0x8000); 1196 } else { 1197 /* D16MACE doesn't support unbiased rounding */ 1198 tcg_gen_addi_tl(t3, t3, 0x8000); 1199 tcg_gen_addi_tl(t2, t2, 0x8000); 1200 } 1201 gen_set_label(l_done); 1202 } 1203 1204 if (!packed_result) { 1205 gen_store_mxu_gpr(t3, XRa); 1206 gen_store_mxu_gpr(t2, XRd); 1207 } else { 1208 tcg_gen_andi_tl(t3, t3, 0xffff0000); 1209 tcg_gen_shri_tl(t2, t2, 16); 1210 tcg_gen_or_tl(t3, t3, t2); 1211 gen_store_mxu_gpr(t3, XRa); 1212 } 1213 } 1214 1215 /* 1216 * D16MADL XRa, XRb, XRc, XRd, aptn2, optn2 - Double packed 1217 * unsigned 16 bit pattern multiply and add/subtract. 1218 */ 1219 static void gen_mxu_d16madl(DisasContext *ctx) 1220 { 1221 TCGv t0, t1, t2, t3; 1222 uint32_t XRa, XRb, XRc, XRd, optn2, aptn2; 1223 1224 t0 = tcg_temp_new(); 1225 t1 = tcg_temp_new(); 1226 t2 = tcg_temp_new(); 1227 t3 = tcg_temp_new(); 1228 1229 XRa = extract32(ctx->opcode, 6, 4); 1230 XRb = extract32(ctx->opcode, 10, 4); 1231 XRc = extract32(ctx->opcode, 14, 4); 1232 XRd = extract32(ctx->opcode, 18, 4); 1233 optn2 = extract32(ctx->opcode, 22, 2); 1234 aptn2 = extract32(ctx->opcode, 24, 2); 1235 1236 gen_load_mxu_gpr(t1, XRb); 1237 tcg_gen_sextract_tl(t0, t1, 0, 16); 1238 tcg_gen_sextract_tl(t1, t1, 16, 16); 1239 1240 gen_load_mxu_gpr(t3, XRc); 1241 tcg_gen_sextract_tl(t2, t3, 0, 16); 1242 tcg_gen_sextract_tl(t3, t3, 16, 16); 1243 1244 switch (optn2) { 1245 case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */ 1246 tcg_gen_mul_tl(t3, t1, t3); 1247 tcg_gen_mul_tl(t2, t0, t2); 1248 break; 1249 case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */ 1250 tcg_gen_mul_tl(t3, t0, t3); 1251 tcg_gen_mul_tl(t2, t0, t2); 1252 break; 1253 case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */ 1254 tcg_gen_mul_tl(t3, t1, t3); 1255 tcg_gen_mul_tl(t2, t1, t2); 1256 break; 1257 case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */ 1258 tcg_gen_mul_tl(t3, t0, t3); 1259 tcg_gen_mul_tl(t2, t1, t2); 1260 break; 1261 } 1262 tcg_gen_extract_tl(t2, t2, 0, 16); 1263 tcg_gen_extract_tl(t3, t3, 0, 16); 1264 1265 gen_load_mxu_gpr(t1, XRa); 1266 tcg_gen_extract_tl(t0, t1, 0, 16); 1267 tcg_gen_extract_tl(t1, t1, 16, 16); 1268 1269 switch (aptn2) { 1270 case MXU_APTN2_AA: 1271 tcg_gen_add_tl(t3, t1, t3); 1272 tcg_gen_add_tl(t2, t0, t2); 1273 break; 1274 case MXU_APTN2_AS: 1275 tcg_gen_add_tl(t3, t1, t3); 1276 tcg_gen_sub_tl(t2, t0, t2); 1277 break; 1278 case MXU_APTN2_SA: 1279 tcg_gen_sub_tl(t3, t1, t3); 1280 tcg_gen_add_tl(t2, t0, t2); 1281 break; 1282 case MXU_APTN2_SS: 1283 tcg_gen_sub_tl(t3, t1, t3); 1284 tcg_gen_sub_tl(t2, t0, t2); 1285 break; 1286 } 1287 1288 tcg_gen_andi_tl(t2, t2, 0xffff); 1289 tcg_gen_shli_tl(t3, t3, 
16); 1290 tcg_gen_or_tl(mxu_gpr[XRd - 1], t3, t2); 1291 } 1292 1293 /* 1294 * S16MAD XRa, XRb, XRc, XRd, aptn2, optn2 - Single packed 1295 * signed 16 bit pattern multiply and 32-bit add/subtract. 1296 */ 1297 static void gen_mxu_s16mad(DisasContext *ctx) 1298 { 1299 TCGv t0, t1; 1300 uint32_t XRa, XRb, XRc, XRd, optn2, aptn1, pad; 1301 1302 t0 = tcg_temp_new(); 1303 t1 = tcg_temp_new(); 1304 1305 XRa = extract32(ctx->opcode, 6, 4); 1306 XRb = extract32(ctx->opcode, 10, 4); 1307 XRc = extract32(ctx->opcode, 14, 4); 1308 XRd = extract32(ctx->opcode, 18, 4); 1309 optn2 = extract32(ctx->opcode, 22, 2); 1310 aptn1 = extract32(ctx->opcode, 24, 1); 1311 pad = extract32(ctx->opcode, 25, 1); 1312 1313 if (pad) { 1314 /* FIXME check if it influence the result */ 1315 } 1316 1317 gen_load_mxu_gpr(t0, XRb); 1318 gen_load_mxu_gpr(t1, XRc); 1319 1320 switch (optn2) { 1321 case MXU_OPTN2_WW: /* XRB.H*XRC.H */ 1322 tcg_gen_sextract_tl(t0, t0, 16, 16); 1323 tcg_gen_sextract_tl(t1, t1, 16, 16); 1324 break; 1325 case MXU_OPTN2_LW: /* XRB.L*XRC.L */ 1326 tcg_gen_sextract_tl(t0, t0, 0, 16); 1327 tcg_gen_sextract_tl(t1, t1, 0, 16); 1328 break; 1329 case MXU_OPTN2_HW: /* XRB.H*XRC.L */ 1330 tcg_gen_sextract_tl(t0, t0, 16, 16); 1331 tcg_gen_sextract_tl(t1, t1, 0, 16); 1332 break; 1333 case MXU_OPTN2_XW: /* XRB.L*XRC.H */ 1334 tcg_gen_sextract_tl(t0, t0, 0, 16); 1335 tcg_gen_sextract_tl(t1, t1, 16, 16); 1336 break; 1337 } 1338 tcg_gen_mul_tl(t0, t0, t1); 1339 1340 gen_load_mxu_gpr(t1, XRa); 1341 1342 switch (aptn1) { 1343 case MXU_APTN1_A: 1344 tcg_gen_add_tl(t1, t1, t0); 1345 break; 1346 case MXU_APTN1_S: 1347 tcg_gen_sub_tl(t1, t1, t0); 1348 break; 1349 } 1350 1351 gen_store_mxu_gpr(t1, XRd); 1352 } 1353 1354 /* 1355 * Q8MUL XRa, XRb, XRc, XRd - Parallel quad unsigned 8 bit multiply 1356 * Q8MULSU XRa, XRb, XRc, XRd - Parallel quad signed 8 bit multiply 1357 * Q8MAC XRa, XRb, XRc, XRd - Parallel quad unsigned 8 bit multiply 1358 * and accumulate 1359 * Q8MACSU XRa, XRb, XRc, XRd - Parallel quad signed 8 bit multiply 1360 * and accumulate 1361 */ 1362 static void gen_mxu_q8mul_mac(DisasContext *ctx, bool su, bool mac) 1363 { 1364 TCGv t0, t1, t2, t3, t4, t5, t6, t7; 1365 uint32_t XRa, XRb, XRc, XRd, aptn2; 1366 1367 t0 = tcg_temp_new(); 1368 t1 = tcg_temp_new(); 1369 t2 = tcg_temp_new(); 1370 t3 = tcg_temp_new(); 1371 t4 = tcg_temp_new(); 1372 t5 = tcg_temp_new(); 1373 t6 = tcg_temp_new(); 1374 t7 = tcg_temp_new(); 1375 1376 XRa = extract32(ctx->opcode, 6, 4); 1377 XRb = extract32(ctx->opcode, 10, 4); 1378 XRc = extract32(ctx->opcode, 14, 4); 1379 XRd = extract32(ctx->opcode, 18, 4); 1380 aptn2 = extract32(ctx->opcode, 24, 2); 1381 1382 gen_load_mxu_gpr(t3, XRb); 1383 gen_load_mxu_gpr(t7, XRc); 1384 1385 if (su) { 1386 /* Q8MULSU / Q8MACSU */ 1387 tcg_gen_sextract_tl(t0, t3, 0, 8); 1388 tcg_gen_sextract_tl(t1, t3, 8, 8); 1389 tcg_gen_sextract_tl(t2, t3, 16, 8); 1390 tcg_gen_sextract_tl(t3, t3, 24, 8); 1391 } else { 1392 /* Q8MUL / Q8MAC */ 1393 tcg_gen_extract_tl(t0, t3, 0, 8); 1394 tcg_gen_extract_tl(t1, t3, 8, 8); 1395 tcg_gen_extract_tl(t2, t3, 16, 8); 1396 tcg_gen_extract_tl(t3, t3, 24, 8); 1397 } 1398 1399 tcg_gen_extract_tl(t4, t7, 0, 8); 1400 tcg_gen_extract_tl(t5, t7, 8, 8); 1401 tcg_gen_extract_tl(t6, t7, 16, 8); 1402 tcg_gen_extract_tl(t7, t7, 24, 8); 1403 1404 tcg_gen_mul_tl(t0, t0, t4); 1405 tcg_gen_mul_tl(t1, t1, t5); 1406 tcg_gen_mul_tl(t2, t2, t6); 1407 tcg_gen_mul_tl(t3, t3, t7); 1408 1409 if (mac) { 1410 gen_load_mxu_gpr(t4, XRd); 1411 gen_load_mxu_gpr(t5, XRa); 1412 tcg_gen_extract_tl(t6, t4, 
0, 16); 1413 tcg_gen_extract_tl(t7, t4, 16, 16); 1414 if (aptn2 & 1) { 1415 tcg_gen_sub_tl(t0, t6, t0); 1416 tcg_gen_sub_tl(t1, t7, t1); 1417 } else { 1418 tcg_gen_add_tl(t0, t6, t0); 1419 tcg_gen_add_tl(t1, t7, t1); 1420 } 1421 tcg_gen_extract_tl(t6, t5, 0, 16); 1422 tcg_gen_extract_tl(t7, t5, 16, 16); 1423 if (aptn2 & 2) { 1424 tcg_gen_sub_tl(t2, t6, t2); 1425 tcg_gen_sub_tl(t3, t7, t3); 1426 } else { 1427 tcg_gen_add_tl(t2, t6, t2); 1428 tcg_gen_add_tl(t3, t7, t3); 1429 } 1430 } 1431 1432 tcg_gen_deposit_tl(t0, t0, t1, 16, 16); 1433 tcg_gen_deposit_tl(t1, t2, t3, 16, 16); 1434 1435 gen_store_mxu_gpr(t0, XRd); 1436 gen_store_mxu_gpr(t1, XRa); 1437 } 1438 1439 /* 1440 * Q8MADL XRd, XRa, XRb, XRc 1441 * Parallel quad unsigned 8 bit multiply and accumulate. 1442 * e.g. XRd[0..3] = XRa[0..3] + XRb[0..3] * XRc[0..3] 1443 */ 1444 static void gen_mxu_q8madl(DisasContext *ctx) 1445 { 1446 TCGv t0, t1, t2, t3, t4, t5, t6, t7; 1447 uint32_t XRa, XRb, XRc, XRd, aptn2; 1448 1449 t0 = tcg_temp_new(); 1450 t1 = tcg_temp_new(); 1451 t2 = tcg_temp_new(); 1452 t3 = tcg_temp_new(); 1453 t4 = tcg_temp_new(); 1454 t5 = tcg_temp_new(); 1455 t6 = tcg_temp_new(); 1456 t7 = tcg_temp_new(); 1457 1458 XRa = extract32(ctx->opcode, 6, 4); 1459 XRb = extract32(ctx->opcode, 10, 4); 1460 XRc = extract32(ctx->opcode, 14, 4); 1461 XRd = extract32(ctx->opcode, 18, 4); 1462 aptn2 = extract32(ctx->opcode, 24, 2); 1463 1464 gen_load_mxu_gpr(t3, XRb); 1465 gen_load_mxu_gpr(t7, XRc); 1466 1467 tcg_gen_extract_tl(t0, t3, 0, 8); 1468 tcg_gen_extract_tl(t1, t3, 8, 8); 1469 tcg_gen_extract_tl(t2, t3, 16, 8); 1470 tcg_gen_extract_tl(t3, t3, 24, 8); 1471 1472 tcg_gen_extract_tl(t4, t7, 0, 8); 1473 tcg_gen_extract_tl(t5, t7, 8, 8); 1474 tcg_gen_extract_tl(t6, t7, 16, 8); 1475 tcg_gen_extract_tl(t7, t7, 24, 8); 1476 1477 tcg_gen_mul_tl(t0, t0, t4); 1478 tcg_gen_mul_tl(t1, t1, t5); 1479 tcg_gen_mul_tl(t2, t2, t6); 1480 tcg_gen_mul_tl(t3, t3, t7); 1481 1482 gen_load_mxu_gpr(t4, XRa); 1483 tcg_gen_extract_tl(t6, t4, 0, 8); 1484 tcg_gen_extract_tl(t7, t4, 8, 8); 1485 if (aptn2 & 1) { 1486 tcg_gen_sub_tl(t0, t6, t0); 1487 tcg_gen_sub_tl(t1, t7, t1); 1488 } else { 1489 tcg_gen_add_tl(t0, t6, t0); 1490 tcg_gen_add_tl(t1, t7, t1); 1491 } 1492 tcg_gen_extract_tl(t6, t4, 16, 8); 1493 tcg_gen_extract_tl(t7, t4, 24, 8); 1494 if (aptn2 & 2) { 1495 tcg_gen_sub_tl(t2, t6, t2); 1496 tcg_gen_sub_tl(t3, t7, t3); 1497 } else { 1498 tcg_gen_add_tl(t2, t6, t2); 1499 tcg_gen_add_tl(t3, t7, t3); 1500 } 1501 1502 tcg_gen_andi_tl(t5, t0, 0xff); 1503 tcg_gen_deposit_tl(t5, t5, t1, 8, 8); 1504 tcg_gen_deposit_tl(t5, t5, t2, 16, 8); 1505 tcg_gen_deposit_tl(t5, t5, t3, 24, 8); 1506 1507 gen_store_mxu_gpr(t5, XRd); 1508 } 1509 1510 /* 1511 * S32LDD XRa, Rb, S12 - Load a word from memory to XRF 1512 * S32LDDR XRa, Rb, S12 - Load a word from memory to XRF 1513 * in reversed byte seq. 1514 * S32LDI XRa, Rb, S12 - Load a word from memory to XRF, 1515 * post modify base address GPR. 1516 * S32LDIR XRa, Rb, S12 - Load a word from memory to XRF, 1517 * post modify base address GPR and load in reversed byte seq. 
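 *
 * Worked example (assuming the decoding below): the S12 immediate is
 * scaled by 4 before being added to Rb, so S12 = -1 addresses the word at
 * Rb - 4; the ...R forms load the same word with its four bytes swapped.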
 */
static void gen_mxu_s32ldxx(DisasContext *ctx, bool reversed, bool postinc)
{
    TCGv t0, t1;
    uint32_t XRa, Rb, s12;

    t0 = tcg_temp_new();
    t1 = tcg_temp_new();

    XRa = extract32(ctx->opcode, 6, 4);
    s12 = sextract32(ctx->opcode, 10, 10);
    Rb = extract32(ctx->opcode, 21, 5);

    gen_load_gpr(t0, Rb);
    tcg_gen_movi_tl(t1, s12 * 4);
    tcg_gen_add_tl(t0, t0, t1);

    tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx,
                       (MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
                       ctx->default_tcg_memop_mask);
    gen_store_mxu_gpr(t1, XRa);

    if (postinc) {
        gen_store_gpr(t0, Rb);
    }
}

/*
 *  S32STD  XRa, Rb, S12 - Store a word from XRF to memory
 *  S32STDR XRa, Rb, S12 - Store a word from XRF to memory
 *                         in reversed byte seq.
 *  S32SDI  XRa, Rb, S12 - Store a word from XRF to memory,
 *                         post modify base address GPR.
 *  S32SDIR XRa, Rb, S12 - Store a word from XRF to memory,
 *                         post modify base address GPR and store
 *                         in reversed byte seq.
 */
static void gen_mxu_s32stxx(DisasContext *ctx, bool reversed, bool postinc)
{
    TCGv t0, t1;
    uint32_t XRa, Rb, s12;

    t0 = tcg_temp_new();
    t1 = tcg_temp_new();

    XRa = extract32(ctx->opcode, 6, 4);
    s12 = sextract32(ctx->opcode, 10, 10);
    Rb = extract32(ctx->opcode, 21, 5);

    gen_load_gpr(t0, Rb);
    tcg_gen_movi_tl(t1, s12 * 4);
    tcg_gen_add_tl(t0, t0, t1);

    gen_load_mxu_gpr(t1, XRa);
    tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
                       (MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
                       ctx->default_tcg_memop_mask);

    if (postinc) {
        gen_store_gpr(t0, Rb);
    }
}

/*
 *  S32LDDV  XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
 *  S32LDDVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
 *                                in reversed byte seq.
 *  S32LDIV  XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
 *                                post modify base address GPR.
 *  S32LDIVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
 *                                post modify base address GPR and load
 *                                in reversed byte seq.
 */
static void gen_mxu_s32ldxvx(DisasContext *ctx, bool reversed,
                             bool postinc, uint32_t strd2)
{
    TCGv t0, t1;
    uint32_t XRa, Rb, Rc;

    t0 = tcg_temp_new();
    t1 = tcg_temp_new();

    XRa = extract32(ctx->opcode, 6, 4);
    Rc = extract32(ctx->opcode, 16, 5);
    Rb = extract32(ctx->opcode, 21, 5);

    gen_load_gpr(t0, Rb);
    gen_load_gpr(t1, Rc);
    tcg_gen_shli_tl(t1, t1, strd2);
    tcg_gen_add_tl(t0, t0, t1);

    tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx,
                       (MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
                       ctx->default_tcg_memop_mask);
    gen_store_mxu_gpr(t1, XRa);

    if (postinc) {
        gen_store_gpr(t0, Rb);
    }
}

/*
 *  LXW  Ra, Rb, Rc, STRD2 - Load a word from memory to GPR
 *  LXB  Ra, Rb, Rc, STRD2 - Load a byte from memory to GPR,
 *                           sign extending to GPR size.
 *  LXH  Ra, Rb, Rc, STRD2 - Load a halfword from memory to GPR,
 *                           sign extending to GPR size.
 *  LXBU Ra, Rb, Rc, STRD2 - Load a byte from memory to GPR,
 *                           zero extending to GPR size.
 *  LXHU Ra, Rb, Rc, STRD2 - Load a halfword from memory to GPR,
 *                           zero extending to GPR size.
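 *
 *  For all five forms the index Rc is scaled by 2^STRD2 before being added
 *  to the base Rb, e.g. STRD2 = 2 steps through an array of words.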
 */
static void gen_mxu_lxx(DisasContext *ctx, uint32_t strd2, MemOp mop)
{
    TCGv t0, t1;
    uint32_t Ra, Rb, Rc;

    t0 = tcg_temp_new();
    t1 = tcg_temp_new();

    Ra = extract32(ctx->opcode, 11, 5);
    Rc = extract32(ctx->opcode, 16, 5);
    Rb = extract32(ctx->opcode, 21, 5);

    gen_load_gpr(t0, Rb);
    gen_load_gpr(t1, Rc);
    tcg_gen_shli_tl(t1, t1, strd2);
    tcg_gen_add_tl(t0, t0, t1);

    tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, mop | ctx->default_tcg_memop_mask);
    gen_store_gpr(t1, Ra);
}

/*
 *  S32STDV  XRa, Rb, Rc, STRD2 - Store a word from XRF to memory
 *  S32STDVR XRa, Rb, Rc, STRD2 - Store a word from XRF to memory
 *                                in reversed byte seq.
 *  S32SDIV  XRa, Rb, Rc, STRD2 - Store a word from XRF to memory,
 *                                post modify base address GPR.
 *  S32SDIVR XRa, Rb, Rc, STRD2 - Store a word from XRF to memory,
 *                                post modify base address GPR and store
 *                                in reversed byte seq.
 */
static void gen_mxu_s32stxvx(DisasContext *ctx, bool reversed,
                             bool postinc, uint32_t strd2)
{
    TCGv t0, t1;
    uint32_t XRa, Rb, Rc;

    t0 = tcg_temp_new();
    t1 = tcg_temp_new();

    XRa = extract32(ctx->opcode, 6, 4);
    Rc = extract32(ctx->opcode, 16, 5);
    Rb = extract32(ctx->opcode, 21, 5);

    gen_load_gpr(t0, Rb);
    gen_load_gpr(t1, Rc);
    tcg_gen_shli_tl(t1, t1, strd2);
    tcg_gen_add_tl(t0, t0, t1);

    gen_load_mxu_gpr(t1, XRa);
    tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
                       (MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
                       ctx->default_tcg_memop_mask);

    if (postinc) {
        gen_store_gpr(t0, Rb);
    }
}

/*
 * MXU instruction category: logic
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *      S32NOR  S32AND  S32OR  S32XOR
 */

/*
 *  S32NOR XRa, XRb, XRc
 *    Update XRa with the result of logical bitwise 'nor' operation
 *    applied to the content of XRb and XRc.
 */
static void gen_mxu_S32NOR(DisasContext *ctx)
{
    uint32_t pad, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to all 1s */
        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0xFFFFFFFF);
    } else if (unlikely(XRb == 0)) {
        /* XRb zero register -> just set destination to the negation of XRc */
        tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
    } else if (unlikely(XRc == 0)) {
        /* XRc zero register -> just set destination to the negation of XRb */
        tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else if (unlikely(XRb == XRc)) {
        /* both operands same -> just set destination to the negation of XRb */
        tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else {
        /* the most general case */
        tcg_gen_nor_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
    }
}

/*
 *  S32AND XRa, XRb, XRc
 *    Update XRa with the result of logical bitwise 'and' operation
 *    applied to the content of XRb and XRc.
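 *
 *    As with S32NOR above, the XR0 cases are folded at translation time:
 *    XR0 always reads as zero and has no backing TCG global, so it must
 *    not be used to index mxu_gpr[].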
1733 */ 1734 static void gen_mxu_S32AND(DisasContext *ctx) 1735 { 1736 uint32_t pad, XRc, XRb, XRa; 1737 1738 pad = extract32(ctx->opcode, 21, 5); 1739 XRc = extract32(ctx->opcode, 14, 4); 1740 XRb = extract32(ctx->opcode, 10, 4); 1741 XRa = extract32(ctx->opcode, 6, 4); 1742 1743 if (unlikely(pad != 0)) { 1744 /* opcode padding incorrect -> do nothing */ 1745 } else if (unlikely(XRa == 0)) { 1746 /* destination is zero register -> do nothing */ 1747 } else if (unlikely((XRb == 0) || (XRc == 0))) { 1748 /* one of operands zero register -> just set destination to all 0s */ 1749 tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0); 1750 } else if (unlikely(XRb == XRc)) { 1751 /* both operands same -> just set destination to one of them */ 1752 tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]); 1753 } else { 1754 /* the most general case */ 1755 tcg_gen_and_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]); 1756 } 1757 } 1758 1759 /* 1760 * S32OR XRa, XRb, XRc 1761 * Update XRa with the result of logical bitwise 'or' operation 1762 * applied to the content of XRb and XRc. 1763 */ 1764 static void gen_mxu_S32OR(DisasContext *ctx) 1765 { 1766 uint32_t pad, XRc, XRb, XRa; 1767 1768 pad = extract32(ctx->opcode, 21, 5); 1769 XRc = extract32(ctx->opcode, 14, 4); 1770 XRb = extract32(ctx->opcode, 10, 4); 1771 XRa = extract32(ctx->opcode, 6, 4); 1772 1773 if (unlikely(pad != 0)) { 1774 /* opcode padding incorrect -> do nothing */ 1775 } else if (unlikely(XRa == 0)) { 1776 /* destination is zero register -> do nothing */ 1777 } else if (unlikely((XRb == 0) && (XRc == 0))) { 1778 /* both operands zero registers -> just set destination to all 0s */ 1779 tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0); 1780 } else if (unlikely(XRb == 0)) { 1781 /* XRb zero register -> just set destination to the content of XRc */ 1782 tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]); 1783 } else if (unlikely(XRc == 0)) { 1784 /* XRc zero register -> just set destination to the content of XRb */ 1785 tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]); 1786 } else if (unlikely(XRb == XRc)) { 1787 /* both operands same -> just set destination to one of them */ 1788 tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]); 1789 } else { 1790 /* the most general case */ 1791 tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]); 1792 } 1793 } 1794 1795 /* 1796 * S32XOR XRa, XRb, XRc 1797 * Update XRa with the result of logical bitwise 'xor' operation 1798 * applied to the content of XRb and XRc. 
1799 */ 1800 static void gen_mxu_S32XOR(DisasContext *ctx) 1801 { 1802 uint32_t pad, XRc, XRb, XRa; 1803 1804 pad = extract32(ctx->opcode, 21, 5); 1805 XRc = extract32(ctx->opcode, 14, 4); 1806 XRb = extract32(ctx->opcode, 10, 4); 1807 XRa = extract32(ctx->opcode, 6, 4); 1808 1809 if (unlikely(pad != 0)) { 1810 /* opcode padding incorrect -> do nothing */ 1811 } else if (unlikely(XRa == 0)) { 1812 /* destination is zero register -> do nothing */ 1813 } else if (unlikely((XRb == 0) && (XRc == 0))) { 1814 /* both operands zero registers -> just set destination to all 0s */ 1815 tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0); 1816 } else if (unlikely(XRb == 0)) { 1817 /* XRb zero register -> just set destination to the content of XRc */ 1818 tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]); 1819 } else if (unlikely(XRc == 0)) { 1820 /* XRc zero register -> just set destination to the content of XRb */ 1821 tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]); 1822 } else if (unlikely(XRb == XRc)) { 1823 /* both operands same -> just set destination to all 0s */ 1824 tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0); 1825 } else { 1826 /* the most general case */ 1827 tcg_gen_xor_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]); 1828 } 1829 } 1830 1831 /* 1832 * MXU instruction category: shift 1833 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1834 * 1835 * D32SLL D32SLR D32SAR D32SARL 1836 * D32SLLV D32SLRV D32SARV D32SARW 1837 * Q16SLL Q16SLR Q16SAR 1838 * Q16SLLV Q16SLRV Q16SARV 1839 */ 1840 1841 /* 1842 * D32SLL XRa, XRd, XRb, XRc, SFT4 1843 * Dual 32-bit shift left from XRb and XRc to SFT4 1844 * bits (0..15). Store to XRa and XRd respectively. 1845 * D32SLR XRa, XRd, XRb, XRc, SFT4 1846 * Dual 32-bit shift logic right from XRb and XRc 1847 * to SFT4 bits (0..15). Store to XRa and XRd respectively. 1848 * D32SAR XRa, XRd, XRb, XRc, SFT4 1849 * Dual 32-bit shift arithmetic right from XRb and XRc 1850 * to SFT4 bits (0..15). Store to XRa and XRd respectively. 1851 */ 1852 static void gen_mxu_d32sxx(DisasContext *ctx, bool right, bool arithmetic) 1853 { 1854 uint32_t XRa, XRb, XRc, XRd, sft4; 1855 1856 XRa = extract32(ctx->opcode, 6, 4); 1857 XRb = extract32(ctx->opcode, 10, 4); 1858 XRc = extract32(ctx->opcode, 14, 4); 1859 XRd = extract32(ctx->opcode, 18, 4); 1860 sft4 = extract32(ctx->opcode, 22, 4); 1861 1862 TCGv t0 = tcg_temp_new(); 1863 TCGv t1 = tcg_temp_new(); 1864 1865 gen_load_mxu_gpr(t0, XRb); 1866 gen_load_mxu_gpr(t1, XRc); 1867 1868 if (right) { 1869 if (arithmetic) { 1870 tcg_gen_sari_tl(t0, t0, sft4); 1871 tcg_gen_sari_tl(t1, t1, sft4); 1872 } else { 1873 tcg_gen_shri_tl(t0, t0, sft4); 1874 tcg_gen_shri_tl(t1, t1, sft4); 1875 } 1876 } else { 1877 tcg_gen_shli_tl(t0, t0, sft4); 1878 tcg_gen_shli_tl(t1, t1, sft4); 1879 } 1880 gen_store_mxu_gpr(t0, XRa); 1881 gen_store_mxu_gpr(t1, XRd); 1882 } 1883 1884 /* 1885 * D32SLLV XRa, XRd, rs 1886 * Dual 32-bit shift left from XRa and XRd to rs[3:0] 1887 * bits. Store back to XRa and XRd respectively. 1888 * D32SLRV XRa, XRd, rs 1889 * Dual 32-bit shift logic right from XRa and XRd to rs[3:0] 1890 * bits. Store back to XRa and XRd respectively. 1891 * D32SARV XRa, XRd, rs 1892 * Dual 32-bit shift arithmetic right from XRa and XRd to rs[3:0] 1893 * bits. Store back to XRa and XRd respectively. 
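 * Only the low four bits of the GPR value are used as the shift count
 * (the translator masks it with 0x0f), so e.g. a register value of 35
 * shifts by 3.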
1894 */ 1895 static void gen_mxu_d32sxxv(DisasContext *ctx, bool right, bool arithmetic) 1896 { 1897 uint32_t XRa, XRd, rs; 1898 1899 XRa = extract32(ctx->opcode, 10, 4); 1900 XRd = extract32(ctx->opcode, 14, 4); 1901 rs = extract32(ctx->opcode, 21, 5); 1902 1903 TCGv t0 = tcg_temp_new(); 1904 TCGv t1 = tcg_temp_new(); 1905 TCGv t2 = tcg_temp_new(); 1906 1907 gen_load_mxu_gpr(t0, XRa); 1908 gen_load_mxu_gpr(t1, XRd); 1909 gen_load_gpr(t2, rs); 1910 tcg_gen_andi_tl(t2, t2, 0x0f); 1911 1912 if (right) { 1913 if (arithmetic) { 1914 tcg_gen_sar_tl(t0, t0, t2); 1915 tcg_gen_sar_tl(t1, t1, t2); 1916 } else { 1917 tcg_gen_shr_tl(t0, t0, t2); 1918 tcg_gen_shr_tl(t1, t1, t2); 1919 } 1920 } else { 1921 tcg_gen_shl_tl(t0, t0, t2); 1922 tcg_gen_shl_tl(t1, t1, t2); 1923 } 1924 gen_store_mxu_gpr(t0, XRa); 1925 gen_store_mxu_gpr(t1, XRd); 1926 } 1927 1928 /* 1929 * D32SARL XRa, XRb, XRc, SFT4 1930 * Dual shift arithmetic right 32-bit integers in XRb and XRc 1931 * to SFT4 bits (0..15). Pack 16 LSBs of each into XRa. 1932 * 1933 * D32SARW XRa, XRb, XRc, rb 1934 * Dual shift arithmetic right 32-bit integers in XRb and XRc 1935 * to rb[3:0] bits. Pack 16 LSBs of each into XRa. 1936 */ 1937 static void gen_mxu_d32sarl(DisasContext *ctx, bool sarw) 1938 { 1939 uint32_t XRa, XRb, XRc, rb; 1940 1941 XRa = extract32(ctx->opcode, 6, 4); 1942 XRb = extract32(ctx->opcode, 10, 4); 1943 XRc = extract32(ctx->opcode, 14, 4); 1944 rb = extract32(ctx->opcode, 21, 5); 1945 1946 if (unlikely(XRa == 0)) { 1947 /* destination is zero register -> do nothing */ 1948 } else { 1949 TCGv t0 = tcg_temp_new(); 1950 TCGv t1 = tcg_temp_new(); 1951 TCGv t2 = tcg_temp_new(); 1952 1953 if (!sarw) { 1954 /* Make SFT4 from rb field */ 1955 tcg_gen_movi_tl(t2, rb >> 1); 1956 } else { 1957 gen_load_gpr(t2, rb); 1958 tcg_gen_andi_tl(t2, t2, 0x0f); 1959 } 1960 gen_load_mxu_gpr(t0, XRb); 1961 gen_load_mxu_gpr(t1, XRc); 1962 tcg_gen_sar_tl(t0, t0, t2); 1963 tcg_gen_sar_tl(t1, t1, t2); 1964 tcg_gen_extract_tl(t2, t1, 0, 16); 1965 tcg_gen_deposit_tl(t2, t2, t0, 16, 16); 1966 gen_store_mxu_gpr(t2, XRa); 1967 } 1968 } 1969 1970 /* 1971 * Q16SLL XRa, XRd, XRb, XRc, SFT4 1972 * Quad 16-bit shift left from XRb and XRc to SFT4 1973 * bits (0..15). Store to XRa and XRd respectively. 1974 * Q16SLR XRa, XRd, XRb, XRc, SFT4 1975 * Quad 16-bit shift logic right from XRb and XRc 1976 * to SFT4 bits (0..15). Store to XRa and XRd respectively. 1977 * Q16SAR XRa, XRd, XRb, XRc, SFT4 1978 * Quad 16-bit shift arithmetic right from XRb and XRc 1979 * to SFT4 bits (0..15). Store to XRa and XRd respectively. 
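 * Each 16-bit half-word is shifted independently, e.g. for Q16SLL:
 * XRa[0..1] = XRb[0..1] << sft4; XRd[0..1] = XRc[0..1] << sft4;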
1980 */ 1981 static void gen_mxu_q16sxx(DisasContext *ctx, bool right, bool arithmetic) 1982 { 1983 uint32_t XRa, XRb, XRc, XRd, sft4; 1984 1985 XRa = extract32(ctx->opcode, 6, 4); 1986 XRb = extract32(ctx->opcode, 10, 4); 1987 XRc = extract32(ctx->opcode, 14, 4); 1988 XRd = extract32(ctx->opcode, 18, 4); 1989 sft4 = extract32(ctx->opcode, 22, 4); 1990 1991 TCGv t0 = tcg_temp_new(); 1992 TCGv t1 = tcg_temp_new(); 1993 TCGv t2 = tcg_temp_new(); 1994 TCGv t3 = tcg_temp_new(); 1995 1996 gen_load_mxu_gpr(t0, XRb); 1997 gen_load_mxu_gpr(t2, XRc); 1998 1999 if (arithmetic) { 2000 tcg_gen_sextract_tl(t1, t0, 16, 16); 2001 tcg_gen_sextract_tl(t0, t0, 0, 16); 2002 tcg_gen_sextract_tl(t3, t2, 16, 16); 2003 tcg_gen_sextract_tl(t2, t2, 0, 16); 2004 } else { 2005 tcg_gen_extract_tl(t1, t0, 16, 16); 2006 tcg_gen_extract_tl(t0, t0, 0, 16); 2007 tcg_gen_extract_tl(t3, t2, 16, 16); 2008 tcg_gen_extract_tl(t2, t2, 0, 16); 2009 } 2010 2011 if (right) { 2012 if (arithmetic) { 2013 tcg_gen_sari_tl(t0, t0, sft4); 2014 tcg_gen_sari_tl(t1, t1, sft4); 2015 tcg_gen_sari_tl(t2, t2, sft4); 2016 tcg_gen_sari_tl(t3, t3, sft4); 2017 } else { 2018 tcg_gen_shri_tl(t0, t0, sft4); 2019 tcg_gen_shri_tl(t1, t1, sft4); 2020 tcg_gen_shri_tl(t2, t2, sft4); 2021 tcg_gen_shri_tl(t3, t3, sft4); 2022 } 2023 } else { 2024 tcg_gen_shli_tl(t0, t0, sft4); 2025 tcg_gen_shli_tl(t1, t1, sft4); 2026 tcg_gen_shli_tl(t2, t2, sft4); 2027 tcg_gen_shli_tl(t3, t3, sft4); 2028 } 2029 tcg_gen_deposit_tl(t0, t0, t1, 16, 16); 2030 tcg_gen_deposit_tl(t2, t2, t3, 16, 16); 2031 2032 gen_store_mxu_gpr(t0, XRa); 2033 gen_store_mxu_gpr(t2, XRd); 2034 } 2035 2036 /* 2037 * Q16SLLV XRa, XRd, rs 2038 * Quad 16-bit shift left from XRa and XRd to rs[3:0] 2039 * bits. Store to XRa and XRd respectively. 2040 * Q16SLRV XRa, XRd, rs 2041 * Quad 16-bit shift logic right from XRa and XRd to rs[3:0] 2042 * bits. Store to XRa and XRd respectively. 2043 * Q16SARV XRa, XRd, rs 2044 * Quad 16-bit shift arithmetic right from XRa and XRd to rs[3:0] 2045 * bits. Store to XRa and XRd respectively. 
2046 */ 2047 static void gen_mxu_q16sxxv(DisasContext *ctx, bool right, bool arithmetic) 2048 { 2049 uint32_t XRa, XRd, rs; 2050 2051 XRa = extract32(ctx->opcode, 10, 4); 2052 XRd = extract32(ctx->opcode, 14, 4); 2053 rs = extract32(ctx->opcode, 21, 5); 2054 2055 TCGv t0 = tcg_temp_new(); 2056 TCGv t1 = tcg_temp_new(); 2057 TCGv t2 = tcg_temp_new(); 2058 TCGv t3 = tcg_temp_new(); 2059 TCGv t5 = tcg_temp_new(); 2060 2061 gen_load_mxu_gpr(t0, XRa); 2062 gen_load_mxu_gpr(t2, XRd); 2063 gen_load_gpr(t5, rs); 2064 tcg_gen_andi_tl(t5, t5, 0x0f); 2065 2066 2067 if (arithmetic) { 2068 tcg_gen_sextract_tl(t1, t0, 16, 16); 2069 tcg_gen_sextract_tl(t0, t0, 0, 16); 2070 tcg_gen_sextract_tl(t3, t2, 16, 16); 2071 tcg_gen_sextract_tl(t2, t2, 0, 16); 2072 } else { 2073 tcg_gen_extract_tl(t1, t0, 16, 16); 2074 tcg_gen_extract_tl(t0, t0, 0, 16); 2075 tcg_gen_extract_tl(t3, t2, 16, 16); 2076 tcg_gen_extract_tl(t2, t2, 0, 16); 2077 } 2078 2079 if (right) { 2080 if (arithmetic) { 2081 tcg_gen_sar_tl(t0, t0, t5); 2082 tcg_gen_sar_tl(t1, t1, t5); 2083 tcg_gen_sar_tl(t2, t2, t5); 2084 tcg_gen_sar_tl(t3, t3, t5); 2085 } else { 2086 tcg_gen_shr_tl(t0, t0, t5); 2087 tcg_gen_shr_tl(t1, t1, t5); 2088 tcg_gen_shr_tl(t2, t2, t5); 2089 tcg_gen_shr_tl(t3, t3, t5); 2090 } 2091 } else { 2092 tcg_gen_shl_tl(t0, t0, t5); 2093 tcg_gen_shl_tl(t1, t1, t5); 2094 tcg_gen_shl_tl(t2, t2, t5); 2095 tcg_gen_shl_tl(t3, t3, t5); 2096 } 2097 tcg_gen_deposit_tl(t0, t0, t1, 16, 16); 2098 tcg_gen_deposit_tl(t2, t2, t3, 16, 16); 2099 2100 gen_store_mxu_gpr(t0, XRa); 2101 gen_store_mxu_gpr(t2, XRd); 2102 } 2103 2104 /* 2105 * MXU instruction category max/min/avg 2106 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2107 * 2108 * S32MAX D16MAX Q8MAX 2109 * S32MIN D16MIN Q8MIN 2110 * S32SLT D16SLT Q8SLT 2111 * Q8SLTU 2112 * D16AVG Q8AVG 2113 * D16AVGR Q8AVGR 2114 * S32MOVZ D16MOVZ Q8MOVZ 2115 * S32MOVN D16MOVN Q8MOVN 2116 */ 2117 2118 /* 2119 * S32MAX XRa, XRb, XRc 2120 * Update XRa with the maximum of signed 32-bit integers contained 2121 * in XRb and XRc. 2122 * 2123 * S32MIN XRa, XRb, XRc 2124 * Update XRa with the minimum of signed 32-bit integers contained 2125 * in XRb and XRc. 2126 */ 2127 static void gen_mxu_S32MAX_S32MIN(DisasContext *ctx) 2128 { 2129 uint32_t pad, opc, XRc, XRb, XRa; 2130 2131 pad = extract32(ctx->opcode, 21, 5); 2132 opc = extract32(ctx->opcode, 18, 3); 2133 XRc = extract32(ctx->opcode, 14, 4); 2134 XRb = extract32(ctx->opcode, 10, 4); 2135 XRa = extract32(ctx->opcode, 6, 4); 2136 2137 if (unlikely(pad != 0)) { 2138 /* opcode padding incorrect -> do nothing */ 2139 } else if (unlikely(XRa == 0)) { 2140 /* destination is zero register -> do nothing */ 2141 } else if (unlikely((XRb == 0) && (XRc == 0))) { 2142 /* both operands zero registers -> just set destination to zero */ 2143 tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0); 2144 } else if (unlikely((XRb == 0) || (XRc == 0))) { 2145 /* exactly one operand is zero register - find which one is not...*/ 2146 uint32_t XRx = XRb ? 
            XRb : XRc;
        /* ...and do max/min operation with one operand 0 */
        if (opc == OPC_MXU_S32MAX) {
            tcg_gen_smax_i32(mxu_gpr[XRa - 1], mxu_gpr[XRx - 1],
                             tcg_constant_i32(0));
        } else {
            tcg_gen_smin_i32(mxu_gpr[XRa - 1], mxu_gpr[XRx - 1],
                             tcg_constant_i32(0));
        }
    } else if (unlikely(XRb == XRc)) {
        /* both operands same -> just set destination to one of them */
        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else {
        /* the most general case */
        if (opc == OPC_MXU_S32MAX) {
            tcg_gen_smax_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1],
                             mxu_gpr[XRc - 1]);
        } else {
            tcg_gen_smin_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1],
                             mxu_gpr[XRc - 1]);
        }
    }
}

/*
 * D16MAX
 * Update XRa with the 16-bit-wise maximums of signed integers
 * contained in XRb and XRc.
 *
 * D16MIN
 * Update XRa with the 16-bit-wise minimums of signed integers
 * contained in XRb and XRc.
 */
static void gen_mxu_D16MAX_D16MIN(DisasContext *ctx)
{
    uint32_t pad, opc, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    opc = extract32(ctx->opcode, 18, 3);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to zero */
        tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
    } else if (unlikely((XRb == 0) || (XRc == 0))) {
        /* exactly one operand is zero register - find which one is not... */
        uint32_t XRx = XRb ? XRb : XRc;
        /* ...and do half-word-wise max/min with one operand 0 */
        TCGv_i32 t0 = tcg_temp_new();
        TCGv_i32 t1 = tcg_constant_i32(0);
        TCGv_i32 t2 = tcg_temp_new();

        /* the left half-word first */
        tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFFFF0000);
        if (opc == OPC_MXU_D16MAX) {
            tcg_gen_smax_i32(t2, t0, t1);
        } else {
            tcg_gen_smin_i32(t2, t0, t1);
        }

        /* the right half-word */
        tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0x0000FFFF);
        /* move half-words to the leftmost position */
        tcg_gen_shli_i32(t0, t0, 16);
        /* t0 will be max/min of t0 and t1 */
        if (opc == OPC_MXU_D16MAX) {
            tcg_gen_smax_i32(t0, t0, t1);
        } else {
            tcg_gen_smin_i32(t0, t0, t1);
        }
        /* return the resulting half-word to its original position */
        tcg_gen_shri_i32(t0, t0, 16);
        /* finally update the destination */
        tcg_gen_or_i32(mxu_gpr[XRa - 1], t2, t0);
    } else if (unlikely(XRb == XRc)) {
        /* both operands same -> just set destination to one of them */
        tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else {
        /* the most general case */
        TCGv_i32 t0 = tcg_temp_new();
        TCGv_i32 t1 = tcg_temp_new();
        TCGv_i32 t2 = tcg_temp_new();

        /* the left half-word first */
        tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFFFF0000);
        tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFF0000);
        if (opc == OPC_MXU_D16MAX) {
            tcg_gen_smax_i32(t2, t0, t1);
        } else {
            tcg_gen_smin_i32(t2, t0, t1);
        }

        /* the right half-word */
        tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x0000FFFF);
        tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0x0000FFFF);
        /* move half-words to the leftmost position */
        tcg_gen_shli_i32(t0, t0, 16);
tcg_gen_shli_i32(t1, t1, 16); 2248 /* t0 will be max/min of t0 and t1 */ 2249 if (opc == OPC_MXU_D16MAX) { 2250 tcg_gen_smax_i32(t0, t0, t1); 2251 } else { 2252 tcg_gen_smin_i32(t0, t0, t1); 2253 } 2254 /* return resulting half-words to its original position */ 2255 tcg_gen_shri_i32(t0, t0, 16); 2256 /* finally update the destination */ 2257 tcg_gen_or_i32(mxu_gpr[XRa - 1], t2, t0); 2258 } 2259 } 2260 2261 /* 2262 * Q8MAX 2263 * Update XRa with the 8-bit-wise maximums of signed integers 2264 * contained in XRb and XRc. 2265 * 2266 * Q8MIN 2267 * Update XRa with the 8-bit-wise minimums of signed integers 2268 * contained in XRb and XRc. 2269 */ 2270 static void gen_mxu_Q8MAX_Q8MIN(DisasContext *ctx) 2271 { 2272 uint32_t pad, opc, XRc, XRb, XRa; 2273 2274 pad = extract32(ctx->opcode, 21, 5); 2275 opc = extract32(ctx->opcode, 18, 3); 2276 XRc = extract32(ctx->opcode, 14, 4); 2277 XRb = extract32(ctx->opcode, 10, 4); 2278 XRa = extract32(ctx->opcode, 6, 4); 2279 2280 if (unlikely(pad != 0)) { 2281 /* opcode padding incorrect -> do nothing */ 2282 } else if (unlikely(XRa == 0)) { 2283 /* destination is zero register -> do nothing */ 2284 } else if (unlikely((XRb == 0) && (XRc == 0))) { 2285 /* both operands zero registers -> just set destination to zero */ 2286 tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0); 2287 } else if (unlikely((XRb == 0) || (XRc == 0))) { 2288 /* exactly one operand is zero register - make it be the first...*/ 2289 uint32_t XRx = XRb ? XRb : XRc; 2290 /* ...and do byte-wise max/min with one operand 0 */ 2291 TCGv_i32 t0 = tcg_temp_new(); 2292 TCGv_i32 t1 = tcg_constant_i32(0); 2293 TCGv_i32 t2 = tcg_temp_new(); 2294 int32_t i; 2295 2296 /* the leftmost byte (byte 3) first */ 2297 tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFF000000); 2298 if (opc == OPC_MXU_Q8MAX) { 2299 tcg_gen_smax_i32(t2, t0, t1); 2300 } else { 2301 tcg_gen_smin_i32(t2, t0, t1); 2302 } 2303 2304 /* bytes 2, 1, 0 */ 2305 for (i = 2; i >= 0; i--) { 2306 /* extract the byte */ 2307 tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFF << (8 * i)); 2308 /* move the byte to the leftmost position */ 2309 tcg_gen_shli_i32(t0, t0, 8 * (3 - i)); 2310 /* t0 will be max/min of t0 and t1 */ 2311 if (opc == OPC_MXU_Q8MAX) { 2312 tcg_gen_smax_i32(t0, t0, t1); 2313 } else { 2314 tcg_gen_smin_i32(t0, t0, t1); 2315 } 2316 /* return resulting byte to its original position */ 2317 tcg_gen_shri_i32(t0, t0, 8 * (3 - i)); 2318 /* finally update the destination */ 2319 tcg_gen_or_i32(t2, t2, t0); 2320 } 2321 gen_store_mxu_gpr(t2, XRa); 2322 } else if (unlikely(XRb == XRc)) { 2323 /* both operands same -> just set destination to one of them */ 2324 tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]); 2325 } else { 2326 /* the most general case */ 2327 TCGv_i32 t0 = tcg_temp_new(); 2328 TCGv_i32 t1 = tcg_temp_new(); 2329 TCGv_i32 t2 = tcg_temp_new(); 2330 int32_t i; 2331 2332 /* the leftmost bytes (bytes 3) first */ 2333 tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFF000000); 2334 tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF000000); 2335 if (opc == OPC_MXU_Q8MAX) { 2336 tcg_gen_smax_i32(t2, t0, t1); 2337 } else { 2338 tcg_gen_smin_i32(t2, t0, t1); 2339 } 2340 2341 /* bytes 2, 1, 0 */ 2342 for (i = 2; i >= 0; i--) { 2343 /* extract corresponding bytes */ 2344 tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFF << (8 * i)); 2345 tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF << (8 * i)); 2346 /* move the bytes to the leftmost position */ 2347 tcg_gen_shli_i32(t0, t0, 8 * (3 - i)); 2348 tcg_gen_shli_i32(t1, t1, 8 * (3 - i)); 2349 /* t0 will be max/min of t0 and t1 */ 
2350 if (opc == OPC_MXU_Q8MAX) { 2351 tcg_gen_smax_i32(t0, t0, t1); 2352 } else { 2353 tcg_gen_smin_i32(t0, t0, t1); 2354 } 2355 /* return resulting byte to its original position */ 2356 tcg_gen_shri_i32(t0, t0, 8 * (3 - i)); 2357 /* finally update the destination */ 2358 tcg_gen_or_i32(t2, t2, t0); 2359 } 2360 gen_store_mxu_gpr(t2, XRa); 2361 } 2362 } 2363 2364 /* 2365 * Q8SLT 2366 * Update XRa with the signed "set less than" comparison of XRb and XRc 2367 * on per-byte basis. 2368 * a.k.a. XRa[0..3] = XRb[0..3] < XRc[0..3] ? 1 : 0; 2369 * 2370 * Q8SLTU 2371 * Update XRa with the unsigned "set less than" comparison of XRb and XRc 2372 * on per-byte basis. 2373 * a.k.a. XRa[0..3] = XRb[0..3] < XRc[0..3] ? 1 : 0; 2374 */ 2375 static void gen_mxu_q8slt(DisasContext *ctx, bool sltu) 2376 { 2377 uint32_t pad, XRc, XRb, XRa; 2378 2379 pad = extract32(ctx->opcode, 21, 5); 2380 XRc = extract32(ctx->opcode, 14, 4); 2381 XRb = extract32(ctx->opcode, 10, 4); 2382 XRa = extract32(ctx->opcode, 6, 4); 2383 2384 if (unlikely(pad != 0)) { 2385 /* opcode padding incorrect -> do nothing */ 2386 } else if (unlikely(XRa == 0)) { 2387 /* destination is zero register -> do nothing */ 2388 } else if (unlikely((XRb == 0) && (XRc == 0))) { 2389 /* both operands zero registers -> just set destination to zero */ 2390 tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); 2391 } else if (unlikely(XRb == XRc)) { 2392 /* both operands same registers -> just set destination to zero */ 2393 tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); 2394 } else { 2395 /* the most general case */ 2396 TCGv t0 = tcg_temp_new(); 2397 TCGv t1 = tcg_temp_new(); 2398 TCGv t2 = tcg_temp_new(); 2399 TCGv t3 = tcg_temp_new(); 2400 TCGv t4 = tcg_temp_new(); 2401 2402 gen_load_mxu_gpr(t3, XRb); 2403 gen_load_mxu_gpr(t4, XRc); 2404 tcg_gen_movi_tl(t2, 0); 2405 2406 for (int i = 0; i < 4; i++) { 2407 if (sltu) { 2408 tcg_gen_extract_tl(t0, t3, 8 * i, 8); 2409 tcg_gen_extract_tl(t1, t4, 8 * i, 8); 2410 } else { 2411 tcg_gen_sextract_tl(t0, t3, 8 * i, 8); 2412 tcg_gen_sextract_tl(t1, t4, 8 * i, 8); 2413 } 2414 tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1); 2415 tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8); 2416 } 2417 gen_store_mxu_gpr(t2, XRa); 2418 } 2419 } 2420 2421 /* 2422 * S32SLT 2423 * Update XRa with the signed "set less than" comparison of XRb and XRc. 2424 * a.k.a. XRa = XRb < XRc ? 1 : 0; 2425 */ 2426 static void gen_mxu_S32SLT(DisasContext *ctx) 2427 { 2428 uint32_t pad, XRc, XRb, XRa; 2429 2430 pad = extract32(ctx->opcode, 21, 5); 2431 XRc = extract32(ctx->opcode, 14, 4); 2432 XRb = extract32(ctx->opcode, 10, 4); 2433 XRa = extract32(ctx->opcode, 6, 4); 2434 2435 if (unlikely(pad != 0)) { 2436 /* opcode padding incorrect -> do nothing */ 2437 } else if (unlikely(XRa == 0)) { 2438 /* destination is zero register -> do nothing */ 2439 } else if (unlikely((XRb == 0) && (XRc == 0))) { 2440 /* both operands zero registers -> just set destination to zero */ 2441 tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); 2442 } else if (unlikely(XRb == XRc)) { 2443 /* both operands same registers -> just set destination to zero */ 2444 tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); 2445 } else { 2446 /* the most general case */ 2447 TCGv t0 = tcg_temp_new(); 2448 TCGv t1 = tcg_temp_new(); 2449 2450 gen_load_mxu_gpr(t0, XRb); 2451 gen_load_mxu_gpr(t1, XRc); 2452 tcg_gen_setcond_tl(TCG_COND_LT, mxu_gpr[XRa - 1], t0, t1); 2453 } 2454 } 2455 2456 /* 2457 * D16SLT 2458 * Update XRa with the signed "set less than" comparison of XRb and XRc 2459 * on per-word basis. 2460 * a.k.a. 
XRa[0..1] = XRb[0..1] < XRc[0..1] ? 1 : 0; 2461 */ 2462 static void gen_mxu_D16SLT(DisasContext *ctx) 2463 { 2464 uint32_t pad, XRc, XRb, XRa; 2465 2466 pad = extract32(ctx->opcode, 21, 5); 2467 XRc = extract32(ctx->opcode, 14, 4); 2468 XRb = extract32(ctx->opcode, 10, 4); 2469 XRa = extract32(ctx->opcode, 6, 4); 2470 2471 if (unlikely(pad != 0)) { 2472 /* opcode padding incorrect -> do nothing */ 2473 } else if (unlikely(XRa == 0)) { 2474 /* destination is zero register -> do nothing */ 2475 } else if (unlikely((XRb == 0) && (XRc == 0))) { 2476 /* both operands zero registers -> just set destination to zero */ 2477 tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); 2478 } else if (unlikely(XRb == XRc)) { 2479 /* both operands same registers -> just set destination to zero */ 2480 tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); 2481 } else { 2482 /* the most general case */ 2483 TCGv t0 = tcg_temp_new(); 2484 TCGv t1 = tcg_temp_new(); 2485 TCGv t2 = tcg_temp_new(); 2486 TCGv t3 = tcg_temp_new(); 2487 TCGv t4 = tcg_temp_new(); 2488 2489 gen_load_mxu_gpr(t3, XRb); 2490 gen_load_mxu_gpr(t4, XRc); 2491 tcg_gen_sextract_tl(t0, t3, 16, 16); 2492 tcg_gen_sextract_tl(t1, t4, 16, 16); 2493 tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1); 2494 tcg_gen_shli_tl(t2, t0, 16); 2495 tcg_gen_sextract_tl(t0, t3, 0, 16); 2496 tcg_gen_sextract_tl(t1, t4, 0, 16); 2497 tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1); 2498 tcg_gen_or_tl(mxu_gpr[XRa - 1], t2, t0); 2499 } 2500 } 2501 2502 /* 2503 * D16AVG 2504 * Update XRa with the signed average of XRb and XRc 2505 * on per-word basis, rounding down. 2506 * a.k.a. XRa[0..1] = (XRb[0..1] + XRc[0..1]) >> 1; 2507 * 2508 * D16AVGR 2509 * Update XRa with the signed average of XRb and XRc 2510 * on per-word basis, math rounding 4/5. 2511 * a.k.a. XRa[0..1] = (XRb[0..1] + XRc[0..1] + 1) >> 1; 2512 */ 2513 static void gen_mxu_d16avg(DisasContext *ctx, bool round45) 2514 { 2515 uint32_t pad, XRc, XRb, XRa; 2516 2517 pad = extract32(ctx->opcode, 21, 5); 2518 XRc = extract32(ctx->opcode, 14, 4); 2519 XRb = extract32(ctx->opcode, 10, 4); 2520 XRa = extract32(ctx->opcode, 6, 4); 2521 2522 if (unlikely(pad != 0)) { 2523 /* opcode padding incorrect -> do nothing */ 2524 } else if (unlikely(XRa == 0)) { 2525 /* destination is zero register -> do nothing */ 2526 } else if (unlikely((XRb == 0) && (XRc == 0))) { 2527 /* both operands zero registers -> just set destination to zero */ 2528 tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); 2529 } else if (unlikely(XRb == XRc)) { 2530 /* both operands same registers -> just set destination to same */ 2531 tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]); 2532 } else { 2533 /* the most general case */ 2534 TCGv t0 = tcg_temp_new(); 2535 TCGv t1 = tcg_temp_new(); 2536 TCGv t2 = tcg_temp_new(); 2537 TCGv t3 = tcg_temp_new(); 2538 TCGv t4 = tcg_temp_new(); 2539 2540 gen_load_mxu_gpr(t3, XRb); 2541 gen_load_mxu_gpr(t4, XRc); 2542 tcg_gen_sextract_tl(t0, t3, 16, 16); 2543 tcg_gen_sextract_tl(t1, t4, 16, 16); 2544 tcg_gen_add_tl(t0, t0, t1); 2545 if (round45) { 2546 tcg_gen_addi_tl(t0, t0, 1); 2547 } 2548 tcg_gen_shli_tl(t2, t0, 15); 2549 tcg_gen_andi_tl(t2, t2, 0xffff0000); 2550 tcg_gen_sextract_tl(t0, t3, 0, 16); 2551 tcg_gen_sextract_tl(t1, t4, 0, 16); 2552 tcg_gen_add_tl(t0, t0, t1); 2553 if (round45) { 2554 tcg_gen_addi_tl(t0, t0, 1); 2555 } 2556 tcg_gen_shri_tl(t0, t0, 1); 2557 tcg_gen_deposit_tl(t2, t2, t0, 0, 16); 2558 gen_store_mxu_gpr(t2, XRa); 2559 } 2560 } 2561 2562 /* 2563 * Q8AVG 2564 * Update XRa with the signed average of XRb and XRc 2565 * on per-byte basis, 
rounding down. 2566 * a.k.a. XRa[0..3] = (XRb[0..3] + XRc[0..3]) >> 1; 2567 * 2568 * Q8AVGR 2569 * Update XRa with the signed average of XRb and XRc 2570 * on per-word basis, math rounding 4/5. 2571 * a.k.a. XRa[0..3] = (XRb[0..3] + XRc[0..3] + 1) >> 1; 2572 */ 2573 static void gen_mxu_q8avg(DisasContext *ctx, bool round45) 2574 { 2575 uint32_t pad, XRc, XRb, XRa; 2576 2577 pad = extract32(ctx->opcode, 21, 5); 2578 XRc = extract32(ctx->opcode, 14, 4); 2579 XRb = extract32(ctx->opcode, 10, 4); 2580 XRa = extract32(ctx->opcode, 6, 4); 2581 2582 if (unlikely(pad != 0)) { 2583 /* opcode padding incorrect -> do nothing */ 2584 } else if (unlikely(XRa == 0)) { 2585 /* destination is zero register -> do nothing */ 2586 } else if (unlikely((XRb == 0) && (XRc == 0))) { 2587 /* both operands zero registers -> just set destination to zero */ 2588 tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); 2589 } else if (unlikely(XRb == XRc)) { 2590 /* both operands same registers -> just set destination to same */ 2591 tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]); 2592 } else { 2593 /* the most general case */ 2594 TCGv t0 = tcg_temp_new(); 2595 TCGv t1 = tcg_temp_new(); 2596 TCGv t2 = tcg_temp_new(); 2597 TCGv t3 = tcg_temp_new(); 2598 TCGv t4 = tcg_temp_new(); 2599 2600 gen_load_mxu_gpr(t3, XRb); 2601 gen_load_mxu_gpr(t4, XRc); 2602 tcg_gen_movi_tl(t2, 0); 2603 2604 for (int i = 0; i < 4; i++) { 2605 tcg_gen_extract_tl(t0, t3, 8 * i, 8); 2606 tcg_gen_extract_tl(t1, t4, 8 * i, 8); 2607 tcg_gen_add_tl(t0, t0, t1); 2608 if (round45) { 2609 tcg_gen_addi_tl(t0, t0, 1); 2610 } 2611 tcg_gen_shri_tl(t0, t0, 1); 2612 tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8); 2613 } 2614 gen_store_mxu_gpr(t2, XRa); 2615 } 2616 } 2617 2618 /* 2619 * Q8MOVZ 2620 * Quadruple 8-bit packed conditional move where 2621 * XRb contains conditions, XRc what to move and 2622 * XRa is the destination. 2623 * a.k.a. if (XRb[0..3] == 0) { XRa[0..3] = XRc[0..3] } 2624 * 2625 * Q8MOVN 2626 * Quadruple 8-bit packed conditional move where 2627 * XRb contains conditions, XRc what to move and 2628 * XRa is the destination. 2629 * a.k.a. 
if (XRb[0..3] != 0) { XRa[0..3] = XRc[0..3] }
 */
static void gen_mxu_q8movzn(DisasContext *ctx, TCGCond cond)
{
    uint32_t XRc, XRb, XRa;

    XRa = extract32(ctx->opcode, 6, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRc = extract32(ctx->opcode, 14, 4);

    TCGv t0 = tcg_temp_new();
    TCGv t1 = tcg_temp_new();
    TCGv t2 = tcg_temp_new();
    TCGv t3 = tcg_temp_new();
    TCGLabel *l_quarterdone = gen_new_label();
    TCGLabel *l_halfdone = gen_new_label();
    TCGLabel *l_quarterrest = gen_new_label();
    TCGLabel *l_done = gen_new_label();

    gen_load_mxu_gpr(t0, XRc);
    gen_load_mxu_gpr(t1, XRb);
    gen_load_mxu_gpr(t2, XRa);

    tcg_gen_extract_tl(t3, t1, 24, 8);
    tcg_gen_brcondi_tl(cond, t3, 0, l_quarterdone);
    tcg_gen_extract_tl(t3, t0, 24, 8);
    tcg_gen_deposit_tl(t2, t2, t3, 24, 8);

    gen_set_label(l_quarterdone);
    tcg_gen_extract_tl(t3, t1, 16, 8);
    tcg_gen_brcondi_tl(cond, t3, 0, l_halfdone);
    tcg_gen_extract_tl(t3, t0, 16, 8);
    tcg_gen_deposit_tl(t2, t2, t3, 16, 8);

    gen_set_label(l_halfdone);
    tcg_gen_extract_tl(t3, t1, 8, 8);
    tcg_gen_brcondi_tl(cond, t3, 0, l_quarterrest);
    tcg_gen_extract_tl(t3, t0, 8, 8);
    tcg_gen_deposit_tl(t2, t2, t3, 8, 8);

    gen_set_label(l_quarterrest);
    tcg_gen_extract_tl(t3, t1, 0, 8);
    tcg_gen_brcondi_tl(cond, t3, 0, l_done);
    tcg_gen_extract_tl(t3, t0, 0, 8);
    tcg_gen_deposit_tl(t2, t2, t3, 0, 8);

    gen_set_label(l_done);
    gen_store_mxu_gpr(t2, XRa);
}

/*
 * D16MOVZ
 * Double 16-bit packed conditional move where
 * XRb contains conditions, XRc what to move and
 * XRa is the destination.
 * a.k.a. if (XRb[0..1] == 0) { XRa[0..1] = XRc[0..1] }
 *
 * D16MOVN
 * Double 16-bit packed conditional move where
 * XRb contains conditions, XRc what to move and
 * XRa is the destination.
 * a.k.a. if (XRb[0..1] != 0) { XRa[0..1] = XRc[0..1] }
 */
static void gen_mxu_d16movzn(DisasContext *ctx, TCGCond cond)
{
    uint32_t XRc, XRb, XRa;

    XRa = extract32(ctx->opcode, 6, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRc = extract32(ctx->opcode, 14, 4);

    TCGv t0 = tcg_temp_new();
    TCGv t1 = tcg_temp_new();
    TCGv t2 = tcg_temp_new();
    TCGv t3 = tcg_temp_new();
    TCGLabel *l_halfdone = gen_new_label();
    TCGLabel *l_done = gen_new_label();

    gen_load_mxu_gpr(t0, XRc);
    gen_load_mxu_gpr(t1, XRb);
    gen_load_mxu_gpr(t2, XRa);

    tcg_gen_extract_tl(t3, t1, 16, 16);
    tcg_gen_brcondi_tl(cond, t3, 0, l_halfdone);
    tcg_gen_extract_tl(t3, t0, 16, 16);
    tcg_gen_deposit_tl(t2, t2, t3, 16, 16);

    gen_set_label(l_halfdone);
    tcg_gen_extract_tl(t3, t1, 0, 16);
    tcg_gen_brcondi_tl(cond, t3, 0, l_done);
    tcg_gen_extract_tl(t3, t0, 0, 16);
    tcg_gen_deposit_tl(t2, t2, t3, 0, 16);

    gen_set_label(l_done);
    gen_store_mxu_gpr(t2, XRa);
}
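
/*
 * Editor's illustrative sketch (not used by the translator): a plain C
 * model of the per-half-word conditional move that gen_mxu_d16movzn()
 * above implements. The guard macro MXU_REFERENCE_MODELS is hypothetical
 * and never defined, so this block is not compiled.
 */
#ifdef MXU_REFERENCE_MODELS
static uint32_t d16movzn_ref(uint32_t xra, uint32_t xrb, uint32_t xrc,
                             bool move_if_zero)
{
    for (int i = 0; i < 2; i++) {
        bool cond_is_zero = ((xrb >> (16 * i)) & 0xffffu) == 0;
        if (cond_is_zero == move_if_zero) {
            /* replace half-word i of XRa with half-word i of XRc */
            xra &= ~(0xffffu << (16 * i));
            xra |= ((xrc >> (16 * i)) & 0xffffu) << (16 * i);
        }
    }
    return xra;
}
#endif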

/*
 * S32MOVZ
 * Single 32-bit conditional move where
 * XRb contains the condition, XRc what to move and
 * XRa is the destination.
 * a.k.a. if (XRb == 0) { XRa = XRc }
 *
 * S32MOVN
 * Single 32-bit conditional move where
 * XRb contains the condition, XRc what to move and
 * XRa is the destination.
 * a.k.a. if (XRb != 0) { XRa = XRc }
 */
static void gen_mxu_s32movzn(DisasContext *ctx, TCGCond cond)
{
    uint32_t XRc, XRb, XRa;

    XRa = extract32(ctx->opcode, 6, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRc = extract32(ctx->opcode, 14, 4);

    TCGv t0 = tcg_temp_new();
    TCGv t1 = tcg_temp_new();
    TCGLabel *l_done = gen_new_label();

    gen_load_mxu_gpr(t0, XRc);
    gen_load_mxu_gpr(t1, XRb);

    tcg_gen_brcondi_tl(cond, t1, 0, l_done);
    gen_store_mxu_gpr(t0, XRa);
    gen_set_label(l_done);
}

/*
 * MXU instruction category: Addition and subtraction
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 * S32CPS D16CPS
 * Q8ADD
 */

/*
 * S32CPS
 * Update XRa with 0 - XRb if XRc < 0,
 * otherwise XRa = XRb.
 */
static void gen_mxu_S32CPS(DisasContext *ctx)
{
    uint32_t pad, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely(XRb == 0)) {
        /* XRb is zero: 0 - 0 == 0 -> just set destination to zero */
        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
    } else if (unlikely(XRc == 0)) {
        /* condition always false -> just move XRb to XRa */
        tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else {
        /* the most general case */
        TCGv t0 = tcg_temp_new();
        TCGLabel *l_not_less = gen_new_label();
        TCGLabel *l_done = gen_new_label();

        tcg_gen_brcondi_tl(TCG_COND_GE, mxu_gpr[XRc - 1], 0, l_not_less);
        tcg_gen_neg_tl(t0, mxu_gpr[XRb - 1]);
        tcg_gen_br(l_done);
        gen_set_label(l_not_less);
        gen_load_mxu_gpr(t0, XRb);
        gen_set_label(l_done);
        gen_store_mxu_gpr(t0, XRa);
    }
}
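
/*
 * Editor's illustrative sketch (not used by the translator): the scalar
 * operation that gen_mxu_S32CPS() above implements. MXU_REFERENCE_MODELS
 * is hypothetical and never defined, so this block is not compiled.
 */
#ifdef MXU_REFERENCE_MODELS
static int32_t s32cps_ref(int32_t xrb, int32_t xrc)
{
    return (xrc < 0) ? -xrb : xrb;
}
#endif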

/*
 * D16CPS
 * Update XRa[0..1] with 0 - XRb[0..1] if XRc[0..1] < 0,
 * otherwise XRa[0..1] = XRb[0..1].
 */
static void gen_mxu_D16CPS(DisasContext *ctx)
{
    uint32_t pad, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 5);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely(XRb == 0)) {
        /* XRb is zero: 0 - 0 == 0 -> just set destination to zero */
        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
    } else if (unlikely(XRc == 0)) {
        /* condition always false -> just move XRb to XRa */
        tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
    } else {
        /* the most general case */
        TCGv t0 = tcg_temp_new();
        TCGv t1 = tcg_temp_new();
        TCGLabel *l_done_hi = gen_new_label();
        TCGLabel *l_not_less_lo = gen_new_label();
        TCGLabel *l_done_lo = gen_new_label();

        tcg_gen_sextract_tl(t0, mxu_gpr[XRc - 1], 16, 16);
        tcg_gen_sextract_tl(t1, mxu_gpr[XRb - 1], 16, 16);
        tcg_gen_brcondi_tl(TCG_COND_GE, t0, 0, l_done_hi);
        tcg_gen_subfi_tl(t1, 0, t1);

        gen_set_label(l_done_hi);
        tcg_gen_shli_i32(t1, t1, 16);

        tcg_gen_sextract_tl(t0, mxu_gpr[XRc - 1], 0, 16);
        tcg_gen_brcondi_tl(TCG_COND_GE, t0, 0, l_not_less_lo);
        tcg_gen_sextract_tl(t0, mxu_gpr[XRb - 1], 0, 16);
        tcg_gen_subfi_tl(t0, 0, t0);
        tcg_gen_br(l_done_lo);

        gen_set_label(l_not_less_lo);
        tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 0, 16);

        gen_set_label(l_done_lo);
        tcg_gen_deposit_tl(mxu_gpr[XRa - 1], t1, t0, 0, 16);
    }
}

/*
 * Q8ABD XRa, XRb, XRc
 * Get the absolute difference of the four 8-bit values
 * packed in XRb and the corresponding ones packed in XRc,
 * and put the result in XRa.
 * a.k.a. XRa[0..3] = abs(XRb[0..3] - XRc[0..3]);
 */
static void gen_mxu_Q8ABD(DisasContext *ctx)
{
    uint32_t pad, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 21, 3);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to zero */
        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
    } else {
        /* the most general case */
        TCGv t0 = tcg_temp_new();
        TCGv t1 = tcg_temp_new();
        TCGv t2 = tcg_temp_new();
        TCGv t3 = tcg_temp_new();
        TCGv t4 = tcg_temp_new();

        gen_load_mxu_gpr(t3, XRb);
        gen_load_mxu_gpr(t4, XRc);
        tcg_gen_movi_tl(t2, 0);

        for (int i = 0; i < 4; i++) {
            tcg_gen_extract_tl(t0, t3, 8 * i, 8);
            tcg_gen_extract_tl(t1, t4, 8 * i, 8);

            tcg_gen_sub_tl(t0, t0, t1);
            tcg_gen_abs_tl(t0, t0);

            tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
        }
        gen_store_mxu_gpr(t2, XRa);
    }
}

/*
 * Q8ADD XRa, XRb, XRc, aptn2
 * Add/subtract the four 8-bit values packed in XRb
 * to/from the corresponding ones packed in XRc,
 * and put the result in XRa.
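 * aptn2 selects add or subtract independently for the two byte pairs:
 * bit 0 applies to bytes 0..1 and bit 1 to bytes 2..3, a set bit meaning
 * subtract; e.g. aptn2 == 2 gives
 * XRa[0..1] = XRb[0..1] + XRc[0..1]; XRa[2..3] = XRb[2..3] - XRc[2..3];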
2913 */ 2914 static void gen_mxu_Q8ADD(DisasContext *ctx) 2915 { 2916 uint32_t aptn2, pad, XRc, XRb, XRa; 2917 2918 aptn2 = extract32(ctx->opcode, 24, 2); 2919 pad = extract32(ctx->opcode, 21, 3); 2920 XRc = extract32(ctx->opcode, 14, 4); 2921 XRb = extract32(ctx->opcode, 10, 4); 2922 XRa = extract32(ctx->opcode, 6, 4); 2923 2924 if (unlikely(pad != 0)) { 2925 /* opcode padding incorrect -> do nothing */ 2926 } else if (unlikely(XRa == 0)) { 2927 /* destination is zero register -> do nothing */ 2928 } else if (unlikely((XRb == 0) && (XRc == 0))) { 2929 /* both operands zero registers -> just set destination to zero */ 2930 tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0); 2931 } else { 2932 /* the most general case */ 2933 TCGv t0 = tcg_temp_new(); 2934 TCGv t1 = tcg_temp_new(); 2935 TCGv t2 = tcg_temp_new(); 2936 TCGv t3 = tcg_temp_new(); 2937 TCGv t4 = tcg_temp_new(); 2938 2939 gen_load_mxu_gpr(t3, XRb); 2940 gen_load_mxu_gpr(t4, XRc); 2941 2942 for (int i = 0; i < 4; i++) { 2943 tcg_gen_andi_tl(t0, t3, 0xff); 2944 tcg_gen_andi_tl(t1, t4, 0xff); 2945 2946 if (i < 2) { 2947 if (aptn2 & 0x01) { 2948 tcg_gen_sub_tl(t0, t0, t1); 2949 } else { 2950 tcg_gen_add_tl(t0, t0, t1); 2951 } 2952 } else { 2953 if (aptn2 & 0x02) { 2954 tcg_gen_sub_tl(t0, t0, t1); 2955 } else { 2956 tcg_gen_add_tl(t0, t0, t1); 2957 } 2958 } 2959 if (i < 3) { 2960 tcg_gen_shri_tl(t3, t3, 8); 2961 tcg_gen_shri_tl(t4, t4, 8); 2962 } 2963 if (i > 0) { 2964 tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8); 2965 } else { 2966 tcg_gen_andi_tl(t0, t0, 0xff); 2967 tcg_gen_mov_tl(t2, t0); 2968 } 2969 } 2970 gen_store_mxu_gpr(t2, XRa); 2971 } 2972 } 2973 2974 /* 2975 * Q8ADDE XRa, XRb, XRc, XRd, aptn2 2976 * Add/subtract quadruple of 8-bit packed in XRb 2977 * to another one in XRc, with zero extending 2978 * to 16-bit and put results as packed 16-bit data 2979 * into XRa and XRd. 2980 * aptn2 manages action add or subtract of pairs of data. 2981 * 2982 * Q8ACCE XRa, XRb, XRc, XRd, aptn2 2983 * Add/subtract quadruple of 8-bit packed in XRb 2984 * to another one in XRc, with zero extending 2985 * to 16-bit and accumulate results as packed 16-bit data 2986 * into XRa and XRd. 2987 * aptn2 manages action add or subtract of pairs of data. 
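 * The results computed from the upper bytes of XRb and XRc (bytes 2..3)
 * are packed into XRa, those from the lower bytes (0..1) into XRd;
 * aptn2 bit 1 selects add or subtract for the XRa pair, bit 0 for the
 * XRd pair, a set bit meaning subtract.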
2988 */ 2989 static void gen_mxu_q8adde(DisasContext *ctx, bool accumulate) 2990 { 2991 uint32_t aptn2, XRd, XRc, XRb, XRa; 2992 2993 aptn2 = extract32(ctx->opcode, 24, 2); 2994 XRd = extract32(ctx->opcode, 18, 4); 2995 XRc = extract32(ctx->opcode, 14, 4); 2996 XRb = extract32(ctx->opcode, 10, 4); 2997 XRa = extract32(ctx->opcode, 6, 4); 2998 2999 if (unlikely((XRb == 0) && (XRc == 0))) { 3000 /* both operands zero registers -> just set destination to zero */ 3001 if (XRa != 0) { 3002 tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0); 3003 } 3004 if (XRd != 0) { 3005 tcg_gen_movi_tl(mxu_gpr[XRd - 1], 0); 3006 } 3007 } else { 3008 /* the most general case */ 3009 TCGv t0 = tcg_temp_new(); 3010 TCGv t1 = tcg_temp_new(); 3011 TCGv t2 = tcg_temp_new(); 3012 TCGv t3 = tcg_temp_new(); 3013 TCGv t4 = tcg_temp_new(); 3014 TCGv t5 = tcg_temp_new(); 3015 3016 if (XRa != 0) { 3017 gen_extract_mxu_gpr(t0, XRb, 16, 8); 3018 gen_extract_mxu_gpr(t1, XRc, 16, 8); 3019 gen_extract_mxu_gpr(t2, XRb, 24, 8); 3020 gen_extract_mxu_gpr(t3, XRc, 24, 8); 3021 if (aptn2 & 2) { 3022 tcg_gen_sub_tl(t0, t0, t1); 3023 tcg_gen_sub_tl(t2, t2, t3); 3024 } else { 3025 tcg_gen_add_tl(t0, t0, t1); 3026 tcg_gen_add_tl(t2, t2, t3); 3027 } 3028 if (accumulate) { 3029 gen_load_mxu_gpr(t5, XRa); 3030 tcg_gen_extract_tl(t1, t5, 0, 16); 3031 tcg_gen_extract_tl(t3, t5, 16, 16); 3032 tcg_gen_add_tl(t0, t0, t1); 3033 tcg_gen_add_tl(t2, t2, t3); 3034 } 3035 tcg_gen_shli_tl(t2, t2, 16); 3036 tcg_gen_extract_tl(t0, t0, 0, 16); 3037 tcg_gen_or_tl(t4, t2, t0); 3038 } 3039 if (XRd != 0) { 3040 gen_extract_mxu_gpr(t0, XRb, 0, 8); 3041 gen_extract_mxu_gpr(t1, XRc, 0, 8); 3042 gen_extract_mxu_gpr(t2, XRb, 8, 8); 3043 gen_extract_mxu_gpr(t3, XRc, 8, 8); 3044 if (aptn2 & 1) { 3045 tcg_gen_sub_tl(t0, t0, t1); 3046 tcg_gen_sub_tl(t2, t2, t3); 3047 } else { 3048 tcg_gen_add_tl(t0, t0, t1); 3049 tcg_gen_add_tl(t2, t2, t3); 3050 } 3051 if (accumulate) { 3052 gen_load_mxu_gpr(t5, XRd); 3053 tcg_gen_extract_tl(t1, t5, 0, 16); 3054 tcg_gen_extract_tl(t3, t5, 16, 16); 3055 tcg_gen_add_tl(t0, t0, t1); 3056 tcg_gen_add_tl(t2, t2, t3); 3057 } 3058 tcg_gen_shli_tl(t2, t2, 16); 3059 tcg_gen_extract_tl(t0, t0, 0, 16); 3060 tcg_gen_or_tl(t5, t2, t0); 3061 } 3062 3063 gen_store_mxu_gpr(t4, XRa); 3064 gen_store_mxu_gpr(t5, XRd); 3065 } 3066 } 3067 3068 /* 3069 * D8SUM XRa, XRb, XRc 3070 * Double parallel add of quadruple unsigned 8-bit together 3071 * with zero extending to 16-bit data. 3072 * D8SUMC XRa, XRb, XRc 3073 * Double parallel add of quadruple unsigned 8-bit together 3074 * with zero extending to 16-bit data and adding 2 to each 3075 * parallel result. 
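 * a.k.a. the upper half-word of XRa receives XRb[0]+XRb[1]+XRb[2]+XRb[3]
 *        and the lower half-word receives XRc[0]+XRc[1]+XRc[2]+XRc[3]
 *        (D8SUMC additionally adds 2 to each of the two sums).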
 */
static void gen_mxu_d8sum(DisasContext *ctx, bool sumc)
{
    uint32_t pad, pad2, XRc, XRb, XRa;

    pad = extract32(ctx->opcode, 24, 2);
    pad2 = extract32(ctx->opcode, 18, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    if (unlikely(pad != 0 || pad2 != 0)) {
        /* opcode padding incorrect -> do nothing */
    } else if (unlikely(XRa == 0)) {
        /* destination is zero register -> do nothing */
    } else if (unlikely((XRb == 0) && (XRc == 0))) {
        /* both operands zero registers -> just set destination to zero */
        tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
    } else {
        /* the most general case */
        TCGv t0 = tcg_temp_new();
        TCGv t1 = tcg_temp_new();
        TCGv t2 = tcg_temp_new();
        TCGv t3 = tcg_temp_new();
        TCGv t4 = tcg_temp_new();
        TCGv t5 = tcg_temp_new();

        if (XRb != 0) {
            tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 0, 8);
            tcg_gen_extract_tl(t1, mxu_gpr[XRb - 1], 8, 8);
            tcg_gen_extract_tl(t2, mxu_gpr[XRb - 1], 16, 8);
            tcg_gen_extract_tl(t3, mxu_gpr[XRb - 1], 24, 8);
            tcg_gen_add_tl(t4, t0, t1);
            tcg_gen_add_tl(t4, t4, t2);
            tcg_gen_add_tl(t4, t4, t3);
        } else {
            tcg_gen_movi_tl(t4, 0);
        }
        if (XRc != 0) {
            tcg_gen_extract_tl(t0, mxu_gpr[XRc - 1], 0, 8);
            tcg_gen_extract_tl(t1, mxu_gpr[XRc - 1], 8, 8);
            tcg_gen_extract_tl(t2, mxu_gpr[XRc - 1], 16, 8);
            tcg_gen_extract_tl(t3, mxu_gpr[XRc - 1], 24, 8);
            tcg_gen_add_tl(t5, t0, t1);
            tcg_gen_add_tl(t5, t5, t2);
            tcg_gen_add_tl(t5, t5, t3);
        } else {
            tcg_gen_movi_tl(t5, 0);
        }

        if (sumc) {
            tcg_gen_addi_tl(t4, t4, 2);
            tcg_gen_addi_tl(t5, t5, 2);
        }
        tcg_gen_shli_tl(t4, t4, 16);

        tcg_gen_or_tl(mxu_gpr[XRa - 1], t4, t5);
    }
}
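
/*
 * Editor's illustrative sketch (not used by the translator): a plain C
 * model of gen_mxu_d8sum() above. MXU_REFERENCE_MODELS is hypothetical
 * and never defined, so this block is not compiled.
 */
#ifdef MXU_REFERENCE_MODELS
static uint32_t d8sum_ref(uint32_t xrb, uint32_t xrc, bool sumc)
{
    uint32_t hi = 0, lo = 0;

    for (int i = 0; i < 4; i++) {
        hi += (xrb >> (8 * i)) & 0xff;
        lo += (xrc >> (8 * i)) & 0xff;
    }
    if (sumc) {
        hi += 2;
        lo += 2;
    }
    return (hi << 16) | (lo & 0xffff);
}
#endif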

/*
 * Q16ADD XRa, XRb, XRc, XRd, aptn2, optn2 - Quad packed
 * 16-bit pattern addition.
 */
static void gen_mxu_q16add(DisasContext *ctx)
{
    uint32_t aptn2, optn2, XRc, XRb, XRa, XRd;

    aptn2 = extract32(ctx->opcode, 24, 2);
    optn2 = extract32(ctx->opcode, 22, 2);
    XRd = extract32(ctx->opcode, 18, 4);
    XRc = extract32(ctx->opcode, 14, 4);
    XRb = extract32(ctx->opcode, 10, 4);
    XRa = extract32(ctx->opcode, 6, 4);

    TCGv t0 = tcg_temp_new();
    TCGv t1 = tcg_temp_new();
    TCGv t2 = tcg_temp_new();
    TCGv t3 = tcg_temp_new();
    TCGv t4 = tcg_temp_new();
    TCGv t5 = tcg_temp_new();

    gen_load_mxu_gpr(t1, XRb);
    tcg_gen_extract_tl(t0, t1, 0, 16);
    tcg_gen_extract_tl(t1, t1, 16, 16);

    gen_load_mxu_gpr(t3, XRc);
    tcg_gen_extract_tl(t2, t3, 0, 16);
    tcg_gen_extract_tl(t3, t3, 16, 16);

    switch (optn2) {
    case MXU_OPTN2_WW: /* XRB.H+XRC.H == lop, XRB.L+XRC.L == rop */
        tcg_gen_mov_tl(t4, t1);
        tcg_gen_mov_tl(t5, t0);
        break;
    case MXU_OPTN2_LW: /* XRB.L+XRC.H == lop, XRB.L+XRC.L == rop */
        tcg_gen_mov_tl(t4, t0);
        tcg_gen_mov_tl(t5, t0);
        break;
    case MXU_OPTN2_HW: /* XRB.H+XRC.H == lop, XRB.H+XRC.L == rop */
        tcg_gen_mov_tl(t4, t1);
        tcg_gen_mov_tl(t5, t1);
        break;
    case MXU_OPTN2_XW: /* XRB.L+XRC.H == lop, XRB.H+XRC.L == rop */
        tcg_gen_mov_tl(t4, t0);
        tcg_gen_mov_tl(t5, t1);
        break;
    }

    switch (aptn2) {
    case MXU_APTN2_AA: /* lop +, rop + */
        tcg_gen_add_tl(t0, t4, t3);
        tcg_gen_add_tl(t1, t5, t2);
        tcg_gen_add_tl(t4, t4, t3);
        tcg_gen_add_tl(t5, t5, t2);
        break;
    case MXU_APTN2_AS: /* lop +, rop - */
        tcg_gen_sub_tl(t0, t4, t3);
        tcg_gen_sub_tl(t1, t5, t2);
        tcg_gen_add_tl(t4, t4, t3);
        tcg_gen_add_tl(t5, t5, t2);
        break;
    case MXU_APTN2_SA: /* lop -, rop + */
        tcg_gen_add_tl(t0, t4, t3);
        tcg_gen_add_tl(t1, t5, t2);
        tcg_gen_sub_tl(t4, t4, t3);
        tcg_gen_sub_tl(t5, t5, t2);
        break;
    case MXU_APTN2_SS: /* lop -, rop - */
        tcg_gen_sub_tl(t0, t4, t3);
        tcg_gen_sub_tl(t1, t5, t2);
        tcg_gen_sub_tl(t4, t4, t3);
        tcg_gen_sub_tl(t5, t5, t2);
        break;
    }

    tcg_gen_shli_tl(t0, t0, 16);
    tcg_gen_extract_tl(t1, t1, 0, 16);
    tcg_gen_shli_tl(t4, t4, 16);
    tcg_gen_extract_tl(t5, t5, 0, 16);

    tcg_gen_or_tl(t4, t4, t5);
    tcg_gen_or_tl(t0, t0, t1);
    gen_store_mxu_gpr(t4, XRa);
    gen_store_mxu_gpr(t0, XRd);
}

/*
 * Q16ACC XRa, XRb, XRc, XRd, aptn2 - Quad packed
 * 16-bit addition/subtraction with accumulate.
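 * The per-half-word add/subtract results selected by aptn2 are then
 * accumulated into the corresponding half-words of XRa and XRd.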
3224 */ 3225 static void gen_mxu_q16acc(DisasContext *ctx) 3226 { 3227 uint32_t aptn2, XRc, XRb, XRa, XRd; 3228 3229 aptn2 = extract32(ctx->opcode, 24, 2); 3230 XRd = extract32(ctx->opcode, 18, 4); 3231 XRc = extract32(ctx->opcode, 14, 4); 3232 XRb = extract32(ctx->opcode, 10, 4); 3233 XRa = extract32(ctx->opcode, 6, 4); 3234 3235 TCGv t0 = tcg_temp_new(); 3236 TCGv t1 = tcg_temp_new(); 3237 TCGv t2 = tcg_temp_new(); 3238 TCGv t3 = tcg_temp_new(); 3239 TCGv s3 = tcg_temp_new(); 3240 TCGv s2 = tcg_temp_new(); 3241 TCGv s1 = tcg_temp_new(); 3242 TCGv s0 = tcg_temp_new(); 3243 3244 gen_load_mxu_gpr(t1, XRb); 3245 tcg_gen_extract_tl(t0, t1, 0, 16); 3246 tcg_gen_extract_tl(t1, t1, 16, 16); 3247 3248 gen_load_mxu_gpr(t3, XRc); 3249 tcg_gen_extract_tl(t2, t3, 0, 16); 3250 tcg_gen_extract_tl(t3, t3, 16, 16); 3251 3252 switch (aptn2) { 3253 case MXU_APTN2_AA: /* lop +, rop + */ 3254 tcg_gen_add_tl(s3, t1, t3); 3255 tcg_gen_add_tl(s2, t0, t2); 3256 tcg_gen_add_tl(s1, t1, t3); 3257 tcg_gen_add_tl(s0, t0, t2); 3258 break; 3259 case MXU_APTN2_AS: /* lop +, rop - */ 3260 tcg_gen_sub_tl(s3, t1, t3); 3261 tcg_gen_sub_tl(s2, t0, t2); 3262 tcg_gen_add_tl(s1, t1, t3); 3263 tcg_gen_add_tl(s0, t0, t2); 3264 break; 3265 case MXU_APTN2_SA: /* lop -, rop + */ 3266 tcg_gen_add_tl(s3, t1, t3); 3267 tcg_gen_add_tl(s2, t0, t2); 3268 tcg_gen_sub_tl(s1, t1, t3); 3269 tcg_gen_sub_tl(s0, t0, t2); 3270 break; 3271 case MXU_APTN2_SS: /* lop -, rop - */ 3272 tcg_gen_sub_tl(s3, t1, t3); 3273 tcg_gen_sub_tl(s2, t0, t2); 3274 tcg_gen_sub_tl(s1, t1, t3); 3275 tcg_gen_sub_tl(s0, t0, t2); 3276 break; 3277 } 3278 3279 if (XRa != 0) { 3280 tcg_gen_add_tl(t0, mxu_gpr[XRa - 1], s0); 3281 tcg_gen_extract_tl(t0, t0, 0, 16); 3282 tcg_gen_extract_tl(t1, mxu_gpr[XRa - 1], 16, 16); 3283 tcg_gen_add_tl(t1, t1, s1); 3284 tcg_gen_shli_tl(t1, t1, 16); 3285 tcg_gen_or_tl(mxu_gpr[XRa - 1], t1, t0); 3286 } 3287 3288 if (XRd != 0) { 3289 tcg_gen_add_tl(t0, mxu_gpr[XRd - 1], s2); 3290 tcg_gen_extract_tl(t0, t0, 0, 16); 3291 tcg_gen_extract_tl(t1, mxu_gpr[XRd - 1], 16, 16); 3292 tcg_gen_add_tl(t1, t1, s3); 3293 tcg_gen_shli_tl(t1, t1, 16); 3294 tcg_gen_or_tl(mxu_gpr[XRd - 1], t1, t0); 3295 } 3296 } 3297 3298 /* 3299 * Q16ACCM XRa, XRb, XRc, XRd, aptn2 - Quad packed 3300 * 16-bit accumulate. 
3301 */ 3302 static void gen_mxu_q16accm(DisasContext *ctx) 3303 { 3304 uint32_t aptn2, XRc, XRb, XRa, XRd; 3305 3306 aptn2 = extract32(ctx->opcode, 24, 2); 3307 XRd = extract32(ctx->opcode, 18, 4); 3308 XRc = extract32(ctx->opcode, 14, 4); 3309 XRb = extract32(ctx->opcode, 10, 4); 3310 XRa = extract32(ctx->opcode, 6, 4); 3311 3312 TCGv t0 = tcg_temp_new(); 3313 TCGv t1 = tcg_temp_new(); 3314 TCGv t2 = tcg_temp_new(); 3315 TCGv t3 = tcg_temp_new(); 3316 3317 gen_load_mxu_gpr(t2, XRb); 3318 gen_load_mxu_gpr(t3, XRc); 3319 3320 if (XRa != 0) { 3321 TCGv a0 = tcg_temp_new(); 3322 TCGv a1 = tcg_temp_new(); 3323 3324 tcg_gen_extract_tl(t0, t2, 0, 16); 3325 tcg_gen_extract_tl(t1, t2, 16, 16); 3326 3327 gen_load_mxu_gpr(a1, XRa); 3328 tcg_gen_extract_tl(a0, a1, 0, 16); 3329 tcg_gen_extract_tl(a1, a1, 16, 16); 3330 3331 if (aptn2 & 2) { 3332 tcg_gen_sub_tl(a0, a0, t0); 3333 tcg_gen_sub_tl(a1, a1, t1); 3334 } else { 3335 tcg_gen_add_tl(a0, a0, t0); 3336 tcg_gen_add_tl(a1, a1, t1); 3337 } 3338 tcg_gen_extract_tl(a0, a0, 0, 16); 3339 tcg_gen_shli_tl(a1, a1, 16); 3340 tcg_gen_or_tl(mxu_gpr[XRa - 1], a1, a0); 3341 } 3342 3343 if (XRd != 0) { 3344 TCGv a0 = tcg_temp_new(); 3345 TCGv a1 = tcg_temp_new(); 3346 3347 tcg_gen_extract_tl(t0, t3, 0, 16); 3348 tcg_gen_extract_tl(t1, t3, 16, 16); 3349 3350 gen_load_mxu_gpr(a1, XRd); 3351 tcg_gen_extract_tl(a0, a1, 0, 16); 3352 tcg_gen_extract_tl(a1, a1, 16, 16); 3353 3354 if (aptn2 & 1) { 3355 tcg_gen_sub_tl(a0, a0, t0); 3356 tcg_gen_sub_tl(a1, a1, t1); 3357 } else { 3358 tcg_gen_add_tl(a0, a0, t0); 3359 tcg_gen_add_tl(a1, a1, t1); 3360 } 3361 tcg_gen_extract_tl(a0, a0, 0, 16); 3362 tcg_gen_shli_tl(a1, a1, 16); 3363 tcg_gen_or_tl(mxu_gpr[XRd - 1], a1, a0); 3364 } 3365 } 3366 3367 3368 /* 3369 * D16ASUM XRa, XRb, XRc, XRd, aptn2 - Double packed 3370 * 16-bit sign extended addition and accumulate. 3371 */ 3372 static void gen_mxu_d16asum(DisasContext *ctx) 3373 { 3374 uint32_t aptn2, XRc, XRb, XRa, XRd; 3375 3376 aptn2 = extract32(ctx->opcode, 24, 2); 3377 XRd = extract32(ctx->opcode, 18, 4); 3378 XRc = extract32(ctx->opcode, 14, 4); 3379 XRb = extract32(ctx->opcode, 10, 4); 3380 XRa = extract32(ctx->opcode, 6, 4); 3381 3382 TCGv t0 = tcg_temp_new(); 3383 TCGv t1 = tcg_temp_new(); 3384 TCGv t2 = tcg_temp_new(); 3385 TCGv t3 = tcg_temp_new(); 3386 3387 gen_load_mxu_gpr(t2, XRb); 3388 gen_load_mxu_gpr(t3, XRc); 3389 3390 if (XRa != 0) { 3391 tcg_gen_sextract_tl(t0, t2, 0, 16); 3392 tcg_gen_sextract_tl(t1, t2, 16, 16); 3393 tcg_gen_add_tl(t0, t0, t1); 3394 if (aptn2 & 2) { 3395 tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0); 3396 } else { 3397 tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0); 3398 } 3399 } 3400 3401 if (XRd != 0) { 3402 tcg_gen_sextract_tl(t0, t3, 0, 16); 3403 tcg_gen_sextract_tl(t1, t3, 16, 16); 3404 tcg_gen_add_tl(t0, t0, t1); 3405 if (aptn2 & 1) { 3406 tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t0); 3407 } else { 3408 tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t0); 3409 } 3410 } 3411 } 3412 3413 /* 3414 * D32ADD XRa, XRb, XRc, XRd, aptn2 - Double 3415 * 32 bit pattern addition/subtraction, set carry. 3416 * 3417 * D32ADDC XRa, XRb, XRc, XRd, aptn2 - Double 3418 * 32 bit pattern addition/subtraction with carry. 
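 * The carry bits are kept in the MXU control register: bit 31 is the
 * carry associated with the XRa result and bit 30 the carry associated
 * with the XRd result.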
3419 */ 3420 static void gen_mxu_d32add(DisasContext *ctx) 3421 { 3422 uint32_t aptn2, addc, XRc, XRb, XRa, XRd; 3423 3424 aptn2 = extract32(ctx->opcode, 24, 2); 3425 addc = extract32(ctx->opcode, 22, 2); 3426 XRd = extract32(ctx->opcode, 18, 4); 3427 XRc = extract32(ctx->opcode, 14, 4); 3428 XRb = extract32(ctx->opcode, 10, 4); 3429 XRa = extract32(ctx->opcode, 6, 4); 3430 3431 TCGv t0 = tcg_temp_new(); 3432 TCGv t1 = tcg_temp_new(); 3433 TCGv t2 = tcg_temp_new(); 3434 TCGv cr = tcg_temp_new(); 3435 3436 if (unlikely(addc > 1)) { 3437 /* opcode incorrect -> do nothing */ 3438 } else if (addc == 1) { 3439 if (unlikely(XRa == 0 && XRd == 0)) { 3440 /* destinations are zero register -> do nothing */ 3441 } else { 3442 /* FIXME ??? What if XRa == XRd ??? */ 3443 /* aptn2 is unused here */ 3444 gen_load_mxu_gpr(t0, XRb); 3445 gen_load_mxu_gpr(t1, XRc); 3446 gen_load_mxu_cr(cr); 3447 if (XRa != 0) { 3448 tcg_gen_extract_tl(t2, cr, 31, 1); 3449 tcg_gen_add_tl(t0, t0, t2); 3450 tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0); 3451 } 3452 if (XRd != 0) { 3453 tcg_gen_extract_tl(t2, cr, 30, 1); 3454 tcg_gen_add_tl(t1, t1, t2); 3455 tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1); 3456 } 3457 } 3458 } else if (unlikely(XRa == 0 && XRd == 0)) { 3459 /* destinations are zero register -> do nothing */ 3460 } else { 3461 /* common case */ 3462 /* FIXME ??? What if XRa == XRd ??? */ 3463 TCGv carry = tcg_temp_new(); 3464 3465 gen_load_mxu_gpr(t0, XRb); 3466 gen_load_mxu_gpr(t1, XRc); 3467 gen_load_mxu_cr(cr); 3468 if (XRa != 0) { 3469 if (aptn2 & 2) { 3470 tcg_gen_sub_i32(t2, t0, t1); 3471 tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t1); 3472 } else { 3473 tcg_gen_add_i32(t2, t0, t1); 3474 tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t2); 3475 } 3476 tcg_gen_andi_tl(cr, cr, 0x7fffffff); 3477 tcg_gen_shli_tl(carry, carry, 31); 3478 tcg_gen_or_tl(cr, cr, carry); 3479 gen_store_mxu_gpr(t2, XRa); 3480 } 3481 if (XRd != 0) { 3482 if (aptn2 & 1) { 3483 tcg_gen_sub_i32(t2, t0, t1); 3484 tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t1); 3485 } else { 3486 tcg_gen_add_i32(t2, t0, t1); 3487 tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t2); 3488 } 3489 tcg_gen_andi_tl(cr, cr, 0xbfffffff); 3490 tcg_gen_shli_tl(carry, carry, 30); 3491 tcg_gen_or_tl(cr, cr, carry); 3492 gen_store_mxu_gpr(t2, XRd); 3493 } 3494 gen_store_mxu_cr(cr); 3495 } 3496 } 3497 3498 /* 3499 * D32ACC XRa, XRb, XRc, XRd, aptn2 - Double 3500 * 32 bit pattern addition/subtraction and accumulate. 
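 * a.k.a. XRa = XRa + (XRb +/- XRc); XRd = XRd + (XRb +/- XRc);
 *        where aptn2 bit 1 selects subtract for the XRa term and
 *        bit 0 for the XRd term.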
3501 */ 3502 static void gen_mxu_d32acc(DisasContext *ctx) 3503 { 3504 uint32_t aptn2, XRc, XRb, XRa, XRd; 3505 3506 aptn2 = extract32(ctx->opcode, 24, 2); 3507 XRd = extract32(ctx->opcode, 18, 4); 3508 XRc = extract32(ctx->opcode, 14, 4); 3509 XRb = extract32(ctx->opcode, 10, 4); 3510 XRa = extract32(ctx->opcode, 6, 4); 3511 3512 TCGv t0 = tcg_temp_new(); 3513 TCGv t1 = tcg_temp_new(); 3514 TCGv t2 = tcg_temp_new(); 3515 3516 if (unlikely(XRa == 0 && XRd == 0)) { 3517 /* destinations are zero register -> do nothing */ 3518 } else { 3519 /* common case */ 3520 gen_load_mxu_gpr(t0, XRb); 3521 gen_load_mxu_gpr(t1, XRc); 3522 if (XRa != 0) { 3523 if (aptn2 & 2) { 3524 tcg_gen_sub_tl(t2, t0, t1); 3525 } else { 3526 tcg_gen_add_tl(t2, t0, t1); 3527 } 3528 tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2); 3529 } 3530 if (XRd != 0) { 3531 if (aptn2 & 1) { 3532 tcg_gen_sub_tl(t2, t0, t1); 3533 } else { 3534 tcg_gen_add_tl(t2, t0, t1); 3535 } 3536 tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2); 3537 } 3538 } 3539 } 3540 3541 /* 3542 * D32ACCM XRa, XRb, XRc, XRd, aptn2 - Double 3543 * 32 bit pattern addition/subtraction and accumulate. 3544 */ 3545 static void gen_mxu_d32accm(DisasContext *ctx) 3546 { 3547 uint32_t aptn2, XRc, XRb, XRa, XRd; 3548 3549 aptn2 = extract32(ctx->opcode, 24, 2); 3550 XRd = extract32(ctx->opcode, 18, 4); 3551 XRc = extract32(ctx->opcode, 14, 4); 3552 XRb = extract32(ctx->opcode, 10, 4); 3553 XRa = extract32(ctx->opcode, 6, 4); 3554 3555 TCGv t0 = tcg_temp_new(); 3556 TCGv t1 = tcg_temp_new(); 3557 TCGv t2 = tcg_temp_new(); 3558 3559 if (unlikely(XRa == 0 && XRd == 0)) { 3560 /* destinations are zero register -> do nothing */ 3561 } else { 3562 /* common case */ 3563 gen_load_mxu_gpr(t0, XRb); 3564 gen_load_mxu_gpr(t1, XRc); 3565 if (XRa != 0) { 3566 tcg_gen_add_tl(t2, t0, t1); 3567 if (aptn2 & 2) { 3568 tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2); 3569 } else { 3570 tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2); 3571 } 3572 } 3573 if (XRd != 0) { 3574 tcg_gen_sub_tl(t2, t0, t1); 3575 if (aptn2 & 1) { 3576 tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2); 3577 } else { 3578 tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2); 3579 } 3580 } 3581 } 3582 } 3583 3584 /* 3585 * D32ASUM XRa, XRb, XRc, XRd, aptn2 - Double 3586 * 32 bit pattern addition/subtraction. 
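 * a.k.a. XRa = XRa +/- XRb; XRd = XRd +/- XRc;
 *        where aptn2 bit 1 selects subtract for XRa and bit 0 for XRd.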
3587 */ 3588 static void gen_mxu_d32asum(DisasContext *ctx) 3589 { 3590 uint32_t aptn2, XRc, XRb, XRa, XRd; 3591 3592 aptn2 = extract32(ctx->opcode, 24, 2); 3593 XRd = extract32(ctx->opcode, 18, 4); 3594 XRc = extract32(ctx->opcode, 14, 4); 3595 XRb = extract32(ctx->opcode, 10, 4); 3596 XRa = extract32(ctx->opcode, 6, 4); 3597 3598 TCGv t0 = tcg_temp_new(); 3599 TCGv t1 = tcg_temp_new(); 3600 3601 if (unlikely(XRa == 0 && XRd == 0)) { 3602 /* destinations are zero register -> do nothing */ 3603 } else { 3604 /* common case */ 3605 gen_load_mxu_gpr(t0, XRb); 3606 gen_load_mxu_gpr(t1, XRc); 3607 if (XRa != 0) { 3608 if (aptn2 & 2) { 3609 tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0); 3610 } else { 3611 tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0); 3612 } 3613 } 3614 if (XRd != 0) { 3615 if (aptn2 & 1) { 3616 tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1); 3617 } else { 3618 tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1); 3619 } 3620 } 3621 } 3622 } 3623 3624 /* 3625 * MXU instruction category: Miscellaneous 3626 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3627 * 3628 * S32EXTR S32LUI 3629 * S32EXTRV 3630 * Q16SAT 3631 * Q16SCOP 3632 */ 3633 3634 /* 3635 * S32EXTR XRa, XRd, rs, bits5 3636 * Extract bits5 bits from 64-bit pair {XRa:XRd} 3637 * starting from rs[4:0] offset and put to the XRa. 3638 */ 3639 static void gen_mxu_s32extr(DisasContext *ctx) 3640 { 3641 TCGv t0, t1, t2, t3; 3642 uint32_t XRa, XRd, rs, bits5; 3643 3644 t0 = tcg_temp_new(); 3645 t1 = tcg_temp_new(); 3646 t2 = tcg_temp_new(); 3647 t3 = tcg_temp_new(); 3648 3649 XRa = extract32(ctx->opcode, 6, 4); 3650 XRd = extract32(ctx->opcode, 10, 4); 3651 bits5 = extract32(ctx->opcode, 16, 5); 3652 rs = extract32(ctx->opcode, 21, 5); 3653 3654 /* {tmp} = {XRa:XRd} >> (64 - rt - bits5); */ 3655 /* {XRa} = extract({tmp}, 0, bits5); */ 3656 if (bits5 > 0) { 3657 TCGLabel *l_xra_only = gen_new_label(); 3658 TCGLabel *l_done = gen_new_label(); 3659 3660 gen_load_mxu_gpr(t0, XRd); 3661 gen_load_mxu_gpr(t1, XRa); 3662 gen_load_gpr(t2, rs); 3663 tcg_gen_andi_tl(t2, t2, 0x1f); 3664 tcg_gen_subfi_tl(t2, 32, t2); 3665 tcg_gen_brcondi_tl(TCG_COND_GE, t2, bits5, l_xra_only); 3666 tcg_gen_subfi_tl(t2, bits5, t2); 3667 tcg_gen_subfi_tl(t3, 32, t2); 3668 tcg_gen_shr_tl(t0, t0, t3); 3669 tcg_gen_shl_tl(t1, t1, t2); 3670 tcg_gen_or_tl(t0, t0, t1); 3671 tcg_gen_br(l_done); 3672 gen_set_label(l_xra_only); 3673 tcg_gen_subi_tl(t2, t2, bits5); 3674 tcg_gen_shr_tl(t0, t1, t2); 3675 gen_set_label(l_done); 3676 tcg_gen_extract_tl(t0, t0, 0, bits5); 3677 } else { 3678 /* unspecified behavior but matches tests on real hardware*/ 3679 tcg_gen_movi_tl(t0, 0); 3680 } 3681 gen_store_mxu_gpr(t0, XRa); 3682 } 3683 3684 /* 3685 * S32EXTRV XRa, XRd, rs, rt 3686 * Extract rt[4:0] bits from 64-bit pair {XRa:XRd} 3687 * starting from rs[4:0] offset and put to the XRa. 
3688 */ 3689 static void gen_mxu_s32extrv(DisasContext *ctx) 3690 { 3691 TCGv t0, t1, t2, t3, t4; 3692 uint32_t XRa, XRd, rs, rt; 3693 3694 t0 = tcg_temp_new(); 3695 t1 = tcg_temp_new(); 3696 t2 = tcg_temp_new(); 3697 t3 = tcg_temp_new(); 3698 t4 = tcg_temp_new(); 3699 TCGLabel *l_xra_only = gen_new_label(); 3700 TCGLabel *l_done = gen_new_label(); 3701 TCGLabel *l_zero = gen_new_label(); 3702 TCGLabel *l_extract = gen_new_label(); 3703 3704 XRa = extract32(ctx->opcode, 6, 4); 3705 XRd = extract32(ctx->opcode, 10, 4); 3706 rt = extract32(ctx->opcode, 16, 5); 3707 rs = extract32(ctx->opcode, 21, 5); 3708 3709 /* {tmp} = {XRa:XRd} >> (64 - rs - rt) */ 3710 gen_load_mxu_gpr(t0, XRd); 3711 gen_load_mxu_gpr(t1, XRa); 3712 gen_load_gpr(t2, rs); 3713 gen_load_gpr(t4, rt); 3714 tcg_gen_brcondi_tl(TCG_COND_EQ, t4, 0, l_zero); 3715 tcg_gen_andi_tl(t2, t2, 0x1f); 3716 tcg_gen_subfi_tl(t2, 32, t2); 3717 tcg_gen_brcond_tl(TCG_COND_GE, t2, t4, l_xra_only); 3718 tcg_gen_sub_tl(t2, t4, t2); 3719 tcg_gen_subfi_tl(t3, 32, t2); 3720 tcg_gen_shr_tl(t0, t0, t3); 3721 tcg_gen_shl_tl(t1, t1, t2); 3722 tcg_gen_or_tl(t0, t0, t1); 3723 tcg_gen_br(l_extract); 3724 3725 gen_set_label(l_xra_only); 3726 tcg_gen_sub_tl(t2, t2, t4); 3727 tcg_gen_shr_tl(t0, t1, t2); 3728 tcg_gen_br(l_extract); 3729 3730 /* unspecified behavior but matches tests on real hardware*/ 3731 gen_set_label(l_zero); 3732 tcg_gen_movi_tl(t0, 0); 3733 tcg_gen_br(l_done); 3734 3735 /* {XRa} = extract({tmp}, 0, rt) */ 3736 gen_set_label(l_extract); 3737 tcg_gen_subfi_tl(t4, 32, t4); 3738 tcg_gen_shl_tl(t0, t0, t4); 3739 tcg_gen_shr_tl(t0, t0, t4); 3740 3741 gen_set_label(l_done); 3742 gen_store_mxu_gpr(t0, XRa); 3743 } 3744 3745 /* 3746 * S32LUI XRa, S8, optn3 3747 * Permutate the immediate S8 value to form a word 3748 * to update XRa. 3749 */ 3750 static void gen_mxu_s32lui(DisasContext *ctx) 3751 { 3752 uint32_t XRa, s8, optn3, pad; 3753 3754 XRa = extract32(ctx->opcode, 6, 4); 3755 s8 = extract32(ctx->opcode, 10, 8); 3756 pad = extract32(ctx->opcode, 21, 2); 3757 optn3 = extract32(ctx->opcode, 23, 3); 3758 3759 if (unlikely(pad != 0)) { 3760 /* opcode padding incorrect -> do nothing */ 3761 } else if (unlikely(XRa == 0)) { 3762 /* destination is zero register -> do nothing */ 3763 } else { 3764 uint32_t s16; 3765 TCGv t0 = tcg_temp_new(); 3766 3767 switch (optn3) { 3768 case 0: 3769 tcg_gen_movi_tl(t0, s8); 3770 break; 3771 case 1: 3772 tcg_gen_movi_tl(t0, s8 << 8); 3773 break; 3774 case 2: 3775 tcg_gen_movi_tl(t0, s8 << 16); 3776 break; 3777 case 3: 3778 tcg_gen_movi_tl(t0, s8 << 24); 3779 break; 3780 case 4: 3781 tcg_gen_movi_tl(t0, (s8 << 16) | s8); 3782 break; 3783 case 5: 3784 tcg_gen_movi_tl(t0, (s8 << 24) | (s8 << 8)); 3785 break; 3786 case 6: 3787 s16 = (uint16_t)(int16_t)(int8_t)s8; 3788 tcg_gen_movi_tl(t0, (s16 << 16) | s16); 3789 break; 3790 case 7: 3791 tcg_gen_movi_tl(t0, (s8 << 24) | (s8 << 16) | (s8 << 8) | s8); 3792 break; 3793 } 3794 gen_store_mxu_gpr(t0, XRa); 3795 } 3796 } 3797 3798 /* 3799 * Q16SAT XRa, XRb, XRc 3800 * Packs four 16-bit signed integers in XRb and XRc to 3801 * four saturated unsigned 8-bit into XRa. 
3802 * 3803 */ 3804 static void gen_mxu_Q16SAT(DisasContext *ctx) 3805 { 3806 uint32_t pad, XRc, XRb, XRa; 3807 3808 pad = extract32(ctx->opcode, 21, 3); 3809 XRc = extract32(ctx->opcode, 14, 4); 3810 XRb = extract32(ctx->opcode, 10, 4); 3811 XRa = extract32(ctx->opcode, 6, 4); 3812 3813 if (unlikely(pad != 0)) { 3814 /* opcode padding incorrect -> do nothing */ 3815 } else if (unlikely(XRa == 0)) { 3816 /* destination is zero register -> do nothing */ 3817 } else { 3818 /* the most general case */ 3819 TCGv t0 = tcg_temp_new(); 3820 TCGv t1 = tcg_temp_new(); 3821 TCGv t2 = tcg_temp_new(); 3822 3823 tcg_gen_movi_tl(t2, 0); 3824 if (XRb != 0) { 3825 TCGLabel *l_less_hi = gen_new_label(); 3826 TCGLabel *l_less_lo = gen_new_label(); 3827 TCGLabel *l_lo = gen_new_label(); 3828 TCGLabel *l_greater_hi = gen_new_label(); 3829 TCGLabel *l_greater_lo = gen_new_label(); 3830 TCGLabel *l_done = gen_new_label(); 3831 3832 tcg_gen_sari_tl(t0, mxu_gpr[XRb - 1], 16); 3833 tcg_gen_brcondi_tl(TCG_COND_LT, t0, 0, l_less_hi); 3834 tcg_gen_brcondi_tl(TCG_COND_GT, t0, 255, l_greater_hi); 3835 tcg_gen_br(l_lo); 3836 gen_set_label(l_less_hi); 3837 tcg_gen_movi_tl(t0, 0); 3838 tcg_gen_br(l_lo); 3839 gen_set_label(l_greater_hi); 3840 tcg_gen_movi_tl(t0, 255); 3841 3842 gen_set_label(l_lo); 3843 tcg_gen_shli_tl(t1, mxu_gpr[XRb - 1], 16); 3844 tcg_gen_sari_tl(t1, t1, 16); 3845 tcg_gen_brcondi_tl(TCG_COND_LT, t1, 0, l_less_lo); 3846 tcg_gen_brcondi_tl(TCG_COND_GT, t1, 255, l_greater_lo); 3847 tcg_gen_br(l_done); 3848 gen_set_label(l_less_lo); 3849 tcg_gen_movi_tl(t1, 0); 3850 tcg_gen_br(l_done); 3851 gen_set_label(l_greater_lo); 3852 tcg_gen_movi_tl(t1, 255); 3853 3854 gen_set_label(l_done); 3855 tcg_gen_shli_tl(t2, t0, 24); 3856 tcg_gen_shli_tl(t1, t1, 16); 3857 tcg_gen_or_tl(t2, t2, t1); 3858 } 3859 3860 if (XRc != 0) { 3861 TCGLabel *l_less_hi = gen_new_label(); 3862 TCGLabel *l_less_lo = gen_new_label(); 3863 TCGLabel *l_lo = gen_new_label(); 3864 TCGLabel *l_greater_hi = gen_new_label(); 3865 TCGLabel *l_greater_lo = gen_new_label(); 3866 TCGLabel *l_done = gen_new_label(); 3867 3868 tcg_gen_sari_tl(t0, mxu_gpr[XRc - 1], 16); 3869 tcg_gen_brcondi_tl(TCG_COND_LT, t0, 0, l_less_hi); 3870 tcg_gen_brcondi_tl(TCG_COND_GT, t0, 255, l_greater_hi); 3871 tcg_gen_br(l_lo); 3872 gen_set_label(l_less_hi); 3873 tcg_gen_movi_tl(t0, 0); 3874 tcg_gen_br(l_lo); 3875 gen_set_label(l_greater_hi); 3876 tcg_gen_movi_tl(t0, 255); 3877 3878 gen_set_label(l_lo); 3879 tcg_gen_shli_tl(t1, mxu_gpr[XRc - 1], 16); 3880 tcg_gen_sari_tl(t1, t1, 16); 3881 tcg_gen_brcondi_tl(TCG_COND_LT, t1, 0, l_less_lo); 3882 tcg_gen_brcondi_tl(TCG_COND_GT, t1, 255, l_greater_lo); 3883 tcg_gen_br(l_done); 3884 gen_set_label(l_less_lo); 3885 tcg_gen_movi_tl(t1, 0); 3886 tcg_gen_br(l_done); 3887 gen_set_label(l_greater_lo); 3888 tcg_gen_movi_tl(t1, 255); 3889 3890 gen_set_label(l_done); 3891 tcg_gen_shli_tl(t0, t0, 8); 3892 tcg_gen_or_tl(t2, t2, t0); 3893 tcg_gen_or_tl(t2, t2, t1); 3894 } 3895 gen_store_mxu_gpr(t2, XRa); 3896 } 3897 } 3898 3899 /* 3900 * Q16SCOP XRa, XRd, XRb, XRc 3901 * Determine sign of quad packed 16-bit signed values 3902 * in XRb and XRc put result in XRa and XRd respectively. 
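 *
 * Informally (derived from the code below, not the manual's wording),
 * with scop(x) = -1, 0 or +1 for x < 0, x == 0, x > 0 applied to each
 * signed 16-bit half-word:
 *
 *   XRa[31:16] = scop(XRb[31:16]);   XRa[15:0] = scop(XRb[15:0]);
 *   XRd[31:16] = scop(XRc[31:16]);   XRd[15:0] = scop(XRc[15:0]);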
3903 */ 3904 static void gen_mxu_q16scop(DisasContext *ctx) 3905 { 3906 uint32_t XRd, XRc, XRb, XRa; 3907 3908 XRd = extract32(ctx->opcode, 18, 4); 3909 XRc = extract32(ctx->opcode, 14, 4); 3910 XRb = extract32(ctx->opcode, 10, 4); 3911 XRa = extract32(ctx->opcode, 6, 4); 3912 3913 TCGv t0 = tcg_temp_new(); 3914 TCGv t1 = tcg_temp_new(); 3915 TCGv t2 = tcg_temp_new(); 3916 TCGv t3 = tcg_temp_new(); 3917 TCGv t4 = tcg_temp_new(); 3918 3919 TCGLabel *l_b_hi_lt = gen_new_label(); 3920 TCGLabel *l_b_hi_gt = gen_new_label(); 3921 TCGLabel *l_b_lo = gen_new_label(); 3922 TCGLabel *l_b_lo_lt = gen_new_label(); 3923 TCGLabel *l_c_hi = gen_new_label(); 3924 TCGLabel *l_c_hi_lt = gen_new_label(); 3925 TCGLabel *l_c_hi_gt = gen_new_label(); 3926 TCGLabel *l_c_lo = gen_new_label(); 3927 TCGLabel *l_c_lo_lt = gen_new_label(); 3928 TCGLabel *l_done = gen_new_label(); 3929 3930 gen_load_mxu_gpr(t0, XRb); 3931 gen_load_mxu_gpr(t1, XRc); 3932 3933 tcg_gen_sextract_tl(t2, t0, 16, 16); 3934 tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_b_hi_lt); 3935 tcg_gen_brcondi_tl(TCG_COND_GT, t2, 0, l_b_hi_gt); 3936 tcg_gen_movi_tl(t3, 0); 3937 tcg_gen_br(l_b_lo); 3938 gen_set_label(l_b_hi_lt); 3939 tcg_gen_movi_tl(t3, 0xffff0000); 3940 tcg_gen_br(l_b_lo); 3941 gen_set_label(l_b_hi_gt); 3942 tcg_gen_movi_tl(t3, 0x00010000); 3943 3944 gen_set_label(l_b_lo); 3945 tcg_gen_sextract_tl(t2, t0, 0, 16); 3946 tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_c_hi); 3947 tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_b_lo_lt); 3948 tcg_gen_ori_tl(t3, t3, 0x00000001); 3949 tcg_gen_br(l_c_hi); 3950 gen_set_label(l_b_lo_lt); 3951 tcg_gen_ori_tl(t3, t3, 0x0000ffff); 3952 tcg_gen_br(l_c_hi); 3953 3954 gen_set_label(l_c_hi); 3955 tcg_gen_sextract_tl(t2, t1, 16, 16); 3956 tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_c_hi_lt); 3957 tcg_gen_brcondi_tl(TCG_COND_GT, t2, 0, l_c_hi_gt); 3958 tcg_gen_movi_tl(t4, 0); 3959 tcg_gen_br(l_c_lo); 3960 gen_set_label(l_c_hi_lt); 3961 tcg_gen_movi_tl(t4, 0xffff0000); 3962 tcg_gen_br(l_c_lo); 3963 gen_set_label(l_c_hi_gt); 3964 tcg_gen_movi_tl(t4, 0x00010000); 3965 3966 gen_set_label(l_c_lo); 3967 tcg_gen_sextract_tl(t2, t1, 0, 16); 3968 tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_done); 3969 tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_c_lo_lt); 3970 tcg_gen_ori_tl(t4, t4, 0x00000001); 3971 tcg_gen_br(l_done); 3972 gen_set_label(l_c_lo_lt); 3973 tcg_gen_ori_tl(t4, t4, 0x0000ffff); 3974 3975 gen_set_label(l_done); 3976 gen_store_mxu_gpr(t3, XRa); 3977 gen_store_mxu_gpr(t4, XRd); 3978 } 3979 3980 /* 3981 * S32SFL XRa, XRd, XRb, XRc 3982 * Shuffle bytes according to one of four patterns. 
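 *
 * With XRb = A:B:C:D and XRc = E:F:G:H (bytes, most significant first),
 * the patterns below come out as follows (a summary derived from the
 * implementation, not copied from the manual):
 *
 *   ptn2 == 0:   XRa = A:E:B:F   XRd = C:G:D:H
 *   ptn2 == 1:   XRa = A:C:E:G   XRd = B:D:F:H
 *   ptn2 == 2:   XRa = A:E:C:G   XRd = B:F:D:H
 *   ptn2 == 3:   XRa = A:B:E:F   XRd = C:D:G:H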
3983 */ 3984 static void gen_mxu_s32sfl(DisasContext *ctx) 3985 { 3986 uint32_t XRd, XRc, XRb, XRa, ptn2; 3987 3988 XRd = extract32(ctx->opcode, 18, 4); 3989 XRc = extract32(ctx->opcode, 14, 4); 3990 XRb = extract32(ctx->opcode, 10, 4); 3991 XRa = extract32(ctx->opcode, 6, 4); 3992 ptn2 = extract32(ctx->opcode, 24, 2); 3993 3994 TCGv t0 = tcg_temp_new(); 3995 TCGv t1 = tcg_temp_new(); 3996 TCGv t2 = tcg_temp_new(); 3997 TCGv t3 = tcg_temp_new(); 3998 3999 gen_load_mxu_gpr(t0, XRb); 4000 gen_load_mxu_gpr(t1, XRc); 4001 4002 switch (ptn2) { 4003 case 0: 4004 tcg_gen_andi_tl(t2, t0, 0xff000000); 4005 tcg_gen_andi_tl(t3, t1, 0x000000ff); 4006 tcg_gen_deposit_tl(t3, t3, t0, 8, 8); 4007 tcg_gen_shri_tl(t0, t0, 8); 4008 tcg_gen_shri_tl(t1, t1, 8); 4009 tcg_gen_deposit_tl(t3, t3, t0, 24, 8); 4010 tcg_gen_deposit_tl(t3, t3, t1, 16, 8); 4011 tcg_gen_shri_tl(t0, t0, 8); 4012 tcg_gen_shri_tl(t1, t1, 8); 4013 tcg_gen_deposit_tl(t2, t2, t0, 8, 8); 4014 tcg_gen_deposit_tl(t2, t2, t1, 0, 8); 4015 tcg_gen_shri_tl(t1, t1, 8); 4016 tcg_gen_deposit_tl(t2, t2, t1, 16, 8); 4017 break; 4018 case 1: 4019 tcg_gen_andi_tl(t2, t0, 0xff000000); 4020 tcg_gen_andi_tl(t3, t1, 0x000000ff); 4021 tcg_gen_deposit_tl(t3, t3, t0, 16, 8); 4022 tcg_gen_shri_tl(t0, t0, 8); 4023 tcg_gen_shri_tl(t1, t1, 8); 4024 tcg_gen_deposit_tl(t2, t2, t0, 16, 8); 4025 tcg_gen_deposit_tl(t2, t2, t1, 0, 8); 4026 tcg_gen_shri_tl(t0, t0, 8); 4027 tcg_gen_shri_tl(t1, t1, 8); 4028 tcg_gen_deposit_tl(t3, t3, t0, 24, 8); 4029 tcg_gen_deposit_tl(t3, t3, t1, 8, 8); 4030 tcg_gen_shri_tl(t1, t1, 8); 4031 tcg_gen_deposit_tl(t2, t2, t1, 8, 8); 4032 break; 4033 case 2: 4034 tcg_gen_andi_tl(t2, t0, 0xff00ff00); 4035 tcg_gen_andi_tl(t3, t1, 0x00ff00ff); 4036 tcg_gen_deposit_tl(t3, t3, t0, 8, 8); 4037 tcg_gen_shri_tl(t0, t0, 16); 4038 tcg_gen_shri_tl(t1, t1, 8); 4039 tcg_gen_deposit_tl(t2, t2, t1, 0, 8); 4040 tcg_gen_deposit_tl(t3, t3, t0, 24, 8); 4041 tcg_gen_shri_tl(t1, t1, 16); 4042 tcg_gen_deposit_tl(t2, t2, t1, 16, 8); 4043 break; 4044 case 3: 4045 tcg_gen_andi_tl(t2, t0, 0xffff0000); 4046 tcg_gen_andi_tl(t3, t1, 0x0000ffff); 4047 tcg_gen_shri_tl(t1, t1, 16); 4048 tcg_gen_deposit_tl(t2, t2, t1, 0, 16); 4049 tcg_gen_deposit_tl(t3, t3, t0, 16, 16); 4050 break; 4051 } 4052 4053 gen_store_mxu_gpr(t2, XRa); 4054 gen_store_mxu_gpr(t3, XRd); 4055 } 4056 4057 /* 4058 * Q8SAD XRa, XRd, XRb, XRc 4059 * Typical SAD operation for motion estimation. 
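 *
 * In C-like terms (an informal sketch of the code below; byte[i] denotes
 * the i-th least significant byte):
 *
 *   sad = 0;
 *   for (i = 0; i < 4; i++)
 *       sad += abs((int)XRb.byte[i] - (int)XRc.byte[i]);
 *   XRa = sad;
 *   XRd = XRd + sad;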
4060 */
4061 static void gen_mxu_q8sad(DisasContext *ctx)
4062 {
4063 uint32_t XRd, XRc, XRb, XRa;
4064
4065 XRd = extract32(ctx->opcode, 18, 4);
4066 XRc = extract32(ctx->opcode, 14, 4);
4067 XRb = extract32(ctx->opcode, 10, 4);
4068 XRa = extract32(ctx->opcode, 6, 4);
4069
4070 TCGv t0 = tcg_temp_new();
4071 TCGv t1 = tcg_temp_new();
4072 TCGv t2 = tcg_temp_new();
4073 TCGv t3 = tcg_temp_new();
4074 TCGv t4 = tcg_temp_new();
4075 TCGv t5 = tcg_temp_new();
4076
4077 gen_load_mxu_gpr(t2, XRb);
4078 gen_load_mxu_gpr(t3, XRc);
4079 gen_load_mxu_gpr(t5, XRd);
4080 tcg_gen_movi_tl(t4, 0);
4081
4082 for (int i = 0; i < 4; i++) {
4083 tcg_gen_andi_tl(t0, t2, 0xff);
4084 tcg_gen_andi_tl(t1, t3, 0xff);
4085 tcg_gen_sub_tl(t0, t0, t1);
4086 tcg_gen_abs_tl(t0, t0);
4087 tcg_gen_add_tl(t4, t4, t0);
4088 if (i < 3) {
4089 tcg_gen_shri_tl(t2, t2, 8);
4090 tcg_gen_shri_tl(t3, t3, 8);
4091 }
4092 }
4093 tcg_gen_add_tl(t5, t5, t4);
4094 gen_store_mxu_gpr(t4, XRa);
4095 gen_store_mxu_gpr(t5, XRd);
4096 }
4097
4098 /*
4099 * MXU instruction category: align
4100 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4101 *
4102 * S32ALN S32ALNI
4103 */
4104
4105 /*
4106 * S32ALNI XRc, XRb, XRa, optn3
4107 * Arrange bytes from XRb and XRc according to one of five sets of
4108 * rules determined by optn3, and place the result in XRa.
4109 */
4110 static void gen_mxu_S32ALNI(DisasContext *ctx)
4111 {
4112 uint32_t optn3, pad, XRc, XRb, XRa;
4113
4114 optn3 = extract32(ctx->opcode, 23, 3);
4115 pad = extract32(ctx->opcode, 21, 2);
4116 XRc = extract32(ctx->opcode, 14, 4);
4117 XRb = extract32(ctx->opcode, 10, 4);
4118 XRa = extract32(ctx->opcode, 6, 4);
4119
4120 if (unlikely(pad != 0)) {
4121 /* opcode padding incorrect -> do nothing */
4122 } else if (unlikely(XRa == 0)) {
4123 /* destination is zero register -> do nothing */
4124 } else if (unlikely((XRb == 0) && (XRc == 0))) {
4125 /* both operands zero registers -> just set destination to all 0s */
4126 tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
4127 } else if (unlikely(XRb == 0)) {
4128 /* XRb zero register -> just appropriately shift XRc into XRa */
4129 switch (optn3) {
4130 case MXU_OPTN3_PTN0:
4131 tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
4132 break;
4133 case MXU_OPTN3_PTN1:
4134 case MXU_OPTN3_PTN2:
4135 case MXU_OPTN3_PTN3:
4136 tcg_gen_shri_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1],
4137 8 * (4 - optn3));
4138 break;
4139 case MXU_OPTN3_PTN4:
4140 tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
4141 break;
4142 }
4143 } else if (unlikely(XRc == 0)) {
4144 /* XRc zero register -> just appropriately shift XRb into XRa */
4145 switch (optn3) {
4146 case MXU_OPTN3_PTN0:
4147 tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
4148 break;
4149 case MXU_OPTN3_PTN1:
4150 case MXU_OPTN3_PTN2:
4151 case MXU_OPTN3_PTN3:
4152 tcg_gen_shli_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], 8 * optn3);
4153 break;
4154 case MXU_OPTN3_PTN4:
4155 tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
4156 break;
4157 }
4158 } else if (unlikely(XRb == XRc)) {
4159 /* both operands same -> just rotation or moving from any of them */
4160 switch (optn3) {
4161 case MXU_OPTN3_PTN0:
4162 case MXU_OPTN3_PTN4:
4163 tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
4164 break;
4165 case MXU_OPTN3_PTN1:
4166 case MXU_OPTN3_PTN2:
4167 case MXU_OPTN3_PTN3:
4168 tcg_gen_rotli_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], 8 * optn3);
4169 break;
4170 }
4171 } else {
4172 /* the most general case */
4173 switch (optn3) {
4174 case MXU_OPTN3_PTN0:
4175 {
4176 /* */
4177 /* XRb XRc */
4178 /* +---------------+ */
4179 /* | A B C
D | E F G H */ 4180 /* +-------+-------+ */ 4181 /* | */ 4182 /* XRa */ 4183 /* */ 4184 4185 tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]); 4186 } 4187 break; 4188 case MXU_OPTN3_PTN1: 4189 { 4190 /* */ 4191 /* XRb XRc */ 4192 /* +-------------------+ */ 4193 /* A | B C D E | F G H */ 4194 /* +---------+---------+ */ 4195 /* | */ 4196 /* XRa */ 4197 /* */ 4198 4199 TCGv_i32 t0 = tcg_temp_new(); 4200 TCGv_i32 t1 = tcg_temp_new(); 4201 4202 tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x00FFFFFF); 4203 tcg_gen_shli_i32(t0, t0, 8); 4204 4205 tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF000000); 4206 tcg_gen_shri_i32(t1, t1, 24); 4207 4208 tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1); 4209 } 4210 break; 4211 case MXU_OPTN3_PTN2: 4212 { 4213 /* */ 4214 /* XRb XRc */ 4215 /* +-------------------+ */ 4216 /* A B | C D E F | G H */ 4217 /* +---------+---------+ */ 4218 /* | */ 4219 /* XRa */ 4220 /* */ 4221 4222 TCGv_i32 t0 = tcg_temp_new(); 4223 TCGv_i32 t1 = tcg_temp_new(); 4224 4225 tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x0000FFFF); 4226 tcg_gen_shli_i32(t0, t0, 16); 4227 4228 tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFF0000); 4229 tcg_gen_shri_i32(t1, t1, 16); 4230 4231 tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1); 4232 } 4233 break; 4234 case MXU_OPTN3_PTN3: 4235 { 4236 /* */ 4237 /* XRb XRc */ 4238 /* +-------------------+ */ 4239 /* A B C | D E F G | H */ 4240 /* +---------+---------+ */ 4241 /* | */ 4242 /* XRa */ 4243 /* */ 4244 4245 TCGv_i32 t0 = tcg_temp_new(); 4246 TCGv_i32 t1 = tcg_temp_new(); 4247 4248 tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x000000FF); 4249 tcg_gen_shli_i32(t0, t0, 24); 4250 4251 tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFFFF00); 4252 tcg_gen_shri_i32(t1, t1, 8); 4253 4254 tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1); 4255 } 4256 break; 4257 case MXU_OPTN3_PTN4: 4258 { 4259 /* */ 4260 /* XRb XRc */ 4261 /* +---------------+ */ 4262 /* A B C D | E F G H | */ 4263 /* +-------+-------+ */ 4264 /* | */ 4265 /* XRa */ 4266 /* */ 4267 4268 tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]); 4269 } 4270 break; 4271 } 4272 } 4273 } 4274 4275 /* 4276 * S32ALN XRc, XRb, XRa, rs 4277 * Arrange bytes from XRb and XRc according to one of five sets of 4278 * rules determined by rs[2:0], and place the result in XRa. 
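 *
 * This is the run-time variant of S32ALNI; an informal sketch of what the
 * code below does, with s standing for rs[2:0]:
 *
 *   s == 0:        XRa = XRb;
 *   s == 1, 2, 3:  XRa = (XRb << (8 * s)) | (XRc >> (32 - 8 * s));
 *   s == 4:        XRa = XRc;
 *   s >= 5:        undefined, XRa is left unmodified.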
4279 */
4280 static void gen_mxu_S32ALN(DisasContext *ctx)
4281 {
4282 uint32_t rs, XRc, XRb, XRa;
4283
4284 rs = extract32(ctx->opcode, 21, 5);
4285 XRc = extract32(ctx->opcode, 14, 4);
4286 XRb = extract32(ctx->opcode, 10, 4);
4287 XRa = extract32(ctx->opcode, 6, 4);
4288
4289 if (unlikely(XRa == 0)) {
4290 /* destination is zero register -> do nothing */
4291 } else if (unlikely((XRb == 0) && (XRc == 0))) {
4292 /* both operands zero registers -> just set destination to all 0s */
4293 tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
4294 } else {
4295 /* the most general case */
4296 TCGv t0 = tcg_temp_new();
4297 TCGv t1 = tcg_temp_new();
4298 TCGv t2 = tcg_temp_new();
4299 TCGv t3 = tcg_temp_new();
4300 TCGLabel *l_exit = gen_new_label();
4301 TCGLabel *l_b_only = gen_new_label();
4302 TCGLabel *l_c_only = gen_new_label();
4303
4304 gen_load_mxu_gpr(t0, XRb);
4305 gen_load_mxu_gpr(t1, XRc);
4306 gen_load_gpr(t2, rs);
4307 tcg_gen_andi_tl(t2, t2, 0x07);
4308
4309 /* do nothing for undefined cases */
4310 tcg_gen_brcondi_tl(TCG_COND_GE, t2, 5, l_exit);
4311
4312 tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_b_only);
4313 tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 4, l_c_only);
4314
4315 tcg_gen_shli_tl(t2, t2, 3);
4316 tcg_gen_subfi_tl(t3, 32, t2);
4317
4318 tcg_gen_shl_tl(t0, t0, t2);
4319 tcg_gen_shr_tl(t1, t1, t3);
4320 tcg_gen_or_tl(mxu_gpr[XRa - 1], t0, t1);
4321 tcg_gen_br(l_exit);
4322
4323 gen_set_label(l_b_only);
4324 gen_store_mxu_gpr(t0, XRa);
4325 tcg_gen_br(l_exit);
4326
4327 gen_set_label(l_c_only);
4328 gen_store_mxu_gpr(t1, XRa);
4329
4330 gen_set_label(l_exit);
4331 }
4332 }
4333
4334 /*
4335 * S32MADD XRa, XRd, rb, rc
4336 * 32x32 to 64 bit signed multiply with subsequent add;
4337 * the result is stored in the {XRa, XRd} pair and also written to HI/LO.
4338 * S32MADDU XRa, XRd, rb, rc
4339 * 32x32 to 64 bit unsigned multiply with subsequent add;
4340 * the result is stored in the {XRa, XRd} pair and also written to HI/LO.
4341 * S32MSUB XRa, XRd, rb, rc
4342 * 32x32 to 64 bit signed multiply with subsequent subtract;
4343 * the result is stored in the {XRa, XRd} pair and also written to HI/LO.
4344 * S32MSUBU XRa, XRd, rb, rc
4345 * 32x32 to 64 bit unsigned multiply with subsequent subtract;
4346 * the result is stored in the {XRa, XRd} pair and also written to HI/LO.
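 *
 * All four forms reduce to the following informal sketch (my reading of
 * the code below; "prod" is an ad-hoc name for the 64-bit product):
 *
 *   prod = (int64_t)rb * rc;            (or unsigned for the *U forms)
 *   {XRa:XRd} = {XRa:XRd} +/- prod;     (XRa holds the high word)
 *   HI = XRa;  LO = XRd;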
4347 */ 4348 static void gen_mxu_s32madd_sub(DisasContext *ctx, bool sub, bool uns) 4349 { 4350 uint32_t XRa, XRd, Rb, Rc; 4351 4352 XRa = extract32(ctx->opcode, 6, 4); 4353 XRd = extract32(ctx->opcode, 10, 4); 4354 Rb = extract32(ctx->opcode, 16, 5); 4355 Rc = extract32(ctx->opcode, 21, 5); 4356 4357 if (unlikely(Rb == 0 || Rc == 0)) { 4358 /* do nothing because x + 0 * y => x */ 4359 } else if (unlikely(XRa == 0 && XRd == 0)) { 4360 /* do nothing because result just dropped */ 4361 } else { 4362 TCGv t0 = tcg_temp_new(); 4363 TCGv t1 = tcg_temp_new(); 4364 TCGv_i64 t2 = tcg_temp_new_i64(); 4365 TCGv_i64 t3 = tcg_temp_new_i64(); 4366 4367 gen_load_gpr(t0, Rb); 4368 gen_load_gpr(t1, Rc); 4369 4370 if (uns) { 4371 tcg_gen_extu_tl_i64(t2, t0); 4372 tcg_gen_extu_tl_i64(t3, t1); 4373 } else { 4374 tcg_gen_ext_tl_i64(t2, t0); 4375 tcg_gen_ext_tl_i64(t3, t1); 4376 } 4377 tcg_gen_mul_i64(t2, t2, t3); 4378 4379 gen_load_mxu_gpr(t0, XRa); 4380 gen_load_mxu_gpr(t1, XRd); 4381 4382 tcg_gen_concat_tl_i64(t3, t1, t0); 4383 if (sub) { 4384 tcg_gen_sub_i64(t3, t3, t2); 4385 } else { 4386 tcg_gen_add_i64(t3, t3, t2); 4387 } 4388 gen_move_low32(t1, t3); 4389 gen_move_high32(t0, t3); 4390 4391 tcg_gen_mov_tl(cpu_HI[0], t0); 4392 tcg_gen_mov_tl(cpu_LO[0], t1); 4393 4394 gen_store_mxu_gpr(t1, XRd); 4395 gen_store_mxu_gpr(t0, XRa); 4396 } 4397 } 4398 4399 /* 4400 * Decoding engine for MXU 4401 * ======================= 4402 */ 4403 4404 static void decode_opc_mxu__pool00(DisasContext *ctx) 4405 { 4406 uint32_t opcode = extract32(ctx->opcode, 18, 3); 4407 4408 switch (opcode) { 4409 case OPC_MXU_S32MAX: 4410 case OPC_MXU_S32MIN: 4411 gen_mxu_S32MAX_S32MIN(ctx); 4412 break; 4413 case OPC_MXU_D16MAX: 4414 case OPC_MXU_D16MIN: 4415 gen_mxu_D16MAX_D16MIN(ctx); 4416 break; 4417 case OPC_MXU_Q8MAX: 4418 case OPC_MXU_Q8MIN: 4419 gen_mxu_Q8MAX_Q8MIN(ctx); 4420 break; 4421 case OPC_MXU_Q8SLT: 4422 gen_mxu_q8slt(ctx, false); 4423 break; 4424 case OPC_MXU_Q8SLTU: 4425 gen_mxu_q8slt(ctx, true); 4426 break; 4427 default: 4428 MIPS_INVAL("decode_opc_mxu"); 4429 gen_reserved_instruction(ctx); 4430 break; 4431 } 4432 } 4433 4434 static bool decode_opc_mxu_s32madd_sub(DisasContext *ctx) 4435 { 4436 uint32_t opcode = extract32(ctx->opcode, 0, 6); 4437 uint32_t pad = extract32(ctx->opcode, 14, 2); 4438 4439 if (pad != 2) { 4440 /* MIPS32R1 MADD/MADDU/MSUB/MSUBU are on pad == 0 */ 4441 return false; 4442 } 4443 4444 switch (opcode) { 4445 case OPC_MXU_S32MADD: 4446 gen_mxu_s32madd_sub(ctx, false, false); 4447 break; 4448 case OPC_MXU_S32MADDU: 4449 gen_mxu_s32madd_sub(ctx, false, true); 4450 break; 4451 case OPC_MXU_S32MSUB: 4452 gen_mxu_s32madd_sub(ctx, true, false); 4453 break; 4454 case OPC_MXU_S32MSUBU: 4455 gen_mxu_s32madd_sub(ctx, true, true); 4456 break; 4457 default: 4458 return false; 4459 } 4460 return true; 4461 } 4462 4463 static void decode_opc_mxu__pool01(DisasContext *ctx) 4464 { 4465 uint32_t opcode = extract32(ctx->opcode, 18, 3); 4466 4467 switch (opcode) { 4468 case OPC_MXU_S32SLT: 4469 gen_mxu_S32SLT(ctx); 4470 break; 4471 case OPC_MXU_D16SLT: 4472 gen_mxu_D16SLT(ctx); 4473 break; 4474 case OPC_MXU_D16AVG: 4475 gen_mxu_d16avg(ctx, false); 4476 break; 4477 case OPC_MXU_D16AVGR: 4478 gen_mxu_d16avg(ctx, true); 4479 break; 4480 case OPC_MXU_Q8AVG: 4481 gen_mxu_q8avg(ctx, false); 4482 break; 4483 case OPC_MXU_Q8AVGR: 4484 gen_mxu_q8avg(ctx, true); 4485 break; 4486 case OPC_MXU_Q8ADD: 4487 gen_mxu_Q8ADD(ctx); 4488 break; 4489 default: 4490 MIPS_INVAL("decode_opc_mxu"); 4491 gen_reserved_instruction(ctx); 4492 break; 
4493 } 4494 } 4495 4496 static void decode_opc_mxu__pool02(DisasContext *ctx) 4497 { 4498 uint32_t opcode = extract32(ctx->opcode, 18, 3); 4499 4500 switch (opcode) { 4501 case OPC_MXU_S32CPS: 4502 gen_mxu_S32CPS(ctx); 4503 break; 4504 case OPC_MXU_D16CPS: 4505 gen_mxu_D16CPS(ctx); 4506 break; 4507 case OPC_MXU_Q8ABD: 4508 gen_mxu_Q8ABD(ctx); 4509 break; 4510 case OPC_MXU_Q16SAT: 4511 gen_mxu_Q16SAT(ctx); 4512 break; 4513 default: 4514 MIPS_INVAL("decode_opc_mxu"); 4515 gen_reserved_instruction(ctx); 4516 break; 4517 } 4518 } 4519 4520 static void decode_opc_mxu__pool03(DisasContext *ctx) 4521 { 4522 uint32_t opcode = extract32(ctx->opcode, 24, 2); 4523 4524 switch (opcode) { 4525 case OPC_MXU_D16MULF: 4526 gen_mxu_d16mul(ctx, true, true); 4527 break; 4528 case OPC_MXU_D16MULE: 4529 gen_mxu_d16mul(ctx, true, false); 4530 break; 4531 default: 4532 MIPS_INVAL("decode_opc_mxu"); 4533 gen_reserved_instruction(ctx); 4534 break; 4535 } 4536 } 4537 4538 static void decode_opc_mxu__pool04(DisasContext *ctx) 4539 { 4540 uint32_t reversed = extract32(ctx->opcode, 20, 1); 4541 uint32_t opcode = extract32(ctx->opcode, 10, 4); 4542 4543 /* Don't care about opcode bits as their meaning is unknown yet */ 4544 switch (opcode) { 4545 default: 4546 gen_mxu_s32ldxx(ctx, reversed, false); 4547 break; 4548 } 4549 } 4550 4551 static void decode_opc_mxu__pool05(DisasContext *ctx) 4552 { 4553 uint32_t reversed = extract32(ctx->opcode, 20, 1); 4554 uint32_t opcode = extract32(ctx->opcode, 10, 4); 4555 4556 /* Don't care about opcode bits as their meaning is unknown yet */ 4557 switch (opcode) { 4558 default: 4559 gen_mxu_s32stxx(ctx, reversed, false); 4560 break; 4561 } 4562 } 4563 4564 static void decode_opc_mxu__pool06(DisasContext *ctx) 4565 { 4566 uint32_t opcode = extract32(ctx->opcode, 10, 4); 4567 uint32_t strd2 = extract32(ctx->opcode, 14, 2); 4568 4569 switch (opcode) { 4570 case OPC_MXU_S32LDST: 4571 case OPC_MXU_S32LDSTR: 4572 if (strd2 <= 2) { 4573 gen_mxu_s32ldxvx(ctx, opcode, false, strd2); 4574 break; 4575 } 4576 /* fallthrough */ 4577 default: 4578 MIPS_INVAL("decode_opc_mxu"); 4579 gen_reserved_instruction(ctx); 4580 break; 4581 } 4582 } 4583 4584 static void decode_opc_mxu__pool07(DisasContext *ctx) 4585 { 4586 uint32_t opcode = extract32(ctx->opcode, 10, 4); 4587 uint32_t strd2 = extract32(ctx->opcode, 14, 2); 4588 4589 switch (opcode) { 4590 case OPC_MXU_S32LDST: 4591 case OPC_MXU_S32LDSTR: 4592 if (strd2 <= 2) { 4593 gen_mxu_s32stxvx(ctx, opcode, false, strd2); 4594 break; 4595 } 4596 /* fallthrough */ 4597 default: 4598 MIPS_INVAL("decode_opc_mxu"); 4599 gen_reserved_instruction(ctx); 4600 break; 4601 } 4602 } 4603 4604 static void decode_opc_mxu__pool08(DisasContext *ctx) 4605 { 4606 uint32_t reversed = extract32(ctx->opcode, 20, 1); 4607 uint32_t opcode = extract32(ctx->opcode, 10, 4); 4608 4609 /* Don't care about opcode bits as their meaning is unknown yet */ 4610 switch (opcode) { 4611 default: 4612 gen_mxu_s32ldxx(ctx, reversed, true); 4613 break; 4614 } 4615 } 4616 4617 static void decode_opc_mxu__pool09(DisasContext *ctx) 4618 { 4619 uint32_t reversed = extract32(ctx->opcode, 20, 1); 4620 uint32_t opcode = extract32(ctx->opcode, 10, 4); 4621 4622 /* Don't care about opcode bits as their meaning is unknown yet */ 4623 switch (opcode) { 4624 default: 4625 gen_mxu_s32stxx(ctx, reversed, true); 4626 break; 4627 } 4628 } 4629 4630 static void decode_opc_mxu__pool10(DisasContext *ctx) 4631 { 4632 uint32_t opcode = extract32(ctx->opcode, 10, 4); 4633 uint32_t strd2 = extract32(ctx->opcode, 
14, 2); 4634 4635 switch (opcode) { 4636 case OPC_MXU_S32LDST: 4637 case OPC_MXU_S32LDSTR: 4638 if (strd2 <= 2) { 4639 gen_mxu_s32ldxvx(ctx, opcode, true, strd2); 4640 break; 4641 } 4642 /* fallthrough */ 4643 default: 4644 MIPS_INVAL("decode_opc_mxu"); 4645 gen_reserved_instruction(ctx); 4646 break; 4647 } 4648 } 4649 4650 static void decode_opc_mxu__pool11(DisasContext *ctx) 4651 { 4652 uint32_t opcode = extract32(ctx->opcode, 10, 4); 4653 uint32_t strd2 = extract32(ctx->opcode, 14, 2); 4654 4655 switch (opcode) { 4656 case OPC_MXU_S32LDST: 4657 case OPC_MXU_S32LDSTR: 4658 if (strd2 <= 2) { 4659 gen_mxu_s32stxvx(ctx, opcode, true, strd2); 4660 break; 4661 } 4662 /* fallthrough */ 4663 default: 4664 MIPS_INVAL("decode_opc_mxu"); 4665 gen_reserved_instruction(ctx); 4666 break; 4667 } 4668 } 4669 4670 static void decode_opc_mxu__pool12(DisasContext *ctx) 4671 { 4672 uint32_t opcode = extract32(ctx->opcode, 22, 2); 4673 4674 switch (opcode) { 4675 case OPC_MXU_D32ACC: 4676 gen_mxu_d32acc(ctx); 4677 break; 4678 case OPC_MXU_D32ACCM: 4679 gen_mxu_d32accm(ctx); 4680 break; 4681 case OPC_MXU_D32ASUM: 4682 gen_mxu_d32asum(ctx); 4683 break; 4684 default: 4685 MIPS_INVAL("decode_opc_mxu"); 4686 gen_reserved_instruction(ctx); 4687 break; 4688 } 4689 } 4690 4691 static void decode_opc_mxu__pool13(DisasContext *ctx) 4692 { 4693 uint32_t opcode = extract32(ctx->opcode, 22, 2); 4694 4695 switch (opcode) { 4696 case OPC_MXU_Q16ACC: 4697 gen_mxu_q16acc(ctx); 4698 break; 4699 case OPC_MXU_Q16ACCM: 4700 gen_mxu_q16accm(ctx); 4701 break; 4702 case OPC_MXU_D16ASUM: 4703 gen_mxu_d16asum(ctx); 4704 break; 4705 default: 4706 MIPS_INVAL("decode_opc_mxu"); 4707 gen_reserved_instruction(ctx); 4708 break; 4709 } 4710 } 4711 4712 static void decode_opc_mxu__pool14(DisasContext *ctx) 4713 { 4714 uint32_t opcode = extract32(ctx->opcode, 22, 2); 4715 4716 switch (opcode) { 4717 case OPC_MXU_Q8ADDE: 4718 gen_mxu_q8adde(ctx, false); 4719 break; 4720 case OPC_MXU_D8SUM: 4721 gen_mxu_d8sum(ctx, false); 4722 break; 4723 case OPC_MXU_D8SUMC: 4724 gen_mxu_d8sum(ctx, true); 4725 break; 4726 default: 4727 MIPS_INVAL("decode_opc_mxu"); 4728 gen_reserved_instruction(ctx); 4729 break; 4730 } 4731 } 4732 4733 static void decode_opc_mxu__pool15(DisasContext *ctx) 4734 { 4735 uint32_t opcode = extract32(ctx->opcode, 14, 2); 4736 4737 switch (opcode) { 4738 case OPC_MXU_S32MUL: 4739 gen_mxu_s32mul(ctx, false); 4740 break; 4741 case OPC_MXU_S32MULU: 4742 gen_mxu_s32mul(ctx, true); 4743 break; 4744 case OPC_MXU_S32EXTR: 4745 gen_mxu_s32extr(ctx); 4746 break; 4747 case OPC_MXU_S32EXTRV: 4748 gen_mxu_s32extrv(ctx); 4749 break; 4750 default: 4751 MIPS_INVAL("decode_opc_mxu"); 4752 gen_reserved_instruction(ctx); 4753 break; 4754 } 4755 } 4756 4757 static void decode_opc_mxu__pool16(DisasContext *ctx) 4758 { 4759 uint32_t opcode = extract32(ctx->opcode, 18, 3); 4760 4761 switch (opcode) { 4762 case OPC_MXU_D32SARW: 4763 gen_mxu_d32sarl(ctx, true); 4764 break; 4765 case OPC_MXU_S32ALN: 4766 gen_mxu_S32ALN(ctx); 4767 break; 4768 case OPC_MXU_S32ALNI: 4769 gen_mxu_S32ALNI(ctx); 4770 break; 4771 case OPC_MXU_S32LUI: 4772 gen_mxu_s32lui(ctx); 4773 break; 4774 case OPC_MXU_S32NOR: 4775 gen_mxu_S32NOR(ctx); 4776 break; 4777 case OPC_MXU_S32AND: 4778 gen_mxu_S32AND(ctx); 4779 break; 4780 case OPC_MXU_S32OR: 4781 gen_mxu_S32OR(ctx); 4782 break; 4783 case OPC_MXU_S32XOR: 4784 gen_mxu_S32XOR(ctx); 4785 break; 4786 default: 4787 MIPS_INVAL("decode_opc_mxu"); 4788 gen_reserved_instruction(ctx); 4789 break; 4790 } 4791 } 4792 4793 static void 
decode_opc_mxu__pool17(DisasContext *ctx) 4794 { 4795 uint32_t opcode = extract32(ctx->opcode, 6, 3); 4796 uint32_t strd2 = extract32(ctx->opcode, 9, 2); 4797 4798 if (strd2 > 2) { 4799 MIPS_INVAL("decode_opc_mxu"); 4800 gen_reserved_instruction(ctx); 4801 return; 4802 } 4803 4804 switch (opcode) { 4805 case OPC_MXU_LXW: 4806 gen_mxu_lxx(ctx, strd2, MO_TE | MO_UL); 4807 break; 4808 case OPC_MXU_LXB: 4809 gen_mxu_lxx(ctx, strd2, MO_TE | MO_SB); 4810 break; 4811 case OPC_MXU_LXH: 4812 gen_mxu_lxx(ctx, strd2, MO_TE | MO_SW); 4813 break; 4814 case OPC_MXU_LXBU: 4815 gen_mxu_lxx(ctx, strd2, MO_TE | MO_UB); 4816 break; 4817 case OPC_MXU_LXHU: 4818 gen_mxu_lxx(ctx, strd2, MO_TE | MO_UW); 4819 break; 4820 default: 4821 MIPS_INVAL("decode_opc_mxu"); 4822 gen_reserved_instruction(ctx); 4823 break; 4824 } 4825 } 4826 4827 static void decode_opc_mxu__pool18(DisasContext *ctx) 4828 { 4829 uint32_t opcode = extract32(ctx->opcode, 18, 3); 4830 4831 switch (opcode) { 4832 case OPC_MXU_D32SLLV: 4833 gen_mxu_d32sxxv(ctx, false, false); 4834 break; 4835 case OPC_MXU_D32SLRV: 4836 gen_mxu_d32sxxv(ctx, true, false); 4837 break; 4838 case OPC_MXU_D32SARV: 4839 gen_mxu_d32sxxv(ctx, true, true); 4840 break; 4841 case OPC_MXU_Q16SLLV: 4842 gen_mxu_q16sxxv(ctx, false, false); 4843 break; 4844 case OPC_MXU_Q16SLRV: 4845 gen_mxu_q16sxxv(ctx, true, false); 4846 break; 4847 case OPC_MXU_Q16SARV: 4848 gen_mxu_q16sxxv(ctx, true, true); 4849 break; 4850 default: 4851 MIPS_INVAL("decode_opc_mxu"); 4852 gen_reserved_instruction(ctx); 4853 break; 4854 } 4855 } 4856 4857 static void decode_opc_mxu__pool19(DisasContext *ctx) 4858 { 4859 uint32_t opcode = extract32(ctx->opcode, 22, 4); 4860 4861 switch (opcode) { 4862 case OPC_MXU_Q8MUL: 4863 gen_mxu_q8mul_mac(ctx, false, false); 4864 break; 4865 case OPC_MXU_Q8MULSU: 4866 gen_mxu_q8mul_mac(ctx, true, false); 4867 break; 4868 default: 4869 MIPS_INVAL("decode_opc_mxu"); 4870 gen_reserved_instruction(ctx); 4871 break; 4872 } 4873 } 4874 4875 static void decode_opc_mxu__pool20(DisasContext *ctx) 4876 { 4877 uint32_t opcode = extract32(ctx->opcode, 18, 3); 4878 4879 switch (opcode) { 4880 case OPC_MXU_Q8MOVZ: 4881 gen_mxu_q8movzn(ctx, TCG_COND_NE); 4882 break; 4883 case OPC_MXU_Q8MOVN: 4884 gen_mxu_q8movzn(ctx, TCG_COND_EQ); 4885 break; 4886 case OPC_MXU_D16MOVZ: 4887 gen_mxu_d16movzn(ctx, TCG_COND_NE); 4888 break; 4889 case OPC_MXU_D16MOVN: 4890 gen_mxu_d16movzn(ctx, TCG_COND_EQ); 4891 break; 4892 case OPC_MXU_S32MOVZ: 4893 gen_mxu_s32movzn(ctx, TCG_COND_NE); 4894 break; 4895 case OPC_MXU_S32MOVN: 4896 gen_mxu_s32movzn(ctx, TCG_COND_EQ); 4897 break; 4898 default: 4899 MIPS_INVAL("decode_opc_mxu"); 4900 gen_reserved_instruction(ctx); 4901 break; 4902 } 4903 } 4904 4905 static void decode_opc_mxu__pool21(DisasContext *ctx) 4906 { 4907 uint32_t opcode = extract32(ctx->opcode, 22, 2); 4908 4909 switch (opcode) { 4910 case OPC_MXU_Q8MAC: 4911 gen_mxu_q8mul_mac(ctx, false, true); 4912 break; 4913 case OPC_MXU_Q8MACSU: 4914 gen_mxu_q8mul_mac(ctx, true, true); 4915 break; 4916 default: 4917 MIPS_INVAL("decode_opc_mxu"); 4918 gen_reserved_instruction(ctx); 4919 break; 4920 } 4921 } 4922 4923 4924 bool decode_ase_mxu(DisasContext *ctx, uint32_t insn) 4925 { 4926 uint32_t opcode = extract32(insn, 0, 6); 4927 4928 if (opcode == OPC_MXU_S32M2I) { 4929 gen_mxu_s32m2i(ctx); 4930 return true; 4931 } 4932 4933 if (opcode == OPC_MXU_S32I2M) { 4934 gen_mxu_s32i2m(ctx); 4935 return true; 4936 } 4937 4938 { 4939 TCGv t_mxu_cr = tcg_temp_new(); 4940 TCGLabel *l_exit = gen_new_label(); 4941 4942 
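        /*
         * Apart from S32M2I/S32I2M, which are handled above, MXU
         * instructions only take effect when the MXU_EN bit of the MXU
         * control register is set; otherwise the generated code branches
         * to l_exit and the instruction behaves as a NOP.
         */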
gen_load_mxu_cr(t_mxu_cr); 4943 tcg_gen_andi_tl(t_mxu_cr, t_mxu_cr, MXU_CR_MXU_EN); 4944 tcg_gen_brcondi_tl(TCG_COND_NE, t_mxu_cr, MXU_CR_MXU_EN, l_exit); 4945 4946 switch (opcode) { 4947 case OPC_MXU_S32MADD: 4948 case OPC_MXU_S32MADDU: 4949 case OPC_MXU_S32MSUB: 4950 case OPC_MXU_S32MSUBU: 4951 return decode_opc_mxu_s32madd_sub(ctx); 4952 case OPC_MXU__POOL00: 4953 decode_opc_mxu__pool00(ctx); 4954 break; 4955 case OPC_MXU_D16MUL: 4956 gen_mxu_d16mul(ctx, false, false); 4957 break; 4958 case OPC_MXU_D16MAC: 4959 gen_mxu_d16mac(ctx, false, false); 4960 break; 4961 case OPC_MXU_D16MACF: 4962 gen_mxu_d16mac(ctx, true, true); 4963 break; 4964 case OPC_MXU_D16MADL: 4965 gen_mxu_d16madl(ctx); 4966 break; 4967 case OPC_MXU_S16MAD: 4968 gen_mxu_s16mad(ctx); 4969 break; 4970 case OPC_MXU_Q16ADD: 4971 gen_mxu_q16add(ctx); 4972 break; 4973 case OPC_MXU_D16MACE: 4974 gen_mxu_d16mac(ctx, true, false); 4975 break; 4976 case OPC_MXU__POOL01: 4977 decode_opc_mxu__pool01(ctx); 4978 break; 4979 case OPC_MXU__POOL02: 4980 decode_opc_mxu__pool02(ctx); 4981 break; 4982 case OPC_MXU__POOL03: 4983 decode_opc_mxu__pool03(ctx); 4984 break; 4985 case OPC_MXU__POOL04: 4986 decode_opc_mxu__pool04(ctx); 4987 break; 4988 case OPC_MXU__POOL05: 4989 decode_opc_mxu__pool05(ctx); 4990 break; 4991 case OPC_MXU__POOL06: 4992 decode_opc_mxu__pool06(ctx); 4993 break; 4994 case OPC_MXU__POOL07: 4995 decode_opc_mxu__pool07(ctx); 4996 break; 4997 case OPC_MXU__POOL08: 4998 decode_opc_mxu__pool08(ctx); 4999 break; 5000 case OPC_MXU__POOL09: 5001 decode_opc_mxu__pool09(ctx); 5002 break; 5003 case OPC_MXU__POOL10: 5004 decode_opc_mxu__pool10(ctx); 5005 break; 5006 case OPC_MXU__POOL11: 5007 decode_opc_mxu__pool11(ctx); 5008 break; 5009 case OPC_MXU_D32ADD: 5010 gen_mxu_d32add(ctx); 5011 break; 5012 case OPC_MXU__POOL12: 5013 decode_opc_mxu__pool12(ctx); 5014 break; 5015 case OPC_MXU__POOL13: 5016 decode_opc_mxu__pool13(ctx); 5017 break; 5018 case OPC_MXU__POOL14: 5019 decode_opc_mxu__pool14(ctx); 5020 break; 5021 case OPC_MXU_Q8ACCE: 5022 gen_mxu_q8adde(ctx, true); 5023 break; 5024 case OPC_MXU_S8LDD: 5025 gen_mxu_s8ldd(ctx, false); 5026 break; 5027 case OPC_MXU_S8STD: 5028 gen_mxu_s8std(ctx, false); 5029 break; 5030 case OPC_MXU_S8LDI: 5031 gen_mxu_s8ldd(ctx, true); 5032 break; 5033 case OPC_MXU_S8SDI: 5034 gen_mxu_s8std(ctx, true); 5035 break; 5036 case OPC_MXU__POOL15: 5037 decode_opc_mxu__pool15(ctx); 5038 break; 5039 case OPC_MXU__POOL16: 5040 decode_opc_mxu__pool16(ctx); 5041 break; 5042 case OPC_MXU__POOL17: 5043 decode_opc_mxu__pool17(ctx); 5044 break; 5045 case OPC_MXU_S16LDD: 5046 gen_mxu_s16ldd(ctx, false); 5047 break; 5048 case OPC_MXU_S16STD: 5049 gen_mxu_s16std(ctx, false); 5050 break; 5051 case OPC_MXU_S16LDI: 5052 gen_mxu_s16ldd(ctx, true); 5053 break; 5054 case OPC_MXU_S16SDI: 5055 gen_mxu_s16std(ctx, true); 5056 break; 5057 case OPC_MXU_D32SLL: 5058 gen_mxu_d32sxx(ctx, false, false); 5059 break; 5060 case OPC_MXU_D32SLR: 5061 gen_mxu_d32sxx(ctx, true, false); 5062 break; 5063 case OPC_MXU_D32SARL: 5064 gen_mxu_d32sarl(ctx, false); 5065 break; 5066 case OPC_MXU_D32SAR: 5067 gen_mxu_d32sxx(ctx, true, true); 5068 break; 5069 case OPC_MXU_Q16SLL: 5070 gen_mxu_q16sxx(ctx, false, false); 5071 break; 5072 case OPC_MXU__POOL18: 5073 decode_opc_mxu__pool18(ctx); 5074 break; 5075 case OPC_MXU_Q16SLR: 5076 gen_mxu_q16sxx(ctx, true, false); 5077 break; 5078 case OPC_MXU_Q16SAR: 5079 gen_mxu_q16sxx(ctx, true, true); 5080 break; 5081 case OPC_MXU__POOL19: 5082 decode_opc_mxu__pool19(ctx); 5083 break; 5084 case OPC_MXU__POOL20: 
5085 decode_opc_mxu__pool20(ctx); 5086 break; 5087 case OPC_MXU__POOL21: 5088 decode_opc_mxu__pool21(ctx); 5089 break; 5090 case OPC_MXU_Q16SCOP: 5091 gen_mxu_q16scop(ctx); 5092 break; 5093 case OPC_MXU_Q8MADL: 5094 gen_mxu_q8madl(ctx); 5095 break; 5096 case OPC_MXU_S32SFL: 5097 gen_mxu_s32sfl(ctx); 5098 break; 5099 case OPC_MXU_Q8SAD: 5100 gen_mxu_q8sad(ctx); 5101 break; 5102 default: 5103 return false; 5104 } 5105 5106 gen_set_label(l_exit); 5107 } 5108 5109 return true; 5110 } 5111