xref: /openbmc/qemu/target/hexagon/mmvec/macros.h (revision fd9a38fd437c4c31705071c240f4be11394ca1f8)
1  /*
2   *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
3   *
4   *  This program is free software; you can redistribute it and/or modify
5   *  it under the terms of the GNU General Public License as published by
6   *  the Free Software Foundation; either version 2 of the License, or
7   *  (at your option) any later version.
8   *
9   *  This program is distributed in the hope that it will be useful,
10   *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11   *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   *  GNU General Public License for more details.
13   *
14   *  You should have received a copy of the GNU General Public License
15   *  along with this program; if not, see <http://www.gnu.org/licenses/>.
16   */
17  
18  #ifndef HEXAGON_MMVEC_MACROS_H
19  #define HEXAGON_MMVEC_MACROS_H
20  
21  #include "qemu/host-utils.h"
22  #include "arch.h"
23  #include "mmvec/system_ext_mmvec.h"
24  
#ifndef QEMU_GENERATE
/*
 * Typed lvalue views of operand pointers.
 *
 * Each name dereferences the matching "<name>_void" pointer, which must be
 * in scope at the point of use, after casting it to the indicated type.
 */

/* Single vector registers */
#define VdV (*(MMVector *)(VdV_void))
#define VsV (*(MMVector *)(VsV_void))
#define VuV (*(MMVector *)(VuV_void))
#define VvV (*(MMVector *)(VvV_void))
#define VwV (*(MMVector *)(VwV_void))
#define VxV (*(MMVector *)(VxV_void))
#define VyV (*(MMVector *)(VyV_void))

/* Vector register pairs */
#define VddV (*(MMVectorPair *)(VddV_void))
#define VuuV (*(MMVectorPair *)(VuuV_void))
#define VvvV (*(MMVectorPair *)(VvvV_void))
#define VxxV (*(MMVectorPair *)(VxxV_void))

/* Q (predicate) registers */
#define QeV (*(MMQReg *)(QeV_void))
#define QdV (*(MMQReg *)(QdV_void))
#define QsV (*(MMQReg *)(QsV_void))
#define QtV (*(MMQReg *)(QtV_void))
#define QuV (*(MMQReg *)(QuV_void))
#define QvV (*(MMQReg *)(QvV_void))
#define QxV (*(MMQReg *)(QxV_void))
#endif /* !QEMU_GENERATE */
47  
/*
 * Record one byte in slot IDX of env->vtcm_log.
 *
 * VA   - address stored in vtcm_log.va[IDX]
 * MASK - non-zero sets bit IDX of vtcm_log.mask, zero clears it
 * VAL  - byte value stored in vtcm_log.data.ub[IDX]
 * IDX  - slot index; it is expanded several times, so it must be free of
 *        side effects
 *
 * Requires a variable named "env" in the expanding scope.
 */
#define LOG_VTCM_BYTE(VA, MASK, VAL, IDX) \
    do { \
        env->vtcm_log.data.ub[IDX] = (VAL); \
        if (!(MASK)) { \
            clear_bit((IDX), env->vtcm_log.mask); \
        } else { \
            set_bit((IDX), env->vtcm_log.mask); \
        } \
        env->vtcm_log.va[IDX] = (VA); \
    } while (0)
58  
/*
 * Evaluate to an MMQReg whose first fVECSIZE() / 64 "ud" elements are the
 * bitwise complement of the corresponding elements of VAL.
 *
 * Implemented as a GNU statement expression so that it can be used where a
 * value is expected.  VAL is expanded once per loop iteration and must
 * therefore be free of side effects; it is parenthesized so that arguments
 * such as "*ptr" select the member of the intended object.
 */
#define fNOTQ(VAL) \
    ({ \
        MMQReg _ret; \
        int _i_; \
        for (_i_ = 0; _i_ < fVECSIZE() / 64; _i_++) { \
            _ret.ud[_i_] = ~(VAL).ud[_i_]; \
        } \
        _ret; \
    })
/*
 * Q-register bit extraction and byte-mask generation.
 *
 * A Q register is accessed through its 32-bit "w" elements: bit BITNO lives
 * in w[BITNO >> 5] at position (BITNO & 0x1f).  All macro arguments are
 * parenthesized so that compound expressions such as "i + 1" are handled
 * correctly; arguments are expanded more than once and must be free of side
 * effects.
 */

/* Bits of REG starting at BITNO, ANDed with MASK.  WIDTH is not used. */
#define fGETQBITS(REG, WIDTH, MASK, BITNO) \
    ((MASK) & ((REG).w[(BITNO) >> 5] >> ((BITNO) & 0x1f)))
/* Single bit BITNO of REG (0 or 1). */
#define fGETQBIT(REG, BITNO) fGETQBITS(REG, 1, 1, BITNO)
/*
 * 32-bit mask for word lane IDX: byte k of the result is 0xFF when Q bit
 * (4 * IDX + k) is set and 0x00 otherwise.
 */
#define fGENMASKW(QREG, IDX) \
    (((fGETQBIT(QREG, ((IDX) * 4 + 0)) ? 0xFF : 0x0) << 0)  | \
     ((fGETQBIT(QREG, ((IDX) * 4 + 1)) ? 0xFF : 0x0) << 8)  | \
     ((fGETQBIT(QREG, ((IDX) * 4 + 2)) ? 0xFF : 0x0) << 16) | \
     ((fGETQBIT(QREG, ((IDX) * 4 + 3)) ? 0xFF : 0x0) << 24))
/* 4-bit field number IDX of SRC, passed through fSXTN(4, 8, ...). */
#define fGETNIBBLE(IDX, SRC) (fSXTN(4, 8, ((SRC) >> (4 * (IDX))) & 0xF))
/* 2-bit field number IDX of SRC, passed through fSXTN(2, 8, ...). */
#define fGETCRUMB(IDX, SRC) (fSXTN(2, 8, ((SRC) >> (2 * (IDX))) & 0x3))
/* Crumb value c mapped to (2 - c) when c >= 0, left unchanged otherwise. */
#define fGETCRUMB_SYMMETRIC(IDX, SRC) \
    ((fGETCRUMB(IDX, SRC) >= 0 ? (2 - fGETCRUMB(IDX, SRC)) \
                               : fGETCRUMB(IDX, SRC)))
/*
 * 16-bit mask for halfword lane IDX: byte k of the result is 0xFF when Q bit
 * (2 * IDX + k) is set and 0x00 otherwise.
 */
#define fGENMASKH(QREG, IDX) \
    (((fGETQBIT(QREG, ((IDX) * 2 + 0)) ? 0xFF : 0x0) << 0) | \
     ((fGETQBIT(QREG, ((IDX) * 2 + 1)) ? 0xFF : 0x0) << 8))
/* Word / halfword lane IDX of VREG with the bytes not selected by QREG zeroed. */
#define fGETMASKW(VREG, QREG, IDX) ((VREG).w[IDX] & fGENMASKW((QREG), IDX))
#define fGETMASKH(VREG, QREG, IDX) ((VREG).h[IDX] & fGENMASKH((QREG), IDX))
/* YESVAL when Q bit IDX is set, NOVAL otherwise. */
#define fCONDMASK8(QREG, IDX, YESVAL, NOVAL) \
    (fGETQBIT(QREG, IDX) ? (YESVAL) : (NOVAL))
/* Per-byte select between YESVAL and NOVAL for halfword lane IDX. */
#define fCONDMASK16(QREG, IDX, YESVAL, NOVAL) \
    ((fGENMASKH(QREG, IDX) & (YESVAL)) | \
     (fGENMASKH(fNOTQ(QREG), IDX) & (NOVAL)))
/* Per-byte select between YESVAL and NOVAL for word lane IDX. */
#define fCONDMASK32(QREG, IDX, YESVAL, NOVAL) \
    ((fGENMASKW(QREG, IDX) & (YESVAL)) | \
     (fGENMASKW(fNOTQ(QREG), IDX) & (NOVAL)))
/*
 * Replace the bits of Q register REG selected by MASK, starting at bit
 * BITNO, with the corresponding low bits of VAL.  WIDTH is not used.
 *
 * VAL, MASK and BITNO are each evaluated exactly once, and the mask is
 * shifted as an unsigned 32-bit value so that setting bit 31 of a word does
 * not shift into the sign bit of an int.  REG is expanded more than once and
 * must be a side-effect-free lvalue.
 */
#define fSETQBITS(REG, WIDTH, MASK, BITNO, VAL) \
    do { \
        const uint32_t qbits_val_ = (VAL); \
        const uint32_t qbits_mask_ = (MASK); \
        const uint32_t qbits_bitno_ = (BITNO); \
        const uint32_t qbits_shift_ = qbits_bitno_ & 0x1f; \
        (REG).w[qbits_bitno_ >> 5] = \
            ((REG).w[qbits_bitno_ >> 5] & ~(qbits_mask_ << qbits_shift_)) | \
            ((qbits_val_ & qbits_mask_) << qbits_shift_); \
    } while (0)
/* Set bit BITNO of REG to the low bit of VAL. */
#define fSETQBIT(REG, BITNO, VAL) fSETQBITS(REG, 1, 1, BITNO, VAL)
/* Vector geometry: a vector is 1 << fVECLOGSIZE() = 128 bytes. */
#define fVECLOGSIZE() (7)
#define fVECSIZE() (1 << fVECLOGSIZE())
#define fVBYTES() (fVECSIZE())
/* Number of WIDTH-bit elements in one vector. */
#define fVELEM(WIDTH) ((fVECSIZE() * 8) / (WIDTH))
/*
 * Round ADDR down to a multiple of the alignment (fVALIGN) or set all of its
 * low alignment bits (fVLASTBYTE); ADDR is updated in place.
 *
 * Despite its name, LOG2_ALIGNMENT is used directly as the alignment value:
 * it is decremented to form the bit mask, so it is expected to be a power of
 * two rather than a logarithm.
 */
#define fVALIGN(ADDR, LOG2_ALIGNMENT) \
    ((ADDR) = (ADDR) & ~((LOG2_ALIGNMENT) - 1))
#define fVLASTBYTE(ADDR, LOG2_ALIGNMENT) \
    ((ADDR) = (ADDR) | ((LOG2_ALIGNMENT) - 1))
/*
 * Exchange two byte-sized lvalues.  The temporary has a name that callers
 * are unlikely to use, so fSWAPB(tmp, x) works as expected.
 */
#define fSWAPB(A, B) \
    do { \
        uint8_t swapb_tmp_ = (A); \
        (A) = (B); \
        (B) = swapb_tmp_; \
    } while (0)
/*
 * Call warn() when EA has any of the bits in MASK set.
 *
 * NOTE: this expands to a bare "if" statement rather than a
 * do { } while (0) block, so it must only be used as a complete statement
 * inside braces (never as the unbraced body of an if/else).
 */
#define fV_AL_CHECK(EA, MASK) \
    if (((EA) & (MASK)) != 0) { \
        warn("aligning misaligned vector. EA=%08x", (EA)); \
    }
/*
 * Scatter/gather bracketing.  The *_INIT macros ignore their arguments and
 * call the corresponding mem_vector_*_init(env); the *_FINISH macros expand
 * to nothing.
 */
#define fSCATTER_INIT(REGION_START, LENGTH, ELEMENT_SIZE) \
    mem_vector_scatter_init(env)
#define fGATHER_INIT(REGION_START, LENGTH, ELEMENT_SIZE) \
    mem_vector_gather_init(env)
#define fSCATTER_FINISH(OP)
#define fGATHER_FINISH()
/* Flag the VTCM log as holding an operation and record its size. */
#define fLOG_SCATTER_OP(SIZE) \
    do { \
        env->vtcm_log.op_size = (SIZE); \
        env->vtcm_log.op = true; \
    } while (0)
/*
 * Shared body of the fVLOG_VTCM_*_INCREMENT macros.
 *
 * Logs the NBYTES bytes of element IDX of INC (INC.ub[NBYTES * IDX + k])
 * into VTCM log slots NBYTES * IDX + k, for addresses EA + k.  A byte's mask
 * bit is set only when its address is <= EA + LEN.
 *
 * EA and LEN are evaluated once; IDX is expanded several times and must be
 * free of side effects.  Requires "env" in the expanding scope.
 */
#define VLOG_VTCM_INCREMENT_BYTES(EA, INC, IDX, LEN, NBYTES) \
    do { \
        target_ulong va = (EA); \
        target_ulong va_high = va + (LEN); \
        for (int i0 = 0; i0 < (NBYTES); i0++) { \
            int log_byte = (va + i0) <= va_high; \
            LOG_VTCM_BYTE(va + i0, log_byte, \
                          (INC).ub[(NBYTES) * (IDX) + i0], \
                          (NBYTES) * (IDX) + i0); \
        } \
    } while (0)

/* Word (4-byte) element.  OFFSET and ALIGNMENT are not used. */
#define fVLOG_VTCM_WORD_INCREMENT(EA, OFFSET, INC, IDX, ALIGNMENT, LEN) \
    VLOG_VTCM_INCREMENT_BYTES(EA, INC, IDX, LEN, 4)

/* Halfword (2-byte) element.  OFFSET and ALIGNMENT are not used. */
#define fVLOG_VTCM_HALFWORD_INCREMENT(EA, OFFSET, INC, IDX, ALIGNMENT, LEN) \
    VLOG_VTCM_INCREMENT_BYTES(EA, INC, IDX, LEN, 2)

/*
 * Halfword element, double-vector form.  OFFSET, IDX2, IDX_H and ALIGNMENT
 * are not used; the behavior is the same as fVLOG_VTCM_HALFWORD_INCREMENT.
 */
#define fVLOG_VTCM_HALFWORD_INCREMENT_DV(EA, OFFSET, INC, IDX, IDX2, IDX_H, \
                                         ALIGNMENT, LEN) \
    VLOG_VTCM_INCREMENT_BYTES(EA, INC, IDX, LEN, 2)
158  
/*
 * Gather one element of ELEMENT_SIZE bytes starting at address EA.
 *
 * For each byte k of the element:
 *   - the byte is loaded from EA + k with cpu_ldub_data_ra(),
 *   - it is stored in env->tmp_VRegs[0].ub[ELEMENT_SIZE * IDX + k],
 *   - it is logged with LOG_VTCM_BYTE in slot ELEMENT_SIZE * IDX + k; the
 *     mask bit is set only when EA + k <= EA + LEN and QVAL is non-zero.
 * The load is performed regardless of the mask value.
 *
 * QVAL is evaluated inside the loop and may refer to the loop counter "i0".
 * OFFSET and BANK_IDX are not used.  EA and LEN are evaluated once.
 *
 * NOTE(review): the destination is hard-coded to tmp_VRegs[0]; confirm that
 * this is always the intended temporary.
 */
#define GATHER_FUNCTION(EA, OFFSET, IDX, LEN, ELEMENT_SIZE, BANK_IDX, QVAL) \
    do { \
        int i0; \
        target_ulong va = (EA); \
        target_ulong va_high = va + (LEN); \
        uintptr_t ra = GETPC(); \
        for (i0 = 0; i0 < (ELEMENT_SIZE); i0++) { \
            int log_byte = ((va + i0) <= va_high) && (QVAL); \
            uint8_t B = cpu_ldub_data_ra(env, va + i0, ra); \
            env->tmp_VRegs[0].ub[(ELEMENT_SIZE) * (IDX) + i0] = B; \
            LOG_VTCM_BYTE(va + i0, log_byte, B, \
                          (ELEMENT_SIZE) * (IDX) + i0); \
        } \
    } while (0)
/*
 * Gather wrappers around GATHER_FUNCTION.
 *
 * WORD variants gather 4-byte elements, HALFWORD variants 2-byte elements.
 * The unpredicated forms pass a constant 1 as the per-byte enable.  The "Q"
 * forms enable byte k of element IDX from predicate bit
 * (element size * IDX + k) of QsV, where "i0" is the byte counter declared
 * inside GATHER_FUNCTION.
 *
 * The Q parameter is accepted but not used: the predicate is always read
 * from QsV.
 */
#define fVLOG_VTCM_GATHER_WORD(EA, OFFSET, IDX, LEN) \
    GATHER_FUNCTION(EA, OFFSET, IDX, LEN, 4, IDX, 1)
#define fVLOG_VTCM_GATHER_HALFWORD(EA, OFFSET, IDX, LEN) \
    GATHER_FUNCTION(EA, OFFSET, IDX, LEN, 2, IDX, 1)
#define fVLOG_VTCM_GATHER_HALFWORD_DV(EA, OFFSET, IDX, IDX2, IDX_H, LEN) \
    GATHER_FUNCTION(EA, OFFSET, IDX, LEN, 2, (2 * (IDX2) + (IDX_H)), 1)
#define fVLOG_VTCM_GATHER_WORDQ(EA, OFFSET, IDX, Q, LEN) \
    GATHER_FUNCTION(EA, OFFSET, IDX, LEN, 4, IDX, \
                    fGETQBIT(QsV, 4 * (IDX) + i0))
#define fVLOG_VTCM_GATHER_HALFWORDQ(EA, OFFSET, IDX, Q, LEN) \
    GATHER_FUNCTION(EA, OFFSET, IDX, LEN, 2, IDX, \
                    fGETQBIT(QsV, 2 * (IDX) + i0))
#define fVLOG_VTCM_GATHER_HALFWORDQ_DV(EA, OFFSET, IDX, IDX2, IDX_H, Q, LEN) \
    GATHER_FUNCTION(EA, OFFSET, IDX, LEN, 2, (2 * (IDX2) + (IDX_H)), \
                    fGETQBIT(QsV, 2 * (IDX) + i0))
/*
 * Apply the logged scatter-accumulate data to memory, one TYPE-sized element
 * at a time.
 *
 * For every element whose first byte has its bit set in env->vtcm_log.mask:
 *   - the current memory value is assembled from bytes read at the logged
 *     addresses (byte j contributes bits 8*j .. 8*j+7),
 *   - the increment is assembled the same way from env->vtcm_log.data,
 *   - the mask bit and data byte of every byte of the element are cleared,
 *   - the sum (wrapping at the width of TYPE) is written back byte by byte.
 *
 * Bytes are widened to TYPE before being shifted so that the shift is done
 * in the element type rather than in a promoted int.
 *
 * Assigns GETPC() to a variable named "ra", which must already be declared
 * in the expanding scope, together with "env".
 */
#define SCATTER_OP_WRITE_TO_MEM(TYPE) \
    do { \
        ra = GETPC(); \
        for (int i = 0; i < sizeof(MMVector); i += sizeof(TYPE)) { \
            if (test_bit(i, env->vtcm_log.mask)) { \
                TYPE dst = 0; \
                TYPE inc = 0; \
                for (int j = 0; j < sizeof(TYPE); j++) { \
                    uint8_t val; \
                    val = cpu_ldub_data_ra(env, env->vtcm_log.va[i + j], ra); \
                    dst |= (TYPE)val << (8 * j); \
                    inc |= (TYPE)env->vtcm_log.data.ub[j + i] << (8 * j); \
                    clear_bit(j + i, env->vtcm_log.mask); \
                    env->vtcm_log.data.ub[j + i] = 0; \
                } \
                dst += inc; \
                for (int j = 0; j < sizeof(TYPE); j++) { \
                    cpu_stb_data_ra(env, env->vtcm_log.va[i + j], \
                                    (dst >> (8 * j)) & 0xFF, ra); \
                } \
            } \
        } \
    } while (0)
/*
 * Call probe_read() and probe_write() on every byte address of each logged
 * TYPE-sized element.
 *
 * An element is probed when the mask bit of its first byte is set in
 * env->vtcm_log.mask; each of its bytes is then probed individually (size 1)
 * at the address recorded in env->vtcm_log.va[], using MMU_IDX and RETADDR.
 */
#define SCATTER_OP_PROBE_MEM(TYPE, MMU_IDX, RETADDR) \
    do { \
        for (int elem = 0; elem < sizeof(MMVector); elem += sizeof(TYPE)) { \
            if (!test_bit(elem, env->vtcm_log.mask)) { \
                continue; \
            } \
            for (int byte = 0; byte < sizeof(TYPE); byte++) { \
                probe_read(env, env->vtcm_log.va[elem + byte], 1, \
                           MMU_IDX, RETADDR); \
                probe_write(env, env->vtcm_log.va[elem + byte], 1, \
                            MMU_IDX, RETADDR); \
            } \
        } \
    } while (0)
/*
 * Log one element of ELEM_SIZE bytes for a scatter to address EA.
 *
 * Byte k of element IDX of IN (IN.ub[ELEM_SIZE * IDX + k]) is logged with
 * LOG_VTCM_BYTE in slot ELEM_SIZE * IDX + k for address EA + k.  The mask
 * bit is set only when EA + k <= EA + LEN and QVAL is non-zero.
 *
 * QVAL is evaluated inside the loop and may refer to the loop counter "i0".
 * OFFSET and BANK_IDX are not used.  EA and LEN are evaluated once.
 */
#define SCATTER_FUNCTION(EA, OFFSET, IDX, LEN, ELEM_SIZE, BANK_IDX, QVAL, IN) \
    do { \
        int i0; \
        target_ulong va = (EA); \
        target_ulong va_high = va + (LEN); \
        for (i0 = 0; i0 < (ELEM_SIZE); i0++) { \
            int log_byte = ((va + i0) <= va_high) && (QVAL); \
            LOG_VTCM_BYTE(va + i0, log_byte, \
                          (IN).ub[(ELEM_SIZE) * (IDX) + i0], \
                          (ELEM_SIZE) * (IDX) + i0); \
        } \
    } while (0)
/*
 * Scatter wrappers around SCATTER_FUNCTION.
 *
 * WORD variants log 4-byte elements, HALFWORD variants 2-byte elements.
 * The unpredicated forms pass a constant 1 as the per-byte enable.  The "Q"
 * forms enable byte k of element IDX from predicate bit
 * (element size * IDX + k) of QsV, where "i0" is the byte counter declared
 * inside SCATTER_FUNCTION.
 *
 * The Q parameter is accepted but not used: the predicate is always read
 * from QsV.
 */
#define fVLOG_VTCM_HALFWORD(EA, OFFSET, IN, IDX, LEN) \
    SCATTER_FUNCTION(EA, OFFSET, IDX, LEN, 2, IDX, 1, IN)
#define fVLOG_VTCM_WORD(EA, OFFSET, IN, IDX, LEN) \
    SCATTER_FUNCTION(EA, OFFSET, IDX, LEN, 4, IDX, 1, IN)
#define fVLOG_VTCM_HALFWORDQ(EA, OFFSET, IN, IDX, Q, LEN) \
    SCATTER_FUNCTION(EA, OFFSET, IDX, LEN, 2, IDX, \
                     fGETQBIT(QsV, 2 * (IDX) + i0), IN)
#define fVLOG_VTCM_WORDQ(EA, OFFSET, IN, IDX, Q, LEN) \
    SCATTER_FUNCTION(EA, OFFSET, IDX, LEN, 4, IDX, \
                     fGETQBIT(QsV, 4 * (IDX) + i0), IN)
#define fVLOG_VTCM_HALFWORD_DV(EA, OFFSET, IN, IDX, IDX2, IDX_H, LEN) \
    SCATTER_FUNCTION(EA, OFFSET, IDX, LEN, 2, \
                     (2 * (IDX2) + (IDX_H)), 1, IN)
#define fVLOG_VTCM_HALFWORDQ_DV(EA, OFFSET, IN, IDX, Q, IDX2, IDX_H, LEN) \
    SCATTER_FUNCTION(EA, OFFSET, IDX, LEN, 2, (2 * (IDX2) + (IDX_H)), \
                     fGETQBIT(QsV, 2 * (IDX) + i0), IN)
/*
 * Only performs the vector alignment check on EA (mask fVECSIZE() - 1);
 * TYPE is not used.  The do/while wrapper turns the bare "if" produced by
 * fV_AL_CHECK into a single statement.
 */
#define fSTORERELEASE(EA, TYPE) \
    do { \
        fV_AL_CHECK(EA, (fVECSIZE() - 1)); \
    } while (0)
#ifdef QEMU_GENERATE
/*
 * Vector load/store generators, available only when QEMU_GENERATE is
 * defined.  DST, SRC and MASK are register names; "_off" is pasted onto them
 * to form the identifier passed to the gen_vreg_* helper.  "ctx" and, for
 * stores, "insn" must be in scope.
 *
 * NOTE(review): the trailing boolean presumably selects the aligned (true)
 * or unaligned (false) form for plain loads/stores, and the inverted-
 * predicate form for fSTOREMMVNQ; confirm against the gen_vreg_* helpers.
 */
#define fLOADMMV(EA, DST) gen_vreg_load(ctx, DST##_off, EA, true)
#define fLOADMMVU(EA, DST) gen_vreg_load(ctx, DST##_off, EA, false)

#define fSTOREMMV(EA, SRC) \
    gen_vreg_store(ctx, EA, SRC##_off, insn->slot, true)
#define fSTOREMMVU(EA, SRC) \
    gen_vreg_store(ctx, EA, SRC##_off, insn->slot, false)

#define fSTOREMMVQ(EA, SRC, MASK) \
    gen_vreg_masked_store(ctx, EA, SRC##_off, MASK##_off, insn->slot, false)
#define fSTOREMMVNQ(EA, SRC, MASK) \
    gen_vreg_masked_store(ctx, EA, SRC##_off, MASK##_off, insn->slot, true)
#endif /* QEMU_GENERATE */
/* Loop header: VAR runs over every WIDTH-bit element index of one vector. */
#define fVFOREACH(WIDTH, VAR) \
    for (VAR = 0; VAR < fVELEM(WIDTH); VAR++)
/*
 * Element INDEX of a multi-vector ARRAY viewed through member TYPE.
 * With N = fVECSIZE() / sizeof(ARRAY.TYPE[0]) elements per vector, this
 * selects ARRAY.v[INDEX / N].TYPE[INDEX % N].
 */
#define fVARRAY_ELEMENT_ACCESS(ARRAY, TYPE, INDEX) \
    ARRAY.v[(INDEX) / (fVECSIZE() / (sizeof(ARRAY.TYPE[0])))] \
         .TYPE[(INDEX) % (fVECSIZE() / (sizeof(ARRAY.TYPE[0])))]
308  
/*
 * Element arithmetic helpers.  Operands are widened from WIDTH to 2 * WIDTH
 * bits with fZXTN (unsigned forms) or fSXTN (signed forms) before the
 * operation; the *SAT forms pass the result through fVSATUN / fVSATN.
 */

/* (U << 32) | zero-extended V, as a 64-bit value, passed to fVSATW. */
#define fVSATDW(U, V) fVSATW(((((long long)(U)) << 32) | fZXTN(32, 64, V)))
/* (U << 1) | (V >> 31), passed to fVSATW. */
#define fVASL_SATHI(U, V) fVSATW(((U) << 1) | ((V) >> 31))

/* Add / subtract, then fVSATUN (unsigned) or fVSATN (signed). */
#define fVUADDSAT(WIDTH, U, V) \
    fVSATUN(WIDTH, fZXTN(WIDTH, 2 * (WIDTH), U) + fZXTN(WIDTH, 2 * (WIDTH), V))
#define fVSADDSAT(WIDTH, U, V) \
    fVSATN(WIDTH, fSXTN(WIDTH, 2 * (WIDTH), U) + fSXTN(WIDTH, 2 * (WIDTH), V))
#define fVUSUBSAT(WIDTH, U, V) \
    fVSATUN(WIDTH, fZXTN(WIDTH, 2 * (WIDTH), U) - fZXTN(WIDTH, 2 * (WIDTH), V))
#define fVSSUBSAT(WIDTH, U, V) \
    fVSATN(WIDTH, fSXTN(WIDTH, 2 * (WIDTH), U) - fSXTN(WIDTH, 2 * (WIDTH), V))

/* Unsigned (U + V) >> 1 and (U - V) >> 1; the RND forms add 1 first. */
#define fVAVGU(WIDTH, U, V) \
    ((fZXTN(WIDTH, 2 * (WIDTH), U) + fZXTN(WIDTH, 2 * (WIDTH), V)) >> 1)
#define fVAVGURND(WIDTH, U, V) \
    ((fZXTN(WIDTH, 2 * (WIDTH), U) + fZXTN(WIDTH, 2 * (WIDTH), V) + 1) >> 1)
#define fVNAVGU(WIDTH, U, V) \
    ((fZXTN(WIDTH, 2 * (WIDTH), U) - fZXTN(WIDTH, 2 * (WIDTH), V)) >> 1)
#define fVNAVGURNDSAT(WIDTH, U, V) \
    fVSATUN(WIDTH, ((fZXTN(WIDTH, 2 * (WIDTH), U) - \
                     fZXTN(WIDTH, 2 * (WIDTH), V) + 1) >> 1))

/* Signed (U + V) >> 1 and (U - V) >> 1; the RND forms add 1 first. */
#define fVAVGS(WIDTH, U, V) \
    ((fSXTN(WIDTH, 2 * (WIDTH), U) + fSXTN(WIDTH, 2 * (WIDTH), V)) >> 1)
#define fVAVGSRND(WIDTH, U, V) \
    ((fSXTN(WIDTH, 2 * (WIDTH), U) + fSXTN(WIDTH, 2 * (WIDTH), V) + 1) >> 1)
#define fVNAVGS(WIDTH, U, V) \
    ((fSXTN(WIDTH, 2 * (WIDTH), U) - fSXTN(WIDTH, 2 * (WIDTH), V)) >> 1)
#define fVNAVGSRND(WIDTH, U, V) \
    ((fSXTN(WIDTH, 2 * (WIDTH), U) - fSXTN(WIDTH, 2 * (WIDTH), V) + 1) >> 1)
#define fVNAVGSRNDSAT(WIDTH, U, V) \
    fVSATN(WIDTH, ((fSXTN(WIDTH, 2 * (WIDTH), U) - \
                    fSXTN(WIDTH, 2 * (WIDTH), V) + 1) >> 1))
/*
 * Pass-through forms: evaluate to VAL unchanged (SHAMT is ignored).  The
 * result is parenthesized so that it behaves as a single operand in the
 * surrounding expression.
 */
#define fVNOROUND(VAL, SHAMT) (VAL)
#define fVNOSAT(VAL) (VAL)
/* VAL plus half of 1 << SHAMT (i.e. 1 << (SHAMT - 1)); VAL when SHAMT <= 0. */
#define fVROUND(VAL, SHAMT) \
    ((VAL) + (((SHAMT) > 0) ? (1LL << ((SHAMT) - 1)) : 0))
/* Bit 32 of the sum of zero-extended A, zero-extended B and C. */
#define fCARRY_FROM_ADD32(A, B, C) \
    (((fZXTN(32, 64, A) + fZXTN(32, 64, B) + (C)) >> 32) & 1)
/* Micro-architecture notes: expand to nothing. */
#define fUARCH_NOTE_PUMP_4X()
#define fUARCH_NOTE_PUMP_2X()

/* Expands to nothing. */
#define IV1DEAD()
349  
/*
 * Assemble a signed 10-bit coefficient from VAL and store it in COE:
 *   - low 8 bits:  byte POS of VAL (bits 8*POS .. 8*POS+7),
 *   - upper part:  the sign-extended 2-bit field at bit 24 + 2*POS,
 *                  scaled by 256.
 *
 * The sign-extended field is multiplied by 256 instead of being shifted so
 * that a negative value is never left-shifted.  POS is parenthesized so that
 * compound expressions select the intended fields.  Like the other
 * statement macros in this file, the terminating semicolon is supplied at
 * the call site.
 */
#define fGET10BIT(COE, VAL, POS) \
    do { \
        (COE) = (sextract32((VAL), 24 + 2 * (POS), 2) * 256) | \
                extract32((VAL), (POS) * 8, 8); \
    } while (0)
355  
356  #endif
357