xref: /openbmc/qemu/target/hexagon/gen_tcg.h (revision a32086de)
1 /*
2  *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
3  *
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #ifndef HEXAGON_GEN_TCG_H
19 #define HEXAGON_GEN_TCG_H
20 
21 /*
22  * Here is a primer to understand the tag names for load/store instructions
23  *
24  * Data types
25  *      b        signed byte                       r0 = memb(r2+#0)
26  *     ub        unsigned byte                     r0 = memub(r2+#0)
27  *      h        signed half word (16 bits)        r0 = memh(r2+#0)
28  *     uh        unsigned half word                r0 = memuh(r2+#0)
29  *      i        integer (32 bits)                 r0 = memw(r2+#0)
30  *      d        double word (64 bits)             r1:0 = memd(r2+#0)
31  *
32  * Addressing modes
33  *     _io       indirect with offset              r0 = memw(r1+#4)
34  *     _ur       absolute with register offset     r0 = memw(r1<<#4+##variable)
35  *     _rr       indirect with register offset     r0 = memw(r1+r4<<#2)
36  *     gp        global pointer relative           r0 = memw(gp+#200)
37  *     _sp       stack pointer relative            r0 = memw(r29+#12)
38  *     _ap       absolute set                      r0 = memw(r1=##variable)
39  *     _pr       post increment register           r0 = memw(r1++m1)
40  *     _pbr      post increment bit reverse        r0 = memw(r1++m1:brev)
41  *     _pi       post increment immediate          r0 = memb(r1++#1)
42  *     _pci      post increment circular immediate r0 = memw(r1++#4:circ(m0))
43  *     _pcr      post increment circular register  r0 = memw(r1++I:circ(m0))
44  */
45 
46 /* Macros for complex addressing modes */
/*
 * Each GET_EA_<mode> macro expands to one statement that sets up EA for
 * the access and performs the register update belonging to that mode.
 * fEA_* and fPM_* are provided elsewhere.
 */

/* _ap: address from the immediate UiV; UiV is also written to ReV */
#define GET_EA_ap \
    do { \
        fEA_IMM(UiV); \
        tcg_gen_movi_tl(ReV, UiV); \
    } while (0)

/* _pr: address from RxV, then fPM_M() post-modifies RxV with MuV */
#define GET_EA_pr \
    do { \
        fEA_REG(RxV); \
        fPM_M(RxV, MuV); \
    } while (0)

/* _pbr: EA is produced by the fbrev helper from RxV, then RxV += MuV */
#define GET_EA_pbr \
    do { \
        gen_helper_fbrev(EA, RxV); \
        tcg_gen_add_tl(RxV, RxV, MuV); \
    } while (0)

/* _pi: address from RxV, then fPM_I() post-modifies RxV with siV */
#define GET_EA_pi \
    do { \
        fEA_REG(RxV); \
        fPM_I(RxV, siV); \
    } while (0)

/*
 * _pci: EA = RxV, then RxV is advanced by the fcircadd helper using the
 * constant siV, MuV and the CS register selected by MuN.
 */
#define GET_EA_pci \
    do { \
        tcg_gen_mov_tl(EA, RxV); \
        gen_helper_fcircadd(RxV, RxV, tcg_constant_tl(siV), MuV, \
                            hex_gpr[HEX_REG_CS0 + MuN]); \
    } while (0)

/*
 * _pcr: like _pci, but the increment is read from MuV by gen_read_ireg()
 * with the given SHIFT instead of coming from an immediate.
 */
#define GET_EA_pcr(SHIFT) \
    do { \
        TCGv incr = tcg_temp_new(); \
        tcg_gen_mov_tl(EA, RxV); \
        gen_read_ireg(incr, MuV, (SHIFT)); \
        gen_helper_fcircadd(RxV, RxV, incr, MuV, \
                            hex_gpr[HEX_REG_CS0 + MuN]); \
        tcg_temp_free(incr); \
    } while (0)
82 
83 /* Instructions with multiple definitions */
/*
 * Absolute-set loads: load SIZE bytes (SIGN = u or s) from the address
 * given by the immediate UiV into RES, then write UiV to ReV.
 * fMUST_IMMEXT, fEA_IMM and fLOAD are provided elsewhere.
 */
#define fGEN_TCG_LOAD_AP(RES, SIZE, SIGN) \
    do { \
        fMUST_IMMEXT(UiV); \
        fEA_IMM(UiV); \
        fLOAD(1, SIZE, SIGN, EA, RES); \
        tcg_gen_movi_tl(ReV, UiV); \
    } while (0)

#define fGEN_TCG_L4_loadrub_ap(SHORTCODE)  fGEN_TCG_LOAD_AP(RdV, 1, u)
#define fGEN_TCG_L4_loadrb_ap(SHORTCODE)   fGEN_TCG_LOAD_AP(RdV, 1, s)
#define fGEN_TCG_L4_loadruh_ap(SHORTCODE)  fGEN_TCG_LOAD_AP(RdV, 2, u)
#define fGEN_TCG_L4_loadrh_ap(SHORTCODE)   fGEN_TCG_LOAD_AP(RdV, 2, s)
#define fGEN_TCG_L4_loadri_ap(SHORTCODE)   fGEN_TCG_LOAD_AP(RdV, 4, u)
#define fGEN_TCG_L4_loadrd_ap(SHORTCODE)   fGEN_TCG_LOAD_AP(RddV, 8, u)
104 
/* Post increment circular immediate loads: the shortcode is used as is */
#define fGEN_TCG_L2_loadrub_pci(SHORTCODE) \
    SHORTCODE
#define fGEN_TCG_L2_loadrb_pci(SHORTCODE) \
    SHORTCODE
#define fGEN_TCG_L2_loadruh_pci(SHORTCODE) \
    SHORTCODE
#define fGEN_TCG_L2_loadrh_pci(SHORTCODE) \
    SHORTCODE
#define fGEN_TCG_L2_loadri_pci(SHORTCODE) \
    SHORTCODE
#define fGEN_TCG_L2_loadrd_pci(SHORTCODE) \
    SHORTCODE
111 
/*
 * Post increment circular register loads.
 *
 * GET_EA_pcr(SHIFT) copies RxV into EA and advances RxV through the
 * fcircadd helper (it also parenthesizes SHIFT before handing it to
 * gen_read_ireg()).  LOAD is then expanded to perform the access at EA.
 * Reusing GET_EA_pcr keeps the address sequence in a single place.
 */
#define fGEN_TCG_LOAD_pcr(SHIFT, LOAD) \
    do { \
        GET_EA_pcr(SHIFT); \
        LOAD; \
    } while (0)

/* In each case SHIFT is log2 of the number of bytes loaded */
#define fGEN_TCG_L2_loadrub_pcr(SHORTCODE) \
    fGEN_TCG_LOAD_pcr(0, fLOAD(1, 1, u, EA, RdV))
#define fGEN_TCG_L2_loadrb_pcr(SHORTCODE) \
    fGEN_TCG_LOAD_pcr(0, fLOAD(1, 1, s, EA, RdV))
#define fGEN_TCG_L2_loadruh_pcr(SHORTCODE) \
    fGEN_TCG_LOAD_pcr(1, fLOAD(1, 2, u, EA, RdV))
#define fGEN_TCG_L2_loadrh_pcr(SHORTCODE) \
    fGEN_TCG_LOAD_pcr(1, fLOAD(1, 2, s, EA, RdV))
#define fGEN_TCG_L2_loadri_pcr(SHORTCODE) \
    fGEN_TCG_LOAD_pcr(2, fLOAD(1, 4, u, EA, RdV))
#define fGEN_TCG_L2_loadrd_pcr(SHORTCODE) \
    fGEN_TCG_LOAD_pcr(3, fLOAD(1, 8, u, EA, RddV))
134 
/*
 * Post increment register / bit reverse / immediate loads:
 * the shortcode is used as is.
 */

/* unsigned byte */
#define fGEN_TCG_L2_loadrub_pr(SHORTCODE) \
    SHORTCODE
#define fGEN_TCG_L2_loadrub_pbr(SHORTCODE) \
    SHORTCODE
#define fGEN_TCG_L2_loadrub_pi(SHORTCODE) \
    SHORTCODE

/* signed byte */
#define fGEN_TCG_L2_loadrb_pr(SHORTCODE) \
    SHORTCODE
#define fGEN_TCG_L2_loadrb_pbr(SHORTCODE) \
    SHORTCODE
#define fGEN_TCG_L2_loadrb_pi(SHORTCODE) \
    SHORTCODE

/* unsigned half word */
#define fGEN_TCG_L2_loadruh_pr(SHORTCODE) \
    SHORTCODE
#define fGEN_TCG_L2_loadruh_pbr(SHORTCODE) \
    SHORTCODE
#define fGEN_TCG_L2_loadruh_pi(SHORTCODE) \
    SHORTCODE

/* signed half word */
#define fGEN_TCG_L2_loadrh_pr(SHORTCODE) \
    SHORTCODE
#define fGEN_TCG_L2_loadrh_pbr(SHORTCODE) \
    SHORTCODE
#define fGEN_TCG_L2_loadrh_pi(SHORTCODE) \
    SHORTCODE

/* word */
#define fGEN_TCG_L2_loadri_pr(SHORTCODE) \
    SHORTCODE
#define fGEN_TCG_L2_loadri_pbr(SHORTCODE) \
    SHORTCODE
#define fGEN_TCG_L2_loadri_pi(SHORTCODE) \
    SHORTCODE

/* double word */
#define fGEN_TCG_L2_loadrd_pr(SHORTCODE) \
    SHORTCODE
#define fGEN_TCG_L2_loadrd_pbr(SHORTCODE) \
    SHORTCODE
#define fGEN_TCG_L2_loadrd_pi(SHORTCODE) \
    SHORTCODE
153 
154 /*
 * These instructions load 2 bytes and place them in
 * the two half words of the destination register.
157  * The GET_EA macro determines the addressing mode.
158  * The SIGN argument determines whether to zero-extend or
159  * sign-extend.
160  */
#define fGEN_TCG_loadbXw2(GET_EA, SIGN) \
    do { \
        TCGv lane = tcg_temp_new(); \
        TCGv packed = tcg_temp_new(); \
        GET_EA; \
        /* one 2-byte load, then byte n is expanded into half word n */ \
        fLOAD(1, 2, u, EA, packed); \
        tcg_gen_movi_tl(RdV, 0); \
        for (int n = 0; n < 2; n++) { \
            TCGv ext = gen_get_byte(lane, n, packed, (SIGN)); \
            gen_set_half(n, RdV, ext); \
        } \
        tcg_temp_free(packed); \
        tcg_temp_free(lane); \
    } while (0)
174 
/* Zero-extending forms (SIGN = false), one per addressing mode */
#define fGEN_TCG_L2_loadbzw2_io(SHORTCODE) \
    fGEN_TCG_loadbXw2(fEA_RI(RsV, siV), false)
#define fGEN_TCG_L4_loadbzw2_ur(SHORTCODE) \
    fGEN_TCG_loadbXw2(fEA_IRs(UiV, RtV, uiV), false)
#define fGEN_TCG_L4_loadbzw2_ap(SHORTCODE) \
    fGEN_TCG_loadbXw2(GET_EA_ap, false)
#define fGEN_TCG_L2_loadbzw2_pr(SHORTCODE) \
    fGEN_TCG_loadbXw2(GET_EA_pr, false)
#define fGEN_TCG_L2_loadbzw2_pbr(SHORTCODE) \
    fGEN_TCG_loadbXw2(GET_EA_pbr, false)
#define fGEN_TCG_L2_loadbzw2_pi(SHORTCODE) \
    fGEN_TCG_loadbXw2(GET_EA_pi, false)
#define fGEN_TCG_L2_loadbzw2_pci(SHORTCODE) \
    fGEN_TCG_loadbXw2(GET_EA_pci, false)
#define fGEN_TCG_L2_loadbzw2_pcr(SHORTCODE) \
    fGEN_TCG_loadbXw2(GET_EA_pcr(1), false)

/* Sign-extending forms (SIGN = true), one per addressing mode */
#define fGEN_TCG_L2_loadbsw2_io(SHORTCODE) \
    fGEN_TCG_loadbXw2(fEA_RI(RsV, siV), true)
#define fGEN_TCG_L4_loadbsw2_ur(SHORTCODE) \
    fGEN_TCG_loadbXw2(fEA_IRs(UiV, RtV, uiV), true)
#define fGEN_TCG_L4_loadbsw2_ap(SHORTCODE) \
    fGEN_TCG_loadbXw2(GET_EA_ap, true)
#define fGEN_TCG_L2_loadbsw2_pr(SHORTCODE) \
    fGEN_TCG_loadbXw2(GET_EA_pr, true)
#define fGEN_TCG_L2_loadbsw2_pbr(SHORTCODE) \
    fGEN_TCG_loadbXw2(GET_EA_pbr, true)
#define fGEN_TCG_L2_loadbsw2_pi(SHORTCODE) \
    fGEN_TCG_loadbXw2(GET_EA_pi, true)
#define fGEN_TCG_L2_loadbsw2_pci(SHORTCODE) \
    fGEN_TCG_loadbXw2(GET_EA_pci, true)
#define fGEN_TCG_L2_loadbsw2_pcr(SHORTCODE) \
    fGEN_TCG_loadbXw2(GET_EA_pcr(1), true)
207 
208 /*
 * These instructions load 4 bytes and place them in
 * the four half words of the destination register pair.
211  * The GET_EA macro determines the addressing mode.
212  * The SIGN argument determines whether to zero-extend or
213  * sign-extend.
214  */
#define fGEN_TCG_loadbXw4(GET_EA, SIGN) \
    do { \
        TCGv lane = tcg_temp_new(); \
        TCGv packed = tcg_temp_new(); \
        GET_EA; \
        /* one 4-byte load, then byte n is expanded into half word n */ \
        fLOAD(1, 4, u, EA, packed); \
        tcg_gen_movi_i64(RddV, 0); \
        for (int n = 0; n < 4; n++) { \
            TCGv ext = gen_get_byte(lane, n, packed, (SIGN)); \
            gen_set_half_i64(n, RddV, ext); \
        } \
        tcg_temp_free(packed); \
        tcg_temp_free(lane); \
    } while (0)
228 
/* Zero-extending forms (SIGN = false), one per addressing mode */
#define fGEN_TCG_L2_loadbzw4_io(SHORTCODE) \
    fGEN_TCG_loadbXw4(fEA_RI(RsV, siV), false)
#define fGEN_TCG_L4_loadbzw4_ur(SHORTCODE) \
    fGEN_TCG_loadbXw4(fEA_IRs(UiV, RtV, uiV), false)
#define fGEN_TCG_L4_loadbzw4_ap(SHORTCODE) \
    fGEN_TCG_loadbXw4(GET_EA_ap, false)
#define fGEN_TCG_L2_loadbzw4_pr(SHORTCODE) \
    fGEN_TCG_loadbXw4(GET_EA_pr, false)
#define fGEN_TCG_L2_loadbzw4_pbr(SHORTCODE) \
    fGEN_TCG_loadbXw4(GET_EA_pbr, false)
#define fGEN_TCG_L2_loadbzw4_pi(SHORTCODE) \
    fGEN_TCG_loadbXw4(GET_EA_pi, false)
#define fGEN_TCG_L2_loadbzw4_pci(SHORTCODE) \
    fGEN_TCG_loadbXw4(GET_EA_pci, false)
#define fGEN_TCG_L2_loadbzw4_pcr(SHORTCODE) \
    fGEN_TCG_loadbXw4(GET_EA_pcr(2), false)

/* Sign-extending forms (SIGN = true), one per addressing mode */
#define fGEN_TCG_L2_loadbsw4_io(SHORTCODE) \
    fGEN_TCG_loadbXw4(fEA_RI(RsV, siV), true)
#define fGEN_TCG_L4_loadbsw4_ur(SHORTCODE) \
    fGEN_TCG_loadbXw4(fEA_IRs(UiV, RtV, uiV), true)
#define fGEN_TCG_L4_loadbsw4_ap(SHORTCODE) \
    fGEN_TCG_loadbXw4(GET_EA_ap, true)
#define fGEN_TCG_L2_loadbsw4_pr(SHORTCODE) \
    fGEN_TCG_loadbXw4(GET_EA_pr, true)
#define fGEN_TCG_L2_loadbsw4_pbr(SHORTCODE) \
    fGEN_TCG_loadbXw4(GET_EA_pbr, true)
#define fGEN_TCG_L2_loadbsw4_pi(SHORTCODE) \
    fGEN_TCG_loadbXw4(GET_EA_pi, true)
#define fGEN_TCG_L2_loadbsw4_pci(SHORTCODE) \
    fGEN_TCG_loadbXw4(GET_EA_pci, true)
#define fGEN_TCG_L2_loadbsw4_pcr(SHORTCODE) \
    fGEN_TCG_loadbXw4(GET_EA_pcr(2), true)
261 
262 /*
263  * These instructions load a half word, shift the destination right by 16 bits
264  * and place the loaded value in the high half word of the destination pair.
265  * The GET_EA macro determines the addressing mode.
266  */
#define fGEN_TCG_loadalignh(GET_EA) \
    do { \
        TCGv_i64 wide = tcg_temp_new_i64(); \
        TCGv loaded = tcg_temp_new(); \
        GET_EA; \
        fLOAD(1, 2, u, EA, loaded); \
        tcg_gen_extu_i32_i64(wide, loaded); \
        /* make room at the top of RyyV, then drop the value in */ \
        tcg_gen_shri_i64(RyyV, RyyV, 16); \
        tcg_gen_deposit_i64(RyyV, RyyV, wide, 48, 16); \
        tcg_temp_free_i64(wide); \
        tcg_temp_free(loaded); \
    } while (0)
279 
/* One definition per addressing mode, in the order of the primer above */
#define fGEN_TCG_L2_loadalignh_io(SHORTCODE) \
    fGEN_TCG_loadalignh(fEA_RI(RsV, siV))
#define fGEN_TCG_L4_loadalignh_ur(SHORTCODE) \
    fGEN_TCG_loadalignh(fEA_IRs(UiV, RtV, uiV))
#define fGEN_TCG_L4_loadalignh_ap(SHORTCODE) \
    fGEN_TCG_loadalignh(GET_EA_ap)
#define fGEN_TCG_L2_loadalignh_pr(SHORTCODE) \
    fGEN_TCG_loadalignh(GET_EA_pr)
#define fGEN_TCG_L2_loadalignh_pbr(SHORTCODE) \
    fGEN_TCG_loadalignh(GET_EA_pbr)
#define fGEN_TCG_L2_loadalignh_pi(SHORTCODE) \
    fGEN_TCG_loadalignh(GET_EA_pi)
#define fGEN_TCG_L2_loadalignh_pci(SHORTCODE) \
    fGEN_TCG_loadalignh(GET_EA_pci)
#define fGEN_TCG_L2_loadalignh_pcr(SHORTCODE) \
    fGEN_TCG_loadalignh(GET_EA_pcr(1))
296 
297 /* Same as above, but loads a byte instead of half word */
#define fGEN_TCG_loadalignb(GET_EA) \
    do { \
        TCGv_i64 wide = tcg_temp_new_i64(); \
        TCGv loaded = tcg_temp_new(); \
        GET_EA; \
        fLOAD(1, 1, u, EA, loaded); \
        tcg_gen_extu_i32_i64(wide, loaded); \
        /* shift RyyV right by 8 and deposit the byte into bits 63:56 */ \
        tcg_gen_shri_i64(RyyV, RyyV, 8); \
        tcg_gen_deposit_i64(RyyV, RyyV, wide, 56, 8); \
        tcg_temp_free_i64(wide); \
        tcg_temp_free(loaded); \
    } while (0)
310 
/* One definition per addressing mode, in the order of the primer above */
#define fGEN_TCG_L2_loadalignb_io(SHORTCODE) \
    fGEN_TCG_loadalignb(fEA_RI(RsV, siV))
#define fGEN_TCG_L4_loadalignb_ur(SHORTCODE) \
    fGEN_TCG_loadalignb(fEA_IRs(UiV, RtV, uiV))
#define fGEN_TCG_L4_loadalignb_ap(SHORTCODE) \
    fGEN_TCG_loadalignb(GET_EA_ap)
#define fGEN_TCG_L2_loadalignb_pr(SHORTCODE) \
    fGEN_TCG_loadalignb(GET_EA_pr)
#define fGEN_TCG_L2_loadalignb_pbr(SHORTCODE) \
    fGEN_TCG_loadalignb(GET_EA_pbr)
#define fGEN_TCG_L2_loadalignb_pi(SHORTCODE) \
    fGEN_TCG_loadalignb(GET_EA_pi)
#define fGEN_TCG_L2_loadalignb_pci(SHORTCODE) \
    fGEN_TCG_loadalignb(GET_EA_pci)
#define fGEN_TCG_L2_loadalignb_pcr(SHORTCODE) \
    fGEN_TCG_loadalignb(GET_EA_pcr(0))
327 
328 /*
329  * Predicated loads
330  * Here is a primer to understand the tag names
331  *
332  * Predicate used
333  *      t        true "old" value                  if (p0) r0 = memb(r2+#0)
334  *      f        false "old" value                 if (!p0) r0 = memb(r2+#0)
335  *      tnew     true "new" value                  if (p0.new) r0 = memb(r2+#0)
336  *      fnew     false "new" value                 if (!p0.new) r0 = memb(r2+#0)
337  */
/*
 * Generate a predicated load of SIZE bytes (SIGN = u or s) into RdV.
 *
 *   GET_EA  statement that computes EA (one of the GET_EA_* macros)
 *   PRED    statement that evaluates the predicate; NOTE(review): it is
 *           expected to leave its result in the temp named LSB, which is
 *           why that name is fixed here -- confirm against fLSBOLD & co.
 *
 * Sequence: EA is cleared, PRED is evaluated, then CHECK_NOSHUF_PRED and
 * PRED_LOAD_CANCEL (both defined elsewhere) are given GET_EA and LSB.
 * RdV is zeroed before the branch, so it holds 0 when LSB == 0 and the
 * fLOAD is skipped.  LSB is allocated with tcg_temp_local_new() and is
 * tested by the conditional branch.
 */
#define fGEN_TCG_PRED_LOAD(GET_EA, PRED, SIZE, SIGN) \
    do { \
        TCGv LSB = tcg_temp_local_new(); \
        TCGLabel *label = gen_new_label(); \
        tcg_gen_movi_tl(EA, 0); \
        PRED;  \
        CHECK_NOSHUF_PRED(GET_EA, SIZE, LSB); \
        PRED_LOAD_CANCEL(LSB, EA); \
        tcg_gen_movi_tl(RdV, 0); \
        tcg_gen_brcondi_tl(TCG_COND_EQ, LSB, 0, label); \
        fLOAD(1, SIZE, SIGN, EA, RdV); \
        gen_set_label(label); \
        tcg_temp_free(LSB); \
    } while (0)
352 
/*
 * Predicated post increment immediate loads.
 *
 * Every variant uses GET_EA_pi for the address and differs only in the
 * predicate form (t / f / tnew / fnew), the access size and the
 * signedness handed to fGEN_TCG_PRED_LOAD.
 */

/* unsigned byte */
#define fGEN_TCG_L2_ploadrubt_pi(SHORTCODE) \
    fGEN_TCG_PRED_LOAD(GET_EA_pi, fLSBOLD(PtV), 1, u)
#define fGEN_TCG_L2_ploadrubf_pi(SHORTCODE) \
    fGEN_TCG_PRED_LOAD(GET_EA_pi, fLSBOLDNOT(PtV), 1, u)
#define fGEN_TCG_L2_ploadrubtnew_pi(SHORTCODE) \
    fGEN_TCG_PRED_LOAD(GET_EA_pi, fLSBNEW(PtN), 1, u)
#define fGEN_TCG_L2_ploadrubfnew_pi(SHORTCODE) \
    fGEN_TCG_PRED_LOAD(GET_EA_pi, fLSBNEWNOT(PtN), 1, u)

/* signed byte */
#define fGEN_TCG_L2_ploadrbt_pi(SHORTCODE) \
    fGEN_TCG_PRED_LOAD(GET_EA_pi, fLSBOLD(PtV), 1, s)
#define fGEN_TCG_L2_ploadrbf_pi(SHORTCODE) \
    fGEN_TCG_PRED_LOAD(GET_EA_pi, fLSBOLDNOT(PtV), 1, s)
#define fGEN_TCG_L2_ploadrbtnew_pi(SHORTCODE) \
    fGEN_TCG_PRED_LOAD(GET_EA_pi, fLSBNEW(PtN), 1, s)
/* GET_EA_pi expands to the same fEA_REG/fPM_I pair that was open-coded here */
#define fGEN_TCG_L2_ploadrbfnew_pi(SHORTCODE) \
    fGEN_TCG_PRED_LOAD(GET_EA_pi, fLSBNEWNOT(PtN), 1, s)

/* unsigned half word */
#define fGEN_TCG_L2_ploadruht_pi(SHORTCODE) \
    fGEN_TCG_PRED_LOAD(GET_EA_pi, fLSBOLD(PtV), 2, u)
#define fGEN_TCG_L2_ploadruhf_pi(SHORTCODE) \
    fGEN_TCG_PRED_LOAD(GET_EA_pi, fLSBOLDNOT(PtV), 2, u)
#define fGEN_TCG_L2_ploadruhtnew_pi(SHORTCODE) \
    fGEN_TCG_PRED_LOAD(GET_EA_pi, fLSBNEW(PtN), 2, u)
#define fGEN_TCG_L2_ploadruhfnew_pi(SHORTCODE) \
    fGEN_TCG_PRED_LOAD(GET_EA_pi, fLSBNEWNOT(PtN), 2, u)

/* signed half word */
#define fGEN_TCG_L2_ploadrht_pi(SHORTCODE) \
    fGEN_TCG_PRED_LOAD(GET_EA_pi, fLSBOLD(PtV), 2, s)
#define fGEN_TCG_L2_ploadrhf_pi(SHORTCODE) \
    fGEN_TCG_PRED_LOAD(GET_EA_pi, fLSBOLDNOT(PtV), 2, s)
#define fGEN_TCG_L2_ploadrhtnew_pi(SHORTCODE) \
    fGEN_TCG_PRED_LOAD(GET_EA_pi, fLSBNEW(PtN), 2, s)
#define fGEN_TCG_L2_ploadrhfnew_pi(SHORTCODE) \
    fGEN_TCG_PRED_LOAD(GET_EA_pi, fLSBNEWNOT(PtN), 2, s)

/* word */
#define fGEN_TCG_L2_ploadrit_pi(SHORTCODE) \
    fGEN_TCG_PRED_LOAD(GET_EA_pi, fLSBOLD(PtV), 4, u)
#define fGEN_TCG_L2_ploadrif_pi(SHORTCODE) \
    fGEN_TCG_PRED_LOAD(GET_EA_pi, fLSBOLDNOT(PtV), 4, u)
#define fGEN_TCG_L2_ploadritnew_pi(SHORTCODE) \
    fGEN_TCG_PRED_LOAD(GET_EA_pi, fLSBNEW(PtN), 4, u)
#define fGEN_TCG_L2_ploadrifnew_pi(SHORTCODE) \
    fGEN_TCG_PRED_LOAD(GET_EA_pi, fLSBNEWNOT(PtN), 4, u)
396 
/*
 * Predicated loads into a register pair
 *
 * Same sequence as fGEN_TCG_PRED_LOAD, with a fixed 8-byte unsigned
 * access and the 64-bit destination RddV: RddV is zeroed before the
 * branch, so it holds 0 when LSB == 0 and the fLOAD is skipped.
 * NOTE(review): PRED is expected to leave its result in the temp named
 * LSB -- confirm against the fLSB* macros.
 */
#define fGEN_TCG_PRED_LOAD_PAIR(GET_EA, PRED) \
    do { \
        TCGv LSB = tcg_temp_local_new(); \
        TCGLabel *label = gen_new_label(); \
        tcg_gen_movi_tl(EA, 0); \
        PRED;  \
        CHECK_NOSHUF_PRED(GET_EA, 8, LSB); \
        PRED_LOAD_CANCEL(LSB, EA); \
        tcg_gen_movi_i64(RddV, 0); \
        tcg_gen_brcondi_tl(TCG_COND_EQ, LSB, 0, label); \
        fLOAD(1, 8, u, EA, RddV); \
        gen_set_label(label); \
        tcg_temp_free(LSB); \
    } while (0)

/* Post increment immediate forms for each predicate variant */
#define fGEN_TCG_L2_ploadrdt_pi(SHORTCODE) \
    fGEN_TCG_PRED_LOAD_PAIR(GET_EA_pi, fLSBOLD(PtV))
#define fGEN_TCG_L2_ploadrdf_pi(SHORTCODE) \
    fGEN_TCG_PRED_LOAD_PAIR(GET_EA_pi, fLSBOLDNOT(PtV))
#define fGEN_TCG_L2_ploadrdtnew_pi(SHORTCODE) \
    fGEN_TCG_PRED_LOAD_PAIR(GET_EA_pi, fLSBNEW(PtN))
#define fGEN_TCG_L2_ploadrdfnew_pi(SHORTCODE) \
    fGEN_TCG_PRED_LOAD_PAIR(GET_EA_pi, fLSBNEWNOT(PtN))
421 
422 /* load-locked and store-locked */
/* The shortcode is used as is for all four locked accesses */
#define fGEN_TCG_L2_loadw_locked(SHORTCODE)     SHORTCODE
#define fGEN_TCG_L4_loadd_locked(SHORTCODE)     SHORTCODE
#define fGEN_TCG_S2_storew_locked(SHORTCODE)    SHORTCODE
#define fGEN_TCG_S4_stored_locked(SHORTCODE)    SHORTCODE
431 
/*
 * Wrap a store shortcode with two scratch temps named HALF and BYTE.
 * NOTE(review): the names are presumably what fGETHALF()/fGETBYTE()
 * inside SHORTCODE refer to, so they must not be renamed -- confirm.
 */
#define fGEN_TCG_STORE(SHORTCODE) \
    do { \
        TCGv BYTE = tcg_temp_new(); \
        TCGv HALF = tcg_temp_new(); \
        SHORTCODE; \
        tcg_temp_free(BYTE); \
        tcg_temp_free(HALF); \
    } while (0)
440 
/*
 * Post increment circular register stores.
 *
 * GET_EA_pcr(SHIFT) copies RxV into EA and advances RxV through the
 * fcircadd helper (parenthesizing SHIFT on the way); STORE is then
 * expanded with the HALF and BYTE scratch temps in scope.
 * NOTE(review): HALF/BYTE are presumably what fGETHALF()/fGETBYTE()
 * inside STORE refer to, so the names are fixed -- confirm.
 */
#define fGEN_TCG_STORE_pcr(SHIFT, STORE) \
    do { \
        TCGv HALF = tcg_temp_new(); \
        TCGv BYTE = tcg_temp_new(); \
        GET_EA_pcr(SHIFT); \
        STORE; \
        tcg_temp_free(HALF); \
        tcg_temp_free(BYTE); \
    } while (0)
454 
/*
 * Stores with bit reverse (_pbr) and circular immediate (_pci) addressing
 * wrap the shortcode in fGEN_TCG_STORE.  Circular register (_pcr) stores
 * go through fGEN_TCG_STORE_pcr, where SHIFT is log2 of the store size.
 */

/* byte */
#define fGEN_TCG_S2_storerb_pbr(SHORTCODE)      fGEN_TCG_STORE(SHORTCODE)
#define fGEN_TCG_S2_storerb_pci(SHORTCODE)      fGEN_TCG_STORE(SHORTCODE)
#define fGEN_TCG_S2_storerb_pcr(SHORTCODE) \
    fGEN_TCG_STORE_pcr(0, fSTORE(1, 1, EA, fGETBYTE(0, RtV)))

/* half word 0 of RtV */
#define fGEN_TCG_S2_storerh_pbr(SHORTCODE)      fGEN_TCG_STORE(SHORTCODE)
#define fGEN_TCG_S2_storerh_pci(SHORTCODE)      fGEN_TCG_STORE(SHORTCODE)
#define fGEN_TCG_S2_storerh_pcr(SHORTCODE) \
    fGEN_TCG_STORE_pcr(1, fSTORE(1, 2, EA, fGETHALF(0, RtV)))

/* half word 1 of RtV */
#define fGEN_TCG_S2_storerf_pbr(SHORTCODE)      fGEN_TCG_STORE(SHORTCODE)
#define fGEN_TCG_S2_storerf_pci(SHORTCODE)      fGEN_TCG_STORE(SHORTCODE)
#define fGEN_TCG_S2_storerf_pcr(SHORTCODE) \
    fGEN_TCG_STORE_pcr(1, fSTORE(1, 2, EA, fGETHALF(1, RtV)))

/* word */
#define fGEN_TCG_S2_storeri_pbr(SHORTCODE)      fGEN_TCG_STORE(SHORTCODE)
#define fGEN_TCG_S2_storeri_pci(SHORTCODE)      fGEN_TCG_STORE(SHORTCODE)
#define fGEN_TCG_S2_storeri_pcr(SHORTCODE) \
    fGEN_TCG_STORE_pcr(2, fSTORE(1, 4, EA, RtV))

/* double word */
#define fGEN_TCG_S2_storerd_pbr(SHORTCODE)      fGEN_TCG_STORE(SHORTCODE)
#define fGEN_TCG_S2_storerd_pci(SHORTCODE)      fGEN_TCG_STORE(SHORTCODE)
#define fGEN_TCG_S2_storerd_pcr(SHORTCODE) \
    fGEN_TCG_STORE_pcr(3, fSTORE(1, 8, EA, RttV))

/* byte, source is NtN */
#define fGEN_TCG_S2_storerbnew_pbr(SHORTCODE)   fGEN_TCG_STORE(SHORTCODE)
#define fGEN_TCG_S2_storerbnew_pci(SHORTCODE)   fGEN_TCG_STORE(SHORTCODE)
#define fGEN_TCG_S2_storerbnew_pcr(SHORTCODE) \
    fGEN_TCG_STORE_pcr(0, fSTORE(1, 1, EA, fGETBYTE(0, NtN)))

/* half word, source is NtN */
#define fGEN_TCG_S2_storerhnew_pbr(SHORTCODE)   fGEN_TCG_STORE(SHORTCODE)
#define fGEN_TCG_S2_storerhnew_pci(SHORTCODE)   fGEN_TCG_STORE(SHORTCODE)
#define fGEN_TCG_S2_storerhnew_pcr(SHORTCODE) \
    fGEN_TCG_STORE_pcr(1, fSTORE(1, 2, EA, fGETHALF(0, NtN)))

/* word, source is NtN */
#define fGEN_TCG_S2_storerinew_pbr(SHORTCODE)   fGEN_TCG_STORE(SHORTCODE)
#define fGEN_TCG_S2_storerinew_pci(SHORTCODE)   fGEN_TCG_STORE(SHORTCODE)
#define fGEN_TCG_S2_storerinew_pcr(SHORTCODE) \
    fGEN_TCG_STORE_pcr(2, fSTORE(1, 4, EA, NtN))
510 
/*
 * Mathematical operations with more than one definition require
 * special handling
 *
 * A5_ACS yields both a predicate (PeV) and an updated pair (RxxV), so it
 * is split across two helpers.  Both receive RxxV as an operand and the
 * second call also names RxxV as its first argument; assuming the usual
 * gen_helper convention that the first argument is the result, the
 * predicate helper must be emitted first so it sees the RxxV value from
 * before the update.
 */
#define fGEN_TCG_A5_ACS(SHORTCODE) \
    do { \
        gen_helper_vacsh_pred(PeV, cpu_env, RxxV, RssV, RttV); \
        gen_helper_vacsh_val(RxxV, cpu_env, RxxV, RssV, RttV); \
    } while (0)
520 
521 /*
522  * Approximate reciprocal
523  * r3,p1 = sfrecipa(r0, r1)
524  *
525  * The helper packs the 2 32-bit results into a 64-bit value,
526  * so unpack them into the proper results.
527  */
#define fGEN_TCG_F2_sfrecipa(SHORTCODE) \
    do { \
        TCGv_i64 packed = tcg_temp_new_i64(); \
        gen_helper_sfrecipa(packed, cpu_env, RsV, RtV); \
        /* high 32 bits -> RdV, low 32 bits -> PeV */ \
        tcg_gen_extrh_i64_i32(RdV, packed); \
        tcg_gen_extrl_i64_i32(PeV, packed); \
        tcg_temp_free_i64(packed); \
    } while (0)
536 
537 /*
538  * Approximation of the reciprocal square root
539  * r1,p0 = sfinvsqrta(r0)
540  *
541  * The helper packs the 2 32-bit results into a 64-bit value,
542  * so unpack them into the proper results.
543  */
#define fGEN_TCG_F2_sfinvsqrta(SHORTCODE) \
    do { \
        TCGv_i64 packed = tcg_temp_new_i64(); \
        gen_helper_sfinvsqrta(packed, cpu_env, RsV); \
        /* high 32 bits -> RdV, low 32 bits -> PeV */ \
        tcg_gen_extrh_i64_i32(RdV, packed); \
        tcg_gen_extrl_i64_i32(PeV, packed); \
        tcg_temp_free_i64(packed); \
    } while (0)
552 
553 /*
554  * Add or subtract with carry.
555  * Predicate register is used as an extra input and output.
556  * r5:4 = add(r1:0, r3:2, p1):carry
557  */
#define fGEN_TCG_A4_addp_c(SHORTCODE) \
    do { \
        TCGv_i64 zero = tcg_constant_i64(0); \
        TCGv_i64 cy = tcg_temp_new_i64(); \
        /* carry in is bit 0 of PxV */ \
        tcg_gen_extu_i32_i64(cy, PxV); \
        tcg_gen_andi_i64(cy, cy, 1); \
        /* RddV = RssV + carry in, then RddV += RttV; cy collects carry */ \
        tcg_gen_add2_i64(RddV, cy, RssV, zero, cy, zero); \
        tcg_gen_add2_i64(RddV, cy, RddV, cy, RttV, zero); \
        /* carry out goes back to PxV via gen_8bitsof() */ \
        tcg_gen_extrl_i64_i32(PxV, cy); \
        gen_8bitsof(PxV, PxV); \
        tcg_temp_free_i64(cy); \
    } while (0)
570 
571 /* r5:4 = sub(r1:0, r3:2, p1):carry */
#define fGEN_TCG_A4_subp_c(SHORTCODE) \
    do { \
        TCGv_i64 zero = tcg_constant_i64(0); \
        TCGv_i64 inv_RttV = tcg_temp_new_i64(); \
        TCGv_i64 cy = tcg_temp_new_i64(); \
        /* carry in is bit 0 of PxV */ \
        tcg_gen_extu_i32_i64(cy, PxV); \
        tcg_gen_andi_i64(cy, cy, 1); \
        /* subtract by adding the bitwise complement of RttV */ \
        tcg_gen_not_i64(inv_RttV, RttV); \
        tcg_gen_add2_i64(RddV, cy, RssV, zero, cy, zero); \
        tcg_gen_add2_i64(RddV, cy, RddV, cy, inv_RttV, zero); \
        /* carry out goes back to PxV via gen_8bitsof() */ \
        tcg_gen_extrl_i64_i32(PxV, cy); \
        gen_8bitsof(PxV, PxV); \
        tcg_temp_free_i64(inv_RttV); \
        tcg_temp_free_i64(cy); \
    } while (0)
587 
588 /*
589  * Compare each of the 8 unsigned bytes
590  * The minimum is placed in each byte of the destination.
 * Bit N of the predicate is set true if byte N of the first operand
 * is greater than byte N of the second operand.
593  * r5:4,p1 = vminub(r1:0, r3:2)
594  */
#define fGEN_TCG_A6_vminub_RdP(SHORTCODE) \
    do { \
        TCGv byte_t = tcg_temp_new(); \
        TCGv byte_s = tcg_temp_new(); \
        TCGv res = tcg_temp_new(); \
        tcg_gen_movi_tl(PeV, 0); \
        tcg_gen_movi_i64(RddV, 0); \
        for (int lane = 0; lane < 8; lane++) { \
            gen_get_byte_i64(byte_t, lane, RttV, false); \
            gen_get_byte_i64(byte_s, lane, RssV, false); \
            /* predicate bit: RttV byte > RssV byte */ \
            tcg_gen_setcond_tl(TCG_COND_GT, res, byte_t, byte_s); \
            tcg_gen_deposit_tl(PeV, PeV, res, lane, 1); \
            /* result byte: the smaller of the two */ \
            tcg_gen_umin_tl(res, byte_t, byte_s); \
            gen_set_byte_i64(lane, RddV, res); \
        } \
        tcg_temp_free(byte_t); \
        tcg_temp_free(byte_s); \
        tcg_temp_free(res); \
    } while (0)
614 
615 /* Floating point */
/*
 * Conversions.  Each one is a direct call to the helper of the same name;
 * the definitions are grouped by the kind of source operand.
 */

/* Single precision source (RsV) */
#define fGEN_TCG_F2_conv_sf2df(SHORTCODE) \
    gen_helper_conv_sf2df(RddV, cpu_env, RsV)
#define fGEN_TCG_F2_conv_sf2uw(SHORTCODE) \
    gen_helper_conv_sf2uw(RdV, cpu_env, RsV)
#define fGEN_TCG_F2_conv_sf2uw_chop(SHORTCODE) \
    gen_helper_conv_sf2uw_chop(RdV, cpu_env, RsV)
#define fGEN_TCG_F2_conv_sf2w(SHORTCODE) \
    gen_helper_conv_sf2w(RdV, cpu_env, RsV)
#define fGEN_TCG_F2_conv_sf2w_chop(SHORTCODE) \
    gen_helper_conv_sf2w_chop(RdV, cpu_env, RsV)
#define fGEN_TCG_F2_conv_sf2ud(SHORTCODE) \
    gen_helper_conv_sf2ud(RddV, cpu_env, RsV)
#define fGEN_TCG_F2_conv_sf2ud_chop(SHORTCODE) \
    gen_helper_conv_sf2ud_chop(RddV, cpu_env, RsV)
#define fGEN_TCG_F2_conv_sf2d(SHORTCODE) \
    gen_helper_conv_sf2d(RddV, cpu_env, RsV)
#define fGEN_TCG_F2_conv_sf2d_chop(SHORTCODE) \
    gen_helper_conv_sf2d_chop(RddV, cpu_env, RsV)

/* Double precision source (RssV) */
#define fGEN_TCG_F2_conv_df2sf(SHORTCODE) \
    gen_helper_conv_df2sf(RdV, cpu_env, RssV)
#define fGEN_TCG_F2_conv_df2uw(SHORTCODE) \
    gen_helper_conv_df2uw(RdV, cpu_env, RssV)
#define fGEN_TCG_F2_conv_df2uw_chop(SHORTCODE) \
    gen_helper_conv_df2uw_chop(RdV, cpu_env, RssV)
#define fGEN_TCG_F2_conv_df2w(SHORTCODE) \
    gen_helper_conv_df2w(RdV, cpu_env, RssV)
#define fGEN_TCG_F2_conv_df2w_chop(SHORTCODE) \
    gen_helper_conv_df2w_chop(RdV, cpu_env, RssV)
#define fGEN_TCG_F2_conv_df2ud(SHORTCODE) \
    gen_helper_conv_df2ud(RddV, cpu_env, RssV)
#define fGEN_TCG_F2_conv_df2ud_chop(SHORTCODE) \
    gen_helper_conv_df2ud_chop(RddV, cpu_env, RssV)
#define fGEN_TCG_F2_conv_df2d(SHORTCODE) \
    gen_helper_conv_df2d(RddV, cpu_env, RssV)
#define fGEN_TCG_F2_conv_df2d_chop(SHORTCODE) \
    gen_helper_conv_df2d_chop(RddV, cpu_env, RssV)

/* Integer source in a single register (RsV) */
#define fGEN_TCG_F2_conv_uw2sf(SHORTCODE) \
    gen_helper_conv_uw2sf(RdV, cpu_env, RsV)
#define fGEN_TCG_F2_conv_uw2df(SHORTCODE) \
    gen_helper_conv_uw2df(RddV, cpu_env, RsV)
#define fGEN_TCG_F2_conv_w2sf(SHORTCODE) \
    gen_helper_conv_w2sf(RdV, cpu_env, RsV)
#define fGEN_TCG_F2_conv_w2df(SHORTCODE) \
    gen_helper_conv_w2df(RddV, cpu_env, RsV)

/* Integer source in a register pair (RssV) */
#define fGEN_TCG_F2_conv_ud2sf(SHORTCODE) \
    gen_helper_conv_ud2sf(RdV, cpu_env, RssV)
#define fGEN_TCG_F2_conv_ud2df(SHORTCODE) \
    gen_helper_conv_ud2df(RddV, cpu_env, RssV)
#define fGEN_TCG_F2_conv_d2sf(SHORTCODE) \
    gen_helper_conv_d2sf(RdV, cpu_env, RssV)
#define fGEN_TCG_F2_conv_d2df(SHORTCODE) \
    gen_helper_conv_d2df(RddV, cpu_env, RssV)
/* Single precision: arithmetic, min/max */
#define fGEN_TCG_F2_sfadd(SHORTCODE) \
    gen_helper_sfadd(RdV, cpu_env, RsV, RtV)
#define fGEN_TCG_F2_sfsub(SHORTCODE) \
    gen_helper_sfsub(RdV, cpu_env, RsV, RtV)
#define fGEN_TCG_F2_sfmpy(SHORTCODE) \
    gen_helper_sfmpy(RdV, cpu_env, RsV, RtV)
#define fGEN_TCG_F2_sfmax(SHORTCODE) \
    gen_helper_sfmax(RdV, cpu_env, RsV, RtV)
#define fGEN_TCG_F2_sfmin(SHORTCODE) \
    gen_helper_sfmin(RdV, cpu_env, RsV, RtV)

/* Single precision: compares, result in PdV */
#define fGEN_TCG_F2_sfcmpeq(SHORTCODE) \
    gen_helper_sfcmpeq(PdV, cpu_env, RsV, RtV)
#define fGEN_TCG_F2_sfcmpgt(SHORTCODE) \
    gen_helper_sfcmpgt(PdV, cpu_env, RsV, RtV)
#define fGEN_TCG_F2_sfcmpge(SHORTCODE) \
    gen_helper_sfcmpge(PdV, cpu_env, RsV, RtV)
#define fGEN_TCG_F2_sfcmpuo(SHORTCODE) \
    gen_helper_sfcmpuo(PdV, cpu_env, RsV, RtV)

/* Single precision: classify, the immediate uiV is passed as a constant */
#define fGEN_TCG_F2_sfclass(SHORTCODE) \
    gen_helper_sfclass(PdV, cpu_env, RsV, tcg_constant_tl(uiV))

/* Single precision: fixups */
#define fGEN_TCG_F2_sffixupn(SHORTCODE) \
    gen_helper_sffixupn(RdV, cpu_env, RsV, RtV)
#define fGEN_TCG_F2_sffixupd(SHORTCODE) \
    gen_helper_sffixupd(RdV, cpu_env, RsV, RtV)
#define fGEN_TCG_F2_sffixupr(SHORTCODE) \
    gen_helper_sffixupr(RdV, cpu_env, RsV)

/* Single precision: fused multiply add/subtract, RxV is input and output */
#define fGEN_TCG_F2_sffma(SHORTCODE) \
    gen_helper_sffma(RxV, cpu_env, RxV, RsV, RtV)
#define fGEN_TCG_F2_sffma_sc(SHORTCODE) \
    gen_helper_sffma_sc(RxV, cpu_env, RxV, RsV, RtV, PuV)
#define fGEN_TCG_F2_sffms(SHORTCODE) \
    gen_helper_sffms(RxV, cpu_env, RxV, RsV, RtV)
#define fGEN_TCG_F2_sffma_lib(SHORTCODE) \
    gen_helper_sffma_lib(RxV, cpu_env, RxV, RsV, RtV)
#define fGEN_TCG_F2_sffms_lib(SHORTCODE) \
    gen_helper_sffms_lib(RxV, cpu_env, RxV, RsV, RtV)

/* Double precision: arithmetic, min/max, multiply steps */
#define fGEN_TCG_F2_dfadd(SHORTCODE) \
    gen_helper_dfadd(RddV, cpu_env, RssV, RttV)
#define fGEN_TCG_F2_dfsub(SHORTCODE) \
    gen_helper_dfsub(RddV, cpu_env, RssV, RttV)
#define fGEN_TCG_F2_dfmax(SHORTCODE) \
    gen_helper_dfmax(RddV, cpu_env, RssV, RttV)
#define fGEN_TCG_F2_dfmin(SHORTCODE) \
    gen_helper_dfmin(RddV, cpu_env, RssV, RttV)
#define fGEN_TCG_F2_dfmpyfix(SHORTCODE) \
    gen_helper_dfmpyfix(RddV, cpu_env, RssV, RttV)
#define fGEN_TCG_F2_dfmpyhh(SHORTCODE) \
    gen_helper_dfmpyhh(RxxV, cpu_env, RxxV, RssV, RttV)

/* Double precision: compares, result in PdV */
#define fGEN_TCG_F2_dfcmpeq(SHORTCODE) \
    gen_helper_dfcmpeq(PdV, cpu_env, RssV, RttV)
#define fGEN_TCG_F2_dfcmpgt(SHORTCODE) \
    gen_helper_dfcmpgt(PdV, cpu_env, RssV, RttV)
#define fGEN_TCG_F2_dfcmpge(SHORTCODE) \
    gen_helper_dfcmpge(PdV, cpu_env, RssV, RttV)
#define fGEN_TCG_F2_dfcmpuo(SHORTCODE) \
    gen_helper_dfcmpuo(PdV, cpu_env, RssV, RttV)

/* Double precision: classify, the immediate uiV is passed as a constant */
#define fGEN_TCG_F2_dfclass(SHORTCODE) \
    gen_helper_dfclass(PdV, cpu_env, RssV, tcg_constant_tl(uiV))
733 
/*
 * Nothing to do for these in qemu.  The operands are still declared by the
 * code that expands these macros, so reference each one through a (void)
 * cast to keep the compiler from flagging it as unused.  The cast has no
 * effect on the operand and, unlike a self-assignment, is not itself
 * reported by self-assign / no-effect diagnostics.
 */
#define fGEN_TCG_Y4_l2fetch(SHORTCODE) \
    do { \
        (void)RsV; \
        (void)RtV; \
    } while (0)
#define fGEN_TCG_Y5_l2fetch(SHORTCODE) \
    do { \
        (void)RsV; \
    } while (0)
744 
745 #endif
746