/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2009 Ulrich Hecht <uli@suse.de>
 * Copyright (c) 2009 Alexander Graf <agraf@suse.de>
 * Copyright (c) 2010 Richard Henderson <rth@twiddle.net>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "../tcg-ldst.c.inc"
#include "../tcg-pool.c.inc"
#include "elf.h"

#define TCG_CT_CONST_S16        (1 << 8)
#define TCG_CT_CONST_S32        (1 << 9)
#define TCG_CT_CONST_U32        (1 << 10)
#define TCG_CT_CONST_ZERO       (1 << 11)
#define TCG_CT_CONST_P32        (1 << 12)
#define TCG_CT_CONST_INV        (1 << 13)
#define TCG_CT_CONST_INVRISBG   (1 << 14)
#define TCG_CT_CONST_CMP        (1 << 15)

#define ALL_GENERAL_REGS     MAKE_64BIT_MASK(0, 16)
#define ALL_VECTOR_REGS      MAKE_64BIT_MASK(32, 32)

/* In several places within the instruction set, 0 means "no register"
   rather than TCG_REG_R0.  */
#define TCG_REG_NONE    0

/* A scratch register that may be used throughout the backend.  */
#define TCG_TMP0        TCG_REG_R1

#define TCG_GUEST_BASE_REG TCG_REG_R13

/* All of the following instructions are prefixed with their instruction
   format, and are defined as 8- or 16-bit quantities, even when the two
   halves of the 16-bit quantity may appear 32 bits apart in the insn.
   This makes it easy to copy the values from the tables in Appendix B.  */
typedef enum S390Opcode {
    RIL_AFI     = 0xc209,
    RIL_AGFI    = 0xc208,
    RIL_ALFI    = 0xc20b,
    RIL_ALGFI   = 0xc20a,
    RIL_BRASL   = 0xc005,
    RIL_BRCL    = 0xc004,
    RIL_CFI     = 0xc20d,
    RIL_CGFI    = 0xc20c,
    RIL_CLFI    = 0xc20f,
    RIL_CLGFI   = 0xc20e,
    RIL_CLRL    = 0xc60f,
    RIL_CLGRL   = 0xc60a,
    RIL_CRL     = 0xc60d,
    RIL_CGRL    = 0xc608,
    RIL_IIHF    = 0xc008,
    RIL_IILF    = 0xc009,
    RIL_LARL    = 0xc000,
    RIL_LGFI    = 0xc001,
    RIL_LGRL    = 0xc408,
    RIL_LLIHF   = 0xc00e,
    RIL_LLILF   = 0xc00f,
    RIL_LRL     = 0xc40d,
    RIL_MSFI    = 0xc201,
    RIL_MSGFI   = 0xc200,
    RIL_NIHF    = 0xc00a,
    RIL_NILF    = 0xc00b,
    RIL_OIHF    = 0xc00c,
    RIL_OILF    = 0xc00d,
    RIL_SLFI    = 0xc205,
    RIL_SLGFI   = 0xc204,
    RIL_XIHF    = 0xc006,
    RIL_XILF    = 0xc007,

    RI_AGHI     = 0xa70b,
    RI_AHI      = 0xa70a,
    RI_BRC      = 0xa704,
    RI_CHI      = 0xa70e,
    RI_CGHI     = 0xa70f,
    RI_IIHH     = 0xa500,
    RI_IIHL     = 0xa501,
    RI_IILH     = 0xa502,
    RI_IILL     = 0xa503,
    RI_LGHI     = 0xa709,
    RI_LLIHH    = 0xa50c,
    RI_LLIHL    = 0xa50d,
    RI_LLILH    = 0xa50e,
    RI_LLILL    = 0xa50f,
    RI_MGHI     = 0xa70d,
    RI_MHI      = 0xa70c,
    RI_NIHH     = 0xa504,
    RI_NIHL     = 0xa505,
    RI_NILH     = 0xa506,
    RI_NILL     = 0xa507,
    RI_OIHH     = 0xa508,
    RI_OIHL     = 0xa509,
    RI_OILH     = 0xa50a,
    RI_OILL     = 0xa50b,
    RI_TMLL     = 0xa701,
    RI_TMLH     = 0xa700,
    RI_TMHL     = 0xa703,
    RI_TMHH     = 0xa702,

    RIEb_CGRJ    = 0xec64,
    RIEb_CLGRJ   = 0xec65,
    RIEb_CLRJ    = 0xec77,
    RIEb_CRJ     = 0xec76,

    RIEc_CGIJ    = 0xec7c,
    RIEc_CIJ     = 0xec7e,
    RIEc_CLGIJ   = 0xec7d,
    RIEc_CLIJ    = 0xec7f,

    RIEf_RISBG   = 0xec55,

    RIEg_LOCGHI  = 0xec46,

    RRE_AGR     = 0xb908,
    RRE_ALGR    = 0xb90a,
    RRE_ALCR    = 0xb998,
    RRE_ALCGR   = 0xb988,
    RRE_ALGFR   = 0xb91a,
    RRE_CGR     = 0xb920,
    RRE_CLGR    = 0xb921,
    RRE_DLGR    = 0xb987,
    RRE_DLR     = 0xb997,
    RRE_DSGFR   = 0xb91d,
    RRE_DSGR    = 0xb90d,
    RRE_FLOGR   = 0xb983,
    RRE_LGBR    = 0xb906,
    RRE_LCGR    = 0xb903,
    RRE_LGFR    = 0xb914,
    RRE_LGHR    = 0xb907,
    RRE_LGR     = 0xb904,
    RRE_LLGCR   = 0xb984,
    RRE_LLGFR   = 0xb916,
    RRE_LLGHR   = 0xb985,
    RRE_LRVR    = 0xb91f,
    RRE_LRVGR   = 0xb90f,
    RRE_LTGR    = 0xb902,
    RRE_MLGR    = 0xb986,
    RRE_MSGR    = 0xb90c,
    RRE_MSR     = 0xb252,
    RRE_NGR     = 0xb980,
    RRE_OGR     = 0xb981,
    RRE_SGR     = 0xb909,
    RRE_SLGR    = 0xb90b,
    RRE_SLBR    = 0xb999,
    RRE_SLBGR   = 0xb989,
    RRE_XGR     = 0xb982,

    RRFa_MGRK   = 0xb9ec,
    RRFa_MSRKC  = 0xb9fd,
    RRFa_MSGRKC = 0xb9ed,
    RRFa_NCRK   = 0xb9f5,
    RRFa_NCGRK  = 0xb9e5,
    RRFa_NNRK   = 0xb974,
    RRFa_NNGRK  = 0xb964,
    RRFa_NORK   = 0xb976,
    RRFa_NOGRK  = 0xb966,
    RRFa_NRK    = 0xb9f4,
    RRFa_NGRK   = 0xb9e4,
    RRFa_NXRK   = 0xb977,
    RRFa_NXGRK  = 0xb967,
    RRFa_OCRK   = 0xb975,
    RRFa_OCGRK  = 0xb965,
    RRFa_ORK    = 0xb9f6,
    RRFa_OGRK   = 0xb9e6,
    RRFa_SRK    = 0xb9f9,
    RRFa_SGRK   = 0xb9e9,
    RRFa_SLRK   = 0xb9fb,
    RRFa_SLGRK  = 0xb9eb,
    RRFa_XRK    = 0xb9f7,
    RRFa_XGRK   = 0xb9e7,

    RRFam_SELGR = 0xb9e3,

    RRFc_LOCR   = 0xb9f2,
    RRFc_LOCGR  = 0xb9e2,
    RRFc_POPCNT = 0xb9e1,

    RR_AR       = 0x1a,
    RR_ALR      = 0x1e,
    RR_BASR     = 0x0d,
    RR_BCR      = 0x07,
    RR_CLR      = 0x15,
    RR_CR       = 0x19,
    RR_DR       = 0x1d,
    RR_LCR      = 0x13,
    RR_LR       = 0x18,
    RR_LTR      = 0x12,
    RR_NR       = 0x14,
    RR_OR       = 0x16,
    RR_SR       = 0x1b,
    RR_SLR      = 0x1f,
    RR_XR       = 0x17,

    RSY_RLL     = 0xeb1d,
    RSY_RLLG    = 0xeb1c,
    RSY_SLLG    = 0xeb0d,
    RSY_SLLK    = 0xebdf,
    RSY_SRAG    = 0xeb0a,
    RSY_SRAK    = 0xebdc,
    RSY_SRLG    = 0xeb0c,
    RSY_SRLK    = 0xebde,

    RS_SLL      = 0x89,
    RS_SRA      = 0x8a,
    RS_SRL      = 0x88,

    RXY_AG      = 0xe308,
    RXY_AY      = 0xe35a,
    RXY_CG      = 0xe320,
    RXY_CLG     = 0xe321,
    RXY_CLY     = 0xe355,
    RXY_CY      = 0xe359,
    RXY_LAY     = 0xe371,
    RXY_LB      = 0xe376,
    RXY_LG      = 0xe304,
    RXY_LGB     = 0xe377,
    RXY_LGF     = 0xe314,
    RXY_LGH     = 0xe315,
    RXY_LHY     = 0xe378,
    RXY_LLGC    = 0xe390,
    RXY_LLGF    = 0xe316,
    RXY_LLGH    = 0xe391,
    RXY_LMG     = 0xeb04,
    RXY_LPQ     = 0xe38f,
    RXY_LRV     = 0xe31e,
    RXY_LRVG    = 0xe30f,
    RXY_LRVH    = 0xe31f,
    RXY_LY      = 0xe358,
    RXY_NG      = 0xe380,
    RXY_OG      = 0xe381,
    RXY_STCY    = 0xe372,
    RXY_STG     = 0xe324,
    RXY_STHY    = 0xe370,
    RXY_STMG    = 0xeb24,
    RXY_STPQ    = 0xe38e,
    RXY_STRV    = 0xe33e,
    RXY_STRVG   = 0xe32f,
    RXY_STRVH   = 0xe33f,
    RXY_STY     = 0xe350,
    RXY_XG      = 0xe382,

    RX_A        = 0x5a,
    RX_C        = 0x59,
    RX_L        = 0x58,
    RX_LA       = 0x41,
    RX_LH       = 0x48,
    RX_ST       = 0x50,
    RX_STC      = 0x42,
    RX_STH      = 0x40,

    VRIa_VGBM   = 0xe744,
    VRIa_VREPI  = 0xe745,
    VRIb_VGM    = 0xe746,
    VRIc_VREP   = 0xe74d,

    VRRa_VLC    = 0xe7de,
    VRRa_VLP    = 0xe7df,
    VRRa_VLR    = 0xe756,
    VRRc_VA     = 0xe7f3,
    VRRc_VCEQ   = 0xe7f8,   /* we leave the m5 cs field 0 */
    VRRc_VCH    = 0xe7fb,   /* " */
    VRRc_VCHL   = 0xe7f9,   /* " */
    VRRc_VERLLV = 0xe773,
    VRRc_VESLV  = 0xe770,
    VRRc_VESRAV = 0xe77a,
    VRRc_VESRLV = 0xe778,
    VRRc_VML    = 0xe7a2,
    VRRc_VMN    = 0xe7fe,
    VRRc_VMNL   = 0xe7fc,
    VRRc_VMX    = 0xe7ff,
    VRRc_VMXL   = 0xe7fd,
    VRRc_VN     = 0xe768,
    VRRc_VNC    = 0xe769,
    VRRc_VNN    = 0xe76e,
    VRRc_VNO    = 0xe76b,
    VRRc_VNX    = 0xe76c,
    VRRc_VO     = 0xe76a,
    VRRc_VOC    = 0xe76f,
    VRRc_VPKS   = 0xe797,   /* we leave the m5 cs field 0 */
    VRRc_VS     = 0xe7f7,
    VRRa_VUPH   = 0xe7d7,
    VRRa_VUPL   = 0xe7d6,
    VRRc_VX     = 0xe76d,
    VRRe_VSEL   = 0xe78d,
    VRRf_VLVGP  = 0xe762,

    VRSa_VERLL  = 0xe733,
    VRSa_VESL   = 0xe730,
    VRSa_VESRA  = 0xe73a,
    VRSa_VESRL  = 0xe738,
    VRSb_VLVG   = 0xe722,
    VRSc_VLGV   = 0xe721,

    VRX_VL      = 0xe706,
    VRX_VLLEZ   = 0xe704,
    VRX_VLREP   = 0xe705,
    VRX_VST     = 0xe70e,
    VRX_VSTEF   = 0xe70b,
    VRX_VSTEG   = 0xe70a,

    NOP         = 0x0707,
} S390Opcode;

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
    "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6",  "%v7",
    "%v8",  "%v9",  "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
    "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
    "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
};
#endif

/* Since R6 is a potential argument register, choose it last of the
   call-saved registers.  Likewise prefer the call-clobbered registers
   in reverse order to maximize the chance of avoiding the arguments.  */
static const int tcg_target_reg_alloc_order[] = {
    /* Call saved registers.  */
    TCG_REG_R13,
    TCG_REG_R12,
    TCG_REG_R11,
    TCG_REG_R10,
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_R7,
    TCG_REG_R6,
    /* Call clobbered registers.  */
    TCG_REG_R14,
    TCG_REG_R0,
    TCG_REG_R1,
    /* Argument registers, in reverse order of allocation.  */
    TCG_REG_R5,
    TCG_REG_R4,
    TCG_REG_R3,
    TCG_REG_R2,

    /* V8-V15 are call saved, and omitted. */
    TCG_REG_V0,
    TCG_REG_V1,
    TCG_REG_V2,
    TCG_REG_V3,
    TCG_REG_V4,
    TCG_REG_V5,
    TCG_REG_V6,
    TCG_REG_V7,
    TCG_REG_V16,
    TCG_REG_V17,
    TCG_REG_V18,
    TCG_REG_V19,
    TCG_REG_V20,
    TCG_REG_V21,
    TCG_REG_V22,
    TCG_REG_V23,
    TCG_REG_V24,
    TCG_REG_V25,
    TCG_REG_V26,
    TCG_REG_V27,
    TCG_REG_V28,
    TCG_REG_V29,
    TCG_REG_V30,
    TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[] = {
    TCG_REG_R2,
    TCG_REG_R3,
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
};

static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
{
    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
    tcg_debug_assert(slot == 0);
    return TCG_REG_R2;
}

#define S390_CC_EQ      8
#define S390_CC_LT      4
#define S390_CC_GT      2
#define S390_CC_OV      1
#define S390_CC_NE      (S390_CC_LT | S390_CC_GT)
#define S390_CC_LE      (S390_CC_LT | S390_CC_EQ)
#define S390_CC_GE      (S390_CC_GT | S390_CC_EQ)
#define S390_CC_NEVER   0
#define S390_CC_ALWAYS  15

#define S390_TM_EQ      8  /* CC == 0 */
#define S390_TM_NE      7  /* CC in {1,2,3} */

/* Condition codes that result from a COMPARE and COMPARE LOGICAL.  */
static const uint8_t tcg_cond_to_s390_cond[16] = {
    [TCG_COND_EQ]  = S390_CC_EQ,
    [TCG_COND_NE]  = S390_CC_NE,
    [TCG_COND_TSTEQ] = S390_CC_EQ,
    [TCG_COND_TSTNE] = S390_CC_NE,
    [TCG_COND_LT]  = S390_CC_LT,
    [TCG_COND_LE]  = S390_CC_LE,
    [TCG_COND_GT]  = S390_CC_GT,
    [TCG_COND_GE]  = S390_CC_GE,
    [TCG_COND_LTU] = S390_CC_LT,
    [TCG_COND_LEU] = S390_CC_LE,
    [TCG_COND_GTU] = S390_CC_GT,
    [TCG_COND_GEU] = S390_CC_GE,
};

/* Condition codes that result from a LOAD AND TEST.  Here, we have no
   unsigned instruction variation; however, since the test is against zero
   we can remap the outcomes appropriately.  */
static const uint8_t tcg_cond_to_ltr_cond[16] = {
    [TCG_COND_EQ]  = S390_CC_EQ,
    [TCG_COND_NE]  = S390_CC_NE,
    [TCG_COND_TSTEQ] = S390_CC_ALWAYS,
    [TCG_COND_TSTNE] = S390_CC_NEVER,
    [TCG_COND_LT]  = S390_CC_LT,
    [TCG_COND_LE]  = S390_CC_LE,
    [TCG_COND_GT]  = S390_CC_GT,
    [TCG_COND_GE]  = S390_CC_GE,
    [TCG_COND_LTU] = S390_CC_NEVER,
    [TCG_COND_LEU] = S390_CC_EQ,
    [TCG_COND_GTU] = S390_CC_NE,
    [TCG_COND_GEU] = S390_CC_ALWAYS,
};
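
/*
 * Worked example for the remapping above: with the comparand fixed at
 * zero, unsigned "x < 0" can never be true and "x >= 0" is always true,
 * hence NEVER/ALWAYS; unsigned "x <= 0" degenerates to "x == 0" and
 * "x > 0" to "x != 0", hence EQ/NE.  TSTEQ/TSTNE follow the same logic,
 * since "x & 0" is always zero.
 */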

static const tcg_insn_unit *tb_ret_addr;
uint64_t s390_facilities[3];

static inline bool is_general_reg(TCGReg r)
{
    return r <= TCG_REG_R15;
}

static inline bool is_vector_reg(TCGReg r)
{
    return r >= TCG_REG_V0 && r <= TCG_REG_V31;
}

static bool patch_reloc(tcg_insn_unit *src_rw, int type,
                        intptr_t value, intptr_t addend)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    intptr_t pcrel2;
    uint32_t old;

    value += addend;
    pcrel2 = (tcg_insn_unit *)value - src_rx;

    switch (type) {
    case R_390_PC16DBL:
        if (pcrel2 == (int16_t)pcrel2) {
            tcg_patch16(src_rw, pcrel2);
            return true;
        }
        break;
    case R_390_PC32DBL:
        if (pcrel2 == (int32_t)pcrel2) {
            tcg_patch32(src_rw, pcrel2);
            return true;
        }
        break;
    case R_390_20:
        if (value == sextract64(value, 0, 20)) {
            old = *(uint32_t *)src_rw & 0xf00000ff;
            old |= ((value & 0xfff) << 16) | ((value & 0xff000) >> 4);
            tcg_patch32(src_rw, old);
            return true;
        }
        break;
    default:
        g_assert_not_reached();
    }
    return false;
}
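
/*
 * Informal example of the R_390_20 case: a displacement of 0x2345 splits
 * into DL = 0x345 and DH = 0x02; the two ORs above place them as
 * (0x345 << 16) | (0x2000 >> 4) = 0x03450200, matching the split
 * DL/DH layout of the long-displacement (RXY/RSY) instruction word.
 */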

static int is_const_p16(uint64_t val)
{
    for (int i = 0; i < 4; ++i) {
        uint64_t mask = 0xffffull << (i * 16);
        if ((val & ~mask) == 0) {
            return i;
        }
    }
    return -1;
}
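
/*
 * For example, is_const_p16(0x00ff0000) returns 1: only halfword 1
 * (bits 16..31) is non-zero, so the value can be handled by a single
 * insn with a 16-bit immediate targeting that halfword.
 */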

static int is_const_p32(uint64_t val)
{
    if ((val & 0xffffffff00000000ull) == 0) {
        return 0;
    }
    if ((val & 0x00000000ffffffffull) == 0) {
        return 1;
    }
    return -1;
}

/*
 * Accept bit patterns like these:
 *  0....01....1
 *  1....10....0
 *  1..10..01..1
 *  0..01..10..0
 * Copied from gcc sources.
 */
static bool risbg_mask(uint64_t c)
{
    uint64_t lsb;
    /* We don't change the number of transitions by inverting,
       so make sure we start with the LSB zero.  */
    if (c & 1) {
        c = ~c;
    }
    /* Reject all zeros or all ones.  */
    if (c == 0) {
        return false;
    }
    /* Find the first transition.  */
    lsb = c & -c;
    /* Invert to look for a second transition.  */
    c = ~c;
    /* Erase the first transition.  */
    c &= -lsb;
    /* Find the second transition, if any.  */
    lsb = c & -c;
    /* Match if all the bits are 1's, or if c is zero.  */
    return c == -lsb;
}
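
/*
 * Worked example: c = 0x0ff0.  Bit 0 is clear, so no inversion; the
 * first transition is lsb = c & -c = 0x10.  Inverting and clearing the
 * bits below that transition leaves c = ...fffff000, whose own lsb is
 * 0x1000 and which equals -0x1000, so the mask is accepted.  By
 * contrast c = 0b1010 fails: after the same steps c = ...fffffff4,
 * which is not equal to -lsb = ...fffffffc, exposing the extra
 * transition.
 */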

/* Test if a constant matches the constraint. */
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece)
{
    uint64_t uval = val;

    if (ct & TCG_CT_CONST) {
        return true;
    }
    if (type == TCG_TYPE_I32) {
        uval = (uint32_t)val;
        val = (int32_t)val;
    }

    if (ct & TCG_CT_CONST_CMP) {
        if (is_tst_cond(cond)) {
            if (is_const_p16(uval) >= 0) {
                return true;  /* TMxx */
            }
            if (risbg_mask(uval)) {
                return true;  /* RISBG */
            }
            return false;
        }

        if (type == TCG_TYPE_I32) {
            return true;
        }

        switch (cond) {
        case TCG_COND_EQ:
        case TCG_COND_NE:
            ct |= TCG_CT_CONST_S32 | TCG_CT_CONST_U32;  /* CGFI or CLGFI */
            break;
        case TCG_COND_LT:
        case TCG_COND_GE:
        case TCG_COND_LE:
        case TCG_COND_GT:
            ct |= TCG_CT_CONST_S32;  /* CGFI */
            break;
        case TCG_COND_LTU:
        case TCG_COND_GEU:
        case TCG_COND_LEU:
        case TCG_COND_GTU:
            ct |= TCG_CT_CONST_U32;  /* CLGFI */
            break;
        case TCG_COND_TSTNE:
        case TCG_COND_TSTEQ:
            /* checked above, fallthru */
        default:
            g_assert_not_reached();
        }
    }

    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
        return true;
    }
    if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
        return true;
    }
    if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
        return true;
    }
    if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
        return true;
    }

    if (ct & TCG_CT_CONST_INV) {
        val = ~val;
    }
    if ((ct & TCG_CT_CONST_P32) && is_const_p32(val) >= 0) {
        return true;
    }
    if ((ct & TCG_CT_CONST_INVRISBG) && risbg_mask(~val)) {
        return true;
    }
    return false;
}
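
/*
 * Informal example of the TST path above: for TCG_COND_TSTEQ on I64,
 * c2 = 0x00ff000000000000 has a single non-zero halfword (is_const_p16
 * returns 3), so TMHH suffices; c2 = 0x0000fffff0000000 spans two
 * halfwords but is a contiguous run of ones, so it passes risbg_mask()
 * and is handled via RISBG in tgen_cmp2() below.
 */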

/* Emit instructions according to the given instruction format.  */

static void tcg_out_insn_RR(TCGContext *s, S390Opcode op, TCGReg r1, TCGReg r2)
{
    tcg_out16(s, (op << 8) | (r1 << 4) | r2);
}

static void tcg_out_insn_RRE(TCGContext *s, S390Opcode op,
                             TCGReg r1, TCGReg r2)
{
    tcg_out32(s, (op << 16) | (r1 << 4) | r2);
}

/* RRF-a without the m4 field */
static void tcg_out_insn_RRFa(TCGContext *s, S390Opcode op,
                              TCGReg r1, TCGReg r2, TCGReg r3)
{
    tcg_out32(s, (op << 16) | (r3 << 12) | (r1 << 4) | r2);
}

/* RRF-a with the m4 field */
static void tcg_out_insn_RRFam(TCGContext *s, S390Opcode op,
                               TCGReg r1, TCGReg r2, TCGReg r3, int m4)
{
    tcg_out32(s, (op << 16) | (r3 << 12) | (m4 << 8) | (r1 << 4) | r2);
}

static void tcg_out_insn_RRFc(TCGContext *s, S390Opcode op,
                              TCGReg r1, TCGReg r2, int m3)
{
    tcg_out32(s, (op << 16) | (m3 << 12) | (r1 << 4) | r2);
}

static void tcg_out_insn_RI(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
{
    tcg_out32(s, (op << 16) | (r1 << 20) | (i2 & 0xffff));
}

static void tcg_out_insn_RIEg(TCGContext *s, S390Opcode op, TCGReg r1,
                             int i2, int m3)
{
    tcg_out16(s, (op & 0xff00) | (r1 << 4) | m3);
    tcg_out32(s, (i2 << 16) | (op & 0xff));
}

static void tcg_out_insn_RIL(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
{
    tcg_out16(s, op | (r1 << 4));
    tcg_out32(s, i2);
}

static void tcg_out_insn_RS(TCGContext *s, S390Opcode op, TCGReg r1,
                            TCGReg b2, TCGReg r3, int disp)
{
    tcg_out32(s, (op << 24) | (r1 << 20) | (r3 << 16) | (b2 << 12)
              | (disp & 0xfff));
}

static void tcg_out_insn_RSY(TCGContext *s, S390Opcode op, TCGReg r1,
                             TCGReg b2, TCGReg r3, int disp)
{
    tcg_out16(s, (op & 0xff00) | (r1 << 4) | r3);
    tcg_out32(s, (op & 0xff) | (b2 << 28)
              | ((disp & 0xfff) << 16) | ((disp & 0xff000) >> 4));
}

#define tcg_out_insn_RX   tcg_out_insn_RS
#define tcg_out_insn_RXY  tcg_out_insn_RSY

static int RXB(TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
{
    /*
     * Shift bit 4 of each regno to its corresponding bit of RXB.
     * RXB itself begins at bit 8 of the instruction so 8 - 4 = 4
     * is the left-shift of the 4th operand.
     */
    return ((v1 & 0x10) << (4 + 3))
         | ((v2 & 0x10) << (4 + 2))
         | ((v3 & 0x10) << (4 + 1))
         | ((v4 & 0x10) << (4 + 0));
}
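
/*
 * Worked example: for v1 = %v17 (regno 17, bit 4 set) and all other
 * operands below 16, RXB() yields 0x10 << 7 = 0x800.  ORed into the
 * final halfword of a vector insn, that sets the first RXB bit, which
 * supplies bit 4 of the first operand and extends its 4-bit register
 * field to reach %v16..%v31.
 */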

static void tcg_out_insn_VRIa(TCGContext *s, S390Opcode op,
                              TCGReg v1, uint16_t i2, int m3)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4));
    tcg_out16(s, i2);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
}

static void tcg_out_insn_VRIb(TCGContext *s, S390Opcode op,
                              TCGReg v1, uint8_t i2, uint8_t i3, int m4)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4));
    tcg_out16(s, (i2 << 8) | (i3 & 0xff));
    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
}

static void tcg_out_insn_VRIc(TCGContext *s, S390Opcode op,
                              TCGReg v1, uint16_t i2, TCGReg v3, int m4)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(is_vector_reg(v3));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v3 & 0xf));
    tcg_out16(s, i2);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, v3, 0, 0) | (m4 << 12));
}

static void tcg_out_insn_VRRa(TCGContext *s, S390Opcode op,
                              TCGReg v1, TCGReg v2, int m3)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(is_vector_reg(v2));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
    tcg_out32(s, (op & 0x00ff) | RXB(v1, v2, 0, 0) | (m3 << 12));
}

static void tcg_out_insn_VRRc(TCGContext *s, S390Opcode op,
                              TCGReg v1, TCGReg v2, TCGReg v3, int m4)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(is_vector_reg(v2));
    tcg_debug_assert(is_vector_reg(v3));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
    tcg_out16(s, v3 << 12);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, 0) | (m4 << 12));
}

static void tcg_out_insn_VRRe(TCGContext *s, S390Opcode op,
                              TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(is_vector_reg(v2));
    tcg_debug_assert(is_vector_reg(v3));
    tcg_debug_assert(is_vector_reg(v4));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
    tcg_out16(s, v3 << 12);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, v4) | (v4 << 12));
}

static void tcg_out_insn_VRRf(TCGContext *s, S390Opcode op,
                              TCGReg v1, TCGReg r2, TCGReg r3)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(is_general_reg(r2));
    tcg_debug_assert(is_general_reg(r3));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r2);
    tcg_out16(s, r3 << 12);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0));
}

static void tcg_out_insn_VRSa(TCGContext *s, S390Opcode op, TCGReg v1,
                              intptr_t d2, TCGReg b2, TCGReg v3, int m4)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
    tcg_debug_assert(is_general_reg(b2));
    tcg_debug_assert(is_vector_reg(v3));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v3 & 0xf));
    tcg_out16(s, b2 << 12 | d2);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, v3, 0, 0) | (m4 << 12));
}

static void tcg_out_insn_VRSb(TCGContext *s, S390Opcode op, TCGReg v1,
                              intptr_t d2, TCGReg b2, TCGReg r3, int m4)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
    tcg_debug_assert(is_general_reg(b2));
    tcg_debug_assert(is_general_reg(r3));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r3);
    tcg_out16(s, b2 << 12 | d2);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
}

static void tcg_out_insn_VRSc(TCGContext *s, S390Opcode op, TCGReg r1,
                              intptr_t d2, TCGReg b2, TCGReg v3, int m4)
{
    tcg_debug_assert(is_general_reg(r1));
    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
    tcg_debug_assert(is_general_reg(b2));
    tcg_debug_assert(is_vector_reg(v3));
    tcg_out16(s, (op & 0xff00) | (r1 << 4) | (v3 & 0xf));
    tcg_out16(s, b2 << 12 | d2);
    tcg_out16(s, (op & 0x00ff) | RXB(0, v3, 0, 0) | (m4 << 12));
}

static void tcg_out_insn_VRX(TCGContext *s, S390Opcode op, TCGReg v1,
                             TCGReg b2, TCGReg x2, intptr_t d2, int m3)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
    tcg_debug_assert(is_general_reg(x2));
    tcg_debug_assert(is_general_reg(b2));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | x2);
    tcg_out16(s, (b2 << 12) | d2);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
}

/* Emit an opcode with "type-checking" of the format.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(FMT,_),OP), ## __VA_ARGS__)


/* emit 64-bit shifts */
static void tcg_out_sh64(TCGContext* s, S390Opcode op, TCGReg dest,
                         TCGReg src, TCGReg sh_reg, int sh_imm)
{
    tcg_out_insn_RSY(s, op, dest, sh_reg, src, sh_imm);
}

/* emit 32-bit shifts */
static void tcg_out_sh32(TCGContext* s, S390Opcode op, TCGReg dest,
                         TCGReg sh_reg, int sh_imm)
{
    tcg_out_insn_RS(s, op, dest, sh_reg, 0, sh_imm);
}

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
    if (src == dst) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
        if (likely(is_general_reg(dst) && is_general_reg(src))) {
            tcg_out_insn(s, RR, LR, dst, src);
            break;
        }
        /* fallthru */

    case TCG_TYPE_I64:
        if (likely(is_general_reg(dst))) {
            if (likely(is_general_reg(src))) {
                tcg_out_insn(s, RRE, LGR, dst, src);
            } else {
                tcg_out_insn(s, VRSc, VLGV, dst, 0, 0, src, 3);
            }
            break;
        } else if (is_general_reg(src)) {
            tcg_out_insn(s, VRSb, VLVG, dst, 0, 0, src, 3);
            break;
        }
        /* fallthru */

    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
        tcg_out_insn(s, VRRa, VLR, dst, src, 0);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}

static const S390Opcode li_insns[4] = {
    RI_LLILL, RI_LLILH, RI_LLIHL, RI_LLIHH
};
static const S390Opcode oi_insns[4] = {
    RI_OILL, RI_OILH, RI_OIHL, RI_OIHH
};
static const S390Opcode lif_insns[2] = {
    RIL_LLILF, RIL_LLIHF,
};
static const S390Opcode tm_insns[4] = {
    RI_TMLL, RI_TMLH, RI_TMHL, RI_TMHH
};

/* load a register with an immediate value */
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long sval)
{
    tcg_target_ulong uval = sval;
    ptrdiff_t pc_off;
    int i;

    if (type == TCG_TYPE_I32) {
        uval = (uint32_t)sval;
        sval = (int32_t)sval;
    }

    /* Try all 32-bit insns that can load it in one go.  */
    if (sval >= -0x8000 && sval < 0x8000) {
        tcg_out_insn(s, RI, LGHI, ret, sval);
        return;
    }

    i = is_const_p16(uval);
    if (i >= 0) {
        tcg_out_insn_RI(s, li_insns[i], ret, uval >> (i * 16));
        return;
    }

    /* Try all 48-bit insns that can load it in one go.  */
    if (sval == (int32_t)sval) {
        tcg_out_insn(s, RIL, LGFI, ret, sval);
        return;
    }

    i = is_const_p32(uval);
    if (i >= 0) {
        tcg_out_insn_RIL(s, lif_insns[i], ret, uval >> (i * 32));
        return;
    }

    /* Try for PC-relative address load.  For odd addresses, add one. */
    pc_off = tcg_pcrel_diff(s, (void *)sval) >> 1;
    if (pc_off == (int32_t)pc_off) {
        tcg_out_insn(s, RIL, LARL, ret, pc_off);
        if (sval & 1) {
            tcg_out_insn(s, RI, AGHI, ret, 1);
        }
        return;
    }

    /* Otherwise, load it by parts. */
    i = is_const_p16((uint32_t)uval);
    if (i >= 0) {
        tcg_out_insn_RI(s, li_insns[i], ret, uval >> (i * 16));
    } else {
        tcg_out_insn(s, RIL, LLILF, ret, uval);
    }
    uval >>= 32;
    i = is_const_p16(uval);
    if (i >= 0) {
        tcg_out_insn_RI(s, oi_insns[i + 2], ret, uval >> (i * 16));
    } else {
        tcg_out_insn(s, RIL, OIHF, ret, uval);
    }
}
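
/*
 * Informal examples of the cascade above: 0x0000000100000000 has a
 * single non-zero halfword (is_const_p16 returns 2), so a lone
 * LLIHL 0x1 suffices.  0x123456789 matches none of the single-insn
 * forms, so (assuming the PC-relative path does not happen to match)
 * it is built by parts: LLILF 0x23456789 followed by OIHL 0x1.
 */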

/* Emit a load/store type instruction.  Inputs are:
   DATA:     The register to be loaded or stored.
   BASE+OFS: The effective address.
   OPC_RX:   If the operation has an RX format opcode (e.g. STC), otherwise 0.
   OPC_RXY:  The RXY format opcode for the operation (e.g. STCY).  */

static void tcg_out_mem(TCGContext *s, S390Opcode opc_rx, S390Opcode opc_rxy,
                        TCGReg data, TCGReg base, TCGReg index,
                        tcg_target_long ofs)
{
    if (ofs < -0x80000 || ofs >= 0x80000) {
        /* Combine the low 20 bits of the offset with the actual load insn;
           the high 44 bits must come from an immediate load.  */
        tcg_target_long low = ((ofs & 0xfffff) ^ 0x80000) - 0x80000;
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - low);
        ofs = low;

        /* If we were already given an index register, add it in.  */
        if (index != TCG_REG_NONE) {
            tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
        }
        index = TCG_TMP0;
    }

    if (opc_rx && ofs >= 0 && ofs < 0x1000) {
        tcg_out_insn_RX(s, opc_rx, data, base, index, ofs);
    } else {
        tcg_out_insn_RXY(s, opc_rxy, data, base, index, ofs);
    }
}
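
/*
 * Worked example: ofs = 0x123456 exceeds the signed 20-bit displacement
 * range, so low = ((0x23456 ^ 0x80000) - 0x80000) = 0x23456 (the
 * sign-extended low 20 bits), the remaining 0x100000 is loaded into
 * TCG_TMP0, and TCG_TMP0 then serves as the index register for an
 * RXY-format access with displacement 0x23456.
 */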

static void tcg_out_vrx_mem(TCGContext *s, S390Opcode opc_vrx,
                            TCGReg data, TCGReg base, TCGReg index,
                            tcg_target_long ofs, int m3)
{
    if (ofs < 0 || ofs >= 0x1000) {
        if (ofs >= -0x80000 && ofs < 0x80000) {
            tcg_out_insn(s, RXY, LAY, TCG_TMP0, base, index, ofs);
            base = TCG_TMP0;
            index = TCG_REG_NONE;
            ofs = 0;
        } else {
            tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs);
            if (index != TCG_REG_NONE) {
                tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
            }
            index = TCG_TMP0;
            ofs = 0;
        }
    }
    tcg_out_insn_VRX(s, opc_vrx, data, base, index, ofs, m3);
}

/* load data without address translation or endianness conversion */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data,
                       TCGReg base, intptr_t ofs)
{
    switch (type) {
    case TCG_TYPE_I32:
        if (likely(is_general_reg(data))) {
            tcg_out_mem(s, RX_L, RXY_LY, data, base, TCG_REG_NONE, ofs);
            break;
        }
        tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_32);
        break;

    case TCG_TYPE_I64:
        if (likely(is_general_reg(data))) {
            tcg_out_mem(s, 0, RXY_LG, data, base, TCG_REG_NONE, ofs);
            break;
        }
        /* fallthru */

    case TCG_TYPE_V64:
        tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_64);
        break;

    case TCG_TYPE_V128:
        /* Hint quadword aligned.  */
        tcg_out_vrx_mem(s, VRX_VL, data, base, TCG_REG_NONE, ofs, 4);
        break;

    default:
        g_assert_not_reached();
    }
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg data,
                       TCGReg base, intptr_t ofs)
{
    switch (type) {
    case TCG_TYPE_I32:
        if (likely(is_general_reg(data))) {
            tcg_out_mem(s, RX_ST, RXY_STY, data, base, TCG_REG_NONE, ofs);
        } else {
            tcg_out_vrx_mem(s, VRX_VSTEF, data, base, TCG_REG_NONE, ofs, 1);
        }
        break;

    case TCG_TYPE_I64:
        if (likely(is_general_reg(data))) {
            tcg_out_mem(s, 0, RXY_STG, data, base, TCG_REG_NONE, ofs);
            break;
        }
        /* fallthru */

    case TCG_TYPE_V64:
        tcg_out_vrx_mem(s, VRX_VSTEG, data, base, TCG_REG_NONE, ofs, 0);
        break;

    case TCG_TYPE_V128:
        /* Hint quadword aligned.  */
        tcg_out_vrx_mem(s, VRX_VST, data, base, TCG_REG_NONE, ofs, 4);
        break;

    default:
        g_assert_not_reached();
    }
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    return false;
}

static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
{
    return false;
}

static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
                             tcg_target_long imm)
{
    /* This function is only used for passing structs by reference. */
    tcg_out_mem(s, RX_LA, RXY_LAY, rd, rs, TCG_REG_NONE, imm);
}

static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src,
                                 int msb, int lsb, int ofs, int z)
{
    /* Format RIE-f */
    tcg_out16(s, (RIEf_RISBG & 0xff00) | (dest << 4) | src);
    tcg_out16(s, (msb << 8) | (z << 7) | lsb);
    tcg_out16(s, (ofs << 8) | (RIEf_RISBG & 0xff));
}

static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
    tcg_out_insn(s, RRE, LGBR, dest, src);
}

static void tcg_out_ext8u(TCGContext *s, TCGReg dest, TCGReg src)
{
    tcg_out_insn(s, RRE, LLGCR, dest, src);
}

static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
    tcg_out_insn(s, RRE, LGHR, dest, src);
}

static void tcg_out_ext16u(TCGContext *s, TCGReg dest, TCGReg src)
{
    tcg_out_insn(s, RRE, LLGHR, dest, src);
}

static void tcg_out_ext32s(TCGContext *s, TCGReg dest, TCGReg src)
{
    tcg_out_insn(s, RRE, LGFR, dest, src);
}

static void tcg_out_ext32u(TCGContext *s, TCGReg dest, TCGReg src)
{
    tcg_out_insn(s, RRE, LLGFR, dest, src);
}

static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dest, TCGReg src)
{
    tcg_out_ext32s(s, dest, src);
}

static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dest, TCGReg src)
{
    tcg_out_ext32u(s, dest, src);
}

static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg dest, TCGReg src)
{
    tcg_out_mov(s, TCG_TYPE_I32, dest, src);
}

static void tgen_andi_risbg(TCGContext *s, TCGReg out, TCGReg in, uint64_t val)
{
    int msb, lsb;
    if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
        /* Achieve wraparound by swapping msb and lsb.  */
        msb = 64 - ctz64(~val);
        lsb = clz64(~val) - 1;
    } else {
        msb = clz64(val);
        lsb = 63 - ctz64(val);
    }
    tcg_out_risbg(s, out, in, msb, lsb, 0, 1);
}
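
/*
 * Worked example (RISBG numbers bits big-endian, bit 0 = MSB):
 * val = 0x00000000ffffff00 takes the else branch with msb = 32 and
 * lsb = 55, selecting exactly the set bits.  A wrapping mask such as
 * 0xff000000000000ff has both end bits set and takes the first branch:
 * msb = 56, lsb = 7, and RISBG treats msb > lsb as a wrap-around range
 * covering the top and bottom bytes.
 */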

static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
{
    static const S390Opcode ni_insns[4] = {
        RI_NILL, RI_NILH, RI_NIHL, RI_NIHH
    };
    static const S390Opcode nif_insns[2] = {
        RIL_NILF, RIL_NIHF
    };
    uint64_t valid = (type == TCG_TYPE_I32 ? 0xffffffffull : -1ull);
    int i;

    /* Look for the zero-extensions.  */
    if ((val & valid) == 0xffffffff) {
        tcg_out_ext32u(s, dest, dest);
        return;
    }
    if ((val & valid) == 0xff) {
        tcg_out_ext8u(s, dest, dest);
        return;
    }
    if ((val & valid) == 0xffff) {
        tcg_out_ext16u(s, dest, dest);
        return;
    }

    i = is_const_p16(~val & valid);
    if (i >= 0) {
        tcg_out_insn_RI(s, ni_insns[i], dest, val >> (i * 16));
        return;
    }

    i = is_const_p32(~val & valid);
    tcg_debug_assert(i == 0 || type != TCG_TYPE_I32);
    if (i >= 0) {
        tcg_out_insn_RIL(s, nif_insns[i], dest, val >> (i * 32));
        return;
    }

    if (risbg_mask(val)) {
        tgen_andi_risbg(s, dest, dest, val);
        return;
    }

    g_assert_not_reached();
}

static void tgen_ori(TCGContext *s, TCGReg dest, uint64_t val)
{
    static const S390Opcode oif_insns[2] = {
        RIL_OILF, RIL_OIHF
    };

    int i;

    i = is_const_p16(val);
    if (i >= 0) {
        tcg_out_insn_RI(s, oi_insns[i], dest, val >> (i * 16));
        return;
    }

    i = is_const_p32(val);
    if (i >= 0) {
        tcg_out_insn_RIL(s, oif_insns[i], dest, val >> (i * 32));
        return;
    }

    g_assert_not_reached();
}

static void tgen_xori(TCGContext *s, TCGReg dest, uint64_t val)
{
    switch (is_const_p32(val)) {
    case 0:
        tcg_out_insn(s, RIL, XILF, dest, val);
        break;
    case 1:
        tcg_out_insn(s, RIL, XIHF, dest, val >> 32);
        break;
    default:
        g_assert_not_reached();
    }
}

static int tgen_cmp2(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
                     TCGArg c2, bool c2const, bool need_carry, int *inv_cc)
{
    bool is_unsigned = is_unsigned_cond(c);
    TCGCond inv_c = tcg_invert_cond(c);
    S390Opcode op;

    if (is_tst_cond(c)) {
        tcg_debug_assert(!need_carry);

        if (!c2const) {
            if (type == TCG_TYPE_I32) {
                tcg_out_insn(s, RRFa, NRK, TCG_REG_R0, r1, c2);
            } else {
                tcg_out_insn(s, RRFa, NGRK, TCG_REG_R0, r1, c2);
            }
            goto exit;
        }

        if (type == TCG_TYPE_I32) {
            c2 = (uint32_t)c2;
        }

        int i = is_const_p16(c2);
        if (i >= 0) {
            tcg_out_insn_RI(s, tm_insns[i], r1, c2 >> (i * 16));
            *inv_cc = c == TCG_COND_TSTEQ ? S390_TM_NE : S390_TM_EQ;
            return *inv_cc ^ 15;
        }

        if (risbg_mask(c2)) {
            tgen_andi_risbg(s, TCG_REG_R0, r1, c2);
            goto exit;
        }
        g_assert_not_reached();
    }

    if (c2const) {
        if (c2 == 0) {
            if (!(is_unsigned && need_carry)) {
                if (type == TCG_TYPE_I32) {
                    tcg_out_insn(s, RR, LTR, r1, r1);
                } else {
                    tcg_out_insn(s, RRE, LTGR, r1, r1);
                }
                *inv_cc = tcg_cond_to_ltr_cond[inv_c];
                return tcg_cond_to_ltr_cond[c];
            }
        }

        if (!is_unsigned && c2 == (int16_t)c2) {
            op = (type == TCG_TYPE_I32 ? RI_CHI : RI_CGHI);
            tcg_out_insn_RI(s, op, r1, c2);
            goto exit;
        }

        if (type == TCG_TYPE_I32) {
            op = (is_unsigned ? RIL_CLFI : RIL_CFI);
            tcg_out_insn_RIL(s, op, r1, c2);
            goto exit;
        }

        /* Should match TCG_CT_CONST_CMP. */
        switch (c) {
        case TCG_COND_LT:
        case TCG_COND_GE:
        case TCG_COND_LE:
        case TCG_COND_GT:
            tcg_debug_assert(c2 == (int32_t)c2);
            op = RIL_CGFI;
            break;
        case TCG_COND_EQ:
        case TCG_COND_NE:
            if (c2 == (int32_t)c2) {
                op = RIL_CGFI;
                break;
            }
            /* fall through */
        case TCG_COND_LTU:
        case TCG_COND_GEU:
        case TCG_COND_LEU:
        case TCG_COND_GTU:
            tcg_debug_assert(c2 == (uint32_t)c2);
            op = RIL_CLGFI;
            break;
        default:
            g_assert_not_reached();
        }
        tcg_out_insn_RIL(s, op, r1, c2);
    } else if (type == TCG_TYPE_I32) {
        op = (is_unsigned ? RR_CLR : RR_CR);
        tcg_out_insn_RR(s, op, r1, c2);
    } else {
        op = (is_unsigned ? RRE_CLGR : RRE_CGR);
        tcg_out_insn_RRE(s, op, r1, c2);
    }

 exit:
    *inv_cc = tcg_cond_to_s390_cond[inv_c];
    return tcg_cond_to_s390_cond[c];
}

static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
                    TCGArg c2, bool c2const, bool need_carry)
{
    int inv_cc;
    return tgen_cmp2(s, type, c, r1, c2, c2const, need_carry, &inv_cc);
}

static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
                         TCGReg dest, TCGReg c1, TCGArg c2,
                         bool c2const, bool neg)
{
    int cc;

    /* With LOC2, we can always emit the minimum 3 insns.  */
    if (HAVE_FACILITY(LOAD_ON_COND2)) {
        /* Emit: d = 0, d = (cc ? 1 : d).  */
        cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
        tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
        tcg_out_insn(s, RIEg, LOCGHI, dest, neg ? -1 : 1, cc);
        return;
    }

    switch (cond) {
    case TCG_COND_GEU:
    case TCG_COND_LTU:
    case TCG_COND_LT:
    case TCG_COND_GE:
        /* Swap operands so that we can use LEU/GTU/GT/LE.  */
        if (!c2const) {
            TCGReg t = c1;
            c1 = c2;
            c2 = t;
            cond = tcg_swap_cond(cond);
        }
        break;
    default:
        break;
    }

    switch (cond) {
    case TCG_COND_NE:
        /* X != 0 is X > 0.  */
        if (c2const && c2 == 0) {
            cond = TCG_COND_GTU;
        } else {
            break;
        }
        /* fallthru */

    case TCG_COND_GTU:
    case TCG_COND_GT:
        /*
         * The result of a compare has CC=2 for GT and CC=3 unused.
         * ADD LOGICAL WITH CARRY considers (CC & 2) the carry bit.
         */
        tgen_cmp(s, type, cond, c1, c2, c2const, true);
        tcg_out_movi(s, type, dest, 0);
        tcg_out_insn(s, RRE, ALCGR, dest, dest);
        if (neg) {
            if (type == TCG_TYPE_I32) {
                tcg_out_insn(s, RR, LCR, dest, dest);
            } else {
                tcg_out_insn(s, RRE, LCGR, dest, dest);
            }
        }
        return;

    case TCG_COND_EQ:
        /* X == 0 is X <= 0.  */
        if (c2const && c2 == 0) {
            cond = TCG_COND_LEU;
        } else {
            break;
        }
        /* fallthru */

    case TCG_COND_LEU:
    case TCG_COND_LE:
        /*
         * As above, but we're looking for borrow, or !carry.
         * The second insn computes d - d - borrow, or -1 for true
         * and 0 for false.  So we must mask to 1 bit afterward.
         */
        tgen_cmp(s, type, cond, c1, c2, c2const, true);
        tcg_out_insn(s, RRE, SLBGR, dest, dest);
        if (!neg) {
            tgen_andi(s, type, dest, 1);
        }
        return;

    default:
        g_assert_not_reached();
    }

    cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
    /* Emit: d = 0, t = 1, d = (cc ? t : d).  */
    tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
    tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, neg ? -1 : 1);
    tcg_out_insn(s, RRFc, LOCGR, dest, TCG_TMP0, cc);
}
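
/*
 * Informal sanity check of the carry trick above: a GTU compare that is
 * true leaves CC = 2, so ALCGR of a zeroed dest with itself adds just
 * the carry (CC bit 1) and produces 1; a false compare leaves CC in
 * {0, 1}, no carry, and produces 0.  The LEU path mirrors this with
 * borrow via SLBGR, yielding -1/0 before the optional mask to 1 bit.
 */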

static void tgen_movcond_int(TCGContext *s, TCGType type, TCGReg dest,
                             TCGArg v3, int v3const, TCGReg v4,
                             int cc, int inv_cc)
{
    TCGReg src;

    if (v3const) {
        if (dest == v4) {
            if (HAVE_FACILITY(LOAD_ON_COND2)) {
                /* Emit: if (cc) dest = v3. */
                tcg_out_insn(s, RIEg, LOCGHI, dest, v3, cc);
                return;
            }
            tcg_out_insn(s, RI, LGHI, TCG_TMP0, v3);
            src = TCG_TMP0;
        } else {
            /* LGR+LOCGHI is larger than LGHI+LOCGR. */
            tcg_out_insn(s, RI, LGHI, dest, v3);
            cc = inv_cc;
            src = v4;
        }
    } else {
        if (HAVE_FACILITY(MISC_INSN_EXT3)) {
            /* Emit: dest = cc ? v3 : v4. */
            tcg_out_insn(s, RRFam, SELGR, dest, v3, v4, cc);
            return;
        }
        if (dest == v4) {
            src = v3;
        } else {
            tcg_out_mov(s, type, dest, v3);
            cc = inv_cc;
            src = v4;
        }
    }

    /* Emit: if (cc) dest = src. */
    tcg_out_insn(s, RRFc, LOCGR, dest, src, cc);
}

static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest,
                         TCGReg c1, TCGArg c2, int c2const,
                         TCGArg v3, int v3const, TCGReg v4)
{
    int cc, inv_cc;

    cc = tgen_cmp2(s, type, c, c1, c2, c2const, false, &inv_cc);
    tgen_movcond_int(s, type, dest, v3, v3const, v4, cc, inv_cc);
}

static void tgen_clz(TCGContext *s, TCGReg dest, TCGReg a1,
                     TCGArg a2, int a2const)
{
    /* Since this sets both R and R+1, we have no choice but to store the
       result into R0, allowing R1 == TCG_TMP0 to be clobbered as well.  */
    QEMU_BUILD_BUG_ON(TCG_TMP0 != TCG_REG_R1);
    tcg_out_insn(s, RRE, FLOGR, TCG_REG_R0, a1);

    if (a2const && a2 == 64) {
        tcg_out_mov(s, TCG_TYPE_I64, dest, TCG_REG_R0);
        return;
    }

    /*
     * Conditions from FLOGR are:
     *   2 -> one bit found
     *   8 -> no one bit found
     */
    tgen_movcond_int(s, TCG_TYPE_I64, dest, a2, a2const, TCG_REG_R0, 8, 2);
}

static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
    /* With MIE3, and bit 0 of m4 set, we get the complete result. */
    if (HAVE_FACILITY(MISC_INSN_EXT3)) {
        if (type == TCG_TYPE_I32) {
            tcg_out_ext32u(s, dest, src);
            src = dest;
        }
        tcg_out_insn(s, RRFc, POPCNT, dest, src, 8);
        return;
    }

    /* Without MIE3, each byte gets the count of bits for the byte. */
    tcg_out_insn(s, RRFc, POPCNT, dest, src, 0);

    /* Multiply to sum each byte at the top of the word. */
    if (type == TCG_TYPE_I32) {
        tcg_out_insn(s, RIL, MSFI, dest, 0x01010101);
        tcg_out_sh32(s, RS_SRL, dest, TCG_REG_NONE, 24);
    } else {
        tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 0x0101010101010101ull);
        tcg_out_insn(s, RRE, MSGR, dest, TCG_TMP0);
        tcg_out_sh64(s, RSY_SRLG, dest, dest, TCG_REG_NONE, 56);
    }
}
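
/*
 * Worked example of the 32-bit multiply-sum path: after the byte-wise
 * POPCNT, dest = 0x01010202 encodes per-byte counts 1,1,2,2.
 * Multiplying by 0x01010101 accumulates all four counts in the top
 * byte (1+1+2+2 = 6), and the SRL by 24 extracts it: CTPOP = 6.
 */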

static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
                         int ofs, int len, int z)
{
    int lsb = (63 - ofs);
    int msb = lsb - (len - 1);
    tcg_out_risbg(s, dest, src, msb, lsb, ofs, z);
}

static void tgen_extract(TCGContext *s, TCGReg dest, TCGReg src,
                         int ofs, int len)
{
    tcg_out_risbg(s, dest, src, 64 - len, 63, 64 - ofs, 1);
}
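
/*
 * Worked example: deposit at ofs = 8, len = 16 maps to RISBG with
 * msb = 40, lsb = 55 and rotate amount 8.  Extracting the same field
 * uses msb = 48, lsb = 63 with rotate 64 - 8 = 56, rotating the field
 * down to bit 0 and clearing everything above it (z = 1).
 */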
1566
1567static void tgen_gotoi(TCGContext *s, int cc, const tcg_insn_unit *dest)
1568{
1569    ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
1570    if (off == (int16_t)off) {
1571        tcg_out_insn(s, RI, BRC, cc, off);
1572    } else if (off == (int32_t)off) {
1573        tcg_out_insn(s, RIL, BRCL, cc, off);
1574    } else {
1575        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
1576        tcg_out_insn(s, RR, BCR, cc, TCG_TMP0);
1577    }
1578}
1579
1580static void tgen_branch(TCGContext *s, int cc, TCGLabel *l)
1581{
1582    if (l->has_value) {
1583        tgen_gotoi(s, cc, l->u.value_ptr);
1584    } else {
1585        tcg_out16(s, RI_BRC | (cc << 4));
1586        tcg_out_reloc(s, s->code_ptr, R_390_PC16DBL, l, 2);
1587        s->code_ptr += 1;
1588    }
1589}
1590
1591static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc,
1592                                TCGReg r1, TCGReg r2, TCGLabel *l)
1593{
1594    tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
1595    /* Format RIE-b */
1596    tcg_out16(s, (opc & 0xff00) | (r1 << 4) | r2);
1597    tcg_out16(s, 0);
1598    tcg_out16(s, cc << 12 | (opc & 0xff));
1599}
1600
1601static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc,
1602                                    TCGReg r1, int i2, TCGLabel *l)
1603{
1604    tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
1605    /* Format RIE-c */
1606    tcg_out16(s, (opc & 0xff00) | (r1 << 4) | cc);
1607    tcg_out16(s, 0);
1608    tcg_out16(s, (i2 << 8) | (opc & 0xff));
1609}
1610
1611static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
1612                        TCGReg r1, TCGArg c2, int c2const, TCGLabel *l)
1613{
1614    int cc;
1615
1616    if (!is_tst_cond(c)) {
1617        bool is_unsigned = is_unsigned_cond(c);
1618        bool in_range;
1619        S390Opcode opc;
1620
1621        cc = tcg_cond_to_s390_cond[c];
1622
1623        if (!c2const) {
1624            opc = (type == TCG_TYPE_I32
1625                   ? (is_unsigned ? RIEb_CLRJ : RIEb_CRJ)
1626                   : (is_unsigned ? RIEb_CLGRJ : RIEb_CGRJ));
1627            tgen_compare_branch(s, opc, cc, r1, c2, l);
1628            return;
1629        }
1630
1631        /*
1632         * COMPARE IMMEDIATE AND BRANCH RELATIVE has an 8-bit immediate field.
1633         * If the immediate we've been given does not fit that range, we'll
1634         * fall back to separate compare and branch instructions using the
1635         * larger comparison range afforded by COMPARE IMMEDIATE.
1636         */
1637        if (type == TCG_TYPE_I32) {
1638            if (is_unsigned) {
1639                opc = RIEc_CLIJ;
1640                in_range = (uint32_t)c2 == (uint8_t)c2;
1641            } else {
1642                opc = RIEc_CIJ;
1643                in_range = (int32_t)c2 == (int8_t)c2;
1644            }
1645        } else {
1646            if (is_unsigned) {
1647                opc = RIEc_CLGIJ;
1648                in_range = (uint64_t)c2 == (uint8_t)c2;
1649            } else {
1650                opc = RIEc_CGIJ;
1651                in_range = (int64_t)c2 == (int8_t)c2;
1652            }
1653        }
1654        if (in_range) {
1655            tgen_compare_imm_branch(s, opc, cc, r1, c2, l);
1656            return;
1657        }
1658    }
1659
1660    cc = tgen_cmp(s, type, c, r1, c2, c2const, false);
1661    tgen_branch(s, cc, l);
1662}
1663
1664static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *dest)
1665{
1666    ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
1667    if (off == (int32_t)off) {
1668        tcg_out_insn(s, RIL, BRASL, TCG_REG_R14, off);
1669    } else {
1670        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
1671        tcg_out_insn(s, RR, BASR, TCG_REG_R14, TCG_TMP0);
1672    }
1673}
1674
1675static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest,
1676                         const TCGHelperInfo *info)
1677{
1678    tcg_out_call_int(s, dest);
1679}
1680
1681typedef struct {
1682    TCGReg base;
1683    TCGReg index;
1684    int disp;
1685    TCGAtomAlign aa;
1686} HostAddress;
1687
1688bool tcg_target_has_memory_bswap(MemOp memop)
1689{
1690    TCGAtomAlign aa;
1691
1692    if ((memop & MO_SIZE) <= MO_64) {
1693        return true;
1694    }
1695
1696    /*
1697     * Reject 16-byte memop with 16-byte atomicity,
1698     * but do allow a pair of 64-bit operations.
1699     */
1700    aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
1701    return aa.atom <= MO_64;
1702}
1703
1704static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,
1705                                   HostAddress h)
1706{
1707    switch (opc & (MO_SSIZE | MO_BSWAP)) {
1708    case MO_UB:
1709        tcg_out_insn(s, RXY, LLGC, data, h.base, h.index, h.disp);
1710        break;
1711    case MO_SB:
1712        tcg_out_insn(s, RXY, LGB, data, h.base, h.index, h.disp);
1713        break;
1714
1715    case MO_UW | MO_BSWAP:
1716        /* swapped unsigned halfword load with upper bits zeroed */
1717        tcg_out_insn(s, RXY, LRVH, data, h.base, h.index, h.disp);
1718        tcg_out_ext16u(s, data, data);
1719        break;
1720    case MO_UW:
1721        tcg_out_insn(s, RXY, LLGH, data, h.base, h.index, h.disp);
1722        break;
1723
1724    case MO_SW | MO_BSWAP:
1725        /* swapped sign-extended halfword load */
1726        tcg_out_insn(s, RXY, LRVH, data, h.base, h.index, h.disp);
1727        tcg_out_ext16s(s, TCG_TYPE_REG, data, data);
1728        break;
1729    case MO_SW:
1730        tcg_out_insn(s, RXY, LGH, data, h.base, h.index, h.disp);
1731        break;
1732
1733    case MO_UL | MO_BSWAP:
1734        /* swapped unsigned int load with upper bits zeroed */
1735        tcg_out_insn(s, RXY, LRV, data, h.base, h.index, h.disp);
1736        tcg_out_ext32u(s, data, data);
1737        break;
1738    case MO_UL:
1739        tcg_out_insn(s, RXY, LLGF, data, h.base, h.index, h.disp);
1740        break;
1741
1742    case MO_SL | MO_BSWAP:
1743        /* swapped sign-extended int load */
1744        tcg_out_insn(s, RXY, LRV, data, h.base, h.index, h.disp);
1745        tcg_out_ext32s(s, data, data);
1746        break;
1747    case MO_SL:
1748        tcg_out_insn(s, RXY, LGF, data, h.base, h.index, h.disp);
1749        break;
1750
1751    case MO_UQ | MO_BSWAP:
1752        tcg_out_insn(s, RXY, LRVG, data, h.base, h.index, h.disp);
1753        break;
1754    case MO_UQ:
1755        tcg_out_insn(s, RXY, LG, data, h.base, h.index, h.disp);
1756        break;
1757
1758    default:
1759        g_assert_not_reached();
1760    }
1761}
1762
1763static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data,
1764                                   HostAddress h)
1765{
1766    switch (opc & (MO_SIZE | MO_BSWAP)) {
1767    case MO_UB:
1768        if (h.disp >= 0 && h.disp < 0x1000) {
1769            tcg_out_insn(s, RX, STC, data, h.base, h.index, h.disp);
1770        } else {
1771            tcg_out_insn(s, RXY, STCY, data, h.base, h.index, h.disp);
1772        }
1773        break;
1774
1775    case MO_UW | MO_BSWAP:
1776        tcg_out_insn(s, RXY, STRVH, data, h.base, h.index, h.disp);
1777        break;
1778    case MO_UW:
1779        if (h.disp >= 0 && h.disp < 0x1000) {
1780            tcg_out_insn(s, RX, STH, data, h.base, h.index, h.disp);
1781        } else {
1782            tcg_out_insn(s, RXY, STHY, data, h.base, h.index, h.disp);
1783        }
1784        break;
1785
1786    case MO_UL | MO_BSWAP:
1787        tcg_out_insn(s, RXY, STRV, data, h.base, h.index, h.disp);
1788        break;
1789    case MO_UL:
1790        if (h.disp >= 0 && h.disp < 0x1000) {
1791            tcg_out_insn(s, RX, ST, data, h.base, h.index, h.disp);
1792        } else {
1793            tcg_out_insn(s, RXY, STY, data, h.base, h.index, h.disp);
1794        }
1795        break;
1796
1797    case MO_UQ | MO_BSWAP:
1798        tcg_out_insn(s, RXY, STRVG, data, h.base, h.index, h.disp);
1799        break;
1800    case MO_UQ:
1801        tcg_out_insn(s, RXY, STG, data, h.base, h.index, h.disp);
1802        break;
1803
1804    default:
1805        g_assert_not_reached();
1806    }
1807}
1808
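/* The generic ld/st helper-argument code may use TCG_TMP0 as scratch. */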
1809static const TCGLdstHelperParam ldst_helper_param = {
1810    .ntmp = 1, .tmp = { TCG_TMP0 }
1811};
1812
1813static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1814{
1815    MemOp opc = get_memop(lb->oi);
1816
1817    if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
1818                     (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1819        return false;
1820    }
1821
1822    tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
1823    tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
1824    tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
1825
1826    tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1827    return true;
1828}
1829
1830static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1831{
1832    MemOp opc = get_memop(lb->oi);
1833
1834    if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
1835                     (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1836        return false;
1837    }
1838
1839    tcg_out_st_helper_args(s, lb, &ldst_helper_param);
1840    tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
1841
1842    tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1843    return true;
1844}
1845
1846/* We're expecting to use a 20-bit negative offset on the tlb memory ops.  */
1847#define MIN_TLB_MASK_TABLE_OFS  -(1 << 19)
1848
1849/*
1850 * For system-mode, perform the TLB load and compare.
1851 * For user-mode, perform any required alignment tests.
1852 * In both cases, return a TCGLabelQemuLdst structure if the slow path
1853 * is required and fill in @h with the host address for the fast path.
1854 */
1855static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
1856                                           TCGReg addr_reg, MemOpIdx oi,
1857                                           bool is_ld)
1858{
1859    TCGType addr_type = s->addr_type;
1860    TCGLabelQemuLdst *ldst = NULL;
1861    MemOp opc = get_memop(oi);
1862    MemOp s_bits = opc & MO_SIZE;
1863    unsigned a_mask;
1864
1865    h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128);
1866    a_mask = (1 << h->aa.align) - 1;
1867
1868    if (tcg_use_softmmu) {
1869        unsigned s_mask = (1 << s_bits) - 1;
1870        int mem_index = get_mmuidx(oi);
1871        int fast_off = tlb_mask_table_ofs(s, mem_index);
1872        int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
1873        int table_off = fast_off + offsetof(CPUTLBDescFast, table);
1874        int ofs, a_off;
1875        uint64_t tlb_mask;
1876
1877        ldst = new_ldst_label(s);
1878        ldst->is_ld = is_ld;
1879        ldst->oi = oi;
1880        ldst->addrlo_reg = addr_reg;
1881
1882        tcg_out_sh64(s, RSY_SRLG, TCG_TMP0, addr_reg, TCG_REG_NONE,
1883                     s->page_bits - CPU_TLB_ENTRY_BITS);
1884
1885        tcg_out_insn(s, RXY, NG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, mask_off);
1886        tcg_out_insn(s, RXY, AG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, table_off);
1887
1888        /*
1889         * For aligned accesses, we check the first byte and include the
1890         * alignment bits within the address.  For unaligned access, we
1891         * check that we don't cross pages using the address of the last
1892         * byte of the access.
1893         */
1894        a_off = (a_mask >= s_mask ? 0 : s_mask - a_mask);
1895        tlb_mask = (uint64_t)s->page_mask | a_mask;
1896        if (a_off == 0) {
1897            tgen_andi_risbg(s, TCG_REG_R0, addr_reg, tlb_mask);
1898        } else {
1899            tcg_out_insn(s, RX, LA, TCG_REG_R0, addr_reg, TCG_REG_NONE, a_off);
1900            tgen_andi(s, addr_type, TCG_REG_R0, tlb_mask);
1901        }
1902
1903        if (is_ld) {
1904            ofs = offsetof(CPUTLBEntry, addr_read);
1905        } else {
1906            ofs = offsetof(CPUTLBEntry, addr_write);
1907        }
1908        if (addr_type == TCG_TYPE_I32) {
1909            ofs += HOST_BIG_ENDIAN * 4;
1910            tcg_out_insn(s, RX, C, TCG_REG_R0, TCG_TMP0, TCG_REG_NONE, ofs);
1911        } else {
1912            tcg_out_insn(s, RXY, CG, TCG_REG_R0, TCG_TMP0, TCG_REG_NONE, ofs);
1913        }
1914
1915        tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
1916        ldst->label_ptr[0] = s->code_ptr++;
1917
1918        h->index = TCG_TMP0;
1919        tcg_out_insn(s, RXY, LG, h->index, TCG_TMP0, TCG_REG_NONE,
1920                     offsetof(CPUTLBEntry, addend));
1921
1922        if (addr_type == TCG_TYPE_I32) {
1923            tcg_out_insn(s, RRE, ALGFR, h->index, addr_reg);
1924            h->base = TCG_REG_NONE;
1925        } else {
1926            h->base = addr_reg;
1927        }
1928        h->disp = 0;
1929    } else {
1930        if (a_mask) {
1931            ldst = new_ldst_label(s);
1932            ldst->is_ld = is_ld;
1933            ldst->oi = oi;
1934            ldst->addrlo_reg = addr_reg;
1935
1936            tcg_debug_assert(a_mask <= 0xffff);
1937            tcg_out_insn(s, RI, TMLL, addr_reg, a_mask);
1938
1939            tcg_out16(s, RI_BRC | (S390_TM_NE << 4));
1940            ldst->label_ptr[0] = s->code_ptr++;
1941        }
1942
1943        h->base = addr_reg;
1944        if (addr_type == TCG_TYPE_I32) {
1945            tcg_out_ext32u(s, TCG_TMP0, addr_reg);
1946            h->base = TCG_TMP0;
1947        }
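        /* A guest_base below 0x80000 fits in the signed 20-bit RXY
           displacement; larger values live in TCG_GUEST_BASE_REG,
           loaded by the prologue. */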
1948        if (guest_base < 0x80000) {
1949            h->index = TCG_REG_NONE;
1950            h->disp = guest_base;
1951        } else {
1952            h->index = TCG_GUEST_BASE_REG;
1953            h->disp = 0;
1954        }
1955    }
1956
1957    return ldst;
1958}
1959
1960static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
1961                            MemOpIdx oi, TCGType data_type)
1962{
1963    TCGLabelQemuLdst *ldst;
1964    HostAddress h;
1965
1966    ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
1967    tcg_out_qemu_ld_direct(s, get_memop(oi), data_reg, h);
1968
1969    if (ldst) {
1970        ldst->type = data_type;
1971        ldst->datalo_reg = data_reg;
1972        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1973    }
1974}
1975
1976static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
1977                            MemOpIdx oi, TCGType data_type)
1978{
1979    TCGLabelQemuLdst *ldst;
1980    HostAddress h;
1981
1982    ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
1983    tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);
1984
1985    if (ldst) {
1986        ldst->type = data_type;
1987        ldst->datalo_reg = data_reg;
1988        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
1989    }
1990}
1991
1992static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
1993                                   TCGReg addr_reg, MemOpIdx oi, bool is_ld)
1994{
1995    TCGLabel *l1 = NULL, *l2 = NULL;
1996    TCGLabelQemuLdst *ldst;
1997    HostAddress h;
1998    bool need_bswap;
1999    bool use_pair;
2000    S390Opcode insn;
2001
2002    ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
2003
2004    use_pair = h.aa.atom < MO_128;
2005    need_bswap = get_memop(oi) & MO_BSWAP;
2006
2007    if (!use_pair) {
2008        /*
2009         * Atomicity requires we use LPQ.  If we've already checked for
2010         * 16-byte alignment, that's all we need.  If we arrive with
2011         * lesser alignment, then we have determined that an access below
2012         * 16-byte alignment may instead be satisfied with two 8-byte loads.
2013         */
2014        if (h.aa.align < MO_128) {
2015            use_pair = true;
2016            l1 = gen_new_label();
2017            l2 = gen_new_label();
2018
2019            tcg_out_insn(s, RI, TMLL, addr_reg, 15);
2020            tgen_branch(s, S390_TM_NE, l1);
2021        }
2022
2023        tcg_debug_assert(!need_bswap);
2024        tcg_debug_assert(datalo & 1);
2025        tcg_debug_assert(datahi == datalo - 1);
2026        insn = is_ld ? RXY_LPQ : RXY_STPQ;
2027        tcg_out_insn_RXY(s, insn, datahi, h.base, h.index, h.disp);
2028
2029        if (use_pair) {
2030            tgen_branch(s, S390_CC_ALWAYS, l2);
2031            tcg_out_label(s, l1);
2032        }
2033    }
2034    if (use_pair) {
2035        TCGReg d1, d2;
2036
2037        if (need_bswap) {
2038            d1 = datalo, d2 = datahi;
2039            insn = is_ld ? RXY_LRVG : RXY_STRVG;
2040        } else {
2041            d1 = datahi, d2 = datalo;
2042            insn = is_ld ? RXY_LG : RXY_STG;
2043        }
2044
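        /* If the first value register would overlap the address registers,
           form the complete address in TCG_TMP0 before the first access. */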
2045        if (h.base == d1 || h.index == d1) {
2046            tcg_out_insn(s, RXY, LAY, TCG_TMP0, h.base, h.index, h.disp);
2047            h.base = TCG_TMP0;
2048            h.index = TCG_REG_NONE;
2049            h.disp = 0;
2050        }
2051        tcg_out_insn_RXY(s, insn, d1, h.base, h.index, h.disp);
2052        tcg_out_insn_RXY(s, insn, d2, h.base, h.index, h.disp + 8);
2053    }
2054    if (l2) {
2055        tcg_out_label(s, l2);
2056    }
2057
2058    if (ldst) {
2059        ldst->type = TCG_TYPE_I128;
2060        ldst->datalo_reg = datalo;
2061        ldst->datahi_reg = datahi;
2062        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
2063    }
2064}
2065
2066static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
2067{
2068    /* Reuse the zeroing that exists for goto_ptr.  */
2069    if (a0 == 0) {
2070        tgen_gotoi(s, S390_CC_ALWAYS, tcg_code_gen_epilogue);
2071    } else {
2072        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, a0);
2073        tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr);
2074    }
2075}
2076
2077static void tcg_out_goto_tb(TCGContext *s, int which)
2078{
2079    /*
2080     * Branch displacement must be aligned for atomic patching;
2081     * see if we need to add an extra nop before the branch.
2082     */
2083    if (!QEMU_PTR_IS_ALIGNED(s->code_ptr + 1, 4)) {
2084        tcg_out16(s, NOP);
2085    }
2086    tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4));
2087    set_jmp_insn_offset(s, which);
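    /* Skip the two halfwords holding BRCL's 32-bit displacement;
       tb_target_set_jmp_target fills them in. */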
2088    s->code_ptr += 2;
2089    set_jmp_reset_offset(s, which);
2090}
2091
2092void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
2093                              uintptr_t jmp_rx, uintptr_t jmp_rw)
2094{
2095    if (!HAVE_FACILITY(GEN_INST_EXT)) {
2096        return;
2097    }
2098    /* patch the branch destination */
2099    uintptr_t addr = tb->jmp_target_addr[n];
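    /* The halfword displacement is relative to the start of the BRCL
       insn, which begins one halfword before the patched field. */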
2100    intptr_t disp = addr - (jmp_rx - 2);
2101    qatomic_set((int32_t *)jmp_rw, disp / 2);
2102    /* no need to flush icache explicitly */
2103}
2104
2105# define OP_32_64(x) \
2106        case glue(glue(INDEX_op_,x),_i32): \
2107        case glue(glue(INDEX_op_,x),_i64)
2108
2109static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
2110                              const TCGArg args[TCG_MAX_OP_ARGS],
2111                              const int const_args[TCG_MAX_OP_ARGS])
2112{
2113    S390Opcode op, op2;
2114    TCGArg a0, a1, a2;
2115
2116    switch (opc) {
2117    case INDEX_op_goto_ptr:
2118        a0 = args[0];
2119        tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, a0);
2120        break;
2121
2122    OP_32_64(ld8u):
2123        /* ??? LLC (RXY format) is only present with the extended-immediate
2124           facility, whereas LLGC is always present.  */
2125        tcg_out_mem(s, 0, RXY_LLGC, args[0], args[1], TCG_REG_NONE, args[2]);
2126        break;
2127
2128    OP_32_64(ld8s):
2129        /* ??? LB is no smaller than LGB, so no point to using it.  */
2130        tcg_out_mem(s, 0, RXY_LGB, args[0], args[1], TCG_REG_NONE, args[2]);
2131        break;
2132
2133    OP_32_64(ld16u):
2134        /* ??? LLH (RXY format) is only present with the extended-immediate
2135           facility, whereas LLGH is always present.  */
2136        tcg_out_mem(s, 0, RXY_LLGH, args[0], args[1], TCG_REG_NONE, args[2]);
2137        break;
2138
2139    case INDEX_op_ld16s_i32:
2140        tcg_out_mem(s, RX_LH, RXY_LHY, args[0], args[1], TCG_REG_NONE, args[2]);
2141        break;
2142
2143    case INDEX_op_ld_i32:
2144        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2145        break;
2146
2147    OP_32_64(st8):
2148        tcg_out_mem(s, RX_STC, RXY_STCY, args[0], args[1],
2149                    TCG_REG_NONE, args[2]);
2150        break;
2151
2152    OP_32_64(st16):
2153        tcg_out_mem(s, RX_STH, RXY_STHY, args[0], args[1],
2154                    TCG_REG_NONE, args[2]);
2155        break;
2156
2157    case INDEX_op_st_i32:
2158        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2159        break;
2160
2161    case INDEX_op_add_i32:
2162        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2163        if (const_args[2]) {
2164        do_addi_32:
2165            if (a0 == a1) {
2166                if (a2 == (int16_t)a2) {
2167                    tcg_out_insn(s, RI, AHI, a0, a2);
2168                    break;
2169                }
2170                tcg_out_insn(s, RIL, AFI, a0, a2);
2171                break;
2172            }
2173            tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
2174        } else if (a0 == a1) {
2175            tcg_out_insn(s, RR, AR, a0, a2);
2176        } else {
2177            tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
2178        }
2179        break;
2180    case INDEX_op_sub_i32:
2181        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2182        if (const_args[2]) {
2183            a2 = -a2;
2184            goto do_addi_32;
2185        } else if (a0 == a1) {
2186            tcg_out_insn(s, RR, SR, a0, a2);
2187        } else {
2188            tcg_out_insn(s, RRFa, SRK, a0, a1, a2);
2189        }
2190        break;
2191
2192    case INDEX_op_and_i32:
2193        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2194        if (const_args[2]) {
2195            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2196            tgen_andi(s, TCG_TYPE_I32, a0, a2);
2197        } else if (a0 == a1) {
2198            tcg_out_insn(s, RR, NR, a0, a2);
2199        } else {
2200            tcg_out_insn(s, RRFa, NRK, a0, a1, a2);
2201        }
2202        break;
2203    case INDEX_op_or_i32:
2204        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2205        if (const_args[2]) {
2206            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2207            tgen_ori(s, a0, a2);
2208        } else if (a0 == a1) {
2209            tcg_out_insn(s, RR, OR, a0, a2);
2210        } else {
2211            tcg_out_insn(s, RRFa, ORK, a0, a1, a2);
2212        }
2213        break;
2214    case INDEX_op_xor_i32:
2215        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2216        if (const_args[2]) {
2217            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2218            tcg_out_insn(s, RIL, XILF, a0, a2);
2219        } else if (a0 == a1) {
2220            tcg_out_insn(s, RR, XR, args[0], args[2]);
2221        } else {
2222            tcg_out_insn(s, RRFa, XRK, a0, a1, a2);
2223        }
2224        break;
2225
2226    case INDEX_op_andc_i32:
2227        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2228        if (const_args[2]) {
2229            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2230            tgen_andi(s, TCG_TYPE_I32, a0, (uint32_t)~a2);
2231        } else {
2232            tcg_out_insn(s, RRFa, NCRK, a0, a1, a2);
2233        }
2234        break;
2235    case INDEX_op_orc_i32:
2236        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2237        if (const_args[2]) {
2238            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2239            tgen_ori(s, a0, (uint32_t)~a2);
2240        } else {
2241            tcg_out_insn(s, RRFa, OCRK, a0, a1, a2);
2242        }
2243        break;
2244    case INDEX_op_eqv_i32:
2245        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2246        if (const_args[2]) {
2247            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2248            tcg_out_insn(s, RIL, XILF, a0, ~a2);
2249        } else {
2250            tcg_out_insn(s, RRFa, NXRK, a0, a1, a2);
2251        }
2252        break;
2253    case INDEX_op_nand_i32:
2254        tcg_out_insn(s, RRFa, NNRK, args[0], args[1], args[2]);
2255        break;
2256    case INDEX_op_nor_i32:
2257        tcg_out_insn(s, RRFa, NORK, args[0], args[1], args[2]);
2258        break;
2259
2260    case INDEX_op_neg_i32:
2261        tcg_out_insn(s, RR, LCR, args[0], args[1]);
2262        break;
2263    case INDEX_op_not_i32:
2264        tcg_out_insn(s, RRFa, NORK, args[0], args[1], args[1]);
2265        break;
2266
2267    case INDEX_op_mul_i32:
2268        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2269        if (const_args[2]) {
2270            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2271            if (a2 == (int16_t)a2) {
2272                tcg_out_insn(s, RI, MHI, a0, a2);
2273            } else {
2274                tcg_out_insn(s, RIL, MSFI, a0, a2);
2275            }
2276        } else if (a0 == a1) {
2277            tcg_out_insn(s, RRE, MSR, a0, a2);
2278        } else {
2279            tcg_out_insn(s, RRFa, MSRKC, a0, a1, a2);
2280        }
2281        break;
2282
2283    case INDEX_op_div2_i32:
2284        tcg_debug_assert(args[0] == args[2]);
2285        tcg_debug_assert(args[1] == args[3]);
2286        tcg_debug_assert((args[1] & 1) == 0);
2287        tcg_debug_assert(args[0] == args[1] + 1);
2288        tcg_out_insn(s, RR, DR, args[1], args[4]);
2289        break;
2290    case INDEX_op_divu2_i32:
2291        tcg_debug_assert(args[0] == args[2]);
2292        tcg_debug_assert(args[1] == args[3]);
2293        tcg_debug_assert((args[1] & 1) == 0);
2294        tcg_debug_assert(args[0] == args[1] + 1);
2295        tcg_out_insn(s, RRE, DLR, args[1], args[4]);
2296        break;
2297
2298    case INDEX_op_shl_i32:
2299        op = RS_SLL;
2300        op2 = RSY_SLLK;
2301    do_shift32:
2302        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2303        if (a0 == a1) {
2304            if (const_args[2]) {
2305                tcg_out_sh32(s, op, a0, TCG_REG_NONE, a2);
2306            } else {
2307                tcg_out_sh32(s, op, a0, a2, 0);
2308            }
2309        } else {
2310            /* Using tcg_out_sh64 here for the format; it is a 32-bit shift.  */
2311            if (const_args[2]) {
2312                tcg_out_sh64(s, op2, a0, a1, TCG_REG_NONE, a2);
2313            } else {
2314                tcg_out_sh64(s, op2, a0, a1, a2, 0);
2315            }
2316        }
2317        break;
2318    case INDEX_op_shr_i32:
2319        op = RS_SRL;
2320        op2 = RSY_SRLK;
2321        goto do_shift32;
2322    case INDEX_op_sar_i32:
2323        op = RS_SRA;
2324        op2 = RSY_SRAK;
2325        goto do_shift32;
2326
2327    case INDEX_op_rotl_i32:
2328        /* ??? Using tcg_out_sh64 here for the format; it is a 32-bit rol.  */
2329        if (const_args[2]) {
2330            tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_REG_NONE, args[2]);
2331        } else {
2332            tcg_out_sh64(s, RSY_RLL, args[0], args[1], args[2], 0);
2333        }
2334        break;
2335    case INDEX_op_rotr_i32:
2336        if (const_args[2]) {
2337            tcg_out_sh64(s, RSY_RLL, args[0], args[1],
2338                         TCG_REG_NONE, (32 - args[2]) & 31);
2339        } else {
2340            tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
2341            tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_TMP0, 0);
2342        }
2343        break;
2344
2345    case INDEX_op_bswap16_i32:
2346        a0 = args[0], a1 = args[1], a2 = args[2];
2347        tcg_out_insn(s, RRE, LRVR, a0, a1);
2348        if (a2 & TCG_BSWAP_OS) {
2349            tcg_out_sh32(s, RS_SRA, a0, TCG_REG_NONE, 16);
2350        } else {
2351            tcg_out_sh32(s, RS_SRL, a0, TCG_REG_NONE, 16);
2352        }
2353        break;
2354    case INDEX_op_bswap16_i64:
2355        a0 = args[0], a1 = args[1], a2 = args[2];
2356        tcg_out_insn(s, RRE, LRVGR, a0, a1);
2357        if (a2 & TCG_BSWAP_OS) {
2358            tcg_out_sh64(s, RSY_SRAG, a0, a0, TCG_REG_NONE, 48);
2359        } else {
2360            tcg_out_sh64(s, RSY_SRLG, a0, a0, TCG_REG_NONE, 48);
2361        }
2362        break;
2363
2364    case INDEX_op_bswap32_i32:
2365        tcg_out_insn(s, RRE, LRVR, args[0], args[1]);
2366        break;
2367    case INDEX_op_bswap32_i64:
2368        a0 = args[0], a1 = args[1], a2 = args[2];
2369        tcg_out_insn(s, RRE, LRVR, a0, a1);
2370        if (a2 & TCG_BSWAP_OS) {
2371            tcg_out_ext32s(s, a0, a0);
2372        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2373            tcg_out_ext32u(s, a0, a0);
2374        }
2375        break;
2376
2377    case INDEX_op_add2_i32:
2378        if (const_args[4]) {
2379            tcg_out_insn(s, RIL, ALFI, args[0], args[4]);
2380        } else {
2381            tcg_out_insn(s, RR, ALR, args[0], args[4]);
2382        }
2383        tcg_out_insn(s, RRE, ALCR, args[1], args[5]);
2384        break;
2385    case INDEX_op_sub2_i32:
2386        if (const_args[4]) {
2387            tcg_out_insn(s, RIL, SLFI, args[0], args[4]);
2388        } else {
2389            tcg_out_insn(s, RR, SLR, args[0], args[4]);
2390        }
2391        tcg_out_insn(s, RRE, SLBR, args[1], args[5]);
2392        break;
2393
2394    case INDEX_op_br:
2395        tgen_branch(s, S390_CC_ALWAYS, arg_label(args[0]));
2396        break;
2397
2398    case INDEX_op_brcond_i32:
2399        tgen_brcond(s, TCG_TYPE_I32, args[2], args[0],
2400                    args[1], const_args[1], arg_label(args[3]));
2401        break;
2402    case INDEX_op_setcond_i32:
2403        tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1],
2404                     args[2], const_args[2], false);
2405        break;
2406    case INDEX_op_negsetcond_i32:
2407        tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1],
2408                     args[2], const_args[2], true);
2409        break;
2410    case INDEX_op_movcond_i32:
2411        tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1],
2412                     args[2], const_args[2], args[3], const_args[3], args[4]);
2413        break;
2414
2415    case INDEX_op_qemu_ld_a32_i32:
2416    case INDEX_op_qemu_ld_a64_i32:
2417        tcg_out_qemu_ld(s, args[0], args[1], args[2], TCG_TYPE_I32);
2418        break;
2419    case INDEX_op_qemu_ld_a32_i64:
2420    case INDEX_op_qemu_ld_a64_i64:
2421        tcg_out_qemu_ld(s, args[0], args[1], args[2], TCG_TYPE_I64);
2422        break;
2423    case INDEX_op_qemu_st_a32_i32:
2424    case INDEX_op_qemu_st_a64_i32:
2425        tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I32);
2426        break;
2427    case INDEX_op_qemu_st_a32_i64:
2428    case INDEX_op_qemu_st_a64_i64:
2429        tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I64);
2430        break;
2431    case INDEX_op_qemu_ld_a32_i128:
2432    case INDEX_op_qemu_ld_a64_i128:
2433        tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
2434        break;
2435    case INDEX_op_qemu_st_a32_i128:
2436    case INDEX_op_qemu_st_a64_i128:
2437        tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
2438        break;
2439
2440    case INDEX_op_ld16s_i64:
2441        tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]);
2442        break;
2443    case INDEX_op_ld32u_i64:
2444        tcg_out_mem(s, 0, RXY_LLGF, args[0], args[1], TCG_REG_NONE, args[2]);
2445        break;
2446    case INDEX_op_ld32s_i64:
2447        tcg_out_mem(s, 0, RXY_LGF, args[0], args[1], TCG_REG_NONE, args[2]);
2448        break;
2449    case INDEX_op_ld_i64:
2450        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2451        break;
2452
2453    case INDEX_op_st32_i64:
2454        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2455        break;
2456    case INDEX_op_st_i64:
2457        tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2458        break;
2459
2460    case INDEX_op_add_i64:
2461        a0 = args[0], a1 = args[1], a2 = args[2];
2462        if (const_args[2]) {
2463        do_addi_64:
2464            if (a0 == a1) {
2465                if (a2 == (int16_t)a2) {
2466                    tcg_out_insn(s, RI, AGHI, a0, a2);
2467                    break;
2468                }
2469                if (a2 == (int32_t)a2) {
2470                    tcg_out_insn(s, RIL, AGFI, a0, a2);
2471                    break;
2472                }
2473                if (a2 == (uint32_t)a2) {
2474                    tcg_out_insn(s, RIL, ALGFI, a0, a2);
2475                    break;
2476                }
2477                if (-a2 == (uint32_t)-a2) {
2478                    tcg_out_insn(s, RIL, SLGFI, a0, -a2);
2479                    break;
2480                }
2481            }
2482            tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
2483        } else if (a0 == a1) {
2484            tcg_out_insn(s, RRE, AGR, a0, a2);
2485        } else {
2486            tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
2487        }
2488        break;
2489    case INDEX_op_sub_i64:
2490        a0 = args[0], a1 = args[1], a2 = args[2];
2491        if (const_args[2]) {
2492            a2 = -a2;
2493            goto do_addi_64;
2494        } else {
2495            tcg_out_insn(s, RRFa, SGRK, a0, a1, a2);
2496        }
2497        break;
2498
2499    case INDEX_op_and_i64:
2500        a0 = args[0], a1 = args[1], a2 = args[2];
2501        if (const_args[2]) {
2502            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2503            tgen_andi(s, TCG_TYPE_I64, args[0], args[2]);
2504        } else {
2505            tcg_out_insn(s, RRFa, NGRK, a0, a1, a2);
2506        }
2507        break;
2508    case INDEX_op_or_i64:
2509        a0 = args[0], a1 = args[1], a2 = args[2];
2510        if (const_args[2]) {
2511            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2512            tgen_ori(s, a0, a2);
2513        } else {
2514            tcg_out_insn(s, RRFa, OGRK, a0, a1, a2);
2515        }
2516        break;
2517    case INDEX_op_xor_i64:
2518        a0 = args[0], a1 = args[1], a2 = args[2];
2519        if (const_args[2]) {
2520            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2521            tgen_xori(s, a0, a2);
2522        } else {
2523            tcg_out_insn(s, RRFa, XGRK, a0, a1, a2);
2524        }
2525        break;
2526
2527    case INDEX_op_andc_i64:
2528        a0 = args[0], a1 = args[1], a2 = args[2];
2529        if (const_args[2]) {
2530            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2531            tgen_andi(s, TCG_TYPE_I64, a0, ~a2);
2532        } else {
2533            tcg_out_insn(s, RRFa, NCGRK, a0, a1, a2);
2534        }
2535        break;
2536    case INDEX_op_orc_i64:
2537        a0 = args[0], a1 = args[1], a2 = args[2];
2538        if (const_args[2]) {
2539            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2540            tgen_ori(s, a0, ~a2);
2541        } else {
2542            tcg_out_insn(s, RRFa, OCGRK, a0, a1, a2);
2543        }
2544        break;
2545    case INDEX_op_eqv_i64:
2546        a0 = args[0], a1 = args[1], a2 = args[2];
2547        if (const_args[2]) {
2548            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2549            tgen_xori(s, a0, ~a2);
2550        } else {
2551            tcg_out_insn(s, RRFa, NXGRK, a0, a1, a2);
2552        }
2553        break;
2554    case INDEX_op_nand_i64:
2555        tcg_out_insn(s, RRFa, NNGRK, args[0], args[1], args[2]);
2556        break;
2557    case INDEX_op_nor_i64:
2558        tcg_out_insn(s, RRFa, NOGRK, args[0], args[1], args[2]);
2559        break;
2560
2561    case INDEX_op_neg_i64:
2562        tcg_out_insn(s, RRE, LCGR, args[0], args[1]);
2563        break;
2564    case INDEX_op_not_i64:
2565        tcg_out_insn(s, RRFa, NOGRK, args[0], args[1], args[1]);
2566        break;
2567    case INDEX_op_bswap64_i64:
2568        tcg_out_insn(s, RRE, LRVGR, args[0], args[1]);
2569        break;
2570
2571    case INDEX_op_mul_i64:
2572        a0 = args[0], a1 = args[1], a2 = args[2];
2573        if (const_args[2]) {
2574            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2575            if (a2 == (int16_t)a2) {
2576                tcg_out_insn(s, RI, MGHI, a0, a2);
2577            } else {
2578                tcg_out_insn(s, RIL, MSGFI, a0, a2);
2579            }
2580        } else if (a0 == a1) {
2581            tcg_out_insn(s, RRE, MSGR, a0, a2);
2582        } else {
2583            tcg_out_insn(s, RRFa, MSGRKC, a0, a1, a2);
2584        }
2585        break;
2586
2587    case INDEX_op_div2_i64:
2588        /*
2589         * ??? We get an unnecessary sign-extension of the dividend
2590         * into op0 with this definition, but since we do in fact always
2591         * produce both quotient and remainder, using INDEX_op_div_i64
2592         * instead would require jumping through even more hoops.
2593         */
2594        tcg_debug_assert(args[0] == args[2]);
2595        tcg_debug_assert(args[1] == args[3]);
2596        tcg_debug_assert((args[1] & 1) == 0);
2597        tcg_debug_assert(args[0] == args[1] + 1);
2598        tcg_out_insn(s, RRE, DSGR, args[1], args[4]);
2599        break;
2600    case INDEX_op_divu2_i64:
2601        tcg_debug_assert(args[0] == args[2]);
2602        tcg_debug_assert(args[1] == args[3]);
2603        tcg_debug_assert((args[1] & 1) == 0);
2604        tcg_debug_assert(args[0] == args[1] + 1);
2605        tcg_out_insn(s, RRE, DLGR, args[1], args[4]);
2606        break;
2607    case INDEX_op_mulu2_i64:
2608        tcg_debug_assert(args[0] == args[2]);
2609        tcg_debug_assert((args[1] & 1) == 0);
2610        tcg_debug_assert(args[0] == args[1] + 1);
2611        tcg_out_insn(s, RRE, MLGR, args[1], args[3]);
2612        break;
2613    case INDEX_op_muls2_i64:
2614        tcg_debug_assert((args[1] & 1) == 0);
2615        tcg_debug_assert(args[0] == args[1] + 1);
2616        tcg_out_insn(s, RRFa, MGRK, args[1], args[2], args[3]);
2617        break;
2618
2619    case INDEX_op_shl_i64:
2620        op = RSY_SLLG;
2621    do_shift64:
2622        if (const_args[2]) {
2623            tcg_out_sh64(s, op, args[0], args[1], TCG_REG_NONE, args[2]);
2624        } else {
2625            tcg_out_sh64(s, op, args[0], args[1], args[2], 0);
2626        }
2627        break;
2628    case INDEX_op_shr_i64:
2629        op = RSY_SRLG;
2630        goto do_shift64;
2631    case INDEX_op_sar_i64:
2632        op = RSY_SRAG;
2633        goto do_shift64;
2634
2635    case INDEX_op_rotl_i64:
2636        if (const_args[2]) {
2637            tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
2638                         TCG_REG_NONE, args[2]);
2639        } else {
2640            tcg_out_sh64(s, RSY_RLLG, args[0], args[1], args[2], 0);
2641        }
2642        break;
2643    case INDEX_op_rotr_i64:
2644        if (const_args[2]) {
2645            tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
2646                         TCG_REG_NONE, (64 - args[2]) & 63);
2647        } else {
2648            /* We can use the smaller 32-bit negate because only the
2649               low 6 bits are examined for the rotate.  */
2650            tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
2651            tcg_out_sh64(s, RSY_RLLG, args[0], args[1], TCG_TMP0, 0);
2652        }
2653        break;
2654
2655    case INDEX_op_add2_i64:
2656        if (const_args[4]) {
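            /* ALGFI/SLGFI take 32-bit unsigned immediates, so select
               add or subtract according to the sign of the constant. */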
2657            if ((int64_t)args[4] >= 0) {
2658                tcg_out_insn(s, RIL, ALGFI, args[0], args[4]);
2659            } else {
2660                tcg_out_insn(s, RIL, SLGFI, args[0], -args[4]);
2661            }
2662        } else {
2663            tcg_out_insn(s, RRE, ALGR, args[0], args[4]);
2664        }
2665        tcg_out_insn(s, RRE, ALCGR, args[1], args[5]);
2666        break;
2667    case INDEX_op_sub2_i64:
2668        if (const_args[4]) {
2669            if ((int64_t)args[4] >= 0) {
2670                tcg_out_insn(s, RIL, SLGFI, args[0], args[4]);
2671            } else {
2672                tcg_out_insn(s, RIL, ALGFI, args[0], -args[4]);
2673            }
2674        } else {
2675            tcg_out_insn(s, RRE, SLGR, args[0], args[4]);
2676        }
2677        tcg_out_insn(s, RRE, SLBGR, args[1], args[5]);
2678        break;
2679
2680    case INDEX_op_brcond_i64:
2681        tgen_brcond(s, TCG_TYPE_I64, args[2], args[0],
2682                    args[1], const_args[1], arg_label(args[3]));
2683        break;
2684    case INDEX_op_setcond_i64:
2685        tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1],
2686                     args[2], const_args[2], false);
2687        break;
2688    case INDEX_op_negsetcond_i64:
2689        tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1],
2690                     args[2], const_args[2], true);
2691        break;
2692    case INDEX_op_movcond_i64:
2693        tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1],
2694                     args[2], const_args[2], args[3], const_args[3], args[4]);
2695        break;
2696
2697    OP_32_64(deposit):
2698        a0 = args[0], a1 = args[1], a2 = args[2];
2699        if (const_args[1]) {
2700            tgen_deposit(s, a0, a2, args[3], args[4], 1);
2701        } else {
2702            /* Since we can't support "0Z" as a constraint, we allow a1 in
2703               any register.  Fix up as if it were a matching constraint.  */
2704            if (a0 != a1) {
2705                TCGType type = (opc == INDEX_op_deposit_i64);
2706                if (a0 == a2) {
2707                    tcg_out_mov(s, type, TCG_TMP0, a2);
2708                    a2 = TCG_TMP0;
2709                }
2710                tcg_out_mov(s, type, a0, a1);
2711            }
2712            tgen_deposit(s, a0, a2, args[3], args[4], 0);
2713        }
2714        break;
2715
2716    OP_32_64(extract):
2717        tgen_extract(s, args[0], args[1], args[2], args[3]);
2718        break;
2719
2720    case INDEX_op_clz_i64:
2721        tgen_clz(s, args[0], args[1], args[2], const_args[2]);
2722        break;
2723
2724    case INDEX_op_ctpop_i32:
2725        tgen_ctpop(s, TCG_TYPE_I32, args[0], args[1]);
2726        break;
2727    case INDEX_op_ctpop_i64:
2728        tgen_ctpop(s, TCG_TYPE_I64, args[0], args[1]);
2729        break;
2730
2731    case INDEX_op_mb:
2732        /* The host memory model is quite strong; we simply need to
2733           serialize the instruction stream.  */
2734        if (args[0] & TCG_MO_ST_LD) {
2735            /* fast-bcr-serialization facility (45) is present */
2736            tcg_out_insn(s, RR, BCR, 14, 0);
2737        }
2738        break;
2739
2740    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2741    case INDEX_op_mov_i64:
2742    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2743    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
2744    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
2745    case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
2746    case INDEX_op_ext8s_i64:
2747    case INDEX_op_ext8u_i32:
2748    case INDEX_op_ext8u_i64:
2749    case INDEX_op_ext16s_i32:
2750    case INDEX_op_ext16s_i64:
2751    case INDEX_op_ext16u_i32:
2752    case INDEX_op_ext16u_i64:
2753    case INDEX_op_ext32s_i64:
2754    case INDEX_op_ext32u_i64:
2755    case INDEX_op_ext_i32_i64:
2756    case INDEX_op_extu_i32_i64:
2757    case INDEX_op_extrl_i64_i32:
2758    default:
2759        g_assert_not_reached();
2760    }
2761}
2762
2763static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
2764                            TCGReg dst, TCGReg src)
2765{
2766    if (is_general_reg(src)) {
2767        /* Replicate general register into two MO_64. */
2768        tcg_out_insn(s, VRRf, VLVGP, dst, src, src);
2769        if (vece == MO_64) {
2770            return true;
2771        }
2772        src = dst;
2773    }
2774
2775    /*
2776     * Recall that the "standard" integer, within a vector, is the
2777     * rightmost element of the leftmost doubleword, a-la VLLEZ.
2778     */
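    /* (8 >> vece) - 1 indexes that element for each element size. */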
2779    tcg_out_insn(s, VRIc, VREP, dst, (8 >> vece) - 1, src, vece);
2780    return true;
2781}
2782
2783static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
2784                             TCGReg dst, TCGReg base, intptr_t offset)
2785{
2786    tcg_out_vrx_mem(s, VRX_VLREP, dst, base, TCG_REG_NONE, offset, vece);
2787    return true;
2788}
2789
2790static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
2791                             TCGReg dst, int64_t val)
2792{
2793    int i, mask, msb, lsb;
2794
2795    /* Look for int16_t elements.  */
2796    if (vece <= MO_16 ||
2797        (vece == MO_32 ? (int32_t)val : val) == (int16_t)val) {
2798        tcg_out_insn(s, VRIa, VREPI, dst, val, vece);
2799        return;
2800    }
2801
2802    /* Look for bit masks.  */
2803    if (vece == MO_32) {
2804        if (risbg_mask((int32_t)val)) {
2805            /* Handle wraparound by swapping msb and lsb.  */
2806            if ((val & 0x80000001u) == 0x80000001u) {
2807                msb = 32 - ctz32(~val);
2808                lsb = clz32(~val) - 1;
2809            } else {
2810                msb = clz32(val);
2811                lsb = 31 - ctz32(val);
2812            }
2813            tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_32);
2814            return;
2815        }
2816    } else {
2817        if (risbg_mask(val)) {
2818            /* Handle wraparound by swapping msb and lsb.  */
2819            if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
2821                msb = 64 - ctz64(~val);
2822                lsb = clz64(~val) - 1;
2823            } else {
2824                msb = clz64(val);
2825                lsb = 63 - ctz64(val);
2826            }
2827            tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_64);
2828            return;
2829        }
2830    }
2831
2832    /* Look for all bytes 0x00 or 0xff.  */
2833    for (i = mask = 0; i < 8; i++) {
2834        uint8_t byte = val >> (i * 8);
2835        if (byte == 0xff) {
2836            mask |= 1 << i;
2837        } else if (byte != 0) {
2838            break;
2839        }
2840    }
2841    if (i == 8) {
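        /* VGBM sets one byte per mask bit; replicating the 8-bit mask
           into both halves covers the full 16-byte vector. */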
2842        tcg_out_insn(s, VRIa, VGBM, dst, mask * 0x0101, 0);
2843        return;
2844    }
2845
2846    /* Otherwise, stuff it in the constant pool.  */
2847    tcg_out_insn(s, RIL, LARL, TCG_TMP0, 0);
2848    new_pool_label(s, val, R_390_PC32DBL, s->code_ptr - 2, 2);
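    /* The LARL displacement is patched to address the pool entry,
       from which VLREP replicates the 64-bit constant. */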
2849    tcg_out_insn(s, VRX, VLREP, dst, TCG_TMP0, TCG_REG_NONE, 0, MO_64);
2850}
2851
2852static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2853                           unsigned vecl, unsigned vece,
2854                           const TCGArg args[TCG_MAX_OP_ARGS],
2855                           const int const_args[TCG_MAX_OP_ARGS])
2856{
2857    TCGType type = vecl + TCG_TYPE_V64;
2858    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
2859
2860    switch (opc) {
2861    case INDEX_op_ld_vec:
2862        tcg_out_ld(s, type, a0, a1, a2);
2863        break;
2864    case INDEX_op_st_vec:
2865        tcg_out_st(s, type, a0, a1, a2);
2866        break;
2867    case INDEX_op_dupm_vec:
2868        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2869        break;
2870
2871    case INDEX_op_abs_vec:
2872        tcg_out_insn(s, VRRa, VLP, a0, a1, vece);
2873        break;
2874    case INDEX_op_neg_vec:
2875        tcg_out_insn(s, VRRa, VLC, a0, a1, vece);
2876        break;
2877    case INDEX_op_not_vec:
2878        tcg_out_insn(s, VRRc, VNO, a0, a1, a1, 0);
2879        break;
2880
2881    case INDEX_op_add_vec:
2882        tcg_out_insn(s, VRRc, VA, a0, a1, a2, vece);
2883        break;
2884    case INDEX_op_sub_vec:
2885        tcg_out_insn(s, VRRc, VS, a0, a1, a2, vece);
2886        break;
2887    case INDEX_op_and_vec:
2888        tcg_out_insn(s, VRRc, VN, a0, a1, a2, 0);
2889        break;
2890    case INDEX_op_andc_vec:
2891        tcg_out_insn(s, VRRc, VNC, a0, a1, a2, 0);
2892        break;
2893    case INDEX_op_mul_vec:
2894        tcg_out_insn(s, VRRc, VML, a0, a1, a2, vece);
2895        break;
2896    case INDEX_op_or_vec:
2897        tcg_out_insn(s, VRRc, VO, a0, a1, a2, 0);
2898        break;
2899    case INDEX_op_orc_vec:
2900        tcg_out_insn(s, VRRc, VOC, a0, a1, a2, 0);
2901        break;
2902    case INDEX_op_xor_vec:
2903        tcg_out_insn(s, VRRc, VX, a0, a1, a2, 0);
2904        break;
2905    case INDEX_op_nand_vec:
2906        tcg_out_insn(s, VRRc, VNN, a0, a1, a2, 0);
2907        break;
2908    case INDEX_op_nor_vec:
2909        tcg_out_insn(s, VRRc, VNO, a0, a1, a2, 0);
2910        break;
2911    case INDEX_op_eqv_vec:
2912        tcg_out_insn(s, VRRc, VNX, a0, a1, a2, 0);
2913        break;
2914
2915    case INDEX_op_shli_vec:
2916        tcg_out_insn(s, VRSa, VESL, a0, a2, TCG_REG_NONE, a1, vece);
2917        break;
2918    case INDEX_op_shri_vec:
2919        tcg_out_insn(s, VRSa, VESRL, a0, a2, TCG_REG_NONE, a1, vece);
2920        break;
2921    case INDEX_op_sari_vec:
2922        tcg_out_insn(s, VRSa, VESRA, a0, a2, TCG_REG_NONE, a1, vece);
2923        break;
2924    case INDEX_op_rotli_vec:
2925        tcg_out_insn(s, VRSa, VERLL, a0, a2, TCG_REG_NONE, a1, vece);
2926        break;
2927    case INDEX_op_shls_vec:
2928        tcg_out_insn(s, VRSa, VESL, a0, 0, a2, a1, vece);
2929        break;
2930    case INDEX_op_shrs_vec:
2931        tcg_out_insn(s, VRSa, VESRL, a0, 0, a2, a1, vece);
2932        break;
2933    case INDEX_op_sars_vec:
2934        tcg_out_insn(s, VRSa, VESRA, a0, 0, a2, a1, vece);
2935        break;
2936    case INDEX_op_rotls_vec:
2937        tcg_out_insn(s, VRSa, VERLL, a0, 0, a2, a1, vece);
2938        break;
2939    case INDEX_op_shlv_vec:
2940        tcg_out_insn(s, VRRc, VESLV, a0, a1, a2, vece);
2941        break;
2942    case INDEX_op_shrv_vec:
2943        tcg_out_insn(s, VRRc, VESRLV, a0, a1, a2, vece);
2944        break;
2945    case INDEX_op_sarv_vec:
2946        tcg_out_insn(s, VRRc, VESRAV, a0, a1, a2, vece);
2947        break;
2948    case INDEX_op_rotlv_vec:
2949        tcg_out_insn(s, VRRc, VERLLV, a0, a1, a2, vece);
2950        break;
2951
2952    case INDEX_op_smin_vec:
2953        tcg_out_insn(s, VRRc, VMN, a0, a1, a2, vece);
2954        break;
2955    case INDEX_op_smax_vec:
2956        tcg_out_insn(s, VRRc, VMX, a0, a1, a2, vece);
2957        break;
2958    case INDEX_op_umin_vec:
2959        tcg_out_insn(s, VRRc, VMNL, a0, a1, a2, vece);
2960        break;
2961    case INDEX_op_umax_vec:
2962        tcg_out_insn(s, VRRc, VMXL, a0, a1, a2, vece);
2963        break;
2964
2965    case INDEX_op_bitsel_vec:
2966        tcg_out_insn(s, VRRe, VSEL, a0, a2, args[3], a1);
2967        break;
2968
2969    case INDEX_op_cmp_vec:
2970        switch ((TCGCond)args[3]) {
2971        case TCG_COND_EQ:
2972            tcg_out_insn(s, VRRc, VCEQ, a0, a1, a2, vece);
2973            break;
2974        case TCG_COND_GT:
2975            tcg_out_insn(s, VRRc, VCH, a0, a1, a2, vece);
2976            break;
2977        case TCG_COND_GTU:
2978            tcg_out_insn(s, VRRc, VCHL, a0, a1, a2, vece);
2979            break;
2980        default:
2981            g_assert_not_reached();
2982        }
2983        break;
2984
2985    case INDEX_op_s390_vuph_vec:
2986        tcg_out_insn(s, VRRa, VUPH, a0, a1, vece);
2987        break;
2988    case INDEX_op_s390_vupl_vec:
2989        tcg_out_insn(s, VRRa, VUPL, a0, a1, vece);
2990        break;
2991    case INDEX_op_s390_vpks_vec:
2992        tcg_out_insn(s, VRRc, VPKS, a0, a1, a2, vece);
2993        break;
2994
2995    case INDEX_op_mov_vec:   /* Always emitted via tcg_out_mov.  */
2996    case INDEX_op_dup_vec:   /* Always emitted via tcg_out_dup_vec.  */
2997    default:
2998        g_assert_not_reached();
2999    }
3000}
3001
3002int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
3003{
3004    switch (opc) {
3005    case INDEX_op_abs_vec:
3006    case INDEX_op_add_vec:
3007    case INDEX_op_and_vec:
3008    case INDEX_op_andc_vec:
3009    case INDEX_op_bitsel_vec:
3010    case INDEX_op_eqv_vec:
3011    case INDEX_op_nand_vec:
3012    case INDEX_op_neg_vec:
3013    case INDEX_op_nor_vec:
3014    case INDEX_op_not_vec:
3015    case INDEX_op_or_vec:
3016    case INDEX_op_orc_vec:
3017    case INDEX_op_rotli_vec:
3018    case INDEX_op_rotls_vec:
3019    case INDEX_op_rotlv_vec:
3020    case INDEX_op_sari_vec:
3021    case INDEX_op_sars_vec:
3022    case INDEX_op_sarv_vec:
3023    case INDEX_op_shli_vec:
3024    case INDEX_op_shls_vec:
3025    case INDEX_op_shlv_vec:
3026    case INDEX_op_shri_vec:
3027    case INDEX_op_shrs_vec:
3028    case INDEX_op_shrv_vec:
3029    case INDEX_op_smax_vec:
3030    case INDEX_op_smin_vec:
3031    case INDEX_op_sub_vec:
3032    case INDEX_op_umax_vec:
3033    case INDEX_op_umin_vec:
3034    case INDEX_op_xor_vec:
3035        return 1;
3036    case INDEX_op_cmp_vec:
3037    case INDEX_op_cmpsel_vec:
3038    case INDEX_op_rotrv_vec:
3039        return -1;
3040    case INDEX_op_mul_vec:
3041        return vece < MO_64;
3042    case INDEX_op_ssadd_vec:
3043    case INDEX_op_sssub_vec:
3044        return vece < MO_64 ? -1 : 0;
3045    default:
3046        return 0;
3047    }
3048}
3049
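/*
 * Expand a comparison using only the EQ/GT/GTU forms provided by the
 * hardware, swapping operands as needed; return true if the caller
 * must invert the result.
 */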
3050static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0,
3051                                 TCGv_vec v1, TCGv_vec v2, TCGCond cond)
3052{
3053    bool need_swap = false, need_inv = false;
3054
3055    switch (cond) {
3056    case TCG_COND_EQ:
3057    case TCG_COND_GT:
3058    case TCG_COND_GTU:
3059        break;
3060    case TCG_COND_NE:
3061    case TCG_COND_LE:
3062    case TCG_COND_LEU:
3063        need_inv = true;
3064        break;
3065    case TCG_COND_LT:
3066    case TCG_COND_LTU:
3067        need_swap = true;
3068        break;
3069    case TCG_COND_GE:
3070    case TCG_COND_GEU:
3071        need_swap = need_inv = true;
3072        break;
3073    default:
3074        g_assert_not_reached();
3075    }
3076
3077    if (need_inv) {
3078        cond = tcg_invert_cond(cond);
3079    }
3080    if (need_swap) {
3081        TCGv_vec t1;
3082        t1 = v1, v1 = v2, v2 = t1;
3083        cond = tcg_swap_cond(cond);
3084    }
3085
3086    vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
3087              tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
3088
3089    return need_inv;
3090}
3091
3092static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
3093                           TCGv_vec v1, TCGv_vec v2, TCGCond cond)
3094{
3095    if (expand_vec_cmp_noinv(type, vece, v0, v1, v2, cond)) {
3096        tcg_gen_not_vec(vece, v0, v0);
3097    }
3098}
3099
3100static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGv_vec v0,
3101                              TCGv_vec c1, TCGv_vec c2,
3102                              TCGv_vec v3, TCGv_vec v4, TCGCond cond)
3103{
3104    TCGv_vec t = tcg_temp_new_vec(type);
3105
3106    if (expand_vec_cmp_noinv(type, vece, t, c1, c2, cond)) {
3107        /* Invert the sense of the compare by swapping arguments.  */
3108        tcg_gen_bitsel_vec(vece, v0, t, v4, v3);
3109    } else {
3110        tcg_gen_bitsel_vec(vece, v0, t, v3, v4);
3111    }
3112    tcg_temp_free_vec(t);
3113}
3114
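/*
 * Expand signed saturating add/sub: unpack both inputs to the next
 * wider element size, perform the arithmetic there, then pack the
 * two halves back together with signed saturation.
 */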
3115static void expand_vec_sat(TCGType type, unsigned vece, TCGv_vec v0,
3116                           TCGv_vec v1, TCGv_vec v2, TCGOpcode add_sub_opc)
3117{
3118    TCGv_vec h1 = tcg_temp_new_vec(type);
3119    TCGv_vec h2 = tcg_temp_new_vec(type);
3120    TCGv_vec l1 = tcg_temp_new_vec(type);
3121    TCGv_vec l2 = tcg_temp_new_vec(type);
3122
3123    tcg_debug_assert(vece < MO_64);
3124
3125    /* Unpack with sign-extension. */
3126    vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
3127              tcgv_vec_arg(h1), tcgv_vec_arg(v1));
3128    vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
3129              tcgv_vec_arg(h2), tcgv_vec_arg(v2));
3130
3131    vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
3132              tcgv_vec_arg(l1), tcgv_vec_arg(v1));
3133    vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
3134              tcgv_vec_arg(l2), tcgv_vec_arg(v2));
3135
3136    /* Arithmetic on a wider element size. */
3137    vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(h1),
3138              tcgv_vec_arg(h1), tcgv_vec_arg(h2));
3139    vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(l1),
3140              tcgv_vec_arg(l1), tcgv_vec_arg(l2));
3141
3142    /* Pack with saturation. */
3143    vec_gen_3(INDEX_op_s390_vpks_vec, type, vece + 1,
3144              tcgv_vec_arg(v0), tcgv_vec_arg(h1), tcgv_vec_arg(l1));
3145
3146    tcg_temp_free_vec(h1);
3147    tcg_temp_free_vec(h2);
3148    tcg_temp_free_vec(l1);
3149    tcg_temp_free_vec(l2);
3150}
3151
3152void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
3153                       TCGArg a0, ...)
3154{
3155    va_list va;
3156    TCGv_vec v0, v1, v2, v3, v4, t0;
3157
3158    va_start(va, a0);
3159    v0 = temp_tcgv_vec(arg_temp(a0));
3160    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3161    v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3162
3163    switch (opc) {
3164    case INDEX_op_cmp_vec:
3165        expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
3166        break;
3167
3168    case INDEX_op_cmpsel_vec:
3169        v3 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3170        v4 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3171        expand_vec_cmpsel(type, vece, v0, v1, v2, v3, v4, va_arg(va, TCGArg));
3172        break;
3173
3174    case INDEX_op_rotrv_vec:
3175        t0 = tcg_temp_new_vec(type);
3176        tcg_gen_neg_vec(vece, t0, v2);
3177        tcg_gen_rotlv_vec(vece, v0, v1, t0);
3178        tcg_temp_free_vec(t0);
3179        break;
3180
3181    case INDEX_op_ssadd_vec:
3182        expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_add_vec);
3183        break;
3184    case INDEX_op_sssub_vec:
3185        expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_sub_vec);
3186        break;
3187
3188    default:
3189        g_assert_not_reached();
3190    }
3191    va_end(va);
3192}
3193
3194static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
3195{
3196    switch (op) {
3197    case INDEX_op_goto_ptr:
3198        return C_O0_I1(r);
3199
3200    case INDEX_op_ld8u_i32:
3201    case INDEX_op_ld8u_i64:
3202    case INDEX_op_ld8s_i32:
3203    case INDEX_op_ld8s_i64:
3204    case INDEX_op_ld16u_i32:
3205    case INDEX_op_ld16u_i64:
3206    case INDEX_op_ld16s_i32:
3207    case INDEX_op_ld16s_i64:
3208    case INDEX_op_ld_i32:
3209    case INDEX_op_ld32u_i64:
3210    case INDEX_op_ld32s_i64:
3211    case INDEX_op_ld_i64:
3212        return C_O1_I1(r, r);
3213
3214    case INDEX_op_st8_i32:
3215    case INDEX_op_st8_i64:
3216    case INDEX_op_st16_i32:
3217    case INDEX_op_st16_i64:
3218    case INDEX_op_st_i32:
3219    case INDEX_op_st32_i64:
3220    case INDEX_op_st_i64:
3221        return C_O0_I2(r, r);
3222
3223    case INDEX_op_add_i32:
3224    case INDEX_op_add_i64:
3225    case INDEX_op_shl_i64:
3226    case INDEX_op_shr_i64:
3227    case INDEX_op_sar_i64:
3228    case INDEX_op_rotl_i32:
3229    case INDEX_op_rotl_i64:
3230    case INDEX_op_rotr_i32:
3231    case INDEX_op_rotr_i64:
3232        return C_O1_I2(r, r, ri);
3233    case INDEX_op_setcond_i32:
3234    case INDEX_op_negsetcond_i32:
3235    case INDEX_op_setcond_i64:
3236    case INDEX_op_negsetcond_i64:
3237        return C_O1_I2(r, r, rC);
3238
3239    case INDEX_op_clz_i64:
3240        return C_O1_I2(r, r, rI);
3241
3242    case INDEX_op_sub_i32:
3243    case INDEX_op_sub_i64:
3244    case INDEX_op_and_i32:
3245    case INDEX_op_or_i32:
3246    case INDEX_op_xor_i32:
3247        return C_O1_I2(r, r, ri);
3248    case INDEX_op_and_i64:
3249        return C_O1_I2(r, r, rNKR);
3250    case INDEX_op_or_i64:
3251    case INDEX_op_xor_i64:
3252        return C_O1_I2(r, r, rK);
3253
3254    case INDEX_op_andc_i32:
3255    case INDEX_op_orc_i32:
3256    case INDEX_op_eqv_i32:
3257        return C_O1_I2(r, r, ri);
3258    case INDEX_op_andc_i64:
3259        return C_O1_I2(r, r, rKR);
3260    case INDEX_op_orc_i64:
3261    case INDEX_op_eqv_i64:
3262        return C_O1_I2(r, r, rNK);
3263
3264    case INDEX_op_nand_i32:
3265    case INDEX_op_nand_i64:
3266    case INDEX_op_nor_i32:
3267    case INDEX_op_nor_i64:
3268        return C_O1_I2(r, r, r);
3269
3270    case INDEX_op_mul_i32:
3271        return (HAVE_FACILITY(MISC_INSN_EXT2)
3272                ? C_O1_I2(r, r, ri)
3273                : C_O1_I2(r, 0, ri));
3274    case INDEX_op_mul_i64:
3275        return (HAVE_FACILITY(MISC_INSN_EXT2)
3276                ? C_O1_I2(r, r, rJ)
3277                : C_O1_I2(r, 0, rJ));
3278
3279    case INDEX_op_shl_i32:
3280    case INDEX_op_shr_i32:
3281    case INDEX_op_sar_i32:
3282        return C_O1_I2(r, r, ri);
3283
3284    case INDEX_op_brcond_i32:
3285        return C_O0_I2(r, ri);
3286    case INDEX_op_brcond_i64:
3287        return C_O0_I2(r, rC);
3288
3289    case INDEX_op_bswap16_i32:
3290    case INDEX_op_bswap16_i64:
3291    case INDEX_op_bswap32_i32:
3292    case INDEX_op_bswap32_i64:
3293    case INDEX_op_bswap64_i64:
3294    case INDEX_op_neg_i32:
3295    case INDEX_op_neg_i64:
3296    case INDEX_op_not_i32:
3297    case INDEX_op_not_i64:
3298    case INDEX_op_ext8s_i32:
3299    case INDEX_op_ext8s_i64:
3300    case INDEX_op_ext8u_i32:
3301    case INDEX_op_ext8u_i64:
3302    case INDEX_op_ext16s_i32:
3303    case INDEX_op_ext16s_i64:
3304    case INDEX_op_ext16u_i32:
3305    case INDEX_op_ext16u_i64:
3306    case INDEX_op_ext32s_i64:
3307    case INDEX_op_ext32u_i64:
3308    case INDEX_op_ext_i32_i64:
3309    case INDEX_op_extu_i32_i64:
3310    case INDEX_op_extract_i32:
3311    case INDEX_op_extract_i64:
3312    case INDEX_op_ctpop_i32:
3313    case INDEX_op_ctpop_i64:
3314        return C_O1_I1(r, r);
3315
3316    case INDEX_op_qemu_ld_a32_i32:
3317    case INDEX_op_qemu_ld_a64_i32:
3318    case INDEX_op_qemu_ld_a32_i64:
3319    case INDEX_op_qemu_ld_a64_i64:
3320        return C_O1_I1(r, r);
3321    case INDEX_op_qemu_st_a32_i64:
3322    case INDEX_op_qemu_st_a64_i64:
3323    case INDEX_op_qemu_st_a32_i32:
3324    case INDEX_op_qemu_st_a64_i32:
3325        return C_O0_I2(r, r);
3326    case INDEX_op_qemu_ld_a32_i128:
3327    case INDEX_op_qemu_ld_a64_i128:
3328        return C_O2_I1(o, m, r);
3329    case INDEX_op_qemu_st_a32_i128:
3330    case INDEX_op_qemu_st_a64_i128:
3331        return C_O0_I3(o, m, r);
3332
3333    case INDEX_op_deposit_i32:
3334    case INDEX_op_deposit_i64:
3335        return C_O1_I2(r, rZ, r);
3336
3337    case INDEX_op_movcond_i32:
3338        return C_O1_I4(r, r, ri, rI, r);
3339    case INDEX_op_movcond_i64:
3340        return C_O1_I4(r, r, rC, rI, r);
3341
3342    case INDEX_op_div2_i32:
3343    case INDEX_op_div2_i64:
3344    case INDEX_op_divu2_i32:
3345    case INDEX_op_divu2_i64:
3346        return C_O2_I3(o, m, 0, 1, r);
3347
3348    case INDEX_op_mulu2_i64:
3349        return C_O2_I2(o, m, 0, r);
3350    case INDEX_op_muls2_i64:
3351        return C_O2_I2(o, m, r, r);
3352
3353    case INDEX_op_add2_i32:
3354    case INDEX_op_sub2_i32:
3355        return C_N1_O1_I4(r, r, 0, 1, ri, r);
3356
3357    case INDEX_op_add2_i64:
3358    case INDEX_op_sub2_i64:
3359        return C_N1_O1_I4(r, r, 0, 1, rJU, r);
3360
3361    case INDEX_op_st_vec:
3362        return C_O0_I2(v, r);
3363    case INDEX_op_ld_vec:
3364    case INDEX_op_dupm_vec:
3365        return C_O1_I1(v, r);
3366    case INDEX_op_dup_vec:
3367        return C_O1_I1(v, vr);
3368    case INDEX_op_abs_vec:
3369    case INDEX_op_neg_vec:
3370    case INDEX_op_not_vec:
3371    case INDEX_op_rotli_vec:
3372    case INDEX_op_sari_vec:
3373    case INDEX_op_shli_vec:
3374    case INDEX_op_shri_vec:
3375    case INDEX_op_s390_vuph_vec:
3376    case INDEX_op_s390_vupl_vec:
3377        return C_O1_I1(v, v);
3378    case INDEX_op_add_vec:
3379    case INDEX_op_sub_vec:
3380    case INDEX_op_and_vec:
3381    case INDEX_op_andc_vec:
3382    case INDEX_op_or_vec:
3383    case INDEX_op_orc_vec:
3384    case INDEX_op_xor_vec:
3385    case INDEX_op_nand_vec:
3386    case INDEX_op_nor_vec:
3387    case INDEX_op_eqv_vec:
3388    case INDEX_op_cmp_vec:
3389    case INDEX_op_mul_vec:
3390    case INDEX_op_rotlv_vec:
3391    case INDEX_op_rotrv_vec:
3392    case INDEX_op_shlv_vec:
3393    case INDEX_op_shrv_vec:
3394    case INDEX_op_sarv_vec:
3395    case INDEX_op_smax_vec:
3396    case INDEX_op_smin_vec:
3397    case INDEX_op_umax_vec:
3398    case INDEX_op_umin_vec:
3399    case INDEX_op_s390_vpks_vec:
3400        return C_O1_I2(v, v, v);
3401    case INDEX_op_rotls_vec:
3402    case INDEX_op_shls_vec:
3403    case INDEX_op_shrs_vec:
3404    case INDEX_op_sars_vec:
3405        return C_O1_I2(v, v, r);
3406    case INDEX_op_bitsel_vec:
3407        return C_O1_I3(v, v, v, v);
3408
3409    default:
3410        g_assert_not_reached();
3411    }
3412}
3413
3414/*
3415 * Mainline glibc added HWCAP_S390_VX before it became part of the kernel ABI.
3416 * Some distros have fixed this up locally, others have not.
3417 */
3418#ifndef HWCAP_S390_VXRS
3419#define HWCAP_S390_VXRS 2048
3420#endif
3421
3422static void query_s390_facilities(void)
3423{
3424    unsigned long hwcap = qemu_getauxval(AT_HWCAP);
3425    const char *which;
3426
3427    /* Is STORE FACILITY LIST EXTENDED available?  It should be present
3428       on all 64-bit systems, but check for it anyway.  */
3429    if (hwcap & HWCAP_S390_STFLE) {
3430        register int r0 __asm__("0") = ARRAY_SIZE(s390_facilities) - 1;
3431        register void *r1 __asm__("1") = s390_facilities;
3432
3433        /* stfle 0(%r1) */
3434        asm volatile(".word 0xb2b0,0x1000"
3435                     : "=r"(r0) : "r"(r0), "r"(r1) : "memory", "cc");
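        /*
         * For reference: STFLE is an S-format insn, a 16-bit opcode
         * (0xb2b0) followed by a 4-bit base and a 12-bit displacement,
         * so the halfword 0x1000 encodes the operand 0(%r1).  It is
         * presumably spelled as raw .words so that it assembles even
         * where the assembler lacks the mnemonic.  On entry r0 holds
         * the number of response doublewords minus 1; on exit it is
         * updated with the count the machine supports.
         */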
3436    }
3437
3438    /*
3439     * Use of vector registers requires OS support beyond the facility bit.
3440     * If the kernel does not advertise support, disable the facility bits.
3441     * There is nothing else we currently care about in the 3rd word, so
3442     * disable VECTOR with one store.
3443     */
3444    if (!(hwcap & HWCAP_S390_VXRS)) {
3445        s390_facilities[2] = 0;
3446    }
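    /*
     * As a worked example of the indexing: facility bit N lives in
     * doubleword N / 64 at big-endian bit position N % 64.  The
     * vector facility is bit 129, i.e. the second-most-significant
     * bit of s390_facilities[2], which is why clearing word 2 is
     * sufficient.
     */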
3447
3448    /*
3449     * Minimum supported cpu revision is z196.
3450     * Check for all required facilities.
3451     * ZARCH_ACTIVE is done via preprocessor check for 64-bit.
3452     */
3453    if (!HAVE_FACILITY(LONG_DISP)) {
3454        which = "long-displacement";
3455        goto fail;
3456    }
3457    if (!HAVE_FACILITY(EXT_IMM)) {
3458        which = "extended-immediate";
3459        goto fail;
3460    }
3461    if (!HAVE_FACILITY(GEN_INST_EXT)) {
3462        which = "general-instructions-extension";
3463        goto fail;
3464    }
3465    /*
3466     * Facility 45 is a big bin that contains: distinct-operands,
3467     * fast-BCR-serialization, high-word, population-count,
3468     * interlocked-access-1, and load/store-on-condition-1
3469     */
3470    if (!HAVE_FACILITY(45)) {
3471        which = "45";
3472        goto fail;
3473    }
3474    return;
3475
3476 fail:
3477    error_report("%s: missing required facility %s", __func__, which);
3478    exit(EXIT_FAILURE);
3479}
3480
3481static void tcg_target_init(TCGContext *s)
3482{
3483    query_s390_facilities();
3484
3485    tcg_target_available_regs[TCG_TYPE_I32] = 0xffff;
3486    tcg_target_available_regs[TCG_TYPE_I64] = 0xffff;
3487    if (HAVE_FACILITY(VECTOR)) {
3488        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3489        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
3490    }
3491
3492    tcg_target_call_clobber_regs = 0;
3493    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
3494    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R1);
3495    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
3496    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
3497    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
3498    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
3499    /* The r6 register is technically call-saved, but it's also a parameter
3500       register, so it can get killed by setup for the qemu_st helper.  */
3501    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
3502    /* The return register can be considered call-clobbered.  */
3503    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14);
3504
3505    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
3506    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
3507    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
3508    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
3509    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
3510    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
3511    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
3512    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
3513    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
3514    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
3515    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
3516    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
3517    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V20);
3518    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V21);
3519    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V22);
3520    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V23);
3521    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V24);
3522    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V25);
3523    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V26);
3524    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V27);
3525    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V28);
3526    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V29);
3527    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V30);
3528    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V31);
3529
3530    s->reserved_regs = 0;
3531    tcg_regset_set_reg(s->reserved_regs, TCG_TMP0);
3532    /* XXX many insns can't be used with R0, so it is best avoided for now */
3533    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);
3534    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
3535}
3536
3537#define FRAME_SIZE  ((int)(TCG_TARGET_CALL_STACK_OFFSET          \
3538                           + TCG_STATIC_CALL_ARGS_SIZE           \
3539                           + CPU_TEMP_BUF_NLONGS * sizeof(long)))
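/*
 * As a worked example, assuming the usual values of these constants
 * (TCG_TARGET_CALL_STACK_OFFSET = 160 for the s390x ELF ABI register
 * save area, TCG_STATIC_CALL_ARGS_SIZE = 128, CPU_TEMP_BUF_NLONGS =
 * 128), FRAME_SIZE comes to 160 + 128 + 1024 = 1312 bytes.
 */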
3540
3541static void tcg_target_qemu_prologue(TCGContext *s)
3542{
3543    /* stmg %r6,%r15,48(%r15) (save registers) */
3544    tcg_out_insn(s, RXY, STMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 48);
3545
3546    /* aghi %r15,-frame_size */
3547    tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -FRAME_SIZE);
3548
3549    tcg_set_frame(s, TCG_REG_CALL_STACK,
3550                  TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET,
3551                  CPU_TEMP_BUF_NLONGS * sizeof(long));
3552
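    /*
     * The 0x80000 threshold below presumably comes from the signed
     * 20-bit displacement of the long-displacement memory insns:
     * offsets up to 0x7ffff can be folded directly into each guest
     * access, while a larger guest_base must be materialized once in
     * a reserved register.
     */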
3553    if (!tcg_use_softmmu && guest_base >= 0x80000) {
3554        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
3555        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
3556    }
3557
3558    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
3559
3560    /* br %r3 (go to TB) */
3561    tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, tcg_target_call_iarg_regs[1]);
3562
3563    /*
3564     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
3565     * and fall through to the rest of the epilogue.
3566     */
3567    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
3568    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, 0);
3569
3570    /* TB epilogue */
3571    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
3572
3573    /* lmg %r6,%r15,frame_size+48(%r15) (restore registers) */
3574    tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15,
3575                 FRAME_SIZE + 48);
3576
3577    /* br %r14 (return) */
3578    tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14);
3579}
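/*
 * For reference, a sketch of the code emitted above, assuming
 * FRAME_SIZE = 1312 and the conventional assignments (TCG_AREG0 in
 * %r10, env arriving in %r2, the TB address in %r3):
 *
 *     stmg  %r6,%r15,48(%r15)      # save call-saved registers
 *     aghi  %r15,-1312             # allocate the TCG frame
 *     lgr   %r10,%r2               # TCG_AREG0 = env
 *     br    %r3                    # enter the TB
 *   epilogue:
 *     lghi  %r2,0                  # goto_ptr return value
 *     lmg   %r6,%r15,1360(%r15)    # restore registers (1312 + 48)
 *     br    %r14                   # return to caller
 */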
3580
3581static void tcg_out_tb_start(TCGContext *s)
3582{
3583    /* nothing to do */
3584}
3585
3586static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3587{
3588    memset(p, 0x07, count * sizeof(tcg_insn_unit));
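    /*
     * Each 0x0707 halfword is "bcr 0,%r7"; BCR with a zero mask never
     * branches, so the fill is a run of 2-byte no-ops wherever
     * execution lands within it.
     */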
3589}
3590
3591typedef struct {
3592    DebugFrameHeader h;
3593    uint8_t fde_def_cfa[4];
3594    uint8_t fde_reg_ofs[18];
3595} DebugFrame;
3596
3597/* We're expecting a 2-byte uleb128 encoded value.  */
3598QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
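/*
 * Two uleb128 bytes hold 14 bits of payload.  As a concrete check,
 * FRAME_SIZE = 1312 = 0x520 would encode as (0x520 & 0x7f) | 0x80 =
 * 0xa0 followed by 0x520 >> 7 = 0x0a, decoding to 0x20 + (0x0a << 7).
 */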
3599
3600#define ELF_HOST_MACHINE  EM_S390
3601
3602static const DebugFrame debug_frame = {
3603    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3604    .h.cie.id = -1,
3605    .h.cie.version = 1,
3606    .h.cie.code_align = 1,
3607    .h.cie.data_align = 8,                /* sleb128 8 */
3608    .h.cie.return_column = TCG_REG_R14,
3609
3610    /* Total FDE size does not include the "len" member.  */
3611    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3612
3613    .fde_def_cfa = {
3614        12, TCG_REG_CALL_STACK,         /* DW_CFA_def_cfa %r15, ... */
3615        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3616        (FRAME_SIZE >> 7)
3617    },
3618    .fde_reg_ofs = {
3619        0x86, 6,                        /* DW_CFA_offset, %r6, 48 */
3620        0x87, 7,                        /* DW_CFA_offset, %r7, 56 */
3621        0x88, 8,                        /* DW_CFA_offset, %r8, 64 */
3622        0x89, 9,                        /* DW_CFA_offset, %r9, 72 */
3623        0x8a, 10,                       /* DW_CFA_offset, %r10, 80 */
3624        0x8b, 11,                       /* DW_CFA_offset, %r11, 88 */
3625        0x8c, 12,                       /* DW_CFA_offset, %r12, 96 */
3626        0x8d, 13,                       /* DW_CFA_offset, %r13, 104 */
3627        0x8e, 14,                       /* DW_CFA_offset, %r14, 112 */
3628    }
3629};
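/*
 * Decoding the first entry as an example: 0x86 is DW_CFA_offset
 * (high bits 0x80) with register 6 in the low bits, and the uleb128
 * operand 6 is scaled by data_align = 8, describing %r6 saved at
 * CFA + 48.  That matches the stmg in the prologue, since the CFA is
 * the value of %r15 at entry.
 */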
3630
3631void tcg_register_jit(const void *buf, size_t buf_size)
3632{
3633    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3634}
3635