xref: /openbmc/qemu/tcg/s390x/tcg-target.c.inc (revision fc313c64)
1/*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2009 Ulrich Hecht <uli@suse.de>
5 * Copyright (c) 2009 Alexander Graf <agraf@suse.de>
6 * Copyright (c) 2010 Richard Henderson <rth@twiddle.net>
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to deal
10 * in the Software without restriction, including without limitation the rights
11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 * copies of the Software, and to permit persons to whom the Software is
13 * furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 * THE SOFTWARE.
25 */
26
27/* We only support generating code for 64-bit mode.  */
28#if TCG_TARGET_REG_BITS != 64
29#error "unsupported code generation mode"
30#endif
31
32#include "../tcg-pool.c.inc"
33#include "elf.h"
34
/*
 * Translation blocks produced by TCG are generally small enough that
 * every branch target is reachable with a 16-bit displacement.  The
 * 32-bit displacement form is kept selectable here, just in case.
 */
#define USE_LONG_BRANCHES 0
39
/* Backend constant-operand constraints; see tcg_target_const_match(). */
#define TCG_CT_CONST_S16   0x100   /* value fits in int16_t */
#define TCG_CT_CONST_S32   0x200   /* value fits in int32_t */
#define TCG_CT_CONST_S33   0x400   /* -0xffffffff <= value <= 0xffffffff */
#define TCG_CT_CONST_ZERO  0x800   /* value is exactly zero */

/* Register-set masks: 16 bits from bit 0, and 32 bits from bit 32. */
#define ALL_GENERAL_REGS     MAKE_64BIT_MASK(0, 16)
#define ALL_VECTOR_REGS      MAKE_64BIT_MASK(32, 32)
47
/*
 * With softmmu, the tlb lookup and the call to the helper function use
 * the first 3 argument registers, so those must be kept free of
 * conflicts.  Without softmmu nothing needs to be reserved.
 */
#if defined(CONFIG_SOFTMMU)
#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_R2, 3)
#else
#define SOFTMMU_RESERVE_REGS 0
#endif
58
59
/*
 * In several places within the instruction set, a register field of 0
 * means "no register" rather than TCG_REG_R0.
 */
#define TCG_REG_NONE    0

/* A scratch register that may be used throughout the backend.  */
#define TCG_TMP0        TCG_REG_R1

/*
 * A register holding a pointer to the beginning of the TB.  It is not
 * needed when pc-relative loads are available through the general
 * instructions extension facility.
 */
#define TCG_REG_TB      TCG_REG_R12
#define USE_REG_TB      (!HAVE_FACILITY(GEN_INST_EXT))

/* The guest base register is only defined for non-softmmu builds. */
#if !defined(CONFIG_SOFTMMU)
#define TCG_GUEST_BASE_REG TCG_REG_R13
#endif
76
/*
 * Opcode constants.  Each name is prefixed with its instruction format
 * and each value is an 8- or 16-bit quantity, even where the two halves
 * of a 16-bit opcode appear 32 bits apart in the encoded instruction.
 * This makes it easy to copy the values from the tables in Appendix B.
 */
typedef enum S390Opcode {
    /* RIL format */
    RIL_AFI     = 0xc209,
    RIL_AGFI    = 0xc208,
    RIL_ALFI    = 0xc20b,
    RIL_ALGFI   = 0xc20a,
    RIL_BRASL   = 0xc005,
    RIL_BRCL    = 0xc004,
    RIL_CFI     = 0xc20d,
    RIL_CGFI    = 0xc20c,
    RIL_CLFI    = 0xc20f,
    RIL_CLGFI   = 0xc20e,
    RIL_CLRL    = 0xc60f,
    RIL_CLGRL   = 0xc60a,
    RIL_CRL     = 0xc60d,
    RIL_CGRL    = 0xc608,
    RIL_IIHF    = 0xc008,
    RIL_IILF    = 0xc009,
    RIL_LARL    = 0xc000,
    RIL_LGFI    = 0xc001,
    RIL_LGRL    = 0xc408,
    RIL_LLIHF   = 0xc00e,
    RIL_LLILF   = 0xc00f,
    RIL_LRL     = 0xc40d,
    RIL_MSFI    = 0xc201,
    RIL_MSGFI   = 0xc200,
    RIL_NIHF    = 0xc00a,
    RIL_NILF    = 0xc00b,
    RIL_OIHF    = 0xc00c,
    RIL_OILF    = 0xc00d,
    RIL_SLFI    = 0xc205,
    RIL_SLGFI   = 0xc204,
    RIL_XIHF    = 0xc006,
    RIL_XILF    = 0xc007,

    /* RI format */
    RI_AGHI     = 0xa70b,
    RI_AHI      = 0xa70a,
    RI_BRC      = 0xa704,
    RI_CHI      = 0xa70e,
    RI_CGHI     = 0xa70f,
    RI_IIHH     = 0xa500,
    RI_IIHL     = 0xa501,
    RI_IILH     = 0xa502,
    RI_IILL     = 0xa503,
    RI_LGHI     = 0xa709,
    RI_LLIHH    = 0xa50c,
    RI_LLIHL    = 0xa50d,
    RI_LLILH    = 0xa50e,
    RI_LLILL    = 0xa50f,
    RI_MGHI     = 0xa70d,
    RI_MHI      = 0xa70c,
    RI_NIHH     = 0xa504,
    RI_NIHL     = 0xa505,
    RI_NILH     = 0xa506,
    RI_NILL     = 0xa507,
    RI_OIHH     = 0xa508,
    RI_OIHL     = 0xa509,
    RI_OILH     = 0xa50a,
    RI_OILL     = 0xa50b,

    /* RIE format */
    RIE_CGIJ    = 0xec7c,
    RIE_CGRJ    = 0xec64,
    RIE_CIJ     = 0xec7e,
    RIE_CLGRJ   = 0xec65,
    RIE_CLIJ    = 0xec7f,
    RIE_CLGIJ   = 0xec7d,
    RIE_CLRJ    = 0xec77,
    RIE_CRJ     = 0xec76,
    RIE_LOCGHI  = 0xec46,
    RIE_RISBG   = 0xec55,

    /* RRE format */
    RRE_AGR     = 0xb908,
    RRE_ALGR    = 0xb90a,
    RRE_ALCR    = 0xb998,
    RRE_ALCGR   = 0xb988,
    RRE_CGR     = 0xb920,
    RRE_CLGR    = 0xb921,
    RRE_DLGR    = 0xb987,
    RRE_DLR     = 0xb997,
    RRE_DSGFR   = 0xb91d,
    RRE_DSGR    = 0xb90d,
    RRE_FLOGR   = 0xb983,
    RRE_LGBR    = 0xb906,
    RRE_LCGR    = 0xb903,
    RRE_LGFR    = 0xb914,
    RRE_LGHR    = 0xb907,
    RRE_LGR     = 0xb904,
    RRE_LLGCR   = 0xb984,
    RRE_LLGFR   = 0xb916,
    RRE_LLGHR   = 0xb985,
    RRE_LRVR    = 0xb91f,
    RRE_LRVGR   = 0xb90f,
    RRE_LTGR    = 0xb902,
    RRE_MLGR    = 0xb986,
    RRE_MSGR    = 0xb90c,
    RRE_MSR     = 0xb252,
    RRE_NGR     = 0xb980,
    RRE_OGR     = 0xb981,
    RRE_SGR     = 0xb909,
    RRE_SLGR    = 0xb90b,
    RRE_SLBR    = 0xb999,
    RRE_SLBGR   = 0xb989,
    RRE_XGR     = 0xb982,

    /* RRF format */
    RRF_LOCR    = 0xb9f2,
    RRF_LOCGR   = 0xb9e2,
    RRF_NRK     = 0xb9f4,
    RRF_NGRK    = 0xb9e4,
    RRF_ORK     = 0xb9f6,
    RRF_OGRK    = 0xb9e6,
    RRF_SRK     = 0xb9f9,
    RRF_SGRK    = 0xb9e9,
    RRF_SLRK    = 0xb9fb,
    RRF_SLGRK   = 0xb9eb,
    RRF_XRK     = 0xb9f7,
    RRF_XGRK    = 0xb9e7,

    /* RR format (8-bit opcodes) */
    RR_AR       = 0x1a,
    RR_ALR      = 0x1e,
    RR_BASR     = 0x0d,
    RR_BCR      = 0x07,
    RR_CLR      = 0x15,
    RR_CR       = 0x19,
    RR_DR       = 0x1d,
    RR_LCR      = 0x13,
    RR_LR       = 0x18,
    RR_LTR      = 0x12,
    RR_NR       = 0x14,
    RR_OR       = 0x16,
    RR_SR       = 0x1b,
    RR_SLR      = 0x1f,
    RR_XR       = 0x17,

    /* RSY format */
    RSY_RLL     = 0xeb1d,
    RSY_RLLG    = 0xeb1c,
    RSY_SLLG    = 0xeb0d,
    RSY_SLLK    = 0xebdf,
    RSY_SRAG    = 0xeb0a,
    RSY_SRAK    = 0xebdc,
    RSY_SRLG    = 0xeb0c,
    RSY_SRLK    = 0xebde,

    /* RS format (8-bit opcodes) */
    RS_SLL      = 0x89,
    RS_SRA      = 0x8a,
    RS_SRL      = 0x88,

    /* RXY format */
    RXY_AG      = 0xe308,
    RXY_AY      = 0xe35a,
    RXY_CG      = 0xe320,
    RXY_CLG     = 0xe321,
    RXY_CLY     = 0xe355,
    RXY_CY      = 0xe359,
    RXY_LAY     = 0xe371,
    RXY_LB      = 0xe376,
    RXY_LG      = 0xe304,
    RXY_LGB     = 0xe377,
    RXY_LGF     = 0xe314,
    RXY_LGH     = 0xe315,
    RXY_LHY     = 0xe378,
    RXY_LLGC    = 0xe390,
    RXY_LLGF    = 0xe316,
    RXY_LLGH    = 0xe391,
    RXY_LMG     = 0xeb04,
    RXY_LRV     = 0xe31e,
    RXY_LRVG    = 0xe30f,
    RXY_LRVH    = 0xe31f,
    RXY_LY      = 0xe358,
    RXY_NG      = 0xe380,
    RXY_OG      = 0xe381,
    RXY_STCY    = 0xe372,
    RXY_STG     = 0xe324,
    RXY_STHY    = 0xe370,
    RXY_STMG    = 0xeb24,
    RXY_STRV    = 0xe33e,
    RXY_STRVG   = 0xe32f,
    RXY_STRVH   = 0xe33f,
    RXY_STY     = 0xe350,
    RXY_XG      = 0xe382,

    /* RX format (8-bit opcodes) */
    RX_A        = 0x5a,
    RX_C        = 0x59,
    RX_L        = 0x58,
    RX_LA       = 0x41,
    RX_LH       = 0x48,
    RX_ST       = 0x50,
    RX_STC      = 0x42,
    RX_STH      = 0x40,

    /* VRI formats */
    VRIa_VGBM   = 0xe744,
    VRIa_VREPI  = 0xe745,
    VRIb_VGM    = 0xe746,
    VRIc_VREP   = 0xe74d,

    /* VRR-a format */
    VRRa_VLC    = 0xe7de,
    VRRa_VLP    = 0xe7df,
    VRRa_VLR    = 0xe756,
    VRRa_VUPH   = 0xe7d7,
    VRRa_VUPL   = 0xe7d6,

    /* VRR-c format */
    VRRc_VA     = 0xe7f3,
    VRRc_VCEQ   = 0xe7f8,   /* we leave the m5 cs field 0 */
    VRRc_VCH    = 0xe7fb,   /* likewise */
    VRRc_VCHL   = 0xe7f9,   /* likewise */
    VRRc_VERLLV = 0xe773,
    VRRc_VESLV  = 0xe770,
    VRRc_VESRAV = 0xe77a,
    VRRc_VESRLV = 0xe778,
    VRRc_VML    = 0xe7a2,
    VRRc_VMN    = 0xe7fe,
    VRRc_VMNL   = 0xe7fc,
    VRRc_VMX    = 0xe7ff,
    VRRc_VMXL   = 0xe7fd,
    VRRc_VN     = 0xe768,
    VRRc_VNC    = 0xe769,
    VRRc_VNO    = 0xe76b,
    VRRc_VO     = 0xe76a,
    VRRc_VOC    = 0xe76f,
    VRRc_VPKS   = 0xe797,   /* we leave the m5 cs field 0 */
    VRRc_VS     = 0xe7f7,
    VRRc_VX     = 0xe76d,

    /* VRR-e and VRR-f formats */
    VRRe_VSEL   = 0xe78d,
    VRRf_VLVGP  = 0xe762,

    /* VRS formats */
    VRSa_VERLL  = 0xe733,
    VRSa_VESL   = 0xe730,
    VRSa_VESRA  = 0xe73a,
    VRSa_VESRL  = 0xe738,
    VRSb_VLVG   = 0xe722,
    VRSc_VLGV   = 0xe721,

    /* VRX format */
    VRX_VL      = 0xe706,
    VRX_VLLEZ   = 0xe704,
    VRX_VLREP   = 0xe705,
    VRX_VST     = 0xe70e,
    VRX_VSTEF   = 0xe70b,
    VRX_VSTEG   = 0xe70a,

    NOP         = 0x0707,
} S390Opcode;
318
#if defined(CONFIG_DEBUG_TCG)
/*
 * Printable register names, indexed by register number.  Entries 0-15
 * are the general registers, 16-31 have no name, and 32-63 are the
 * vector registers.
 */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    /* 0-15 */
    "%r0",  "%r1",  "%r2",  "%r3",
    "%r4",  "%r5",  "%r6",  "%r7",
    "%r8",  "%r9",  "%r10", "%r11",
    "%r12", "%r13", "%r14", "%r15",
    /* 16-31: unnamed */
    NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL,
    /* 32-63 */
    "%v0",  "%v1",  "%v2",  "%v3",
    "%v4",  "%v5",  "%v6",  "%v7",
    "%v8",  "%v9",  "%v10", "%v11",
    "%v12", "%v13", "%v14", "%v15",
    "%v16", "%v17", "%v18", "%v19",
    "%v20", "%v21", "%v22", "%v23",
    "%v24", "%v25", "%v26", "%v27",
    "%v28", "%v29", "%v30", "%v31",
};
#endif
330
331/* Since R6 is a potential argument register, choose it last of the
332   call-saved registers.  Likewise prefer the call-clobbered registers
333   in reverse order to maximize the chance of avoiding the arguments.  */
334static const int tcg_target_reg_alloc_order[] = {
335    /* Call saved registers.  */
336    TCG_REG_R13,
337    TCG_REG_R12,
338    TCG_REG_R11,
339    TCG_REG_R10,
340    TCG_REG_R9,
341    TCG_REG_R8,
342    TCG_REG_R7,
343    TCG_REG_R6,
344    /* Call clobbered registers.  */
345    TCG_REG_R14,
346    TCG_REG_R0,
347    TCG_REG_R1,
348    /* Argument registers, in reverse order of allocation.  */
349    TCG_REG_R5,
350    TCG_REG_R4,
351    TCG_REG_R3,
352    TCG_REG_R2,
353
354    /* V8-V15 are call saved, and omitted. */
355    TCG_REG_V0,
356    TCG_REG_V1,
357    TCG_REG_V2,
358    TCG_REG_V3,
359    TCG_REG_V4,
360    TCG_REG_V5,
361    TCG_REG_V6,
362    TCG_REG_V7,
363    TCG_REG_V16,
364    TCG_REG_V17,
365    TCG_REG_V18,
366    TCG_REG_V19,
367    TCG_REG_V20,
368    TCG_REG_V21,
369    TCG_REG_V22,
370    TCG_REG_V23,
371    TCG_REG_V24,
372    TCG_REG_V25,
373    TCG_REG_V26,
374    TCG_REG_V27,
375    TCG_REG_V28,
376    TCG_REG_V29,
377    TCG_REG_V30,
378    TCG_REG_V31,
379};
380
381static const int tcg_target_call_iarg_regs[] = {
382    TCG_REG_R2,
383    TCG_REG_R3,
384    TCG_REG_R4,
385    TCG_REG_R5,
386    TCG_REG_R6,
387};
388
389static const int tcg_target_call_oarg_regs[] = {
390    TCG_REG_R2,
391};
392
/* Single-outcome condition masks. */
#define S390_CC_EQ      8
#define S390_CC_LT      4
#define S390_CC_GT      2
#define S390_CC_OV      1

/* Compound masks, built by OR-ing the single outcomes above. */
#define S390_CC_NE      (S390_CC_LT | S390_CC_GT)
#define S390_CC_LE      (S390_CC_LT | S390_CC_EQ)
#define S390_CC_GE      (S390_CC_GT | S390_CC_EQ)

/* Masks selecting no outcome and all four outcomes. */
#define S390_CC_NEVER   0
#define S390_CC_ALWAYS  15
402
403/* Condition codes that result from a COMPARE and COMPARE LOGICAL.  */
404static const uint8_t tcg_cond_to_s390_cond[] = {
405    [TCG_COND_EQ]  = S390_CC_EQ,
406    [TCG_COND_NE]  = S390_CC_NE,
407    [TCG_COND_LT]  = S390_CC_LT,
408    [TCG_COND_LE]  = S390_CC_LE,
409    [TCG_COND_GT]  = S390_CC_GT,
410    [TCG_COND_GE]  = S390_CC_GE,
411    [TCG_COND_LTU] = S390_CC_LT,
412    [TCG_COND_LEU] = S390_CC_LE,
413    [TCG_COND_GTU] = S390_CC_GT,
414    [TCG_COND_GEU] = S390_CC_GE,
415};
416
417/* Condition codes that result from a LOAD AND TEST.  Here, we have no
418   unsigned instruction variation, however since the test is vs zero we
419   can re-map the outcomes appropriately.  */
420static const uint8_t tcg_cond_to_ltr_cond[] = {
421    [TCG_COND_EQ]  = S390_CC_EQ,
422    [TCG_COND_NE]  = S390_CC_NE,
423    [TCG_COND_LT]  = S390_CC_LT,
424    [TCG_COND_LE]  = S390_CC_LE,
425    [TCG_COND_GT]  = S390_CC_GT,
426    [TCG_COND_GE]  = S390_CC_GE,
427    [TCG_COND_LTU] = S390_CC_NEVER,
428    [TCG_COND_LEU] = S390_CC_EQ,
429    [TCG_COND_GTU] = S390_CC_NE,
430    [TCG_COND_GEU] = S390_CC_ALWAYS,
431};
432
#if defined(CONFIG_SOFTMMU)
/* Load helpers, indexed by size, signedness and byte-swap flag. */
static void * const qemu_ld_helpers[(MO_SSIZE | MO_BSWAP) + 1] = {
    /* Single-byte accesses */
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_SB]   = helper_ret_ldsb_mmu,

    /* Little-endian accesses */
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LESW] = helper_le_ldsw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LESL] = helper_le_ldsl_mmu,
    [MO_LEUQ] = helper_le_ldq_mmu,

    /* Big-endian accesses */
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BESW] = helper_be_ldsw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BESL] = helper_be_ldsl_mmu,
    [MO_BEUQ] = helper_be_ldq_mmu,
};

/* Store helpers, indexed by size and byte-swap flag. */
static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
    /* Single-byte accesses */
    [MO_UB]   = helper_ret_stb_mmu,

    /* Little-endian accesses */
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEUQ] = helper_le_stq_mmu,

    /* Big-endian accesses */
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEUQ] = helper_be_stq_mmu,
};
#endif
459
460static const tcg_insn_unit *tb_ret_addr;
461uint64_t s390_facilities[3];
462
463static inline bool is_general_reg(TCGReg r)
464{
465    return r <= TCG_REG_R15;
466}
467
468static inline bool is_vector_reg(TCGReg r)
469{
470    return r >= TCG_REG_V0 && r <= TCG_REG_V31;
471}
472
473static bool patch_reloc(tcg_insn_unit *src_rw, int type,
474                        intptr_t value, intptr_t addend)
475{
476    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
477    intptr_t pcrel2;
478    uint32_t old;
479
480    value += addend;
481    pcrel2 = (tcg_insn_unit *)value - src_rx;
482
483    switch (type) {
484    case R_390_PC16DBL:
485        if (pcrel2 == (int16_t)pcrel2) {
486            tcg_patch16(src_rw, pcrel2);
487            return true;
488        }
489        break;
490    case R_390_PC32DBL:
491        if (pcrel2 == (int32_t)pcrel2) {
492            tcg_patch32(src_rw, pcrel2);
493            return true;
494        }
495        break;
496    case R_390_20:
497        if (value == sextract64(value, 0, 20)) {
498            old = *(uint32_t *)src_rw & 0xf00000ff;
499            old |= ((value & 0xfff) << 16) | ((value & 0xff000) >> 4);
500            tcg_patch32(src_rw, old);
501            return true;
502        }
503        break;
504    default:
505        g_assert_not_reached();
506    }
507    return false;
508}
509
510/* Test if a constant matches the constraint. */
511static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
512{
513    if (ct & TCG_CT_CONST) {
514        return 1;
515    }
516
517    if (type == TCG_TYPE_I32) {
518        val = (int32_t)val;
519    }
520
521    /* The following are mutually exclusive.  */
522    if (ct & TCG_CT_CONST_S16) {
523        return val == (int16_t)val;
524    } else if (ct & TCG_CT_CONST_S32) {
525        return val == (int32_t)val;
526    } else if (ct & TCG_CT_CONST_S33) {
527        return val >= -0xffffffffll && val <= 0xffffffffll;
528    } else if (ct & TCG_CT_CONST_ZERO) {
529        return val == 0;
530    }
531
532    return 0;
533}
534
535/* Emit instructions according to the given instruction format.  */
536
537static void tcg_out_insn_RR(TCGContext *s, S390Opcode op, TCGReg r1, TCGReg r2)
538{
539    tcg_out16(s, (op << 8) | (r1 << 4) | r2);
540}
541
542static void tcg_out_insn_RRE(TCGContext *s, S390Opcode op,
543                             TCGReg r1, TCGReg r2)
544{
545    tcg_out32(s, (op << 16) | (r1 << 4) | r2);
546}
547
548static void tcg_out_insn_RRF(TCGContext *s, S390Opcode op,
549                             TCGReg r1, TCGReg r2, int m3)
550{
551    tcg_out32(s, (op << 16) | (m3 << 12) | (r1 << 4) | r2);
552}
553
554static void tcg_out_insn_RI(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
555{
556    tcg_out32(s, (op << 16) | (r1 << 20) | (i2 & 0xffff));
557}
558
559static void tcg_out_insn_RIE(TCGContext *s, S390Opcode op, TCGReg r1,
560                             int i2, int m3)
561{
562    tcg_out16(s, (op & 0xff00) | (r1 << 4) | m3);
563    tcg_out32(s, (i2 << 16) | (op & 0xff));
564}
565
566static void tcg_out_insn_RIL(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
567{
568    tcg_out16(s, op | (r1 << 4));
569    tcg_out32(s, i2);
570}
571
572static void tcg_out_insn_RS(TCGContext *s, S390Opcode op, TCGReg r1,
573                            TCGReg b2, TCGReg r3, int disp)
574{
575    tcg_out32(s, (op << 24) | (r1 << 20) | (r3 << 16) | (b2 << 12)
576              | (disp & 0xfff));
577}
578
579static void tcg_out_insn_RSY(TCGContext *s, S390Opcode op, TCGReg r1,
580                             TCGReg b2, TCGReg r3, int disp)
581{
582    tcg_out16(s, (op & 0xff00) | (r1 << 4) | r3);
583    tcg_out32(s, (op & 0xff) | (b2 << 28)
584              | ((disp & 0xfff) << 16) | ((disp & 0xff000) >> 4));
585}
586
/* RX and RXY instructions are emitted by the RS and RSY emitters. */
#define tcg_out_insn_RX   tcg_out_insn_RS
#define tcg_out_insn_RXY  tcg_out_insn_RSY
589
590static int RXB(TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
591{
592    /*
593     * Shift bit 4 of each regno to its corresponding bit of RXB.
594     * RXB itself begins at bit 8 of the instruction so 8 - 4 = 4
595     * is the left-shift of the 4th operand.
596     */
597    return ((v1 & 0x10) << (4 + 3))
598         | ((v2 & 0x10) << (4 + 2))
599         | ((v3 & 0x10) << (4 + 1))
600         | ((v4 & 0x10) << (4 + 0));
601}
602
603static void tcg_out_insn_VRIa(TCGContext *s, S390Opcode op,
604                              TCGReg v1, uint16_t i2, int m3)
605{
606    tcg_debug_assert(is_vector_reg(v1));
607    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4));
608    tcg_out16(s, i2);
609    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
610}
611
612static void tcg_out_insn_VRIb(TCGContext *s, S390Opcode op,
613                              TCGReg v1, uint8_t i2, uint8_t i3, int m4)
614{
615    tcg_debug_assert(is_vector_reg(v1));
616    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4));
617    tcg_out16(s, (i2 << 8) | (i3 & 0xff));
618    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
619}
620
621static void tcg_out_insn_VRIc(TCGContext *s, S390Opcode op,
622                              TCGReg v1, uint16_t i2, TCGReg v3, int m4)
623{
624    tcg_debug_assert(is_vector_reg(v1));
625    tcg_debug_assert(is_vector_reg(v3));
626    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v3 & 0xf));
627    tcg_out16(s, i2);
628    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, v3, 0) | (m4 << 12));
629}
630
631static void tcg_out_insn_VRRa(TCGContext *s, S390Opcode op,
632                              TCGReg v1, TCGReg v2, int m3)
633{
634    tcg_debug_assert(is_vector_reg(v1));
635    tcg_debug_assert(is_vector_reg(v2));
636    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
637    tcg_out32(s, (op & 0x00ff) | RXB(v1, v2, 0, 0) | (m3 << 12));
638}
639
640static void tcg_out_insn_VRRc(TCGContext *s, S390Opcode op,
641                              TCGReg v1, TCGReg v2, TCGReg v3, int m4)
642{
643    tcg_debug_assert(is_vector_reg(v1));
644    tcg_debug_assert(is_vector_reg(v2));
645    tcg_debug_assert(is_vector_reg(v3));
646    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
647    tcg_out16(s, v3 << 12);
648    tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, 0) | (m4 << 12));
649}
650
651static void tcg_out_insn_VRRe(TCGContext *s, S390Opcode op,
652                              TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
653{
654    tcg_debug_assert(is_vector_reg(v1));
655    tcg_debug_assert(is_vector_reg(v2));
656    tcg_debug_assert(is_vector_reg(v3));
657    tcg_debug_assert(is_vector_reg(v4));
658    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
659    tcg_out16(s, v3 << 12);
660    tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, v4) | (v4 << 12));
661}
662
663static void tcg_out_insn_VRRf(TCGContext *s, S390Opcode op,
664                              TCGReg v1, TCGReg r2, TCGReg r3)
665{
666    tcg_debug_assert(is_vector_reg(v1));
667    tcg_debug_assert(is_general_reg(r2));
668    tcg_debug_assert(is_general_reg(r3));
669    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r2);
670    tcg_out16(s, r3 << 12);
671    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0));
672}
673
674static void tcg_out_insn_VRSa(TCGContext *s, S390Opcode op, TCGReg v1,
675                              intptr_t d2, TCGReg b2, TCGReg v3, int m4)
676{
677    tcg_debug_assert(is_vector_reg(v1));
678    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
679    tcg_debug_assert(is_general_reg(b2));
680    tcg_debug_assert(is_vector_reg(v3));
681    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v3 & 0xf));
682    tcg_out16(s, b2 << 12 | d2);
683    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, v3, 0) | (m4 << 12));
684}
685
686static void tcg_out_insn_VRSb(TCGContext *s, S390Opcode op, TCGReg v1,
687                              intptr_t d2, TCGReg b2, TCGReg r3, int m4)
688{
689    tcg_debug_assert(is_vector_reg(v1));
690    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
691    tcg_debug_assert(is_general_reg(b2));
692    tcg_debug_assert(is_general_reg(r3));
693    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r3);
694    tcg_out16(s, b2 << 12 | d2);
695    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
696}
697
698static void tcg_out_insn_VRSc(TCGContext *s, S390Opcode op, TCGReg r1,
699                              intptr_t d2, TCGReg b2, TCGReg v3, int m4)
700{
701    tcg_debug_assert(is_general_reg(r1));
702    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
703    tcg_debug_assert(is_general_reg(b2));
704    tcg_debug_assert(is_vector_reg(v3));
705    tcg_out16(s, (op & 0xff00) | (r1 << 4) | (v3 & 0xf));
706    tcg_out16(s, b2 << 12 | d2);
707    tcg_out16(s, (op & 0x00ff) | RXB(0, 0, v3, 0) | (m4 << 12));
708}
709
710static void tcg_out_insn_VRX(TCGContext *s, S390Opcode op, TCGReg v1,
711                             TCGReg b2, TCGReg x2, intptr_t d2, int m3)
712{
713    tcg_debug_assert(is_vector_reg(v1));
714    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
715    tcg_debug_assert(is_general_reg(x2));
716    tcg_debug_assert(is_general_reg(b2));
717    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | x2);
718    tcg_out16(s, (b2 << 12) | d2);
719    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
720}
721
/*
 * Emit an opcode with "type-checking" of the format: FMT selects both
 * the tcg_out_insn_<FMT> emitter and the <FMT>_<OP> opcode constant, so
 * a format/opcode pair that does not exist fails to compile.
 */
#define tcg_out_insn(S, FMT, OP, ...)                       \
    glue(tcg_out_insn_,FMT)(S, glue(glue(FMT,_),OP), ## __VA_ARGS__)
725
726
727/* emit 64-bit shifts */
728static void tcg_out_sh64(TCGContext* s, S390Opcode op, TCGReg dest,
729                         TCGReg src, TCGReg sh_reg, int sh_imm)
730{
731    tcg_out_insn_RSY(s, op, dest, sh_reg, src, sh_imm);
732}
733
734/* emit 32-bit shifts */
735static void tcg_out_sh32(TCGContext* s, S390Opcode op, TCGReg dest,
736                         TCGReg sh_reg, int sh_imm)
737{
738    tcg_out_insn_RS(s, op, dest, sh_reg, 0, sh_imm);
739}
740
741static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
742{
743    if (src == dst) {
744        return true;
745    }
746    switch (type) {
747    case TCG_TYPE_I32:
748        if (likely(is_general_reg(dst) && is_general_reg(src))) {
749            tcg_out_insn(s, RR, LR, dst, src);
750            break;
751        }
752        /* fallthru */
753
754    case TCG_TYPE_I64:
755        if (likely(is_general_reg(dst))) {
756            if (likely(is_general_reg(src))) {
757                tcg_out_insn(s, RRE, LGR, dst, src);
758            } else {
759                tcg_out_insn(s, VRSc, VLGV, dst, 0, 0, src, 3);
760            }
761            break;
762        } else if (is_general_reg(src)) {
763            tcg_out_insn(s, VRSb, VLVG, dst, 0, 0, src, 3);
764            break;
765        }
766        /* fallthru */
767
768    case TCG_TYPE_V64:
769    case TCG_TYPE_V128:
770        tcg_out_insn(s, VRRa, VLR, dst, src, 0);
771        break;
772
773    default:
774        g_assert_not_reached();
775    }
776    return true;
777}
778
779static const S390Opcode lli_insns[4] = {
780    RI_LLILL, RI_LLILH, RI_LLIHL, RI_LLIHH
781};
782
783static bool maybe_out_small_movi(TCGContext *s, TCGType type,
784                                 TCGReg ret, tcg_target_long sval)
785{
786    tcg_target_ulong uval = sval;
787    int i;
788
789    if (type == TCG_TYPE_I32) {
790        uval = (uint32_t)sval;
791        sval = (int32_t)sval;
792    }
793
794    /* Try all 32-bit insns that can load it in one go.  */
795    if (sval >= -0x8000 && sval < 0x8000) {
796        tcg_out_insn(s, RI, LGHI, ret, sval);
797        return true;
798    }
799
800    for (i = 0; i < 4; i++) {
801        tcg_target_long mask = 0xffffull << i*16;
802        if ((uval & mask) == uval) {
803            tcg_out_insn_RI(s, lli_insns[i], ret, uval >> i*16);
804            return true;
805        }
806    }
807
808    return false;
809}
810
811/* load a register with an immediate value */
812static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
813                             tcg_target_long sval, bool in_prologue)
814{
815    tcg_target_ulong uval;
816
817    /* Try all 32-bit insns that can load it in one go.  */
818    if (maybe_out_small_movi(s, type, ret, sval)) {
819        return;
820    }
821
822    uval = sval;
823    if (type == TCG_TYPE_I32) {
824        uval = (uint32_t)sval;
825        sval = (int32_t)sval;
826    }
827
828    /* Try all 48-bit insns that can load it in one go.  */
829    if (HAVE_FACILITY(EXT_IMM)) {
830        if (sval == (int32_t)sval) {
831            tcg_out_insn(s, RIL, LGFI, ret, sval);
832            return;
833        }
834        if (uval <= 0xffffffff) {
835            tcg_out_insn(s, RIL, LLILF, ret, uval);
836            return;
837        }
838        if ((uval & 0xffffffff) == 0) {
839            tcg_out_insn(s, RIL, LLIHF, ret, uval >> 32);
840            return;
841        }
842    }
843
844    /* Try for PC-relative address load.  For odd addresses,
845       attempt to use an offset from the start of the TB.  */
846    if ((sval & 1) == 0) {
847        ptrdiff_t off = tcg_pcrel_diff(s, (void *)sval) >> 1;
848        if (off == (int32_t)off) {
849            tcg_out_insn(s, RIL, LARL, ret, off);
850            return;
851        }
852    } else if (USE_REG_TB && !in_prologue) {
853        ptrdiff_t off = tcg_tbrel_diff(s, (void *)sval);
854        if (off == sextract64(off, 0, 20)) {
855            /* This is certain to be an address within TB, and therefore
856               OFF will be negative; don't try RX_LA.  */
857            tcg_out_insn(s, RXY, LAY, ret, TCG_REG_TB, TCG_REG_NONE, off);
858            return;
859        }
860    }
861
862    /* A 32-bit unsigned value can be loaded in 2 insns.  And given
863       that LLILL, LLIHL, LLILF above did not succeed, we know that
864       both insns are required.  */
865    if (uval <= 0xffffffff) {
866        tcg_out_insn(s, RI, LLILL, ret, uval);
867        tcg_out_insn(s, RI, IILH, ret, uval >> 16);
868        return;
869    }
870
871    /* Otherwise, stuff it in the constant pool.  */
872    if (HAVE_FACILITY(GEN_INST_EXT)) {
873        tcg_out_insn(s, RIL, LGRL, ret, 0);
874        new_pool_label(s, sval, R_390_PC32DBL, s->code_ptr - 2, 2);
875    } else if (USE_REG_TB && !in_prologue) {
876        tcg_out_insn(s, RXY, LG, ret, TCG_REG_TB, TCG_REG_NONE, 0);
877        new_pool_label(s, sval, R_390_20, s->code_ptr - 2,
878                       tcg_tbrel_diff(s, NULL));
879    } else {
880        TCGReg base = ret ? ret : TCG_TMP0;
881        tcg_out_insn(s, RIL, LARL, base, 0);
882        new_pool_label(s, sval, R_390_PC32DBL, s->code_ptr - 2, 2);
883        tcg_out_insn(s, RXY, LG, ret, base, TCG_REG_NONE, 0);
884    }
885}
886
887static void tcg_out_movi(TCGContext *s, TCGType type,
888                         TCGReg ret, tcg_target_long sval)
889{
890    tcg_out_movi_int(s, type, ret, sval, false);
891}
892
893/* Emit a load/store type instruction.  Inputs are:
894   DATA:     The register to be loaded or stored.
895   BASE+OFS: The effective address.
896   OPC_RX:   If the operation has an RX format opcode (e.g. STC), otherwise 0.
897   OPC_RXY:  The RXY format opcode for the operation (e.g. STCY).  */
898
899static void tcg_out_mem(TCGContext *s, S390Opcode opc_rx, S390Opcode opc_rxy,
900                        TCGReg data, TCGReg base, TCGReg index,
901                        tcg_target_long ofs)
902{
903    if (ofs < -0x80000 || ofs >= 0x80000) {
904        /* Combine the low 20 bits of the offset with the actual load insn;
905           the high 44 bits must come from an immediate load.  */
906        tcg_target_long low = ((ofs & 0xfffff) ^ 0x80000) - 0x80000;
907        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - low);
908        ofs = low;
909
910        /* If we were already given an index register, add it in.  */
911        if (index != TCG_REG_NONE) {
912            tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
913        }
914        index = TCG_TMP0;
915    }
916
917    if (opc_rx && ofs >= 0 && ofs < 0x1000) {
918        tcg_out_insn_RX(s, opc_rx, data, base, index, ofs);
919    } else {
920        tcg_out_insn_RXY(s, opc_rxy, data, base, index, ofs);
921    }
922}
923
924static void tcg_out_vrx_mem(TCGContext *s, S390Opcode opc_vrx,
925                            TCGReg data, TCGReg base, TCGReg index,
926                            tcg_target_long ofs, int m3)
927{
928    if (ofs < 0 || ofs >= 0x1000) {
929        if (ofs >= -0x80000 && ofs < 0x80000) {
930            tcg_out_insn(s, RXY, LAY, TCG_TMP0, base, index, ofs);
931            base = TCG_TMP0;
932            index = TCG_REG_NONE;
933            ofs = 0;
934        } else {
935            tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs);
936            if (index != TCG_REG_NONE) {
937                tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
938            }
939            index = TCG_TMP0;
940            ofs = 0;
941        }
942    }
943    tcg_out_insn_VRX(s, opc_vrx, data, base, index, ofs, m3);
944}
945
946/* load data without address translation or endianness conversion */
947static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data,
948                       TCGReg base, intptr_t ofs)
949{
950    switch (type) {
951    case TCG_TYPE_I32:
952        if (likely(is_general_reg(data))) {
953            tcg_out_mem(s, RX_L, RXY_LY, data, base, TCG_REG_NONE, ofs);
954            break;
955        }
956        tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_32);
957        break;
958
959    case TCG_TYPE_I64:
960        if (likely(is_general_reg(data))) {
961            tcg_out_mem(s, 0, RXY_LG, data, base, TCG_REG_NONE, ofs);
962            break;
963        }
964        /* fallthru */
965
966    case TCG_TYPE_V64:
967        tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_64);
968        break;
969
970    case TCG_TYPE_V128:
971        /* Hint quadword aligned.  */
972        tcg_out_vrx_mem(s, VRX_VL, data, base, TCG_REG_NONE, ofs, 4);
973        break;
974
975    default:
976        g_assert_not_reached();
977    }
978}
979
980static void tcg_out_st(TCGContext *s, TCGType type, TCGReg data,
981                       TCGReg base, intptr_t ofs)
982{
983    switch (type) {
984    case TCG_TYPE_I32:
985        if (likely(is_general_reg(data))) {
986            tcg_out_mem(s, RX_ST, RXY_STY, data, base, TCG_REG_NONE, ofs);
987        } else {
988            tcg_out_vrx_mem(s, VRX_VSTEF, data, base, TCG_REG_NONE, ofs, 1);
989        }
990        break;
991
992    case TCG_TYPE_I64:
993        if (likely(is_general_reg(data))) {
994            tcg_out_mem(s, 0, RXY_STG, data, base, TCG_REG_NONE, ofs);
995            break;
996        }
997        /* fallthru */
998
999    case TCG_TYPE_V64:
1000        tcg_out_vrx_mem(s, VRX_VSTEG, data, base, TCG_REG_NONE, ofs, 0);
1001        break;
1002
1003    case TCG_TYPE_V128:
1004        /* Hint quadword aligned.  */
1005        tcg_out_vrx_mem(s, VRX_VST, data, base, TCG_REG_NONE, ofs, 4);
1006        break;
1007
1008    default:
1009        g_assert_not_reached();
1010    }
1011}
1012
1013static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1014                               TCGReg base, intptr_t ofs)
1015{
1016    return false;
1017}
1018
1019/* load data from an absolute host address */
1020static void tcg_out_ld_abs(TCGContext *s, TCGType type,
1021                           TCGReg dest, const void *abs)
1022{
1023    intptr_t addr = (intptr_t)abs;
1024
1025    if (HAVE_FACILITY(GEN_INST_EXT) && !(addr & 1)) {
1026        ptrdiff_t disp = tcg_pcrel_diff(s, abs) >> 1;
1027        if (disp == (int32_t)disp) {
1028            if (type == TCG_TYPE_I32) {
1029                tcg_out_insn(s, RIL, LRL, dest, disp);
1030            } else {
1031                tcg_out_insn(s, RIL, LGRL, dest, disp);
1032            }
1033            return;
1034        }
1035    }
1036    if (USE_REG_TB) {
1037        ptrdiff_t disp = tcg_tbrel_diff(s, abs);
1038        if (disp == sextract64(disp, 0, 20)) {
1039            tcg_out_ld(s, type, dest, TCG_REG_TB, disp);
1040            return;
1041        }
1042    }
1043
1044    tcg_out_movi(s, TCG_TYPE_PTR, dest, addr & ~0xffff);
1045    tcg_out_ld(s, type, dest, dest, addr & 0xffff);
1046}
1047
1048static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src,
1049                                 int msb, int lsb, int ofs, int z)
1050{
1051    /* Format RIE-f */
1052    tcg_out16(s, (RIE_RISBG & 0xff00) | (dest << 4) | src);
1053    tcg_out16(s, (msb << 8) | (z << 7) | lsb);
1054    tcg_out16(s, (ofs << 8) | (RIE_RISBG & 0xff));
1055}
1056
1057static void tgen_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1058{
1059    if (HAVE_FACILITY(EXT_IMM)) {
1060        tcg_out_insn(s, RRE, LGBR, dest, src);
1061        return;
1062    }
1063
1064    if (type == TCG_TYPE_I32) {
1065        if (dest == src) {
1066            tcg_out_sh32(s, RS_SLL, dest, TCG_REG_NONE, 24);
1067        } else {
1068            tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 24);
1069        }
1070        tcg_out_sh32(s, RS_SRA, dest, TCG_REG_NONE, 24);
1071    } else {
1072        tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 56);
1073        tcg_out_sh64(s, RSY_SRAG, dest, dest, TCG_REG_NONE, 56);
1074    }
1075}
1076
1077static void tgen_ext8u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1078{
1079    if (HAVE_FACILITY(EXT_IMM)) {
1080        tcg_out_insn(s, RRE, LLGCR, dest, src);
1081        return;
1082    }
1083
1084    if (dest == src) {
1085        tcg_out_movi(s, type, TCG_TMP0, 0xff);
1086        src = TCG_TMP0;
1087    } else {
1088        tcg_out_movi(s, type, dest, 0xff);
1089    }
1090    if (type == TCG_TYPE_I32) {
1091        tcg_out_insn(s, RR, NR, dest, src);
1092    } else {
1093        tcg_out_insn(s, RRE, NGR, dest, src);
1094    }
1095}
1096
1097static void tgen_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1098{
1099    if (HAVE_FACILITY(EXT_IMM)) {
1100        tcg_out_insn(s, RRE, LGHR, dest, src);
1101        return;
1102    }
1103
1104    if (type == TCG_TYPE_I32) {
1105        if (dest == src) {
1106            tcg_out_sh32(s, RS_SLL, dest, TCG_REG_NONE, 16);
1107        } else {
1108            tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 16);
1109        }
1110        tcg_out_sh32(s, RS_SRA, dest, TCG_REG_NONE, 16);
1111    } else {
1112        tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 48);
1113        tcg_out_sh64(s, RSY_SRAG, dest, dest, TCG_REG_NONE, 48);
1114    }
1115}
1116
1117static void tgen_ext16u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1118{
1119    if (HAVE_FACILITY(EXT_IMM)) {
1120        tcg_out_insn(s, RRE, LLGHR, dest, src);
1121        return;
1122    }
1123
1124    if (dest == src) {
1125        tcg_out_movi(s, type, TCG_TMP0, 0xffff);
1126        src = TCG_TMP0;
1127    } else {
1128        tcg_out_movi(s, type, dest, 0xffff);
1129    }
1130    if (type == TCG_TYPE_I32) {
1131        tcg_out_insn(s, RR, NR, dest, src);
1132    } else {
1133        tcg_out_insn(s, RRE, NGR, dest, src);
1134    }
1135}
1136
1137static inline void tgen_ext32s(TCGContext *s, TCGReg dest, TCGReg src)
1138{
1139    tcg_out_insn(s, RRE, LGFR, dest, src);
1140}
1141
1142static inline void tgen_ext32u(TCGContext *s, TCGReg dest, TCGReg src)
1143{
1144    tcg_out_insn(s, RRE, LLGFR, dest, src);
1145}
1146
/*
 * Return true if C is a mask made of a single contiguous run of ones,
 * where the run may wrap around from bit 63 to bit 0.  Accepted shapes:
 *    0....01....1
 *    1....10....0
 *    1..10..01..1
 *    0..01..10..0
 * All-zeros and all-ones are rejected.  Copied from gcc sources.
 */
static inline bool risbg_mask(uint64_t c)
{
    uint64_t first, rest, second;

    /* Inverting keeps the number of 0/1 transitions, so normalize the
       value to have its least significant bit clear.  */
    if (c & 1) {
        c = ~c;
    }

    /* What remains of all-zeros or all-ones is zero: no transition.  */
    if (c == 0) {
        return false;
    }

    /* Lowest set bit marks the first transition.  */
    first = c & -c;

    /* Invert and drop everything below the first transition, so that a
       second transition, if present, shows up as the lowest set bit.  */
    rest = ~c & -first;
    second = rest & -rest;

    /* Accept if REST is ones from the second transition upward, or if
       there was no second transition at all (REST is zero).  */
    return rest == -second;
}
1176
1177static void tgen_andi_risbg(TCGContext *s, TCGReg out, TCGReg in, uint64_t val)
1178{
1179    int msb, lsb;
1180    if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
1181        /* Achieve wraparound by swapping msb and lsb.  */
1182        msb = 64 - ctz64(~val);
1183        lsb = clz64(~val) - 1;
1184    } else {
1185        msb = clz64(val);
1186        lsb = 63 - ctz64(val);
1187    }
1188    tcg_out_risbg(s, out, in, msb, lsb, 0, 1);
1189}
1190
1191static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
1192{
1193    static const S390Opcode ni_insns[4] = {
1194        RI_NILL, RI_NILH, RI_NIHL, RI_NIHH
1195    };
1196    static const S390Opcode nif_insns[2] = {
1197        RIL_NILF, RIL_NIHF
1198    };
1199    uint64_t valid = (type == TCG_TYPE_I32 ? 0xffffffffull : -1ull);
1200    int i;
1201
1202    /* Look for the zero-extensions.  */
1203    if ((val & valid) == 0xffffffff) {
1204        tgen_ext32u(s, dest, dest);
1205        return;
1206    }
1207    if (HAVE_FACILITY(EXT_IMM)) {
1208        if ((val & valid) == 0xff) {
1209            tgen_ext8u(s, TCG_TYPE_I64, dest, dest);
1210            return;
1211        }
1212        if ((val & valid) == 0xffff) {
1213            tgen_ext16u(s, TCG_TYPE_I64, dest, dest);
1214            return;
1215        }
1216    }
1217
1218    /* Try all 32-bit insns that can perform it in one go.  */
1219    for (i = 0; i < 4; i++) {
1220        tcg_target_ulong mask = ~(0xffffull << i*16);
1221        if (((val | ~valid) & mask) == mask) {
1222            tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
1223            return;
1224        }
1225    }
1226
1227    /* Try all 48-bit insns that can perform it in one go.  */
1228    if (HAVE_FACILITY(EXT_IMM)) {
1229        for (i = 0; i < 2; i++) {
1230            tcg_target_ulong mask = ~(0xffffffffull << i*32);
1231            if (((val | ~valid) & mask) == mask) {
1232                tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32);
1233                return;
1234            }
1235        }
1236    }
1237    if (HAVE_FACILITY(GEN_INST_EXT) && risbg_mask(val)) {
1238        tgen_andi_risbg(s, dest, dest, val);
1239        return;
1240    }
1241
1242    /* Use the constant pool if USE_REG_TB, but not for small constants.  */
1243    if (USE_REG_TB) {
1244        if (!maybe_out_small_movi(s, type, TCG_TMP0, val)) {
1245            tcg_out_insn(s, RXY, NG, dest, TCG_REG_TB, TCG_REG_NONE, 0);
1246            new_pool_label(s, val & valid, R_390_20, s->code_ptr - 2,
1247                           tcg_tbrel_diff(s, NULL));
1248            return;
1249        }
1250    } else {
1251        tcg_out_movi(s, type, TCG_TMP0, val);
1252    }
1253    if (type == TCG_TYPE_I32) {
1254        tcg_out_insn(s, RR, NR, dest, TCG_TMP0);
1255    } else {
1256        tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0);
1257    }
1258}
1259
1260static void tgen_ori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
1261{
1262    static const S390Opcode oi_insns[4] = {
1263        RI_OILL, RI_OILH, RI_OIHL, RI_OIHH
1264    };
1265    static const S390Opcode oif_insns[2] = {
1266        RIL_OILF, RIL_OIHF
1267    };
1268
1269    int i;
1270
1271    /* Look for no-op.  */
1272    if (unlikely(val == 0)) {
1273        return;
1274    }
1275
1276    /* Try all 32-bit insns that can perform it in one go.  */
1277    for (i = 0; i < 4; i++) {
1278        tcg_target_ulong mask = (0xffffull << i*16);
1279        if ((val & mask) != 0 && (val & ~mask) == 0) {
1280            tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16);
1281            return;
1282        }
1283    }
1284
1285    /* Try all 48-bit insns that can perform it in one go.  */
1286    if (HAVE_FACILITY(EXT_IMM)) {
1287        for (i = 0; i < 2; i++) {
1288            tcg_target_ulong mask = (0xffffffffull << i*32);
1289            if ((val & mask) != 0 && (val & ~mask) == 0) {
1290                tcg_out_insn_RIL(s, oif_insns[i], dest, val >> i*32);
1291                return;
1292            }
1293        }
1294    }
1295
1296    /* Use the constant pool if USE_REG_TB, but not for small constants.  */
1297    if (maybe_out_small_movi(s, type, TCG_TMP0, val)) {
1298        if (type == TCG_TYPE_I32) {
1299            tcg_out_insn(s, RR, OR, dest, TCG_TMP0);
1300        } else {
1301            tcg_out_insn(s, RRE, OGR, dest, TCG_TMP0);
1302        }
1303    } else if (USE_REG_TB) {
1304        tcg_out_insn(s, RXY, OG, dest, TCG_REG_TB, TCG_REG_NONE, 0);
1305        new_pool_label(s, val, R_390_20, s->code_ptr - 2,
1306                       tcg_tbrel_diff(s, NULL));
1307    } else {
1308        /* Perform the OR via sequential modifications to the high and
1309           low parts.  Do this via recursion to handle 16-bit vs 32-bit
1310           masks in each half.  */
1311        tcg_debug_assert(HAVE_FACILITY(EXT_IMM));
1312        tgen_ori(s, type, dest, val & 0x00000000ffffffffull);
1313        tgen_ori(s, type, dest, val & 0xffffffff00000000ull);
1314    }
1315}
1316
1317static void tgen_xori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
1318{
1319    /* Try all 48-bit insns that can perform it in one go.  */
1320    if (HAVE_FACILITY(EXT_IMM)) {
1321        if ((val & 0xffffffff00000000ull) == 0) {
1322            tcg_out_insn(s, RIL, XILF, dest, val);
1323            return;
1324        }
1325        if ((val & 0x00000000ffffffffull) == 0) {
1326            tcg_out_insn(s, RIL, XIHF, dest, val >> 32);
1327            return;
1328        }
1329    }
1330
1331    /* Use the constant pool if USE_REG_TB, but not for small constants.  */
1332    if (maybe_out_small_movi(s, type, TCG_TMP0, val)) {
1333        if (type == TCG_TYPE_I32) {
1334            tcg_out_insn(s, RR, XR, dest, TCG_TMP0);
1335        } else {
1336            tcg_out_insn(s, RRE, XGR, dest, TCG_TMP0);
1337        }
1338    } else if (USE_REG_TB) {
1339        tcg_out_insn(s, RXY, XG, dest, TCG_REG_TB, TCG_REG_NONE, 0);
1340        new_pool_label(s, val, R_390_20, s->code_ptr - 2,
1341                       tcg_tbrel_diff(s, NULL));
1342    } else {
1343        /* Perform the xor by parts.  */
1344        tcg_debug_assert(HAVE_FACILITY(EXT_IMM));
1345        if (val & 0xffffffff) {
1346            tcg_out_insn(s, RIL, XILF, dest, val);
1347        }
1348        if (val > 0xffffffff) {
1349            tcg_out_insn(s, RIL, XIHF, dest, val >> 32);
1350        }
1351    }
1352}
1353
1354static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
1355                    TCGArg c2, bool c2const, bool need_carry)
1356{
1357    bool is_unsigned = is_unsigned_cond(c);
1358    S390Opcode op;
1359
1360    if (c2const) {
1361        if (c2 == 0) {
1362            if (!(is_unsigned && need_carry)) {
1363                if (type == TCG_TYPE_I32) {
1364                    tcg_out_insn(s, RR, LTR, r1, r1);
1365                } else {
1366                    tcg_out_insn(s, RRE, LTGR, r1, r1);
1367                }
1368                return tcg_cond_to_ltr_cond[c];
1369            }
1370        }
1371
1372        if (!is_unsigned && c2 == (int16_t)c2) {
1373            op = (type == TCG_TYPE_I32 ? RI_CHI : RI_CGHI);
1374            tcg_out_insn_RI(s, op, r1, c2);
1375            goto exit;
1376        }
1377
1378        if (HAVE_FACILITY(EXT_IMM)) {
1379            if (type == TCG_TYPE_I32) {
1380                op = (is_unsigned ? RIL_CLFI : RIL_CFI);
1381                tcg_out_insn_RIL(s, op, r1, c2);
1382                goto exit;
1383            } else if (c2 == (is_unsigned ? (TCGArg)(uint32_t)c2 : (TCGArg)(int32_t)c2)) {
1384                op = (is_unsigned ? RIL_CLGFI : RIL_CGFI);
1385                tcg_out_insn_RIL(s, op, r1, c2);
1386                goto exit;
1387            }
1388        }
1389
1390        /* Use the constant pool, but not for small constants.  */
1391        if (maybe_out_small_movi(s, type, TCG_TMP0, c2)) {
1392            c2 = TCG_TMP0;
1393            /* fall through to reg-reg */
1394        } else if (USE_REG_TB) {
1395            if (type == TCG_TYPE_I32) {
1396                op = (is_unsigned ? RXY_CLY : RXY_CY);
1397                tcg_out_insn_RXY(s, op, r1, TCG_REG_TB, TCG_REG_NONE, 0);
1398                new_pool_label(s, (uint32_t)c2, R_390_20, s->code_ptr - 2,
1399                               4 - tcg_tbrel_diff(s, NULL));
1400            } else {
1401                op = (is_unsigned ? RXY_CLG : RXY_CG);
1402                tcg_out_insn_RXY(s, op, r1, TCG_REG_TB, TCG_REG_NONE, 0);
1403                new_pool_label(s, c2, R_390_20, s->code_ptr - 2,
1404                               tcg_tbrel_diff(s, NULL));
1405            }
1406            goto exit;
1407        } else {
1408            if (type == TCG_TYPE_I32) {
1409                op = (is_unsigned ? RIL_CLRL : RIL_CRL);
1410                tcg_out_insn_RIL(s, op, r1, 0);
1411                new_pool_label(s, (uint32_t)c2, R_390_PC32DBL,
1412                               s->code_ptr - 2, 2 + 4);
1413            } else {
1414                op = (is_unsigned ? RIL_CLGRL : RIL_CGRL);
1415                tcg_out_insn_RIL(s, op, r1, 0);
1416                new_pool_label(s, c2, R_390_PC32DBL, s->code_ptr - 2, 2);
1417            }
1418            goto exit;
1419        }
1420    }
1421
1422    if (type == TCG_TYPE_I32) {
1423        op = (is_unsigned ? RR_CLR : RR_CR);
1424        tcg_out_insn_RR(s, op, r1, c2);
1425    } else {
1426        op = (is_unsigned ? RRE_CLGR : RRE_CGR);
1427        tcg_out_insn_RRE(s, op, r1, c2);
1428    }
1429
1430 exit:
1431    return tcg_cond_to_s390_cond[c];
1432}
1433
1434static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
1435                         TCGReg dest, TCGReg c1, TCGArg c2, int c2const)
1436{
1437    int cc;
1438    bool have_loc;
1439
1440    /* With LOC2, we can always emit the minimum 3 insns.  */
1441    if (HAVE_FACILITY(LOAD_ON_COND2)) {
1442        /* Emit: d = 0, d = (cc ? 1 : d).  */
1443        cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
1444        tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
1445        tcg_out_insn(s, RIE, LOCGHI, dest, 1, cc);
1446        return;
1447    }
1448
1449    have_loc = HAVE_FACILITY(LOAD_ON_COND);
1450
1451    /* For HAVE_LOC, only the paths through GTU/GT/LEU/LE are smaller.  */
1452 restart:
1453    switch (cond) {
1454    case TCG_COND_NE:
1455        /* X != 0 is X > 0.  */
1456        if (c2const && c2 == 0) {
1457            cond = TCG_COND_GTU;
1458        } else {
1459            break;
1460        }
1461        /* fallthru */
1462
1463    case TCG_COND_GTU:
1464    case TCG_COND_GT:
1465        /* The result of a compare has CC=2 for GT and CC=3 unused.
1466           ADD LOGICAL WITH CARRY considers (CC & 2) the carry bit.  */
1467        tgen_cmp(s, type, cond, c1, c2, c2const, true);
1468        tcg_out_movi(s, type, dest, 0);
1469        tcg_out_insn(s, RRE, ALCGR, dest, dest);
1470        return;
1471
1472    case TCG_COND_EQ:
1473        /* X == 0 is X <= 0.  */
1474        if (c2const && c2 == 0) {
1475            cond = TCG_COND_LEU;
1476        } else {
1477            break;
1478        }
1479        /* fallthru */
1480
1481    case TCG_COND_LEU:
1482    case TCG_COND_LE:
1483        /* As above, but we're looking for borrow, or !carry.
1484           The second insn computes d - d - borrow, or -1 for true
1485           and 0 for false.  So we must mask to 1 bit afterward.  */
1486        tgen_cmp(s, type, cond, c1, c2, c2const, true);
1487        tcg_out_insn(s, RRE, SLBGR, dest, dest);
1488        tgen_andi(s, type, dest, 1);
1489        return;
1490
1491    case TCG_COND_GEU:
1492    case TCG_COND_LTU:
1493    case TCG_COND_LT:
1494    case TCG_COND_GE:
1495        /* Swap operands so that we can use LEU/GTU/GT/LE.  */
1496        if (c2const) {
1497            if (have_loc) {
1498                break;
1499            }
1500            tcg_out_movi(s, type, TCG_TMP0, c2);
1501            c2 = c1;
1502            c2const = 0;
1503            c1 = TCG_TMP0;
1504        } else {
1505            TCGReg t = c1;
1506            c1 = c2;
1507            c2 = t;
1508        }
1509        cond = tcg_swap_cond(cond);
1510        goto restart;
1511
1512    default:
1513        g_assert_not_reached();
1514    }
1515
1516    cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
1517    if (have_loc) {
1518        /* Emit: d = 0, t = 1, d = (cc ? t : d).  */
1519        tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
1520        tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 1);
1521        tcg_out_insn(s, RRF, LOCGR, dest, TCG_TMP0, cc);
1522    } else {
1523        /* Emit: d = 1; if (cc) goto over; d = 0; over:  */
1524        tcg_out_movi(s, type, dest, 1);
1525        tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1);
1526        tcg_out_movi(s, type, dest, 0);
1527    }
1528}
1529
1530static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest,
1531                         TCGReg c1, TCGArg c2, int c2const,
1532                         TCGArg v3, int v3const)
1533{
1534    int cc;
1535    if (HAVE_FACILITY(LOAD_ON_COND)) {
1536        cc = tgen_cmp(s, type, c, c1, c2, c2const, false);
1537        if (v3const) {
1538            tcg_out_insn(s, RIE, LOCGHI, dest, v3, cc);
1539        } else {
1540            tcg_out_insn(s, RRF, LOCGR, dest, v3, cc);
1541        }
1542    } else {
1543        c = tcg_invert_cond(c);
1544        cc = tgen_cmp(s, type, c, c1, c2, c2const, false);
1545
1546        /* Emit: if (cc) goto over; dest = r3; over:  */
1547        tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1);
1548        tcg_out_insn(s, RRE, LGR, dest, v3);
1549    }
1550}
1551
1552static void tgen_clz(TCGContext *s, TCGReg dest, TCGReg a1,
1553                     TCGArg a2, int a2const)
1554{
1555    /* Since this sets both R and R+1, we have no choice but to store the
1556       result into R0, allowing R1 == TCG_TMP0 to be clobbered as well.  */
1557    QEMU_BUILD_BUG_ON(TCG_TMP0 != TCG_REG_R1);
1558    tcg_out_insn(s, RRE, FLOGR, TCG_REG_R0, a1);
1559
1560    if (a2const && a2 == 64) {
1561        tcg_out_mov(s, TCG_TYPE_I64, dest, TCG_REG_R0);
1562    } else {
1563        if (a2const) {
1564            tcg_out_movi(s, TCG_TYPE_I64, dest, a2);
1565        } else {
1566            tcg_out_mov(s, TCG_TYPE_I64, dest, a2);
1567        }
1568        if (HAVE_FACILITY(LOAD_ON_COND)) {
1569            /* Emit: if (one bit found) dest = r0.  */
1570            tcg_out_insn(s, RRF, LOCGR, dest, TCG_REG_R0, 2);
1571        } else {
1572            /* Emit: if (no one bit found) goto over; dest = r0; over:  */
1573            tcg_out_insn(s, RI, BRC, 8, (4 + 4) >> 1);
1574            tcg_out_insn(s, RRE, LGR, dest, TCG_REG_R0);
1575        }
1576    }
1577}
1578
1579static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
1580                         int ofs, int len, int z)
1581{
1582    int lsb = (63 - ofs);
1583    int msb = lsb - (len - 1);
1584    tcg_out_risbg(s, dest, src, msb, lsb, ofs, z);
1585}
1586
1587static void tgen_extract(TCGContext *s, TCGReg dest, TCGReg src,
1588                         int ofs, int len)
1589{
1590    tcg_out_risbg(s, dest, src, 64 - len, 63, 64 - ofs, 1);
1591}
1592
1593static void tgen_gotoi(TCGContext *s, int cc, const tcg_insn_unit *dest)
1594{
1595    ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
1596    if (off == (int16_t)off) {
1597        tcg_out_insn(s, RI, BRC, cc, off);
1598    } else if (off == (int32_t)off) {
1599        tcg_out_insn(s, RIL, BRCL, cc, off);
1600    } else {
1601        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
1602        tcg_out_insn(s, RR, BCR, cc, TCG_TMP0);
1603    }
1604}
1605
1606static void tgen_branch(TCGContext *s, int cc, TCGLabel *l)
1607{
1608    if (l->has_value) {
1609        tgen_gotoi(s, cc, l->u.value_ptr);
1610    } else if (USE_LONG_BRANCHES) {
1611        tcg_out16(s, RIL_BRCL | (cc << 4));
1612        tcg_out_reloc(s, s->code_ptr, R_390_PC32DBL, l, 2);
1613        s->code_ptr += 2;
1614    } else {
1615        tcg_out16(s, RI_BRC | (cc << 4));
1616        tcg_out_reloc(s, s->code_ptr, R_390_PC16DBL, l, 2);
1617        s->code_ptr += 1;
1618    }
1619}
1620
1621static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc,
1622                                TCGReg r1, TCGReg r2, TCGLabel *l)
1623{
1624    tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
1625    tcg_out16(s, (opc & 0xff00) | (r1 << 4) | r2);
1626    tcg_out16(s, 0);
1627    tcg_out16(s, cc << 12 | (opc & 0xff));
1628}
1629
1630static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc,
1631                                    TCGReg r1, int i2, TCGLabel *l)
1632{
1633    tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
1634    tcg_out16(s, (opc & 0xff00) | (r1 << 4) | cc);
1635    tcg_out16(s, 0);
1636    tcg_out16(s, (i2 << 8) | (opc & 0xff));
1637}
1638
1639static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
1640                        TCGReg r1, TCGArg c2, int c2const, TCGLabel *l)
1641{
1642    int cc;
1643
1644    if (HAVE_FACILITY(GEN_INST_EXT)) {
1645        bool is_unsigned = is_unsigned_cond(c);
1646        bool in_range;
1647        S390Opcode opc;
1648
1649        cc = tcg_cond_to_s390_cond[c];
1650
1651        if (!c2const) {
1652            opc = (type == TCG_TYPE_I32
1653                   ? (is_unsigned ? RIE_CLRJ : RIE_CRJ)
1654                   : (is_unsigned ? RIE_CLGRJ : RIE_CGRJ));
1655            tgen_compare_branch(s, opc, cc, r1, c2, l);
1656            return;
1657        }
1658
1659        /* COMPARE IMMEDIATE AND BRANCH RELATIVE has an 8-bit immediate field.
1660           If the immediate we've been given does not fit that range, we'll
1661           fall back to separate compare and branch instructions using the
1662           larger comparison range afforded by COMPARE IMMEDIATE.  */
1663        if (type == TCG_TYPE_I32) {
1664            if (is_unsigned) {
1665                opc = RIE_CLIJ;
1666                in_range = (uint32_t)c2 == (uint8_t)c2;
1667            } else {
1668                opc = RIE_CIJ;
1669                in_range = (int32_t)c2 == (int8_t)c2;
1670            }
1671        } else {
1672            if (is_unsigned) {
1673                opc = RIE_CLGIJ;
1674                in_range = (uint64_t)c2 == (uint8_t)c2;
1675            } else {
1676                opc = RIE_CGIJ;
1677                in_range = (int64_t)c2 == (int8_t)c2;
1678            }
1679        }
1680        if (in_range) {
1681            tgen_compare_imm_branch(s, opc, cc, r1, c2, l);
1682            return;
1683        }
1684    }
1685
1686    cc = tgen_cmp(s, type, c, r1, c2, c2const, false);
1687    tgen_branch(s, cc, l);
1688}
1689
1690static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest)
1691{
1692    ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
1693    if (off == (int32_t)off) {
1694        tcg_out_insn(s, RIL, BRASL, TCG_REG_R14, off);
1695    } else {
1696        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
1697        tcg_out_insn(s, RR, BASR, TCG_REG_R14, TCG_TMP0);
1698    }
1699}
1700
1701static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,
1702                                   TCGReg base, TCGReg index, int disp)
1703{
1704    switch (opc & (MO_SSIZE | MO_BSWAP)) {
1705    case MO_UB:
1706        tcg_out_insn(s, RXY, LLGC, data, base, index, disp);
1707        break;
1708    case MO_SB:
1709        tcg_out_insn(s, RXY, LGB, data, base, index, disp);
1710        break;
1711
1712    case MO_UW | MO_BSWAP:
1713        /* swapped unsigned halfword load with upper bits zeroed */
1714        tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
1715        tgen_ext16u(s, TCG_TYPE_I64, data, data);
1716        break;
1717    case MO_UW:
1718        tcg_out_insn(s, RXY, LLGH, data, base, index, disp);
1719        break;
1720
1721    case MO_SW | MO_BSWAP:
1722        /* swapped sign-extended halfword load */
1723        tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
1724        tgen_ext16s(s, TCG_TYPE_I64, data, data);
1725        break;
1726    case MO_SW:
1727        tcg_out_insn(s, RXY, LGH, data, base, index, disp);
1728        break;
1729
1730    case MO_UL | MO_BSWAP:
1731        /* swapped unsigned int load with upper bits zeroed */
1732        tcg_out_insn(s, RXY, LRV, data, base, index, disp);
1733        tgen_ext32u(s, data, data);
1734        break;
1735    case MO_UL:
1736        tcg_out_insn(s, RXY, LLGF, data, base, index, disp);
1737        break;
1738
1739    case MO_SL | MO_BSWAP:
1740        /* swapped sign-extended int load */
1741        tcg_out_insn(s, RXY, LRV, data, base, index, disp);
1742        tgen_ext32s(s, data, data);
1743        break;
1744    case MO_SL:
1745        tcg_out_insn(s, RXY, LGF, data, base, index, disp);
1746        break;
1747
1748    case MO_UQ | MO_BSWAP:
1749        tcg_out_insn(s, RXY, LRVG, data, base, index, disp);
1750        break;
1751    case MO_UQ:
1752        tcg_out_insn(s, RXY, LG, data, base, index, disp);
1753        break;
1754
1755    default:
1756        tcg_abort();
1757    }
1758}
1759
1760static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data,
1761                                   TCGReg base, TCGReg index, int disp)
1762{
1763    switch (opc & (MO_SIZE | MO_BSWAP)) {
1764    case MO_UB:
1765        if (disp >= 0 && disp < 0x1000) {
1766            tcg_out_insn(s, RX, STC, data, base, index, disp);
1767        } else {
1768            tcg_out_insn(s, RXY, STCY, data, base, index, disp);
1769        }
1770        break;
1771
1772    case MO_UW | MO_BSWAP:
1773        tcg_out_insn(s, RXY, STRVH, data, base, index, disp);
1774        break;
1775    case MO_UW:
1776        if (disp >= 0 && disp < 0x1000) {
1777            tcg_out_insn(s, RX, STH, data, base, index, disp);
1778        } else {
1779            tcg_out_insn(s, RXY, STHY, data, base, index, disp);
1780        }
1781        break;
1782
1783    case MO_UL | MO_BSWAP:
1784        tcg_out_insn(s, RXY, STRV, data, base, index, disp);
1785        break;
1786    case MO_UL:
1787        if (disp >= 0 && disp < 0x1000) {
1788            tcg_out_insn(s, RX, ST, data, base, index, disp);
1789        } else {
1790            tcg_out_insn(s, RXY, STY, data, base, index, disp);
1791        }
1792        break;
1793
1794    case MO_UQ | MO_BSWAP:
1795        tcg_out_insn(s, RXY, STRVG, data, base, index, disp);
1796        break;
1797    case MO_UQ:
1798        tcg_out_insn(s, RXY, STG, data, base, index, disp);
1799        break;
1800
1801    default:
1802        tcg_abort();
1803    }
1804}
1805
1806#if defined(CONFIG_SOFTMMU)
1807#include "../tcg-ldst.c.inc"
1808
1809/* We're expecting to use a 20-bit negative offset on the tlb memory ops.  */
1810QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1811QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19));
1812
1813/* Load and compare a TLB entry, leaving the flags set.  Loads the TLB
1814   addend into R2.  Returns a register with the santitized guest address.  */
1815static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
1816                               int mem_index, bool is_ld)
1817{
1818    unsigned s_bits = opc & MO_SIZE;
1819    unsigned a_bits = get_alignment_bits(opc);
1820    unsigned s_mask = (1 << s_bits) - 1;
1821    unsigned a_mask = (1 << a_bits) - 1;
1822    int fast_off = TLB_MASK_TABLE_OFS(mem_index);
1823    int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
1824    int table_off = fast_off + offsetof(CPUTLBDescFast, table);
1825    int ofs, a_off;
1826    uint64_t tlb_mask;
1827
1828    tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE,
1829                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1830    tcg_out_insn(s, RXY, NG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, mask_off);
1831    tcg_out_insn(s, RXY, AG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, table_off);
1832
1833    /* For aligned accesses, we check the first byte and include the alignment
1834       bits within the address.  For unaligned access, we check that we don't
1835       cross pages using the address of the last byte of the access.  */
1836    a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
1837    tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1838    if (HAVE_FACILITY(GEN_INST_EXT) && a_off == 0) {
1839        tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask);
1840    } else {
1841        tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off);
1842        tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask);
1843    }
1844
1845    if (is_ld) {
1846        ofs = offsetof(CPUTLBEntry, addr_read);
1847    } else {
1848        ofs = offsetof(CPUTLBEntry, addr_write);
1849    }
1850    if (TARGET_LONG_BITS == 32) {
1851        tcg_out_insn(s, RX, C, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
1852    } else {
1853        tcg_out_insn(s, RXY, CG, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
1854    }
1855
1856    tcg_out_insn(s, RXY, LG, TCG_REG_R2, TCG_REG_R2, TCG_REG_NONE,
1857                 offsetof(CPUTLBEntry, addend));
1858
1859    if (TARGET_LONG_BITS == 32) {
1860        tgen_ext32u(s, TCG_REG_R3, addr_reg);
1861        return TCG_REG_R3;
1862    }
1863    return addr_reg;
1864}
1865
1866static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
1867                                TCGReg data, TCGReg addr,
1868                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1869{
1870    TCGLabelQemuLdst *label = new_ldst_label(s);
1871
1872    label->is_ld = is_ld;
1873    label->oi = oi;
1874    label->datalo_reg = data;
1875    label->addrlo_reg = addr;
1876    label->raddr = tcg_splitwx_to_rx(raddr);
1877    label->label_ptr[0] = label_ptr;
1878}
1879
1880static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1881{
1882    TCGReg addr_reg = lb->addrlo_reg;
1883    TCGReg data_reg = lb->datalo_reg;
1884    MemOpIdx oi = lb->oi;
1885    MemOp opc = get_memop(oi);
1886
1887    if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
1888                     (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1889        return false;
1890    }
1891
1892    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
1893    if (TARGET_LONG_BITS == 64) {
1894        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
1895    }
1896    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi);
1897    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr);
1898    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]);
1899    tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
1900
1901    tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1902    return true;
1903}
1904
1905static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1906{
1907    TCGReg addr_reg = lb->addrlo_reg;
1908    TCGReg data_reg = lb->datalo_reg;
1909    MemOpIdx oi = lb->oi;
1910    MemOp opc = get_memop(oi);
1911
1912    if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
1913                     (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1914        return false;
1915    }
1916
1917    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
1918    if (TARGET_LONG_BITS == 64) {
1919        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
1920    }
1921    switch (opc & MO_SIZE) {
1922    case MO_UB:
1923        tgen_ext8u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
1924        break;
1925    case MO_UW:
1926        tgen_ext16u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
1927        break;
1928    case MO_UL:
1929        tgen_ext32u(s, TCG_REG_R4, data_reg);
1930        break;
1931    case MO_UQ:
1932        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
1933        break;
1934    default:
1935        tcg_abort();
1936    }
1937    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi);
1938    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr);
1939    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1940
1941    tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1942    return true;
1943}
1944#else
1945static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg,
1946                                  TCGReg *index_reg, tcg_target_long *disp)
1947{
1948    if (TARGET_LONG_BITS == 32) {
1949        tgen_ext32u(s, TCG_TMP0, *addr_reg);
1950        *addr_reg = TCG_TMP0;
1951    }
1952    if (guest_base < 0x80000) {
1953        *index_reg = TCG_REG_NONE;
1954        *disp = guest_base;
1955    } else {
1956        *index_reg = TCG_GUEST_BASE_REG;
1957        *disp = 0;
1958    }
1959}
1960#endif /* CONFIG_SOFTMMU */
1961
1962static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
1963                            MemOpIdx oi)
1964{
1965    MemOp opc = get_memop(oi);
1966#ifdef CONFIG_SOFTMMU
1967    unsigned mem_index = get_mmuidx(oi);
1968    tcg_insn_unit *label_ptr;
1969    TCGReg base_reg;
1970
1971    base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1);
1972
1973    tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
1974    label_ptr = s->code_ptr;
1975    s->code_ptr += 1;
1976
1977    tcg_out_qemu_ld_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
1978
1979    add_qemu_ldst_label(s, 1, oi, data_reg, addr_reg, s->code_ptr, label_ptr);
1980#else
1981    TCGReg index_reg;
1982    tcg_target_long disp;
1983
1984    tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp);
1985    tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, index_reg, disp);
1986#endif
1987}
1988
1989static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
1990                            MemOpIdx oi)
1991{
1992    MemOp opc = get_memop(oi);
1993#ifdef CONFIG_SOFTMMU
1994    unsigned mem_index = get_mmuidx(oi);
1995    tcg_insn_unit *label_ptr;
1996    TCGReg base_reg;
1997
1998    base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0);
1999
2000    tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
2001    label_ptr = s->code_ptr;
2002    s->code_ptr += 1;
2003
2004    tcg_out_qemu_st_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
2005
2006    add_qemu_ldst_label(s, 0, oi, data_reg, addr_reg, s->code_ptr, label_ptr);
2007#else
2008    TCGReg index_reg;
2009    tcg_target_long disp;
2010
2011    tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp);
2012    tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, index_reg, disp);
2013#endif
2014}
2015
/*
 * Expand to the two case labels INDEX_op_<x>_i32 and INDEX_op_<x>_i64.
 * The trailing ':' is supplied at the point of use.
 */
#define OP_32_64(x)                          \
    case glue(glue(INDEX_op_, x), _i32):     \
    case glue(glue(INDEX_op_, x), _i64)
2019
2020static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
2021                              const TCGArg args[TCG_MAX_OP_ARGS],
2022                              const int const_args[TCG_MAX_OP_ARGS])
2023{
2024    S390Opcode op, op2;
2025    TCGArg a0, a1, a2;
2026
2027    switch (opc) {
2028    case INDEX_op_exit_tb:
2029        /* Reuse the zeroing that exists for goto_ptr.  */
2030        a0 = args[0];
2031        if (a0 == 0) {
2032            tgen_gotoi(s, S390_CC_ALWAYS, tcg_code_gen_epilogue);
2033        } else {
2034            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, a0);
2035            tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr);
2036        }
2037        break;
2038
2039    case INDEX_op_goto_tb:
2040        a0 = args[0];
2041        if (s->tb_jmp_insn_offset) {
2042            /*
2043             * branch displacement must be aligned for atomic patching;
2044             * see if we need to add extra nop before branch
2045             */
2046            if (!QEMU_PTR_IS_ALIGNED(s->code_ptr + 1, 4)) {
2047                tcg_out16(s, NOP);
2048            }
2049            tcg_debug_assert(!USE_REG_TB);
2050            tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4));
2051            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
2052            s->code_ptr += 2;
2053        } else {
2054            /* load address stored at s->tb_jmp_target_addr + a0 */
2055            tcg_out_ld_abs(s, TCG_TYPE_PTR, TCG_REG_TB,
2056                           tcg_splitwx_to_rx(s->tb_jmp_target_addr + a0));
2057            /* and go there */
2058            tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_TB);
2059        }
2060        set_jmp_reset_offset(s, a0);
2061
2062        /* For the unlinked path of goto_tb, we need to reset
2063           TCG_REG_TB to the beginning of this TB.  */
2064        if (USE_REG_TB) {
2065            int ofs = -tcg_current_code_size(s);
2066            /* All TB are restricted to 64KiB by unwind info. */
2067            tcg_debug_assert(ofs == sextract64(ofs, 0, 20));
2068            tcg_out_insn(s, RXY, LAY, TCG_REG_TB,
2069                         TCG_REG_TB, TCG_REG_NONE, ofs);
2070        }
2071        break;
2072
2073    case INDEX_op_goto_ptr:
2074        a0 = args[0];
2075        if (USE_REG_TB) {
2076            tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, a0);
2077        }
2078        tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, a0);
2079        break;
2080
2081    OP_32_64(ld8u):
2082        /* ??? LLC (RXY format) is only present with the extended-immediate
2083           facility, whereas LLGC is always present.  */
2084        tcg_out_mem(s, 0, RXY_LLGC, args[0], args[1], TCG_REG_NONE, args[2]);
2085        break;
2086
2087    OP_32_64(ld8s):
2088        /* ??? LB is no smaller than LGB, so no point to using it.  */
2089        tcg_out_mem(s, 0, RXY_LGB, args[0], args[1], TCG_REG_NONE, args[2]);
2090        break;
2091
2092    OP_32_64(ld16u):
2093        /* ??? LLH (RXY format) is only present with the extended-immediate
2094           facility, whereas LLGH is always present.  */
2095        tcg_out_mem(s, 0, RXY_LLGH, args[0], args[1], TCG_REG_NONE, args[2]);
2096        break;
2097
2098    case INDEX_op_ld16s_i32:
2099        tcg_out_mem(s, RX_LH, RXY_LHY, args[0], args[1], TCG_REG_NONE, args[2]);
2100        break;
2101
2102    case INDEX_op_ld_i32:
2103        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2104        break;
2105
2106    OP_32_64(st8):
2107        tcg_out_mem(s, RX_STC, RXY_STCY, args[0], args[1],
2108                    TCG_REG_NONE, args[2]);
2109        break;
2110
2111    OP_32_64(st16):
2112        tcg_out_mem(s, RX_STH, RXY_STHY, args[0], args[1],
2113                    TCG_REG_NONE, args[2]);
2114        break;
2115
2116    case INDEX_op_st_i32:
2117        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2118        break;
2119
2120    case INDEX_op_add_i32:
2121        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2122        if (const_args[2]) {
2123        do_addi_32:
2124            if (a0 == a1) {
2125                if (a2 == (int16_t)a2) {
2126                    tcg_out_insn(s, RI, AHI, a0, a2);
2127                    break;
2128                }
2129                if (HAVE_FACILITY(EXT_IMM)) {
2130                    tcg_out_insn(s, RIL, AFI, a0, a2);
2131                    break;
2132                }
2133            }
2134            tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
2135        } else if (a0 == a1) {
2136            tcg_out_insn(s, RR, AR, a0, a2);
2137        } else {
2138            tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
2139        }
2140        break;
2141    case INDEX_op_sub_i32:
2142        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2143        if (const_args[2]) {
2144            a2 = -a2;
2145            goto do_addi_32;
2146        } else if (a0 == a1) {
2147            tcg_out_insn(s, RR, SR, a0, a2);
2148        } else {
2149            tcg_out_insn(s, RRF, SRK, a0, a1, a2);
2150        }
2151        break;
2152
2153    case INDEX_op_and_i32:
2154        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2155        if (const_args[2]) {
2156            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2157            tgen_andi(s, TCG_TYPE_I32, a0, a2);
2158        } else if (a0 == a1) {
2159            tcg_out_insn(s, RR, NR, a0, a2);
2160        } else {
2161            tcg_out_insn(s, RRF, NRK, a0, a1, a2);
2162        }
2163        break;
2164    case INDEX_op_or_i32:
2165        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2166        if (const_args[2]) {
2167            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2168            tgen_ori(s, TCG_TYPE_I32, a0, a2);
2169        } else if (a0 == a1) {
2170            tcg_out_insn(s, RR, OR, a0, a2);
2171        } else {
2172            tcg_out_insn(s, RRF, ORK, a0, a1, a2);
2173        }
2174        break;
2175    case INDEX_op_xor_i32:
2176        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2177        if (const_args[2]) {
2178            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2179            tgen_xori(s, TCG_TYPE_I32, a0, a2);
2180        } else if (a0 == a1) {
2181            tcg_out_insn(s, RR, XR, args[0], args[2]);
2182        } else {
2183            tcg_out_insn(s, RRF, XRK, a0, a1, a2);
2184        }
2185        break;
2186
2187    case INDEX_op_neg_i32:
2188        tcg_out_insn(s, RR, LCR, args[0], args[1]);
2189        break;
2190
2191    case INDEX_op_mul_i32:
2192        if (const_args[2]) {
2193            if ((int32_t)args[2] == (int16_t)args[2]) {
2194                tcg_out_insn(s, RI, MHI, args[0], args[2]);
2195            } else {
2196                tcg_out_insn(s, RIL, MSFI, args[0], args[2]);
2197            }
2198        } else {
2199            tcg_out_insn(s, RRE, MSR, args[0], args[2]);
2200        }
2201        break;
2202
2203    case INDEX_op_div2_i32:
2204        tcg_out_insn(s, RR, DR, TCG_REG_R2, args[4]);
2205        break;
2206    case INDEX_op_divu2_i32:
2207        tcg_out_insn(s, RRE, DLR, TCG_REG_R2, args[4]);
2208        break;
2209
2210    case INDEX_op_shl_i32:
2211        op = RS_SLL;
2212        op2 = RSY_SLLK;
2213    do_shift32:
2214        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2215        if (a0 == a1) {
2216            if (const_args[2]) {
2217                tcg_out_sh32(s, op, a0, TCG_REG_NONE, a2);
2218            } else {
2219                tcg_out_sh32(s, op, a0, a2, 0);
2220            }
2221        } else {
2222            /* Using tcg_out_sh64 here for the format; it is a 32-bit shift.  */
2223            if (const_args[2]) {
2224                tcg_out_sh64(s, op2, a0, a1, TCG_REG_NONE, a2);
2225            } else {
2226                tcg_out_sh64(s, op2, a0, a1, a2, 0);
2227            }
2228        }
2229        break;
2230    case INDEX_op_shr_i32:
2231        op = RS_SRL;
2232        op2 = RSY_SRLK;
2233        goto do_shift32;
2234    case INDEX_op_sar_i32:
2235        op = RS_SRA;
2236        op2 = RSY_SRAK;
2237        goto do_shift32;
2238
2239    case INDEX_op_rotl_i32:
2240        /* ??? Using tcg_out_sh64 here for the format; it is a 32-bit rol.  */
2241        if (const_args[2]) {
2242            tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_REG_NONE, args[2]);
2243        } else {
2244            tcg_out_sh64(s, RSY_RLL, args[0], args[1], args[2], 0);
2245        }
2246        break;
2247    case INDEX_op_rotr_i32:
2248        if (const_args[2]) {
2249            tcg_out_sh64(s, RSY_RLL, args[0], args[1],
2250                         TCG_REG_NONE, (32 - args[2]) & 31);
2251        } else {
2252            tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
2253            tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_TMP0, 0);
2254        }
2255        break;
2256
2257    case INDEX_op_ext8s_i32:
2258        tgen_ext8s(s, TCG_TYPE_I32, args[0], args[1]);
2259        break;
2260    case INDEX_op_ext16s_i32:
2261        tgen_ext16s(s, TCG_TYPE_I32, args[0], args[1]);
2262        break;
2263    case INDEX_op_ext8u_i32:
2264        tgen_ext8u(s, TCG_TYPE_I32, args[0], args[1]);
2265        break;
2266    case INDEX_op_ext16u_i32:
2267        tgen_ext16u(s, TCG_TYPE_I32, args[0], args[1]);
2268        break;
2269
2270    case INDEX_op_bswap16_i32:
2271        a0 = args[0], a1 = args[1], a2 = args[2];
2272        tcg_out_insn(s, RRE, LRVR, a0, a1);
2273        if (a2 & TCG_BSWAP_OS) {
2274            tcg_out_sh32(s, RS_SRA, a0, TCG_REG_NONE, 16);
2275        } else {
2276            tcg_out_sh32(s, RS_SRL, a0, TCG_REG_NONE, 16);
2277        }
2278        break;
2279    case INDEX_op_bswap16_i64:
2280        a0 = args[0], a1 = args[1], a2 = args[2];
2281        tcg_out_insn(s, RRE, LRVGR, a0, a1);
2282        if (a2 & TCG_BSWAP_OS) {
2283            tcg_out_sh64(s, RSY_SRAG, a0, a0, TCG_REG_NONE, 48);
2284        } else {
2285            tcg_out_sh64(s, RSY_SRLG, a0, a0, TCG_REG_NONE, 48);
2286        }
2287        break;
2288
2289    case INDEX_op_bswap32_i32:
2290        tcg_out_insn(s, RRE, LRVR, args[0], args[1]);
2291        break;
2292    case INDEX_op_bswap32_i64:
2293        a0 = args[0], a1 = args[1], a2 = args[2];
2294        tcg_out_insn(s, RRE, LRVR, a0, a1);
2295        if (a2 & TCG_BSWAP_OS) {
2296            tgen_ext32s(s, a0, a0);
2297        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2298            tgen_ext32u(s, a0, a0);
2299        }
2300        break;
2301
2302    case INDEX_op_add2_i32:
2303        if (const_args[4]) {
2304            tcg_out_insn(s, RIL, ALFI, args[0], args[4]);
2305        } else {
2306            tcg_out_insn(s, RR, ALR, args[0], args[4]);
2307        }
2308        tcg_out_insn(s, RRE, ALCR, args[1], args[5]);
2309        break;
2310    case INDEX_op_sub2_i32:
2311        if (const_args[4]) {
2312            tcg_out_insn(s, RIL, SLFI, args[0], args[4]);
2313        } else {
2314            tcg_out_insn(s, RR, SLR, args[0], args[4]);
2315        }
2316        tcg_out_insn(s, RRE, SLBR, args[1], args[5]);
2317        break;
2318
2319    case INDEX_op_br:
2320        tgen_branch(s, S390_CC_ALWAYS, arg_label(args[0]));
2321        break;
2322
2323    case INDEX_op_brcond_i32:
2324        tgen_brcond(s, TCG_TYPE_I32, args[2], args[0],
2325                    args[1], const_args[1], arg_label(args[3]));
2326        break;
2327    case INDEX_op_setcond_i32:
2328        tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1],
2329                     args[2], const_args[2]);
2330        break;
2331    case INDEX_op_movcond_i32:
2332        tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1],
2333                     args[2], const_args[2], args[3], const_args[3]);
2334        break;
2335
2336    case INDEX_op_qemu_ld_i32:
2337        /* ??? Technically we can use a non-extending instruction.  */
2338    case INDEX_op_qemu_ld_i64:
2339        tcg_out_qemu_ld(s, args[0], args[1], args[2]);
2340        break;
2341    case INDEX_op_qemu_st_i32:
2342    case INDEX_op_qemu_st_i64:
2343        tcg_out_qemu_st(s, args[0], args[1], args[2]);
2344        break;
2345
2346    case INDEX_op_ld16s_i64:
2347        tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]);
2348        break;
2349    case INDEX_op_ld32u_i64:
2350        tcg_out_mem(s, 0, RXY_LLGF, args[0], args[1], TCG_REG_NONE, args[2]);
2351        break;
2352    case INDEX_op_ld32s_i64:
2353        tcg_out_mem(s, 0, RXY_LGF, args[0], args[1], TCG_REG_NONE, args[2]);
2354        break;
2355    case INDEX_op_ld_i64:
2356        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2357        break;
2358
2359    case INDEX_op_st32_i64:
2360        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2361        break;
2362    case INDEX_op_st_i64:
2363        tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2364        break;
2365
2366    case INDEX_op_add_i64:
2367        a0 = args[0], a1 = args[1], a2 = args[2];
2368        if (const_args[2]) {
2369        do_addi_64:
2370            if (a0 == a1) {
2371                if (a2 == (int16_t)a2) {
2372                    tcg_out_insn(s, RI, AGHI, a0, a2);
2373                    break;
2374                }
2375                if (HAVE_FACILITY(EXT_IMM)) {
2376                    if (a2 == (int32_t)a2) {
2377                        tcg_out_insn(s, RIL, AGFI, a0, a2);
2378                        break;
2379                    } else if (a2 == (uint32_t)a2) {
2380                        tcg_out_insn(s, RIL, ALGFI, a0, a2);
2381                        break;
2382                    } else if (-a2 == (uint32_t)-a2) {
2383                        tcg_out_insn(s, RIL, SLGFI, a0, -a2);
2384                        break;
2385                    }
2386                }
2387            }
2388            tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
2389        } else if (a0 == a1) {
2390            tcg_out_insn(s, RRE, AGR, a0, a2);
2391        } else {
2392            tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
2393        }
2394        break;
2395    case INDEX_op_sub_i64:
2396        a0 = args[0], a1 = args[1], a2 = args[2];
2397        if (const_args[2]) {
2398            a2 = -a2;
2399            goto do_addi_64;
2400        } else if (a0 == a1) {
2401            tcg_out_insn(s, RRE, SGR, a0, a2);
2402        } else {
2403            tcg_out_insn(s, RRF, SGRK, a0, a1, a2);
2404        }
2405        break;
2406
2407    case INDEX_op_and_i64:
2408        a0 = args[0], a1 = args[1], a2 = args[2];
2409        if (const_args[2]) {
2410            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2411            tgen_andi(s, TCG_TYPE_I64, args[0], args[2]);
2412        } else if (a0 == a1) {
2413            tcg_out_insn(s, RRE, NGR, args[0], args[2]);
2414        } else {
2415            tcg_out_insn(s, RRF, NGRK, a0, a1, a2);
2416        }
2417        break;
2418    case INDEX_op_or_i64:
2419        a0 = args[0], a1 = args[1], a2 = args[2];
2420        if (const_args[2]) {
2421            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2422            tgen_ori(s, TCG_TYPE_I64, a0, a2);
2423        } else if (a0 == a1) {
2424            tcg_out_insn(s, RRE, OGR, a0, a2);
2425        } else {
2426            tcg_out_insn(s, RRF, OGRK, a0, a1, a2);
2427        }
2428        break;
2429    case INDEX_op_xor_i64:
2430        a0 = args[0], a1 = args[1], a2 = args[2];
2431        if (const_args[2]) {
2432            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2433            tgen_xori(s, TCG_TYPE_I64, a0, a2);
2434        } else if (a0 == a1) {
2435            tcg_out_insn(s, RRE, XGR, a0, a2);
2436        } else {
2437            tcg_out_insn(s, RRF, XGRK, a0, a1, a2);
2438        }
2439        break;
2440
2441    case INDEX_op_neg_i64:
2442        tcg_out_insn(s, RRE, LCGR, args[0], args[1]);
2443        break;
2444    case INDEX_op_bswap64_i64:
2445        tcg_out_insn(s, RRE, LRVGR, args[0], args[1]);
2446        break;
2447
2448    case INDEX_op_mul_i64:
2449        if (const_args[2]) {
2450            if (args[2] == (int16_t)args[2]) {
2451                tcg_out_insn(s, RI, MGHI, args[0], args[2]);
2452            } else {
2453                tcg_out_insn(s, RIL, MSGFI, args[0], args[2]);
2454            }
2455        } else {
2456            tcg_out_insn(s, RRE, MSGR, args[0], args[2]);
2457        }
2458        break;
2459
2460    case INDEX_op_div2_i64:
2461        /* ??? We get an unnecessary sign-extension of the dividend
2462           into R3 with this definition, but as we do in fact always
2463           produce both quotient and remainder using INDEX_op_div_i64
2464           instead requires jumping through even more hoops.  */
2465        tcg_out_insn(s, RRE, DSGR, TCG_REG_R2, args[4]);
2466        break;
2467    case INDEX_op_divu2_i64:
2468        tcg_out_insn(s, RRE, DLGR, TCG_REG_R2, args[4]);
2469        break;
2470    case INDEX_op_mulu2_i64:
2471        tcg_out_insn(s, RRE, MLGR, TCG_REG_R2, args[3]);
2472        break;
2473
2474    case INDEX_op_shl_i64:
2475        op = RSY_SLLG;
2476    do_shift64:
2477        if (const_args[2]) {
2478            tcg_out_sh64(s, op, args[0], args[1], TCG_REG_NONE, args[2]);
2479        } else {
2480            tcg_out_sh64(s, op, args[0], args[1], args[2], 0);
2481        }
2482        break;
2483    case INDEX_op_shr_i64:
2484        op = RSY_SRLG;
2485        goto do_shift64;
2486    case INDEX_op_sar_i64:
2487        op = RSY_SRAG;
2488        goto do_shift64;
2489
2490    case INDEX_op_rotl_i64:
2491        if (const_args[2]) {
2492            tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
2493                         TCG_REG_NONE, args[2]);
2494        } else {
2495            tcg_out_sh64(s, RSY_RLLG, args[0], args[1], args[2], 0);
2496        }
2497        break;
2498    case INDEX_op_rotr_i64:
2499        if (const_args[2]) {
2500            tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
2501                         TCG_REG_NONE, (64 - args[2]) & 63);
2502        } else {
2503            /* We can use the smaller 32-bit negate because only the
2504               low 6 bits are examined for the rotate.  */
2505            tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
2506            tcg_out_sh64(s, RSY_RLLG, args[0], args[1], TCG_TMP0, 0);
2507        }
2508        break;
2509
2510    case INDEX_op_ext8s_i64:
2511        tgen_ext8s(s, TCG_TYPE_I64, args[0], args[1]);
2512        break;
2513    case INDEX_op_ext16s_i64:
2514        tgen_ext16s(s, TCG_TYPE_I64, args[0], args[1]);
2515        break;
2516    case INDEX_op_ext_i32_i64:
2517    case INDEX_op_ext32s_i64:
2518        tgen_ext32s(s, args[0], args[1]);
2519        break;
2520    case INDEX_op_ext8u_i64:
2521        tgen_ext8u(s, TCG_TYPE_I64, args[0], args[1]);
2522        break;
2523    case INDEX_op_ext16u_i64:
2524        tgen_ext16u(s, TCG_TYPE_I64, args[0], args[1]);
2525        break;
2526    case INDEX_op_extu_i32_i64:
2527    case INDEX_op_ext32u_i64:
2528        tgen_ext32u(s, args[0], args[1]);
2529        break;
2530
2531    case INDEX_op_add2_i64:
2532        if (const_args[4]) {
2533            if ((int64_t)args[4] >= 0) {
2534                tcg_out_insn(s, RIL, ALGFI, args[0], args[4]);
2535            } else {
2536                tcg_out_insn(s, RIL, SLGFI, args[0], -args[4]);
2537            }
2538        } else {
2539            tcg_out_insn(s, RRE, ALGR, args[0], args[4]);
2540        }
2541        tcg_out_insn(s, RRE, ALCGR, args[1], args[5]);
2542        break;
2543    case INDEX_op_sub2_i64:
2544        if (const_args[4]) {
2545            if ((int64_t)args[4] >= 0) {
2546                tcg_out_insn(s, RIL, SLGFI, args[0], args[4]);
2547            } else {
2548                tcg_out_insn(s, RIL, ALGFI, args[0], -args[4]);
2549            }
2550        } else {
2551            tcg_out_insn(s, RRE, SLGR, args[0], args[4]);
2552        }
2553        tcg_out_insn(s, RRE, SLBGR, args[1], args[5]);
2554        break;
2555
2556    case INDEX_op_brcond_i64:
2557        tgen_brcond(s, TCG_TYPE_I64, args[2], args[0],
2558                    args[1], const_args[1], arg_label(args[3]));
2559        break;
2560    case INDEX_op_setcond_i64:
2561        tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1],
2562                     args[2], const_args[2]);
2563        break;
2564    case INDEX_op_movcond_i64:
2565        tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1],
2566                     args[2], const_args[2], args[3], const_args[3]);
2567        break;
2568
2569    OP_32_64(deposit):
2570        a0 = args[0], a1 = args[1], a2 = args[2];
2571        if (const_args[1]) {
2572            tgen_deposit(s, a0, a2, args[3], args[4], 1);
2573        } else {
2574            /* Since we can't support "0Z" as a constraint, we allow a1 in
2575               any register.  Fix things up as if a matching constraint.  */
2576            if (a0 != a1) {
2577                TCGType type = (opc == INDEX_op_deposit_i64);
2578                if (a0 == a2) {
2579                    tcg_out_mov(s, type, TCG_TMP0, a2);
2580                    a2 = TCG_TMP0;
2581                }
2582                tcg_out_mov(s, type, a0, a1);
2583            }
2584            tgen_deposit(s, a0, a2, args[3], args[4], 0);
2585        }
2586        break;
2587
2588    OP_32_64(extract):
2589        tgen_extract(s, args[0], args[1], args[2], args[3]);
2590        break;
2591
2592    case INDEX_op_clz_i64:
2593        tgen_clz(s, args[0], args[1], args[2], const_args[2]);
2594        break;
2595
2596    case INDEX_op_mb:
2597        /* The host memory model is quite strong, we simply need to
2598           serialize the instruction stream.  */
2599        if (args[0] & TCG_MO_ST_LD) {
2600            tcg_out_insn(s, RR, BCR, HAVE_FACILITY(FAST_BCR_SER) ? 14 : 15, 0);
2601        }
2602        break;
2603
2604    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2605    case INDEX_op_mov_i64:
2606    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2607    default:
2608        tcg_abort();
2609    }
2610}
2611
2612static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
2613                            TCGReg dst, TCGReg src)
2614{
2615    if (is_general_reg(src)) {
2616        /* Replicate general register into two MO_64. */
2617        tcg_out_insn(s, VRRf, VLVGP, dst, src, src);
2618        if (vece == MO_64) {
2619            return true;
2620        }
2621    }
2622
2623    /*
2624     * Recall that the "standard" integer, within a vector, is the
2625     * rightmost element of the leftmost doubleword, a-la VLLEZ.
2626     */
2627    tcg_out_insn(s, VRIc, VREP, dst, (8 >> vece) - 1, src, vece);
2628    return true;
2629}
2630
2631static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
2632                             TCGReg dst, TCGReg base, intptr_t offset)
2633{
2634    tcg_out_vrx_mem(s, VRX_VLREP, dst, base, TCG_REG_NONE, offset, vece);
2635    return true;
2636}
2637
2638static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
2639                             TCGReg dst, int64_t val)
2640{
2641    int i, mask, msb, lsb;
2642
2643    /* Look for int16_t elements.  */
2644    if (vece <= MO_16 ||
2645        (vece == MO_32 ? (int32_t)val : val) == (int16_t)val) {
2646        tcg_out_insn(s, VRIa, VREPI, dst, val, vece);
2647        return;
2648    }
2649
2650    /* Look for bit masks.  */
2651    if (vece == MO_32) {
2652        if (risbg_mask((int32_t)val)) {
2653            /* Handle wraparound by swapping msb and lsb.  */
2654            if ((val & 0x80000001u) == 0x80000001u) {
2655                msb = 32 - ctz32(~val);
2656                lsb = clz32(~val) - 1;
2657            } else {
2658                msb = clz32(val);
2659                lsb = 31 - ctz32(val);
2660            }
2661            tcg_out_insn(s, VRIb, VGM, dst, lsb, msb, MO_32);
2662            return;
2663        }
2664    } else {
2665        if (risbg_mask(val)) {
2666            /* Handle wraparound by swapping msb and lsb.  */
2667            if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
2668                /* Handle wraparound by swapping msb and lsb.  */
2669                msb = 64 - ctz64(~val);
2670                lsb = clz64(~val) - 1;
2671            } else {
2672                msb = clz64(val);
2673                lsb = 63 - ctz64(val);
2674            }
2675            tcg_out_insn(s, VRIb, VGM, dst, lsb, msb, MO_64);
2676            return;
2677        }
2678    }
2679
2680    /* Look for all bytes 0x00 or 0xff.  */
2681    for (i = mask = 0; i < 8; i++) {
2682        uint8_t byte = val >> (i * 8);
2683        if (byte == 0xff) {
2684            mask |= 1 << i;
2685        } else if (byte != 0) {
2686            break;
2687        }
2688    }
2689    if (i == 8) {
2690        tcg_out_insn(s, VRIa, VGBM, dst, mask * 0x0101, 0);
2691        return;
2692    }
2693
2694    /* Otherwise, stuff it in the constant pool.  */
2695    tcg_out_insn(s, RIL, LARL, TCG_TMP0, 0);
2696    new_pool_label(s, val, R_390_PC32DBL, s->code_ptr - 2, 2);
2697    tcg_out_insn(s, VRX, VLREP, dst, TCG_TMP0, TCG_REG_NONE, 0, MO_64);
2698}
2699
2700static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2701                           unsigned vecl, unsigned vece,
2702                           const TCGArg args[TCG_MAX_OP_ARGS],
2703                           const int const_args[TCG_MAX_OP_ARGS])
2704{
2705    TCGType type = vecl + TCG_TYPE_V64;
2706    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
2707
2708    switch (opc) {
2709    case INDEX_op_ld_vec:
2710        tcg_out_ld(s, type, a0, a1, a2);
2711        break;
2712    case INDEX_op_st_vec:
2713        tcg_out_st(s, type, a0, a1, a2);
2714        break;
2715    case INDEX_op_dupm_vec:
2716        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2717        break;
2718
2719    case INDEX_op_abs_vec:
2720        tcg_out_insn(s, VRRa, VLP, a0, a1, vece);
2721        break;
2722    case INDEX_op_neg_vec:
2723        tcg_out_insn(s, VRRa, VLC, a0, a1, vece);
2724        break;
2725    case INDEX_op_not_vec:
2726        tcg_out_insn(s, VRRc, VNO, a0, a1, a1, 0);
2727        break;
2728
2729    case INDEX_op_add_vec:
2730        tcg_out_insn(s, VRRc, VA, a0, a1, a2, vece);
2731        break;
2732    case INDEX_op_sub_vec:
2733        tcg_out_insn(s, VRRc, VS, a0, a1, a2, vece);
2734        break;
2735    case INDEX_op_and_vec:
2736        tcg_out_insn(s, VRRc, VN, a0, a1, a2, 0);
2737        break;
2738    case INDEX_op_andc_vec:
2739        tcg_out_insn(s, VRRc, VNC, a0, a1, a2, 0);
2740        break;
2741    case INDEX_op_mul_vec:
2742        tcg_out_insn(s, VRRc, VML, a0, a1, a2, vece);
2743        break;
2744    case INDEX_op_or_vec:
2745        tcg_out_insn(s, VRRc, VO, a0, a1, a2, 0);
2746        break;
2747    case INDEX_op_orc_vec:
2748        tcg_out_insn(s, VRRc, VOC, a0, a1, a2, 0);
2749        break;
2750    case INDEX_op_xor_vec:
2751        tcg_out_insn(s, VRRc, VX, a0, a1, a2, 0);
2752        break;
2753
2754    case INDEX_op_shli_vec:
2755        tcg_out_insn(s, VRSa, VESL, a0, a2, TCG_REG_NONE, a1, vece);
2756        break;
2757    case INDEX_op_shri_vec:
2758        tcg_out_insn(s, VRSa, VESRL, a0, a2, TCG_REG_NONE, a1, vece);
2759        break;
2760    case INDEX_op_sari_vec:
2761        tcg_out_insn(s, VRSa, VESRA, a0, a2, TCG_REG_NONE, a1, vece);
2762        break;
2763    case INDEX_op_rotli_vec:
2764        tcg_out_insn(s, VRSa, VERLL, a0, a2, TCG_REG_NONE, a1, vece);
2765        break;
2766    case INDEX_op_shls_vec:
2767        tcg_out_insn(s, VRSa, VESL, a0, 0, a2, a1, vece);
2768        break;
2769    case INDEX_op_shrs_vec:
2770        tcg_out_insn(s, VRSa, VESRL, a0, 0, a2, a1, vece);
2771        break;
2772    case INDEX_op_sars_vec:
2773        tcg_out_insn(s, VRSa, VESRA, a0, 0, a2, a1, vece);
2774        break;
2775    case INDEX_op_rotls_vec:
2776        tcg_out_insn(s, VRSa, VERLL, a0, 0, a2, a1, vece);
2777        break;
2778    case INDEX_op_shlv_vec:
2779        tcg_out_insn(s, VRRc, VESLV, a0, a1, a2, vece);
2780        break;
2781    case INDEX_op_shrv_vec:
2782        tcg_out_insn(s, VRRc, VESRLV, a0, a1, a2, vece);
2783        break;
2784    case INDEX_op_sarv_vec:
2785        tcg_out_insn(s, VRRc, VESRAV, a0, a1, a2, vece);
2786        break;
2787    case INDEX_op_rotlv_vec:
2788        tcg_out_insn(s, VRRc, VERLLV, a0, a1, a2, vece);
2789        break;
2790
2791    case INDEX_op_smin_vec:
2792        tcg_out_insn(s, VRRc, VMN, a0, a1, a2, vece);
2793        break;
2794    case INDEX_op_smax_vec:
2795        tcg_out_insn(s, VRRc, VMX, a0, a1, a2, vece);
2796        break;
2797    case INDEX_op_umin_vec:
2798        tcg_out_insn(s, VRRc, VMNL, a0, a1, a2, vece);
2799        break;
2800    case INDEX_op_umax_vec:
2801        tcg_out_insn(s, VRRc, VMXL, a0, a1, a2, vece);
2802        break;
2803
2804    case INDEX_op_bitsel_vec:
2805        tcg_out_insn(s, VRRe, VSEL, a0, a1, a2, args[3]);
2806        break;
2807
2808    case INDEX_op_cmp_vec:
2809        switch ((TCGCond)args[3]) {
2810        case TCG_COND_EQ:
2811            tcg_out_insn(s, VRRc, VCEQ, a0, a1, a2, vece);
2812            break;
2813        case TCG_COND_GT:
2814            tcg_out_insn(s, VRRc, VCH, a0, a1, a2, vece);
2815            break;
2816        case TCG_COND_GTU:
2817            tcg_out_insn(s, VRRc, VCHL, a0, a1, a2, vece);
2818            break;
2819        default:
2820            g_assert_not_reached();
2821        }
2822        break;
2823
2824    case INDEX_op_s390_vuph_vec:
2825        tcg_out_insn(s, VRRa, VUPH, a0, a1, vece);
2826        break;
2827    case INDEX_op_s390_vupl_vec:
2828        tcg_out_insn(s, VRRa, VUPL, a0, a1, vece);
2829        break;
2830    case INDEX_op_s390_vpks_vec:
2831        tcg_out_insn(s, VRRc, VPKS, a0, a1, a2, vece);
2832        break;
2833
2834    case INDEX_op_mov_vec:   /* Always emitted via tcg_out_mov.  */
2835    case INDEX_op_dup_vec:   /* Always emitted via tcg_out_dup_vec.  */
2836    default:
2837        g_assert_not_reached();
2838    }
2839}
2840
2841int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2842{
2843    switch (opc) {
2844    case INDEX_op_abs_vec:
2845    case INDEX_op_add_vec:
2846    case INDEX_op_and_vec:
2847    case INDEX_op_andc_vec:
2848    case INDEX_op_bitsel_vec:
2849    case INDEX_op_neg_vec:
2850    case INDEX_op_not_vec:
2851    case INDEX_op_or_vec:
2852    case INDEX_op_orc_vec:
2853    case INDEX_op_rotli_vec:
2854    case INDEX_op_rotls_vec:
2855    case INDEX_op_rotlv_vec:
2856    case INDEX_op_sari_vec:
2857    case INDEX_op_sars_vec:
2858    case INDEX_op_sarv_vec:
2859    case INDEX_op_shli_vec:
2860    case INDEX_op_shls_vec:
2861    case INDEX_op_shlv_vec:
2862    case INDEX_op_shri_vec:
2863    case INDEX_op_shrs_vec:
2864    case INDEX_op_shrv_vec:
2865    case INDEX_op_smax_vec:
2866    case INDEX_op_smin_vec:
2867    case INDEX_op_sub_vec:
2868    case INDEX_op_umax_vec:
2869    case INDEX_op_umin_vec:
2870    case INDEX_op_xor_vec:
2871        return 1;
2872    case INDEX_op_cmp_vec:
2873    case INDEX_op_cmpsel_vec:
2874    case INDEX_op_rotrv_vec:
2875        return -1;
2876    case INDEX_op_mul_vec:
2877        return vece < MO_64;
2878    case INDEX_op_ssadd_vec:
2879    case INDEX_op_sssub_vec:
2880        return vece < MO_64 ? -1 : 0;
2881    default:
2882        return 0;
2883    }
2884}
2885
2886static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0,
2887                                 TCGv_vec v1, TCGv_vec v2, TCGCond cond)
2888{
2889    bool need_swap = false, need_inv = false;
2890
2891    switch (cond) {
2892    case TCG_COND_EQ:
2893    case TCG_COND_GT:
2894    case TCG_COND_GTU:
2895        break;
2896    case TCG_COND_NE:
2897    case TCG_COND_LE:
2898    case TCG_COND_LEU:
2899        need_inv = true;
2900        break;
2901    case TCG_COND_LT:
2902    case TCG_COND_LTU:
2903        need_swap = true;
2904        break;
2905    case TCG_COND_GE:
2906    case TCG_COND_GEU:
2907        need_swap = need_inv = true;
2908        break;
2909    default:
2910        g_assert_not_reached();
2911    }
2912
2913    if (need_inv) {
2914        cond = tcg_invert_cond(cond);
2915    }
2916    if (need_swap) {
2917        TCGv_vec t1;
2918        t1 = v1, v1 = v2, v2 = t1;
2919        cond = tcg_swap_cond(cond);
2920    }
2921
2922    vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
2923              tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
2924
2925    return need_inv;
2926}
2927
2928static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
2929                           TCGv_vec v1, TCGv_vec v2, TCGCond cond)
2930{
2931    if (expand_vec_cmp_noinv(type, vece, v0, v1, v2, cond)) {
2932        tcg_gen_not_vec(vece, v0, v0);
2933    }
2934}
2935
2936static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGv_vec v0,
2937                              TCGv_vec c1, TCGv_vec c2,
2938                              TCGv_vec v3, TCGv_vec v4, TCGCond cond)
2939{
2940    TCGv_vec t = tcg_temp_new_vec(type);
2941
2942    if (expand_vec_cmp_noinv(type, vece, t, c1, c2, cond)) {
2943        /* Invert the sense of the compare by swapping arguments.  */
2944        tcg_gen_bitsel_vec(vece, v0, t, v4, v3);
2945    } else {
2946        tcg_gen_bitsel_vec(vece, v0, t, v3, v4);
2947    }
2948    tcg_temp_free_vec(t);
2949}
2950
2951static void expand_vec_sat(TCGType type, unsigned vece, TCGv_vec v0,
2952                           TCGv_vec v1, TCGv_vec v2, TCGOpcode add_sub_opc)
2953{
2954    TCGv_vec h1 = tcg_temp_new_vec(type);
2955    TCGv_vec h2 = tcg_temp_new_vec(type);
2956    TCGv_vec l1 = tcg_temp_new_vec(type);
2957    TCGv_vec l2 = tcg_temp_new_vec(type);
2958
2959    tcg_debug_assert (vece < MO_64);
2960
2961    /* Unpack with sign-extension. */
2962    vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
2963              tcgv_vec_arg(h1), tcgv_vec_arg(v1));
2964    vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
2965              tcgv_vec_arg(h2), tcgv_vec_arg(v2));
2966
2967    vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
2968              tcgv_vec_arg(l1), tcgv_vec_arg(v1));
2969    vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
2970              tcgv_vec_arg(l2), tcgv_vec_arg(v2));
2971
2972    /* Arithmetic on a wider element size. */
2973    vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(h1),
2974              tcgv_vec_arg(h1), tcgv_vec_arg(h2));
2975    vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(l1),
2976              tcgv_vec_arg(l1), tcgv_vec_arg(l2));
2977
2978    /* Pack with saturation. */
2979    vec_gen_3(INDEX_op_s390_vpks_vec, type, vece + 1,
2980              tcgv_vec_arg(v0), tcgv_vec_arg(h1), tcgv_vec_arg(l1));
2981
2982    tcg_temp_free_vec(h1);
2983    tcg_temp_free_vec(h2);
2984    tcg_temp_free_vec(l1);
2985    tcg_temp_free_vec(l2);
2986}
2987
2988void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2989                       TCGArg a0, ...)
2990{
2991    va_list va;
2992    TCGv_vec v0, v1, v2, v3, v4, t0;
2993
2994    va_start(va, a0);
2995    v0 = temp_tcgv_vec(arg_temp(a0));
2996    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2997    v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2998
2999    switch (opc) {
3000    case INDEX_op_cmp_vec:
3001        expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
3002        break;
3003
3004    case INDEX_op_cmpsel_vec:
3005        v3 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3006        v4 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3007        expand_vec_cmpsel(type, vece, v0, v1, v2, v3, v4, va_arg(va, TCGArg));
3008        break;
3009
3010    case INDEX_op_rotrv_vec:
3011        t0 = tcg_temp_new_vec(type);
3012        tcg_gen_neg_vec(vece, t0, v2);
3013        tcg_gen_rotlv_vec(vece, v0, v1, t0);
3014        tcg_temp_free_vec(t0);
3015        break;
3016
3017    case INDEX_op_ssadd_vec:
3018        expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_add_vec);
3019        break;
3020    case INDEX_op_sssub_vec:
3021        expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_sub_vec);
3022        break;
3023
3024    default:
3025        g_assert_not_reached();
3026    }
3027    va_end(va);
3028}
3029
3030static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
3031{
3032    switch (op) {
3033    case INDEX_op_goto_ptr:
3034        return C_O0_I1(r);
3035
3036    case INDEX_op_ld8u_i32:
3037    case INDEX_op_ld8u_i64:
3038    case INDEX_op_ld8s_i32:
3039    case INDEX_op_ld8s_i64:
3040    case INDEX_op_ld16u_i32:
3041    case INDEX_op_ld16u_i64:
3042    case INDEX_op_ld16s_i32:
3043    case INDEX_op_ld16s_i64:
3044    case INDEX_op_ld_i32:
3045    case INDEX_op_ld32u_i64:
3046    case INDEX_op_ld32s_i64:
3047    case INDEX_op_ld_i64:
3048        return C_O1_I1(r, r);
3049
3050    case INDEX_op_st8_i32:
3051    case INDEX_op_st8_i64:
3052    case INDEX_op_st16_i32:
3053    case INDEX_op_st16_i64:
3054    case INDEX_op_st_i32:
3055    case INDEX_op_st32_i64:
3056    case INDEX_op_st_i64:
3057        return C_O0_I2(r, r);
3058
3059    case INDEX_op_add_i32:
3060    case INDEX_op_add_i64:
3061    case INDEX_op_shl_i64:
3062    case INDEX_op_shr_i64:
3063    case INDEX_op_sar_i64:
3064    case INDEX_op_rotl_i32:
3065    case INDEX_op_rotl_i64:
3066    case INDEX_op_rotr_i32:
3067    case INDEX_op_rotr_i64:
3068    case INDEX_op_clz_i64:
3069    case INDEX_op_setcond_i32:
3070    case INDEX_op_setcond_i64:
3071        return C_O1_I2(r, r, ri);
3072
3073    case INDEX_op_sub_i32:
3074    case INDEX_op_sub_i64:
3075    case INDEX_op_and_i32:
3076    case INDEX_op_and_i64:
3077    case INDEX_op_or_i32:
3078    case INDEX_op_or_i64:
3079    case INDEX_op_xor_i32:
3080    case INDEX_op_xor_i64:
3081        return (HAVE_FACILITY(DISTINCT_OPS)
3082                ? C_O1_I2(r, r, ri)
3083                : C_O1_I2(r, 0, ri));
3084
3085    case INDEX_op_mul_i32:
3086        /* If we have the general-instruction-extensions, then we have
3087           MULTIPLY SINGLE IMMEDIATE with a signed 32-bit, otherwise we
3088           have only MULTIPLY HALFWORD IMMEDIATE, with a signed 16-bit.  */
3089        return (HAVE_FACILITY(GEN_INST_EXT)
3090                ? C_O1_I2(r, 0, ri)
3091                : C_O1_I2(r, 0, rI));
3092
3093    case INDEX_op_mul_i64:
3094        return (HAVE_FACILITY(GEN_INST_EXT)
3095                ? C_O1_I2(r, 0, rJ)
3096                : C_O1_I2(r, 0, rI));
3097
3098    case INDEX_op_shl_i32:
3099    case INDEX_op_shr_i32:
3100    case INDEX_op_sar_i32:
3101        return (HAVE_FACILITY(DISTINCT_OPS)
3102                ? C_O1_I2(r, r, ri)
3103                : C_O1_I2(r, 0, ri));
3104
3105    case INDEX_op_brcond_i32:
3106    case INDEX_op_brcond_i64:
3107        return C_O0_I2(r, ri);
3108
3109    case INDEX_op_bswap16_i32:
3110    case INDEX_op_bswap16_i64:
3111    case INDEX_op_bswap32_i32:
3112    case INDEX_op_bswap32_i64:
3113    case INDEX_op_bswap64_i64:
3114    case INDEX_op_neg_i32:
3115    case INDEX_op_neg_i64:
3116    case INDEX_op_ext8s_i32:
3117    case INDEX_op_ext8s_i64:
3118    case INDEX_op_ext8u_i32:
3119    case INDEX_op_ext8u_i64:
3120    case INDEX_op_ext16s_i32:
3121    case INDEX_op_ext16s_i64:
3122    case INDEX_op_ext16u_i32:
3123    case INDEX_op_ext16u_i64:
3124    case INDEX_op_ext32s_i64:
3125    case INDEX_op_ext32u_i64:
3126    case INDEX_op_ext_i32_i64:
3127    case INDEX_op_extu_i32_i64:
3128    case INDEX_op_extract_i32:
3129    case INDEX_op_extract_i64:
3130        return C_O1_I1(r, r);
3131
3132    case INDEX_op_qemu_ld_i32:
3133    case INDEX_op_qemu_ld_i64:
3134        return C_O1_I1(r, L);
3135    case INDEX_op_qemu_st_i64:
3136    case INDEX_op_qemu_st_i32:
3137        return C_O0_I2(L, L);
3138
3139    case INDEX_op_deposit_i32:
3140    case INDEX_op_deposit_i64:
3141        return C_O1_I2(r, rZ, r);
3142
3143    case INDEX_op_movcond_i32:
3144    case INDEX_op_movcond_i64:
3145        return (HAVE_FACILITY(LOAD_ON_COND2)
3146                ? C_O1_I4(r, r, ri, rI, 0)
3147                : C_O1_I4(r, r, ri, r, 0));
3148
3149    case INDEX_op_div2_i32:
3150    case INDEX_op_div2_i64:
3151    case INDEX_op_divu2_i32:
3152    case INDEX_op_divu2_i64:
3153        return C_O2_I3(b, a, 0, 1, r);
3154
3155    case INDEX_op_mulu2_i64:
3156        return C_O2_I2(b, a, 0, r);
3157
3158    case INDEX_op_add2_i32:
3159    case INDEX_op_sub2_i32:
3160        return (HAVE_FACILITY(EXT_IMM)
3161                ? C_O2_I4(r, r, 0, 1, ri, r)
3162                : C_O2_I4(r, r, 0, 1, r, r));
3163
3164    case INDEX_op_add2_i64:
3165    case INDEX_op_sub2_i64:
3166        return (HAVE_FACILITY(EXT_IMM)
3167                ? C_O2_I4(r, r, 0, 1, rA, r)
3168                : C_O2_I4(r, r, 0, 1, r, r));
3169
3170    case INDEX_op_st_vec:
3171        return C_O0_I2(v, r);
3172    case INDEX_op_ld_vec:
3173    case INDEX_op_dupm_vec:
3174        return C_O1_I1(v, r);
3175    case INDEX_op_dup_vec:
3176        return C_O1_I1(v, vr);
3177    case INDEX_op_abs_vec:
3178    case INDEX_op_neg_vec:
3179    case INDEX_op_not_vec:
3180    case INDEX_op_rotli_vec:
3181    case INDEX_op_sari_vec:
3182    case INDEX_op_shli_vec:
3183    case INDEX_op_shri_vec:
3184    case INDEX_op_s390_vuph_vec:
3185    case INDEX_op_s390_vupl_vec:
3186        return C_O1_I1(v, v);
3187    case INDEX_op_add_vec:
3188    case INDEX_op_sub_vec:
3189    case INDEX_op_and_vec:
3190    case INDEX_op_andc_vec:
3191    case INDEX_op_or_vec:
3192    case INDEX_op_orc_vec:
3193    case INDEX_op_xor_vec:
3194    case INDEX_op_cmp_vec:
3195    case INDEX_op_mul_vec:
3196    case INDEX_op_rotlv_vec:
3197    case INDEX_op_rotrv_vec:
3198    case INDEX_op_shlv_vec:
3199    case INDEX_op_shrv_vec:
3200    case INDEX_op_sarv_vec:
3201    case INDEX_op_smax_vec:
3202    case INDEX_op_smin_vec:
3203    case INDEX_op_umax_vec:
3204    case INDEX_op_umin_vec:
3205    case INDEX_op_s390_vpks_vec:
3206        return C_O1_I2(v, v, v);
3207    case INDEX_op_rotls_vec:
3208    case INDEX_op_shls_vec:
3209    case INDEX_op_shrs_vec:
3210    case INDEX_op_sars_vec:
3211        return C_O1_I2(v, v, r);
3212    case INDEX_op_bitsel_vec:
3213        return C_O1_I3(v, v, v, v);
3214
3215    default:
3216        g_assert_not_reached();
3217    }
3218}
3219
/*
 * Fallback definition of the vector-registers hwcap bit.
 *
 * Mainline glibc added HWCAP_S390_VX before it was kernel abi; some
 * distros have fixed this up locally, others have not.  Supply the
 * value ourselves whenever the headers do not define HWCAP_S390_VXRS.
 */
#ifndef HWCAP_S390_VXRS
#define HWCAP_S390_VXRS 2048
#endif
3227
3228static void query_s390_facilities(void)
3229{
3230    unsigned long hwcap = qemu_getauxval(AT_HWCAP);
3231
3232    /* Is STORE FACILITY LIST EXTENDED available?  Honestly, I believe this
3233       is present on all 64-bit systems, but let's check for it anyway.  */
3234    if (hwcap & HWCAP_S390_STFLE) {
3235        register int r0 __asm__("0") = ARRAY_SIZE(s390_facilities) - 1;
3236        register void *r1 __asm__("1") = s390_facilities;
3237
3238        /* stfle 0(%r1) */
3239        asm volatile(".word 0xb2b0,0x1000"
3240                     : "=r"(r0) : "r"(r0), "r"(r1) : "memory", "cc");
3241    }
3242
3243    /*
3244     * Use of vector registers requires os support beyond the facility bit.
3245     * If the kernel does not advertise support, disable the facility bits.
3246     * There is nothing else we currently care about in the 3rd word, so
3247     * disable VECTOR with one store.
3248     */
3249    if (!(hwcap & HWCAP_S390_VXRS)) {
3250        s390_facilities[2] = 0;
3251    }
3252}
3253
3254static void tcg_target_init(TCGContext *s)
3255{
3256    query_s390_facilities();
3257
3258    tcg_target_available_regs[TCG_TYPE_I32] = 0xffff;
3259    tcg_target_available_regs[TCG_TYPE_I64] = 0xffff;
3260    if (HAVE_FACILITY(VECTOR)) {
3261        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3262        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
3263    }
3264
3265    tcg_target_call_clobber_regs = 0;
3266    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
3267    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R1);
3268    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
3269    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
3270    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
3271    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
3272    /* The r6 register is technically call-saved, but it's also a parameter
3273       register, so it can get killed by setup for the qemu_st helper.  */
3274    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
3275    /* The return register can be considered call-clobbered.  */
3276    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14);
3277
3278    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
3279    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
3280    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
3281    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
3282    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
3283    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
3284    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
3285    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
3286    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
3287    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
3288    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
3289    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
3290    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V20);
3291    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V21);
3292    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V22);
3293    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V23);
3294    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V24);
3295    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V25);
3296    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V26);
3297    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V27);
3298    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V28);
3299    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V29);
3300    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V30);
3301    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V31);
3302
3303    s->reserved_regs = 0;
3304    tcg_regset_set_reg(s->reserved_regs, TCG_TMP0);
3305    /* XXX many insns can't be used with R0, so we better avoid it for now */
3306    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);
3307    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
3308    if (USE_REG_TB) {
3309        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);
3310    }
3311}
3312
/*
 * Bytes subtracted from the stack pointer by the prologue: the call
 * stack offset, the static call-argument area and the CPU temp buffer.
 */
#define FRAME_SIZE                                      \
    ((int)(TCG_TARGET_CALL_STACK_OFFSET                 \
           + TCG_STATIC_CALL_ARGS_SIZE                  \
           + CPU_TEMP_BUF_NLONGS * sizeof(long)))
3316
3317static void tcg_target_qemu_prologue(TCGContext *s)
3318{
3319    /* stmg %r6,%r15,48(%r15) (save registers) */
3320    tcg_out_insn(s, RXY, STMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 48);
3321
3322    /* aghi %r15,-frame_size */
3323    tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -FRAME_SIZE);
3324
3325    tcg_set_frame(s, TCG_REG_CALL_STACK,
3326                  TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET,
3327                  CPU_TEMP_BUF_NLONGS * sizeof(long));
3328
3329#ifndef CONFIG_SOFTMMU
3330    if (guest_base >= 0x80000) {
3331        tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
3332        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
3333    }
3334#endif
3335
3336    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
3337    if (USE_REG_TB) {
3338        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB,
3339                    tcg_target_call_iarg_regs[1]);
3340    }
3341
3342    /* br %r3 (go to TB) */
3343    tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, tcg_target_call_iarg_regs[1]);
3344
3345    /*
3346     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
3347     * and fall through to the rest of the epilogue.
3348     */
3349    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
3350    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, 0);
3351
3352    /* TB epilogue */
3353    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
3354
3355    /* lmg %r6,%r15,fs+48(%r15) (restore registers) */
3356    tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15,
3357                 FRAME_SIZE + 48);
3358
3359    /* br %r14 (return) */
3360    tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14);
3361}
3362
3363static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3364{
3365    memset(p, 0x07, count * sizeof(tcg_insn_unit));
3366}
3367
3368typedef struct {
3369    DebugFrameHeader h;
3370    uint8_t fde_def_cfa[4];
3371    uint8_t fde_reg_ofs[18];
3372} DebugFrame;
3373
3374/* We're expecting a 2 byte uleb128 encoded value.  */
3375QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
3376
3377#define ELF_HOST_MACHINE  EM_S390
3378
3379static const DebugFrame debug_frame = {
3380    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3381    .h.cie.id = -1,
3382    .h.cie.version = 1,
3383    .h.cie.code_align = 1,
3384    .h.cie.data_align = 8,                /* sleb128 8 */
3385    .h.cie.return_column = TCG_REG_R14,
3386
3387    /* Total FDE size does not include the "len" member.  */
3388    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3389
3390    .fde_def_cfa = {
3391        12, TCG_REG_CALL_STACK,         /* DW_CFA_def_cfa %r15, ... */
3392        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3393        (FRAME_SIZE >> 7)
3394    },
3395    .fde_reg_ofs = {
3396        0x86, 6,                        /* DW_CFA_offset, %r6, 48 */
3397        0x87, 7,                        /* DW_CFA_offset, %r7, 56 */
3398        0x88, 8,                        /* DW_CFA_offset, %r8, 64 */
3399        0x89, 9,                        /* DW_CFA_offset, %r92, 72 */
3400        0x8a, 10,                       /* DW_CFA_offset, %r10, 80 */
3401        0x8b, 11,                       /* DW_CFA_offset, %r11, 88 */
3402        0x8c, 12,                       /* DW_CFA_offset, %r12, 96 */
3403        0x8d, 13,                       /* DW_CFA_offset, %r13, 104 */
3404        0x8e, 14,                       /* DW_CFA_offset, %r14, 112 */
3405    }
3406};
3407
3408void tcg_register_jit(const void *buf, size_t buf_size)
3409{
3410    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3411}
3412