/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2009 Ulrich Hecht <uli@suse.de>
 * Copyright (c) 2009 Alexander Graf <agraf@suse.de>
 * Copyright (c) 2010 Richard Henderson <rth@twiddle.net>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* We only support generating code for 64-bit mode.  */
#if TCG_TARGET_REG_BITS != 64
#error "unsupported code generation mode"
#endif

#include "../tcg-ldst.c.inc"
#include "../tcg-pool.c.inc"
#include "elf.h"

/* ??? The translation blocks produced by TCG are generally small enough to
   be entirely reachable with a 16-bit displacement.  Leaving the option for
   a 32-bit displacement here Just In Case.  */
#define USE_LONG_BRANCHES 0

#define TCG_CT_CONST_S16   0x100
#define TCG_CT_CONST_S32   0x200
#define TCG_CT_CONST_S33   0x400
#define TCG_CT_CONST_ZERO  0x800

#define ALL_GENERAL_REGS     MAKE_64BIT_MASK(0, 16)
#define ALL_VECTOR_REGS      MAKE_64BIT_MASK(32, 32)
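
/* TCG regnos 16-31 are unused; vector registers are numbered from 32 so
   that bit 4 of a regno distinguishes %v16-%v31, matching the RXB
   extension bit handled by RXB() below.  */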

/*
 * For softmmu, we need to avoid conflicts with the first 3
 * argument registers, which are used for the TLB lookup and
 * for calling the helper function.
 */
#ifdef CONFIG_SOFTMMU
#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_R2, 3)
#else
#define SOFTMMU_RESERVE_REGS 0
#endif
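
/* With CONFIG_SOFTMMU this reserves %r2-%r4, the registers that carry
   the first arguments of the load/store helpers.  */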


/* In several places within the instruction set, 0 means "no register"
   rather than TCG_REG_R0.  */
#define TCG_REG_NONE    0

/* A scratch register that may be used throughout the backend.  */
#define TCG_TMP0        TCG_REG_R1

/* A scratch register that holds a pointer to the beginning of the TB.
   We don't need this when we have pc-relative loads with the general
   instructions extension facility.  */
#define TCG_REG_TB      TCG_REG_R12
#define USE_REG_TB      (!HAVE_FACILITY(GEN_INST_EXT))

#ifndef CONFIG_SOFTMMU
#define TCG_GUEST_BASE_REG TCG_REG_R13
#endif

/* All of the following instructions are prefixed with their instruction
   format, and are defined as 8- or 16-bit quantities, even when the two
   halves of the 16-bit quantity may appear 32 bits apart in the insn.
   This makes it easy to copy the values from the tables in Appendix B.  */
typedef enum S390Opcode {
    RIL_AFI     = 0xc209,
    RIL_AGFI    = 0xc208,
    RIL_ALFI    = 0xc20b,
    RIL_ALGFI   = 0xc20a,
    RIL_BRASL   = 0xc005,
    RIL_BRCL    = 0xc004,
    RIL_CFI     = 0xc20d,
    RIL_CGFI    = 0xc20c,
    RIL_CLFI    = 0xc20f,
    RIL_CLGFI   = 0xc20e,
    RIL_CLRL    = 0xc60f,
    RIL_CLGRL   = 0xc60a,
    RIL_CRL     = 0xc60d,
    RIL_CGRL    = 0xc608,
    RIL_IIHF    = 0xc008,
    RIL_IILF    = 0xc009,
    RIL_LARL    = 0xc000,
    RIL_LGFI    = 0xc001,
    RIL_LGRL    = 0xc408,
    RIL_LLIHF   = 0xc00e,
    RIL_LLILF   = 0xc00f,
    RIL_LRL     = 0xc40d,
    RIL_MSFI    = 0xc201,
    RIL_MSGFI   = 0xc200,
    RIL_NIHF    = 0xc00a,
    RIL_NILF    = 0xc00b,
    RIL_OIHF    = 0xc00c,
    RIL_OILF    = 0xc00d,
    RIL_SLFI    = 0xc205,
    RIL_SLGFI   = 0xc204,
    RIL_XIHF    = 0xc006,
    RIL_XILF    = 0xc007,

    RI_AGHI     = 0xa70b,
    RI_AHI      = 0xa70a,
    RI_BRC      = 0xa704,
    RI_CHI      = 0xa70e,
    RI_CGHI     = 0xa70f,
    RI_IIHH     = 0xa500,
    RI_IIHL     = 0xa501,
    RI_IILH     = 0xa502,
    RI_IILL     = 0xa503,
    RI_LGHI     = 0xa709,
    RI_LLIHH    = 0xa50c,
    RI_LLIHL    = 0xa50d,
    RI_LLILH    = 0xa50e,
    RI_LLILL    = 0xa50f,
    RI_MGHI     = 0xa70d,
    RI_MHI      = 0xa70c,
    RI_NIHH     = 0xa504,
    RI_NIHL     = 0xa505,
    RI_NILH     = 0xa506,
    RI_NILL     = 0xa507,
    RI_OIHH     = 0xa508,
    RI_OIHL     = 0xa509,
    RI_OILH     = 0xa50a,
    RI_OILL     = 0xa50b,
    RI_TMLL     = 0xa701,

    RIE_CGIJ    = 0xec7c,
    RIE_CGRJ    = 0xec64,
    RIE_CIJ     = 0xec7e,
    RIE_CLGRJ   = 0xec65,
    RIE_CLIJ    = 0xec7f,
    RIE_CLGIJ   = 0xec7d,
    RIE_CLRJ    = 0xec77,
    RIE_CRJ     = 0xec76,
    RIE_LOCGHI  = 0xec46,
    RIE_RISBG   = 0xec55,

    RRE_AGR     = 0xb908,
    RRE_ALGR    = 0xb90a,
    RRE_ALCR    = 0xb998,
    RRE_ALCGR   = 0xb988,
    RRE_CGR     = 0xb920,
    RRE_CLGR    = 0xb921,
    RRE_DLGR    = 0xb987,
    RRE_DLR     = 0xb997,
    RRE_DSGFR   = 0xb91d,
    RRE_DSGR    = 0xb90d,
    RRE_FLOGR   = 0xb983,
    RRE_LGBR    = 0xb906,
    RRE_LCGR    = 0xb903,
    RRE_LGFR    = 0xb914,
    RRE_LGHR    = 0xb907,
    RRE_LGR     = 0xb904,
    RRE_LLGCR   = 0xb984,
    RRE_LLGFR   = 0xb916,
    RRE_LLGHR   = 0xb985,
    RRE_LRVR    = 0xb91f,
    RRE_LRVGR   = 0xb90f,
    RRE_LTGR    = 0xb902,
    RRE_MLGR    = 0xb986,
    RRE_MSGR    = 0xb90c,
    RRE_MSR     = 0xb252,
    RRE_NGR     = 0xb980,
    RRE_OGR     = 0xb981,
    RRE_SGR     = 0xb909,
    RRE_SLGR    = 0xb90b,
    RRE_SLBR    = 0xb999,
    RRE_SLBGR   = 0xb989,
    RRE_XGR     = 0xb982,

    RRF_LOCR    = 0xb9f2,
    RRF_LOCGR   = 0xb9e2,
    RRF_NRK     = 0xb9f4,
    RRF_NGRK    = 0xb9e4,
    RRF_ORK     = 0xb9f6,
    RRF_OGRK    = 0xb9e6,
    RRF_SRK     = 0xb9f9,
    RRF_SGRK    = 0xb9e9,
    RRF_SLRK    = 0xb9fb,
    RRF_SLGRK   = 0xb9eb,
    RRF_XRK     = 0xb9f7,
    RRF_XGRK    = 0xb9e7,

    RR_AR       = 0x1a,
    RR_ALR      = 0x1e,
    RR_BASR     = 0x0d,
    RR_BCR      = 0x07,
    RR_CLR      = 0x15,
    RR_CR       = 0x19,
    RR_DR       = 0x1d,
    RR_LCR      = 0x13,
    RR_LR       = 0x18,
    RR_LTR      = 0x12,
    RR_NR       = 0x14,
    RR_OR       = 0x16,
    RR_SR       = 0x1b,
    RR_SLR      = 0x1f,
    RR_XR       = 0x17,

    RSY_RLL     = 0xeb1d,
    RSY_RLLG    = 0xeb1c,
    RSY_SLLG    = 0xeb0d,
    RSY_SLLK    = 0xebdf,
    RSY_SRAG    = 0xeb0a,
    RSY_SRAK    = 0xebdc,
    RSY_SRLG    = 0xeb0c,
    RSY_SRLK    = 0xebde,

    RS_SLL      = 0x89,
    RS_SRA      = 0x8a,
    RS_SRL      = 0x88,

    RXY_AG      = 0xe308,
    RXY_AY      = 0xe35a,
    RXY_CG      = 0xe320,
    RXY_CLG     = 0xe321,
    RXY_CLY     = 0xe355,
    RXY_CY      = 0xe359,
    RXY_LAY     = 0xe371,
    RXY_LB      = 0xe376,
    RXY_LG      = 0xe304,
    RXY_LGB     = 0xe377,
    RXY_LGF     = 0xe314,
    RXY_LGH     = 0xe315,
    RXY_LHY     = 0xe378,
    RXY_LLGC    = 0xe390,
    RXY_LLGF    = 0xe316,
    RXY_LLGH    = 0xe391,
    RXY_LMG     = 0xeb04,
    RXY_LRV     = 0xe31e,
    RXY_LRVG    = 0xe30f,
    RXY_LRVH    = 0xe31f,
    RXY_LY      = 0xe358,
    RXY_NG      = 0xe380,
    RXY_OG      = 0xe381,
    RXY_STCY    = 0xe372,
    RXY_STG     = 0xe324,
    RXY_STHY    = 0xe370,
    RXY_STMG    = 0xeb24,
    RXY_STRV    = 0xe33e,
    RXY_STRVG   = 0xe32f,
    RXY_STRVH   = 0xe33f,
    RXY_STY     = 0xe350,
    RXY_XG      = 0xe382,

    RX_A        = 0x5a,
    RX_C        = 0x59,
    RX_L        = 0x58,
    RX_LA       = 0x41,
    RX_LH       = 0x48,
    RX_ST       = 0x50,
    RX_STC      = 0x42,
    RX_STH      = 0x40,

    VRIa_VGBM   = 0xe744,
    VRIa_VREPI  = 0xe745,
    VRIb_VGM    = 0xe746,
    VRIc_VREP   = 0xe74d,

    VRRa_VLC    = 0xe7de,
    VRRa_VLP    = 0xe7df,
    VRRa_VLR    = 0xe756,
    VRRc_VA     = 0xe7f3,
    VRRc_VCEQ   = 0xe7f8,   /* we leave the m5 cs field 0 */
    VRRc_VCH    = 0xe7fb,   /* " */
    VRRc_VCHL   = 0xe7f9,   /* " */
    VRRc_VERLLV = 0xe773,
    VRRc_VESLV  = 0xe770,
    VRRc_VESRAV = 0xe77a,
    VRRc_VESRLV = 0xe778,
    VRRc_VML    = 0xe7a2,
    VRRc_VMN    = 0xe7fe,
    VRRc_VMNL   = 0xe7fc,
    VRRc_VMX    = 0xe7ff,
    VRRc_VMXL   = 0xe7fd,
    VRRc_VN     = 0xe768,
    VRRc_VNC    = 0xe769,
    VRRc_VNO    = 0xe76b,
    VRRc_VO     = 0xe76a,
    VRRc_VOC    = 0xe76f,
    VRRc_VPKS   = 0xe797,   /* we leave the m5 cs field 0 */
    VRRc_VS     = 0xe7f7,
    VRRa_VUPH   = 0xe7d7,
    VRRa_VUPL   = 0xe7d6,
    VRRc_VX     = 0xe76d,
    VRRe_VSEL   = 0xe78d,
    VRRf_VLVGP  = 0xe762,

    VRSa_VERLL  = 0xe733,
    VRSa_VESL   = 0xe730,
    VRSa_VESRA  = 0xe73a,
    VRSa_VESRL  = 0xe738,
    VRSb_VLVG   = 0xe722,
    VRSc_VLGV   = 0xe721,

    VRX_VL      = 0xe706,
    VRX_VLLEZ   = 0xe704,
    VRX_VLREP   = 0xe705,
    VRX_VST     = 0xe70e,
    VRX_VSTEF   = 0xe70b,
    VRX_VSTEG   = 0xe70a,

    NOP         = 0x0707,
} S390Opcode;
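
/* For instance, RIL_BRASL == 0xc005 packs the major opcode 0xc0 and the
   minor opcode 0x5 around a zero r1 field; tcg_out_insn_RIL() below ORs
   the register into bits 4-7 of that halfword before emitting the 32-bit
   immediate.  */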

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
    "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6",  "%v7",
    "%v8",  "%v9",  "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
    "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
    "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
};
#endif

/* Since R6 is a potential argument register, choose it last of the
   call-saved registers.  Likewise prefer the call-clobbered registers
   in reverse order to maximize the chance of avoiding the arguments.  */
static const int tcg_target_reg_alloc_order[] = {
    /* Call saved registers.  */
    TCG_REG_R13,
    TCG_REG_R12,
    TCG_REG_R11,
    TCG_REG_R10,
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_R7,
    TCG_REG_R6,
    /* Call clobbered registers.  */
    TCG_REG_R14,
    TCG_REG_R0,
    TCG_REG_R1,
    /* Argument registers, in reverse order of allocation.  */
    TCG_REG_R5,
    TCG_REG_R4,
    TCG_REG_R3,
    TCG_REG_R2,

    /* V8-V15 are call saved, and omitted. */
    TCG_REG_V0,
    TCG_REG_V1,
    TCG_REG_V2,
    TCG_REG_V3,
    TCG_REG_V4,
    TCG_REG_V5,
    TCG_REG_V6,
    TCG_REG_V7,
    TCG_REG_V16,
    TCG_REG_V17,
    TCG_REG_V18,
    TCG_REG_V19,
    TCG_REG_V20,
    TCG_REG_V21,
    TCG_REG_V22,
    TCG_REG_V23,
    TCG_REG_V24,
    TCG_REG_V25,
    TCG_REG_V26,
    TCG_REG_V27,
    TCG_REG_V28,
    TCG_REG_V29,
    TCG_REG_V30,
    TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[] = {
    TCG_REG_R2,
    TCG_REG_R3,
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
};

static const int tcg_target_call_oarg_regs[] = {
    TCG_REG_R2,
};

#define S390_CC_EQ      8
#define S390_CC_LT      4
#define S390_CC_GT      2
#define S390_CC_OV      1
#define S390_CC_NE      (S390_CC_LT | S390_CC_GT)
#define S390_CC_LE      (S390_CC_LT | S390_CC_EQ)
#define S390_CC_GE      (S390_CC_GT | S390_CC_EQ)
#define S390_CC_NEVER   0
#define S390_CC_ALWAYS  15

/* Condition codes that result from a COMPARE and COMPARE LOGICAL.  */
static const uint8_t tcg_cond_to_s390_cond[] = {
    [TCG_COND_EQ]  = S390_CC_EQ,
    [TCG_COND_NE]  = S390_CC_NE,
    [TCG_COND_LT]  = S390_CC_LT,
    [TCG_COND_LE]  = S390_CC_LE,
    [TCG_COND_GT]  = S390_CC_GT,
    [TCG_COND_GE]  = S390_CC_GE,
    [TCG_COND_LTU] = S390_CC_LT,
    [TCG_COND_LEU] = S390_CC_LE,
    [TCG_COND_GTU] = S390_CC_GT,
    [TCG_COND_GEU] = S390_CC_GE,
};

/* Condition codes that result from a LOAD AND TEST.  Here, we have no
   unsigned instruction variation; however, since the test is vs zero we
   can re-map the outcomes appropriately.  */
static const uint8_t tcg_cond_to_ltr_cond[] = {
    [TCG_COND_EQ]  = S390_CC_EQ,
    [TCG_COND_NE]  = S390_CC_NE,
    [TCG_COND_LT]  = S390_CC_LT,
    [TCG_COND_LE]  = S390_CC_LE,
    [TCG_COND_GT]  = S390_CC_GT,
    [TCG_COND_GE]  = S390_CC_GE,
    [TCG_COND_LTU] = S390_CC_NEVER,
    [TCG_COND_LEU] = S390_CC_EQ,
    [TCG_COND_GTU] = S390_CC_NE,
    [TCG_COND_GEU] = S390_CC_ALWAYS,
};
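
/* E.g. for an unsigned test against zero: LTU (x < 0) can never be true,
   LEU (x <= 0) holds exactly when x == 0, GTU (x > 0) exactly when x != 0,
   and GEU (x >= 0) always; hence the NEVER/EQ/NE/ALWAYS entries above.  */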

#ifdef CONFIG_SOFTMMU
static void * const qemu_ld_helpers[(MO_SSIZE | MO_BSWAP) + 1] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_SB]   = helper_ret_ldsb_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LESW] = helper_le_ldsw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LESL] = helper_le_ldsl_mmu,
    [MO_LEUQ] = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BESW] = helper_be_ldsw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BESL] = helper_be_ldsl_mmu,
    [MO_BEUQ] = helper_be_ldq_mmu,
};

static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEUQ] = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEUQ] = helper_be_stq_mmu,
};
#endif

static const tcg_insn_unit *tb_ret_addr;
uint64_t s390_facilities[3];

static inline bool is_general_reg(TCGReg r)
{
    return r <= TCG_REG_R15;
}

static inline bool is_vector_reg(TCGReg r)
{
    return r >= TCG_REG_V0 && r <= TCG_REG_V31;
}

static bool patch_reloc(tcg_insn_unit *src_rw, int type,
                        intptr_t value, intptr_t addend)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    intptr_t pcrel2;
    uint32_t old;

    value += addend;
    pcrel2 = (tcg_insn_unit *)value - src_rx;

    switch (type) {
    case R_390_PC16DBL:
        if (pcrel2 == (int16_t)pcrel2) {
            tcg_patch16(src_rw, pcrel2);
            return true;
        }
        break;
    case R_390_PC32DBL:
        if (pcrel2 == (int32_t)pcrel2) {
            tcg_patch32(src_rw, pcrel2);
            return true;
        }
        break;
    case R_390_20:
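        /*
         * The 20-bit displacement is stored split, as in tcg_out_insn_RSY:
         * DL (bits 0-11 of the value) at bits 16-27 of this word and DH
         * (bits 12-19) at bits 8-15.  The mask keeps only the base
         * register nibble and the low opcode byte.
         */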
        if (value == sextract64(value, 0, 20)) {
            old = *(uint32_t *)src_rw & 0xf00000ff;
            old |= ((value & 0xfff) << 16) | ((value & 0xff000) >> 4);
            tcg_patch32(src_rw, old);
            return true;
        }
        break;
    default:
        g_assert_not_reached();
    }
    return false;
}

/* Test if a constant matches the constraint. */
static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
{
    if (ct & TCG_CT_CONST) {
        return 1;
    }

    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }

    /* The following are mutually exclusive.  */
    if (ct & TCG_CT_CONST_S16) {
        return val == (int16_t)val;
    } else if (ct & TCG_CT_CONST_S32) {
        return val == (int32_t)val;
    } else if (ct & TCG_CT_CONST_S33) {
        return val >= -0xffffffffll && val <= 0xffffffffll;
    } else if (ct & TCG_CT_CONST_ZERO) {
        return val == 0;
    }

    return 0;
}

/* Emit instructions according to the given instruction format.  */

static void tcg_out_insn_RR(TCGContext *s, S390Opcode op, TCGReg r1, TCGReg r2)
{
    tcg_out16(s, (op << 8) | (r1 << 4) | r2);
}

static void tcg_out_insn_RRE(TCGContext *s, S390Opcode op,
                             TCGReg r1, TCGReg r2)
{
    tcg_out32(s, (op << 16) | (r1 << 4) | r2);
}

static void tcg_out_insn_RRF(TCGContext *s, S390Opcode op,
                             TCGReg r1, TCGReg r2, int m3)
{
    tcg_out32(s, (op << 16) | (m3 << 12) | (r1 << 4) | r2);
}

static void tcg_out_insn_RI(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
{
    tcg_out32(s, (op << 16) | (r1 << 20) | (i2 & 0xffff));
}

static void tcg_out_insn_RIE(TCGContext *s, S390Opcode op, TCGReg r1,
                             int i2, int m3)
{
    tcg_out16(s, (op & 0xff00) | (r1 << 4) | m3);
    tcg_out32(s, (i2 << 16) | (op & 0xff));
}

static void tcg_out_insn_RIL(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
{
    tcg_out16(s, op | (r1 << 4));
    tcg_out32(s, i2);
}

static void tcg_out_insn_RS(TCGContext *s, S390Opcode op, TCGReg r1,
                            TCGReg b2, TCGReg r3, int disp)
{
    tcg_out32(s, (op << 24) | (r1 << 20) | (r3 << 16) | (b2 << 12)
              | (disp & 0xfff));
}

static void tcg_out_insn_RSY(TCGContext *s, S390Opcode op, TCGReg r1,
                             TCGReg b2, TCGReg r3, int disp)
{
    tcg_out16(s, (op & 0xff00) | (r1 << 4) | r3);
    tcg_out32(s, (op & 0xff) | (b2 << 28)
              | ((disp & 0xfff) << 16) | ((disp & 0xff000) >> 4));
}

#define tcg_out_insn_RX   tcg_out_insn_RS
#define tcg_out_insn_RXY  tcg_out_insn_RSY

static int RXB(TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
{
    /*
     * Shift bit 4 of each regno to its corresponding bit of RXB.
     * RXB itself begins at bit 8 of the instruction so 8 - 4 = 4
     * is the left-shift of the 4th operand.
     */
    return ((v1 & 0x10) << (4 + 3))
         | ((v2 & 0x10) << (4 + 2))
         | ((v3 & 0x10) << (4 + 1))
         | ((v4 & 0x10) << (4 + 0));
}
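
/* Example: for operands %v26, %v8, %v17, %v0 (TCG regnos 58, 40, 49, 32),
   the low nibbles 10, 8, 1, 0 go into the insn's register fields, while
   bit 4 of each regno supplies the RXB extension bits, here 0b1010.  */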

static void tcg_out_insn_VRIa(TCGContext *s, S390Opcode op,
                              TCGReg v1, uint16_t i2, int m3)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4));
    tcg_out16(s, i2);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
}

static void tcg_out_insn_VRIb(TCGContext *s, S390Opcode op,
                              TCGReg v1, uint8_t i2, uint8_t i3, int m4)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4));
    tcg_out16(s, (i2 << 8) | (i3 & 0xff));
    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
}

static void tcg_out_insn_VRIc(TCGContext *s, S390Opcode op,
                              TCGReg v1, uint16_t i2, TCGReg v3, int m4)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(is_vector_reg(v3));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v3 & 0xf));
    tcg_out16(s, i2);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, v3, 0) | (m4 << 12));
}

static void tcg_out_insn_VRRa(TCGContext *s, S390Opcode op,
                              TCGReg v1, TCGReg v2, int m3)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(is_vector_reg(v2));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
    tcg_out32(s, (op & 0x00ff) | RXB(v1, v2, 0, 0) | (m3 << 12));
}

static void tcg_out_insn_VRRc(TCGContext *s, S390Opcode op,
                              TCGReg v1, TCGReg v2, TCGReg v3, int m4)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(is_vector_reg(v2));
    tcg_debug_assert(is_vector_reg(v3));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
    tcg_out16(s, v3 << 12);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, 0) | (m4 << 12));
}

static void tcg_out_insn_VRRe(TCGContext *s, S390Opcode op,
                              TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(is_vector_reg(v2));
    tcg_debug_assert(is_vector_reg(v3));
    tcg_debug_assert(is_vector_reg(v4));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
    tcg_out16(s, v3 << 12);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, v4) | (v4 << 12));
}

static void tcg_out_insn_VRRf(TCGContext *s, S390Opcode op,
                              TCGReg v1, TCGReg r2, TCGReg r3)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(is_general_reg(r2));
    tcg_debug_assert(is_general_reg(r3));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r2);
    tcg_out16(s, r3 << 12);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0));
}

static void tcg_out_insn_VRSa(TCGContext *s, S390Opcode op, TCGReg v1,
                              intptr_t d2, TCGReg b2, TCGReg v3, int m4)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
    tcg_debug_assert(is_general_reg(b2));
    tcg_debug_assert(is_vector_reg(v3));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v3 & 0xf));
    tcg_out16(s, b2 << 12 | d2);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, v3, 0) | (m4 << 12));
}

static void tcg_out_insn_VRSb(TCGContext *s, S390Opcode op, TCGReg v1,
                              intptr_t d2, TCGReg b2, TCGReg r3, int m4)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
    tcg_debug_assert(is_general_reg(b2));
    tcg_debug_assert(is_general_reg(r3));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r3);
    tcg_out16(s, b2 << 12 | d2);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
}

static void tcg_out_insn_VRSc(TCGContext *s, S390Opcode op, TCGReg r1,
                              intptr_t d2, TCGReg b2, TCGReg v3, int m4)
{
    tcg_debug_assert(is_general_reg(r1));
    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
    tcg_debug_assert(is_general_reg(b2));
    tcg_debug_assert(is_vector_reg(v3));
    tcg_out16(s, (op & 0xff00) | (r1 << 4) | (v3 & 0xf));
    tcg_out16(s, b2 << 12 | d2);
    tcg_out16(s, (op & 0x00ff) | RXB(0, 0, v3, 0) | (m4 << 12));
}

static void tcg_out_insn_VRX(TCGContext *s, S390Opcode op, TCGReg v1,
                             TCGReg b2, TCGReg x2, intptr_t d2, int m3)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
    tcg_debug_assert(is_general_reg(x2));
    tcg_debug_assert(is_general_reg(b2));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | x2);
    tcg_out16(s, (b2 << 12) | d2);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
}

/* Emit an opcode with "type-checking" of the format.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(FMT,_),OP), ## __VA_ARGS__)
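
/* E.g. tcg_out_insn(s, RIL, LARL, ret, off) expands to
   tcg_out_insn_RIL(s, RIL_LARL, ret, off); naming an opcode that does
   not exist in the requested format fails to compile.  */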


/* emit 64-bit shifts */
static void tcg_out_sh64(TCGContext* s, S390Opcode op, TCGReg dest,
                         TCGReg src, TCGReg sh_reg, int sh_imm)
{
    tcg_out_insn_RSY(s, op, dest, sh_reg, src, sh_imm);
}

/* emit 32-bit shifts */
static void tcg_out_sh32(TCGContext* s, S390Opcode op, TCGReg dest,
                         TCGReg sh_reg, int sh_imm)
{
    tcg_out_insn_RS(s, op, dest, sh_reg, 0, sh_imm);
}

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
    if (src == dst) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
        if (likely(is_general_reg(dst) && is_general_reg(src))) {
            tcg_out_insn(s, RR, LR, dst, src);
            break;
        }
        /* fallthru */

    case TCG_TYPE_I64:
        if (likely(is_general_reg(dst))) {
            if (likely(is_general_reg(src))) {
                tcg_out_insn(s, RRE, LGR, dst, src);
            } else {
                tcg_out_insn(s, VRSc, VLGV, dst, 0, 0, src, 3);
            }
            break;
        } else if (is_general_reg(src)) {
            tcg_out_insn(s, VRSb, VLVG, dst, 0, 0, src, 3);
            break;
        }
        /* fallthru */

    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
        tcg_out_insn(s, VRRa, VLR, dst, src, 0);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}

static const S390Opcode lli_insns[4] = {
    RI_LLILL, RI_LLILH, RI_LLIHL, RI_LLIHH
};

static bool maybe_out_small_movi(TCGContext *s, TCGType type,
                                 TCGReg ret, tcg_target_long sval)
{
    tcg_target_ulong uval = sval;
    int i;

    if (type == TCG_TYPE_I32) {
        uval = (uint32_t)sval;
        sval = (int32_t)sval;
    }

    /* Try all 32-bit insns that can load it in one go.  */
    if (sval >= -0x8000 && sval < 0x8000) {
        tcg_out_insn(s, RI, LGHI, ret, sval);
        return true;
    }

    for (i = 0; i < 4; i++) {
        tcg_target_long mask = 0xffffull << i*16;
        if ((uval & mask) == uval) {
            tcg_out_insn_RI(s, lli_insns[i], ret, uval >> i*16);
            return true;
        }
    }

    return false;
}
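
/* E.g. -42 is handled by LGHI, and 0x4800000000000000 by LLIHH 0x4800;
   something like 0x10001 spans two 16-bit quarters and falls back to
   the caller.  */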

/* load a register with an immediate value */
static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
                             tcg_target_long sval, bool in_prologue)
{
    tcg_target_ulong uval;

    /* Try all 32-bit insns that can load it in one go.  */
    if (maybe_out_small_movi(s, type, ret, sval)) {
        return;
    }

    uval = sval;
    if (type == TCG_TYPE_I32) {
        uval = (uint32_t)sval;
        sval = (int32_t)sval;
    }

    /* Try all 48-bit insns that can load it in one go.  */
    if (HAVE_FACILITY(EXT_IMM)) {
        if (sval == (int32_t)sval) {
            tcg_out_insn(s, RIL, LGFI, ret, sval);
            return;
        }
        if (uval <= 0xffffffff) {
            tcg_out_insn(s, RIL, LLILF, ret, uval);
            return;
        }
        if ((uval & 0xffffffff) == 0) {
            tcg_out_insn(s, RIL, LLIHF, ret, uval >> 32);
            return;
        }
    }

    /* Try for PC-relative address load.  For odd addresses,
       attempt to use an offset from the start of the TB.  */
    if ((sval & 1) == 0) {
        ptrdiff_t off = tcg_pcrel_diff(s, (void *)sval) >> 1;
        if (off == (int32_t)off) {
            tcg_out_insn(s, RIL, LARL, ret, off);
            return;
        }
    } else if (USE_REG_TB && !in_prologue) {
        ptrdiff_t off = tcg_tbrel_diff(s, (void *)sval);
        if (off == sextract64(off, 0, 20)) {
            /* This is certain to be an address within TB, and therefore
               OFF will be negative; don't try RX_LA.  */
            tcg_out_insn(s, RXY, LAY, ret, TCG_REG_TB, TCG_REG_NONE, off);
            return;
        }
    }

    /* A 32-bit unsigned value can be loaded in 2 insns.  And given
       that LLILL, LLILH, LLILF above did not succeed, we know that
       both insns are required.  */
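    /* For example, 0xdeadbeef is emitted as LLILL 0xbeef; IILH 0xdead.  */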
    if (uval <= 0xffffffff) {
        tcg_out_insn(s, RI, LLILL, ret, uval);
        tcg_out_insn(s, RI, IILH, ret, uval >> 16);
        return;
    }

    /* Otherwise, stuff it in the constant pool.  */
    if (HAVE_FACILITY(GEN_INST_EXT)) {
        tcg_out_insn(s, RIL, LGRL, ret, 0);
        new_pool_label(s, sval, R_390_PC32DBL, s->code_ptr - 2, 2);
    } else if (USE_REG_TB && !in_prologue) {
        tcg_out_insn(s, RXY, LG, ret, TCG_REG_TB, TCG_REG_NONE, 0);
        new_pool_label(s, sval, R_390_20, s->code_ptr - 2,
                       tcg_tbrel_diff(s, NULL));
    } else {
        TCGReg base = ret ? ret : TCG_TMP0;
        tcg_out_insn(s, RIL, LARL, base, 0);
        new_pool_label(s, sval, R_390_PC32DBL, s->code_ptr - 2, 2);
        tcg_out_insn(s, RXY, LG, ret, base, TCG_REG_NONE, 0);
    }
}

static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long sval)
{
    tcg_out_movi_int(s, type, ret, sval, false);
}

/* Emit a load/store type instruction.  Inputs are:
   DATA:     The register to be loaded or stored.
   BASE+OFS: The effective address.
   OPC_RX:   If the operation has an RX format opcode (e.g. STC), otherwise 0.
   OPC_RXY:  The RXY format opcode for the operation (e.g. STCY).  */

static void tcg_out_mem(TCGContext *s, S390Opcode opc_rx, S390Opcode opc_rxy,
                        TCGReg data, TCGReg base, TCGReg index,
                        tcg_target_long ofs)
{
    if (ofs < -0x80000 || ofs >= 0x80000) {
        /* Combine the low 20 bits of the offset with the actual load insn;
           the high 44 bits must come from an immediate load.  */
        tcg_target_long low = ((ofs & 0xfffff) ^ 0x80000) - 0x80000;
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - low);
        ofs = low;

        /* If we were already given an index register, add it in.  */
        if (index != TCG_REG_NONE) {
            tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
        }
        index = TCG_TMP0;
    }

    if (opc_rx && ofs >= 0 && ofs < 0x1000) {
        tcg_out_insn_RX(s, opc_rx, data, base, index, ofs);
    } else {
        tcg_out_insn_RXY(s, opc_rxy, data, base, index, ofs);
    }
}

static void tcg_out_vrx_mem(TCGContext *s, S390Opcode opc_vrx,
                            TCGReg data, TCGReg base, TCGReg index,
                            tcg_target_long ofs, int m3)
{
    if (ofs < 0 || ofs >= 0x1000) {
        if (ofs >= -0x80000 && ofs < 0x80000) {
            tcg_out_insn(s, RXY, LAY, TCG_TMP0, base, index, ofs);
            base = TCG_TMP0;
            index = TCG_REG_NONE;
            ofs = 0;
        } else {
            tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs);
            if (index != TCG_REG_NONE) {
                tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
            }
            index = TCG_TMP0;
            ofs = 0;
        }
    }
    tcg_out_insn_VRX(s, opc_vrx, data, base, index, ofs, m3);
}

/* load data without address translation or endianness conversion */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data,
                       TCGReg base, intptr_t ofs)
{
    switch (type) {
    case TCG_TYPE_I32:
        if (likely(is_general_reg(data))) {
            tcg_out_mem(s, RX_L, RXY_LY, data, base, TCG_REG_NONE, ofs);
            break;
        }
        tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_32);
        break;

    case TCG_TYPE_I64:
        if (likely(is_general_reg(data))) {
            tcg_out_mem(s, 0, RXY_LG, data, base, TCG_REG_NONE, ofs);
            break;
        }
        /* fallthru */

    case TCG_TYPE_V64:
        tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_64);
        break;

    case TCG_TYPE_V128:
        /* Hint quadword aligned.  */
        tcg_out_vrx_mem(s, VRX_VL, data, base, TCG_REG_NONE, ofs, 4);
        break;

    default:
        g_assert_not_reached();
    }
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg data,
                       TCGReg base, intptr_t ofs)
{
    switch (type) {
    case TCG_TYPE_I32:
        if (likely(is_general_reg(data))) {
            tcg_out_mem(s, RX_ST, RXY_STY, data, base, TCG_REG_NONE, ofs);
        } else {
            tcg_out_vrx_mem(s, VRX_VSTEF, data, base, TCG_REG_NONE, ofs, 1);
        }
        break;

    case TCG_TYPE_I64:
        if (likely(is_general_reg(data))) {
            tcg_out_mem(s, 0, RXY_STG, data, base, TCG_REG_NONE, ofs);
            break;
        }
        /* fallthru */

    case TCG_TYPE_V64:
        tcg_out_vrx_mem(s, VRX_VSTEG, data, base, TCG_REG_NONE, ofs, 0);
        break;

    case TCG_TYPE_V128:
        /* Hint quadword aligned.  */
        tcg_out_vrx_mem(s, VRX_VST, data, base, TCG_REG_NONE, ofs, 4);
        break;

    default:
        g_assert_not_reached();
    }
}

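/* We make no use of store-immediate insns; returning false here asks the
   common code to materialize the constant in a register and store that.  */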
static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    return false;
}

/* load data from an absolute host address */
static void tcg_out_ld_abs(TCGContext *s, TCGType type,
                           TCGReg dest, const void *abs)
{
    intptr_t addr = (intptr_t)abs;

    if (HAVE_FACILITY(GEN_INST_EXT) && !(addr & 1)) {
        ptrdiff_t disp = tcg_pcrel_diff(s, abs) >> 1;
        if (disp == (int32_t)disp) {
            if (type == TCG_TYPE_I32) {
                tcg_out_insn(s, RIL, LRL, dest, disp);
            } else {
                tcg_out_insn(s, RIL, LGRL, dest, disp);
            }
            return;
        }
    }
    if (USE_REG_TB) {
        ptrdiff_t disp = tcg_tbrel_diff(s, abs);
        if (disp == sextract64(disp, 0, 20)) {
            tcg_out_ld(s, type, dest, TCG_REG_TB, disp);
            return;
        }
    }

    tcg_out_movi(s, TCG_TYPE_PTR, dest, addr & ~0xffff);
    tcg_out_ld(s, type, dest, dest, addr & 0xffff);
}

static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src,
                                 int msb, int lsb, int ofs, int z)
{
    /* Format RIE-f */
    tcg_out16(s, (RIE_RISBG & 0xff00) | (dest << 4) | src);
    tcg_out16(s, (msb << 8) | (z << 7) | lsb);
    tcg_out16(s, (ofs << 8) | (RIE_RISBG & 0xff));
}

static void tgen_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
    if (HAVE_FACILITY(EXT_IMM)) {
        tcg_out_insn(s, RRE, LGBR, dest, src);
        return;
    }

    if (type == TCG_TYPE_I32) {
        if (dest == src) {
            tcg_out_sh32(s, RS_SLL, dest, TCG_REG_NONE, 24);
        } else {
            tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 24);
        }
        tcg_out_sh32(s, RS_SRA, dest, TCG_REG_NONE, 24);
    } else {
        tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 56);
        tcg_out_sh64(s, RSY_SRAG, dest, dest, TCG_REG_NONE, 56);
    }
}

static void tgen_ext8u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
    if (HAVE_FACILITY(EXT_IMM)) {
        tcg_out_insn(s, RRE, LLGCR, dest, src);
        return;
    }

    if (dest == src) {
        tcg_out_movi(s, type, TCG_TMP0, 0xff);
        src = TCG_TMP0;
    } else {
        tcg_out_movi(s, type, dest, 0xff);
    }
    if (type == TCG_TYPE_I32) {
        tcg_out_insn(s, RR, NR, dest, src);
    } else {
        tcg_out_insn(s, RRE, NGR, dest, src);
    }
}

static void tgen_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
    if (HAVE_FACILITY(EXT_IMM)) {
        tcg_out_insn(s, RRE, LGHR, dest, src);
        return;
    }

    if (type == TCG_TYPE_I32) {
        if (dest == src) {
            tcg_out_sh32(s, RS_SLL, dest, TCG_REG_NONE, 16);
        } else {
            tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 16);
        }
        tcg_out_sh32(s, RS_SRA, dest, TCG_REG_NONE, 16);
    } else {
        tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 48);
        tcg_out_sh64(s, RSY_SRAG, dest, dest, TCG_REG_NONE, 48);
    }
}

static void tgen_ext16u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
    if (HAVE_FACILITY(EXT_IMM)) {
        tcg_out_insn(s, RRE, LLGHR, dest, src);
        return;
    }

    if (dest == src) {
        tcg_out_movi(s, type, TCG_TMP0, 0xffff);
        src = TCG_TMP0;
    } else {
        tcg_out_movi(s, type, dest, 0xffff);
    }
    if (type == TCG_TYPE_I32) {
        tcg_out_insn(s, RR, NR, dest, src);
    } else {
        tcg_out_insn(s, RRE, NGR, dest, src);
    }
}

static inline void tgen_ext32s(TCGContext *s, TCGReg dest, TCGReg src)
{
    tcg_out_insn(s, RRE, LGFR, dest, src);
}

static inline void tgen_ext32u(TCGContext *s, TCGReg dest, TCGReg src)
{
    tcg_out_insn(s, RRE, LLGFR, dest, src);
}

/* Accept bit patterns like these:
    0....01....1
    1....10....0
    1..10..01..1
    0..01..10..0
   Copied from gcc sources.  */
static inline bool risbg_mask(uint64_t c)
{
    uint64_t lsb;
    /* We don't change the number of transitions by inverting,
       so make sure we start with the LSB zero.  */
    if (c & 1) {
        c = ~c;
    }
    /* Reject all zeros or all ones.  */
    if (c == 0) {
        return false;
    }
    /* Find the first transition.  */
    lsb = c & -c;
    /* Invert to look for a second transition.  */
    c = ~c;
    /* Erase the first transition.  */
    c &= -lsb;
    /* Find the second transition, if any.  */
    lsb = c & -c;
    /* Match if all the bits are 1's, or if c is zero.  */
    return c == -lsb;
}

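/* For a mask like 0x000000ffff000000, msb = 24 and lsb = 39 select exactly
   the run of ones in IBM bit numbering (bit 0 = MSB); with the zero flag
   set, RISBG clears every bit outside that range, implementing the AND.
   The wraparound branch handles masks such as 0xf00000000000000f.  */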
static void tgen_andi_risbg(TCGContext *s, TCGReg out, TCGReg in, uint64_t val)
{
    int msb, lsb;
    if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
        /* Achieve wraparound by swapping msb and lsb.  */
        msb = 64 - ctz64(~val);
        lsb = clz64(~val) - 1;
    } else {
        msb = clz64(val);
        lsb = 63 - ctz64(val);
    }
    tcg_out_risbg(s, out, in, msb, lsb, 0, 1);
}

static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
{
    static const S390Opcode ni_insns[4] = {
        RI_NILL, RI_NILH, RI_NIHL, RI_NIHH
    };
    static const S390Opcode nif_insns[2] = {
        RIL_NILF, RIL_NIHF
    };
    uint64_t valid = (type == TCG_TYPE_I32 ? 0xffffffffull : -1ull);
    int i;

    /* Look for the zero-extensions.  */
    if ((val & valid) == 0xffffffff) {
        tgen_ext32u(s, dest, dest);
        return;
    }
    if (HAVE_FACILITY(EXT_IMM)) {
        if ((val & valid) == 0xff) {
            tgen_ext8u(s, TCG_TYPE_I64, dest, dest);
            return;
        }
        if ((val & valid) == 0xffff) {
            tgen_ext16u(s, TCG_TYPE_I64, dest, dest);
            return;
        }
    }

    /* Try all 32-bit insns that can perform it in one go.  */
    for (i = 0; i < 4; i++) {
        tcg_target_ulong mask = ~(0xffffull << i*16);
        if (((val | ~valid) & mask) == mask) {
            tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
            return;
        }
    }

    /* Try all 48-bit insns that can perform it in one go.  */
    if (HAVE_FACILITY(EXT_IMM)) {
        for (i = 0; i < 2; i++) {
            tcg_target_ulong mask = ~(0xffffffffull << i*32);
            if (((val | ~valid) & mask) == mask) {
                tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32);
                return;
            }
        }
    }
    if (HAVE_FACILITY(GEN_INST_EXT) && risbg_mask(val)) {
        tgen_andi_risbg(s, dest, dest, val);
        return;
    }

    /* Use the constant pool if USE_REG_TB, but not for small constants.  */
    if (USE_REG_TB) {
        if (!maybe_out_small_movi(s, type, TCG_TMP0, val)) {
            tcg_out_insn(s, RXY, NG, dest, TCG_REG_TB, TCG_REG_NONE, 0);
            new_pool_label(s, val & valid, R_390_20, s->code_ptr - 2,
                           tcg_tbrel_diff(s, NULL));
            return;
        }
    } else {
        tcg_out_movi(s, type, TCG_TMP0, val);
    }
    if (type == TCG_TYPE_I32) {
        tcg_out_insn(s, RR, NR, dest, TCG_TMP0);
    } else {
        tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0);
    }
}

static void tgen_ori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
{
    static const S390Opcode oi_insns[4] = {
        RI_OILL, RI_OILH, RI_OIHL, RI_OIHH
    };
    static const S390Opcode oif_insns[2] = {
        RIL_OILF, RIL_OIHF
    };

    int i;

    /* Look for no-op.  */
    if (unlikely(val == 0)) {
        return;
    }

    /* Try all 32-bit insns that can perform it in one go.  */
    for (i = 0; i < 4; i++) {
        tcg_target_ulong mask = (0xffffull << i*16);
        if ((val & mask) != 0 && (val & ~mask) == 0) {
            tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16);
            return;
        }
    }

    /* Try all 48-bit insns that can perform it in one go.  */
    if (HAVE_FACILITY(EXT_IMM)) {
        for (i = 0; i < 2; i++) {
            tcg_target_ulong mask = (0xffffffffull << i*32);
            if ((val & mask) != 0 && (val & ~mask) == 0) {
                tcg_out_insn_RIL(s, oif_insns[i], dest, val >> i*32);
                return;
            }
        }
    }

    /* Use the constant pool if USE_REG_TB, but not for small constants.  */
    if (maybe_out_small_movi(s, type, TCG_TMP0, val)) {
        if (type == TCG_TYPE_I32) {
            tcg_out_insn(s, RR, OR, dest, TCG_TMP0);
        } else {
            tcg_out_insn(s, RRE, OGR, dest, TCG_TMP0);
        }
    } else if (USE_REG_TB) {
        tcg_out_insn(s, RXY, OG, dest, TCG_REG_TB, TCG_REG_NONE, 0);
        new_pool_label(s, val, R_390_20, s->code_ptr - 2,
                       tcg_tbrel_diff(s, NULL));
    } else {
        /* Perform the OR via sequential modifications to the high and
           low parts.  Do this via recursion to handle 16-bit vs 32-bit
           masks in each half.  */
        tcg_debug_assert(HAVE_FACILITY(EXT_IMM));
        tgen_ori(s, type, dest, val & 0x00000000ffffffffull);
        tgen_ori(s, type, dest, val & 0xffffffff00000000ull);
    }
}

static void tgen_xori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
{
    /* Try all 48-bit insns that can perform it in one go.  */
    if (HAVE_FACILITY(EXT_IMM)) {
        if ((val & 0xffffffff00000000ull) == 0) {
            tcg_out_insn(s, RIL, XILF, dest, val);
            return;
        }
        if ((val & 0x00000000ffffffffull) == 0) {
            tcg_out_insn(s, RIL, XIHF, dest, val >> 32);
            return;
        }
    }

    /* Use the constant pool if USE_REG_TB, but not for small constants.  */
    if (maybe_out_small_movi(s, type, TCG_TMP0, val)) {
        if (type == TCG_TYPE_I32) {
            tcg_out_insn(s, RR, XR, dest, TCG_TMP0);
        } else {
            tcg_out_insn(s, RRE, XGR, dest, TCG_TMP0);
        }
    } else if (USE_REG_TB) {
        tcg_out_insn(s, RXY, XG, dest, TCG_REG_TB, TCG_REG_NONE, 0);
        new_pool_label(s, val, R_390_20, s->code_ptr - 2,
                       tcg_tbrel_diff(s, NULL));
    } else {
        /* Perform the xor by parts.  */
        tcg_debug_assert(HAVE_FACILITY(EXT_IMM));
        if (val & 0xffffffff) {
            tcg_out_insn(s, RIL, XILF, dest, val);
        }
        if (val > 0xffffffff) {
            tcg_out_insn(s, RIL, XIHF, dest, val >> 32);
        }
    }
}

static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
                    TCGArg c2, bool c2const, bool need_carry)
{
    bool is_unsigned = is_unsigned_cond(c);
    S390Opcode op;

    if (c2const) {
        if (c2 == 0) {
            if (!(is_unsigned && need_carry)) {
                if (type == TCG_TYPE_I32) {
                    tcg_out_insn(s, RR, LTR, r1, r1);
                } else {
                    tcg_out_insn(s, RRE, LTGR, r1, r1);
                }
                return tcg_cond_to_ltr_cond[c];
            }
        }

        if (!is_unsigned && c2 == (int16_t)c2) {
            op = (type == TCG_TYPE_I32 ? RI_CHI : RI_CGHI);
            tcg_out_insn_RI(s, op, r1, c2);
            goto exit;
        }

        if (HAVE_FACILITY(EXT_IMM)) {
            if (type == TCG_TYPE_I32) {
                op = (is_unsigned ? RIL_CLFI : RIL_CFI);
                tcg_out_insn_RIL(s, op, r1, c2);
                goto exit;
            } else if (c2 == (is_unsigned ? (TCGArg)(uint32_t)c2
                              : (TCGArg)(int32_t)c2)) {
                op = (is_unsigned ? RIL_CLGFI : RIL_CGFI);
                tcg_out_insn_RIL(s, op, r1, c2);
                goto exit;
            }
        }

        /* Use the constant pool, but not for small constants.  */
        if (maybe_out_small_movi(s, type, TCG_TMP0, c2)) {
            c2 = TCG_TMP0;
            /* fall through to reg-reg */
        } else if (USE_REG_TB) {
            if (type == TCG_TYPE_I32) {
                op = (is_unsigned ? RXY_CLY : RXY_CY);
                tcg_out_insn_RXY(s, op, r1, TCG_REG_TB, TCG_REG_NONE, 0);
                new_pool_label(s, (uint32_t)c2, R_390_20, s->code_ptr - 2,
                               4 - tcg_tbrel_diff(s, NULL));
            } else {
                op = (is_unsigned ? RXY_CLG : RXY_CG);
                tcg_out_insn_RXY(s, op, r1, TCG_REG_TB, TCG_REG_NONE, 0);
                new_pool_label(s, c2, R_390_20, s->code_ptr - 2,
                               tcg_tbrel_diff(s, NULL));
            }
            goto exit;
        } else {
            if (type == TCG_TYPE_I32) {
                op = (is_unsigned ? RIL_CLRL : RIL_CRL);
                tcg_out_insn_RIL(s, op, r1, 0);
                new_pool_label(s, (uint32_t)c2, R_390_PC32DBL,
                               s->code_ptr - 2, 2 + 4);
            } else {
                op = (is_unsigned ? RIL_CLGRL : RIL_CGRL);
                tcg_out_insn_RIL(s, op, r1, 0);
                new_pool_label(s, c2, R_390_PC32DBL, s->code_ptr - 2, 2);
            }
            goto exit;
        }
    }

    if (type == TCG_TYPE_I32) {
        op = (is_unsigned ? RR_CLR : RR_CR);
        tcg_out_insn_RR(s, op, r1, c2);
    } else {
        op = (is_unsigned ? RRE_CLGR : RRE_CGR);
        tcg_out_insn_RRE(s, op, r1, c2);
    }

 exit:
    return tcg_cond_to_s390_cond[c];
}

static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
                         TCGReg dest, TCGReg c1, TCGArg c2, int c2const)
{
    int cc;
    bool have_loc;

    /* With LOC2, we can always emit the minimum 3 insns.  */
    if (HAVE_FACILITY(LOAD_ON_COND2)) {
        /* Emit: d = 0, d = (cc ? 1 : d).  */
        cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
        tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
        tcg_out_insn(s, RIE, LOCGHI, dest, 1, cc);
        return;
    }

    have_loc = HAVE_FACILITY(LOAD_ON_COND);

    /* For HAVE_LOC, only the paths through GTU/GT/LEU/LE are smaller.  */
 restart:
    switch (cond) {
    case TCG_COND_NE:
        /* X != 0 is X > 0.  */
        if (c2const && c2 == 0) {
            cond = TCG_COND_GTU;
        } else {
            break;
        }
        /* fallthru */

    case TCG_COND_GTU:
    case TCG_COND_GT:
        /* The result of a compare has CC=2 for GT and CC=3 unused.
           ADD LOGICAL WITH CARRY considers (CC & 2) the carry bit.  */
        tgen_cmp(s, type, cond, c1, c2, c2const, true);
        tcg_out_movi(s, type, dest, 0);
        tcg_out_insn(s, RRE, ALCGR, dest, dest);
        return;

    case TCG_COND_EQ:
        /* X == 0 is X <= 0.  */
        if (c2const && c2 == 0) {
            cond = TCG_COND_LEU;
        } else {
            break;
        }
        /* fallthru */

    case TCG_COND_LEU:
    case TCG_COND_LE:
        /* As above, but we're looking for borrow, or !carry.
           The second insn computes d - d - borrow, or -1 for true
           and 0 for false.  So we must mask to 1 bit afterward.  */
        tgen_cmp(s, type, cond, c1, c2, c2const, true);
        tcg_out_insn(s, RRE, SLBGR, dest, dest);
        tgen_andi(s, type, dest, 1);
        return;

    case TCG_COND_GEU:
    case TCG_COND_LTU:
    case TCG_COND_LT:
    case TCG_COND_GE:
        /* Swap operands so that we can use LEU/GTU/GT/LE.  */
        if (c2const) {
            if (have_loc) {
                break;
            }
            tcg_out_movi(s, type, TCG_TMP0, c2);
            c2 = c1;
            c2const = 0;
            c1 = TCG_TMP0;
        } else {
            TCGReg t = c1;
            c1 = c2;
            c2 = t;
        }
        cond = tcg_swap_cond(cond);
        goto restart;

    default:
        g_assert_not_reached();
    }

    cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
    if (have_loc) {
        /* Emit: d = 0, t = 1, d = (cc ? t : d).  */
        tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
        tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 1);
        tcg_out_insn(s, RRF, LOCGR, dest, TCG_TMP0, cc);
    } else {
        /* Emit: d = 1; if (cc) goto over; d = 0; over:  */
        tcg_out_movi(s, type, dest, 1);
        tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1);
        tcg_out_movi(s, type, dest, 0);
    }
}

static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest,
                         TCGReg c1, TCGArg c2, int c2const,
                         TCGArg v3, int v3const)
{
    int cc;
    if (HAVE_FACILITY(LOAD_ON_COND)) {
        cc = tgen_cmp(s, type, c, c1, c2, c2const, false);
        if (v3const) {
            tcg_out_insn(s, RIE, LOCGHI, dest, v3, cc);
        } else {
            tcg_out_insn(s, RRF, LOCGR, dest, v3, cc);
        }
    } else {
        c = tcg_invert_cond(c);
        cc = tgen_cmp(s, type, c, c1, c2, c2const, false);

        /* Emit: if (cc) goto over; dest = r3; over:  */
        tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1);
        tcg_out_insn(s, RRE, LGR, dest, v3);
    }
}

static void tgen_clz(TCGContext *s, TCGReg dest, TCGReg a1,
                     TCGArg a2, int a2const)
{
    /* Since this sets both R and R+1, we have no choice but to store the
       result into R0, allowing R1 == TCG_TMP0 to be clobbered as well.  */
    QEMU_BUILD_BUG_ON(TCG_TMP0 != TCG_REG_R1);
    tcg_out_insn(s, RRE, FLOGR, TCG_REG_R0, a1);

    if (a2const && a2 == 64) {
        tcg_out_mov(s, TCG_TYPE_I64, dest, TCG_REG_R0);
    } else {
        if (a2const) {
            tcg_out_movi(s, TCG_TYPE_I64, dest, a2);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dest, a2);
        }
        if (HAVE_FACILITY(LOAD_ON_COND)) {
            /* Emit: if (one bit found) dest = r0.  */
            tcg_out_insn(s, RRF, LOCGR, dest, TCG_REG_R0, 2);
        } else {
            /* Emit: if (no one bit found) goto over; dest = r0; over:  */
            tcg_out_insn(s, RI, BRC, 8, (4 + 4) >> 1);
            tcg_out_insn(s, RRE, LGR, dest, TCG_REG_R0);
        }
    }
}

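/* E.g. deposit(dest, src, ofs = 16, len = 8) computes lsb = 47, msb = 40:
   RISBG rotates src left by 16 and inserts the rotated bits 40-47 into
   dest, leaving the remaining bits intact when z = 0.  */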
static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
                         int ofs, int len, int z)
{
    int lsb = (63 - ofs);
    int msb = lsb - (len - 1);
    tcg_out_risbg(s, dest, src, msb, lsb, ofs, z);
}

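/* E.g. extract(dest, src, ofs = 8, len = 4) emits RISBG dest,src,60,63
   with rotate 56 (i.e. rotate right by 8), keeping the four selected
   bits and zeroing the rest.  */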
static void tgen_extract(TCGContext *s, TCGReg dest, TCGReg src,
                         int ofs, int len)
{
    tcg_out_risbg(s, dest, src, 64 - len, 63, 64 - ofs, 1);
}

static void tgen_gotoi(TCGContext *s, int cc, const tcg_insn_unit *dest)
{
    ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
    if (off == (int16_t)off) {
        tcg_out_insn(s, RI, BRC, cc, off);
    } else if (off == (int32_t)off) {
        tcg_out_insn(s, RIL, BRCL, cc, off);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
        tcg_out_insn(s, RR, BCR, cc, TCG_TMP0);
    }
}

static void tgen_branch(TCGContext *s, int cc, TCGLabel *l)
{
    if (l->has_value) {
        tgen_gotoi(s, cc, l->u.value_ptr);
    } else if (USE_LONG_BRANCHES) {
        tcg_out16(s, RIL_BRCL | (cc << 4));
        tcg_out_reloc(s, s->code_ptr, R_390_PC32DBL, l, 2);
        s->code_ptr += 2;
    } else {
        tcg_out16(s, RI_BRC | (cc << 4));
        tcg_out_reloc(s, s->code_ptr, R_390_PC16DBL, l, 2);
        s->code_ptr += 1;
    }
}

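/* In the compare-and-branch encodings below, the 16-bit relative
   displacement occupies the second halfword of the insn, hence the
   relocations are recorded at s->code_ptr + 1.  */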
1623static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc,
1624                                TCGReg r1, TCGReg r2, TCGLabel *l)
1625{
1626    tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
1627    tcg_out16(s, (opc & 0xff00) | (r1 << 4) | r2);
1628    tcg_out16(s, 0);
1629    tcg_out16(s, cc << 12 | (opc & 0xff));
1630}
1631
1632static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc,
1633                                    TCGReg r1, int i2, TCGLabel *l)
1634{
1635    tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
1636    tcg_out16(s, (opc & 0xff00) | (r1 << 4) | cc);
1637    tcg_out16(s, 0);
1638    tcg_out16(s, (i2 << 8) | (opc & 0xff));
1639}
1640
1641static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
1642                        TCGReg r1, TCGArg c2, int c2const, TCGLabel *l)
1643{
1644    int cc;
1645
1646    if (HAVE_FACILITY(GEN_INST_EXT)) {
1647        bool is_unsigned = is_unsigned_cond(c);
1648        bool in_range;
1649        S390Opcode opc;
1650
1651        cc = tcg_cond_to_s390_cond[c];
1652
1653        if (!c2const) {
1654            opc = (type == TCG_TYPE_I32
1655                   ? (is_unsigned ? RIE_CLRJ : RIE_CRJ)
1656                   : (is_unsigned ? RIE_CLGRJ : RIE_CGRJ));
1657            tgen_compare_branch(s, opc, cc, r1, c2, l);
1658            return;
1659        }
1660
1661        /* COMPARE IMMEDIATE AND BRANCH RELATIVE has an 8-bit immediate field.
1662           If the immediate we've been given does not fit that range, we'll
1663           fall back to separate compare and branch instructions using the
1664           larger comparison range afforded by COMPARE IMMEDIATE.  */
1665        if (type == TCG_TYPE_I32) {
1666            if (is_unsigned) {
1667                opc = RIE_CLIJ;
1668                in_range = (uint32_t)c2 == (uint8_t)c2;
1669            } else {
1670                opc = RIE_CIJ;
1671                in_range = (int32_t)c2 == (int8_t)c2;
1672            }
1673        } else {
1674            if (is_unsigned) {
1675                opc = RIE_CLGIJ;
1676                in_range = (uint64_t)c2 == (uint8_t)c2;
1677            } else {
1678                opc = RIE_CGIJ;
1679                in_range = (int64_t)c2 == (int8_t)c2;
1680            }
1681        }
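        /* E.g. (illustrative) c2 = 100 fits the 8-bit immediate field,
           while c2 = 1000 falls through to the separate compare below. */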
1682        if (in_range) {
1683            tgen_compare_imm_branch(s, opc, cc, r1, c2, l);
1684            return;
1685        }
1686    }
1687
1688    cc = tgen_cmp(s, type, c, r1, c2, c2const, false);
1689    tgen_branch(s, cc, l);
1690}
1691
1692static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest)
1693{
1694    ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
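    /* BRASL has the same +-4GiB reach as BRCL; beyond that, build the
       address in TCG_TMP0 and use BASR. */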
1695    if (off == (int32_t)off) {
1696        tcg_out_insn(s, RIL, BRASL, TCG_REG_R14, off);
1697    } else {
1698        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
1699        tcg_out_insn(s, RR, BASR, TCG_REG_R14, TCG_TMP0);
1700    }
1701}
1702
1703static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,
1704                                   TCGReg base, TCGReg index, int disp)
1705{
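    /*
     * Note that the byte-reversing loads (LRVH, LRV) do not zero- or
     * sign-extend, so the MO_BSWAP cases extend in a second step.
     */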
1706    switch (opc & (MO_SSIZE | MO_BSWAP)) {
1707    case MO_UB:
1708        tcg_out_insn(s, RXY, LLGC, data, base, index, disp);
1709        break;
1710    case MO_SB:
1711        tcg_out_insn(s, RXY, LGB, data, base, index, disp);
1712        break;
1713
1714    case MO_UW | MO_BSWAP:
1715        /* swapped unsigned halfword load with upper bits zeroed */
1716        tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
1717        tgen_ext16u(s, TCG_TYPE_I64, data, data);
1718        break;
1719    case MO_UW:
1720        tcg_out_insn(s, RXY, LLGH, data, base, index, disp);
1721        break;
1722
1723    case MO_SW | MO_BSWAP:
1724        /* swapped sign-extended halfword load */
1725        tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
1726        tgen_ext16s(s, TCG_TYPE_I64, data, data);
1727        break;
1728    case MO_SW:
1729        tcg_out_insn(s, RXY, LGH, data, base, index, disp);
1730        break;
1731
1732    case MO_UL | MO_BSWAP:
1733        /* swapped unsigned int load with upper bits zeroed */
1734        tcg_out_insn(s, RXY, LRV, data, base, index, disp);
1735        tgen_ext32u(s, data, data);
1736        break;
1737    case MO_UL:
1738        tcg_out_insn(s, RXY, LLGF, data, base, index, disp);
1739        break;
1740
1741    case MO_SL | MO_BSWAP:
1742        /* swapped sign-extended int load */
1743        tcg_out_insn(s, RXY, LRV, data, base, index, disp);
1744        tgen_ext32s(s, data, data);
1745        break;
1746    case MO_SL:
1747        tcg_out_insn(s, RXY, LGF, data, base, index, disp);
1748        break;
1749
1750    case MO_UQ | MO_BSWAP:
1751        tcg_out_insn(s, RXY, LRVG, data, base, index, disp);
1752        break;
1753    case MO_UQ:
1754        tcg_out_insn(s, RXY, LG, data, base, index, disp);
1755        break;
1756
1757    default:
1758        tcg_abort();
1759    }
1760}
1761
1762static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data,
1763                                   TCGReg base, TCGReg index, int disp)
1764{
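    /*
     * RX-format stores carry a 12-bit unsigned displacement, RXY-format
     * a 20-bit signed one; prefer the short form whenever disp fits.
     */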
1765    switch (opc & (MO_SIZE | MO_BSWAP)) {
1766    case MO_UB:
1767        if (disp >= 0 && disp < 0x1000) {
1768            tcg_out_insn(s, RX, STC, data, base, index, disp);
1769        } else {
1770            tcg_out_insn(s, RXY, STCY, data, base, index, disp);
1771        }
1772        break;
1773
1774    case MO_UW | MO_BSWAP:
1775        tcg_out_insn(s, RXY, STRVH, data, base, index, disp);
1776        break;
1777    case MO_UW:
1778        if (disp >= 0 && disp < 0x1000) {
1779            tcg_out_insn(s, RX, STH, data, base, index, disp);
1780        } else {
1781            tcg_out_insn(s, RXY, STHY, data, base, index, disp);
1782        }
1783        break;
1784
1785    case MO_UL | MO_BSWAP:
1786        tcg_out_insn(s, RXY, STRV, data, base, index, disp);
1787        break;
1788    case MO_UL:
1789        if (disp >= 0 && disp < 0x1000) {
1790            tcg_out_insn(s, RX, ST, data, base, index, disp);
1791        } else {
1792            tcg_out_insn(s, RXY, STY, data, base, index, disp);
1793        }
1794        break;
1795
1796    case MO_UQ | MO_BSWAP:
1797        tcg_out_insn(s, RXY, STRVG, data, base, index, disp);
1798        break;
1799    case MO_UQ:
1800        tcg_out_insn(s, RXY, STG, data, base, index, disp);
1801        break;
1802
1803    default:
1804        tcg_abort();
1805    }
1806}
1807
1808#if defined(CONFIG_SOFTMMU)
1809/* We're expecting to use a 20-bit negative offset on the TLB memory ops.  */
1810QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1811QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19));
1812
1813/* Load and compare a TLB entry, leaving the flags set.  Loads the TLB
1814   addend into R2.  Returns a register with the sanitized guest address.  */
1815static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
1816                               int mem_index, bool is_ld)
1817{
1818    unsigned s_bits = opc & MO_SIZE;
1819    unsigned a_bits = get_alignment_bits(opc);
1820    unsigned s_mask = (1 << s_bits) - 1;
1821    unsigned a_mask = (1 << a_bits) - 1;
1822    int fast_off = TLB_MASK_TABLE_OFS(mem_index);
1823    int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
1824    int table_off = fast_off + offsetof(CPUTLBDescFast, table);
1825    int ofs, a_off;
1826    uint64_t tlb_mask;
1827
1828    tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE,
1829                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1830    tcg_out_insn(s, RXY, NG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, mask_off);
1831    tcg_out_insn(s, RXY, AG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, table_off);
1832
1833    /* For aligned accesses, we check the first byte and include the alignment
1834       bits within the address.  For unaligned accesses, we check that we don't
1835       cross pages using the address of the last byte of the access.  */
1836    a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
1837    tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
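    /*
     * E.g. (illustrative) an 8-byte access with no alignment requirement
     * has s_mask = 7 and a_mask = 0, so a_off = 7: comparing the address
     * of the last byte catches accesses that cross a page boundary.
     */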
1838    if (HAVE_FACILITY(GEN_INST_EXT) && a_off == 0) {
1839        tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask);
1840    } else {
1841        tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off);
1842        tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask);
1843    }
1844
1845    if (is_ld) {
1846        ofs = offsetof(CPUTLBEntry, addr_read);
1847    } else {
1848        ofs = offsetof(CPUTLBEntry, addr_write);
1849    }
1850    if (TARGET_LONG_BITS == 32) {
1851        tcg_out_insn(s, RX, C, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
1852    } else {
1853        tcg_out_insn(s, RXY, CG, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
1854    }
1855
1856    tcg_out_insn(s, RXY, LG, TCG_REG_R2, TCG_REG_R2, TCG_REG_NONE,
1857                 offsetof(CPUTLBEntry, addend));
1858
1859    if (TARGET_LONG_BITS == 32) {
1860        tgen_ext32u(s, TCG_REG_R3, addr_reg);
1861        return TCG_REG_R3;
1862    }
1863    return addr_reg;
1864}
1865
1866static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
1867                                TCGReg data, TCGReg addr,
1868                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1869{
1870    TCGLabelQemuLdst *label = new_ldst_label(s);
1871
1872    label->is_ld = is_ld;
1873    label->oi = oi;
1874    label->datalo_reg = data;
1875    label->addrlo_reg = addr;
1876    label->raddr = tcg_splitwx_to_rx(raddr);
1877    label->label_ptr[0] = label_ptr;
1878}
1879
1880static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1881{
1882    TCGReg addr_reg = lb->addrlo_reg;
1883    TCGReg data_reg = lb->datalo_reg;
1884    MemOpIdx oi = lb->oi;
1885    MemOp opc = get_memop(oi);
1886
1887    if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
1888                     (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1889        return false;
1890    }
1891
1892    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
1893    if (TARGET_LONG_BITS == 64) {
1894        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
1895    }
1896    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi);
1897    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr);
1898    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]);
1899    tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
1900
1901    tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1902    return true;
1903}
1904
1905static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1906{
1907    TCGReg addr_reg = lb->addrlo_reg;
1908    TCGReg data_reg = lb->datalo_reg;
1909    MemOpIdx oi = lb->oi;
1910    MemOp opc = get_memop(oi);
1911
1912    if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
1913                     (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1914        return false;
1915    }
1916
1917    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
1918    if (TARGET_LONG_BITS == 64) {
1919        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
1920    }
1921    switch (opc & MO_SIZE) {
1922    case MO_UB:
1923        tgen_ext8u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
1924        break;
1925    case MO_UW:
1926        tgen_ext16u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
1927        break;
1928    case MO_UL:
1929        tgen_ext32u(s, TCG_REG_R4, data_reg);
1930        break;
1931    case MO_UQ:
1932        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
1933        break;
1934    default:
1935        tcg_abort();
1936    }
1937    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi);
1938    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr);
1939    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1940
1941    tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1942    return true;
1943}
1944#else
1945static void tcg_out_test_alignment(TCGContext *s, bool is_ld,
1946                                   TCGReg addrlo, unsigned a_bits)
1947{
1948    unsigned a_mask = (1 << a_bits) - 1;
1949    TCGLabelQemuLdst *l = new_ldst_label(s);
1950
1951    l->is_ld = is_ld;
1952    l->addrlo_reg = addrlo;
1953
1954    /* We are expecting a_bits to max out at 7, well within the 16 bits tested by TMLL. */
1955    tcg_debug_assert(a_bits < 16);
1956    tcg_out_insn(s, RI, TMLL, addrlo, a_mask);
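    /* TMLL sets CC 0 iff all tested bits are zero, i.e. the address is
       aligned; any other CC means at least one low bit was set. */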
1957
1958    tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */
1959    l->label_ptr[0] = s->code_ptr;
1960    s->code_ptr += 1;
1961
1962    l->raddr = tcg_splitwx_to_rx(s->code_ptr);
1963}
1964
1965static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
1966{
1967    if (!patch_reloc(l->label_ptr[0], R_390_PC16DBL,
1968                     (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1969        return false;
1970    }
1971
1972    tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R3, l->addrlo_reg);
1973    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
1974
1975    /* "Tail call" to the helper, with the return address back inline. */
1976    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R14, (uintptr_t)l->raddr);
1977    tgen_gotoi(s, S390_CC_ALWAYS, (const void *)(l->is_ld ? helper_unaligned_ld
1978                                                 : helper_unaligned_st));
1979    return true;
1980}
1981
1982static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1983{
1984    return tcg_out_fail_alignment(s, l);
1985}
1986
1987static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1988{
1989    return tcg_out_fail_alignment(s, l);
1990}
1991
1992static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg,
1993                                  TCGReg *index_reg, tcg_target_long *disp)
1994{
1995    if (TARGET_LONG_BITS == 32) {
1996        tgen_ext32u(s, TCG_TMP0, *addr_reg);
1997        *addr_reg = TCG_TMP0;
1998    }
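    /*
     * 0x80000 is the positive limit of the 20-bit signed displacement
     * used by the RXY-format accesses; a small guest_base can be folded
     * into the displacement, anything larger needs a register.
     */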
1999    if (guest_base < 0x80000) {
2000        *index_reg = TCG_REG_NONE;
2001        *disp = guest_base;
2002    } else {
2003        *index_reg = TCG_GUEST_BASE_REG;
2004        *disp = 0;
2005    }
2006}
2007#endif /* CONFIG_SOFTMMU */
2008
2009static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
2010                            MemOpIdx oi)
2011{
2012    MemOp opc = get_memop(oi);
2013#ifdef CONFIG_SOFTMMU
2014    unsigned mem_index = get_mmuidx(oi);
2015    tcg_insn_unit *label_ptr;
2016    TCGReg base_reg;
2017
2018    base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1);
2019
2020    tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
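    /* The BRC just emitted branches to the slow path on TLB miss
       (CC != 0 from the compare in tcg_out_tlb_read); its halfword
       displacement is patched when the slow path is generated. */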
2021    label_ptr = s->code_ptr;
2022    s->code_ptr += 1;
2023
2024    tcg_out_qemu_ld_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
2025
2026    add_qemu_ldst_label(s, 1, oi, data_reg, addr_reg, s->code_ptr, label_ptr);
2027#else
2028    TCGReg index_reg;
2029    tcg_target_long disp;
2030    unsigned a_bits = get_alignment_bits(opc);
2031
2032    if (a_bits) {
2033        tcg_out_test_alignment(s, true, addr_reg, a_bits);
2034    }
2035    tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp);
2036    tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, index_reg, disp);
2037#endif
2038}
2039
2040static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
2041                            MemOpIdx oi)
2042{
2043    MemOp opc = get_memop(oi);
2044#ifdef CONFIG_SOFTMMU
2045    unsigned mem_index = get_mmuidx(oi);
2046    tcg_insn_unit *label_ptr;
2047    TCGReg base_reg;
2048
2049    base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0);
2050
2051    tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
2052    label_ptr = s->code_ptr;
2053    s->code_ptr += 1;
2054
2055    tcg_out_qemu_st_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
2056
2057    add_qemu_ldst_label(s, 0, oi, data_reg, addr_reg, s->code_ptr, label_ptr);
2058#else
2059    TCGReg index_reg;
2060    tcg_target_long disp;
2061    unsigned a_bits = get_alignment_bits(opc);
2062
2063    if (a_bits) {
2064        tcg_out_test_alignment(s, false, addr_reg, a_bits);
2065    }
2066    tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp);
2067    tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, index_reg, disp);
2068#endif
2069}
2070
2071# define OP_32_64(x) \
2072        case glue(glue(INDEX_op_,x),_i32): \
2073        case glue(glue(INDEX_op_,x),_i64)
2074
2075static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
2076                              const TCGArg args[TCG_MAX_OP_ARGS],
2077                              const int const_args[TCG_MAX_OP_ARGS])
2078{
2079    S390Opcode op, op2;
2080    TCGArg a0, a1, a2;
2081
2082    switch (opc) {
2083    case INDEX_op_exit_tb:
2084        /* Reuse the zeroing that exists for goto_ptr.  */
2085        a0 = args[0];
2086        if (a0 == 0) {
2087            tgen_gotoi(s, S390_CC_ALWAYS, tcg_code_gen_epilogue);
2088        } else {
2089            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, a0);
2090            tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr);
2091        }
2092        break;
2093
2094    case INDEX_op_goto_tb:
2095        a0 = args[0];
2096        if (s->tb_jmp_insn_offset) {
2097            /*
2098             * the branch displacement must be aligned for atomic patching;
2099             * see if we need to add an extra nop before the branch
2100             */
2101            if (!QEMU_PTR_IS_ALIGNED(s->code_ptr + 1, 4)) {
2102                tcg_out16(s, NOP);
2103            }
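            /*
             * The 32-bit displacement of BRCL starts 2 bytes into the
             * instruction; keeping it 4-byte aligned lets the jump be
             * patched atomically when TBs are (un)linked.
             */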
2104            tcg_debug_assert(!USE_REG_TB);
2105            tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4));
2106            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
2107            s->code_ptr += 2;
2108        } else {
2109            /* load address stored at s->tb_jmp_target_addr + a0 */
2110            tcg_out_ld_abs(s, TCG_TYPE_PTR, TCG_REG_TB,
2111                           tcg_splitwx_to_rx(s->tb_jmp_target_addr + a0));
2112            /* and go there */
2113            tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_TB);
2114        }
2115        set_jmp_reset_offset(s, a0);
2116
2117        /* For the unlinked path of goto_tb, we need to reset
2118           TCG_REG_TB to the beginning of this TB.  */
2119        if (USE_REG_TB) {
2120            int ofs = -tcg_current_code_size(s);
2121            /* All TBs are restricted to 64KiB by unwind info. */
2122            tcg_debug_assert(ofs == sextract64(ofs, 0, 20));
2123            tcg_out_insn(s, RXY, LAY, TCG_REG_TB,
2124                         TCG_REG_TB, TCG_REG_NONE, ofs);
2125        }
2126        break;
2127
2128    case INDEX_op_goto_ptr:
2129        a0 = args[0];
2130        if (USE_REG_TB) {
2131            tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, a0);
2132        }
2133        tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, a0);
2134        break;
2135
2136    OP_32_64(ld8u):
2137        /* ??? LLC (RXY format) is only present with the extended-immediate
2138           facility, whereas LLGC is always present.  */
2139        tcg_out_mem(s, 0, RXY_LLGC, args[0], args[1], TCG_REG_NONE, args[2]);
2140        break;
2141
2142    OP_32_64(ld8s):
2143        /* ??? LB is no smaller than LGB, so no point in using it.  */
2144        tcg_out_mem(s, 0, RXY_LGB, args[0], args[1], TCG_REG_NONE, args[2]);
2145        break;
2146
2147    OP_32_64(ld16u):
2148        /* ??? LLH (RXY format) is only present with the extended-immediate
2149           facility, whereas LLGH is always present.  */
2150        tcg_out_mem(s, 0, RXY_LLGH, args[0], args[1], TCG_REG_NONE, args[2]);
2151        break;
2152
2153    case INDEX_op_ld16s_i32:
2154        tcg_out_mem(s, RX_LH, RXY_LHY, args[0], args[1], TCG_REG_NONE, args[2]);
2155        break;
2156
2157    case INDEX_op_ld_i32:
2158        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2159        break;
2160
2161    OP_32_64(st8):
2162        tcg_out_mem(s, RX_STC, RXY_STCY, args[0], args[1],
2163                    TCG_REG_NONE, args[2]);
2164        break;
2165
2166    OP_32_64(st16):
2167        tcg_out_mem(s, RX_STH, RXY_STHY, args[0], args[1],
2168                    TCG_REG_NONE, args[2]);
2169        break;
2170
2171    case INDEX_op_st_i32:
2172        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2173        break;
2174
2175    case INDEX_op_add_i32:
2176        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2177        if (const_args[2]) {
2178        do_addi_32:
2179            if (a0 == a1) {
2180                if (a2 == (int16_t)a2) {
2181                    tcg_out_insn(s, RI, AHI, a0, a2);
2182                    break;
2183                }
2184                if (HAVE_FACILITY(EXT_IMM)) {
2185                    tcg_out_insn(s, RIL, AFI, a0, a2);
2186                    break;
2187                }
2188            }
2189            tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
2190        } else if (a0 == a1) {
2191            tcg_out_insn(s, RR, AR, a0, a2);
2192        } else {
2193            tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
2194        }
2195        break;
2196    case INDEX_op_sub_i32:
2197        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2198        if (const_args[2]) {
2199            a2 = -a2;
2200            goto do_addi_32;
2201        } else if (a0 == a1) {
2202            tcg_out_insn(s, RR, SR, a0, a2);
2203        } else {
2204            tcg_out_insn(s, RRF, SRK, a0, a1, a2);
2205        }
2206        break;
2207
2208    case INDEX_op_and_i32:
2209        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2210        if (const_args[2]) {
2211            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2212            tgen_andi(s, TCG_TYPE_I32, a0, a2);
2213        } else if (a0 == a1) {
2214            tcg_out_insn(s, RR, NR, a0, a2);
2215        } else {
2216            tcg_out_insn(s, RRF, NRK, a0, a1, a2);
2217        }
2218        break;
2219    case INDEX_op_or_i32:
2220        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2221        if (const_args[2]) {
2222            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2223            tgen_ori(s, TCG_TYPE_I32, a0, a2);
2224        } else if (a0 == a1) {
2225            tcg_out_insn(s, RR, OR, a0, a2);
2226        } else {
2227            tcg_out_insn(s, RRF, ORK, a0, a1, a2);
2228        }
2229        break;
2230    case INDEX_op_xor_i32:
2231        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2232        if (const_args[2]) {
2233            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2234            tgen_xori(s, TCG_TYPE_I32, a0, a2);
2235        } else if (a0 == a1) {
2236            tcg_out_insn(s, RR, XR, a0, a2);
2237        } else {
2238            tcg_out_insn(s, RRF, XRK, a0, a1, a2);
2239        }
2240        break;
2241
2242    case INDEX_op_neg_i32:
2243        tcg_out_insn(s, RR, LCR, args[0], args[1]);
2244        break;
2245
2246    case INDEX_op_mul_i32:
2247        if (const_args[2]) {
2248            if ((int32_t)args[2] == (int16_t)args[2]) {
2249                tcg_out_insn(s, RI, MHI, args[0], args[2]);
2250            } else {
2251                tcg_out_insn(s, RIL, MSFI, args[0], args[2]);
2252            }
2253        } else {
2254            tcg_out_insn(s, RRE, MSR, args[0], args[2]);
2255        }
2256        break;
2257
2258    case INDEX_op_div2_i32:
2259        tcg_out_insn(s, RR, DR, TCG_REG_R2, args[4]);
2260        break;
2261    case INDEX_op_divu2_i32:
2262        tcg_out_insn(s, RRE, DLR, TCG_REG_R2, args[4]);
2263        break;
2264
2265    case INDEX_op_shl_i32:
2266        op = RS_SLL;
2267        op2 = RSY_SLLK;
2268    do_shift32:
2269        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2270        if (a0 == a1) {
2271            if (const_args[2]) {
2272                tcg_out_sh32(s, op, a0, TCG_REG_NONE, a2);
2273            } else {
2274                tcg_out_sh32(s, op, a0, a2, 0);
2275            }
2276        } else {
2277            /* Using tcg_out_sh64 here for the format; it is a 32-bit shift.  */
2278            if (const_args[2]) {
2279                tcg_out_sh64(s, op2, a0, a1, TCG_REG_NONE, a2);
2280            } else {
2281                tcg_out_sh64(s, op2, a0, a1, a2, 0);
2282            }
2283        }
2284        break;
2285    case INDEX_op_shr_i32:
2286        op = RS_SRL;
2287        op2 = RSY_SRLK;
2288        goto do_shift32;
2289    case INDEX_op_sar_i32:
2290        op = RS_SRA;
2291        op2 = RSY_SRAK;
2292        goto do_shift32;
2293
2294    case INDEX_op_rotl_i32:
2295        /* ??? Using tcg_out_sh64 here for the format; it is a 32-bit rol.  */
2296        if (const_args[2]) {
2297            tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_REG_NONE, args[2]);
2298        } else {
2299            tcg_out_sh64(s, RSY_RLL, args[0], args[1], args[2], 0);
2300        }
2301        break;
2302    case INDEX_op_rotr_i32:
2303        if (const_args[2]) {
2304            tcg_out_sh64(s, RSY_RLL, args[0], args[1],
2305                         TCG_REG_NONE, (32 - args[2]) & 31);
2306        } else {
2307            tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
2308            tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_TMP0, 0);
2309        }
2310        break;
2311
2312    case INDEX_op_ext8s_i32:
2313        tgen_ext8s(s, TCG_TYPE_I32, args[0], args[1]);
2314        break;
2315    case INDEX_op_ext16s_i32:
2316        tgen_ext16s(s, TCG_TYPE_I32, args[0], args[1]);
2317        break;
2318    case INDEX_op_ext8u_i32:
2319        tgen_ext8u(s, TCG_TYPE_I32, args[0], args[1]);
2320        break;
2321    case INDEX_op_ext16u_i32:
2322        tgen_ext16u(s, TCG_TYPE_I32, args[0], args[1]);
2323        break;
2324
2325    case INDEX_op_bswap16_i32:
2326        a0 = args[0], a1 = args[1], a2 = args[2];
2327        tcg_out_insn(s, RRE, LRVR, a0, a1);
2328        if (a2 & TCG_BSWAP_OS) {
2329            tcg_out_sh32(s, RS_SRA, a0, TCG_REG_NONE, 16);
2330        } else {
2331            tcg_out_sh32(s, RS_SRL, a0, TCG_REG_NONE, 16);
2332        }
2333        break;
2334    case INDEX_op_bswap16_i64:
2335        a0 = args[0], a1 = args[1], a2 = args[2];
2336        tcg_out_insn(s, RRE, LRVGR, a0, a1);
2337        if (a2 & TCG_BSWAP_OS) {
2338            tcg_out_sh64(s, RSY_SRAG, a0, a0, TCG_REG_NONE, 48);
2339        } else {
2340            tcg_out_sh64(s, RSY_SRLG, a0, a0, TCG_REG_NONE, 48);
2341        }
2342        break;
2343
2344    case INDEX_op_bswap32_i32:
2345        tcg_out_insn(s, RRE, LRVR, args[0], args[1]);
2346        break;
2347    case INDEX_op_bswap32_i64:
2348        a0 = args[0], a1 = args[1], a2 = args[2];
2349        tcg_out_insn(s, RRE, LRVR, a0, a1);
2350        if (a2 & TCG_BSWAP_OS) {
2351            tgen_ext32s(s, a0, a0);
2352        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2353            tgen_ext32u(s, a0, a0);
2354        }
2355        break;
2356
2357    case INDEX_op_add2_i32:
2358        if (const_args[4]) {
2359            tcg_out_insn(s, RIL, ALFI, args[0], args[4]);
2360        } else {
2361            tcg_out_insn(s, RR, ALR, args[0], args[4]);
2362        }
2363        tcg_out_insn(s, RRE, ALCR, args[1], args[5]);
2364        break;
2365    case INDEX_op_sub2_i32:
2366        if (const_args[4]) {
2367            tcg_out_insn(s, RIL, SLFI, args[0], args[4]);
2368        } else {
2369            tcg_out_insn(s, RR, SLR, args[0], args[4]);
2370        }
2371        tcg_out_insn(s, RRE, SLBR, args[1], args[5]);
2372        break;
2373
2374    case INDEX_op_br:
2375        tgen_branch(s, S390_CC_ALWAYS, arg_label(args[0]));
2376        break;
2377
2378    case INDEX_op_brcond_i32:
2379        tgen_brcond(s, TCG_TYPE_I32, args[2], args[0],
2380                    args[1], const_args[1], arg_label(args[3]));
2381        break;
2382    case INDEX_op_setcond_i32:
2383        tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1],
2384                     args[2], const_args[2]);
2385        break;
2386    case INDEX_op_movcond_i32:
2387        tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1],
2388                     args[2], const_args[2], args[3], const_args[3]);
2389        break;
2390
2391    case INDEX_op_qemu_ld_i32:
2392        /* ??? Technically we can use a non-extending instruction.  */
2393    case INDEX_op_qemu_ld_i64:
2394        tcg_out_qemu_ld(s, args[0], args[1], args[2]);
2395        break;
2396    case INDEX_op_qemu_st_i32:
2397    case INDEX_op_qemu_st_i64:
2398        tcg_out_qemu_st(s, args[0], args[1], args[2]);
2399        break;
2400
2401    case INDEX_op_ld16s_i64:
2402        tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]);
2403        break;
2404    case INDEX_op_ld32u_i64:
2405        tcg_out_mem(s, 0, RXY_LLGF, args[0], args[1], TCG_REG_NONE, args[2]);
2406        break;
2407    case INDEX_op_ld32s_i64:
2408        tcg_out_mem(s, 0, RXY_LGF, args[0], args[1], TCG_REG_NONE, args[2]);
2409        break;
2410    case INDEX_op_ld_i64:
2411        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2412        break;
2413
2414    case INDEX_op_st32_i64:
2415        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2416        break;
2417    case INDEX_op_st_i64:
2418        tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2419        break;
2420
2421    case INDEX_op_add_i64:
2422        a0 = args[0], a1 = args[1], a2 = args[2];
2423        if (const_args[2]) {
2424        do_addi_64:
2425            if (a0 == a1) {
2426                if (a2 == (int16_t)a2) {
2427                    tcg_out_insn(s, RI, AGHI, a0, a2);
2428                    break;
2429                }
2430                if (HAVE_FACILITY(EXT_IMM)) {
2431                    if (a2 == (int32_t)a2) {
2432                        tcg_out_insn(s, RIL, AGFI, a0, a2);
2433                        break;
2434                    } else if (a2 == (uint32_t)a2) {
2435                        tcg_out_insn(s, RIL, ALGFI, a0, a2);
2436                        break;
2437                    } else if (-a2 == (uint32_t)-a2) {
2438                        tcg_out_insn(s, RIL, SLGFI, a0, -a2);
2439                        break;
2440                    }
2441                }
2442            }
2443            tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
2444        } else if (a0 == a1) {
2445            tcg_out_insn(s, RRE, AGR, a0, a2);
2446        } else {
2447            tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
2448        }
2449        break;
2450    case INDEX_op_sub_i64:
2451        a0 = args[0], a1 = args[1], a2 = args[2];
2452        if (const_args[2]) {
2453            a2 = -a2;
2454            goto do_addi_64;
2455        } else if (a0 == a1) {
2456            tcg_out_insn(s, RRE, SGR, a0, a2);
2457        } else {
2458            tcg_out_insn(s, RRF, SGRK, a0, a1, a2);
2459        }
2460        break;
2461
2462    case INDEX_op_and_i64:
2463        a0 = args[0], a1 = args[1], a2 = args[2];
2464        if (const_args[2]) {
2465            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2466            tgen_andi(s, TCG_TYPE_I64, a0, a2);
2467        } else if (a0 == a1) {
2468            tcg_out_insn(s, RRE, NGR, a0, a2);
2469        } else {
2470            tcg_out_insn(s, RRF, NGRK, a0, a1, a2);
2471        }
2472        break;
2473    case INDEX_op_or_i64:
2474        a0 = args[0], a1 = args[1], a2 = args[2];
2475        if (const_args[2]) {
2476            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2477            tgen_ori(s, TCG_TYPE_I64, a0, a2);
2478        } else if (a0 == a1) {
2479            tcg_out_insn(s, RRE, OGR, a0, a2);
2480        } else {
2481            tcg_out_insn(s, RRF, OGRK, a0, a1, a2);
2482        }
2483        break;
2484    case INDEX_op_xor_i64:
2485        a0 = args[0], a1 = args[1], a2 = args[2];
2486        if (const_args[2]) {
2487            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2488            tgen_xori(s, TCG_TYPE_I64, a0, a2);
2489        } else if (a0 == a1) {
2490            tcg_out_insn(s, RRE, XGR, a0, a2);
2491        } else {
2492            tcg_out_insn(s, RRF, XGRK, a0, a1, a2);
2493        }
2494        break;
2495
2496    case INDEX_op_neg_i64:
2497        tcg_out_insn(s, RRE, LCGR, args[0], args[1]);
2498        break;
2499    case INDEX_op_bswap64_i64:
2500        tcg_out_insn(s, RRE, LRVGR, args[0], args[1]);
2501        break;
2502
2503    case INDEX_op_mul_i64:
2504        if (const_args[2]) {
2505            if (args[2] == (int16_t)args[2]) {
2506                tcg_out_insn(s, RI, MGHI, args[0], args[2]);
2507            } else {
2508                tcg_out_insn(s, RIL, MSGFI, args[0], args[2]);
2509            }
2510        } else {
2511            tcg_out_insn(s, RRE, MSGR, args[0], args[2]);
2512        }
2513        break;
2514
2515    case INDEX_op_div2_i64:
2516        /* ??? We get an unnecessary sign-extension of the dividend
2517           into R3 with this definition; but since we do in fact always
2518           produce both quotient and remainder, using INDEX_op_div_i64
2519           instead would require jumping through even more hoops.  */
2520        tcg_out_insn(s, RRE, DSGR, TCG_REG_R2, args[4]);
2521        break;
2522    case INDEX_op_divu2_i64:
2523        tcg_out_insn(s, RRE, DLGR, TCG_REG_R2, args[4]);
2524        break;
2525    case INDEX_op_mulu2_i64:
2526        tcg_out_insn(s, RRE, MLGR, TCG_REG_R2, args[3]);
2527        break;
2528
2529    case INDEX_op_shl_i64:
2530        op = RSY_SLLG;
2531    do_shift64:
2532        if (const_args[2]) {
2533            tcg_out_sh64(s, op, args[0], args[1], TCG_REG_NONE, args[2]);
2534        } else {
2535            tcg_out_sh64(s, op, args[0], args[1], args[2], 0);
2536        }
2537        break;
2538    case INDEX_op_shr_i64:
2539        op = RSY_SRLG;
2540        goto do_shift64;
2541    case INDEX_op_sar_i64:
2542        op = RSY_SRAG;
2543        goto do_shift64;
2544
2545    case INDEX_op_rotl_i64:
2546        if (const_args[2]) {
2547            tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
2548                         TCG_REG_NONE, args[2]);
2549        } else {
2550            tcg_out_sh64(s, RSY_RLLG, args[0], args[1], args[2], 0);
2551        }
2552        break;
2553    case INDEX_op_rotr_i64:
2554        if (const_args[2]) {
2555            tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
2556                         TCG_REG_NONE, (64 - args[2]) & 63);
2557        } else {
2558            /* We can use the smaller 32-bit negate because only the
2559               low 6 bits are examined for the rotate.  */
2560            tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
2561            tcg_out_sh64(s, RSY_RLLG, args[0], args[1], TCG_TMP0, 0);
2562        }
2563        break;
2564
2565    case INDEX_op_ext8s_i64:
2566        tgen_ext8s(s, TCG_TYPE_I64, args[0], args[1]);
2567        break;
2568    case INDEX_op_ext16s_i64:
2569        tgen_ext16s(s, TCG_TYPE_I64, args[0], args[1]);
2570        break;
2571    case INDEX_op_ext_i32_i64:
2572    case INDEX_op_ext32s_i64:
2573        tgen_ext32s(s, args[0], args[1]);
2574        break;
2575    case INDEX_op_ext8u_i64:
2576        tgen_ext8u(s, TCG_TYPE_I64, args[0], args[1]);
2577        break;
2578    case INDEX_op_ext16u_i64:
2579        tgen_ext16u(s, TCG_TYPE_I64, args[0], args[1]);
2580        break;
2581    case INDEX_op_extu_i32_i64:
2582    case INDEX_op_ext32u_i64:
2583        tgen_ext32u(s, args[0], args[1]);
2584        break;
2585
2586    case INDEX_op_add2_i64:
2587        if (const_args[4]) {
2588            if ((int64_t)args[4] >= 0) {
2589                tcg_out_insn(s, RIL, ALGFI, args[0], args[4]);
2590            } else {
2591                tcg_out_insn(s, RIL, SLGFI, args[0], -args[4]);
2592            }
2593        } else {
2594            tcg_out_insn(s, RRE, ALGR, args[0], args[4]);
2595        }
2596        tcg_out_insn(s, RRE, ALCGR, args[1], args[5]);
2597        break;
2598    case INDEX_op_sub2_i64:
2599        if (const_args[4]) {
2600            if ((int64_t)args[4] >= 0) {
2601                tcg_out_insn(s, RIL, SLGFI, args[0], args[4]);
2602            } else {
2603                tcg_out_insn(s, RIL, ALGFI, args[0], -args[4]);
2604            }
2605        } else {
2606            tcg_out_insn(s, RRE, SLGR, args[0], args[4]);
2607        }
2608        tcg_out_insn(s, RRE, SLBGR, args[1], args[5]);
2609        break;
2610
2611    case INDEX_op_brcond_i64:
2612        tgen_brcond(s, TCG_TYPE_I64, args[2], args[0],
2613                    args[1], const_args[1], arg_label(args[3]));
2614        break;
2615    case INDEX_op_setcond_i64:
2616        tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1],
2617                     args[2], const_args[2]);
2618        break;
2619    case INDEX_op_movcond_i64:
2620        tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1],
2621                     args[2], const_args[2], args[3], const_args[3]);
2622        break;
2623
2624    OP_32_64(deposit):
2625        a0 = args[0], a1 = args[1], a2 = args[2];
2626        if (const_args[1]) {
2627            tgen_deposit(s, a0, a2, args[3], args[4], 1);
2628        } else {
2629            /* Since we can't support "0Z" as a constraint, we allow a1 in
2630               any register.  Fix things up as if it were a matching constraint.  */
2631            if (a0 != a1) {
2632                TCGType type = (opc == INDEX_op_deposit_i64);
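                /* Relies on TCG_TYPE_I32 == 0 and TCG_TYPE_I64 == 1. */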
2633                if (a0 == a2) {
2634                    tcg_out_mov(s, type, TCG_TMP0, a2);
2635                    a2 = TCG_TMP0;
2636                }
2637                tcg_out_mov(s, type, a0, a1);
2638            }
2639            tgen_deposit(s, a0, a2, args[3], args[4], 0);
2640        }
2641        break;
2642
2643    OP_32_64(extract):
2644        tgen_extract(s, args[0], args[1], args[2], args[3]);
2645        break;
2646
2647    case INDEX_op_clz_i64:
2648        tgen_clz(s, args[0], args[1], args[2], const_args[2]);
2649        break;
2650
2651    case INDEX_op_mb:
2652        /* The host memory model is quite strong; we simply need to
2653           serialize the instruction stream.  */
2654        if (args[0] & TCG_MO_ST_LD) {
2655            tcg_out_insn(s, RR, BCR, HAVE_FACILITY(FAST_BCR_SER) ? 14 : 15, 0);
2656        }
2657        break;
2658
2659    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2660    case INDEX_op_mov_i64:
2661    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2662    default:
2663        tcg_abort();
2664    }
2665}
2666
2667static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
2668                            TCGReg dst, TCGReg src)
2669{
2670    if (is_general_reg(src)) {
2671        /* Replicate general register into two MO_64. */
2672        tcg_out_insn(s, VRRf, VLVGP, dst, src, src);
2673        if (vece == MO_64) {
2674            return true;
2675        }
2676    }
2677
2678    /*
2679     * Recall that the "standard" integer, within a vector, is the
2680     * rightmost element of the leftmost doubleword, a-la VLLEZ.
2681     */
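    /* E.g. for vece == MO_16, (8 >> vece) - 1 == 3, the rightmost
       halfword of the leftmost doubleword. */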
2682    tcg_out_insn(s, VRIc, VREP, dst, (8 >> vece) - 1, src, vece);
2683    return true;
2684}
2685
2686static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
2687                             TCGReg dst, TCGReg base, intptr_t offset)
2688{
2689    tcg_out_vrx_mem(s, VRX_VLREP, dst, base, TCG_REG_NONE, offset, vece);
2690    return true;
2691}
2692
2693static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
2694                             TCGReg dst, int64_t val)
2695{
2696    int i, mask, msb, lsb;
2697
2698    /* Look for int16_t elements.  */
2699    if (vece <= MO_16 ||
2700        (vece == MO_32 ? (int32_t)val : val) == (int16_t)val) {
2701        tcg_out_insn(s, VRIa, VREPI, dst, val, vece);
2702        return;
2703    }
2704
2705    /* Look for bit masks.  */
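    /*
     * I.e. a contiguous run of ones, possibly wrapping around the ends,
     * e.g. (illustrative) 0x00ff0000 or the wrapping 0xe0000007, which
     * VECTOR GENERATE MASK can produce directly.
     */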
2706    if (vece == MO_32) {
2707        if (risbg_mask((int32_t)val)) {
2708            /* Handle wraparound by swapping msb and lsb.  */
2709            if ((val & 0x80000001u) == 0x80000001u) {
2710                msb = 32 - ctz32(~val);
2711                lsb = clz32(~val) - 1;
2712            } else {
2713                msb = clz32(val);
2714                lsb = 31 - ctz32(val);
2715            }
2716            tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_32);
2717            return;
2718        }
2719    } else {
2720        if (risbg_mask(val)) {
2721            /* Handle wraparound by swapping msb and lsb.  */
2722            if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
2724                msb = 64 - ctz64(~val);
2725                lsb = clz64(~val) - 1;
2726            } else {
2727                msb = clz64(val);
2728                lsb = 63 - ctz64(val);
2729            }
2730            tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_64);
2731            return;
2732        }
2733    }
2734
2735    /* Look for all bytes 0x00 or 0xff.  */
2736    for (i = mask = 0; i < 8; i++) {
2737        uint8_t byte = val >> (i * 8);
2738        if (byte == 0xff) {
2739            mask |= 1 << i;
2740        } else if (byte != 0) {
2741            break;
2742        }
2743    }
2744    if (i == 8) {
2745        tcg_out_insn(s, VRIa, VGBM, dst, mask * 0x0101, 0);
2746        return;
2747    }
2748
2749    /* Otherwise, stuff it in the constant pool.  */
2750    tcg_out_insn(s, RIL, LARL, TCG_TMP0, 0);
2751    new_pool_label(s, val, R_390_PC32DBL, s->code_ptr - 2, 2);
2752    tcg_out_insn(s, VRX, VLREP, dst, TCG_TMP0, TCG_REG_NONE, 0, MO_64);
2753}
2754
2755static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2756                           unsigned vecl, unsigned vece,
2757                           const TCGArg args[TCG_MAX_OP_ARGS],
2758                           const int const_args[TCG_MAX_OP_ARGS])
2759{
2760    TCGType type = vecl + TCG_TYPE_V64;
2761    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
2762
2763    switch (opc) {
2764    case INDEX_op_ld_vec:
2765        tcg_out_ld(s, type, a0, a1, a2);
2766        break;
2767    case INDEX_op_st_vec:
2768        tcg_out_st(s, type, a0, a1, a2);
2769        break;
2770    case INDEX_op_dupm_vec:
2771        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2772        break;
2773
2774    case INDEX_op_abs_vec:
2775        tcg_out_insn(s, VRRa, VLP, a0, a1, vece);
2776        break;
2777    case INDEX_op_neg_vec:
2778        tcg_out_insn(s, VRRa, VLC, a0, a1, vece);
2779        break;
2780    case INDEX_op_not_vec:
2781        tcg_out_insn(s, VRRc, VNO, a0, a1, a1, 0);
2782        break;
2783
2784    case INDEX_op_add_vec:
2785        tcg_out_insn(s, VRRc, VA, a0, a1, a2, vece);
2786        break;
2787    case INDEX_op_sub_vec:
2788        tcg_out_insn(s, VRRc, VS, a0, a1, a2, vece);
2789        break;
2790    case INDEX_op_and_vec:
2791        tcg_out_insn(s, VRRc, VN, a0, a1, a2, 0);
2792        break;
2793    case INDEX_op_andc_vec:
2794        tcg_out_insn(s, VRRc, VNC, a0, a1, a2, 0);
2795        break;
2796    case INDEX_op_mul_vec:
2797        tcg_out_insn(s, VRRc, VML, a0, a1, a2, vece);
2798        break;
2799    case INDEX_op_or_vec:
2800        tcg_out_insn(s, VRRc, VO, a0, a1, a2, 0);
2801        break;
2802    case INDEX_op_orc_vec:
2803        tcg_out_insn(s, VRRc, VOC, a0, a1, a2, 0);
2804        break;
2805    case INDEX_op_xor_vec:
2806        tcg_out_insn(s, VRRc, VX, a0, a1, a2, 0);
2807        break;
2808
2809    case INDEX_op_shli_vec:
2810        tcg_out_insn(s, VRSa, VESL, a0, a2, TCG_REG_NONE, a1, vece);
2811        break;
2812    case INDEX_op_shri_vec:
2813        tcg_out_insn(s, VRSa, VESRL, a0, a2, TCG_REG_NONE, a1, vece);
2814        break;
2815    case INDEX_op_sari_vec:
2816        tcg_out_insn(s, VRSa, VESRA, a0, a2, TCG_REG_NONE, a1, vece);
2817        break;
2818    case INDEX_op_rotli_vec:
2819        tcg_out_insn(s, VRSa, VERLL, a0, a2, TCG_REG_NONE, a1, vece);
2820        break;
2821    case INDEX_op_shls_vec:
2822        tcg_out_insn(s, VRSa, VESL, a0, 0, a2, a1, vece);
2823        break;
2824    case INDEX_op_shrs_vec:
2825        tcg_out_insn(s, VRSa, VESRL, a0, 0, a2, a1, vece);
2826        break;
2827    case INDEX_op_sars_vec:
2828        tcg_out_insn(s, VRSa, VESRA, a0, 0, a2, a1, vece);
2829        break;
2830    case INDEX_op_rotls_vec:
2831        tcg_out_insn(s, VRSa, VERLL, a0, 0, a2, a1, vece);
2832        break;
2833    case INDEX_op_shlv_vec:
2834        tcg_out_insn(s, VRRc, VESLV, a0, a1, a2, vece);
2835        break;
2836    case INDEX_op_shrv_vec:
2837        tcg_out_insn(s, VRRc, VESRLV, a0, a1, a2, vece);
2838        break;
2839    case INDEX_op_sarv_vec:
2840        tcg_out_insn(s, VRRc, VESRAV, a0, a1, a2, vece);
2841        break;
2842    case INDEX_op_rotlv_vec:
2843        tcg_out_insn(s, VRRc, VERLLV, a0, a1, a2, vece);
2844        break;
2845
2846    case INDEX_op_smin_vec:
2847        tcg_out_insn(s, VRRc, VMN, a0, a1, a2, vece);
2848        break;
2849    case INDEX_op_smax_vec:
2850        tcg_out_insn(s, VRRc, VMX, a0, a1, a2, vece);
2851        break;
2852    case INDEX_op_umin_vec:
2853        tcg_out_insn(s, VRRc, VMNL, a0, a1, a2, vece);
2854        break;
2855    case INDEX_op_umax_vec:
2856        tcg_out_insn(s, VRRc, VMXL, a0, a1, a2, vece);
2857        break;
2858
2859    case INDEX_op_bitsel_vec:
2860        tcg_out_insn(s, VRRe, VSEL, a0, a1, a2, args[3]);
2861        break;
2862
2863    case INDEX_op_cmp_vec:
2864        switch ((TCGCond)args[3]) {
2865        case TCG_COND_EQ:
2866            tcg_out_insn(s, VRRc, VCEQ, a0, a1, a2, vece);
2867            break;
2868        case TCG_COND_GT:
2869            tcg_out_insn(s, VRRc, VCH, a0, a1, a2, vece);
2870            break;
2871        case TCG_COND_GTU:
2872            tcg_out_insn(s, VRRc, VCHL, a0, a1, a2, vece);
2873            break;
2874        default:
2875            g_assert_not_reached();
2876        }
2877        break;
2878
2879    case INDEX_op_s390_vuph_vec:
2880        tcg_out_insn(s, VRRa, VUPH, a0, a1, vece);
2881        break;
2882    case INDEX_op_s390_vupl_vec:
2883        tcg_out_insn(s, VRRa, VUPL, a0, a1, vece);
2884        break;
2885    case INDEX_op_s390_vpks_vec:
2886        tcg_out_insn(s, VRRc, VPKS, a0, a1, a2, vece);
2887        break;
2888
2889    case INDEX_op_mov_vec:   /* Always emitted via tcg_out_mov.  */
2890    case INDEX_op_dup_vec:   /* Always emitted via tcg_out_dup_vec.  */
2891    default:
2892        g_assert_not_reached();
2893    }
2894}
2895
2896int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2897{
2898    switch (opc) {
2899    case INDEX_op_abs_vec:
2900    case INDEX_op_add_vec:
2901    case INDEX_op_and_vec:
2902    case INDEX_op_andc_vec:
2903    case INDEX_op_bitsel_vec:
2904    case INDEX_op_neg_vec:
2905    case INDEX_op_not_vec:
2906    case INDEX_op_or_vec:
2907    case INDEX_op_orc_vec:
2908    case INDEX_op_rotli_vec:
2909    case INDEX_op_rotls_vec:
2910    case INDEX_op_rotlv_vec:
2911    case INDEX_op_sari_vec:
2912    case INDEX_op_sars_vec:
2913    case INDEX_op_sarv_vec:
2914    case INDEX_op_shli_vec:
2915    case INDEX_op_shls_vec:
2916    case INDEX_op_shlv_vec:
2917    case INDEX_op_shri_vec:
2918    case INDEX_op_shrs_vec:
2919    case INDEX_op_shrv_vec:
2920    case INDEX_op_smax_vec:
2921    case INDEX_op_smin_vec:
2922    case INDEX_op_sub_vec:
2923    case INDEX_op_umax_vec:
2924    case INDEX_op_umin_vec:
2925    case INDEX_op_xor_vec:
2926        return 1;
2927    case INDEX_op_cmp_vec:
2928    case INDEX_op_cmpsel_vec:
2929    case INDEX_op_rotrv_vec:
2930        return -1;
2931    case INDEX_op_mul_vec:
2932        return vece < MO_64;
2933    case INDEX_op_ssadd_vec:
2934    case INDEX_op_sssub_vec:
2935        return vece < MO_64 ? -1 : 0;
2936    default:
2937        return 0;
2938    }
2939}
2940
2941static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0,
2942                                 TCGv_vec v1, TCGv_vec v2, TCGCond cond)
2943{
2944    bool need_swap = false, need_inv = false;
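    /*
     * The hardware has only EQ, GT and GTU vector compares; everything
     * else is derived by swapping the operands and/or inverting the
     * result, the inversion being left to the caller (see the return
     * value).
     */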
2945
2946    switch (cond) {
2947    case TCG_COND_EQ:
2948    case TCG_COND_GT:
2949    case TCG_COND_GTU:
2950        break;
2951    case TCG_COND_NE:
2952    case TCG_COND_LE:
2953    case TCG_COND_LEU:
2954        need_inv = true;
2955        break;
2956    case TCG_COND_LT:
2957    case TCG_COND_LTU:
2958        need_swap = true;
2959        break;
2960    case TCG_COND_GE:
2961    case TCG_COND_GEU:
2962        need_swap = need_inv = true;
2963        break;
2964    default:
2965        g_assert_not_reached();
2966    }
2967
2968    if (need_inv) {
2969        cond = tcg_invert_cond(cond);
2970    }
2971    if (need_swap) {
2972        TCGv_vec t1;
2973        t1 = v1, v1 = v2, v2 = t1;
2974        cond = tcg_swap_cond(cond);
2975    }
2976
2977    vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
2978              tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
2979
2980    return need_inv;
2981}
2982
2983static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
2984                           TCGv_vec v1, TCGv_vec v2, TCGCond cond)
2985{
2986    if (expand_vec_cmp_noinv(type, vece, v0, v1, v2, cond)) {
2987        tcg_gen_not_vec(vece, v0, v0);
2988    }
2989}
2990
2991static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGv_vec v0,
2992                              TCGv_vec c1, TCGv_vec c2,
2993                              TCGv_vec v3, TCGv_vec v4, TCGCond cond)
2994{
2995    TCGv_vec t = tcg_temp_new_vec(type);
2996
2997    if (expand_vec_cmp_noinv(type, vece, t, c1, c2, cond)) {
2998        /* Invert the sense of the compare by swapping arguments.  */
2999        tcg_gen_bitsel_vec(vece, v0, t, v4, v3);
3000    } else {
3001        tcg_gen_bitsel_vec(vece, v0, t, v3, v4);
3002    }
3003    tcg_temp_free_vec(t);
3004}
3005
3006static void expand_vec_sat(TCGType type, unsigned vece, TCGv_vec v0,
3007                           TCGv_vec v1, TCGv_vec v2, TCGOpcode add_sub_opc)
3008{
3009    TCGv_vec h1 = tcg_temp_new_vec(type);
3010    TCGv_vec h2 = tcg_temp_new_vec(type);
3011    TCGv_vec l1 = tcg_temp_new_vec(type);
3012    TCGv_vec l2 = tcg_temp_new_vec(type);
3013
3014    tcg_debug_assert(vece < MO_64);
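    /*
     * E.g. (illustrative) a saturating MO_16 add: VUPH/VUPL widen each
     * 16-bit lane to 32 bits, the addition happens at MO_32 where it
     * cannot wrap, and VPKS packs back to 16 bits with signed saturation.
     */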
3015
3016    /* Unpack with sign-extension. */
3017    vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
3018              tcgv_vec_arg(h1), tcgv_vec_arg(v1));
3019    vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
3020              tcgv_vec_arg(h2), tcgv_vec_arg(v2));
3021
3022    vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
3023              tcgv_vec_arg(l1), tcgv_vec_arg(v1));
3024    vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
3025              tcgv_vec_arg(l2), tcgv_vec_arg(v2));
3026
3027    /* Arithmetic on a wider element size. */
3028    vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(h1),
3029              tcgv_vec_arg(h1), tcgv_vec_arg(h2));
3030    vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(l1),
3031              tcgv_vec_arg(l1), tcgv_vec_arg(l2));
3032
3033    /* Pack with saturation. */
3034    vec_gen_3(INDEX_op_s390_vpks_vec, type, vece + 1,
3035              tcgv_vec_arg(v0), tcgv_vec_arg(h1), tcgv_vec_arg(l1));
3036
3037    tcg_temp_free_vec(h1);
3038    tcg_temp_free_vec(h2);
3039    tcg_temp_free_vec(l1);
3040    tcg_temp_free_vec(l2);
3041}
3042
3043void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
3044                       TCGArg a0, ...)
3045{
3046    va_list va;
3047    TCGv_vec v0, v1, v2, v3, v4, t0;
3048
3049    va_start(va, a0);
3050    v0 = temp_tcgv_vec(arg_temp(a0));
3051    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3052    v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3053
3054    switch (opc) {
3055    case INDEX_op_cmp_vec:
3056        expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
3057        break;
3058
3059    case INDEX_op_cmpsel_vec:
3060        v3 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3061        v4 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3062        expand_vec_cmpsel(type, vece, v0, v1, v2, v3, v4, va_arg(va, TCGArg));
3063        break;
3064
3065    case INDEX_op_rotrv_vec:
3066        t0 = tcg_temp_new_vec(type);
3067        tcg_gen_neg_vec(vece, t0, v2);
3068        tcg_gen_rotlv_vec(vece, v0, v1, t0);
3069        tcg_temp_free_vec(t0);
3070        break;
3071
3072    case INDEX_op_ssadd_vec:
3073        expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_add_vec);
3074        break;
3075    case INDEX_op_sssub_vec:
3076        expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_sub_vec);
3077        break;
3078
3079    default:
3080        g_assert_not_reached();
3081    }
3082    va_end(va);
3083}
3084
3085static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
3086{
3087    switch (op) {
3088    case INDEX_op_goto_ptr:
3089        return C_O0_I1(r);
3090
3091    case INDEX_op_ld8u_i32:
3092    case INDEX_op_ld8u_i64:
3093    case INDEX_op_ld8s_i32:
3094    case INDEX_op_ld8s_i64:
3095    case INDEX_op_ld16u_i32:
3096    case INDEX_op_ld16u_i64:
3097    case INDEX_op_ld16s_i32:
3098    case INDEX_op_ld16s_i64:
3099    case INDEX_op_ld_i32:
3100    case INDEX_op_ld32u_i64:
3101    case INDEX_op_ld32s_i64:
3102    case INDEX_op_ld_i64:
3103        return C_O1_I1(r, r);
3104
3105    case INDEX_op_st8_i32:
3106    case INDEX_op_st8_i64:
3107    case INDEX_op_st16_i32:
3108    case INDEX_op_st16_i64:
3109    case INDEX_op_st_i32:
3110    case INDEX_op_st32_i64:
3111    case INDEX_op_st_i64:
3112        return C_O0_I2(r, r);
3113
3114    case INDEX_op_add_i32:
3115    case INDEX_op_add_i64:
3116    case INDEX_op_shl_i64:
3117    case INDEX_op_shr_i64:
3118    case INDEX_op_sar_i64:
3119    case INDEX_op_rotl_i32:
3120    case INDEX_op_rotl_i64:
3121    case INDEX_op_rotr_i32:
3122    case INDEX_op_rotr_i64:
3123    case INDEX_op_clz_i64:
3124    case INDEX_op_setcond_i32:
3125    case INDEX_op_setcond_i64:
3126        return C_O1_I2(r, r, ri);
3127
3128    case INDEX_op_sub_i32:
3129    case INDEX_op_sub_i64:
3130    case INDEX_op_and_i32:
3131    case INDEX_op_and_i64:
3132    case INDEX_op_or_i32:
3133    case INDEX_op_or_i64:
3134    case INDEX_op_xor_i32:
3135    case INDEX_op_xor_i64:
3136        return (HAVE_FACILITY(DISTINCT_OPS)
3137                ? C_O1_I2(r, r, ri)
3138                : C_O1_I2(r, 0, ri));
3139
3140    case INDEX_op_mul_i32:
3141        /* With the general-instruction-extensions facility we have MULTIPLY
3142           SINGLE IMMEDIATE with a signed 32-bit immediate; otherwise only
3143           MULTIPLY HALFWORD IMMEDIATE, with a signed 16-bit immediate.  */
3144        return (HAVE_FACILITY(GEN_INST_EXT)
3145                ? C_O1_I2(r, 0, ri)
3146                : C_O1_I2(r, 0, rI));
3147
3148    case INDEX_op_mul_i64:
3149        return (HAVE_FACILITY(GEN_INST_EXT)
3150                ? C_O1_I2(r, 0, rJ)
3151                : C_O1_I2(r, 0, rI));
3152
3153    case INDEX_op_shl_i32:
3154    case INDEX_op_shr_i32:
3155    case INDEX_op_sar_i32:
3156        return (HAVE_FACILITY(DISTINCT_OPS)
3157                ? C_O1_I2(r, r, ri)
3158                : C_O1_I2(r, 0, ri));
3159
3160    case INDEX_op_brcond_i32:
3161    case INDEX_op_brcond_i64:
3162        return C_O0_I2(r, ri);
3163
3164    case INDEX_op_bswap16_i32:
3165    case INDEX_op_bswap16_i64:
3166    case INDEX_op_bswap32_i32:
3167    case INDEX_op_bswap32_i64:
3168    case INDEX_op_bswap64_i64:
3169    case INDEX_op_neg_i32:
3170    case INDEX_op_neg_i64:
3171    case INDEX_op_ext8s_i32:
3172    case INDEX_op_ext8s_i64:
3173    case INDEX_op_ext8u_i32:
3174    case INDEX_op_ext8u_i64:
3175    case INDEX_op_ext16s_i32:
3176    case INDEX_op_ext16s_i64:
3177    case INDEX_op_ext16u_i32:
3178    case INDEX_op_ext16u_i64:
3179    case INDEX_op_ext32s_i64:
3180    case INDEX_op_ext32u_i64:
3181    case INDEX_op_ext_i32_i64:
3182    case INDEX_op_extu_i32_i64:
3183    case INDEX_op_extract_i32:
3184    case INDEX_op_extract_i64:
3185        return C_O1_I1(r, r);
3186
3187    case INDEX_op_qemu_ld_i32:
3188    case INDEX_op_qemu_ld_i64:
3189        return C_O1_I1(r, L);
3190    case INDEX_op_qemu_st_i64:
3191    case INDEX_op_qemu_st_i32:
3192        return C_O0_I2(L, L);
3193
3194    case INDEX_op_deposit_i32:
3195    case INDEX_op_deposit_i64:
3196        return C_O1_I2(r, rZ, r);
3197
3198    case INDEX_op_movcond_i32:
3199    case INDEX_op_movcond_i64:
3200        return (HAVE_FACILITY(LOAD_ON_COND2)
3201                ? C_O1_I4(r, r, ri, rI, 0)
3202                : C_O1_I4(r, r, ri, r, 0));
3203
3204    case INDEX_op_div2_i32:
3205    case INDEX_op_div2_i64:
3206    case INDEX_op_divu2_i32:
3207    case INDEX_op_divu2_i64:
3208        return C_O2_I3(b, a, 0, 1, r);
3209
3210    case INDEX_op_mulu2_i64:
3211        return C_O2_I2(b, a, 0, r);
3212
3213    case INDEX_op_add2_i32:
3214    case INDEX_op_sub2_i32:
3215        return (HAVE_FACILITY(EXT_IMM)
3216                ? C_O2_I4(r, r, 0, 1, ri, r)
3217                : C_O2_I4(r, r, 0, 1, r, r));
3218
3219    case INDEX_op_add2_i64:
3220    case INDEX_op_sub2_i64:
3221        return (HAVE_FACILITY(EXT_IMM)
3222                ? C_O2_I4(r, r, 0, 1, rA, r)
3223                : C_O2_I4(r, r, 0, 1, r, r));
3224
3225    case INDEX_op_st_vec:
3226        return C_O0_I2(v, r);
3227    case INDEX_op_ld_vec:
3228    case INDEX_op_dupm_vec:
3229        return C_O1_I1(v, r);
3230    case INDEX_op_dup_vec:
3231        return C_O1_I1(v, vr);
3232    case INDEX_op_abs_vec:
3233    case INDEX_op_neg_vec:
3234    case INDEX_op_not_vec:
3235    case INDEX_op_rotli_vec:
3236    case INDEX_op_sari_vec:
3237    case INDEX_op_shli_vec:
3238    case INDEX_op_shri_vec:
3239    case INDEX_op_s390_vuph_vec:
3240    case INDEX_op_s390_vupl_vec:
3241        return C_O1_I1(v, v);
3242    case INDEX_op_add_vec:
3243    case INDEX_op_sub_vec:
3244    case INDEX_op_and_vec:
3245    case INDEX_op_andc_vec:
3246    case INDEX_op_or_vec:
3247    case INDEX_op_orc_vec:
3248    case INDEX_op_xor_vec:
3249    case INDEX_op_cmp_vec:
3250    case INDEX_op_mul_vec:
3251    case INDEX_op_rotlv_vec:
3252    case INDEX_op_rotrv_vec:
3253    case INDEX_op_shlv_vec:
3254    case INDEX_op_shrv_vec:
3255    case INDEX_op_sarv_vec:
3256    case INDEX_op_smax_vec:
3257    case INDEX_op_smin_vec:
3258    case INDEX_op_umax_vec:
3259    case INDEX_op_umin_vec:
3260    case INDEX_op_s390_vpks_vec:
3261        return C_O1_I2(v, v, v);
3262    case INDEX_op_rotls_vec:
3263    case INDEX_op_shls_vec:
3264    case INDEX_op_shrs_vec:
3265    case INDEX_op_sars_vec:
3266        return C_O1_I2(v, v, r);
3267    case INDEX_op_bitsel_vec:
3268        return C_O1_I3(v, v, v, v);
3269
3270    default:
3271        g_assert_not_reached();
3272    }
3273}
3274
3275/*
3276 * Mainline glibc added HWCAP_S390_VX before it was kernel ABI.
3277 * Some distros have fixed this up locally, others have not.
3278 */
3279#ifndef HWCAP_S390_VXRS
3280#define HWCAP_S390_VXRS 2048
3281#endif
3282
3283static void query_s390_facilities(void)
3284{
3285    unsigned long hwcap = qemu_getauxval(AT_HWCAP);
3286
3287    /* Is STORE FACILITY LIST EXTENDED available?  Honestly, I believe this
3288       is present on all 64-bit systems, but let's check for it anyway.  */
3289    if (hwcap & HWCAP_S390_STFLE) {
3290        register int r0 __asm__("0") = ARRAY_SIZE(s390_facilities) - 1;
3291        register void *r1 __asm__("1") = s390_facilities;
3292
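        /*
         * STFLE takes the number of doublewords to store minus 1 in r0
         * and stores the facility bits at the address in r1; on
         * completion, r0 is updated with the number of doublewords the
         * machine supports minus 1.  The raw .word encoding avoids
         * requiring assembler support for the mnemonic.
         */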
3293        /* stfle 0(%r1) */
3294        asm volatile(".word 0xb2b0,0x1000"
3295                     : "=r"(r0) : "r"(r0), "r"(r1) : "memory", "cc");
3296    }
3297
3298    /*
3299     * Use of vector registers requires OS support beyond the facility bit.
3300     * If the kernel does not advertise support, disable the facility bits.
3301     * There is nothing else we currently care about in the 3rd word, so
3302     * disable VECTOR with one store.
3303     */
3304    if (!(hwcap & HWCAP_S390_VXRS)) {
3305        s390_facilities[2] = 0;
3306    }
3307}
3308
3309static void tcg_target_init(TCGContext *s)
3310{
3311    query_s390_facilities();
3312
3313    tcg_target_available_regs[TCG_TYPE_I32] = 0xffff;
3314    tcg_target_available_regs[TCG_TYPE_I64] = 0xffff;
3315    if (HAVE_FACILITY(VECTOR)) {
3316        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3317        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
3318    }
3319
3320    tcg_target_call_clobber_regs = 0;
3321    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
3322    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R1);
3323    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
3324    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
3325    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
3326    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
3327    /* The r6 register is technically call-saved, but it's also a parameter
3328       register, so it can get killed by setup for the qemu_st helper.  */
3329    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
3330    /* The return register can be considered call-clobbered.  */
3331    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14);
3332
3333    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
3334    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
3335    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
3336    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
3337    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
3338    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
3339    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
3340    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
3341    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
3342    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
3343    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
3344    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
3345    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V20);
3346    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V21);
3347    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V22);
3348    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V23);
3349    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V24);
3350    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V25);
3351    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V26);
3352    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V27);
3353    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V28);
3354    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V29);
3355    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V30);
3356    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V31);
3357
3358    s->reserved_regs = 0;
3359    tcg_regset_set_reg(s->reserved_regs, TCG_TMP0);
3360    /* XXX many insns can't be used with R0, so it's better to avoid it for now */
3361    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);
3362    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
3363    if (USE_REG_TB) {
3364        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);
3365    }
3366}
3367
3368#define FRAME_SIZE  ((int)(TCG_TARGET_CALL_STACK_OFFSET          \
3369                           + TCG_STATIC_CALL_ARGS_SIZE           \
3370                           + CPU_TEMP_BUF_NLONGS * sizeof(long)))
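/*
 * The frame allocated below thus contains, from the bottom up: the ABI
 * register save area and backchain (TCG_TARGET_CALL_STACK_OFFSET, 160
 * bytes in the s390x ELF ABI), space for outgoing helper arguments, and
 * the TCG temporary buffer registered with tcg_set_frame() below.
 */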
3371
3372static void tcg_target_qemu_prologue(TCGContext *s)
3373{
3374    /* stmg %r6,%r15,48(%r15) (save registers) */
3375    tcg_out_insn(s, RXY, STMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 48);
3376
3377    /* aghi %r15,-frame_size */
3378    tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -FRAME_SIZE);
3379
3380    tcg_set_frame(s, TCG_REG_CALL_STACK,
3381                  TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET,
3382                  CPU_TEMP_BUF_NLONGS * sizeof(long));
3383
3384#ifndef CONFIG_SOFTMMU
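    /*
     * Smaller guest_base values fit in the signed 20-bit displacement of
     * the long-displacement memory instructions and can presumably be
     * folded into each access; only larger bases need a reserved register.
     */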
3385    if (guest_base >= 0x80000) {
3386        tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
3387        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
3388    }
3389#endif
3390
3391    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
3392    if (USE_REG_TB) {
3393        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB,
3394                    tcg_target_call_iarg_regs[1]);
3395    }
3396
3397    /* br %r3 (go to TB) */
3398    tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, tcg_target_call_iarg_regs[1]);
3399
3400    /*
3401     * Return path for goto_ptr. Set return value to 0, à la exit_tb,
3402     * and fall through to the rest of the epilogue.
3403     */
3404    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
3405    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, 0);
3406
3407    /* TB epilogue */
3408    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
3409
3410    /* lmg %r6,%r15,fs+48(%r15) (restore registers) */
3411    tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15,
3412                 FRAME_SIZE + 48);
3413
3414    /* br %r14 (return) */
3415    tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14);
3416}
3417
3418static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3419{
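    /* 0x07 bytes pair into halfwords of 0x0707, i.e. "nopr %r7" (bcr 0,%r7). */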
3420    memset(p, 0x07, count * sizeof(tcg_insn_unit));
3421}
3422
3423typedef struct {
3424    DebugFrameHeader h;
3425    uint8_t fde_def_cfa[4];
3426    uint8_t fde_reg_ofs[18];
3427} DebugFrame;
3428
3429/* We're expecting a 2-byte uleb128-encoded value.  */
3430QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
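/* Each uleb128 byte carries 7 payload bits, so 2 bytes cover values below 1 << 14. */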
3431
3432#define ELF_HOST_MACHINE  EM_S390
3433
3434static const DebugFrame debug_frame = {
3435    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3436    .h.cie.id = -1,
3437    .h.cie.version = 1,
3438    .h.cie.code_align = 1,
3439    .h.cie.data_align = 8,                /* sleb128 8 */
3440    .h.cie.return_column = TCG_REG_R14,
3441
3442    /* Total FDE size does not include the "len" member.  */
3443    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3444
3445    .fde_def_cfa = {
3446        12, TCG_REG_CALL_STACK,         /* DW_CFA_def_cfa %r15, ... */
3447        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3448        (FRAME_SIZE >> 7)
3449    },
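    /*
     * Each DW_CFA_offset pair below is (0x80 | regno, offset / data_align):
     * e.g. 0x86, 6 records %r6 saved at CFA + 6 * 8 = CFA + 48.
     */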
3450    .fde_reg_ofs = {
3451        0x86, 6,                        /* DW_CFA_offset, %r6, 48 */
3452        0x87, 7,                        /* DW_CFA_offset, %r7, 56 */
3453        0x88, 8,                        /* DW_CFA_offset, %r8, 64 */
3454        0x89, 9,                        /* DW_CFA_offset, %r9, 72 */
3455        0x8a, 10,                       /* DW_CFA_offset, %r10, 80 */
3456        0x8b, 11,                       /* DW_CFA_offset, %r11, 88 */
3457        0x8c, 12,                       /* DW_CFA_offset, %r12, 96 */
3458        0x8d, 13,                       /* DW_CFA_offset, %r13, 104 */
3459        0x8e, 14,                       /* DW_CFA_offset, %r14, 112 */
3460    }
3461};
3462
3463void tcg_register_jit(const void *buf, size_t buf_size)
3464{
3465    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3466}
3467