/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2009 Ulrich Hecht <uli@suse.de>
 * Copyright (c) 2009 Alexander Graf <agraf@suse.de>
 * Copyright (c) 2010 Richard Henderson <rth@twiddle.net>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* We only support generating code for 64-bit mode.  */
#if TCG_TARGET_REG_BITS != 64
#error "unsupported code generation mode"
#endif

#include "../tcg-ldst.c.inc"
#include "../tcg-pool.c.inc"
#include "elf.h"

/* ??? The translation blocks produced by TCG are generally small enough to
   be entirely reachable with a 16-bit displacement.  Leaving the option for
   a 32-bit displacement here Just In Case.  */
#define USE_LONG_BRANCHES 0

#define TCG_CT_CONST_S16   0x100
#define TCG_CT_CONST_S32   0x200
#define TCG_CT_CONST_S33   0x400
#define TCG_CT_CONST_ZERO  0x800

#define ALL_GENERAL_REGS     MAKE_64BIT_MASK(0, 16)
#define ALL_VECTOR_REGS      MAKE_64BIT_MASK(32, 32)

/*
 * For softmmu, we need to avoid conflicts with the first 3
 * argument registers to perform the tlb lookup, and to call
 * the helper function.
 */
#ifdef CONFIG_SOFTMMU
#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_R2, 3)
#else
#define SOFTMMU_RESERVE_REGS 0
#endif
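
/* For example, with TCG_REG_R2 == 2 the reserved set is bits 2..4 of the
   mask, i.e. %r2-%r4, the first three argument registers.  */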


/* In several places within the instruction set, 0 means "no register"
   rather than TCG_REG_R0.  */
#define TCG_REG_NONE    0

/* A scratch register that may be used throughout the backend.  */
#define TCG_TMP0        TCG_REG_R1

/* A scratch register that holds a pointer to the beginning of the TB.
   We don't need this when we have pc-relative loads with the general
   instructions extension facility.  */
#define TCG_REG_TB      TCG_REG_R12
#define USE_REG_TB      (!HAVE_FACILITY(GEN_INST_EXT))

#ifndef CONFIG_SOFTMMU
#define TCG_GUEST_BASE_REG TCG_REG_R13
#endif

/* All of the following instructions are prefixed with their instruction
   format, and are defined as 8- or 16-bit quantities, even when the two
   halves of the 16-bit quantity may appear 32 bits apart in the insn.
   This makes it easy to copy the values from the tables in Appendix B.  */
typedef enum S390Opcode {
    RIL_AFI     = 0xc209,
    RIL_AGFI    = 0xc208,
    RIL_ALFI    = 0xc20b,
    RIL_ALGFI   = 0xc20a,
    RIL_BRASL   = 0xc005,
    RIL_BRCL    = 0xc004,
    RIL_CFI     = 0xc20d,
    RIL_CGFI    = 0xc20c,
    RIL_CLFI    = 0xc20f,
    RIL_CLGFI   = 0xc20e,
    RIL_CLRL    = 0xc60f,
    RIL_CLGRL   = 0xc60a,
    RIL_CRL     = 0xc60d,
    RIL_CGRL    = 0xc608,
    RIL_IIHF    = 0xc008,
    RIL_IILF    = 0xc009,
    RIL_LARL    = 0xc000,
    RIL_LGFI    = 0xc001,
    RIL_LGRL    = 0xc408,
    RIL_LLIHF   = 0xc00e,
    RIL_LLILF   = 0xc00f,
    RIL_LRL     = 0xc40d,
    RIL_MSFI    = 0xc201,
    RIL_MSGFI   = 0xc200,
    RIL_NIHF    = 0xc00a,
    RIL_NILF    = 0xc00b,
    RIL_OIHF    = 0xc00c,
    RIL_OILF    = 0xc00d,
    RIL_SLFI    = 0xc205,
    RIL_SLGFI   = 0xc204,
    RIL_XIHF    = 0xc006,
    RIL_XILF    = 0xc007,

    RI_AGHI     = 0xa70b,
    RI_AHI      = 0xa70a,
    RI_BRC      = 0xa704,
    RI_CHI      = 0xa70e,
    RI_CGHI     = 0xa70f,
    RI_IIHH     = 0xa500,
    RI_IIHL     = 0xa501,
    RI_IILH     = 0xa502,
    RI_IILL     = 0xa503,
    RI_LGHI     = 0xa709,
    RI_LLIHH    = 0xa50c,
    RI_LLIHL    = 0xa50d,
    RI_LLILH    = 0xa50e,
    RI_LLILL    = 0xa50f,
    RI_MGHI     = 0xa70d,
    RI_MHI      = 0xa70c,
    RI_NIHH     = 0xa504,
    RI_NIHL     = 0xa505,
    RI_NILH     = 0xa506,
    RI_NILL     = 0xa507,
    RI_OIHH     = 0xa508,
    RI_OIHL     = 0xa509,
    RI_OILH     = 0xa50a,
    RI_OILL     = 0xa50b,
    RI_TMLL     = 0xa701,

    RIE_CGIJ    = 0xec7c,
    RIE_CGRJ    = 0xec64,
    RIE_CIJ     = 0xec7e,
    RIE_CLGRJ   = 0xec65,
    RIE_CLIJ    = 0xec7f,
    RIE_CLGIJ   = 0xec7d,
    RIE_CLRJ    = 0xec77,
    RIE_CRJ     = 0xec76,
    RIE_LOCGHI  = 0xec46,
    RIE_RISBG   = 0xec55,

    RRE_AGR     = 0xb908,
    RRE_ALGR    = 0xb90a,
    RRE_ALCR    = 0xb998,
    RRE_ALCGR   = 0xb988,
    RRE_CGR     = 0xb920,
    RRE_CLGR    = 0xb921,
    RRE_DLGR    = 0xb987,
    RRE_DLR     = 0xb997,
    RRE_DSGFR   = 0xb91d,
    RRE_DSGR    = 0xb90d,
    RRE_FLOGR   = 0xb983,
    RRE_LGBR    = 0xb906,
    RRE_LCGR    = 0xb903,
    RRE_LGFR    = 0xb914,
    RRE_LGHR    = 0xb907,
    RRE_LGR     = 0xb904,
    RRE_LLGCR   = 0xb984,
    RRE_LLGFR   = 0xb916,
    RRE_LLGHR   = 0xb985,
    RRE_LRVR    = 0xb91f,
    RRE_LRVGR   = 0xb90f,
    RRE_LTGR    = 0xb902,
    RRE_MLGR    = 0xb986,
    RRE_MSGR    = 0xb90c,
    RRE_MSR     = 0xb252,
    RRE_NGR     = 0xb980,
    RRE_OGR     = 0xb981,
    RRE_SGR     = 0xb909,
    RRE_SLGR    = 0xb90b,
    RRE_SLBR    = 0xb999,
    RRE_SLBGR   = 0xb989,
    RRE_XGR     = 0xb982,

    RRF_LOCR    = 0xb9f2,
    RRF_LOCGR   = 0xb9e2,
    RRF_NRK     = 0xb9f4,
    RRF_NGRK    = 0xb9e4,
    RRF_ORK     = 0xb9f6,
    RRF_OGRK    = 0xb9e6,
    RRF_SRK     = 0xb9f9,
    RRF_SGRK    = 0xb9e9,
    RRF_SLRK    = 0xb9fb,
    RRF_SLGRK   = 0xb9eb,
    RRF_XRK     = 0xb9f7,
    RRF_XGRK    = 0xb9e7,

    RR_AR       = 0x1a,
    RR_ALR      = 0x1e,
    RR_BASR     = 0x0d,
    RR_BCR      = 0x07,
    RR_CLR      = 0x15,
    RR_CR       = 0x19,
    RR_DR       = 0x1d,
    RR_LCR      = 0x13,
    RR_LR       = 0x18,
    RR_LTR      = 0x12,
    RR_NR       = 0x14,
    RR_OR       = 0x16,
    RR_SR       = 0x1b,
    RR_SLR      = 0x1f,
    RR_XR       = 0x17,

    RSY_RLL     = 0xeb1d,
    RSY_RLLG    = 0xeb1c,
    RSY_SLLG    = 0xeb0d,
    RSY_SLLK    = 0xebdf,
    RSY_SRAG    = 0xeb0a,
    RSY_SRAK    = 0xebdc,
    RSY_SRLG    = 0xeb0c,
    RSY_SRLK    = 0xebde,

    RS_SLL      = 0x89,
    RS_SRA      = 0x8a,
    RS_SRL      = 0x88,

    RXY_AG      = 0xe308,
    RXY_AY      = 0xe35a,
    RXY_CG      = 0xe320,
    RXY_CLG     = 0xe321,
    RXY_CLY     = 0xe355,
    RXY_CY      = 0xe359,
    RXY_LAY     = 0xe371,
    RXY_LB      = 0xe376,
    RXY_LG      = 0xe304,
    RXY_LGB     = 0xe377,
    RXY_LGF     = 0xe314,
    RXY_LGH     = 0xe315,
    RXY_LHY     = 0xe378,
    RXY_LLGC    = 0xe390,
    RXY_LLGF    = 0xe316,
    RXY_LLGH    = 0xe391,
    RXY_LMG     = 0xeb04,
    RXY_LRV     = 0xe31e,
    RXY_LRVG    = 0xe30f,
    RXY_LRVH    = 0xe31f,
    RXY_LY      = 0xe358,
    RXY_NG      = 0xe380,
    RXY_OG      = 0xe381,
    RXY_STCY    = 0xe372,
    RXY_STG     = 0xe324,
    RXY_STHY    = 0xe370,
    RXY_STMG    = 0xeb24,
    RXY_STRV    = 0xe33e,
    RXY_STRVG   = 0xe32f,
    RXY_STRVH   = 0xe33f,
    RXY_STY     = 0xe350,
    RXY_XG      = 0xe382,

    RX_A        = 0x5a,
    RX_C        = 0x59,
    RX_L        = 0x58,
    RX_LA       = 0x41,
    RX_LH       = 0x48,
    RX_ST       = 0x50,
    RX_STC      = 0x42,
    RX_STH      = 0x40,

    VRIa_VGBM   = 0xe744,
    VRIa_VREPI  = 0xe745,
    VRIb_VGM    = 0xe746,
    VRIc_VREP   = 0xe74d,

    VRRa_VLC    = 0xe7de,
    VRRa_VLP    = 0xe7df,
    VRRa_VLR    = 0xe756,
    VRRc_VA     = 0xe7f3,
    VRRc_VCEQ   = 0xe7f8,   /* we leave the m5 cs field 0 */
    VRRc_VCH    = 0xe7fb,   /* " */
    VRRc_VCHL   = 0xe7f9,   /* " */
    VRRc_VERLLV = 0xe773,
    VRRc_VESLV  = 0xe770,
    VRRc_VESRAV = 0xe77a,
    VRRc_VESRLV = 0xe778,
    VRRc_VML    = 0xe7a2,
    VRRc_VMN    = 0xe7fe,
    VRRc_VMNL   = 0xe7fc,
    VRRc_VMX    = 0xe7ff,
    VRRc_VMXL   = 0xe7fd,
    VRRc_VN     = 0xe768,
    VRRc_VNC    = 0xe769,
    VRRc_VNN    = 0xe76e,
    VRRc_VNO    = 0xe76b,
    VRRc_VNX    = 0xe76c,
    VRRc_VO     = 0xe76a,
    VRRc_VOC    = 0xe76f,
    VRRc_VPKS   = 0xe797,   /* we leave the m5 cs field 0 */
    VRRc_VS     = 0xe7f7,
    VRRa_VUPH   = 0xe7d7,
    VRRa_VUPL   = 0xe7d6,
    VRRc_VX     = 0xe76d,
    VRRe_VSEL   = 0xe78d,
    VRRf_VLVGP  = 0xe762,

    VRSa_VERLL  = 0xe733,
    VRSa_VESL   = 0xe730,
    VRSa_VESRA  = 0xe73a,
    VRSa_VESRL  = 0xe738,
    VRSb_VLVG   = 0xe722,
    VRSc_VLGV   = 0xe721,

    VRX_VL      = 0xe706,
    VRX_VLLEZ   = 0xe704,
    VRX_VLREP   = 0xe705,
    VRX_VST     = 0xe70e,
    VRX_VSTEF   = 0xe70b,
    VRX_VSTEG   = 0xe70a,

    NOP         = 0x0707,
} S390Opcode;
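
/* For example, RSY_SLLG == 0xeb0d: tcg_out_insn_RSY below emits the 0xeb
   byte in the first halfword and the 0x0d byte in the last halfword, 32
   bits later, matching the two opcode columns of Appendix B.  For the
   short formats, e.g. RIL_BRASL == 0xc005, both opcode parts sit in the
   first halfword and the emitter ORs r1 into bits 4-7 between them.  */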

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
    "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6",  "%v7",
    "%v8",  "%v9",  "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
    "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
    "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
};
#endif

/* Since R6 is a potential argument register, choose it last of the
   call-saved registers.  Likewise prefer the call-clobbered registers
   in reverse order to maximize the chance of avoiding the arguments.  */
static const int tcg_target_reg_alloc_order[] = {
    /* Call saved registers.  */
    TCG_REG_R13,
    TCG_REG_R12,
    TCG_REG_R11,
    TCG_REG_R10,
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_R7,
    TCG_REG_R6,
    /* Call clobbered registers.  */
    TCG_REG_R14,
    TCG_REG_R0,
    TCG_REG_R1,
    /* Argument registers, in reverse order of allocation.  */
    TCG_REG_R5,
    TCG_REG_R4,
    TCG_REG_R3,
    TCG_REG_R2,

    /* V8-V15 are call saved, and omitted. */
    TCG_REG_V0,
    TCG_REG_V1,
    TCG_REG_V2,
    TCG_REG_V3,
    TCG_REG_V4,
    TCG_REG_V5,
    TCG_REG_V6,
    TCG_REG_V7,
    TCG_REG_V16,
    TCG_REG_V17,
    TCG_REG_V18,
    TCG_REG_V19,
    TCG_REG_V20,
    TCG_REG_V21,
    TCG_REG_V22,
    TCG_REG_V23,
    TCG_REG_V24,
    TCG_REG_V25,
    TCG_REG_V26,
    TCG_REG_V27,
    TCG_REG_V28,
    TCG_REG_V29,
    TCG_REG_V30,
    TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[] = {
    TCG_REG_R2,
    TCG_REG_R3,
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
};

static const int tcg_target_call_oarg_regs[] = {
    TCG_REG_R2,
};

#define S390_CC_EQ      8
#define S390_CC_LT      4
#define S390_CC_GT      2
#define S390_CC_OV      1
#define S390_CC_NE      (S390_CC_LT | S390_CC_GT)
#define S390_CC_LE      (S390_CC_LT | S390_CC_EQ)
#define S390_CC_GE      (S390_CC_GT | S390_CC_EQ)
#define S390_CC_NEVER   0
#define S390_CC_ALWAYS  15

/* Condition codes that result from a COMPARE and COMPARE LOGICAL.  */
static const uint8_t tcg_cond_to_s390_cond[] = {
    [TCG_COND_EQ]  = S390_CC_EQ,
    [TCG_COND_NE]  = S390_CC_NE,
    [TCG_COND_LT]  = S390_CC_LT,
    [TCG_COND_LE]  = S390_CC_LE,
    [TCG_COND_GT]  = S390_CC_GT,
    [TCG_COND_GE]  = S390_CC_GE,
    [TCG_COND_LTU] = S390_CC_LT,
    [TCG_COND_LEU] = S390_CC_LE,
    [TCG_COND_GTU] = S390_CC_GT,
    [TCG_COND_GEU] = S390_CC_GE,
};

/* Condition codes that result from a LOAD AND TEST.  Here, we have no
   unsigned instruction variation; however, since the test is vs zero we
   can re-map the outcomes appropriately.  */
static const uint8_t tcg_cond_to_ltr_cond[] = {
    [TCG_COND_EQ]  = S390_CC_EQ,
    [TCG_COND_NE]  = S390_CC_NE,
    [TCG_COND_LT]  = S390_CC_LT,
    [TCG_COND_LE]  = S390_CC_LE,
    [TCG_COND_GT]  = S390_CC_GT,
    [TCG_COND_GE]  = S390_CC_GE,
    [TCG_COND_LTU] = S390_CC_NEVER,
    [TCG_COND_LEU] = S390_CC_EQ,
    [TCG_COND_GTU] = S390_CC_NE,
    [TCG_COND_GEU] = S390_CC_ALWAYS,
};
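
/* For example, after LOAD AND TEST of X, "X <u 0" can never be true, so
   TCG_COND_LTU maps to S390_CC_NEVER, while "X <=u 0" holds exactly when
   X == 0, so TCG_COND_LEU maps to S390_CC_EQ.  */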

#ifdef CONFIG_SOFTMMU
static void * const qemu_ld_helpers[(MO_SSIZE | MO_BSWAP) + 1] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_SB]   = helper_ret_ldsb_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LESW] = helper_le_ldsw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LESL] = helper_le_ldsl_mmu,
    [MO_LEUQ] = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BESW] = helper_be_ldsw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BESL] = helper_be_ldsl_mmu,
    [MO_BEUQ] = helper_be_ldq_mmu,
};

static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEUQ] = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEUQ] = helper_be_stq_mmu,
};
#endif

static const tcg_insn_unit *tb_ret_addr;
uint64_t s390_facilities[3];

static inline bool is_general_reg(TCGReg r)
{
    return r <= TCG_REG_R15;
}

static inline bool is_vector_reg(TCGReg r)
{
    return r >= TCG_REG_V0 && r <= TCG_REG_V31;
}

static bool patch_reloc(tcg_insn_unit *src_rw, int type,
                        intptr_t value, intptr_t addend)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    intptr_t pcrel2;
    uint32_t old;

    value += addend;
    pcrel2 = (tcg_insn_unit *)value - src_rx;

    switch (type) {
    case R_390_PC16DBL:
        if (pcrel2 == (int16_t)pcrel2) {
            tcg_patch16(src_rw, pcrel2);
            return true;
        }
        break;
    case R_390_PC32DBL:
        if (pcrel2 == (int32_t)pcrel2) {
            tcg_patch32(src_rw, pcrel2);
            return true;
        }
        break;
    case R_390_20:
        if (value == sextract64(value, 0, 20)) {
            old = *(uint32_t *)src_rw & 0xf00000ff;
            old |= ((value & 0xfff) << 16) | ((value & 0xff000) >> 4);
            tcg_patch32(src_rw, old);
            return true;
        }
        break;
    default:
        g_assert_not_reached();
    }
    return false;
}
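
/* For example, a R_390_PC16DBL branch whose target lies 0x1000 bytes past
   the relocation point yields pcrel2 == 0x800: the displacement is counted
   in halfwords, which is why it is computed in tcg_insn_unit (16-bit)
   units and range-checked against int16_t.  */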

/* Test if a constant matches the constraint. */
static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
{
    if (ct & TCG_CT_CONST) {
        return 1;
    }

    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }

    /* The following are mutually exclusive.  */
    if (ct & TCG_CT_CONST_S16) {
        return val == (int16_t)val;
    } else if (ct & TCG_CT_CONST_S32) {
        return val == (int32_t)val;
    } else if (ct & TCG_CT_CONST_S33) {
        return val >= -0xffffffffll && val <= 0xffffffffll;
    } else if (ct & TCG_CT_CONST_ZERO) {
        return val == 0;
    }

    return 0;
}
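
/* Note that TCG_CT_CONST_S33 is a 33-bit signed range, i.e. any value
   whose magnitude fits in 32 bits; presumably this is what lets an
   out-of-range operand be handled by negating it into the opposite
   32-bit immediate form.  */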

/* Emit instructions according to the given instruction format.  */

static void tcg_out_insn_RR(TCGContext *s, S390Opcode op, TCGReg r1, TCGReg r2)
{
    tcg_out16(s, (op << 8) | (r1 << 4) | r2);
}

static void tcg_out_insn_RRE(TCGContext *s, S390Opcode op,
                             TCGReg r1, TCGReg r2)
{
    tcg_out32(s, (op << 16) | (r1 << 4) | r2);
}

static void tcg_out_insn_RRF(TCGContext *s, S390Opcode op,
                             TCGReg r1, TCGReg r2, int m3)
{
    tcg_out32(s, (op << 16) | (m3 << 12) | (r1 << 4) | r2);
}

static void tcg_out_insn_RI(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
{
    tcg_out32(s, (op << 16) | (r1 << 20) | (i2 & 0xffff));
}

static void tcg_out_insn_RIE(TCGContext *s, S390Opcode op, TCGReg r1,
                             int i2, int m3)
{
    tcg_out16(s, (op & 0xff00) | (r1 << 4) | m3);
    tcg_out32(s, (i2 << 16) | (op & 0xff));
}

static void tcg_out_insn_RIL(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
{
    tcg_out16(s, op | (r1 << 4));
    tcg_out32(s, i2);
}

static void tcg_out_insn_RS(TCGContext *s, S390Opcode op, TCGReg r1,
                            TCGReg b2, TCGReg r3, int disp)
{
    tcg_out32(s, (op << 24) | (r1 << 20) | (r3 << 16) | (b2 << 12)
              | (disp & 0xfff));
}

static void tcg_out_insn_RSY(TCGContext *s, S390Opcode op, TCGReg r1,
                             TCGReg b2, TCGReg r3, int disp)
{
    tcg_out16(s, (op & 0xff00) | (r1 << 4) | r3);
    tcg_out32(s, (op & 0xff) | (b2 << 28)
              | ((disp & 0xfff) << 16) | ((disp & 0xff000) >> 4));
}

#define tcg_out_insn_RX   tcg_out_insn_RS
#define tcg_out_insn_RXY  tcg_out_insn_RSY

static int RXB(TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
{
    /*
     * Shift bit 4 of each regno to its corresponding bit of RXB.
     * RXB itself begins at bit 8 of the instruction so 8 - 4 = 4
     * is the left-shift of the 4th operand.
     */
    return ((v1 & 0x10) << (4 + 3))
         | ((v2 & 0x10) << (4 + 2))
         | ((v3 & 0x10) << (4 + 1))
         | ((v4 & 0x10) << (4 + 0));
}
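
/* For example, RXB(TCG_REG_V20, 0, 0, 0): regno 52 has bit 4 set, so the
   result is 0x10 << 7 == 0x0800, which the vector emitters below OR into
   bits 8-11 of the final halfword, extending each 4-bit register field to
   the full 5 bits needed for %v16-%v31.  */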

static void tcg_out_insn_VRIa(TCGContext *s, S390Opcode op,
                              TCGReg v1, uint16_t i2, int m3)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4));
    tcg_out16(s, i2);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
}

static void tcg_out_insn_VRIb(TCGContext *s, S390Opcode op,
                              TCGReg v1, uint8_t i2, uint8_t i3, int m4)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4));
    tcg_out16(s, (i2 << 8) | (i3 & 0xff));
    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
}

static void tcg_out_insn_VRIc(TCGContext *s, S390Opcode op,
                              TCGReg v1, uint16_t i2, TCGReg v3, int m4)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(is_vector_reg(v3));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v3 & 0xf));
    tcg_out16(s, i2);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, v3, 0) | (m4 << 12));
}

static void tcg_out_insn_VRRa(TCGContext *s, S390Opcode op,
                              TCGReg v1, TCGReg v2, int m3)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(is_vector_reg(v2));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
    tcg_out32(s, (op & 0x00ff) | RXB(v1, v2, 0, 0) | (m3 << 12));
}

static void tcg_out_insn_VRRc(TCGContext *s, S390Opcode op,
                              TCGReg v1, TCGReg v2, TCGReg v3, int m4)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(is_vector_reg(v2));
    tcg_debug_assert(is_vector_reg(v3));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
    tcg_out16(s, v3 << 12);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, 0) | (m4 << 12));
}

static void tcg_out_insn_VRRe(TCGContext *s, S390Opcode op,
                              TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(is_vector_reg(v2));
    tcg_debug_assert(is_vector_reg(v3));
    tcg_debug_assert(is_vector_reg(v4));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
    tcg_out16(s, v3 << 12);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, v4) | ((v4 & 0xf) << 12));
}

static void tcg_out_insn_VRRf(TCGContext *s, S390Opcode op,
                              TCGReg v1, TCGReg r2, TCGReg r3)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(is_general_reg(r2));
    tcg_debug_assert(is_general_reg(r3));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r2);
    tcg_out16(s, r3 << 12);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0));
}

static void tcg_out_insn_VRSa(TCGContext *s, S390Opcode op, TCGReg v1,
                              intptr_t d2, TCGReg b2, TCGReg v3, int m4)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
    tcg_debug_assert(is_general_reg(b2));
    tcg_debug_assert(is_vector_reg(v3));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v3 & 0xf));
    tcg_out16(s, b2 << 12 | d2);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, v3, 0) | (m4 << 12));
}

static void tcg_out_insn_VRSb(TCGContext *s, S390Opcode op, TCGReg v1,
                              intptr_t d2, TCGReg b2, TCGReg r3, int m4)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
    tcg_debug_assert(is_general_reg(b2));
    tcg_debug_assert(is_general_reg(r3));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r3);
    tcg_out16(s, b2 << 12 | d2);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
}

static void tcg_out_insn_VRSc(TCGContext *s, S390Opcode op, TCGReg r1,
                              intptr_t d2, TCGReg b2, TCGReg v3, int m4)
{
    tcg_debug_assert(is_general_reg(r1));
    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
    tcg_debug_assert(is_general_reg(b2));
    tcg_debug_assert(is_vector_reg(v3));
    tcg_out16(s, (op & 0xff00) | (r1 << 4) | (v3 & 0xf));
    tcg_out16(s, b2 << 12 | d2);
    tcg_out16(s, (op & 0x00ff) | RXB(0, 0, v3, 0) | (m4 << 12));
}

static void tcg_out_insn_VRX(TCGContext *s, S390Opcode op, TCGReg v1,
                             TCGReg b2, TCGReg x2, intptr_t d2, int m3)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
    tcg_debug_assert(is_general_reg(x2));
    tcg_debug_assert(is_general_reg(b2));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | x2);
    tcg_out16(s, (b2 << 12) | d2);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
}

/* Emit an opcode with "type-checking" of the format.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(FMT,_),OP), ## __VA_ARGS__)
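
/* For example, tcg_out_insn(s, RIL, LARL, ret, off) pastes to
   tcg_out_insn_RIL(s, RIL_LARL, ret, off); naming an opcode that does not
   exist with the given format prefix fails to compile.  */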


/* emit 64-bit shifts */
static void tcg_out_sh64(TCGContext* s, S390Opcode op, TCGReg dest,
                         TCGReg src, TCGReg sh_reg, int sh_imm)
{
    tcg_out_insn_RSY(s, op, dest, sh_reg, src, sh_imm);
}

/* emit 32-bit shifts */
static void tcg_out_sh32(TCGContext* s, S390Opcode op, TCGReg dest,
                         TCGReg sh_reg, int sh_imm)
{
    tcg_out_insn_RS(s, op, dest, sh_reg, 0, sh_imm);
}

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
    if (src == dst) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
        if (likely(is_general_reg(dst) && is_general_reg(src))) {
            tcg_out_insn(s, RR, LR, dst, src);
            break;
        }
        /* fallthru */

    case TCG_TYPE_I64:
        if (likely(is_general_reg(dst))) {
            if (likely(is_general_reg(src))) {
                tcg_out_insn(s, RRE, LGR, dst, src);
            } else {
                tcg_out_insn(s, VRSc, VLGV, dst, 0, 0, src, 3);
            }
            break;
        } else if (is_general_reg(src)) {
            tcg_out_insn(s, VRSb, VLVG, dst, 0, 0, src, 3);
            break;
        }
        /* fallthru */

    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
        tcg_out_insn(s, VRRa, VLR, dst, src, 0);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}

static const S390Opcode lli_insns[4] = {
    RI_LLILL, RI_LLILH, RI_LLIHL, RI_LLIHH
};

static bool maybe_out_small_movi(TCGContext *s, TCGType type,
                                 TCGReg ret, tcg_target_long sval)
{
    tcg_target_ulong uval = sval;
    int i;

    if (type == TCG_TYPE_I32) {
        uval = (uint32_t)sval;
        sval = (int32_t)sval;
    }

    /* Try all 32-bit insns that can load it in one go.  */
    if (sval >= -0x8000 && sval < 0x8000) {
        tcg_out_insn(s, RI, LGHI, ret, sval);
        return true;
    }

    for (i = 0; i < 4; i++) {
        tcg_target_long mask = 0xffffull << i * 16;
        if ((uval & mask) == uval) {
            tcg_out_insn_RI(s, lli_insns[i], ret, uval >> i * 16);
            return true;
        }
    }

    return false;
}
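
/* For example, sval == 0x12340000 is out of LGHI range but matches the
   i == 1 mask (0xffff0000), so it loads with a single LLILH 0x1234; a
   value like 0x10008000 spans two 16-bit fields and returns false.  */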

/* load a register with an immediate value */
static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
                             tcg_target_long sval, bool in_prologue)
{
    tcg_target_ulong uval;

    /* Try all 32-bit insns that can load it in one go.  */
    if (maybe_out_small_movi(s, type, ret, sval)) {
        return;
    }

    uval = sval;
    if (type == TCG_TYPE_I32) {
        uval = (uint32_t)sval;
        sval = (int32_t)sval;
    }

    /* Try all 48-bit insns that can load it in one go.  */
    if (HAVE_FACILITY(EXT_IMM)) {
        if (sval == (int32_t)sval) {
            tcg_out_insn(s, RIL, LGFI, ret, sval);
            return;
        }
        if (uval <= 0xffffffff) {
            tcg_out_insn(s, RIL, LLILF, ret, uval);
            return;
        }
        if ((uval & 0xffffffff) == 0) {
            tcg_out_insn(s, RIL, LLIHF, ret, uval >> 32);
            return;
        }
    }

    /* Try for PC-relative address load.  For odd addresses,
       attempt to use an offset from the start of the TB.  */
    if ((sval & 1) == 0) {
        ptrdiff_t off = tcg_pcrel_diff(s, (void *)sval) >> 1;
        if (off == (int32_t)off) {
            tcg_out_insn(s, RIL, LARL, ret, off);
            return;
        }
    } else if (USE_REG_TB && !in_prologue) {
        ptrdiff_t off = tcg_tbrel_diff(s, (void *)sval);
        if (off == sextract64(off, 0, 20)) {
            /* This is certain to be an address within TB, and therefore
               OFF will be negative; don't try RX_LA.  */
            tcg_out_insn(s, RXY, LAY, ret, TCG_REG_TB, TCG_REG_NONE, off);
            return;
        }
    }

    /* A 32-bit unsigned value can be loaded in 2 insns.  And given
       that LLILL, LLILH, LLILF above did not succeed, we know that
       both insns are required.  */
    if (uval <= 0xffffffff) {
        tcg_out_insn(s, RI, LLILL, ret, uval);
        tcg_out_insn(s, RI, IILH, ret, uval >> 16);
        return;
    }

    /* Otherwise, stuff it in the constant pool.  */
    if (HAVE_FACILITY(GEN_INST_EXT)) {
        tcg_out_insn(s, RIL, LGRL, ret, 0);
        new_pool_label(s, sval, R_390_PC32DBL, s->code_ptr - 2, 2);
    } else if (USE_REG_TB && !in_prologue) {
        tcg_out_insn(s, RXY, LG, ret, TCG_REG_TB, TCG_REG_NONE, 0);
        new_pool_label(s, sval, R_390_20, s->code_ptr - 2,
                       tcg_tbrel_diff(s, NULL));
    } else {
        TCGReg base = ret ? ret : TCG_TMP0;
        tcg_out_insn(s, RIL, LARL, base, 0);
        new_pool_label(s, sval, R_390_PC32DBL, s->code_ptr - 2, 2);
        tcg_out_insn(s, RXY, LG, ret, base, TCG_REG_NONE, 0);
    }
}

static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long sval)
{
    tcg_out_movi_int(s, type, ret, sval, false);
}

/* Emit a load/store type instruction.  Inputs are:
   DATA:     The register to be loaded or stored.
   BASE+OFS: The effective address.
   OPC_RX:   If the operation has an RX format opcode (e.g. STC), otherwise 0.
   OPC_RXY:  The RXY format opcode for the operation (e.g. STCY).  */

static void tcg_out_mem(TCGContext *s, S390Opcode opc_rx, S390Opcode opc_rxy,
                        TCGReg data, TCGReg base, TCGReg index,
                        tcg_target_long ofs)
{
    if (ofs < -0x80000 || ofs >= 0x80000) {
        /* Combine the low 20 bits of the offset with the actual load insn;
           the high 44 bits must come from an immediate load.  */
        tcg_target_long low = ((ofs & 0xfffff) ^ 0x80000) - 0x80000;
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - low);
        ofs = low;

        /* If we were already given an index register, add it in.  */
        if (index != TCG_REG_NONE) {
            tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
        }
        index = TCG_TMP0;
    }

    if (opc_rx && ofs >= 0 && ofs < 0x1000) {
        tcg_out_insn_RX(s, opc_rx, data, base, index, ofs);
    } else {
        tcg_out_insn_RXY(s, opc_rxy, data, base, index, ofs);
    }
}
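
/* For example, ofs == 0x123456 does not fit in 20 signed bits, so the
   split above loads 0x100000 into TCG_TMP0 and keeps low == 0x23456 as
   the displacement; low is the offset's low 20 bits sign-extended, so the
   two parts always sum back to the original offset.  */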

static void tcg_out_vrx_mem(TCGContext *s, S390Opcode opc_vrx,
                            TCGReg data, TCGReg base, TCGReg index,
                            tcg_target_long ofs, int m3)
{
    if (ofs < 0 || ofs >= 0x1000) {
        if (ofs >= -0x80000 && ofs < 0x80000) {
            tcg_out_insn(s, RXY, LAY, TCG_TMP0, base, index, ofs);
            base = TCG_TMP0;
            index = TCG_REG_NONE;
            ofs = 0;
        } else {
            tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs);
            if (index != TCG_REG_NONE) {
                tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
            }
            index = TCG_TMP0;
            ofs = 0;
        }
    }
    tcg_out_insn_VRX(s, opc_vrx, data, base, index, ofs, m3);
}

/* load data without address translation or endianness conversion */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data,
                       TCGReg base, intptr_t ofs)
{
    switch (type) {
    case TCG_TYPE_I32:
        if (likely(is_general_reg(data))) {
            tcg_out_mem(s, RX_L, RXY_LY, data, base, TCG_REG_NONE, ofs);
            break;
        }
        tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_32);
        break;

    case TCG_TYPE_I64:
        if (likely(is_general_reg(data))) {
            tcg_out_mem(s, 0, RXY_LG, data, base, TCG_REG_NONE, ofs);
            break;
        }
        /* fallthru */

    case TCG_TYPE_V64:
        tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_64);
        break;

    case TCG_TYPE_V128:
        /* Hint quadword aligned.  */
        tcg_out_vrx_mem(s, VRX_VL, data, base, TCG_REG_NONE, ofs, 4);
        break;

    default:
        g_assert_not_reached();
    }
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg data,
                       TCGReg base, intptr_t ofs)
{
    switch (type) {
    case TCG_TYPE_I32:
        if (likely(is_general_reg(data))) {
            tcg_out_mem(s, RX_ST, RXY_STY, data, base, TCG_REG_NONE, ofs);
        } else {
            tcg_out_vrx_mem(s, VRX_VSTEF, data, base, TCG_REG_NONE, ofs, 1);
        }
        break;

    case TCG_TYPE_I64:
        if (likely(is_general_reg(data))) {
            tcg_out_mem(s, 0, RXY_STG, data, base, TCG_REG_NONE, ofs);
            break;
        }
        /* fallthru */

    case TCG_TYPE_V64:
        tcg_out_vrx_mem(s, VRX_VSTEG, data, base, TCG_REG_NONE, ofs, 0);
        break;

    case TCG_TYPE_V128:
        /* Hint quadword aligned.  */
        tcg_out_vrx_mem(s, VRX_VST, data, base, TCG_REG_NONE, ofs, 4);
        break;

    default:
        g_assert_not_reached();
    }
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    return false;
}

/* load data from an absolute host address */
static void tcg_out_ld_abs(TCGContext *s, TCGType type,
                           TCGReg dest, const void *abs)
{
    intptr_t addr = (intptr_t)abs;

    if (HAVE_FACILITY(GEN_INST_EXT) && !(addr & 1)) {
        ptrdiff_t disp = tcg_pcrel_diff(s, abs) >> 1;
        if (disp == (int32_t)disp) {
            if (type == TCG_TYPE_I32) {
                tcg_out_insn(s, RIL, LRL, dest, disp);
            } else {
                tcg_out_insn(s, RIL, LGRL, dest, disp);
            }
            return;
        }
    }
    if (USE_REG_TB) {
        ptrdiff_t disp = tcg_tbrel_diff(s, abs);
        if (disp == sextract64(disp, 0, 20)) {
            tcg_out_ld(s, type, dest, TCG_REG_TB, disp);
            return;
        }
    }

    tcg_out_movi(s, TCG_TYPE_PTR, dest, addr & ~0xffff);
    tcg_out_ld(s, type, dest, dest, addr & 0xffff);
}

static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src,
                                 int msb, int lsb, int ofs, int z)
{
    /* Format RIE-f */
    tcg_out16(s, (RIE_RISBG & 0xff00) | (dest << 4) | src);
    tcg_out16(s, (msb << 8) | (z << 7) | lsb);
    tcg_out16(s, (ofs << 8) | (RIE_RISBG & 0xff));
}

static void tgen_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
    if (HAVE_FACILITY(EXT_IMM)) {
        tcg_out_insn(s, RRE, LGBR, dest, src);
        return;
    }

    if (type == TCG_TYPE_I32) {
        if (dest == src) {
            tcg_out_sh32(s, RS_SLL, dest, TCG_REG_NONE, 24);
        } else {
            tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 24);
        }
        tcg_out_sh32(s, RS_SRA, dest, TCG_REG_NONE, 24);
    } else {
        tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 56);
        tcg_out_sh64(s, RSY_SRAG, dest, dest, TCG_REG_NONE, 56);
    }
}

static void tgen_ext8u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
    if (HAVE_FACILITY(EXT_IMM)) {
        tcg_out_insn(s, RRE, LLGCR, dest, src);
        return;
    }

    if (dest == src) {
        tcg_out_movi(s, type, TCG_TMP0, 0xff);
        src = TCG_TMP0;
    } else {
        tcg_out_movi(s, type, dest, 0xff);
    }
    if (type == TCG_TYPE_I32) {
        tcg_out_insn(s, RR, NR, dest, src);
    } else {
        tcg_out_insn(s, RRE, NGR, dest, src);
    }
}

static void tgen_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
    if (HAVE_FACILITY(EXT_IMM)) {
        tcg_out_insn(s, RRE, LGHR, dest, src);
        return;
    }

    if (type == TCG_TYPE_I32) {
        if (dest == src) {
            tcg_out_sh32(s, RS_SLL, dest, TCG_REG_NONE, 16);
        } else {
            tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 16);
        }
        tcg_out_sh32(s, RS_SRA, dest, TCG_REG_NONE, 16);
    } else {
        tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 48);
        tcg_out_sh64(s, RSY_SRAG, dest, dest, TCG_REG_NONE, 48);
    }
}

static void tgen_ext16u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
    if (HAVE_FACILITY(EXT_IMM)) {
        tcg_out_insn(s, RRE, LLGHR, dest, src);
        return;
    }

    if (dest == src) {
        tcg_out_movi(s, type, TCG_TMP0, 0xffff);
        src = TCG_TMP0;
    } else {
        tcg_out_movi(s, type, dest, 0xffff);
    }
    if (type == TCG_TYPE_I32) {
        tcg_out_insn(s, RR, NR, dest, src);
    } else {
        tcg_out_insn(s, RRE, NGR, dest, src);
    }
}

static inline void tgen_ext32s(TCGContext *s, TCGReg dest, TCGReg src)
{
    tcg_out_insn(s, RRE, LGFR, dest, src);
}

static inline void tgen_ext32u(TCGContext *s, TCGReg dest, TCGReg src)
{
    tcg_out_insn(s, RRE, LLGFR, dest, src);
}

/* Accept bit patterns like these:
    0....01....1
    1....10....0
    1..10..01..1
    0..01..10..0
   Copied from gcc sources.  */
static inline bool risbg_mask(uint64_t c)
{
    uint64_t lsb;
    /* We don't change the number of transitions by inverting,
       so make sure we start with the LSB zero.  */
    if (c & 1) {
        c = ~c;
    }
    /* Reject all zeros or all ones.  */
    if (c == 0) {
        return false;
    }
    /* Find the first transition.  */
    lsb = c & -c;
    /* Invert to look for a second transition.  */
    c = ~c;
    /* Erase the first transition.  */
    c &= -lsb;
    /* Find the second transition, if any.  */
    lsb = c & -c;
    /* Match if all the bits are 1's, or if c is zero.  */
    return c == -lsb;
}
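
/* For example, 0x00ffff00 has exactly one 0->1 and one 1->0 transition
   and is accepted, as is its wraparound complement 0xffffffffff0000ff;
   a mask like 0x00ff00ff has more than two transitions and is rejected. */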

static void tgen_andi_risbg(TCGContext *s, TCGReg out, TCGReg in, uint64_t val)
{
    int msb, lsb;
    if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
        /* Achieve wraparound by swapping msb and lsb.  */
        msb = 64 - ctz64(~val);
        lsb = clz64(~val) - 1;
    } else {
        msb = clz64(val);
        lsb = 63 - ctz64(val);
    }
    tcg_out_risbg(s, out, in, msb, lsb, 0, 1);
}
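
/* For example, val == 0x0000ffffffff0000 gives msb == 16 and lsb == 47:
   RISBG selects bits 16-47 (IBM numbering, bit 0 == MSB) and zeroes the
   rest.  The wraparound case val == 0xffff00000000ffff instead swaps the
   bounds to msb == 48, lsb == 15.  */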

static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
{
    static const S390Opcode ni_insns[4] = {
        RI_NILL, RI_NILH, RI_NIHL, RI_NIHH
    };
    static const S390Opcode nif_insns[2] = {
        RIL_NILF, RIL_NIHF
    };
    uint64_t valid = (type == TCG_TYPE_I32 ? 0xffffffffull : -1ull);
    int i;

    /* Look for the zero-extensions.  */
    if ((val & valid) == 0xffffffff) {
        tgen_ext32u(s, dest, dest);
        return;
    }
    if (HAVE_FACILITY(EXT_IMM)) {
        if ((val & valid) == 0xff) {
            tgen_ext8u(s, TCG_TYPE_I64, dest, dest);
            return;
        }
        if ((val & valid) == 0xffff) {
            tgen_ext16u(s, TCG_TYPE_I64, dest, dest);
            return;
        }
    }

    /* Try all 32-bit insns that can perform it in one go.  */
    for (i = 0; i < 4; i++) {
        tcg_target_ulong mask = ~(0xffffull << i * 16);
        if (((val | ~valid) & mask) == mask) {
            tcg_out_insn_RI(s, ni_insns[i], dest, val >> i * 16);
            return;
        }
    }

    /* Try all 48-bit insns that can perform it in one go.  */
    if (HAVE_FACILITY(EXT_IMM)) {
        for (i = 0; i < 2; i++) {
            tcg_target_ulong mask = ~(0xffffffffull << i * 32);
            if (((val | ~valid) & mask) == mask) {
                tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i * 32);
                return;
            }
        }
    }
    if (HAVE_FACILITY(GEN_INST_EXT) && risbg_mask(val)) {
        tgen_andi_risbg(s, dest, dest, val);
        return;
    }

    /* Use the constant pool if USE_REG_TB, but not for small constants.  */
    if (USE_REG_TB) {
        if (!maybe_out_small_movi(s, type, TCG_TMP0, val)) {
            tcg_out_insn(s, RXY, NG, dest, TCG_REG_TB, TCG_REG_NONE, 0);
            new_pool_label(s, val & valid, R_390_20, s->code_ptr - 2,
                           tcg_tbrel_diff(s, NULL));
            return;
        }
    } else {
        tcg_out_movi(s, type, TCG_TMP0, val);
    }
    if (type == TCG_TYPE_I32) {
        tcg_out_insn(s, RR, NR, dest, TCG_TMP0);
    } else {
        tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0);
    }
}

static void tgen_ori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
{
    static const S390Opcode oi_insns[4] = {
        RI_OILL, RI_OILH, RI_OIHL, RI_OIHH
    };
    static const S390Opcode oif_insns[2] = {
        RIL_OILF, RIL_OIHF
    };

    int i;

    /* Look for no-op.  */
    if (unlikely(val == 0)) {
        return;
    }

    /* Try all 32-bit insns that can perform it in one go.  */
    for (i = 0; i < 4; i++) {
        tcg_target_ulong mask = (0xffffull << i * 16);
        if ((val & mask) != 0 && (val & ~mask) == 0) {
            tcg_out_insn_RI(s, oi_insns[i], dest, val >> i * 16);
            return;
        }
    }

    /* Try all 48-bit insns that can perform it in one go.  */
    if (HAVE_FACILITY(EXT_IMM)) {
        for (i = 0; i < 2; i++) {
            tcg_target_ulong mask = (0xffffffffull << i * 32);
            if ((val & mask) != 0 && (val & ~mask) == 0) {
                tcg_out_insn_RIL(s, oif_insns[i], dest, val >> i * 32);
                return;
            }
        }
    }

    /* Use the constant pool if USE_REG_TB, but not for small constants.  */
    if (maybe_out_small_movi(s, type, TCG_TMP0, val)) {
        if (type == TCG_TYPE_I32) {
            tcg_out_insn(s, RR, OR, dest, TCG_TMP0);
        } else {
            tcg_out_insn(s, RRE, OGR, dest, TCG_TMP0);
        }
    } else if (USE_REG_TB) {
        tcg_out_insn(s, RXY, OG, dest, TCG_REG_TB, TCG_REG_NONE, 0);
        new_pool_label(s, val, R_390_20, s->code_ptr - 2,
                       tcg_tbrel_diff(s, NULL));
    } else {
        /* Perform the OR via sequential modifications to the high and
           low parts.  Do this via recursion to handle 16-bit vs 32-bit
           masks in each half.  */
        tcg_debug_assert(HAVE_FACILITY(EXT_IMM));
        tgen_ori(s, type, dest, val & 0x00000000ffffffffull);
        tgen_ori(s, type, dest, val & 0xffffffff00000000ull);
    }
}

static void tgen_xori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
{
    /* Try all 48-bit insns that can perform it in one go.  */
    if (HAVE_FACILITY(EXT_IMM)) {
        if ((val & 0xffffffff00000000ull) == 0) {
            tcg_out_insn(s, RIL, XILF, dest, val);
            return;
        }
        if ((val & 0x00000000ffffffffull) == 0) {
            tcg_out_insn(s, RIL, XIHF, dest, val >> 32);
            return;
        }
    }

    /* Use the constant pool if USE_REG_TB, but not for small constants.  */
    if (maybe_out_small_movi(s, type, TCG_TMP0, val)) {
        if (type == TCG_TYPE_I32) {
            tcg_out_insn(s, RR, XR, dest, TCG_TMP0);
        } else {
            tcg_out_insn(s, RRE, XGR, dest, TCG_TMP0);
        }
    } else if (USE_REG_TB) {
        tcg_out_insn(s, RXY, XG, dest, TCG_REG_TB, TCG_REG_NONE, 0);
        new_pool_label(s, val, R_390_20, s->code_ptr - 2,
                       tcg_tbrel_diff(s, NULL));
    } else {
        /* Perform the xor by parts.  */
        tcg_debug_assert(HAVE_FACILITY(EXT_IMM));
        if (val & 0xffffffff) {
            tcg_out_insn(s, RIL, XILF, dest, val);
        }
        if (val > 0xffffffff) {
            tcg_out_insn(s, RIL, XIHF, dest, val >> 32);
        }
    }
}

static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
                    TCGArg c2, bool c2const, bool need_carry)
{
    bool is_unsigned = is_unsigned_cond(c);
    S390Opcode op;

    if (c2const) {
        if (c2 == 0) {
            if (!(is_unsigned && need_carry)) {
                if (type == TCG_TYPE_I32) {
                    tcg_out_insn(s, RR, LTR, r1, r1);
                } else {
                    tcg_out_insn(s, RRE, LTGR, r1, r1);
                }
                return tcg_cond_to_ltr_cond[c];
            }
        }

        if (!is_unsigned && c2 == (int16_t)c2) {
            op = (type == TCG_TYPE_I32 ? RI_CHI : RI_CGHI);
            tcg_out_insn_RI(s, op, r1, c2);
            goto exit;
        }

        if (HAVE_FACILITY(EXT_IMM)) {
            if (type == TCG_TYPE_I32) {
                op = (is_unsigned ? RIL_CLFI : RIL_CFI);
                tcg_out_insn_RIL(s, op, r1, c2);
                goto exit;
            } else if (c2 == (is_unsigned ? (TCGArg)(uint32_t)c2
                              : (TCGArg)(int32_t)c2)) {
                op = (is_unsigned ? RIL_CLGFI : RIL_CGFI);
                tcg_out_insn_RIL(s, op, r1, c2);
                goto exit;
            }
        }

        /* Use the constant pool, but not for small constants.  */
        if (maybe_out_small_movi(s, type, TCG_TMP0, c2)) {
            c2 = TCG_TMP0;
            /* fall through to reg-reg */
        } else if (USE_REG_TB) {
            if (type == TCG_TYPE_I32) {
                op = (is_unsigned ? RXY_CLY : RXY_CY);
                tcg_out_insn_RXY(s, op, r1, TCG_REG_TB, TCG_REG_NONE, 0);
                new_pool_label(s, (uint32_t)c2, R_390_20, s->code_ptr - 2,
                               4 + tcg_tbrel_diff(s, NULL));
            } else {
                op = (is_unsigned ? RXY_CLG : RXY_CG);
                tcg_out_insn_RXY(s, op, r1, TCG_REG_TB, TCG_REG_NONE, 0);
                new_pool_label(s, c2, R_390_20, s->code_ptr - 2,
                               tcg_tbrel_diff(s, NULL));
            }
            goto exit;
        } else {
            if (type == TCG_TYPE_I32) {
                op = (is_unsigned ? RIL_CLRL : RIL_CRL);
                tcg_out_insn_RIL(s, op, r1, 0);
                new_pool_label(s, (uint32_t)c2, R_390_PC32DBL,
                               s->code_ptr - 2, 2 + 4);
            } else {
                op = (is_unsigned ? RIL_CLGRL : RIL_CGRL);
                tcg_out_insn_RIL(s, op, r1, 0);
                new_pool_label(s, c2, R_390_PC32DBL, s->code_ptr - 2, 2);
            }
            goto exit;
        }
    }

    if (type == TCG_TYPE_I32) {
        op = (is_unsigned ? RR_CLR : RR_CR);
        tcg_out_insn_RR(s, op, r1, c2);
    } else {
        op = (is_unsigned ? RRE_CLGR : RRE_CGR);
        tcg_out_insn_RRE(s, op, r1, c2);
    }

 exit:
    return tcg_cond_to_s390_cond[c];
}

static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
                         TCGReg dest, TCGReg c1, TCGArg c2, int c2const)
{
    int cc;
    bool have_loc;

    /* With LOC2, we can always emit the minimum 3 insns.  */
    if (HAVE_FACILITY(LOAD_ON_COND2)) {
        /* Emit: d = 0, d = (cc ? 1 : d).  */
        cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
        tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
        tcg_out_insn(s, RIE, LOCGHI, dest, 1, cc);
        return;
    }

    have_loc = HAVE_FACILITY(LOAD_ON_COND);

    /* For HAVE_LOC, only the paths through GTU/GT/LEU/LE are smaller.  */
 restart:
    switch (cond) {
    case TCG_COND_NE:
        /* X != 0 is X > 0.  */
        if (c2const && c2 == 0) {
            cond = TCG_COND_GTU;
        } else {
            break;
        }
        /* fallthru */

    case TCG_COND_GTU:
    case TCG_COND_GT:
        /* The result of a compare has CC=2 for GT and CC=3 unused.
           ADD LOGICAL WITH CARRY considers (CC & 2) the carry bit.  */
        tgen_cmp(s, type, cond, c1, c2, c2const, true);
        tcg_out_movi(s, type, dest, 0);
        tcg_out_insn(s, RRE, ALCGR, dest, dest);
        return;

    case TCG_COND_EQ:
        /* X == 0 is X <= 0.  */
        if (c2const && c2 == 0) {
            cond = TCG_COND_LEU;
        } else {
            break;
        }
        /* fallthru */

    case TCG_COND_LEU:
    case TCG_COND_LE:
        /* As above, but we're looking for borrow, or !carry.
           The second insn computes d - d - borrow, or -1 for true
           and 0 for false.  So we must mask to 1 bit afterward.  */
        tgen_cmp(s, type, cond, c1, c2, c2const, true);
        tcg_out_insn(s, RRE, SLBGR, dest, dest);
        tgen_andi(s, type, dest, 1);
        return;

    case TCG_COND_GEU:
    case TCG_COND_LTU:
    case TCG_COND_LT:
    case TCG_COND_GE:
        /* Swap operands so that we can use LEU/GTU/GT/LE.  */
        if (c2const) {
            if (have_loc) {
                break;
            }
            tcg_out_movi(s, type, TCG_TMP0, c2);
            c2 = c1;
            c2const = 0;
            c1 = TCG_TMP0;
        } else {
            TCGReg t = c1;
            c1 = c2;
            c2 = t;
        }
        cond = tcg_swap_cond(cond);
        goto restart;

    default:
        g_assert_not_reached();
    }

    cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
    if (have_loc) {
        /* Emit: d = 0, t = 1, d = (cc ? t : d).  */
        tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
        tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 1);
        tcg_out_insn(s, RRF, LOCGR, dest, TCG_TMP0, cc);
    } else {
        /* Emit: d = 1; if (cc) goto over; d = 0; over:  */
        tcg_out_movi(s, type, dest, 1);
        tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1);
        tcg_out_movi(s, type, dest, 0);
    }
}

static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest,
                         TCGReg c1, TCGArg c2, int c2const,
                         TCGArg v3, int v3const)
{
    int cc;
    if (HAVE_FACILITY(LOAD_ON_COND)) {
        cc = tgen_cmp(s, type, c, c1, c2, c2const, false);
        if (v3const) {
            tcg_out_insn(s, RIE, LOCGHI, dest, v3, cc);
        } else {
            tcg_out_insn(s, RRF, LOCGR, dest, v3, cc);
        }
    } else {
        c = tcg_invert_cond(c);
        cc = tgen_cmp(s, type, c, c1, c2, c2const, false);

        /* Emit: if (cc) goto over; dest = r3; over:  */
        tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1);
        tcg_out_insn(s, RRE, LGR, dest, v3);
    }
}

static void tgen_clz(TCGContext *s, TCGReg dest, TCGReg a1,
                     TCGArg a2, int a2const)
{
    /* Since this sets both R and R+1, we have no choice but to store the
       result into R0, allowing R1 == TCG_TMP0 to be clobbered as well.  */
    QEMU_BUILD_BUG_ON(TCG_TMP0 != TCG_REG_R1);
    tcg_out_insn(s, RRE, FLOGR, TCG_REG_R0, a1);

    if (a2const && a2 == 64) {
        tcg_out_mov(s, TCG_TYPE_I64, dest, TCG_REG_R0);
    } else {
        if (a2const) {
            tcg_out_movi(s, TCG_TYPE_I64, dest, a2);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dest, a2);
        }
        if (HAVE_FACILITY(LOAD_ON_COND)) {
            /* Emit: if (one bit found) dest = r0.  */
            tcg_out_insn(s, RRF, LOCGR, dest, TCG_REG_R0, 2);
        } else {
            /* Emit: if (no one bit found) goto over; dest = r0; over:  */
            tcg_out_insn(s, RI, BRC, 8, (4 + 4) >> 1);
            tcg_out_insn(s, RRE, LGR, dest, TCG_REG_R0);
        }
    }
}

static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
                         int ofs, int len, int z)
{
    int lsb = (63 - ofs);
    int msb = lsb - (len - 1);
    tcg_out_risbg(s, dest, src, msb, lsb, ofs, z);
}
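
/* For example, depositing a 16-bit field at ofs == 8 gives lsb == 55 and
   msb == 40 in IBM bit numbering, with a left-rotation of 8 to move the
   source's low 16 bits into place.  */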

static void tgen_extract(TCGContext *s, TCGReg dest, TCGReg src,
                         int ofs, int len)
{
    tcg_out_risbg(s, dest, src, 64 - len, 63, 64 - ofs, 1);
}
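
/* For example, extracting 8 bits at ofs == 16 emits RISBG selecting bits
   56-63 (the low byte of the result) with a left-rotation of 48, i.e. a
   right-rotation of 16; the z == 1 flag zeroes all other bits.  */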

static void tgen_gotoi(TCGContext *s, int cc, const tcg_insn_unit *dest)
{
    ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
    if (off == (int16_t)off) {
        tcg_out_insn(s, RI, BRC, cc, off);
    } else if (off == (int32_t)off) {
        tcg_out_insn(s, RIL, BRCL, cc, off);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
        tcg_out_insn(s, RR, BCR, cc, TCG_TMP0);
    }
}

static void tgen_branch(TCGContext *s, int cc, TCGLabel *l)
{
    if (l->has_value) {
        tgen_gotoi(s, cc, l->u.value_ptr);
    } else if (USE_LONG_BRANCHES) {
        tcg_out16(s, RIL_BRCL | (cc << 4));
        tcg_out_reloc(s, s->code_ptr, R_390_PC32DBL, l, 2);
        s->code_ptr += 2;
    } else {
        tcg_out16(s, RI_BRC | (cc << 4));
        tcg_out_reloc(s, s->code_ptr, R_390_PC16DBL, l, 2);
        s->code_ptr += 1;
    }
}

static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc,
                                TCGReg r1, TCGReg r2, TCGLabel *l)
{
    tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
    tcg_out16(s, (opc & 0xff00) | (r1 << 4) | r2);
    tcg_out16(s, 0);
    tcg_out16(s, cc << 12 | (opc & 0xff));
}

static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc,
                                    TCGReg r1, int i2, TCGLabel *l)
{
    tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
    tcg_out16(s, (opc & 0xff00) | (r1 << 4) | cc);
    tcg_out16(s, 0);
    tcg_out16(s, (i2 << 8) | (opc & 0xff));
}

static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
                        TCGReg r1, TCGArg c2, int c2const, TCGLabel *l)
{
    int cc;

    if (HAVE_FACILITY(GEN_INST_EXT)) {
        bool is_unsigned = is_unsigned_cond(c);
        bool in_range;
        S390Opcode opc;

        cc = tcg_cond_to_s390_cond[c];

        if (!c2const) {
            opc = (type == TCG_TYPE_I32
                   ? (is_unsigned ? RIE_CLRJ : RIE_CRJ)
                   : (is_unsigned ? RIE_CLGRJ : RIE_CGRJ));
            tgen_compare_branch(s, opc, cc, r1, c2, l);
            return;
        }

        /* COMPARE IMMEDIATE AND BRANCH RELATIVE has an 8-bit immediate field.
           If the immediate we've been given does not fit that range, we'll
           fall back to separate compare and branch instructions using the
           larger comparison range afforded by COMPARE IMMEDIATE.  */
        if (type == TCG_TYPE_I32) {
            if (is_unsigned) {
                opc = RIE_CLIJ;
                in_range = (uint32_t)c2 == (uint8_t)c2;
            } else {
                opc = RIE_CIJ;
                in_range = (int32_t)c2 == (int8_t)c2;
            }
        } else {
            if (is_unsigned) {
                opc = RIE_CLGIJ;
                in_range = (uint64_t)c2 == (uint8_t)c2;
            } else {
                opc = RIE_CGIJ;
                in_range = (int64_t)c2 == (int8_t)c2;
            }
        }
        if (in_range) {
            tgen_compare_imm_branch(s, opc, cc, r1, c2, l);
            return;
        }
    }

    cc = tgen_cmp(s, type, c, r1, c2, c2const, false);
    tgen_branch(s, cc, l);
}

static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *dest)
{
    ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
    if (off == (int32_t)off) {
        tcg_out_insn(s, RIL, BRASL, TCG_REG_R14, off);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
        tcg_out_insn(s, RR, BASR, TCG_REG_R14, TCG_TMP0);
    }
}

static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest,
                         const TCGHelperInfo *info)
{
    tcg_out_call_int(s, dest);
}

static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,
                                   TCGReg base, TCGReg index, int disp)
{
    switch (opc & (MO_SSIZE | MO_BSWAP)) {
    case MO_UB:
        tcg_out_insn(s, RXY, LLGC, data, base, index, disp);
        break;
    case MO_SB:
        tcg_out_insn(s, RXY, LGB, data, base, index, disp);
        break;

    case MO_UW | MO_BSWAP:
        /* swapped unsigned halfword load with upper bits zeroed */
        tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
        tgen_ext16u(s, TCG_TYPE_I64, data, data);
        break;
    case MO_UW:
        tcg_out_insn(s, RXY, LLGH, data, base, index, disp);
        break;

    case MO_SW | MO_BSWAP:
        /* swapped sign-extended halfword load */
        tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
        tgen_ext16s(s, TCG_TYPE_I64, data, data);
        break;
    case MO_SW:
        tcg_out_insn(s, RXY, LGH, data, base, index, disp);
        break;

    case MO_UL | MO_BSWAP:
        /* swapped unsigned int load with upper bits zeroed */
        tcg_out_insn(s, RXY, LRV, data, base, index, disp);
        tgen_ext32u(s, data, data);
        break;
    case MO_UL:
        tcg_out_insn(s, RXY, LLGF, data, base, index, disp);
        break;

    case MO_SL | MO_BSWAP:
        /* swapped sign-extended int load */
        tcg_out_insn(s, RXY, LRV, data, base, index, disp);
        tgen_ext32s(s, data, data);
        break;
    case MO_SL:
        tcg_out_insn(s, RXY, LGF, data, base, index, disp);
        break;

    case MO_UQ | MO_BSWAP:
        tcg_out_insn(s, RXY, LRVG, data, base, index, disp);
        break;
    case MO_UQ:
        tcg_out_insn(s, RXY, LG, data, base, index, disp);
        break;

    default:
        tcg_abort();
    }
}

static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data,
                                   TCGReg base, TCGReg index, int disp)
{
    switch (opc & (MO_SIZE | MO_BSWAP)) {
    case MO_UB:
        if (disp >= 0 && disp < 0x1000) {
            tcg_out_insn(s, RX, STC, data, base, index, disp);
1777        } else {
1778            tcg_out_insn(s, RXY, STCY, data, base, index, disp);
1779        }
1780        break;
1781
1782    case MO_UW | MO_BSWAP:
1783        tcg_out_insn(s, RXY, STRVH, data, base, index, disp);
1784        break;
1785    case MO_UW:
1786        if (disp >= 0 && disp < 0x1000) {
1787            tcg_out_insn(s, RX, STH, data, base, index, disp);
1788        } else {
1789            tcg_out_insn(s, RXY, STHY, data, base, index, disp);
1790        }
1791        break;
1792
1793    case MO_UL | MO_BSWAP:
1794        tcg_out_insn(s, RXY, STRV, data, base, index, disp);
1795        break;
1796    case MO_UL:
1797        if (disp >= 0 && disp < 0x1000) {
1798            tcg_out_insn(s, RX, ST, data, base, index, disp);
1799        } else {
1800            tcg_out_insn(s, RXY, STY, data, base, index, disp);
1801        }
1802        break;
1803
1804    case MO_UQ | MO_BSWAP:
1805        tcg_out_insn(s, RXY, STRVG, data, base, index, disp);
1806        break;
1807    case MO_UQ:
1808        tcg_out_insn(s, RXY, STG, data, base, index, disp);
1809        break;
1810
1811    default:
1812        tcg_abort();
1813    }
1814}
1815
1816#if defined(CONFIG_SOFTMMU)
1817/* We're expecting to use a 20-bit negative offset on the tlb memory ops.  */
1818QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1819QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19));
1820
1821/* Load and compare a TLB entry, leaving the flags set.  Loads the TLB
1822   addend into R2.  Returns a register with the sanitized guest address.  */
1823static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
1824                               int mem_index, bool is_ld)
1825{
1826    unsigned s_bits = opc & MO_SIZE;
1827    unsigned a_bits = get_alignment_bits(opc);
1828    unsigned s_mask = (1 << s_bits) - 1;
1829    unsigned a_mask = (1 << a_bits) - 1;
1830    int fast_off = TLB_MASK_TABLE_OFS(mem_index);
1831    int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
1832    int table_off = fast_off + offsetof(CPUTLBDescFast, table);
1833    int ofs, a_off;
1834    uint64_t tlb_mask;
1835
1836    tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE,
1837                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1838    tcg_out_insn(s, RXY, NG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, mask_off);
1839    tcg_out_insn(s, RXY, AG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, table_off);
1840
1841    /* For aligned accesses, we check the first byte and include the alignment
1842       bits within the address.  For unaligned accesses, we check that we don't
1843       cross pages using the address of the last byte of the access.  */
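    /* E.g. an 8-byte load with no alignment requirement gives a_off = 7
       and tlb_mask = TARGET_PAGE_MASK: the tag compare uses the address
       of the last byte, so it fails whenever the access would cross a
       page boundary.  */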
1844    a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
1845    tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1846    if (HAVE_FACILITY(GEN_INST_EXT) && a_off == 0) {
1847        tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask);
1848    } else {
1849        tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off);
1850        tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask);
1851    }
1852
1853    if (is_ld) {
1854        ofs = offsetof(CPUTLBEntry, addr_read);
1855    } else {
1856        ofs = offsetof(CPUTLBEntry, addr_write);
1857    }
1858    if (TARGET_LONG_BITS == 32) {
1859        tcg_out_insn(s, RX, C, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
1860    } else {
1861        tcg_out_insn(s, RXY, CG, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
1862    }
1863
1864    tcg_out_insn(s, RXY, LG, TCG_REG_R2, TCG_REG_R2, TCG_REG_NONE,
1865                 offsetof(CPUTLBEntry, addend));
1866
1867    if (TARGET_LONG_BITS == 32) {
1868        tgen_ext32u(s, TCG_REG_R3, addr_reg);
1869        return TCG_REG_R3;
1870    }
1871    return addr_reg;
1872}
1873
1874static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
1875                                TCGReg data, TCGReg addr,
1876                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1877{
1878    TCGLabelQemuLdst *label = new_ldst_label(s);
1879
1880    label->is_ld = is_ld;
1881    label->oi = oi;
1882    label->datalo_reg = data;
1883    label->addrlo_reg = addr;
1884    label->raddr = tcg_splitwx_to_rx(raddr);
1885    label->label_ptr[0] = label_ptr;
1886}
1887
1888static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1889{
1890    TCGReg addr_reg = lb->addrlo_reg;
1891    TCGReg data_reg = lb->datalo_reg;
1892    MemOpIdx oi = lb->oi;
1893    MemOp opc = get_memop(oi);
1894
1895    if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
1896                     (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1897        return false;
1898    }
1899
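    /* Helper call: R2 = env, R3 = guest address, R4 = oi, R5 = return
       address; the loaded value comes back in R2.  For 32-bit guests
       the zero-extended address is already in R3, left there by
       tcg_out_tlb_read.  */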
1900    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
1901    if (TARGET_LONG_BITS == 64) {
1902        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
1903    }
1904    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi);
1905    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr);
1906    tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]);
1907    tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
1908
1909    tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1910    return true;
1911}
1912
1913static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1914{
1915    TCGReg addr_reg = lb->addrlo_reg;
1916    TCGReg data_reg = lb->datalo_reg;
1917    MemOpIdx oi = lb->oi;
1918    MemOp opc = get_memop(oi);
1919
1920    if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
1921                     (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1922        return false;
1923    }
1924
1925    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
1926    if (TARGET_LONG_BITS == 64) {
1927        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
1928    }
1929    switch (opc & MO_SIZE) {
1930    case MO_UB:
1931        tgen_ext8u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
1932        break;
1933    case MO_UW:
1934        tgen_ext16u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
1935        break;
1936    case MO_UL:
1937        tgen_ext32u(s, TCG_REG_R4, data_reg);
1938        break;
1939    case MO_UQ:
1940        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
1941        break;
1942    default:
1943        tcg_abort();
1944    }
1945    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi);
1946    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr);
1947    tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1948
1949    tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1950    return true;
1951}
1952#else
1953static void tcg_out_test_alignment(TCGContext *s, bool is_ld,
1954                                   TCGReg addrlo, unsigned a_bits)
1955{
1956    unsigned a_mask = (1 << a_bits) - 1;
1957    TCGLabelQemuLdst *l = new_ldst_label(s);
1958
1959    l->is_ld = is_ld;
1960    l->addrlo_reg = addrlo;
1961
1962    /* We expect a_bits to max out at 7, well within the 16 bits TMLL can test. */
1963    tcg_debug_assert(a_bits < 16);
1964    tcg_out_insn(s, RI, TMLL, addrlo, a_mask);
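    /* TMLL sets CC 0 iff all of the selected address bits are zero,
       i.e. the access is aligned; the mask-7 branch below takes the
       slow path for any other CC.  */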
1965
1966    tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */
1967    l->label_ptr[0] = s->code_ptr;
1968    s->code_ptr += 1;
1969
1970    l->raddr = tcg_splitwx_to_rx(s->code_ptr);
1971}
1972
1973static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
1974{
1975    if (!patch_reloc(l->label_ptr[0], R_390_PC16DBL,
1976                     (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1977        return false;
1978    }
1979
1980    tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R3, l->addrlo_reg);
1981    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
1982
1983    /* "Tail call" to the helper, with the return address back inline. */
1984    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R14, (uintptr_t)l->raddr);
1985    tgen_gotoi(s, S390_CC_ALWAYS, (const void *)(l->is_ld ? helper_unaligned_ld
1986                                                 : helper_unaligned_st));
1987    return true;
1988}
1989
1990static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1991{
1992    return tcg_out_fail_alignment(s, l);
1993}
1994
1995static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1996{
1997    return tcg_out_fail_alignment(s, l);
1998}
1999
2000static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg,
2001                                  TCGReg *index_reg, tcg_target_long *disp)
2002{
2003    if (TARGET_LONG_BITS == 32) {
2004        tgen_ext32u(s, TCG_TMP0, *addr_reg);
2005        *addr_reg = TCG_TMP0;
2006    }
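    /* A guest_base below 2**19 fits the positive half of the signed
       20-bit RXY displacement and can be encoded directly; larger
       values live in TCG_GUEST_BASE_REG.  */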
2007    if (guest_base < 0x80000) {
2008        *index_reg = TCG_REG_NONE;
2009        *disp = guest_base;
2010    } else {
2011        *index_reg = TCG_GUEST_BASE_REG;
2012        *disp = 0;
2013    }
2014}
2015#endif /* CONFIG_SOFTMMU */
2016
2017static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
2018                            MemOpIdx oi)
2019{
2020    MemOp opc = get_memop(oi);
2021#ifdef CONFIG_SOFTMMU
2022    unsigned mem_index = get_mmuidx(oi);
2023    tcg_insn_unit *label_ptr;
2024    TCGReg base_reg;
2025
2026    base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1);
2027
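    /* Branch to the slow path on TLB miss, leaving the 16-bit halfword
       displacement zero; the slow path patches it via R_390_PC16DBL.  */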
2028    tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
2029    label_ptr = s->code_ptr;
2030    s->code_ptr += 1;
2031
2032    tcg_out_qemu_ld_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
2033
2034    add_qemu_ldst_label(s, 1, oi, data_reg, addr_reg, s->code_ptr, label_ptr);
2035#else
2036    TCGReg index_reg;
2037    tcg_target_long disp;
2038    unsigned a_bits = get_alignment_bits(opc);
2039
2040    if (a_bits) {
2041        tcg_out_test_alignment(s, true, addr_reg, a_bits);
2042    }
2043    tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp);
2044    tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, index_reg, disp);
2045#endif
2046}
2047
2048static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
2049                            MemOpIdx oi)
2050{
2051    MemOp opc = get_memop(oi);
2052#ifdef CONFIG_SOFTMMU
2053    unsigned mem_index = get_mmuidx(oi);
2054    tcg_insn_unit *label_ptr;
2055    TCGReg base_reg;
2056
2057    base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0);
2058
2059    tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
2060    label_ptr = s->code_ptr;
2061    s->code_ptr += 1;
2062
2063    tcg_out_qemu_st_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
2064
2065    add_qemu_ldst_label(s, 0, oi, data_reg, addr_reg, s->code_ptr, label_ptr);
2066#else
2067    TCGReg index_reg;
2068    tcg_target_long disp;
2069    unsigned a_bits = get_alignment_bits(opc);
2070
2071    if (a_bits) {
2072        tcg_out_test_alignment(s, false, addr_reg, a_bits);
2073    }
2074    tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp);
2075    tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, index_reg, disp);
2076#endif
2077}
2078
2079# define OP_32_64(x) \
2080        case glue(glue(INDEX_op_,x),_i32): \
2081        case glue(glue(INDEX_op_,x),_i64)
2082
2083static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
2084                              const TCGArg args[TCG_MAX_OP_ARGS],
2085                              const int const_args[TCG_MAX_OP_ARGS])
2086{
2087    S390Opcode op, op2;
2088    TCGArg a0, a1, a2;
2089
2090    switch (opc) {
2091    case INDEX_op_exit_tb:
2092        /* Reuse the zeroing that exists for goto_ptr.  */
2093        a0 = args[0];
2094        if (a0 == 0) {
2095            tgen_gotoi(s, S390_CC_ALWAYS, tcg_code_gen_epilogue);
2096        } else {
2097            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, a0);
2098            tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr);
2099        }
2100        break;
2101
2102    case INDEX_op_goto_tb:
2103        a0 = args[0];
2104        if (s->tb_jmp_insn_offset) {
2105            /*
2106             * branch displacement must be aligned for atomic patching;
2107             * BRCL's 32-bit field starts one halfword in, so pad with a nop
2108             */
2109            if (!QEMU_PTR_IS_ALIGNED(s->code_ptr + 1, 4)) {
2110                tcg_out16(s, NOP);
2111            }
2112            tcg_debug_assert(!USE_REG_TB);
2113            tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4));
2114            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
2115            s->code_ptr += 2;
2116        } else {
2117            /* load address stored at s->tb_jmp_target_addr + a0 */
2118            tcg_out_ld_abs(s, TCG_TYPE_PTR, TCG_REG_TB,
2119                           tcg_splitwx_to_rx(s->tb_jmp_target_addr + a0));
2120            /* and go there */
2121            tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_TB);
2122        }
2123        set_jmp_reset_offset(s, a0);
2124
2125        /* For the unlinked path of goto_tb, we need to reset
2126           TCG_REG_TB to the beginning of this TB.  */
2127        if (USE_REG_TB) {
2128            int ofs = -tcg_current_code_size(s);
2129            /* All TB are restricted to 64KiB by unwind info. */
2130            tcg_debug_assert(ofs == sextract64(ofs, 0, 20));
2131            tcg_out_insn(s, RXY, LAY, TCG_REG_TB,
2132                         TCG_REG_TB, TCG_REG_NONE, ofs);
2133        }
2134        break;
2135
2136    case INDEX_op_goto_ptr:
2137        a0 = args[0];
2138        if (USE_REG_TB) {
2139            tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, a0);
2140        }
2141        tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, a0);
2142        break;
2143
2144    OP_32_64(ld8u):
2145        /* ??? LLC (RXY format) is only present with the extended-immediate
2146           facility, whereas LLGC is always present.  */
2147        tcg_out_mem(s, 0, RXY_LLGC, args[0], args[1], TCG_REG_NONE, args[2]);
2148        break;
2149
2150    OP_32_64(ld8s):
2151        /* ??? LB is no smaller than LGB, so no point to using it.  */
2152        tcg_out_mem(s, 0, RXY_LGB, args[0], args[1], TCG_REG_NONE, args[2]);
2153        break;
2154
2155    OP_32_64(ld16u):
2156        /* ??? LLH (RXY format) is only present with the extended-immediate
2157           facility, whereas LLGH is always present.  */
2158        tcg_out_mem(s, 0, RXY_LLGH, args[0], args[1], TCG_REG_NONE, args[2]);
2159        break;
2160
2161    case INDEX_op_ld16s_i32:
2162        tcg_out_mem(s, RX_LH, RXY_LHY, args[0], args[1], TCG_REG_NONE, args[2]);
2163        break;
2164
2165    case INDEX_op_ld_i32:
2166        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2167        break;
2168
2169    OP_32_64(st8):
2170        tcg_out_mem(s, RX_STC, RXY_STCY, args[0], args[1],
2171                    TCG_REG_NONE, args[2]);
2172        break;
2173
2174    OP_32_64(st16):
2175        tcg_out_mem(s, RX_STH, RXY_STHY, args[0], args[1],
2176                    TCG_REG_NONE, args[2]);
2177        break;
2178
2179    case INDEX_op_st_i32:
2180        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2181        break;
2182
2183    case INDEX_op_add_i32:
2184        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2185        if (const_args[2]) {
2186        do_addi_32:
2187            if (a0 == a1) {
2188                if (a2 == (int16_t)a2) {
2189                    tcg_out_insn(s, RI, AHI, a0, a2);
2190                    break;
2191                }
2192                if (HAVE_FACILITY(EXT_IMM)) {
2193                    tcg_out_insn(s, RIL, AFI, a0, a2);
2194                    break;
2195                }
2196            }
2197            tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
2198        } else if (a0 == a1) {
2199            tcg_out_insn(s, RR, AR, a0, a2);
2200        } else {
2201            tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
2202        }
2203        break;
2204    case INDEX_op_sub_i32:
2205        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2206        if (const_args[2]) {
2207            a2 = -a2;
2208            goto do_addi_32;
2209        } else if (a0 == a1) {
2210            tcg_out_insn(s, RR, SR, a0, a2);
2211        } else {
2212            tcg_out_insn(s, RRF, SRK, a0, a1, a2);
2213        }
2214        break;
2215
2216    case INDEX_op_and_i32:
2217        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2218        if (const_args[2]) {
2219            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2220            tgen_andi(s, TCG_TYPE_I32, a0, a2);
2221        } else if (a0 == a1) {
2222            tcg_out_insn(s, RR, NR, a0, a2);
2223        } else {
2224            tcg_out_insn(s, RRF, NRK, a0, a1, a2);
2225        }
2226        break;
2227    case INDEX_op_or_i32:
2228        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2229        if (const_args[2]) {
2230            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2231            tgen_ori(s, TCG_TYPE_I32, a0, a2);
2232        } else if (a0 == a1) {
2233            tcg_out_insn(s, RR, OR, a0, a2);
2234        } else {
2235            tcg_out_insn(s, RRF, ORK, a0, a1, a2);
2236        }
2237        break;
2238    case INDEX_op_xor_i32:
2239        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2240        if (const_args[2]) {
2241            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2242            tgen_xori(s, TCG_TYPE_I32, a0, a2);
2243        } else if (a0 == a1) {
2244            tcg_out_insn(s, RR, XR, a0, a2);
2245        } else {
2246            tcg_out_insn(s, RRF, XRK, a0, a1, a2);
2247        }
2248        break;
2249
2250    case INDEX_op_neg_i32:
2251        tcg_out_insn(s, RR, LCR, args[0], args[1]);
2252        break;
2253
2254    case INDEX_op_mul_i32:
2255        if (const_args[2]) {
2256            if ((int32_t)args[2] == (int16_t)args[2]) {
2257                tcg_out_insn(s, RI, MHI, args[0], args[2]);
2258            } else {
2259                tcg_out_insn(s, RIL, MSFI, args[0], args[2]);
2260            }
2261        } else {
2262            tcg_out_insn(s, RRE, MSR, args[0], args[2]);
2263        }
2264        break;
2265
2266    case INDEX_op_div2_i32:
2267        tcg_out_insn(s, RR, DR, TCG_REG_R2, args[4]);
2268        break;
2269    case INDEX_op_divu2_i32:
2270        tcg_out_insn(s, RRE, DLR, TCG_REG_R2, args[4]);
2271        break;
2272
2273    case INDEX_op_shl_i32:
2274        op = RS_SLL;
2275        op2 = RSY_SLLK;
2276    do_shift32:
2277        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2278        if (a0 == a1) {
2279            if (const_args[2]) {
2280                tcg_out_sh32(s, op, a0, TCG_REG_NONE, a2);
2281            } else {
2282                tcg_out_sh32(s, op, a0, a2, 0);
2283            }
2284        } else {
2285            /* Using tcg_out_sh64 here for the format; it is a 32-bit shift.  */
2286            if (const_args[2]) {
2287                tcg_out_sh64(s, op2, a0, a1, TCG_REG_NONE, a2);
2288            } else {
2289                tcg_out_sh64(s, op2, a0, a1, a2, 0);
2290            }
2291        }
2292        break;
2293    case INDEX_op_shr_i32:
2294        op = RS_SRL;
2295        op2 = RSY_SRLK;
2296        goto do_shift32;
2297    case INDEX_op_sar_i32:
2298        op = RS_SRA;
2299        op2 = RSY_SRAK;
2300        goto do_shift32;
2301
2302    case INDEX_op_rotl_i32:
2303        /* ??? Using tcg_out_sh64 here for the format; it is a 32-bit rol.  */
2304        if (const_args[2]) {
2305            tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_REG_NONE, args[2]);
2306        } else {
2307            tcg_out_sh64(s, RSY_RLL, args[0], args[1], args[2], 0);
2308        }
2309        break;
2310    case INDEX_op_rotr_i32:
2311        if (const_args[2]) {
2312            tcg_out_sh64(s, RSY_RLL, args[0], args[1],
2313                         TCG_REG_NONE, (32 - args[2]) & 31);
2314        } else {
2315            tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
2316            tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_TMP0, 0);
2317        }
2318        break;
2319
2320    case INDEX_op_ext8s_i32:
2321        tgen_ext8s(s, TCG_TYPE_I32, args[0], args[1]);
2322        break;
2323    case INDEX_op_ext16s_i32:
2324        tgen_ext16s(s, TCG_TYPE_I32, args[0], args[1]);
2325        break;
2326    case INDEX_op_ext8u_i32:
2327        tgen_ext8u(s, TCG_TYPE_I32, args[0], args[1]);
2328        break;
2329    case INDEX_op_ext16u_i32:
2330        tgen_ext16u(s, TCG_TYPE_I32, args[0], args[1]);
2331        break;
2332
2333    case INDEX_op_bswap16_i32:
2334        a0 = args[0], a1 = args[1], a2 = args[2];
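        /* Byte-reverse the whole register, leaving the swapped halfword
           in the high 16 bits, then shift it back down with sign (OS)
           or zero extension.  */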
2335        tcg_out_insn(s, RRE, LRVR, a0, a1);
2336        if (a2 & TCG_BSWAP_OS) {
2337            tcg_out_sh32(s, RS_SRA, a0, TCG_REG_NONE, 16);
2338        } else {
2339            tcg_out_sh32(s, RS_SRL, a0, TCG_REG_NONE, 16);
2340        }
2341        break;
2342    case INDEX_op_bswap16_i64:
2343        a0 = args[0], a1 = args[1], a2 = args[2];
2344        tcg_out_insn(s, RRE, LRVGR, a0, a1);
2345        if (a2 & TCG_BSWAP_OS) {
2346            tcg_out_sh64(s, RSY_SRAG, a0, a0, TCG_REG_NONE, 48);
2347        } else {
2348            tcg_out_sh64(s, RSY_SRLG, a0, a0, TCG_REG_NONE, 48);
2349        }
2350        break;
2351
2352    case INDEX_op_bswap32_i32:
2353        tcg_out_insn(s, RRE, LRVR, args[0], args[1]);
2354        break;
2355    case INDEX_op_bswap32_i64:
2356        a0 = args[0], a1 = args[1], a2 = args[2];
2357        tcg_out_insn(s, RRE, LRVR, a0, a1);
2358        if (a2 & TCG_BSWAP_OS) {
2359            tgen_ext32s(s, a0, a0);
2360        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2361            tgen_ext32u(s, a0, a0);
2362        }
2363        break;
2364
2365    case INDEX_op_add2_i32:
2366        if (const_args[4]) {
2367            tcg_out_insn(s, RIL, ALFI, args[0], args[4]);
2368        } else {
2369            tcg_out_insn(s, RR, ALR, args[0], args[4]);
2370        }
2371        tcg_out_insn(s, RRE, ALCR, args[1], args[5]);
2372        break;
2373    case INDEX_op_sub2_i32:
2374        if (const_args[4]) {
2375            tcg_out_insn(s, RIL, SLFI, args[0], args[4]);
2376        } else {
2377            tcg_out_insn(s, RR, SLR, args[0], args[4]);
2378        }
2379        tcg_out_insn(s, RRE, SLBR, args[1], args[5]);
2380        break;
2381
2382    case INDEX_op_br:
2383        tgen_branch(s, S390_CC_ALWAYS, arg_label(args[0]));
2384        break;
2385
2386    case INDEX_op_brcond_i32:
2387        tgen_brcond(s, TCG_TYPE_I32, args[2], args[0],
2388                    args[1], const_args[1], arg_label(args[3]));
2389        break;
2390    case INDEX_op_setcond_i32:
2391        tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1],
2392                     args[2], const_args[2]);
2393        break;
2394    case INDEX_op_movcond_i32:
2395        tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1],
2396                     args[2], const_args[2], args[3], const_args[3]);
2397        break;
2398
2399    case INDEX_op_qemu_ld_i32:
2400        /* ??? Technically we can use a non-extending instruction.  */
2401    case INDEX_op_qemu_ld_i64:
2402        tcg_out_qemu_ld(s, args[0], args[1], args[2]);
2403        break;
2404    case INDEX_op_qemu_st_i32:
2405    case INDEX_op_qemu_st_i64:
2406        tcg_out_qemu_st(s, args[0], args[1], args[2]);
2407        break;
2408
2409    case INDEX_op_ld16s_i64:
2410        tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]);
2411        break;
2412    case INDEX_op_ld32u_i64:
2413        tcg_out_mem(s, 0, RXY_LLGF, args[0], args[1], TCG_REG_NONE, args[2]);
2414        break;
2415    case INDEX_op_ld32s_i64:
2416        tcg_out_mem(s, 0, RXY_LGF, args[0], args[1], TCG_REG_NONE, args[2]);
2417        break;
2418    case INDEX_op_ld_i64:
2419        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2420        break;
2421
2422    case INDEX_op_st32_i64:
2423        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2424        break;
2425    case INDEX_op_st_i64:
2426        tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2427        break;
2428
2429    case INDEX_op_add_i64:
2430        a0 = args[0], a1 = args[1], a2 = args[2];
2431        if (const_args[2]) {
2432        do_addi_64:
2433            if (a0 == a1) {
2434                if (a2 == (int16_t)a2) {
2435                    tcg_out_insn(s, RI, AGHI, a0, a2);
2436                    break;
2437                }
2438                if (HAVE_FACILITY(EXT_IMM)) {
2439                    if (a2 == (int32_t)a2) {
2440                        tcg_out_insn(s, RIL, AGFI, a0, a2);
2441                        break;
2442                    } else if (a2 == (uint32_t)a2) {
2443                        tcg_out_insn(s, RIL, ALGFI, a0, a2);
2444                        break;
2445                    } else if (-a2 == (uint32_t)-a2) {
2446                        tcg_out_insn(s, RIL, SLGFI, a0, -a2);
2447                        break;
2448                    }
2449                }
2450            }
2451            tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
2452        } else if (a0 == a1) {
2453            tcg_out_insn(s, RRE, AGR, a0, a2);
2454        } else {
2455            tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
2456        }
2457        break;
2458    case INDEX_op_sub_i64:
2459        a0 = args[0], a1 = args[1], a2 = args[2];
2460        if (const_args[2]) {
2461            a2 = -a2;
2462            goto do_addi_64;
2463        } else if (a0 == a1) {
2464            tcg_out_insn(s, RRE, SGR, a0, a2);
2465        } else {
2466            tcg_out_insn(s, RRF, SGRK, a0, a1, a2);
2467        }
2468        break;
2469
2470    case INDEX_op_and_i64:
2471        a0 = args[0], a1 = args[1], a2 = args[2];
2472        if (const_args[2]) {
2473            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2474            tgen_andi(s, TCG_TYPE_I64, a0, a2);
2475        } else if (a0 == a1) {
2476            tcg_out_insn(s, RRE, NGR, a0, a2);
2477        } else {
2478            tcg_out_insn(s, RRF, NGRK, a0, a1, a2);
2479        }
2480        break;
2481    case INDEX_op_or_i64:
2482        a0 = args[0], a1 = args[1], a2 = args[2];
2483        if (const_args[2]) {
2484            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2485            tgen_ori(s, TCG_TYPE_I64, a0, a2);
2486        } else if (a0 == a1) {
2487            tcg_out_insn(s, RRE, OGR, a0, a2);
2488        } else {
2489            tcg_out_insn(s, RRF, OGRK, a0, a1, a2);
2490        }
2491        break;
2492    case INDEX_op_xor_i64:
2493        a0 = args[0], a1 = args[1], a2 = args[2];
2494        if (const_args[2]) {
2495            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2496            tgen_xori(s, TCG_TYPE_I64, a0, a2);
2497        } else if (a0 == a1) {
2498            tcg_out_insn(s, RRE, XGR, a0, a2);
2499        } else {
2500            tcg_out_insn(s, RRF, XGRK, a0, a1, a2);
2501        }
2502        break;
2503
2504    case INDEX_op_neg_i64:
2505        tcg_out_insn(s, RRE, LCGR, args[0], args[1]);
2506        break;
2507    case INDEX_op_bswap64_i64:
2508        tcg_out_insn(s, RRE, LRVGR, args[0], args[1]);
2509        break;
2510
2511    case INDEX_op_mul_i64:
2512        if (const_args[2]) {
2513            if (args[2] == (int16_t)args[2]) {
2514                tcg_out_insn(s, RI, MGHI, args[0], args[2]);
2515            } else {
2516                tcg_out_insn(s, RIL, MSGFI, args[0], args[2]);
2517            }
2518        } else {
2519            tcg_out_insn(s, RRE, MSGR, args[0], args[2]);
2520        }
2521        break;
2522
2523    case INDEX_op_div2_i64:
2524        /* ??? This definition produces an unnecessary sign-extension of
2525           the dividend into R3, but since we always produce both quotient
2526           and remainder, using INDEX_op_div_i64 instead would require
2527           jumping through even more hoops.  */
2528        tcg_out_insn(s, RRE, DSGR, TCG_REG_R2, args[4]);
2529        break;
2530    case INDEX_op_divu2_i64:
2531        tcg_out_insn(s, RRE, DLGR, TCG_REG_R2, args[4]);
2532        break;
2533    case INDEX_op_mulu2_i64:
2534        tcg_out_insn(s, RRE, MLGR, TCG_REG_R2, args[3]);
2535        break;
2536
2537    case INDEX_op_shl_i64:
2538        op = RSY_SLLG;
2539    do_shift64:
2540        if (const_args[2]) {
2541            tcg_out_sh64(s, op, args[0], args[1], TCG_REG_NONE, args[2]);
2542        } else {
2543            tcg_out_sh64(s, op, args[0], args[1], args[2], 0);
2544        }
2545        break;
2546    case INDEX_op_shr_i64:
2547        op = RSY_SRLG;
2548        goto do_shift64;
2549    case INDEX_op_sar_i64:
2550        op = RSY_SRAG;
2551        goto do_shift64;
2552
2553    case INDEX_op_rotl_i64:
2554        if (const_args[2]) {
2555            tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
2556                         TCG_REG_NONE, args[2]);
2557        } else {
2558            tcg_out_sh64(s, RSY_RLLG, args[0], args[1], args[2], 0);
2559        }
2560        break;
2561    case INDEX_op_rotr_i64:
2562        if (const_args[2]) {
2563            tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
2564                         TCG_REG_NONE, (64 - args[2]) & 63);
2565        } else {
2566            /* We can use the smaller 32-bit negate because only the
2567               low 6 bits are examined for the rotate.  */
2568            tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
2569            tcg_out_sh64(s, RSY_RLLG, args[0], args[1], TCG_TMP0, 0);
2570        }
2571        break;
2572
2573    case INDEX_op_ext8s_i64:
2574        tgen_ext8s(s, TCG_TYPE_I64, args[0], args[1]);
2575        break;
2576    case INDEX_op_ext16s_i64:
2577        tgen_ext16s(s, TCG_TYPE_I64, args[0], args[1]);
2578        break;
2579    case INDEX_op_ext_i32_i64:
2580    case INDEX_op_ext32s_i64:
2581        tgen_ext32s(s, args[0], args[1]);
2582        break;
2583    case INDEX_op_ext8u_i64:
2584        tgen_ext8u(s, TCG_TYPE_I64, args[0], args[1]);
2585        break;
2586    case INDEX_op_ext16u_i64:
2587        tgen_ext16u(s, TCG_TYPE_I64, args[0], args[1]);
2588        break;
2589    case INDEX_op_extu_i32_i64:
2590    case INDEX_op_ext32u_i64:
2591        tgen_ext32u(s, args[0], args[1]);
2592        break;
2593
2594    case INDEX_op_add2_i64:
2595        if (const_args[4]) {
2596            if ((int64_t)args[4] >= 0) {
2597                tcg_out_insn(s, RIL, ALGFI, args[0], args[4]);
2598            } else {
2599                tcg_out_insn(s, RIL, SLGFI, args[0], -args[4]);
2600            }
2601        } else {
2602            tcg_out_insn(s, RRE, ALGR, args[0], args[4]);
2603        }
2604        tcg_out_insn(s, RRE, ALCGR, args[1], args[5]);
2605        break;
2606    case INDEX_op_sub2_i64:
2607        if (const_args[4]) {
2608            if ((int64_t)args[4] >= 0) {
2609                tcg_out_insn(s, RIL, SLGFI, args[0], args[4]);
2610            } else {
2611                tcg_out_insn(s, RIL, ALGFI, args[0], -args[4]);
2612            }
2613        } else {
2614            tcg_out_insn(s, RRE, SLGR, args[0], args[4]);
2615        }
2616        tcg_out_insn(s, RRE, SLBGR, args[1], args[5]);
2617        break;
2618
2619    case INDEX_op_brcond_i64:
2620        tgen_brcond(s, TCG_TYPE_I64, args[2], args[0],
2621                    args[1], const_args[1], arg_label(args[3]));
2622        break;
2623    case INDEX_op_setcond_i64:
2624        tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1],
2625                     args[2], const_args[2]);
2626        break;
2627    case INDEX_op_movcond_i64:
2628        tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1],
2629                     args[2], const_args[2], args[3], const_args[3]);
2630        break;
2631
2632    OP_32_64(deposit):
2633        a0 = args[0], a1 = args[1], a2 = args[2];
2634        if (const_args[1]) {
2635            tgen_deposit(s, a0, a2, args[3], args[4], 1);
2636        } else {
2637            /* Since we can't support "0Z" as a constraint, we allow a1 in
2638               any register.  Fix up as if it were a matching constraint.  */
2639            if (a0 != a1) {
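                /* Relies on TCG_TYPE_I32 == 0 and TCG_TYPE_I64 == 1. */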
2640                TCGType type = (opc == INDEX_op_deposit_i64);
2641                if (a0 == a2) {
2642                    tcg_out_mov(s, type, TCG_TMP0, a2);
2643                    a2 = TCG_TMP0;
2644                }
2645                tcg_out_mov(s, type, a0, a1);
2646            }
2647            tgen_deposit(s, a0, a2, args[3], args[4], 0);
2648        }
2649        break;
2650
2651    OP_32_64(extract):
2652        tgen_extract(s, args[0], args[1], args[2], args[3]);
2653        break;
2654
2655    case INDEX_op_clz_i64:
2656        tgen_clz(s, args[0], args[1], args[2], const_args[2]);
2657        break;
2658
2659    case INDEX_op_mb:
2660        /* The host memory model is quite strong; we simply need to
2661           serialize the instruction stream.  */
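        /* The only reordering the host makes architecturally visible is
           of a store with a subsequent load, so only TCG_MO_ST_LD needs
           a barrier.  */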
2662        if (args[0] & TCG_MO_ST_LD) {
2663            tcg_out_insn(s, RR, BCR, HAVE_FACILITY(FAST_BCR_SER) ? 14 : 15, 0);
2664        }
2665        break;
2666
2667    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2668    case INDEX_op_mov_i64:
2669    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2670    default:
2671        tcg_abort();
2672    }
2673}
2674
2675static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
2676                            TCGReg dst, TCGReg src)
2677{
2678    if (is_general_reg(src)) {
2679        /* Replicate general register into two MO_64. */
2680        tcg_out_insn(s, VRRf, VLVGP, dst, src, src);
2681        if (vece == MO_64) {
2682            return true;
2683        }
2684        src = dst;
2685    }
2686
2687    /*
2688     * Recall that the "standard" integer, within a vector, is the
2689     * rightmost element of the leftmost doubleword, a-la VLLEZ.
2690     */
2691    tcg_out_insn(s, VRIc, VREP, dst, (8 >> vece) - 1, src, vece);
2692    return true;
2693}
2694
2695static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
2696                             TCGReg dst, TCGReg base, intptr_t offset)
2697{
2698    tcg_out_vrx_mem(s, VRX_VLREP, dst, base, TCG_REG_NONE, offset, vece);
2699    return true;
2700}
2701
2702static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
2703                             TCGReg dst, int64_t val)
2704{
2705    int i, mask, msb, lsb;
2706
2707    /* Look for int16_t elements.  */
2708    if (vece <= MO_16 ||
2709        (vece == MO_32 ? (int32_t)val : val) == (int16_t)val) {
2710        tcg_out_insn(s, VRIa, VREPI, dst, val, vece);
2711        return;
2712    }
2713
2714    /* Look for bit masks.  */
2715    if (vece == MO_32) {
2716        if (risbg_mask((int32_t)val)) {
2717            /* Handle wraparound by swapping msb and lsb.  */
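            /* E.g. val = 0x80000001 yields msb = 31, lsb = 0, which VGM
               interprets as a mask running from bit 31 around to bit 0.  */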
2718            if ((val & 0x80000001u) == 0x80000001u) {
2719                msb = 32 - ctz32(~val);
2720                lsb = clz32(~val) - 1;
2721            } else {
2722                msb = clz32(val);
2723                lsb = 31 - ctz32(val);
2724            }
2725            tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_32);
2726            return;
2727        }
2728    } else {
2729        if (risbg_mask(val)) {
2730            /* Handle wraparound by swapping msb and lsb.  */
2731            if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
2733                msb = 64 - ctz64(~val);
2734                lsb = clz64(~val) - 1;
2735            } else {
2736                msb = clz64(val);
2737                lsb = 63 - ctz64(val);
2738            }
2739            tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_64);
2740            return;
2741        }
2742    }
2743
2744    /* Look for all bytes 0x00 or 0xff.  */
2745    for (i = mask = 0; i < 8; i++) {
2746        uint8_t byte = val >> (i * 8);
2747        if (byte == 0xff) {
2748            mask |= 1 << i;
2749        } else if (byte != 0) {
2750            break;
2751        }
2752    }
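    /* VGBM expands each of 16 immediate bits into one byte of the
       vector; mask covers one doubleword, and mask * 0x0101 replicates
       it into both halves.  */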
2753    if (i == 8) {
2754        tcg_out_insn(s, VRIa, VGBM, dst, mask * 0x0101, 0);
2755        return;
2756    }
2757
2758    /* Otherwise, stuff it in the constant pool.  */
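    /* LARL's 32-bit halfword-relative immediate starts one halfword
       into the insn, hence the reloc at code_ptr - 2 with addend 2.  */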
2759    tcg_out_insn(s, RIL, LARL, TCG_TMP0, 0);
2760    new_pool_label(s, val, R_390_PC32DBL, s->code_ptr - 2, 2);
2761    tcg_out_insn(s, VRX, VLREP, dst, TCG_TMP0, TCG_REG_NONE, 0, MO_64);
2762}
2763
2764static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2765                           unsigned vecl, unsigned vece,
2766                           const TCGArg args[TCG_MAX_OP_ARGS],
2767                           const int const_args[TCG_MAX_OP_ARGS])
2768{
2769    TCGType type = vecl + TCG_TYPE_V64;
2770    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
2771
2772    switch (opc) {
2773    case INDEX_op_ld_vec:
2774        tcg_out_ld(s, type, a0, a1, a2);
2775        break;
2776    case INDEX_op_st_vec:
2777        tcg_out_st(s, type, a0, a1, a2);
2778        break;
2779    case INDEX_op_dupm_vec:
2780        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2781        break;
2782
2783    case INDEX_op_abs_vec:
2784        tcg_out_insn(s, VRRa, VLP, a0, a1, vece);
2785        break;
2786    case INDEX_op_neg_vec:
2787        tcg_out_insn(s, VRRa, VLC, a0, a1, vece);
2788        break;
2789    case INDEX_op_not_vec:
2790        tcg_out_insn(s, VRRc, VNO, a0, a1, a1, 0);
2791        break;
2792
2793    case INDEX_op_add_vec:
2794        tcg_out_insn(s, VRRc, VA, a0, a1, a2, vece);
2795        break;
2796    case INDEX_op_sub_vec:
2797        tcg_out_insn(s, VRRc, VS, a0, a1, a2, vece);
2798        break;
2799    case INDEX_op_and_vec:
2800        tcg_out_insn(s, VRRc, VN, a0, a1, a2, 0);
2801        break;
2802    case INDEX_op_andc_vec:
2803        tcg_out_insn(s, VRRc, VNC, a0, a1, a2, 0);
2804        break;
2805    case INDEX_op_mul_vec:
2806        tcg_out_insn(s, VRRc, VML, a0, a1, a2, vece);
2807        break;
2808    case INDEX_op_or_vec:
2809        tcg_out_insn(s, VRRc, VO, a0, a1, a2, 0);
2810        break;
2811    case INDEX_op_orc_vec:
2812        tcg_out_insn(s, VRRc, VOC, a0, a1, a2, 0);
2813        break;
2814    case INDEX_op_xor_vec:
2815        tcg_out_insn(s, VRRc, VX, a0, a1, a2, 0);
2816        break;
2817    case INDEX_op_nand_vec:
2818        tcg_out_insn(s, VRRc, VNN, a0, a1, a2, 0);
2819        break;
2820    case INDEX_op_nor_vec:
2821        tcg_out_insn(s, VRRc, VNO, a0, a1, a2, 0);
2822        break;
2823    case INDEX_op_eqv_vec:
2824        tcg_out_insn(s, VRRc, VNX, a0, a1, a2, 0);
2825        break;
2826
2827    case INDEX_op_shli_vec:
2828        tcg_out_insn(s, VRSa, VESL, a0, a2, TCG_REG_NONE, a1, vece);
2829        break;
2830    case INDEX_op_shri_vec:
2831        tcg_out_insn(s, VRSa, VESRL, a0, a2, TCG_REG_NONE, a1, vece);
2832        break;
2833    case INDEX_op_sari_vec:
2834        tcg_out_insn(s, VRSa, VESRA, a0, a2, TCG_REG_NONE, a1, vece);
2835        break;
2836    case INDEX_op_rotli_vec:
2837        tcg_out_insn(s, VRSa, VERLL, a0, a2, TCG_REG_NONE, a1, vece);
2838        break;
2839    case INDEX_op_shls_vec:
2840        tcg_out_insn(s, VRSa, VESL, a0, 0, a2, a1, vece);
2841        break;
2842    case INDEX_op_shrs_vec:
2843        tcg_out_insn(s, VRSa, VESRL, a0, 0, a2, a1, vece);
2844        break;
2845    case INDEX_op_sars_vec:
2846        tcg_out_insn(s, VRSa, VESRA, a0, 0, a2, a1, vece);
2847        break;
2848    case INDEX_op_rotls_vec:
2849        tcg_out_insn(s, VRSa, VERLL, a0, 0, a2, a1, vece);
2850        break;
2851    case INDEX_op_shlv_vec:
2852        tcg_out_insn(s, VRRc, VESLV, a0, a1, a2, vece);
2853        break;
2854    case INDEX_op_shrv_vec:
2855        tcg_out_insn(s, VRRc, VESRLV, a0, a1, a2, vece);
2856        break;
2857    case INDEX_op_sarv_vec:
2858        tcg_out_insn(s, VRRc, VESRAV, a0, a1, a2, vece);
2859        break;
2860    case INDEX_op_rotlv_vec:
2861        tcg_out_insn(s, VRRc, VERLLV, a0, a1, a2, vece);
2862        break;
2863
2864    case INDEX_op_smin_vec:
2865        tcg_out_insn(s, VRRc, VMN, a0, a1, a2, vece);
2866        break;
2867    case INDEX_op_smax_vec:
2868        tcg_out_insn(s, VRRc, VMX, a0, a1, a2, vece);
2869        break;
2870    case INDEX_op_umin_vec:
2871        tcg_out_insn(s, VRRc, VMNL, a0, a1, a2, vece);
2872        break;
2873    case INDEX_op_umax_vec:
2874        tcg_out_insn(s, VRRc, VMXL, a0, a1, a2, vece);
2875        break;
2876
2877    case INDEX_op_bitsel_vec:
2878        tcg_out_insn(s, VRRe, VSEL, a0, a2, args[3], a1);
2879        break;
2880
2881    case INDEX_op_cmp_vec:
2882        switch ((TCGCond)args[3]) {
2883        case TCG_COND_EQ:
2884            tcg_out_insn(s, VRRc, VCEQ, a0, a1, a2, vece);
2885            break;
2886        case TCG_COND_GT:
2887            tcg_out_insn(s, VRRc, VCH, a0, a1, a2, vece);
2888            break;
2889        case TCG_COND_GTU:
2890            tcg_out_insn(s, VRRc, VCHL, a0, a1, a2, vece);
2891            break;
2892        default:
2893            g_assert_not_reached();
2894        }
2895        break;
2896
2897    case INDEX_op_s390_vuph_vec:
2898        tcg_out_insn(s, VRRa, VUPH, a0, a1, vece);
2899        break;
2900    case INDEX_op_s390_vupl_vec:
2901        tcg_out_insn(s, VRRa, VUPL, a0, a1, vece);
2902        break;
2903    case INDEX_op_s390_vpks_vec:
2904        tcg_out_insn(s, VRRc, VPKS, a0, a1, a2, vece);
2905        break;
2906
2907    case INDEX_op_mov_vec:   /* Always emitted via tcg_out_mov.  */
2908    case INDEX_op_dup_vec:   /* Always emitted via tcg_out_dup_vec.  */
2909    default:
2910        g_assert_not_reached();
2911    }
2912}
2913
2914int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2915{
2916    switch (opc) {
2917    case INDEX_op_abs_vec:
2918    case INDEX_op_add_vec:
2919    case INDEX_op_and_vec:
2920    case INDEX_op_andc_vec:
2921    case INDEX_op_bitsel_vec:
2922    case INDEX_op_eqv_vec:
2923    case INDEX_op_nand_vec:
2924    case INDEX_op_neg_vec:
2925    case INDEX_op_nor_vec:
2926    case INDEX_op_not_vec:
2927    case INDEX_op_or_vec:
2928    case INDEX_op_orc_vec:
2929    case INDEX_op_rotli_vec:
2930    case INDEX_op_rotls_vec:
2931    case INDEX_op_rotlv_vec:
2932    case INDEX_op_sari_vec:
2933    case INDEX_op_sars_vec:
2934    case INDEX_op_sarv_vec:
2935    case INDEX_op_shli_vec:
2936    case INDEX_op_shls_vec:
2937    case INDEX_op_shlv_vec:
2938    case INDEX_op_shri_vec:
2939    case INDEX_op_shrs_vec:
2940    case INDEX_op_shrv_vec:
2941    case INDEX_op_smax_vec:
2942    case INDEX_op_smin_vec:
2943    case INDEX_op_sub_vec:
2944    case INDEX_op_umax_vec:
2945    case INDEX_op_umin_vec:
2946    case INDEX_op_xor_vec:
2947        return 1;
2948    case INDEX_op_cmp_vec:
2949    case INDEX_op_cmpsel_vec:
2950    case INDEX_op_rotrv_vec:
2951        return -1;
2952    case INDEX_op_mul_vec:
2953        return vece < MO_64;
2954    case INDEX_op_ssadd_vec:
2955    case INDEX_op_sssub_vec:
2956        return vece < MO_64 ? -1 : 0;
2957    default:
2958        return 0;
2959    }
2960}
2961
2962static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0,
2963                                 TCGv_vec v1, TCGv_vec v2, TCGCond cond)
2964{
2965    bool need_swap = false, need_inv = false;
2966
2967    switch (cond) {
2968    case TCG_COND_EQ:
2969    case TCG_COND_GT:
2970    case TCG_COND_GTU:
2971        break;
2972    case TCG_COND_NE:
2973    case TCG_COND_LE:
2974    case TCG_COND_LEU:
2975        need_inv = true;
2976        break;
2977    case TCG_COND_LT:
2978    case TCG_COND_LTU:
2979        need_swap = true;
2980        break;
2981    case TCG_COND_GE:
2982    case TCG_COND_GEU:
2983        need_swap = need_inv = true;
2984        break;
2985    default:
2986        g_assert_not_reached();
2987    }
2988
2989    if (need_inv) {
2990        cond = tcg_invert_cond(cond);
2991    }
2992    if (need_swap) {
2993        TCGv_vec t1;
2994        t1 = v1, v1 = v2, v2 = t1;
2995        cond = tcg_swap_cond(cond);
2996    }
2997
2998    vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
2999              tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
3000
3001    return need_inv;
3002}
3003
3004static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
3005                           TCGv_vec v1, TCGv_vec v2, TCGCond cond)
3006{
3007    if (expand_vec_cmp_noinv(type, vece, v0, v1, v2, cond)) {
3008        tcg_gen_not_vec(vece, v0, v0);
3009    }
3010}
3011
3012static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGv_vec v0,
3013                              TCGv_vec c1, TCGv_vec c2,
3014                              TCGv_vec v3, TCGv_vec v4, TCGCond cond)
3015{
3016    TCGv_vec t = tcg_temp_new_vec(type);
3017
3018    if (expand_vec_cmp_noinv(type, vece, t, c1, c2, cond)) {
3019        /* Invert the sense of the compare by swapping arguments.  */
3020        tcg_gen_bitsel_vec(vece, v0, t, v4, v3);
3021    } else {
3022        tcg_gen_bitsel_vec(vece, v0, t, v3, v4);
3023    }
3024    tcg_temp_free_vec(t);
3025}
3026
3027static void expand_vec_sat(TCGType type, unsigned vece, TCGv_vec v0,
3028                           TCGv_vec v1, TCGv_vec v2, TCGOpcode add_sub_opc)
3029{
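    /* The vector facility has no saturating add or subtract, so widen
       both operands to the next element size, do the arithmetic there,
       and repack with signed saturation.  */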
3030    TCGv_vec h1 = tcg_temp_new_vec(type);
3031    TCGv_vec h2 = tcg_temp_new_vec(type);
3032    TCGv_vec l1 = tcg_temp_new_vec(type);
3033    TCGv_vec l2 = tcg_temp_new_vec(type);
3034
3035    tcg_debug_assert(vece < MO_64);
3036
3037    /* Unpack with sign-extension. */
3038    vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
3039              tcgv_vec_arg(h1), tcgv_vec_arg(v1));
3040    vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
3041              tcgv_vec_arg(h2), tcgv_vec_arg(v2));
3042
3043    vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
3044              tcgv_vec_arg(l1), tcgv_vec_arg(v1));
3045    vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
3046              tcgv_vec_arg(l2), tcgv_vec_arg(v2));
3047
3048    /* Arithmetic on a wider element size. */
3049    vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(h1),
3050              tcgv_vec_arg(h1), tcgv_vec_arg(h2));
3051    vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(l1),
3052              tcgv_vec_arg(l1), tcgv_vec_arg(l2));
3053
3054    /* Pack with saturation. */
3055    vec_gen_3(INDEX_op_s390_vpks_vec, type, vece + 1,
3056              tcgv_vec_arg(v0), tcgv_vec_arg(h1), tcgv_vec_arg(l1));
3057
3058    tcg_temp_free_vec(h1);
3059    tcg_temp_free_vec(h2);
3060    tcg_temp_free_vec(l1);
3061    tcg_temp_free_vec(l2);
3062}
3063
3064void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
3065                       TCGArg a0, ...)
3066{
3067    va_list va;
3068    TCGv_vec v0, v1, v2, v3, v4, t0;
3069
3070    va_start(va, a0);
3071    v0 = temp_tcgv_vec(arg_temp(a0));
3072    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3073    v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3074
3075    switch (opc) {
3076    case INDEX_op_cmp_vec:
3077        expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
3078        break;
3079
3080    case INDEX_op_cmpsel_vec:
3081        v3 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3082        v4 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3083        expand_vec_cmpsel(type, vece, v0, v1, v2, v3, v4, va_arg(va, TCGArg));
3084        break;
3085
3086    case INDEX_op_rotrv_vec:
3087        t0 = tcg_temp_new_vec(type);
3088        tcg_gen_neg_vec(vece, t0, v2);
3089        tcg_gen_rotlv_vec(vece, v0, v1, t0);
3090        tcg_temp_free_vec(t0);
3091        break;
3092
3093    case INDEX_op_ssadd_vec:
3094        expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_add_vec);
3095        break;
3096    case INDEX_op_sssub_vec:
3097        expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_sub_vec);
3098        break;
3099
3100    default:
3101        g_assert_not_reached();
3102    }
3103    va_end(va);
3104}
3105
3106static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
3107{
3108    switch (op) {
3109    case INDEX_op_goto_ptr:
3110        return C_O0_I1(r);
3111
3112    case INDEX_op_ld8u_i32:
3113    case INDEX_op_ld8u_i64:
3114    case INDEX_op_ld8s_i32:
3115    case INDEX_op_ld8s_i64:
3116    case INDEX_op_ld16u_i32:
3117    case INDEX_op_ld16u_i64:
3118    case INDEX_op_ld16s_i32:
3119    case INDEX_op_ld16s_i64:
3120    case INDEX_op_ld_i32:
3121    case INDEX_op_ld32u_i64:
3122    case INDEX_op_ld32s_i64:
3123    case INDEX_op_ld_i64:
3124        return C_O1_I1(r, r);
3125
3126    case INDEX_op_st8_i32:
3127    case INDEX_op_st8_i64:
3128    case INDEX_op_st16_i32:
3129    case INDEX_op_st16_i64:
3130    case INDEX_op_st_i32:
3131    case INDEX_op_st32_i64:
3132    case INDEX_op_st_i64:
3133        return C_O0_I2(r, r);
3134
3135    case INDEX_op_add_i32:
3136    case INDEX_op_add_i64:
3137    case INDEX_op_shl_i64:
3138    case INDEX_op_shr_i64:
3139    case INDEX_op_sar_i64:
3140    case INDEX_op_rotl_i32:
3141    case INDEX_op_rotl_i64:
3142    case INDEX_op_rotr_i32:
3143    case INDEX_op_rotr_i64:
3144    case INDEX_op_clz_i64:
3145    case INDEX_op_setcond_i32:
3146    case INDEX_op_setcond_i64:
3147        return C_O1_I2(r, r, ri);
3148
3149    case INDEX_op_sub_i32:
3150    case INDEX_op_sub_i64:
3151    case INDEX_op_and_i32:
3152    case INDEX_op_and_i64:
3153    case INDEX_op_or_i32:
3154    case INDEX_op_or_i64:
3155    case INDEX_op_xor_i32:
3156    case INDEX_op_xor_i64:
3157        return (HAVE_FACILITY(DISTINCT_OPS)
3158                ? C_O1_I2(r, r, ri)
3159                : C_O1_I2(r, 0, ri));
3160
3161    case INDEX_op_mul_i32:
3162        /* If we have the general-instruction-extensions, then we have
3163           MULTIPLY SINGLE IMMEDIATE with a signed 32-bit, otherwise we
3164           have only MULTIPLY HALFWORD IMMEDIATE, with a signed 16-bit.  */
3165        return (HAVE_FACILITY(GEN_INST_EXT)
3166                ? C_O1_I2(r, 0, ri)
3167                : C_O1_I2(r, 0, rI));
3168
3169    case INDEX_op_mul_i64:
3170        return (HAVE_FACILITY(GEN_INST_EXT)
3171                ? C_O1_I2(r, 0, rJ)
3172                : C_O1_I2(r, 0, rI));
3173
3174    case INDEX_op_shl_i32:
3175    case INDEX_op_shr_i32:
3176    case INDEX_op_sar_i32:
3177        return (HAVE_FACILITY(DISTINCT_OPS)
3178                ? C_O1_I2(r, r, ri)
3179                : C_O1_I2(r, 0, ri));
3180
3181    case INDEX_op_brcond_i32:
3182    case INDEX_op_brcond_i64:
3183        return C_O0_I2(r, ri);
3184
3185    case INDEX_op_bswap16_i32:
3186    case INDEX_op_bswap16_i64:
3187    case INDEX_op_bswap32_i32:
3188    case INDEX_op_bswap32_i64:
3189    case INDEX_op_bswap64_i64:
3190    case INDEX_op_neg_i32:
3191    case INDEX_op_neg_i64:
3192    case INDEX_op_ext8s_i32:
3193    case INDEX_op_ext8s_i64:
3194    case INDEX_op_ext8u_i32:
3195    case INDEX_op_ext8u_i64:
3196    case INDEX_op_ext16s_i32:
3197    case INDEX_op_ext16s_i64:
3198    case INDEX_op_ext16u_i32:
3199    case INDEX_op_ext16u_i64:
3200    case INDEX_op_ext32s_i64:
3201    case INDEX_op_ext32u_i64:
3202    case INDEX_op_ext_i32_i64:
3203    case INDEX_op_extu_i32_i64:
3204    case INDEX_op_extract_i32:
3205    case INDEX_op_extract_i64:
3206        return C_O1_I1(r, r);
3207
3208    case INDEX_op_qemu_ld_i32:
3209    case INDEX_op_qemu_ld_i64:
3210        return C_O1_I1(r, L);
3211    case INDEX_op_qemu_st_i64:
3212    case INDEX_op_qemu_st_i32:
3213        return C_O0_I2(L, L);
3214
3215    case INDEX_op_deposit_i32:
3216    case INDEX_op_deposit_i64:
3217        return C_O1_I2(r, rZ, r);
3218
3219    case INDEX_op_movcond_i32:
3220    case INDEX_op_movcond_i64:
3221        return (HAVE_FACILITY(LOAD_ON_COND2)
3222                ? C_O1_I4(r, r, ri, rI, 0)
3223                : C_O1_I4(r, r, ri, r, 0));
3224
3225    case INDEX_op_div2_i32:
3226    case INDEX_op_div2_i64:
3227    case INDEX_op_divu2_i32:
3228    case INDEX_op_divu2_i64:
        return C_O2_I3(b, a, 0, 1, r);

    case INDEX_op_mulu2_i64:
        return C_O2_I2(b, a, 0, r);

    case INDEX_op_add2_i32:
    case INDEX_op_sub2_i32:
        return (HAVE_FACILITY(EXT_IMM)
                ? C_O2_I4(r, r, 0, 1, ri, r)
                : C_O2_I4(r, r, 0, 1, r, r));

    case INDEX_op_add2_i64:
    case INDEX_op_sub2_i64:
        return (HAVE_FACILITY(EXT_IMM)
                ? C_O2_I4(r, r, 0, 1, rA, r)
                : C_O2_I4(r, r, 0, 1, r, r));

    case INDEX_op_st_vec:
        return C_O0_I2(v, r);
    case INDEX_op_ld_vec:
    case INDEX_op_dupm_vec:
        return C_O1_I1(v, r);
    case INDEX_op_dup_vec:
        return C_O1_I1(v, vr);
    case INDEX_op_abs_vec:
    case INDEX_op_neg_vec:
    case INDEX_op_not_vec:
    case INDEX_op_rotli_vec:
    case INDEX_op_sari_vec:
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_s390_vuph_vec:
    case INDEX_op_s390_vupl_vec:
        return C_O1_I1(v, v);
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_andc_vec:
    case INDEX_op_or_vec:
    case INDEX_op_orc_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_nand_vec:
    case INDEX_op_nor_vec:
    case INDEX_op_eqv_vec:
    case INDEX_op_cmp_vec:
    case INDEX_op_mul_vec:
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_s390_vpks_vec:
        return C_O1_I2(v, v, v);
    case INDEX_op_rotls_vec:
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return C_O1_I2(v, v, r);
    case INDEX_op_bitsel_vec:
        return C_O1_I3(v, v, v, v);

    default:
        g_assert_not_reached();
    }
}

/*
 * Mainline glibc added HWCAP_S390_VX before it was kernel ABI.
 * Some distros have fixed this up locally, others have not.
 */
#ifndef HWCAP_S390_VXRS
#define HWCAP_S390_VXRS 2048
#endif

static void query_s390_facilities(void)
{
    unsigned long hwcap = qemu_getauxval(AT_HWCAP);

    /* Is STORE FACILITY LIST EXTENDED available?  It is believed to be
       present on all 64-bit systems, but check the hwcap bit anyway.  */
    if (hwcap & HWCAP_S390_STFLE) {
        register int r0 __asm__("0") = ARRAY_SIZE(s390_facilities) - 1;
        register void *r1 __asm__("1") = s390_facilities;

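        /*
         * STFLE takes, in the low byte of %r0, one less than the number
         * of doublewords available at 0(%r1); on completion %r0 holds
         * one less than the number of doublewords the complete facility
         * list requires.  The raw opcode is emitted below, presumably so
         * that no assembler support for the mnemonic is required.
         */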
        /* stfle 0(%r1) */
        asm volatile(".word 0xb2b0,0x1000"
                     : "=r"(r0) : "r"(r0), "r"(r1) : "memory", "cc");
    }

    /*
     * Use of vector registers requires OS support beyond the facility bit.
     * If the kernel does not advertise support, disable the facility bits.
     * The vector facility is bit 129, and there is nothing else we
     * currently care about in that third word, so disable VECTOR with
     * one store.
     */
    if (!(hwcap & HWCAP_S390_VXRS)) {
        s390_facilities[2] = 0;
    }
}

static void tcg_target_init(TCGContext *s)
{
    query_s390_facilities();

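    /*
     * Register sets are 64-bit masks in which bits 0..15 stand for the
     * general registers and bits 32..63 for the vector registers, so
     * 0xffff enables every GPR and the high half enables v0..v31.
     */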
    tcg_target_available_regs[TCG_TYPE_I32] = 0xffff;
    tcg_target_available_regs[TCG_TYPE_I64] = 0xffff;
    if (HAVE_FACILITY(VECTOR)) {
        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
    }

    tcg_target_call_clobber_regs = 0;
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R1);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
    /* The r6 register is technically call-saved, but it's also a parameter
       register, so it can be clobbered by setup for the qemu_st helper.  */
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
    /* The return register can be considered call-clobbered.  */
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14);

    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V20);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V21);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V22);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V23);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V24);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V25);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V26);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V27);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V28);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V29);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V30);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V31);
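    /*
     * v8..v15 are not marked call-clobbered: in the s390x ELF ABI their
     * low halves are the call-saved fp registers f8..f15.
     */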

    s->reserved_regs = 0;
    tcg_regset_set_reg(s->reserved_regs, TCG_TMP0);
    /* XXX many insns can't be used with R0, so we'd better avoid it for now */
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
    if (USE_REG_TB) {
        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);
    }
}

#define FRAME_SIZE  ((int)(TCG_TARGET_CALL_STACK_OFFSET          \
                           + TCG_STATIC_CALL_ARGS_SIZE           \
                           + CPU_TEMP_BUF_NLONGS * sizeof(long)))
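/*
 * As a worked example, assuming the generic values
 * TCG_STATIC_CALL_ARGS_SIZE == 128 and CPU_TEMP_BUF_NLONGS == 128,
 * with the 160-byte s390x register save area as the stack offset:
 * FRAME_SIZE = 160 + 128 + 128 * 8 = 1312 bytes.
 */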

static void tcg_target_qemu_prologue(TCGContext *s)
{
    /* stmg %r6,%r15,48(%r15) (save registers) */
    tcg_out_insn(s, RXY, STMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 48);
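    /*
     * The 48 above is %r6's slot: the s390x ELF ABI reserves 8-byte
     * slots for %r6..%r15 at offsets 48..127 of the 160-byte frame
     * header provided by the caller.
     */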

    /* aghi %r15,-frame_size */
    tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -FRAME_SIZE);

    tcg_set_frame(s, TCG_REG_CALL_STACK,
                  TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

#ifndef CONFIG_SOFTMMU
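    /*
     * A guest_base below 0x80000 fits in the signed 20-bit displacement
     * of the long-displacement memory insns and can be folded into each
     * access; only larger values get a dedicated register.
     */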
    if (guest_base >= 0x80000) {
        tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
    }
#endif

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    if (USE_REG_TB) {
        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB,
                    tcg_target_call_iarg_regs[1]);
    }

    /* br %r3 (go to TB) */
    tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, tcg_target_call_iarg_regs[1]);

    /*
     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
     * and fall through to the rest of the epilogue.
     */
    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, 0);

    /* TB epilogue */
    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);

    /* lmg %r6,%r15,FRAME_SIZE+48(%r15) (restore registers) */
    tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15,
                 FRAME_SIZE + 48);

    /* br %r14 (return) */
    tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14);
}

static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
{
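    /*
     * 0x07 bytes pair up as 0x0707, i.e. "bcr 0,%r7": with a zero
     * condition mask the branch is never taken, making this the usual
     * s390x nop filler.
     */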
    memset(p, 0x07, count * sizeof(tcg_insn_unit));
}

typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[18];
} DebugFrame;

/* We're expecting a 2-byte uleb128 encoded value, i.e. at most 14 bits.  */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

#define ELF_HOST_MACHINE  EM_S390

static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 8,                /* sleb128 8 */
    .h.cie.return_column = TCG_REG_R14,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_CALL_STACK,         /* DW_CFA_def_cfa %r15, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
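    /* E.g. with FRAME_SIZE = 1312 (0x520) as sketched above, those two
       uleb128 bytes are 0xa0 (0x20 | 0x80) and 0x0a (1312 >> 7).  */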
    .fde_reg_ofs = {
        0x86, 6,                        /* DW_CFA_offset, %r6, 48 */
        0x87, 7,                        /* DW_CFA_offset, %r7, 56 */
        0x88, 8,                        /* DW_CFA_offset, %r8, 64 */
        0x89, 9,                        /* DW_CFA_offset, %r9, 72 */
        0x8a, 10,                       /* DW_CFA_offset, %r10, 80 */
        0x8b, 11,                       /* DW_CFA_offset, %r11, 88 */
        0x8c, 12,                       /* DW_CFA_offset, %r12, 96 */
        0x8d, 13,                       /* DW_CFA_offset, %r13, 104 */
        0x8e, 14,                       /* DW_CFA_offset, %r14, 112 */
    }
};
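
/*
 * In fde_reg_ofs, 0x80 | regno is DW_CFA_offset and the second byte is
 * the save offset divided by data_align: e.g. 0x86, 6 places %r6 at
 * CFA + 6 * 8 = 48, matching the stmg in the prologue.
 */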

void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}