xref: /openbmc/qemu/tcg/s390x/tcg-target.c.inc (revision a6caeee8)
1/*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2009 Ulrich Hecht <uli@suse.de>
5 * Copyright (c) 2009 Alexander Graf <agraf@suse.de>
6 * Copyright (c) 2010 Richard Henderson <rth@twiddle.net>
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to deal
10 * in the Software without restriction, including without limitation the rights
11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 * copies of the Software, and to permit persons to whom the Software is
13 * furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 * THE SOFTWARE.
25 */
26
27/* We only support generating code for 64-bit mode.  */
28#if TCG_TARGET_REG_BITS != 64
29#error "unsupported code generation mode"
30#endif
31
32#include "../tcg-ldst.c.inc"
33#include "../tcg-pool.c.inc"
34#include "elf.h"
35
/* ??? The translation blocks produced by TCG are generally small enough to
   be entirely reachable with a 16-bit displacement.  Leaving the option for
   a 32-bit displacement here Just In Case.  */
#define USE_LONG_BRANCHES 0

/*
 * Backend-specific constant constraint flags, tested by
 * tcg_target_const_match():
 *   S16  - the value survives a round trip through int16_t
 *   S32  - the value survives a round trip through int32_t
 *   S33  - the value lies within [-0xffffffff, 0xffffffff]
 *   ZERO - the value is exactly 0
 */
#define TCG_CT_CONST_S16   0x100
#define TCG_CT_CONST_S32   0x200
#define TCG_CT_CONST_S33   0x400
#define TCG_CT_CONST_ZERO  0x800

/* Register-set masks: bits 0-15 for the general registers and
   bits 32-63 for the vector registers.  */
#define ALL_GENERAL_REGS     MAKE_64BIT_MASK(0, 16)
#define ALL_VECTOR_REGS      MAKE_64BIT_MASK(32, 32)

/*
 * For softmmu, we need to avoid conflicts with the first 3
 * argument registers to perform the tlb lookup, and to call
 * the helper function.
 */
#ifdef CONFIG_SOFTMMU
#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_R2, 3)
#else
#define SOFTMMU_RESERVE_REGS 0
#endif


/* Several places within the instruction set 0 means "no register"
   rather than TCG_REG_R0.  */
#define TCG_REG_NONE    0

/* A scratch register that may be used throughout the backend.  */
#define TCG_TMP0        TCG_REG_R1

/* A scratch register that holds a pointer to the beginning of the TB.
   We don't need this when we have pc-relative loads with the general
   instructions extension facility.  */
#define TCG_REG_TB      TCG_REG_R12
#define USE_REG_TB      (!HAVE_FACILITY(GEN_INST_EXT))

/* Only defined for user-mode builds; presumably the register that holds
   the guest base address -- confirm against the qemu_ld/st code.  */
#ifndef CONFIG_SOFTMMU
#define TCG_GUEST_BASE_REG TCG_REG_R13
#endif
77
/* All of the following instructions are prefixed with their instruction
   format, and are defined as 8- or 16-bit quantities, even when the two
   halves of the 16-bit quantity may appear 32 bits apart in the insn.
   This makes it easy to copy the values from the tables in Appendix B.
   The prefix of each name selects the matching tcg_out_insn_<FORMAT>
   emitter when used through the tcg_out_insn() macro.  */
typedef enum S390Opcode {
    /* RIL format.  */
    RIL_AFI     = 0xc209,
    RIL_AGFI    = 0xc208,
    RIL_ALFI    = 0xc20b,
    RIL_ALGFI   = 0xc20a,
    RIL_BRASL   = 0xc005,
    RIL_BRCL    = 0xc004,
    RIL_CFI     = 0xc20d,
    RIL_CGFI    = 0xc20c,
    RIL_CLFI    = 0xc20f,
    RIL_CLGFI   = 0xc20e,
    RIL_CLRL    = 0xc60f,
    RIL_CLGRL   = 0xc60a,
    RIL_CRL     = 0xc60d,
    RIL_CGRL    = 0xc608,
    RIL_IIHF    = 0xc008,
    RIL_IILF    = 0xc009,
    RIL_LARL    = 0xc000,
    RIL_LGFI    = 0xc001,
    RIL_LGRL    = 0xc408,
    RIL_LLIHF   = 0xc00e,
    RIL_LLILF   = 0xc00f,
    RIL_LRL     = 0xc40d,
    RIL_MSFI    = 0xc201,
    RIL_MSGFI   = 0xc200,
    RIL_NIHF    = 0xc00a,
    RIL_NILF    = 0xc00b,
    RIL_OIHF    = 0xc00c,
    RIL_OILF    = 0xc00d,
    RIL_SLFI    = 0xc205,
    RIL_SLGFI   = 0xc204,
    RIL_XIHF    = 0xc006,
    RIL_XILF    = 0xc007,

    /* RI format.  */
    RI_AGHI     = 0xa70b,
    RI_AHI      = 0xa70a,
    RI_BRC      = 0xa704,
    RI_CHI      = 0xa70e,
    RI_CGHI     = 0xa70f,
    RI_IIHH     = 0xa500,
    RI_IIHL     = 0xa501,
    RI_IILH     = 0xa502,
    RI_IILL     = 0xa503,
    RI_LGHI     = 0xa709,
    RI_LLIHH    = 0xa50c,
    RI_LLIHL    = 0xa50d,
    RI_LLILH    = 0xa50e,
    RI_LLILL    = 0xa50f,
    RI_MGHI     = 0xa70d,
    RI_MHI      = 0xa70c,
    RI_NIHH     = 0xa504,
    RI_NIHL     = 0xa505,
    RI_NILH     = 0xa506,
    RI_NILL     = 0xa507,
    RI_OIHH     = 0xa508,
    RI_OIHL     = 0xa509,
    RI_OILH     = 0xa50a,
    RI_OILL     = 0xa50b,
    RI_TMLL     = 0xa701,

    /* RIE format.  */
    RIE_CGIJ    = 0xec7c,
    RIE_CGRJ    = 0xec64,
    RIE_CIJ     = 0xec7e,
    RIE_CLGRJ   = 0xec65,
    RIE_CLIJ    = 0xec7f,
    RIE_CLGIJ   = 0xec7d,
    RIE_CLRJ    = 0xec77,
    RIE_CRJ     = 0xec76,
    RIE_LOCGHI  = 0xec46,
    RIE_RISBG   = 0xec55,

    /* RRE format.  */
    RRE_AGR     = 0xb908,
    RRE_ALGR    = 0xb90a,
    RRE_ALCR    = 0xb998,
    RRE_ALCGR   = 0xb988,
    RRE_CGR     = 0xb920,
    RRE_CLGR    = 0xb921,
    RRE_DLGR    = 0xb987,
    RRE_DLR     = 0xb997,
    RRE_DSGFR   = 0xb91d,
    RRE_DSGR    = 0xb90d,
    RRE_FLOGR   = 0xb983,
    RRE_LGBR    = 0xb906,
    RRE_LCGR    = 0xb903,
    RRE_LGFR    = 0xb914,
    RRE_LGHR    = 0xb907,
    RRE_LGR     = 0xb904,
    RRE_LLGCR   = 0xb984,
    RRE_LLGFR   = 0xb916,
    RRE_LLGHR   = 0xb985,
    RRE_LRVR    = 0xb91f,
    RRE_LRVGR   = 0xb90f,
    RRE_LTGR    = 0xb902,
    RRE_MLGR    = 0xb986,
    RRE_MSGR    = 0xb90c,
    RRE_MSR     = 0xb252,
    RRE_NGR     = 0xb980,
    RRE_OGR     = 0xb981,
    RRE_SGR     = 0xb909,
    RRE_SLGR    = 0xb90b,
    RRE_SLBR    = 0xb999,
    RRE_SLBGR   = 0xb989,
    RRE_XGR     = 0xb982,

    /* RRF format.  */
    RRF_LOCR    = 0xb9f2,
    RRF_LOCGR   = 0xb9e2,
    RRF_NRK     = 0xb9f4,
    RRF_NGRK    = 0xb9e4,
    RRF_ORK     = 0xb9f6,
    RRF_OGRK    = 0xb9e6,
    RRF_SRK     = 0xb9f9,
    RRF_SGRK    = 0xb9e9,
    RRF_SLRK    = 0xb9fb,
    RRF_SLGRK   = 0xb9eb,
    RRF_XRK     = 0xb9f7,
    RRF_XGRK    = 0xb9e7,

    /* RR format (8-bit opcodes).  */
    RR_AR       = 0x1a,
    RR_ALR      = 0x1e,
    RR_BASR     = 0x0d,
    RR_BCR      = 0x07,
    RR_CLR      = 0x15,
    RR_CR       = 0x19,
    RR_DR       = 0x1d,
    RR_LCR      = 0x13,
    RR_LR       = 0x18,
    RR_LTR      = 0x12,
    RR_NR       = 0x14,
    RR_OR       = 0x16,
    RR_SR       = 0x1b,
    RR_SLR      = 0x1f,
    RR_XR       = 0x17,

    /* RSY format.  */
    RSY_RLL     = 0xeb1d,
    RSY_RLLG    = 0xeb1c,
    RSY_SLLG    = 0xeb0d,
    RSY_SLLK    = 0xebdf,
    RSY_SRAG    = 0xeb0a,
    RSY_SRAK    = 0xebdc,
    RSY_SRLG    = 0xeb0c,
    RSY_SRLK    = 0xebde,

    /* RS format (8-bit opcodes).  */
    RS_SLL      = 0x89,
    RS_SRA      = 0x8a,
    RS_SRL      = 0x88,

    /* RXY format.  */
    RXY_AG      = 0xe308,
    RXY_AY      = 0xe35a,
    RXY_CG      = 0xe320,
    RXY_CLG     = 0xe321,
    RXY_CLY     = 0xe355,
    RXY_CY      = 0xe359,
    RXY_LAY     = 0xe371,
    RXY_LB      = 0xe376,
    RXY_LG      = 0xe304,
    RXY_LGB     = 0xe377,
    RXY_LGF     = 0xe314,
    RXY_LGH     = 0xe315,
    RXY_LHY     = 0xe378,
    RXY_LLGC    = 0xe390,
    RXY_LLGF    = 0xe316,
    RXY_LLGH    = 0xe391,
    RXY_LMG     = 0xeb04,
    RXY_LRV     = 0xe31e,
    RXY_LRVG    = 0xe30f,
    RXY_LRVH    = 0xe31f,
    RXY_LY      = 0xe358,
    RXY_NG      = 0xe380,
    RXY_OG      = 0xe381,
    RXY_STCY    = 0xe372,
    RXY_STG     = 0xe324,
    RXY_STHY    = 0xe370,
    RXY_STMG    = 0xeb24,
    RXY_STRV    = 0xe33e,
    RXY_STRVG   = 0xe32f,
    RXY_STRVH   = 0xe33f,
    RXY_STY     = 0xe350,
    RXY_XG      = 0xe382,

    /* RX format (8-bit opcodes).  */
    RX_A        = 0x5a,
    RX_C        = 0x59,
    RX_L        = 0x58,
    RX_LA       = 0x41,
    RX_LH       = 0x48,
    RX_ST       = 0x50,
    RX_STC      = 0x42,
    RX_STH      = 0x40,

    /* VRI-a, VRI-b and VRI-c formats.  */
    VRIa_VGBM   = 0xe744,
    VRIa_VREPI  = 0xe745,
    VRIb_VGM    = 0xe746,
    VRIc_VREP   = 0xe74d,

    /* VRR-a, VRR-c, VRR-e and VRR-f formats.  */
    VRRa_VLC    = 0xe7de,
    VRRa_VLP    = 0xe7df,
    VRRa_VLR    = 0xe756,
    VRRc_VA     = 0xe7f3,
    VRRc_VCEQ   = 0xe7f8,   /* we leave the m5 cs field 0 */
    VRRc_VCH    = 0xe7fb,   /* likewise, the m5 cs field is left 0 */
    VRRc_VCHL   = 0xe7f9,   /* likewise, the m5 cs field is left 0 */
    VRRc_VERLLV = 0xe773,
    VRRc_VESLV  = 0xe770,
    VRRc_VESRAV = 0xe77a,
    VRRc_VESRLV = 0xe778,
    VRRc_VML    = 0xe7a2,
    VRRc_VMN    = 0xe7fe,
    VRRc_VMNL   = 0xe7fc,
    VRRc_VMX    = 0xe7ff,
    VRRc_VMXL   = 0xe7fd,
    VRRc_VN     = 0xe768,
    VRRc_VNC    = 0xe769,
    VRRc_VNN    = 0xe76e,
    VRRc_VNO    = 0xe76b,
    VRRc_VNX    = 0xe76c,
    VRRc_VO     = 0xe76a,
    VRRc_VOC    = 0xe76f,
    VRRc_VPKS   = 0xe797,   /* we leave the m5 cs field 0 */
    VRRc_VS     = 0xe7f7,
    VRRa_VUPH   = 0xe7d7,
    VRRa_VUPL   = 0xe7d6,
    VRRc_VX     = 0xe76d,
    VRRe_VSEL   = 0xe78d,
    VRRf_VLVGP  = 0xe762,

    /* VRS-a, VRS-b and VRS-c formats.  */
    VRSa_VERLL  = 0xe733,
    VRSa_VESL   = 0xe730,
    VRSa_VESRA  = 0xe73a,
    VRSa_VESRL  = 0xe738,
    VRSb_VLVG   = 0xe722,
    VRSc_VLGV   = 0xe721,

    /* VRX format.  */
    VRX_VL      = 0xe706,
    VRX_VLLEZ   = 0xe704,
    VRX_VLREP   = 0xe705,
    VRX_VST     = 0xe70e,
    VRX_VSTEF   = 0xe70b,
    VRX_VSTEG   = 0xe70a,

    /* A complete 16-bit instruction rather than an opcode fragment.  */
    NOP         = 0x0707,
} S390Opcode;
322
#ifdef CONFIG_DEBUG_TCG
/* Printable register names for debug output, one slot per register
   number.  Slots 16-31 have no name and hold a null pointer.  */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    /* Slots 0-15: general registers.  */
    "%r0",  "%r1",  "%r2",  "%r3",
    "%r4",  "%r5",  "%r6",  "%r7",
    "%r8",  "%r9",  "%r10", "%r11",
    "%r12", "%r13", "%r14", "%r15",

    /* Slots 16-31: unnamed.  */
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,

    /* Slots 32-63: vector registers.  */
    "%v0",  "%v1",  "%v2",  "%v3",
    "%v4",  "%v5",  "%v6",  "%v7",
    "%v8",  "%v9",  "%v10", "%v11",
    "%v12", "%v13", "%v14", "%v15",
    "%v16", "%v17", "%v18", "%v19",
    "%v20", "%v21", "%v22", "%v23",
    "%v24", "%v25", "%v26", "%v27",
    "%v28", "%v29", "%v30", "%v31",
};
#endif
334
335/* Since R6 is a potential argument register, choose it last of the
336   call-saved registers.  Likewise prefer the call-clobbered registers
337   in reverse order to maximize the chance of avoiding the arguments.  */
338static const int tcg_target_reg_alloc_order[] = {
339    /* Call saved registers.  */
340    TCG_REG_R13,
341    TCG_REG_R12,
342    TCG_REG_R11,
343    TCG_REG_R10,
344    TCG_REG_R9,
345    TCG_REG_R8,
346    TCG_REG_R7,
347    TCG_REG_R6,
348    /* Call clobbered registers.  */
349    TCG_REG_R14,
350    TCG_REG_R0,
351    TCG_REG_R1,
352    /* Argument registers, in reverse order of allocation.  */
353    TCG_REG_R5,
354    TCG_REG_R4,
355    TCG_REG_R3,
356    TCG_REG_R2,
357
358    /* V8-V15 are call saved, and omitted. */
359    TCG_REG_V0,
360    TCG_REG_V1,
361    TCG_REG_V2,
362    TCG_REG_V3,
363    TCG_REG_V4,
364    TCG_REG_V5,
365    TCG_REG_V6,
366    TCG_REG_V7,
367    TCG_REG_V16,
368    TCG_REG_V17,
369    TCG_REG_V18,
370    TCG_REG_V19,
371    TCG_REG_V20,
372    TCG_REG_V21,
373    TCG_REG_V22,
374    TCG_REG_V23,
375    TCG_REG_V24,
376    TCG_REG_V25,
377    TCG_REG_V26,
378    TCG_REG_V27,
379    TCG_REG_V28,
380    TCG_REG_V29,
381    TCG_REG_V30,
382    TCG_REG_V31,
383};
384
385static const int tcg_target_call_iarg_regs[] = {
386    TCG_REG_R2,
387    TCG_REG_R3,
388    TCG_REG_R4,
389    TCG_REG_R5,
390    TCG_REG_R6,
391};
392
393static const int tcg_target_call_oarg_regs[] = {
394    TCG_REG_R2,
395};
396
/*
 * Condition masks.  The four basic values are single bits; NE, LE and GE
 * are formed by OR-ing them, NEVER selects nothing and ALWAYS selects all
 * four bits.  (Presumably these are the 4-bit mask operands of the
 * conditional branch/load instructions -- confirm against the
 * architecture manual.)
 */
#define S390_CC_EQ      8
#define S390_CC_LT      4
#define S390_CC_GT      2
#define S390_CC_OV      1
#define S390_CC_NE      (S390_CC_LT | S390_CC_GT)
#define S390_CC_LE      (S390_CC_LT | S390_CC_EQ)
#define S390_CC_GE      (S390_CC_GT | S390_CC_EQ)
#define S390_CC_NEVER   0
#define S390_CC_ALWAYS  15
406
407/* Condition codes that result from a COMPARE and COMPARE LOGICAL.  */
408static const uint8_t tcg_cond_to_s390_cond[] = {
409    [TCG_COND_EQ]  = S390_CC_EQ,
410    [TCG_COND_NE]  = S390_CC_NE,
411    [TCG_COND_LT]  = S390_CC_LT,
412    [TCG_COND_LE]  = S390_CC_LE,
413    [TCG_COND_GT]  = S390_CC_GT,
414    [TCG_COND_GE]  = S390_CC_GE,
415    [TCG_COND_LTU] = S390_CC_LT,
416    [TCG_COND_LEU] = S390_CC_LE,
417    [TCG_COND_GTU] = S390_CC_GT,
418    [TCG_COND_GEU] = S390_CC_GE,
419};
420
421/* Condition codes that result from a LOAD AND TEST.  Here, we have no
422   unsigned instruction variation, however since the test is vs zero we
423   can re-map the outcomes appropriately.  */
424static const uint8_t tcg_cond_to_ltr_cond[] = {
425    [TCG_COND_EQ]  = S390_CC_EQ,
426    [TCG_COND_NE]  = S390_CC_NE,
427    [TCG_COND_LT]  = S390_CC_LT,
428    [TCG_COND_LE]  = S390_CC_LE,
429    [TCG_COND_GT]  = S390_CC_GT,
430    [TCG_COND_GE]  = S390_CC_GE,
431    [TCG_COND_LTU] = S390_CC_NEVER,
432    [TCG_COND_LEU] = S390_CC_EQ,
433    [TCG_COND_GTU] = S390_CC_NE,
434    [TCG_COND_GEU] = S390_CC_ALWAYS,
435};
436
#ifdef CONFIG_SOFTMMU
/* Load helper functions, indexed by the size, sign and byte-swap
   bits of the memory operation.  */
static void * const qemu_ld_helpers[(MO_SSIZE | MO_BSWAP) + 1] = {
    /* 8-bit */
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_SB]   = helper_ret_ldsb_mmu,
    /* 16-bit */
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_LESW] = helper_le_ldsw_mmu,
    [MO_BESW] = helper_be_ldsw_mmu,
    /* 32-bit */
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_LESL] = helper_le_ldsl_mmu,
    [MO_BESL] = helper_be_ldsl_mmu,
    /* 64-bit */
    [MO_LEUQ] = helper_le_ldq_mmu,
    [MO_BEUQ] = helper_be_ldq_mmu,
};

/* Store helper functions, indexed by the size and byte-swap bits
   of the memory operation.  */
static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
    /* 8-bit */
    [MO_UB]   = helper_ret_stb_mmu,
    /* 16-bit */
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    /* 32-bit */
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    /* 64-bit */
    [MO_LEUQ] = helper_le_stq_mmu,
    [MO_BEUQ] = helper_be_stq_mmu,
};
#endif
463
/* Code address used when leaving a TB; it is assigned outside this part
   of the file (presumably while emitting the prologue -- confirm).  */
static const tcg_insn_unit *tb_ret_addr;
/* Facility words with external linkage; presumably the bit set that
   HAVE_FACILITY() tests -- confirm against the target header.  */
uint64_t s390_facilities[3];
466
467static inline bool is_general_reg(TCGReg r)
468{
469    return r <= TCG_REG_R15;
470}
471
472static inline bool is_vector_reg(TCGReg r)
473{
474    return r >= TCG_REG_V0 && r <= TCG_REG_V31;
475}
476
477static bool patch_reloc(tcg_insn_unit *src_rw, int type,
478                        intptr_t value, intptr_t addend)
479{
480    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
481    intptr_t pcrel2;
482    uint32_t old;
483
484    value += addend;
485    pcrel2 = (tcg_insn_unit *)value - src_rx;
486
487    switch (type) {
488    case R_390_PC16DBL:
489        if (pcrel2 == (int16_t)pcrel2) {
490            tcg_patch16(src_rw, pcrel2);
491            return true;
492        }
493        break;
494    case R_390_PC32DBL:
495        if (pcrel2 == (int32_t)pcrel2) {
496            tcg_patch32(src_rw, pcrel2);
497            return true;
498        }
499        break;
500    case R_390_20:
501        if (value == sextract64(value, 0, 20)) {
502            old = *(uint32_t *)src_rw & 0xf00000ff;
503            old |= ((value & 0xfff) << 16) | ((value & 0xff000) >> 4);
504            tcg_patch32(src_rw, old);
505            return true;
506        }
507        break;
508    default:
509        g_assert_not_reached();
510    }
511    return false;
512}
513
514/* Test if a constant matches the constraint. */
515static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
516{
517    if (ct & TCG_CT_CONST) {
518        return 1;
519    }
520
521    if (type == TCG_TYPE_I32) {
522        val = (int32_t)val;
523    }
524
525    /* The following are mutually exclusive.  */
526    if (ct & TCG_CT_CONST_S16) {
527        return val == (int16_t)val;
528    } else if (ct & TCG_CT_CONST_S32) {
529        return val == (int32_t)val;
530    } else if (ct & TCG_CT_CONST_S33) {
531        return val >= -0xffffffffll && val <= 0xffffffffll;
532    } else if (ct & TCG_CT_CONST_ZERO) {
533        return val == 0;
534    }
535
536    return 0;
537}
538
539/* Emit instructions according to the given instruction format.  */
540
541static void tcg_out_insn_RR(TCGContext *s, S390Opcode op, TCGReg r1, TCGReg r2)
542{
543    tcg_out16(s, (op << 8) | (r1 << 4) | r2);
544}
545
546static void tcg_out_insn_RRE(TCGContext *s, S390Opcode op,
547                             TCGReg r1, TCGReg r2)
548{
549    tcg_out32(s, (op << 16) | (r1 << 4) | r2);
550}
551
552static void tcg_out_insn_RRF(TCGContext *s, S390Opcode op,
553                             TCGReg r1, TCGReg r2, int m3)
554{
555    tcg_out32(s, (op << 16) | (m3 << 12) | (r1 << 4) | r2);
556}
557
558static void tcg_out_insn_RI(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
559{
560    tcg_out32(s, (op << 16) | (r1 << 20) | (i2 & 0xffff));
561}
562
563static void tcg_out_insn_RIE(TCGContext *s, S390Opcode op, TCGReg r1,
564                             int i2, int m3)
565{
566    tcg_out16(s, (op & 0xff00) | (r1 << 4) | m3);
567    tcg_out32(s, (i2 << 16) | (op & 0xff));
568}
569
570static void tcg_out_insn_RIL(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
571{
572    tcg_out16(s, op | (r1 << 4));
573    tcg_out32(s, i2);
574}
575
576static void tcg_out_insn_RS(TCGContext *s, S390Opcode op, TCGReg r1,
577                            TCGReg b2, TCGReg r3, int disp)
578{
579    tcg_out32(s, (op << 24) | (r1 << 20) | (r3 << 16) | (b2 << 12)
580              | (disp & 0xfff));
581}
582
583static void tcg_out_insn_RSY(TCGContext *s, S390Opcode op, TCGReg r1,
584                             TCGReg b2, TCGReg r3, int disp)
585{
586    tcg_out16(s, (op & 0xff00) | (r1 << 4) | r3);
587    tcg_out32(s, (op & 0xff) | (b2 << 28)
588              | ((disp & 0xfff) << 16) | ((disp & 0xff000) >> 4));
589}
590
/* The RX and RXY formats are emitted by the RS and RSY emitters
   respectively; the R3 argument then carries the index register.  */
#define tcg_out_insn_RX   tcg_out_insn_RS
#define tcg_out_insn_RXY  tcg_out_insn_RSY
593
594static int RXB(TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
595{
596    /*
597     * Shift bit 4 of each regno to its corresponding bit of RXB.
598     * RXB itself begins at bit 8 of the instruction so 8 - 4 = 4
599     * is the left-shift of the 4th operand.
600     */
601    return ((v1 & 0x10) << (4 + 3))
602         | ((v2 & 0x10) << (4 + 2))
603         | ((v3 & 0x10) << (4 + 1))
604         | ((v4 & 0x10) << (4 + 0));
605}
606
607static void tcg_out_insn_VRIa(TCGContext *s, S390Opcode op,
608                              TCGReg v1, uint16_t i2, int m3)
609{
610    tcg_debug_assert(is_vector_reg(v1));
611    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4));
612    tcg_out16(s, i2);
613    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
614}
615
616static void tcg_out_insn_VRIb(TCGContext *s, S390Opcode op,
617                              TCGReg v1, uint8_t i2, uint8_t i3, int m4)
618{
619    tcg_debug_assert(is_vector_reg(v1));
620    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4));
621    tcg_out16(s, (i2 << 8) | (i3 & 0xff));
622    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
623}
624
625static void tcg_out_insn_VRIc(TCGContext *s, S390Opcode op,
626                              TCGReg v1, uint16_t i2, TCGReg v3, int m4)
627{
628    tcg_debug_assert(is_vector_reg(v1));
629    tcg_debug_assert(is_vector_reg(v3));
630    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v3 & 0xf));
631    tcg_out16(s, i2);
632    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, v3, 0) | (m4 << 12));
633}
634
635static void tcg_out_insn_VRRa(TCGContext *s, S390Opcode op,
636                              TCGReg v1, TCGReg v2, int m3)
637{
638    tcg_debug_assert(is_vector_reg(v1));
639    tcg_debug_assert(is_vector_reg(v2));
640    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
641    tcg_out32(s, (op & 0x00ff) | RXB(v1, v2, 0, 0) | (m3 << 12));
642}
643
644static void tcg_out_insn_VRRc(TCGContext *s, S390Opcode op,
645                              TCGReg v1, TCGReg v2, TCGReg v3, int m4)
646{
647    tcg_debug_assert(is_vector_reg(v1));
648    tcg_debug_assert(is_vector_reg(v2));
649    tcg_debug_assert(is_vector_reg(v3));
650    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
651    tcg_out16(s, v3 << 12);
652    tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, 0) | (m4 << 12));
653}
654
655static void tcg_out_insn_VRRe(TCGContext *s, S390Opcode op,
656                              TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
657{
658    tcg_debug_assert(is_vector_reg(v1));
659    tcg_debug_assert(is_vector_reg(v2));
660    tcg_debug_assert(is_vector_reg(v3));
661    tcg_debug_assert(is_vector_reg(v4));
662    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
663    tcg_out16(s, v3 << 12);
664    tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, v4) | (v4 << 12));
665}
666
667static void tcg_out_insn_VRRf(TCGContext *s, S390Opcode op,
668                              TCGReg v1, TCGReg r2, TCGReg r3)
669{
670    tcg_debug_assert(is_vector_reg(v1));
671    tcg_debug_assert(is_general_reg(r2));
672    tcg_debug_assert(is_general_reg(r3));
673    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r2);
674    tcg_out16(s, r3 << 12);
675    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0));
676}
677
678static void tcg_out_insn_VRSa(TCGContext *s, S390Opcode op, TCGReg v1,
679                              intptr_t d2, TCGReg b2, TCGReg v3, int m4)
680{
681    tcg_debug_assert(is_vector_reg(v1));
682    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
683    tcg_debug_assert(is_general_reg(b2));
684    tcg_debug_assert(is_vector_reg(v3));
685    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v3 & 0xf));
686    tcg_out16(s, b2 << 12 | d2);
687    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, v3, 0) | (m4 << 12));
688}
689
690static void tcg_out_insn_VRSb(TCGContext *s, S390Opcode op, TCGReg v1,
691                              intptr_t d2, TCGReg b2, TCGReg r3, int m4)
692{
693    tcg_debug_assert(is_vector_reg(v1));
694    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
695    tcg_debug_assert(is_general_reg(b2));
696    tcg_debug_assert(is_general_reg(r3));
697    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r3);
698    tcg_out16(s, b2 << 12 | d2);
699    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
700}
701
702static void tcg_out_insn_VRSc(TCGContext *s, S390Opcode op, TCGReg r1,
703                              intptr_t d2, TCGReg b2, TCGReg v3, int m4)
704{
705    tcg_debug_assert(is_general_reg(r1));
706    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
707    tcg_debug_assert(is_general_reg(b2));
708    tcg_debug_assert(is_vector_reg(v3));
709    tcg_out16(s, (op & 0xff00) | (r1 << 4) | (v3 & 0xf));
710    tcg_out16(s, b2 << 12 | d2);
711    tcg_out16(s, (op & 0x00ff) | RXB(0, 0, v3, 0) | (m4 << 12));
712}
713
714static void tcg_out_insn_VRX(TCGContext *s, S390Opcode op, TCGReg v1,
715                             TCGReg b2, TCGReg x2, intptr_t d2, int m3)
716{
717    tcg_debug_assert(is_vector_reg(v1));
718    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
719    tcg_debug_assert(is_general_reg(x2));
720    tcg_debug_assert(is_general_reg(b2));
721    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | x2);
722    tcg_out16(s, (b2 << 12) | d2);
723    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
724}
725
/* Emit an opcode with "type-checking" of the format.  FMT and OP are
   pasted together: tcg_out_insn(s, RR, LR, ...) expands to
   tcg_out_insn_RR(s, RR_LR, ...), so an opcode can only be used with
   the emitter for the format it is declared under.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(FMT,_),OP), ## __VA_ARGS__)
729
730
731/* emit 64-bit shifts */
732static void tcg_out_sh64(TCGContext* s, S390Opcode op, TCGReg dest,
733                         TCGReg src, TCGReg sh_reg, int sh_imm)
734{
735    tcg_out_insn_RSY(s, op, dest, sh_reg, src, sh_imm);
736}
737
738/* emit 32-bit shifts */
739static void tcg_out_sh32(TCGContext* s, S390Opcode op, TCGReg dest,
740                         TCGReg sh_reg, int sh_imm)
741{
742    tcg_out_insn_RS(s, op, dest, sh_reg, 0, sh_imm);
743}
744
745static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
746{
747    if (src == dst) {
748        return true;
749    }
750    switch (type) {
751    case TCG_TYPE_I32:
752        if (likely(is_general_reg(dst) && is_general_reg(src))) {
753            tcg_out_insn(s, RR, LR, dst, src);
754            break;
755        }
756        /* fallthru */
757
758    case TCG_TYPE_I64:
759        if (likely(is_general_reg(dst))) {
760            if (likely(is_general_reg(src))) {
761                tcg_out_insn(s, RRE, LGR, dst, src);
762            } else {
763                tcg_out_insn(s, VRSc, VLGV, dst, 0, 0, src, 3);
764            }
765            break;
766        } else if (is_general_reg(src)) {
767            tcg_out_insn(s, VRSb, VLVG, dst, 0, 0, src, 3);
768            break;
769        }
770        /* fallthru */
771
772    case TCG_TYPE_V64:
773    case TCG_TYPE_V128:
774        tcg_out_insn(s, VRRa, VLR, dst, src, 0);
775        break;
776
777    default:
778        g_assert_not_reached();
779    }
780    return true;
781}
782
783static const S390Opcode lli_insns[4] = {
784    RI_LLILL, RI_LLILH, RI_LLIHL, RI_LLIHH
785};
786
787static bool maybe_out_small_movi(TCGContext *s, TCGType type,
788                                 TCGReg ret, tcg_target_long sval)
789{
790    tcg_target_ulong uval = sval;
791    int i;
792
793    if (type == TCG_TYPE_I32) {
794        uval = (uint32_t)sval;
795        sval = (int32_t)sval;
796    }
797
798    /* Try all 32-bit insns that can load it in one go.  */
799    if (sval >= -0x8000 && sval < 0x8000) {
800        tcg_out_insn(s, RI, LGHI, ret, sval);
801        return true;
802    }
803
804    for (i = 0; i < 4; i++) {
805        tcg_target_long mask = 0xffffull << i*16;
806        if ((uval & mask) == uval) {
807            tcg_out_insn_RI(s, lli_insns[i], ret, uval >> i*16);
808            return true;
809        }
810    }
811
812    return false;
813}
814
/*
 * Load a register with an immediate value.
 *
 * Strategies are tried from shortest to longest encoding: a single
 * 32-bit insn, a single 48-bit insn (needs the EXT_IMM facility), a
 * pc-relative or TB-relative address computation, a two-insn sequence
 * for unsigned 32-bit values, and finally a load from the constant
 * pool.  IN_PROLOGUE disables every strategy that depends on
 * TCG_REG_TB.
 */
static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
                             tcg_target_long sval, bool in_prologue)
{
    tcg_target_ulong uval;

    /* Try all 32-bit insns that can load it in one go.  */
    if (maybe_out_small_movi(s, type, ret, sval)) {
        return;
    }

    /* For a 32-bit type only the low word matters: keep it both
       zero-extended (uval) and sign-extended (sval).  */
    uval = sval;
    if (type == TCG_TYPE_I32) {
        uval = (uint32_t)sval;
        sval = (int32_t)sval;
    }

    /* Try all 48-bit insns that can load it in one go.  */
    if (HAVE_FACILITY(EXT_IMM)) {
        if (sval == (int32_t)sval) {
            tcg_out_insn(s, RIL, LGFI, ret, sval);
            return;
        }
        if (uval <= 0xffffffff) {
            tcg_out_insn(s, RIL, LLILF, ret, uval);
            return;
        }
        if ((uval & 0xffffffff) == 0) {
            tcg_out_insn(s, RIL, LLIHF, ret, uval >> 32);
            return;
        }
    }

    /* Try for PC-relative address load.  For odd addresses,
       attempt to use an offset from the start of the TB.  */
    if ((sval & 1) == 0) {
        /* The offset is halved before the range check and encoding.  */
        ptrdiff_t off = tcg_pcrel_diff(s, (void *)sval) >> 1;
        if (off == (int32_t)off) {
            tcg_out_insn(s, RIL, LARL, ret, off);
            return;
        }
    } else if (USE_REG_TB && !in_prologue) {
        ptrdiff_t off = tcg_tbrel_diff(s, (void *)sval);
        if (off == sextract64(off, 0, 20)) {
            /* This is certain to be an address within TB, and therefore
               OFF will be negative; don't try RX_LA.  */
            tcg_out_insn(s, RXY, LAY, ret, TCG_REG_TB, TCG_REG_NONE, off);
            return;
        }
    }

    /* A 32-bit unsigned value can be loaded in 2 insns.  And given
       that LLILL, LLIHL, LLILF above did not succeed, we know that
       both insns are required.  */
    if (uval <= 0xffffffff) {
        tcg_out_insn(s, RI, LLILL, ret, uval);
        tcg_out_insn(s, RI, IILH, ret, uval >> 16);
        return;
    }

    /* Otherwise, stuff it in the constant pool.  In each variant the
       pool label is attached at code_ptr - 2, i.e. two insn units
       before the end of the insn just emitted; the field is filled in
       later (presumably through patch_reloc -- confirm).  */
    if (HAVE_FACILITY(GEN_INST_EXT)) {
        tcg_out_insn(s, RIL, LGRL, ret, 0);
        new_pool_label(s, sval, R_390_PC32DBL, s->code_ptr - 2, 2);
    } else if (USE_REG_TB && !in_prologue) {
        tcg_out_insn(s, RXY, LG, ret, TCG_REG_TB, TCG_REG_NONE, 0);
        new_pool_label(s, sval, R_390_20, s->code_ptr - 2,
                       tcg_tbrel_diff(s, NULL));
    } else {
        /* Register number 0 in the base field means "no register",
           so fall back to the scratch register when RET is 0.  */
        TCGReg base = ret ? ret : TCG_TMP0;
        tcg_out_insn(s, RIL, LARL, base, 0);
        new_pool_label(s, sval, R_390_PC32DBL, s->code_ptr - 2, 2);
        tcg_out_insn(s, RXY, LG, ret, base, TCG_REG_NONE, 0);
    }
}
890
891static void tcg_out_movi(TCGContext *s, TCGType type,
892                         TCGReg ret, tcg_target_long sval)
893{
894    tcg_out_movi_int(s, type, ret, sval, false);
895}
896
897/* Emit a load/store type instruction.  Inputs are:
898   DATA:     The register to be loaded or stored.
899   BASE+OFS: The effective address.
900   OPC_RX:   If the operation has an RX format opcode (e.g. STC), otherwise 0.
901   OPC_RXY:  The RXY format opcode for the operation (e.g. STCY).  */
902
903static void tcg_out_mem(TCGContext *s, S390Opcode opc_rx, S390Opcode opc_rxy,
904                        TCGReg data, TCGReg base, TCGReg index,
905                        tcg_target_long ofs)
906{
907    if (ofs < -0x80000 || ofs >= 0x80000) {
908        /* Combine the low 20 bits of the offset with the actual load insn;
909           the high 44 bits must come from an immediate load.  */
910        tcg_target_long low = ((ofs & 0xfffff) ^ 0x80000) - 0x80000;
911        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - low);
912        ofs = low;
913
914        /* If we were already given an index register, add it in.  */
915        if (index != TCG_REG_NONE) {
916            tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
917        }
918        index = TCG_TMP0;
919    }
920
921    if (opc_rx && ofs >= 0 && ofs < 0x1000) {
922        tcg_out_insn_RX(s, opc_rx, data, base, index, ofs);
923    } else {
924        tcg_out_insn_RXY(s, opc_rxy, data, base, index, ofs);
925    }
926}
927
928static void tcg_out_vrx_mem(TCGContext *s, S390Opcode opc_vrx,
929                            TCGReg data, TCGReg base, TCGReg index,
930                            tcg_target_long ofs, int m3)
931{
932    if (ofs < 0 || ofs >= 0x1000) {
933        if (ofs >= -0x80000 && ofs < 0x80000) {
934            tcg_out_insn(s, RXY, LAY, TCG_TMP0, base, index, ofs);
935            base = TCG_TMP0;
936            index = TCG_REG_NONE;
937            ofs = 0;
938        } else {
939            tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs);
940            if (index != TCG_REG_NONE) {
941                tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
942            }
943            index = TCG_TMP0;
944            ofs = 0;
945        }
946    }
947    tcg_out_insn_VRX(s, opc_vrx, data, base, index, ofs, m3);
948}
949
950/* load data without address translation or endianness conversion */
951static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data,
952                       TCGReg base, intptr_t ofs)
953{
954    switch (type) {
955    case TCG_TYPE_I32:
956        if (likely(is_general_reg(data))) {
957            tcg_out_mem(s, RX_L, RXY_LY, data, base, TCG_REG_NONE, ofs);
958            break;
959        }
960        tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_32);
961        break;
962
963    case TCG_TYPE_I64:
964        if (likely(is_general_reg(data))) {
965            tcg_out_mem(s, 0, RXY_LG, data, base, TCG_REG_NONE, ofs);
966            break;
967        }
968        /* fallthru */
969
970    case TCG_TYPE_V64:
971        tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_64);
972        break;
973
974    case TCG_TYPE_V128:
975        /* Hint quadword aligned.  */
976        tcg_out_vrx_mem(s, VRX_VL, data, base, TCG_REG_NONE, ofs, 4);
977        break;
978
979    default:
980        g_assert_not_reached();
981    }
982}
983
984static void tcg_out_st(TCGContext *s, TCGType type, TCGReg data,
985                       TCGReg base, intptr_t ofs)
986{
987    switch (type) {
988    case TCG_TYPE_I32:
989        if (likely(is_general_reg(data))) {
990            tcg_out_mem(s, RX_ST, RXY_STY, data, base, TCG_REG_NONE, ofs);
991        } else {
992            tcg_out_vrx_mem(s, VRX_VSTEF, data, base, TCG_REG_NONE, ofs, 1);
993        }
994        break;
995
996    case TCG_TYPE_I64:
997        if (likely(is_general_reg(data))) {
998            tcg_out_mem(s, 0, RXY_STG, data, base, TCG_REG_NONE, ofs);
999            break;
1000        }
1001        /* fallthru */
1002
1003    case TCG_TYPE_V64:
1004        tcg_out_vrx_mem(s, VRX_VSTEG, data, base, TCG_REG_NONE, ofs, 0);
1005        break;
1006
1007    case TCG_TYPE_V128:
1008        /* Hint quadword aligned.  */
1009        tcg_out_vrx_mem(s, VRX_VST, data, base, TCG_REG_NONE, ofs, 4);
1010        break;
1011
1012    default:
1013        g_assert_not_reached();
1014    }
1015}
1016
1017static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1018                               TCGReg base, intptr_t ofs)
1019{
1020    return false;
1021}
1022
1023/* load data from an absolute host address */
1024static void tcg_out_ld_abs(TCGContext *s, TCGType type,
1025                           TCGReg dest, const void *abs)
1026{
1027    intptr_t addr = (intptr_t)abs;
1028
1029    if (HAVE_FACILITY(GEN_INST_EXT) && !(addr & 1)) {
1030        ptrdiff_t disp = tcg_pcrel_diff(s, abs) >> 1;
1031        if (disp == (int32_t)disp) {
1032            if (type == TCG_TYPE_I32) {
1033                tcg_out_insn(s, RIL, LRL, dest, disp);
1034            } else {
1035                tcg_out_insn(s, RIL, LGRL, dest, disp);
1036            }
1037            return;
1038        }
1039    }
1040    if (USE_REG_TB) {
1041        ptrdiff_t disp = tcg_tbrel_diff(s, abs);
1042        if (disp == sextract64(disp, 0, 20)) {
1043            tcg_out_ld(s, type, dest, TCG_REG_TB, disp);
1044            return;
1045        }
1046    }
1047
1048    tcg_out_movi(s, TCG_TYPE_PTR, dest, addr & ~0xffff);
1049    tcg_out_ld(s, type, dest, dest, addr & 0xffff);
1050}
1051
1052static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src,
1053                                 int msb, int lsb, int ofs, int z)
1054{
1055    /* Format RIE-f */
1056    tcg_out16(s, (RIE_RISBG & 0xff00) | (dest << 4) | src);
1057    tcg_out16(s, (msb << 8) | (z << 7) | lsb);
1058    tcg_out16(s, (ofs << 8) | (RIE_RISBG & 0xff));
1059}
1060
1061static void tgen_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1062{
1063    if (HAVE_FACILITY(EXT_IMM)) {
1064        tcg_out_insn(s, RRE, LGBR, dest, src);
1065        return;
1066    }
1067
1068    if (type == TCG_TYPE_I32) {
1069        if (dest == src) {
1070            tcg_out_sh32(s, RS_SLL, dest, TCG_REG_NONE, 24);
1071        } else {
1072            tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 24);
1073        }
1074        tcg_out_sh32(s, RS_SRA, dest, TCG_REG_NONE, 24);
1075    } else {
1076        tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 56);
1077        tcg_out_sh64(s, RSY_SRAG, dest, dest, TCG_REG_NONE, 56);
1078    }
1079}
1080
1081static void tgen_ext8u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1082{
1083    if (HAVE_FACILITY(EXT_IMM)) {
1084        tcg_out_insn(s, RRE, LLGCR, dest, src);
1085        return;
1086    }
1087
1088    if (dest == src) {
1089        tcg_out_movi(s, type, TCG_TMP0, 0xff);
1090        src = TCG_TMP0;
1091    } else {
1092        tcg_out_movi(s, type, dest, 0xff);
1093    }
1094    if (type == TCG_TYPE_I32) {
1095        tcg_out_insn(s, RR, NR, dest, src);
1096    } else {
1097        tcg_out_insn(s, RRE, NGR, dest, src);
1098    }
1099}
1100
1101static void tgen_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1102{
1103    if (HAVE_FACILITY(EXT_IMM)) {
1104        tcg_out_insn(s, RRE, LGHR, dest, src);
1105        return;
1106    }
1107
1108    if (type == TCG_TYPE_I32) {
1109        if (dest == src) {
1110            tcg_out_sh32(s, RS_SLL, dest, TCG_REG_NONE, 16);
1111        } else {
1112            tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 16);
1113        }
1114        tcg_out_sh32(s, RS_SRA, dest, TCG_REG_NONE, 16);
1115    } else {
1116        tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 48);
1117        tcg_out_sh64(s, RSY_SRAG, dest, dest, TCG_REG_NONE, 48);
1118    }
1119}
1120
1121static void tgen_ext16u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1122{
1123    if (HAVE_FACILITY(EXT_IMM)) {
1124        tcg_out_insn(s, RRE, LLGHR, dest, src);
1125        return;
1126    }
1127
1128    if (dest == src) {
1129        tcg_out_movi(s, type, TCG_TMP0, 0xffff);
1130        src = TCG_TMP0;
1131    } else {
1132        tcg_out_movi(s, type, dest, 0xffff);
1133    }
1134    if (type == TCG_TYPE_I32) {
1135        tcg_out_insn(s, RR, NR, dest, src);
1136    } else {
1137        tcg_out_insn(s, RRE, NGR, dest, src);
1138    }
1139}
1140
1141static inline void tgen_ext32s(TCGContext *s, TCGReg dest, TCGReg src)
1142{
1143    tcg_out_insn(s, RRE, LGFR, dest, src);
1144}
1145
1146static inline void tgen_ext32u(TCGContext *s, TCGReg dest, TCGReg src)
1147{
1148    tcg_out_insn(s, RRE, LLGFR, dest, src);
1149}
1150
/*
 * Return true when C is a single contiguous run of ones, allowing the run
 * to wrap around from bit 63 to bit 0.  Accepted shapes:
 *    0....01....1
 *    1....10....0
 *    1..10..01..1
 *    0..01..10..0
 * All-zeros and all-ones are rejected.  Copied from gcc sources.
 */
static inline bool risbg_mask(uint64_t c)
{
    uint64_t first, rest, second;

    /*
     * Inverting does not change the number of 0/1 transitions, so
     * normalize to a value whose least significant bit is zero.
     */
    if (c & 1) {
        c = ~c;
    }

    /* All zeros here means the input was all zeros or all ones.  */
    if (c == 0) {
        return false;
    }

    /* Lowest set bit marks the first transition.  */
    first = c & -c;

    /*
     * Flip the value so the end of the run shows up as a set bit, and
     * drop everything below the first transition.
     */
    rest = ~c & -first;

    /* Lowest set bit of what remains is the second transition, if any.  */
    second = rest & -rest;

    /* Accept when everything from there upward is ones, or nothing is left.  */
    return rest == -second;
}
1180
1181static void tgen_andi_risbg(TCGContext *s, TCGReg out, TCGReg in, uint64_t val)
1182{
1183    int msb, lsb;
1184    if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
1185        /* Achieve wraparound by swapping msb and lsb.  */
1186        msb = 64 - ctz64(~val);
1187        lsb = clz64(~val) - 1;
1188    } else {
1189        msb = clz64(val);
1190        lsb = 63 - ctz64(val);
1191    }
1192    tcg_out_risbg(s, out, in, msb, lsb, 0, 1);
1193}
1194
1195static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
1196{
1197    static const S390Opcode ni_insns[4] = {
1198        RI_NILL, RI_NILH, RI_NIHL, RI_NIHH
1199    };
1200    static const S390Opcode nif_insns[2] = {
1201        RIL_NILF, RIL_NIHF
1202    };
1203    uint64_t valid = (type == TCG_TYPE_I32 ? 0xffffffffull : -1ull);
1204    int i;
1205
1206    /* Look for the zero-extensions.  */
1207    if ((val & valid) == 0xffffffff) {
1208        tgen_ext32u(s, dest, dest);
1209        return;
1210    }
1211    if (HAVE_FACILITY(EXT_IMM)) {
1212        if ((val & valid) == 0xff) {
1213            tgen_ext8u(s, TCG_TYPE_I64, dest, dest);
1214            return;
1215        }
1216        if ((val & valid) == 0xffff) {
1217            tgen_ext16u(s, TCG_TYPE_I64, dest, dest);
1218            return;
1219        }
1220    }
1221
1222    /* Try all 32-bit insns that can perform it in one go.  */
1223    for (i = 0; i < 4; i++) {
1224        tcg_target_ulong mask = ~(0xffffull << i*16);
1225        if (((val | ~valid) & mask) == mask) {
1226            tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
1227            return;
1228        }
1229    }
1230
1231    /* Try all 48-bit insns that can perform it in one go.  */
1232    if (HAVE_FACILITY(EXT_IMM)) {
1233        for (i = 0; i < 2; i++) {
1234            tcg_target_ulong mask = ~(0xffffffffull << i*32);
1235            if (((val | ~valid) & mask) == mask) {
1236                tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32);
1237                return;
1238            }
1239        }
1240    }
1241    if (HAVE_FACILITY(GEN_INST_EXT) && risbg_mask(val)) {
1242        tgen_andi_risbg(s, dest, dest, val);
1243        return;
1244    }
1245
1246    /* Use the constant pool if USE_REG_TB, but not for small constants.  */
1247    if (USE_REG_TB) {
1248        if (!maybe_out_small_movi(s, type, TCG_TMP0, val)) {
1249            tcg_out_insn(s, RXY, NG, dest, TCG_REG_TB, TCG_REG_NONE, 0);
1250            new_pool_label(s, val & valid, R_390_20, s->code_ptr - 2,
1251                           tcg_tbrel_diff(s, NULL));
1252            return;
1253        }
1254    } else {
1255        tcg_out_movi(s, type, TCG_TMP0, val);
1256    }
1257    if (type == TCG_TYPE_I32) {
1258        tcg_out_insn(s, RR, NR, dest, TCG_TMP0);
1259    } else {
1260        tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0);
1261    }
1262}
1263
1264static void tgen_ori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
1265{
1266    static const S390Opcode oi_insns[4] = {
1267        RI_OILL, RI_OILH, RI_OIHL, RI_OIHH
1268    };
1269    static const S390Opcode oif_insns[2] = {
1270        RIL_OILF, RIL_OIHF
1271    };
1272
1273    int i;
1274
1275    /* Look for no-op.  */
1276    if (unlikely(val == 0)) {
1277        return;
1278    }
1279
1280    /* Try all 32-bit insns that can perform it in one go.  */
1281    for (i = 0; i < 4; i++) {
1282        tcg_target_ulong mask = (0xffffull << i*16);
1283        if ((val & mask) != 0 && (val & ~mask) == 0) {
1284            tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16);
1285            return;
1286        }
1287    }
1288
1289    /* Try all 48-bit insns that can perform it in one go.  */
1290    if (HAVE_FACILITY(EXT_IMM)) {
1291        for (i = 0; i < 2; i++) {
1292            tcg_target_ulong mask = (0xffffffffull << i*32);
1293            if ((val & mask) != 0 && (val & ~mask) == 0) {
1294                tcg_out_insn_RIL(s, oif_insns[i], dest, val >> i*32);
1295                return;
1296            }
1297        }
1298    }
1299
1300    /* Use the constant pool if USE_REG_TB, but not for small constants.  */
1301    if (maybe_out_small_movi(s, type, TCG_TMP0, val)) {
1302        if (type == TCG_TYPE_I32) {
1303            tcg_out_insn(s, RR, OR, dest, TCG_TMP0);
1304        } else {
1305            tcg_out_insn(s, RRE, OGR, dest, TCG_TMP0);
1306        }
1307    } else if (USE_REG_TB) {
1308        tcg_out_insn(s, RXY, OG, dest, TCG_REG_TB, TCG_REG_NONE, 0);
1309        new_pool_label(s, val, R_390_20, s->code_ptr - 2,
1310                       tcg_tbrel_diff(s, NULL));
1311    } else {
1312        /* Perform the OR via sequential modifications to the high and
1313           low parts.  Do this via recursion to handle 16-bit vs 32-bit
1314           masks in each half.  */
1315        tcg_debug_assert(HAVE_FACILITY(EXT_IMM));
1316        tgen_ori(s, type, dest, val & 0x00000000ffffffffull);
1317        tgen_ori(s, type, dest, val & 0xffffffff00000000ull);
1318    }
1319}
1320
1321static void tgen_xori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
1322{
1323    /* Try all 48-bit insns that can perform it in one go.  */
1324    if (HAVE_FACILITY(EXT_IMM)) {
1325        if ((val & 0xffffffff00000000ull) == 0) {
1326            tcg_out_insn(s, RIL, XILF, dest, val);
1327            return;
1328        }
1329        if ((val & 0x00000000ffffffffull) == 0) {
1330            tcg_out_insn(s, RIL, XIHF, dest, val >> 32);
1331            return;
1332        }
1333    }
1334
1335    /* Use the constant pool if USE_REG_TB, but not for small constants.  */
1336    if (maybe_out_small_movi(s, type, TCG_TMP0, val)) {
1337        if (type == TCG_TYPE_I32) {
1338            tcg_out_insn(s, RR, XR, dest, TCG_TMP0);
1339        } else {
1340            tcg_out_insn(s, RRE, XGR, dest, TCG_TMP0);
1341        }
1342    } else if (USE_REG_TB) {
1343        tcg_out_insn(s, RXY, XG, dest, TCG_REG_TB, TCG_REG_NONE, 0);
1344        new_pool_label(s, val, R_390_20, s->code_ptr - 2,
1345                       tcg_tbrel_diff(s, NULL));
1346    } else {
1347        /* Perform the xor by parts.  */
1348        tcg_debug_assert(HAVE_FACILITY(EXT_IMM));
1349        if (val & 0xffffffff) {
1350            tcg_out_insn(s, RIL, XILF, dest, val);
1351        }
1352        if (val > 0xffffffff) {
1353            tcg_out_insn(s, RIL, XIHF, dest, val >> 32);
1354        }
1355    }
1356}
1357
1358static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
1359                    TCGArg c2, bool c2const, bool need_carry)
1360{
1361    bool is_unsigned = is_unsigned_cond(c);
1362    S390Opcode op;
1363
1364    if (c2const) {
1365        if (c2 == 0) {
1366            if (!(is_unsigned && need_carry)) {
1367                if (type == TCG_TYPE_I32) {
1368                    tcg_out_insn(s, RR, LTR, r1, r1);
1369                } else {
1370                    tcg_out_insn(s, RRE, LTGR, r1, r1);
1371                }
1372                return tcg_cond_to_ltr_cond[c];
1373            }
1374        }
1375
1376        if (!is_unsigned && c2 == (int16_t)c2) {
1377            op = (type == TCG_TYPE_I32 ? RI_CHI : RI_CGHI);
1378            tcg_out_insn_RI(s, op, r1, c2);
1379            goto exit;
1380        }
1381
1382        if (HAVE_FACILITY(EXT_IMM)) {
1383            if (type == TCG_TYPE_I32) {
1384                op = (is_unsigned ? RIL_CLFI : RIL_CFI);
1385                tcg_out_insn_RIL(s, op, r1, c2);
1386                goto exit;
1387            } else if (c2 == (is_unsigned ? (TCGArg)(uint32_t)c2 : (TCGArg)(int32_t)c2)) {
1388                op = (is_unsigned ? RIL_CLGFI : RIL_CGFI);
1389                tcg_out_insn_RIL(s, op, r1, c2);
1390                goto exit;
1391            }
1392        }
1393
1394        /* Use the constant pool, but not for small constants.  */
1395        if (maybe_out_small_movi(s, type, TCG_TMP0, c2)) {
1396            c2 = TCG_TMP0;
1397            /* fall through to reg-reg */
1398        } else if (USE_REG_TB) {
1399            if (type == TCG_TYPE_I32) {
1400                op = (is_unsigned ? RXY_CLY : RXY_CY);
1401                tcg_out_insn_RXY(s, op, r1, TCG_REG_TB, TCG_REG_NONE, 0);
1402                new_pool_label(s, (uint32_t)c2, R_390_20, s->code_ptr - 2,
1403                               4 - tcg_tbrel_diff(s, NULL));
1404            } else {
1405                op = (is_unsigned ? RXY_CLG : RXY_CG);
1406                tcg_out_insn_RXY(s, op, r1, TCG_REG_TB, TCG_REG_NONE, 0);
1407                new_pool_label(s, c2, R_390_20, s->code_ptr - 2,
1408                               tcg_tbrel_diff(s, NULL));
1409            }
1410            goto exit;
1411        } else {
1412            if (type == TCG_TYPE_I32) {
1413                op = (is_unsigned ? RIL_CLRL : RIL_CRL);
1414                tcg_out_insn_RIL(s, op, r1, 0);
1415                new_pool_label(s, (uint32_t)c2, R_390_PC32DBL,
1416                               s->code_ptr - 2, 2 + 4);
1417            } else {
1418                op = (is_unsigned ? RIL_CLGRL : RIL_CGRL);
1419                tcg_out_insn_RIL(s, op, r1, 0);
1420                new_pool_label(s, c2, R_390_PC32DBL, s->code_ptr - 2, 2);
1421            }
1422            goto exit;
1423        }
1424    }
1425
1426    if (type == TCG_TYPE_I32) {
1427        op = (is_unsigned ? RR_CLR : RR_CR);
1428        tcg_out_insn_RR(s, op, r1, c2);
1429    } else {
1430        op = (is_unsigned ? RRE_CLGR : RRE_CGR);
1431        tcg_out_insn_RRE(s, op, r1, c2);
1432    }
1433
1434 exit:
1435    return tcg_cond_to_s390_cond[c];
1436}
1437
1438static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
1439                         TCGReg dest, TCGReg c1, TCGArg c2, int c2const)
1440{
1441    int cc;
1442    bool have_loc;
1443
1444    /* With LOC2, we can always emit the minimum 3 insns.  */
1445    if (HAVE_FACILITY(LOAD_ON_COND2)) {
1446        /* Emit: d = 0, d = (cc ? 1 : d).  */
1447        cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
1448        tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
1449        tcg_out_insn(s, RIE, LOCGHI, dest, 1, cc);
1450        return;
1451    }
1452
1453    have_loc = HAVE_FACILITY(LOAD_ON_COND);
1454
1455    /* For HAVE_LOC, only the paths through GTU/GT/LEU/LE are smaller.  */
1456 restart:
1457    switch (cond) {
1458    case TCG_COND_NE:
1459        /* X != 0 is X > 0.  */
1460        if (c2const && c2 == 0) {
1461            cond = TCG_COND_GTU;
1462        } else {
1463            break;
1464        }
1465        /* fallthru */
1466
1467    case TCG_COND_GTU:
1468    case TCG_COND_GT:
1469        /* The result of a compare has CC=2 for GT and CC=3 unused.
1470           ADD LOGICAL WITH CARRY considers (CC & 2) the carry bit.  */
1471        tgen_cmp(s, type, cond, c1, c2, c2const, true);
1472        tcg_out_movi(s, type, dest, 0);
1473        tcg_out_insn(s, RRE, ALCGR, dest, dest);
1474        return;
1475
1476    case TCG_COND_EQ:
1477        /* X == 0 is X <= 0.  */
1478        if (c2const && c2 == 0) {
1479            cond = TCG_COND_LEU;
1480        } else {
1481            break;
1482        }
1483        /* fallthru */
1484
1485    case TCG_COND_LEU:
1486    case TCG_COND_LE:
1487        /* As above, but we're looking for borrow, or !carry.
1488           The second insn computes d - d - borrow, or -1 for true
1489           and 0 for false.  So we must mask to 1 bit afterward.  */
1490        tgen_cmp(s, type, cond, c1, c2, c2const, true);
1491        tcg_out_insn(s, RRE, SLBGR, dest, dest);
1492        tgen_andi(s, type, dest, 1);
1493        return;
1494
1495    case TCG_COND_GEU:
1496    case TCG_COND_LTU:
1497    case TCG_COND_LT:
1498    case TCG_COND_GE:
1499        /* Swap operands so that we can use LEU/GTU/GT/LE.  */
1500        if (c2const) {
1501            if (have_loc) {
1502                break;
1503            }
1504            tcg_out_movi(s, type, TCG_TMP0, c2);
1505            c2 = c1;
1506            c2const = 0;
1507            c1 = TCG_TMP0;
1508        } else {
1509            TCGReg t = c1;
1510            c1 = c2;
1511            c2 = t;
1512        }
1513        cond = tcg_swap_cond(cond);
1514        goto restart;
1515
1516    default:
1517        g_assert_not_reached();
1518    }
1519
1520    cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
1521    if (have_loc) {
1522        /* Emit: d = 0, t = 1, d = (cc ? t : d).  */
1523        tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
1524        tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 1);
1525        tcg_out_insn(s, RRF, LOCGR, dest, TCG_TMP0, cc);
1526    } else {
1527        /* Emit: d = 1; if (cc) goto over; d = 0; over:  */
1528        tcg_out_movi(s, type, dest, 1);
1529        tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1);
1530        tcg_out_movi(s, type, dest, 0);
1531    }
1532}
1533
1534static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest,
1535                         TCGReg c1, TCGArg c2, int c2const,
1536                         TCGArg v3, int v3const)
1537{
1538    int cc;
1539    if (HAVE_FACILITY(LOAD_ON_COND)) {
1540        cc = tgen_cmp(s, type, c, c1, c2, c2const, false);
1541        if (v3const) {
1542            tcg_out_insn(s, RIE, LOCGHI, dest, v3, cc);
1543        } else {
1544            tcg_out_insn(s, RRF, LOCGR, dest, v3, cc);
1545        }
1546    } else {
1547        c = tcg_invert_cond(c);
1548        cc = tgen_cmp(s, type, c, c1, c2, c2const, false);
1549
1550        /* Emit: if (cc) goto over; dest = r3; over:  */
1551        tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1);
1552        tcg_out_insn(s, RRE, LGR, dest, v3);
1553    }
1554}
1555
1556static void tgen_clz(TCGContext *s, TCGReg dest, TCGReg a1,
1557                     TCGArg a2, int a2const)
1558{
1559    /* Since this sets both R and R+1, we have no choice but to store the
1560       result into R0, allowing R1 == TCG_TMP0 to be clobbered as well.  */
1561    QEMU_BUILD_BUG_ON(TCG_TMP0 != TCG_REG_R1);
1562    tcg_out_insn(s, RRE, FLOGR, TCG_REG_R0, a1);
1563
1564    if (a2const && a2 == 64) {
1565        tcg_out_mov(s, TCG_TYPE_I64, dest, TCG_REG_R0);
1566    } else {
1567        if (a2const) {
1568            tcg_out_movi(s, TCG_TYPE_I64, dest, a2);
1569        } else {
1570            tcg_out_mov(s, TCG_TYPE_I64, dest, a2);
1571        }
1572        if (HAVE_FACILITY(LOAD_ON_COND)) {
1573            /* Emit: if (one bit found) dest = r0.  */
1574            tcg_out_insn(s, RRF, LOCGR, dest, TCG_REG_R0, 2);
1575        } else {
1576            /* Emit: if (no one bit found) goto over; dest = r0; over:  */
1577            tcg_out_insn(s, RI, BRC, 8, (4 + 4) >> 1);
1578            tcg_out_insn(s, RRE, LGR, dest, TCG_REG_R0);
1579        }
1580    }
1581}
1582
1583static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
1584                         int ofs, int len, int z)
1585{
1586    int lsb = (63 - ofs);
1587    int msb = lsb - (len - 1);
1588    tcg_out_risbg(s, dest, src, msb, lsb, ofs, z);
1589}
1590
1591static void tgen_extract(TCGContext *s, TCGReg dest, TCGReg src,
1592                         int ofs, int len)
1593{
1594    tcg_out_risbg(s, dest, src, 64 - len, 63, 64 - ofs, 1);
1595}
1596
1597static void tgen_gotoi(TCGContext *s, int cc, const tcg_insn_unit *dest)
1598{
1599    ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
1600    if (off == (int16_t)off) {
1601        tcg_out_insn(s, RI, BRC, cc, off);
1602    } else if (off == (int32_t)off) {
1603        tcg_out_insn(s, RIL, BRCL, cc, off);
1604    } else {
1605        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
1606        tcg_out_insn(s, RR, BCR, cc, TCG_TMP0);
1607    }
1608}
1609
1610static void tgen_branch(TCGContext *s, int cc, TCGLabel *l)
1611{
1612    if (l->has_value) {
1613        tgen_gotoi(s, cc, l->u.value_ptr);
1614    } else if (USE_LONG_BRANCHES) {
1615        tcg_out16(s, RIL_BRCL | (cc << 4));
1616        tcg_out_reloc(s, s->code_ptr, R_390_PC32DBL, l, 2);
1617        s->code_ptr += 2;
1618    } else {
1619        tcg_out16(s, RI_BRC | (cc << 4));
1620        tcg_out_reloc(s, s->code_ptr, R_390_PC16DBL, l, 2);
1621        s->code_ptr += 1;
1622    }
1623}
1624
1625static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc,
1626                                TCGReg r1, TCGReg r2, TCGLabel *l)
1627{
1628    tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
1629    tcg_out16(s, (opc & 0xff00) | (r1 << 4) | r2);
1630    tcg_out16(s, 0);
1631    tcg_out16(s, cc << 12 | (opc & 0xff));
1632}
1633
1634static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc,
1635                                    TCGReg r1, int i2, TCGLabel *l)
1636{
1637    tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
1638    tcg_out16(s, (opc & 0xff00) | (r1 << 4) | cc);
1639    tcg_out16(s, 0);
1640    tcg_out16(s, (i2 << 8) | (opc & 0xff));
1641}
1642
1643static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
1644                        TCGReg r1, TCGArg c2, int c2const, TCGLabel *l)
1645{
1646    int cc;
1647
1648    if (HAVE_FACILITY(GEN_INST_EXT)) {
1649        bool is_unsigned = is_unsigned_cond(c);
1650        bool in_range;
1651        S390Opcode opc;
1652
1653        cc = tcg_cond_to_s390_cond[c];
1654
1655        if (!c2const) {
1656            opc = (type == TCG_TYPE_I32
1657                   ? (is_unsigned ? RIE_CLRJ : RIE_CRJ)
1658                   : (is_unsigned ? RIE_CLGRJ : RIE_CGRJ));
1659            tgen_compare_branch(s, opc, cc, r1, c2, l);
1660            return;
1661        }
1662
1663        /* COMPARE IMMEDIATE AND BRANCH RELATIVE has an 8-bit immediate field.
1664           If the immediate we've been given does not fit that range, we'll
1665           fall back to separate compare and branch instructions using the
1666           larger comparison range afforded by COMPARE IMMEDIATE.  */
1667        if (type == TCG_TYPE_I32) {
1668            if (is_unsigned) {
1669                opc = RIE_CLIJ;
1670                in_range = (uint32_t)c2 == (uint8_t)c2;
1671            } else {
1672                opc = RIE_CIJ;
1673                in_range = (int32_t)c2 == (int8_t)c2;
1674            }
1675        } else {
1676            if (is_unsigned) {
1677                opc = RIE_CLGIJ;
1678                in_range = (uint64_t)c2 == (uint8_t)c2;
1679            } else {
1680                opc = RIE_CGIJ;
1681                in_range = (int64_t)c2 == (int8_t)c2;
1682            }
1683        }
1684        if (in_range) {
1685            tgen_compare_imm_branch(s, opc, cc, r1, c2, l);
1686            return;
1687        }
1688    }
1689
1690    cc = tgen_cmp(s, type, c, r1, c2, c2const, false);
1691    tgen_branch(s, cc, l);
1692}
1693
1694static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest)
1695{
1696    ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
1697    if (off == (int32_t)off) {
1698        tcg_out_insn(s, RIL, BRASL, TCG_REG_R14, off);
1699    } else {
1700        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
1701        tcg_out_insn(s, RR, BASR, TCG_REG_R14, TCG_TMP0);
1702    }
1703}
1704
1705static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,
1706                                   TCGReg base, TCGReg index, int disp)
1707{
1708    switch (opc & (MO_SSIZE | MO_BSWAP)) {
1709    case MO_UB:
1710        tcg_out_insn(s, RXY, LLGC, data, base, index, disp);
1711        break;
1712    case MO_SB:
1713        tcg_out_insn(s, RXY, LGB, data, base, index, disp);
1714        break;
1715
1716    case MO_UW | MO_BSWAP:
1717        /* swapped unsigned halfword load with upper bits zeroed */
1718        tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
1719        tgen_ext16u(s, TCG_TYPE_I64, data, data);
1720        break;
1721    case MO_UW:
1722        tcg_out_insn(s, RXY, LLGH, data, base, index, disp);
1723        break;
1724
1725    case MO_SW | MO_BSWAP:
1726        /* swapped sign-extended halfword load */
1727        tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
1728        tgen_ext16s(s, TCG_TYPE_I64, data, data);
1729        break;
1730    case MO_SW:
1731        tcg_out_insn(s, RXY, LGH, data, base, index, disp);
1732        break;
1733
1734    case MO_UL | MO_BSWAP:
1735        /* swapped unsigned int load with upper bits zeroed */
1736        tcg_out_insn(s, RXY, LRV, data, base, index, disp);
1737        tgen_ext32u(s, data, data);
1738        break;
1739    case MO_UL:
1740        tcg_out_insn(s, RXY, LLGF, data, base, index, disp);
1741        break;
1742
1743    case MO_SL | MO_BSWAP:
1744        /* swapped sign-extended int load */
1745        tcg_out_insn(s, RXY, LRV, data, base, index, disp);
1746        tgen_ext32s(s, data, data);
1747        break;
1748    case MO_SL:
1749        tcg_out_insn(s, RXY, LGF, data, base, index, disp);
1750        break;
1751
1752    case MO_UQ | MO_BSWAP:
1753        tcg_out_insn(s, RXY, LRVG, data, base, index, disp);
1754        break;
1755    case MO_UQ:
1756        tcg_out_insn(s, RXY, LG, data, base, index, disp);
1757        break;
1758
1759    default:
1760        tcg_abort();
1761    }
1762}
1763
1764static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data,
1765                                   TCGReg base, TCGReg index, int disp)
1766{
1767    switch (opc & (MO_SIZE | MO_BSWAP)) {
1768    case MO_UB:
1769        if (disp >= 0 && disp < 0x1000) {
1770            tcg_out_insn(s, RX, STC, data, base, index, disp);
1771        } else {
1772            tcg_out_insn(s, RXY, STCY, data, base, index, disp);
1773        }
1774        break;
1775
1776    case MO_UW | MO_BSWAP:
1777        tcg_out_insn(s, RXY, STRVH, data, base, index, disp);
1778        break;
1779    case MO_UW:
1780        if (disp >= 0 && disp < 0x1000) {
1781            tcg_out_insn(s, RX, STH, data, base, index, disp);
1782        } else {
1783            tcg_out_insn(s, RXY, STHY, data, base, index, disp);
1784        }
1785        break;
1786
1787    case MO_UL | MO_BSWAP:
1788        tcg_out_insn(s, RXY, STRV, data, base, index, disp);
1789        break;
1790    case MO_UL:
1791        if (disp >= 0 && disp < 0x1000) {
1792            tcg_out_insn(s, RX, ST, data, base, index, disp);
1793        } else {
1794            tcg_out_insn(s, RXY, STY, data, base, index, disp);
1795        }
1796        break;
1797
1798    case MO_UQ | MO_BSWAP:
1799        tcg_out_insn(s, RXY, STRVG, data, base, index, disp);
1800        break;
1801    case MO_UQ:
1802        tcg_out_insn(s, RXY, STG, data, base, index, disp);
1803        break;
1804
1805    default:
1806        tcg_abort();
1807    }
1808}
1809
1810#if defined(CONFIG_SOFTMMU)
1811/* We're expecting to use a 20-bit negative offset on the tlb memory ops.  */
1812QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1813QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19));
1814
1815/* Load and compare a TLB entry, leaving the flags set.  Loads the TLB
1816   addend into R2.  Returns a register with the santitized guest address.  */
1817static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
1818                               int mem_index, bool is_ld)
1819{
1820    unsigned s_bits = opc & MO_SIZE;
1821    unsigned a_bits = get_alignment_bits(opc);
1822    unsigned s_mask = (1 << s_bits) - 1;
1823    unsigned a_mask = (1 << a_bits) - 1;
1824    int fast_off = TLB_MASK_TABLE_OFS(mem_index);
1825    int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
1826    int table_off = fast_off + offsetof(CPUTLBDescFast, table);
1827    int ofs, a_off;
1828    uint64_t tlb_mask;
1829
1830    tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE,
1831                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1832    tcg_out_insn(s, RXY, NG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, mask_off);
1833    tcg_out_insn(s, RXY, AG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, table_off);
1834
1835    /* For aligned accesses, we check the first byte and include the alignment
1836       bits within the address.  For unaligned access, we check that we don't
1837       cross pages using the address of the last byte of the access.  */
1838    a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
1839    tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1840    if (HAVE_FACILITY(GEN_INST_EXT) && a_off == 0) {
1841        tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask);
1842    } else {
1843        tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off);
1844        tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask);
1845    }
1846
1847    if (is_ld) {
1848        ofs = offsetof(CPUTLBEntry, addr_read);
1849    } else {
1850        ofs = offsetof(CPUTLBEntry, addr_write);
1851    }
1852    if (TARGET_LONG_BITS == 32) {
1853        tcg_out_insn(s, RX, C, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
1854    } else {
1855        tcg_out_insn(s, RXY, CG, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
1856    }
1857
1858    tcg_out_insn(s, RXY, LG, TCG_REG_R2, TCG_REG_R2, TCG_REG_NONE,
1859                 offsetof(CPUTLBEntry, addend));
1860
1861    if (TARGET_LONG_BITS == 32) {
1862        tgen_ext32u(s, TCG_REG_R3, addr_reg);
1863        return TCG_REG_R3;
1864    }
1865    return addr_reg;
1866}
1867
1868static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
1869                                TCGReg data, TCGReg addr,
1870                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1871{
1872    TCGLabelQemuLdst *label = new_ldst_label(s);
1873
1874    label->is_ld = is_ld;
1875    label->oi = oi;
1876    label->datalo_reg = data;
1877    label->addrlo_reg = addr;
1878    label->raddr = tcg_splitwx_to_rx(raddr);
1879    label->label_ptr[0] = label_ptr;
1880}
1881
1882static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1883{
1884    TCGReg addr_reg = lb->addrlo_reg;
1885    TCGReg data_reg = lb->datalo_reg;
1886    MemOpIdx oi = lb->oi;
1887    MemOp opc = get_memop(oi);
1888
1889    if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
1890                     (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1891        return false;
1892    }
1893
1894    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
1895    if (TARGET_LONG_BITS == 64) {
1896        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
1897    }
1898    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi);
1899    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr);
1900    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]);
1901    tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
1902
1903    tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1904    return true;
1905}
1906
1907static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1908{
1909    TCGReg addr_reg = lb->addrlo_reg;
1910    TCGReg data_reg = lb->datalo_reg;
1911    MemOpIdx oi = lb->oi;
1912    MemOp opc = get_memop(oi);
1913
1914    if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
1915                     (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1916        return false;
1917    }
1918
1919    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
1920    if (TARGET_LONG_BITS == 64) {
1921        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
1922    }
1923    switch (opc & MO_SIZE) {
1924    case MO_UB:
1925        tgen_ext8u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
1926        break;
1927    case MO_UW:
1928        tgen_ext16u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
1929        break;
1930    case MO_UL:
1931        tgen_ext32u(s, TCG_REG_R4, data_reg);
1932        break;
1933    case MO_UQ:
1934        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
1935        break;
1936    default:
1937        tcg_abort();
1938    }
1939    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi);
1940    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr);
1941    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1942
1943    tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1944    return true;
1945}
1946#else
1947static void tcg_out_test_alignment(TCGContext *s, bool is_ld,
1948                                   TCGReg addrlo, unsigned a_bits)
1949{
1950    unsigned a_mask = (1 << a_bits) - 1;
1951    TCGLabelQemuLdst *l = new_ldst_label(s);
1952
1953    l->is_ld = is_ld;
1954    l->addrlo_reg = addrlo;
1955
1956    /* We are expecting a_bits to max out at 7, much lower than TMLL. */
1957    tcg_debug_assert(a_bits < 16);
1958    tcg_out_insn(s, RI, TMLL, addrlo, a_mask);
1959
1960    tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */
1961    l->label_ptr[0] = s->code_ptr;
1962    s->code_ptr += 1;
1963
1964    l->raddr = tcg_splitwx_to_rx(s->code_ptr);
1965}
1966
1967static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
1968{
1969    if (!patch_reloc(l->label_ptr[0], R_390_PC16DBL,
1970                     (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1971        return false;
1972    }
1973
1974    tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R3, l->addrlo_reg);
1975    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
1976
1977    /* "Tail call" to the helper, with the return address back inline. */
1978    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R14, (uintptr_t)l->raddr);
1979    tgen_gotoi(s, S390_CC_ALWAYS, (const void *)(l->is_ld ? helper_unaligned_ld
1980                                                 : helper_unaligned_st));
1981    return true;
1982}
1983
1984static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1985{
1986    return tcg_out_fail_alignment(s, l);
1987}
1988
1989static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1990{
1991    return tcg_out_fail_alignment(s, l);
1992}
1993
1994static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg,
1995                                  TCGReg *index_reg, tcg_target_long *disp)
1996{
1997    if (TARGET_LONG_BITS == 32) {
1998        tgen_ext32u(s, TCG_TMP0, *addr_reg);
1999        *addr_reg = TCG_TMP0;
2000    }
2001    if (guest_base < 0x80000) {
2002        *index_reg = TCG_REG_NONE;
2003        *disp = guest_base;
2004    } else {
2005        *index_reg = TCG_GUEST_BASE_REG;
2006        *disp = 0;
2007    }
2008}
2009#endif /* CONFIG_SOFTMMU */
2010
2011static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
2012                            MemOpIdx oi)
2013{
2014    MemOp opc = get_memop(oi);
2015#ifdef CONFIG_SOFTMMU
2016    unsigned mem_index = get_mmuidx(oi);
2017    tcg_insn_unit *label_ptr;
2018    TCGReg base_reg;
2019
2020    base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1);
2021
2022    tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
2023    label_ptr = s->code_ptr;
2024    s->code_ptr += 1;
2025
2026    tcg_out_qemu_ld_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
2027
2028    add_qemu_ldst_label(s, 1, oi, data_reg, addr_reg, s->code_ptr, label_ptr);
2029#else
2030    TCGReg index_reg;
2031    tcg_target_long disp;
2032    unsigned a_bits = get_alignment_bits(opc);
2033
2034    if (a_bits) {
2035        tcg_out_test_alignment(s, true, addr_reg, a_bits);
2036    }
2037    tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp);
2038    tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, index_reg, disp);
2039#endif
2040}
2041
2042static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
2043                            MemOpIdx oi)
2044{
2045    MemOp opc = get_memop(oi);
2046#ifdef CONFIG_SOFTMMU
2047    unsigned mem_index = get_mmuidx(oi);
2048    tcg_insn_unit *label_ptr;
2049    TCGReg base_reg;
2050
2051    base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0);
2052
2053    tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
2054    label_ptr = s->code_ptr;
2055    s->code_ptr += 1;
2056
2057    tcg_out_qemu_st_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
2058
2059    add_qemu_ldst_label(s, 0, oi, data_reg, addr_reg, s->code_ptr, label_ptr);
2060#else
2061    TCGReg index_reg;
2062    tcg_target_long disp;
2063    unsigned a_bits = get_alignment_bits(opc);
2064
2065    if (a_bits) {
2066        tcg_out_test_alignment(s, false, addr_reg, a_bits);
2067    }
2068    tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp);
2069    tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, index_reg, disp);
2070#endif
2071}
2072
/*
 * Expand to the pair of case labels INDEX_op_<x>_i32 and INDEX_op_<x>_i64.
 * The trailing colon of the second label is supplied at the use site.
 */
#define OP_32_64(x)                       \
    case glue(glue(INDEX_op_, x), _i32):  \
    case glue(glue(INDEX_op_, x), _i64)
2076
2077static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
2078                              const TCGArg args[TCG_MAX_OP_ARGS],
2079                              const int const_args[TCG_MAX_OP_ARGS])
2080{
2081    S390Opcode op, op2;
2082    TCGArg a0, a1, a2;
2083
2084    switch (opc) {
2085    case INDEX_op_exit_tb:
2086        /* Reuse the zeroing that exists for goto_ptr.  */
2087        a0 = args[0];
2088        if (a0 == 0) {
2089            tgen_gotoi(s, S390_CC_ALWAYS, tcg_code_gen_epilogue);
2090        } else {
2091            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, a0);
2092            tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr);
2093        }
2094        break;
2095
2096    case INDEX_op_goto_tb:
2097        a0 = args[0];
2098        if (s->tb_jmp_insn_offset) {
2099            /*
2100             * branch displacement must be aligned for atomic patching;
2101             * see if we need to add extra nop before branch
2102             */
2103            if (!QEMU_PTR_IS_ALIGNED(s->code_ptr + 1, 4)) {
2104                tcg_out16(s, NOP);
2105            }
2106            tcg_debug_assert(!USE_REG_TB);
2107            tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4));
2108            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
2109            s->code_ptr += 2;
2110        } else {
2111            /* load address stored at s->tb_jmp_target_addr + a0 */
2112            tcg_out_ld_abs(s, TCG_TYPE_PTR, TCG_REG_TB,
2113                           tcg_splitwx_to_rx(s->tb_jmp_target_addr + a0));
2114            /* and go there */
2115            tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_TB);
2116        }
2117        set_jmp_reset_offset(s, a0);
2118
2119        /* For the unlinked path of goto_tb, we need to reset
2120           TCG_REG_TB to the beginning of this TB.  */
2121        if (USE_REG_TB) {
2122            int ofs = -tcg_current_code_size(s);
2123            /* All TB are restricted to 64KiB by unwind info. */
2124            tcg_debug_assert(ofs == sextract64(ofs, 0, 20));
2125            tcg_out_insn(s, RXY, LAY, TCG_REG_TB,
2126                         TCG_REG_TB, TCG_REG_NONE, ofs);
2127        }
2128        break;
2129
2130    case INDEX_op_goto_ptr:
2131        a0 = args[0];
2132        if (USE_REG_TB) {
2133            tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, a0);
2134        }
2135        tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, a0);
2136        break;
2137
2138    OP_32_64(ld8u):
2139        /* ??? LLC (RXY format) is only present with the extended-immediate
2140           facility, whereas LLGC is always present.  */
2141        tcg_out_mem(s, 0, RXY_LLGC, args[0], args[1], TCG_REG_NONE, args[2]);
2142        break;
2143
2144    OP_32_64(ld8s):
2145        /* ??? LB is no smaller than LGB, so no point to using it.  */
2146        tcg_out_mem(s, 0, RXY_LGB, args[0], args[1], TCG_REG_NONE, args[2]);
2147        break;
2148
2149    OP_32_64(ld16u):
2150        /* ??? LLH (RXY format) is only present with the extended-immediate
2151           facility, whereas LLGH is always present.  */
2152        tcg_out_mem(s, 0, RXY_LLGH, args[0], args[1], TCG_REG_NONE, args[2]);
2153        break;
2154
2155    case INDEX_op_ld16s_i32:
2156        tcg_out_mem(s, RX_LH, RXY_LHY, args[0], args[1], TCG_REG_NONE, args[2]);
2157        break;
2158
2159    case INDEX_op_ld_i32:
2160        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2161        break;
2162
2163    OP_32_64(st8):
2164        tcg_out_mem(s, RX_STC, RXY_STCY, args[0], args[1],
2165                    TCG_REG_NONE, args[2]);
2166        break;
2167
2168    OP_32_64(st16):
2169        tcg_out_mem(s, RX_STH, RXY_STHY, args[0], args[1],
2170                    TCG_REG_NONE, args[2]);
2171        break;
2172
2173    case INDEX_op_st_i32:
2174        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2175        break;
2176
2177    case INDEX_op_add_i32:
2178        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2179        if (const_args[2]) {
2180        do_addi_32:
2181            if (a0 == a1) {
2182                if (a2 == (int16_t)a2) {
2183                    tcg_out_insn(s, RI, AHI, a0, a2);
2184                    break;
2185                }
2186                if (HAVE_FACILITY(EXT_IMM)) {
2187                    tcg_out_insn(s, RIL, AFI, a0, a2);
2188                    break;
2189                }
2190            }
2191            tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
2192        } else if (a0 == a1) {
2193            tcg_out_insn(s, RR, AR, a0, a2);
2194        } else {
2195            tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
2196        }
2197        break;
2198    case INDEX_op_sub_i32:
2199        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2200        if (const_args[2]) {
2201            a2 = -a2;
2202            goto do_addi_32;
2203        } else if (a0 == a1) {
2204            tcg_out_insn(s, RR, SR, a0, a2);
2205        } else {
2206            tcg_out_insn(s, RRF, SRK, a0, a1, a2);
2207        }
2208        break;
2209
2210    case INDEX_op_and_i32:
2211        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2212        if (const_args[2]) {
2213            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2214            tgen_andi(s, TCG_TYPE_I32, a0, a2);
2215        } else if (a0 == a1) {
2216            tcg_out_insn(s, RR, NR, a0, a2);
2217        } else {
2218            tcg_out_insn(s, RRF, NRK, a0, a1, a2);
2219        }
2220        break;
2221    case INDEX_op_or_i32:
2222        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2223        if (const_args[2]) {
2224            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2225            tgen_ori(s, TCG_TYPE_I32, a0, a2);
2226        } else if (a0 == a1) {
2227            tcg_out_insn(s, RR, OR, a0, a2);
2228        } else {
2229            tcg_out_insn(s, RRF, ORK, a0, a1, a2);
2230        }
2231        break;
2232    case INDEX_op_xor_i32:
2233        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2234        if (const_args[2]) {
2235            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2236            tgen_xori(s, TCG_TYPE_I32, a0, a2);
2237        } else if (a0 == a1) {
2238            tcg_out_insn(s, RR, XR, args[0], args[2]);
2239        } else {
2240            tcg_out_insn(s, RRF, XRK, a0, a1, a2);
2241        }
2242        break;
2243
2244    case INDEX_op_neg_i32:
2245        tcg_out_insn(s, RR, LCR, args[0], args[1]);
2246        break;
2247
2248    case INDEX_op_mul_i32:
2249        if (const_args[2]) {
2250            if ((int32_t)args[2] == (int16_t)args[2]) {
2251                tcg_out_insn(s, RI, MHI, args[0], args[2]);
2252            } else {
2253                tcg_out_insn(s, RIL, MSFI, args[0], args[2]);
2254            }
2255        } else {
2256            tcg_out_insn(s, RRE, MSR, args[0], args[2]);
2257        }
2258        break;
2259
2260    case INDEX_op_div2_i32:
2261        tcg_out_insn(s, RR, DR, TCG_REG_R2, args[4]);
2262        break;
2263    case INDEX_op_divu2_i32:
2264        tcg_out_insn(s, RRE, DLR, TCG_REG_R2, args[4]);
2265        break;
2266
2267    case INDEX_op_shl_i32:
2268        op = RS_SLL;
2269        op2 = RSY_SLLK;
2270    do_shift32:
2271        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2272        if (a0 == a1) {
2273            if (const_args[2]) {
2274                tcg_out_sh32(s, op, a0, TCG_REG_NONE, a2);
2275            } else {
2276                tcg_out_sh32(s, op, a0, a2, 0);
2277            }
2278        } else {
2279            /* Using tcg_out_sh64 here for the format; it is a 32-bit shift.  */
2280            if (const_args[2]) {
2281                tcg_out_sh64(s, op2, a0, a1, TCG_REG_NONE, a2);
2282            } else {
2283                tcg_out_sh64(s, op2, a0, a1, a2, 0);
2284            }
2285        }
2286        break;
2287    case INDEX_op_shr_i32:
2288        op = RS_SRL;
2289        op2 = RSY_SRLK;
2290        goto do_shift32;
2291    case INDEX_op_sar_i32:
2292        op = RS_SRA;
2293        op2 = RSY_SRAK;
2294        goto do_shift32;
2295
2296    case INDEX_op_rotl_i32:
2297        /* ??? Using tcg_out_sh64 here for the format; it is a 32-bit rol.  */
2298        if (const_args[2]) {
2299            tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_REG_NONE, args[2]);
2300        } else {
2301            tcg_out_sh64(s, RSY_RLL, args[0], args[1], args[2], 0);
2302        }
2303        break;
2304    case INDEX_op_rotr_i32:
2305        if (const_args[2]) {
2306            tcg_out_sh64(s, RSY_RLL, args[0], args[1],
2307                         TCG_REG_NONE, (32 - args[2]) & 31);
2308        } else {
2309            tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
2310            tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_TMP0, 0);
2311        }
2312        break;
2313
2314    case INDEX_op_ext8s_i32:
2315        tgen_ext8s(s, TCG_TYPE_I32, args[0], args[1]);
2316        break;
2317    case INDEX_op_ext16s_i32:
2318        tgen_ext16s(s, TCG_TYPE_I32, args[0], args[1]);
2319        break;
2320    case INDEX_op_ext8u_i32:
2321        tgen_ext8u(s, TCG_TYPE_I32, args[0], args[1]);
2322        break;
2323    case INDEX_op_ext16u_i32:
2324        tgen_ext16u(s, TCG_TYPE_I32, args[0], args[1]);
2325        break;
2326
2327    case INDEX_op_bswap16_i32:
2328        a0 = args[0], a1 = args[1], a2 = args[2];
2329        tcg_out_insn(s, RRE, LRVR, a0, a1);
2330        if (a2 & TCG_BSWAP_OS) {
2331            tcg_out_sh32(s, RS_SRA, a0, TCG_REG_NONE, 16);
2332        } else {
2333            tcg_out_sh32(s, RS_SRL, a0, TCG_REG_NONE, 16);
2334        }
2335        break;
2336    case INDEX_op_bswap16_i64:
2337        a0 = args[0], a1 = args[1], a2 = args[2];
2338        tcg_out_insn(s, RRE, LRVGR, a0, a1);
2339        if (a2 & TCG_BSWAP_OS) {
2340            tcg_out_sh64(s, RSY_SRAG, a0, a0, TCG_REG_NONE, 48);
2341        } else {
2342            tcg_out_sh64(s, RSY_SRLG, a0, a0, TCG_REG_NONE, 48);
2343        }
2344        break;
2345
2346    case INDEX_op_bswap32_i32:
2347        tcg_out_insn(s, RRE, LRVR, args[0], args[1]);
2348        break;
2349    case INDEX_op_bswap32_i64:
2350        a0 = args[0], a1 = args[1], a2 = args[2];
2351        tcg_out_insn(s, RRE, LRVR, a0, a1);
2352        if (a2 & TCG_BSWAP_OS) {
2353            tgen_ext32s(s, a0, a0);
2354        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2355            tgen_ext32u(s, a0, a0);
2356        }
2357        break;
2358
2359    case INDEX_op_add2_i32:
2360        if (const_args[4]) {
2361            tcg_out_insn(s, RIL, ALFI, args[0], args[4]);
2362        } else {
2363            tcg_out_insn(s, RR, ALR, args[0], args[4]);
2364        }
2365        tcg_out_insn(s, RRE, ALCR, args[1], args[5]);
2366        break;
2367    case INDEX_op_sub2_i32:
2368        if (const_args[4]) {
2369            tcg_out_insn(s, RIL, SLFI, args[0], args[4]);
2370        } else {
2371            tcg_out_insn(s, RR, SLR, args[0], args[4]);
2372        }
2373        tcg_out_insn(s, RRE, SLBR, args[1], args[5]);
2374        break;
2375
2376    case INDEX_op_br:
2377        tgen_branch(s, S390_CC_ALWAYS, arg_label(args[0]));
2378        break;
2379
2380    case INDEX_op_brcond_i32:
2381        tgen_brcond(s, TCG_TYPE_I32, args[2], args[0],
2382                    args[1], const_args[1], arg_label(args[3]));
2383        break;
2384    case INDEX_op_setcond_i32:
2385        tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1],
2386                     args[2], const_args[2]);
2387        break;
2388    case INDEX_op_movcond_i32:
2389        tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1],
2390                     args[2], const_args[2], args[3], const_args[3]);
2391        break;
2392
2393    case INDEX_op_qemu_ld_i32:
2394        /* ??? Technically we can use a non-extending instruction.  */
2395    case INDEX_op_qemu_ld_i64:
2396        tcg_out_qemu_ld(s, args[0], args[1], args[2]);
2397        break;
2398    case INDEX_op_qemu_st_i32:
2399    case INDEX_op_qemu_st_i64:
2400        tcg_out_qemu_st(s, args[0], args[1], args[2]);
2401        break;
2402
2403    case INDEX_op_ld16s_i64:
2404        tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]);
2405        break;
2406    case INDEX_op_ld32u_i64:
2407        tcg_out_mem(s, 0, RXY_LLGF, args[0], args[1], TCG_REG_NONE, args[2]);
2408        break;
2409    case INDEX_op_ld32s_i64:
2410        tcg_out_mem(s, 0, RXY_LGF, args[0], args[1], TCG_REG_NONE, args[2]);
2411        break;
2412    case INDEX_op_ld_i64:
2413        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2414        break;
2415
2416    case INDEX_op_st32_i64:
2417        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2418        break;
2419    case INDEX_op_st_i64:
2420        tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2421        break;
2422
2423    case INDEX_op_add_i64:
2424        a0 = args[0], a1 = args[1], a2 = args[2];
2425        if (const_args[2]) {
2426        do_addi_64:
2427            if (a0 == a1) {
2428                if (a2 == (int16_t)a2) {
2429                    tcg_out_insn(s, RI, AGHI, a0, a2);
2430                    break;
2431                }
2432                if (HAVE_FACILITY(EXT_IMM)) {
2433                    if (a2 == (int32_t)a2) {
2434                        tcg_out_insn(s, RIL, AGFI, a0, a2);
2435                        break;
2436                    } else if (a2 == (uint32_t)a2) {
2437                        tcg_out_insn(s, RIL, ALGFI, a0, a2);
2438                        break;
2439                    } else if (-a2 == (uint32_t)-a2) {
2440                        tcg_out_insn(s, RIL, SLGFI, a0, -a2);
2441                        break;
2442                    }
2443                }
2444            }
2445            tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
2446        } else if (a0 == a1) {
2447            tcg_out_insn(s, RRE, AGR, a0, a2);
2448        } else {
2449            tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
2450        }
2451        break;
2452    case INDEX_op_sub_i64:
2453        a0 = args[0], a1 = args[1], a2 = args[2];
2454        if (const_args[2]) {
2455            a2 = -a2;
2456            goto do_addi_64;
2457        } else if (a0 == a1) {
2458            tcg_out_insn(s, RRE, SGR, a0, a2);
2459        } else {
2460            tcg_out_insn(s, RRF, SGRK, a0, a1, a2);
2461        }
2462        break;
2463
2464    case INDEX_op_and_i64:
2465        a0 = args[0], a1 = args[1], a2 = args[2];
2466        if (const_args[2]) {
2467            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2468            tgen_andi(s, TCG_TYPE_I64, args[0], args[2]);
2469        } else if (a0 == a1) {
2470            tcg_out_insn(s, RRE, NGR, args[0], args[2]);
2471        } else {
2472            tcg_out_insn(s, RRF, NGRK, a0, a1, a2);
2473        }
2474        break;
2475    case INDEX_op_or_i64:
2476        a0 = args[0], a1 = args[1], a2 = args[2];
2477        if (const_args[2]) {
2478            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2479            tgen_ori(s, TCG_TYPE_I64, a0, a2);
2480        } else if (a0 == a1) {
2481            tcg_out_insn(s, RRE, OGR, a0, a2);
2482        } else {
2483            tcg_out_insn(s, RRF, OGRK, a0, a1, a2);
2484        }
2485        break;
2486    case INDEX_op_xor_i64:
2487        a0 = args[0], a1 = args[1], a2 = args[2];
2488        if (const_args[2]) {
2489            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2490            tgen_xori(s, TCG_TYPE_I64, a0, a2);
2491        } else if (a0 == a1) {
2492            tcg_out_insn(s, RRE, XGR, a0, a2);
2493        } else {
2494            tcg_out_insn(s, RRF, XGRK, a0, a1, a2);
2495        }
2496        break;
2497
2498    case INDEX_op_neg_i64:
2499        tcg_out_insn(s, RRE, LCGR, args[0], args[1]);
2500        break;
2501    case INDEX_op_bswap64_i64:
2502        tcg_out_insn(s, RRE, LRVGR, args[0], args[1]);
2503        break;
2504
2505    case INDEX_op_mul_i64:
2506        if (const_args[2]) {
2507            if (args[2] == (int16_t)args[2]) {
2508                tcg_out_insn(s, RI, MGHI, args[0], args[2]);
2509            } else {
2510                tcg_out_insn(s, RIL, MSGFI, args[0], args[2]);
2511            }
2512        } else {
2513            tcg_out_insn(s, RRE, MSGR, args[0], args[2]);
2514        }
2515        break;
2516
2517    case INDEX_op_div2_i64:
2518        /* ??? We get an unnecessary sign-extension of the dividend
2519           into R3 with this definition, but as we do in fact always
2520           produce both quotient and remainder using INDEX_op_div_i64
2521           instead requires jumping through even more hoops.  */
2522        tcg_out_insn(s, RRE, DSGR, TCG_REG_R2, args[4]);
2523        break;
2524    case INDEX_op_divu2_i64:
2525        tcg_out_insn(s, RRE, DLGR, TCG_REG_R2, args[4]);
2526        break;
2527    case INDEX_op_mulu2_i64:
2528        tcg_out_insn(s, RRE, MLGR, TCG_REG_R2, args[3]);
2529        break;
2530
2531    case INDEX_op_shl_i64:
2532        op = RSY_SLLG;
2533    do_shift64:
2534        if (const_args[2]) {
2535            tcg_out_sh64(s, op, args[0], args[1], TCG_REG_NONE, args[2]);
2536        } else {
2537            tcg_out_sh64(s, op, args[0], args[1], args[2], 0);
2538        }
2539        break;
2540    case INDEX_op_shr_i64:
2541        op = RSY_SRLG;
2542        goto do_shift64;
2543    case INDEX_op_sar_i64:
2544        op = RSY_SRAG;
2545        goto do_shift64;
2546
2547    case INDEX_op_rotl_i64:
2548        if (const_args[2]) {
2549            tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
2550                         TCG_REG_NONE, args[2]);
2551        } else {
2552            tcg_out_sh64(s, RSY_RLLG, args[0], args[1], args[2], 0);
2553        }
2554        break;
2555    case INDEX_op_rotr_i64:
2556        if (const_args[2]) {
2557            tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
2558                         TCG_REG_NONE, (64 - args[2]) & 63);
2559        } else {
2560            /* We can use the smaller 32-bit negate because only the
2561               low 6 bits are examined for the rotate.  */
2562            tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
2563            tcg_out_sh64(s, RSY_RLLG, args[0], args[1], TCG_TMP0, 0);
2564        }
2565        break;
2566
2567    case INDEX_op_ext8s_i64:
2568        tgen_ext8s(s, TCG_TYPE_I64, args[0], args[1]);
2569        break;
2570    case INDEX_op_ext16s_i64:
2571        tgen_ext16s(s, TCG_TYPE_I64, args[0], args[1]);
2572        break;
2573    case INDEX_op_ext_i32_i64:
2574    case INDEX_op_ext32s_i64:
2575        tgen_ext32s(s, args[0], args[1]);
2576        break;
2577    case INDEX_op_ext8u_i64:
2578        tgen_ext8u(s, TCG_TYPE_I64, args[0], args[1]);
2579        break;
2580    case INDEX_op_ext16u_i64:
2581        tgen_ext16u(s, TCG_TYPE_I64, args[0], args[1]);
2582        break;
2583    case INDEX_op_extu_i32_i64:
2584    case INDEX_op_ext32u_i64:
2585        tgen_ext32u(s, args[0], args[1]);
2586        break;
2587
2588    case INDEX_op_add2_i64:
2589        if (const_args[4]) {
2590            if ((int64_t)args[4] >= 0) {
2591                tcg_out_insn(s, RIL, ALGFI, args[0], args[4]);
2592            } else {
2593                tcg_out_insn(s, RIL, SLGFI, args[0], -args[4]);
2594            }
2595        } else {
2596            tcg_out_insn(s, RRE, ALGR, args[0], args[4]);
2597        }
2598        tcg_out_insn(s, RRE, ALCGR, args[1], args[5]);
2599        break;
2600    case INDEX_op_sub2_i64:
2601        if (const_args[4]) {
2602            if ((int64_t)args[4] >= 0) {
2603                tcg_out_insn(s, RIL, SLGFI, args[0], args[4]);
2604            } else {
2605                tcg_out_insn(s, RIL, ALGFI, args[0], -args[4]);
2606            }
2607        } else {
2608            tcg_out_insn(s, RRE, SLGR, args[0], args[4]);
2609        }
2610        tcg_out_insn(s, RRE, SLBGR, args[1], args[5]);
2611        break;
2612
2613    case INDEX_op_brcond_i64:
2614        tgen_brcond(s, TCG_TYPE_I64, args[2], args[0],
2615                    args[1], const_args[1], arg_label(args[3]));
2616        break;
2617    case INDEX_op_setcond_i64:
2618        tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1],
2619                     args[2], const_args[2]);
2620        break;
2621    case INDEX_op_movcond_i64:
2622        tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1],
2623                     args[2], const_args[2], args[3], const_args[3]);
2624        break;
2625
2626    OP_32_64(deposit):
2627        a0 = args[0], a1 = args[1], a2 = args[2];
2628        if (const_args[1]) {
2629            tgen_deposit(s, a0, a2, args[3], args[4], 1);
2630        } else {
2631            /* Since we can't support "0Z" as a constraint, we allow a1 in
2632               any register.  Fix things up as if a matching constraint.  */
2633            if (a0 != a1) {
2634                TCGType type = (opc == INDEX_op_deposit_i64);
2635                if (a0 == a2) {
2636                    tcg_out_mov(s, type, TCG_TMP0, a2);
2637                    a2 = TCG_TMP0;
2638                }
2639                tcg_out_mov(s, type, a0, a1);
2640            }
2641            tgen_deposit(s, a0, a2, args[3], args[4], 0);
2642        }
2643        break;
2644
2645    OP_32_64(extract):
2646        tgen_extract(s, args[0], args[1], args[2], args[3]);
2647        break;
2648
2649    case INDEX_op_clz_i64:
2650        tgen_clz(s, args[0], args[1], args[2], const_args[2]);
2651        break;
2652
2653    case INDEX_op_mb:
2654        /* The host memory model is quite strong, we simply need to
2655           serialize the instruction stream.  */
2656        if (args[0] & TCG_MO_ST_LD) {
2657            tcg_out_insn(s, RR, BCR, HAVE_FACILITY(FAST_BCR_SER) ? 14 : 15, 0);
2658        }
2659        break;
2660
2661    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2662    case INDEX_op_mov_i64:
2663    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2664    default:
2665        tcg_abort();
2666    }
2667}
2668
2669static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
2670                            TCGReg dst, TCGReg src)
2671{
2672    if (is_general_reg(src)) {
2673        /* Replicate general register into two MO_64. */
2674        tcg_out_insn(s, VRRf, VLVGP, dst, src, src);
2675        if (vece == MO_64) {
2676            return true;
2677        }
2678        src = dst;
2679    }
2680
2681    /*
2682     * Recall that the "standard" integer, within a vector, is the
2683     * rightmost element of the leftmost doubleword, a-la VLLEZ.
2684     */
2685    tcg_out_insn(s, VRIc, VREP, dst, (8 >> vece) - 1, src, vece);
2686    return true;
2687}
2688
2689static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
2690                             TCGReg dst, TCGReg base, intptr_t offset)
2691{
2692    tcg_out_vrx_mem(s, VRX_VLREP, dst, base, TCG_REG_NONE, offset, vece);
2693    return true;
2694}
2695
2696static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
2697                             TCGReg dst, int64_t val)
2698{
2699    int i, mask, msb, lsb;
2700
2701    /* Look for int16_t elements.  */
2702    if (vece <= MO_16 ||
2703        (vece == MO_32 ? (int32_t)val : val) == (int16_t)val) {
2704        tcg_out_insn(s, VRIa, VREPI, dst, val, vece);
2705        return;
2706    }
2707
2708    /* Look for bit masks.  */
2709    if (vece == MO_32) {
2710        if (risbg_mask((int32_t)val)) {
2711            /* Handle wraparound by swapping msb and lsb.  */
2712            if ((val & 0x80000001u) == 0x80000001u) {
2713                msb = 32 - ctz32(~val);
2714                lsb = clz32(~val) - 1;
2715            } else {
2716                msb = clz32(val);
2717                lsb = 31 - ctz32(val);
2718            }
2719            tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_32);
2720            return;
2721        }
2722    } else {
2723        if (risbg_mask(val)) {
2724            /* Handle wraparound by swapping msb and lsb.  */
2725            if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
2726                /* Handle wraparound by swapping msb and lsb.  */
2727                msb = 64 - ctz64(~val);
2728                lsb = clz64(~val) - 1;
2729            } else {
2730                msb = clz64(val);
2731                lsb = 63 - ctz64(val);
2732            }
2733            tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_64);
2734            return;
2735        }
2736    }
2737
2738    /* Look for all bytes 0x00 or 0xff.  */
2739    for (i = mask = 0; i < 8; i++) {
2740        uint8_t byte = val >> (i * 8);
2741        if (byte == 0xff) {
2742            mask |= 1 << i;
2743        } else if (byte != 0) {
2744            break;
2745        }
2746    }
2747    if (i == 8) {
2748        tcg_out_insn(s, VRIa, VGBM, dst, mask * 0x0101, 0);
2749        return;
2750    }
2751
2752    /* Otherwise, stuff it in the constant pool.  */
2753    tcg_out_insn(s, RIL, LARL, TCG_TMP0, 0);
2754    new_pool_label(s, val, R_390_PC32DBL, s->code_ptr - 2, 2);
2755    tcg_out_insn(s, VRX, VLREP, dst, TCG_TMP0, TCG_REG_NONE, 0, MO_64);
2756}
2757
2758static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2759                           unsigned vecl, unsigned vece,
2760                           const TCGArg args[TCG_MAX_OP_ARGS],
2761                           const int const_args[TCG_MAX_OP_ARGS])
2762{
2763    TCGType type = vecl + TCG_TYPE_V64;
2764    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
2765
2766    switch (opc) {
2767    case INDEX_op_ld_vec:
2768        tcg_out_ld(s, type, a0, a1, a2);
2769        break;
2770    case INDEX_op_st_vec:
2771        tcg_out_st(s, type, a0, a1, a2);
2772        break;
2773    case INDEX_op_dupm_vec:
2774        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2775        break;
2776
2777    case INDEX_op_abs_vec:
2778        tcg_out_insn(s, VRRa, VLP, a0, a1, vece);
2779        break;
2780    case INDEX_op_neg_vec:
2781        tcg_out_insn(s, VRRa, VLC, a0, a1, vece);
2782        break;
2783    case INDEX_op_not_vec:
2784        tcg_out_insn(s, VRRc, VNO, a0, a1, a1, 0);
2785        break;
2786
2787    case INDEX_op_add_vec:
2788        tcg_out_insn(s, VRRc, VA, a0, a1, a2, vece);
2789        break;
2790    case INDEX_op_sub_vec:
2791        tcg_out_insn(s, VRRc, VS, a0, a1, a2, vece);
2792        break;
2793    case INDEX_op_and_vec:
2794        tcg_out_insn(s, VRRc, VN, a0, a1, a2, 0);
2795        break;
2796    case INDEX_op_andc_vec:
2797        tcg_out_insn(s, VRRc, VNC, a0, a1, a2, 0);
2798        break;
2799    case INDEX_op_mul_vec:
2800        tcg_out_insn(s, VRRc, VML, a0, a1, a2, vece);
2801        break;
2802    case INDEX_op_or_vec:
2803        tcg_out_insn(s, VRRc, VO, a0, a1, a2, 0);
2804        break;
2805    case INDEX_op_orc_vec:
2806        tcg_out_insn(s, VRRc, VOC, a0, a1, a2, 0);
2807        break;
2808    case INDEX_op_xor_vec:
2809        tcg_out_insn(s, VRRc, VX, a0, a1, a2, 0);
2810        break;
2811    case INDEX_op_nand_vec:
2812        tcg_out_insn(s, VRRc, VNN, a0, a1, a2, 0);
2813        break;
2814    case INDEX_op_nor_vec:
2815        tcg_out_insn(s, VRRc, VNO, a0, a1, a2, 0);
2816        break;
2817    case INDEX_op_eqv_vec:
2818        tcg_out_insn(s, VRRc, VNX, a0, a1, a2, 0);
2819        break;
2820
2821    case INDEX_op_shli_vec:
2822        tcg_out_insn(s, VRSa, VESL, a0, a2, TCG_REG_NONE, a1, vece);
2823        break;
2824    case INDEX_op_shri_vec:
2825        tcg_out_insn(s, VRSa, VESRL, a0, a2, TCG_REG_NONE, a1, vece);
2826        break;
2827    case INDEX_op_sari_vec:
2828        tcg_out_insn(s, VRSa, VESRA, a0, a2, TCG_REG_NONE, a1, vece);
2829        break;
2830    case INDEX_op_rotli_vec:
2831        tcg_out_insn(s, VRSa, VERLL, a0, a2, TCG_REG_NONE, a1, vece);
2832        break;
2833    case INDEX_op_shls_vec:
2834        tcg_out_insn(s, VRSa, VESL, a0, 0, a2, a1, vece);
2835        break;
2836    case INDEX_op_shrs_vec:
2837        tcg_out_insn(s, VRSa, VESRL, a0, 0, a2, a1, vece);
2838        break;
2839    case INDEX_op_sars_vec:
2840        tcg_out_insn(s, VRSa, VESRA, a0, 0, a2, a1, vece);
2841        break;
2842    case INDEX_op_rotls_vec:
2843        tcg_out_insn(s, VRSa, VERLL, a0, 0, a2, a1, vece);
2844        break;
2845    case INDEX_op_shlv_vec:
2846        tcg_out_insn(s, VRRc, VESLV, a0, a1, a2, vece);
2847        break;
2848    case INDEX_op_shrv_vec:
2849        tcg_out_insn(s, VRRc, VESRLV, a0, a1, a2, vece);
2850        break;
2851    case INDEX_op_sarv_vec:
2852        tcg_out_insn(s, VRRc, VESRAV, a0, a1, a2, vece);
2853        break;
2854    case INDEX_op_rotlv_vec:
2855        tcg_out_insn(s, VRRc, VERLLV, a0, a1, a2, vece);
2856        break;
2857
2858    case INDEX_op_smin_vec:
2859        tcg_out_insn(s, VRRc, VMN, a0, a1, a2, vece);
2860        break;
2861    case INDEX_op_smax_vec:
2862        tcg_out_insn(s, VRRc, VMX, a0, a1, a2, vece);
2863        break;
2864    case INDEX_op_umin_vec:
2865        tcg_out_insn(s, VRRc, VMNL, a0, a1, a2, vece);
2866        break;
2867    case INDEX_op_umax_vec:
2868        tcg_out_insn(s, VRRc, VMXL, a0, a1, a2, vece);
2869        break;
2870
2871    case INDEX_op_bitsel_vec:
2872        tcg_out_insn(s, VRRe, VSEL, a0, a2, args[3], a1);
2873        break;
2874
2875    case INDEX_op_cmp_vec:
2876        switch ((TCGCond)args[3]) {
2877        case TCG_COND_EQ:
2878            tcg_out_insn(s, VRRc, VCEQ, a0, a1, a2, vece);
2879            break;
2880        case TCG_COND_GT:
2881            tcg_out_insn(s, VRRc, VCH, a0, a1, a2, vece);
2882            break;
2883        case TCG_COND_GTU:
2884            tcg_out_insn(s, VRRc, VCHL, a0, a1, a2, vece);
2885            break;
2886        default:
2887            g_assert_not_reached();
2888        }
2889        break;
2890
2891    case INDEX_op_s390_vuph_vec:
2892        tcg_out_insn(s, VRRa, VUPH, a0, a1, vece);
2893        break;
2894    case INDEX_op_s390_vupl_vec:
2895        tcg_out_insn(s, VRRa, VUPL, a0, a1, vece);
2896        break;
2897    case INDEX_op_s390_vpks_vec:
2898        tcg_out_insn(s, VRRc, VPKS, a0, a1, a2, vece);
2899        break;
2900
2901    case INDEX_op_mov_vec:   /* Always emitted via tcg_out_mov.  */
2902    case INDEX_op_dup_vec:   /* Always emitted via tcg_out_dup_vec.  */
2903    default:
2904        g_assert_not_reached();
2905    }
2906}
2907
2908int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2909{
2910    switch (opc) {
2911    case INDEX_op_abs_vec:
2912    case INDEX_op_add_vec:
2913    case INDEX_op_and_vec:
2914    case INDEX_op_andc_vec:
2915    case INDEX_op_bitsel_vec:
2916    case INDEX_op_eqv_vec:
2917    case INDEX_op_nand_vec:
2918    case INDEX_op_neg_vec:
2919    case INDEX_op_nor_vec:
2920    case INDEX_op_not_vec:
2921    case INDEX_op_or_vec:
2922    case INDEX_op_orc_vec:
2923    case INDEX_op_rotli_vec:
2924    case INDEX_op_rotls_vec:
2925    case INDEX_op_rotlv_vec:
2926    case INDEX_op_sari_vec:
2927    case INDEX_op_sars_vec:
2928    case INDEX_op_sarv_vec:
2929    case INDEX_op_shli_vec:
2930    case INDEX_op_shls_vec:
2931    case INDEX_op_shlv_vec:
2932    case INDEX_op_shri_vec:
2933    case INDEX_op_shrs_vec:
2934    case INDEX_op_shrv_vec:
2935    case INDEX_op_smax_vec:
2936    case INDEX_op_smin_vec:
2937    case INDEX_op_sub_vec:
2938    case INDEX_op_umax_vec:
2939    case INDEX_op_umin_vec:
2940    case INDEX_op_xor_vec:
2941        return 1;
2942    case INDEX_op_cmp_vec:
2943    case INDEX_op_cmpsel_vec:
2944    case INDEX_op_rotrv_vec:
2945        return -1;
2946    case INDEX_op_mul_vec:
2947        return vece < MO_64;
2948    case INDEX_op_ssadd_vec:
2949    case INDEX_op_sssub_vec:
2950        return vece < MO_64 ? -1 : 0;
2951    default:
2952        return 0;
2953    }
2954}
2955
2956static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0,
2957                                 TCGv_vec v1, TCGv_vec v2, TCGCond cond)
2958{
2959    bool need_swap = false, need_inv = false;
2960
2961    switch (cond) {
2962    case TCG_COND_EQ:
2963    case TCG_COND_GT:
2964    case TCG_COND_GTU:
2965        break;
2966    case TCG_COND_NE:
2967    case TCG_COND_LE:
2968    case TCG_COND_LEU:
2969        need_inv = true;
2970        break;
2971    case TCG_COND_LT:
2972    case TCG_COND_LTU:
2973        need_swap = true;
2974        break;
2975    case TCG_COND_GE:
2976    case TCG_COND_GEU:
2977        need_swap = need_inv = true;
2978        break;
2979    default:
2980        g_assert_not_reached();
2981    }
2982
2983    if (need_inv) {
2984        cond = tcg_invert_cond(cond);
2985    }
2986    if (need_swap) {
2987        TCGv_vec t1;
2988        t1 = v1, v1 = v2, v2 = t1;
2989        cond = tcg_swap_cond(cond);
2990    }
2991
2992    vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
2993              tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
2994
2995    return need_inv;
2996}
2997
2998static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
2999                           TCGv_vec v1, TCGv_vec v2, TCGCond cond)
3000{
3001    if (expand_vec_cmp_noinv(type, vece, v0, v1, v2, cond)) {
3002        tcg_gen_not_vec(vece, v0, v0);
3003    }
3004}
3005
3006static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGv_vec v0,
3007                              TCGv_vec c1, TCGv_vec c2,
3008                              TCGv_vec v3, TCGv_vec v4, TCGCond cond)
3009{
3010    TCGv_vec t = tcg_temp_new_vec(type);
3011
3012    if (expand_vec_cmp_noinv(type, vece, t, c1, c2, cond)) {
3013        /* Invert the sense of the compare by swapping arguments.  */
3014        tcg_gen_bitsel_vec(vece, v0, t, v4, v3);
3015    } else {
3016        tcg_gen_bitsel_vec(vece, v0, t, v3, v4);
3017    }
3018    tcg_temp_free_vec(t);
3019}
3020
3021static void expand_vec_sat(TCGType type, unsigned vece, TCGv_vec v0,
3022                           TCGv_vec v1, TCGv_vec v2, TCGOpcode add_sub_opc)
3023{
3024    TCGv_vec h1 = tcg_temp_new_vec(type);
3025    TCGv_vec h2 = tcg_temp_new_vec(type);
3026    TCGv_vec l1 = tcg_temp_new_vec(type);
3027    TCGv_vec l2 = tcg_temp_new_vec(type);
3028
3029    tcg_debug_assert (vece < MO_64);
3030
3031    /* Unpack with sign-extension. */
3032    vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
3033              tcgv_vec_arg(h1), tcgv_vec_arg(v1));
3034    vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
3035              tcgv_vec_arg(h2), tcgv_vec_arg(v2));
3036
3037    vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
3038              tcgv_vec_arg(l1), tcgv_vec_arg(v1));
3039    vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
3040              tcgv_vec_arg(l2), tcgv_vec_arg(v2));
3041
3042    /* Arithmetic on a wider element size. */
3043    vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(h1),
3044              tcgv_vec_arg(h1), tcgv_vec_arg(h2));
3045    vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(l1),
3046              tcgv_vec_arg(l1), tcgv_vec_arg(l2));
3047
3048    /* Pack with saturation. */
3049    vec_gen_3(INDEX_op_s390_vpks_vec, type, vece + 1,
3050              tcgv_vec_arg(v0), tcgv_vec_arg(h1), tcgv_vec_arg(l1));
3051
3052    tcg_temp_free_vec(h1);
3053    tcg_temp_free_vec(h2);
3054    tcg_temp_free_vec(l1);
3055    tcg_temp_free_vec(l2);
3056}
3057
3058void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
3059                       TCGArg a0, ...)
3060{
3061    va_list va;
3062    TCGv_vec v0, v1, v2, v3, v4, t0;
3063
3064    va_start(va, a0);
3065    v0 = temp_tcgv_vec(arg_temp(a0));
3066    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3067    v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3068
3069    switch (opc) {
3070    case INDEX_op_cmp_vec:
3071        expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
3072        break;
3073
3074    case INDEX_op_cmpsel_vec:
3075        v3 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3076        v4 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3077        expand_vec_cmpsel(type, vece, v0, v1, v2, v3, v4, va_arg(va, TCGArg));
3078        break;
3079
3080    case INDEX_op_rotrv_vec:
3081        t0 = tcg_temp_new_vec(type);
3082        tcg_gen_neg_vec(vece, t0, v2);
3083        tcg_gen_rotlv_vec(vece, v0, v1, t0);
3084        tcg_temp_free_vec(t0);
3085        break;
3086
3087    case INDEX_op_ssadd_vec:
3088        expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_add_vec);
3089        break;
3090    case INDEX_op_sssub_vec:
3091        expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_sub_vec);
3092        break;
3093
3094    default:
3095        g_assert_not_reached();
3096    }
3097    va_end(va);
3098}
3099
3100static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
3101{
3102    switch (op) {
3103    case INDEX_op_goto_ptr:
3104        return C_O0_I1(r);
3105
3106    case INDEX_op_ld8u_i32:
3107    case INDEX_op_ld8u_i64:
3108    case INDEX_op_ld8s_i32:
3109    case INDEX_op_ld8s_i64:
3110    case INDEX_op_ld16u_i32:
3111    case INDEX_op_ld16u_i64:
3112    case INDEX_op_ld16s_i32:
3113    case INDEX_op_ld16s_i64:
3114    case INDEX_op_ld_i32:
3115    case INDEX_op_ld32u_i64:
3116    case INDEX_op_ld32s_i64:
3117    case INDEX_op_ld_i64:
3118        return C_O1_I1(r, r);
3119
3120    case INDEX_op_st8_i32:
3121    case INDEX_op_st8_i64:
3122    case INDEX_op_st16_i32:
3123    case INDEX_op_st16_i64:
3124    case INDEX_op_st_i32:
3125    case INDEX_op_st32_i64:
3126    case INDEX_op_st_i64:
3127        return C_O0_I2(r, r);
3128
3129    case INDEX_op_add_i32:
3130    case INDEX_op_add_i64:
3131    case INDEX_op_shl_i64:
3132    case INDEX_op_shr_i64:
3133    case INDEX_op_sar_i64:
3134    case INDEX_op_rotl_i32:
3135    case INDEX_op_rotl_i64:
3136    case INDEX_op_rotr_i32:
3137    case INDEX_op_rotr_i64:
3138    case INDEX_op_clz_i64:
3139    case INDEX_op_setcond_i32:
3140    case INDEX_op_setcond_i64:
3141        return C_O1_I2(r, r, ri);
3142
3143    case INDEX_op_sub_i32:
3144    case INDEX_op_sub_i64:
3145    case INDEX_op_and_i32:
3146    case INDEX_op_and_i64:
3147    case INDEX_op_or_i32:
3148    case INDEX_op_or_i64:
3149    case INDEX_op_xor_i32:
3150    case INDEX_op_xor_i64:
3151        return (HAVE_FACILITY(DISTINCT_OPS)
3152                ? C_O1_I2(r, r, ri)
3153                : C_O1_I2(r, 0, ri));
3154
3155    case INDEX_op_mul_i32:
3156        /* If we have the general-instruction-extensions, then we have
3157           MULTIPLY SINGLE IMMEDIATE with a signed 32-bit, otherwise we
3158           have only MULTIPLY HALFWORD IMMEDIATE, with a signed 16-bit.  */
3159        return (HAVE_FACILITY(GEN_INST_EXT)
3160                ? C_O1_I2(r, 0, ri)
3161                : C_O1_I2(r, 0, rI));
3162
3163    case INDEX_op_mul_i64:
3164        return (HAVE_FACILITY(GEN_INST_EXT)
3165                ? C_O1_I2(r, 0, rJ)
3166                : C_O1_I2(r, 0, rI));
3167
3168    case INDEX_op_shl_i32:
3169    case INDEX_op_shr_i32:
3170    case INDEX_op_sar_i32:
3171        return (HAVE_FACILITY(DISTINCT_OPS)
3172                ? C_O1_I2(r, r, ri)
3173                : C_O1_I2(r, 0, ri));
3174
3175    case INDEX_op_brcond_i32:
3176    case INDEX_op_brcond_i64:
3177        return C_O0_I2(r, ri);
3178
3179    case INDEX_op_bswap16_i32:
3180    case INDEX_op_bswap16_i64:
3181    case INDEX_op_bswap32_i32:
3182    case INDEX_op_bswap32_i64:
3183    case INDEX_op_bswap64_i64:
3184    case INDEX_op_neg_i32:
3185    case INDEX_op_neg_i64:
3186    case INDEX_op_ext8s_i32:
3187    case INDEX_op_ext8s_i64:
3188    case INDEX_op_ext8u_i32:
3189    case INDEX_op_ext8u_i64:
3190    case INDEX_op_ext16s_i32:
3191    case INDEX_op_ext16s_i64:
3192    case INDEX_op_ext16u_i32:
3193    case INDEX_op_ext16u_i64:
3194    case INDEX_op_ext32s_i64:
3195    case INDEX_op_ext32u_i64:
3196    case INDEX_op_ext_i32_i64:
3197    case INDEX_op_extu_i32_i64:
3198    case INDEX_op_extract_i32:
3199    case INDEX_op_extract_i64:
3200        return C_O1_I1(r, r);
3201
3202    case INDEX_op_qemu_ld_i32:
3203    case INDEX_op_qemu_ld_i64:
3204        return C_O1_I1(r, L);
3205    case INDEX_op_qemu_st_i64:
3206    case INDEX_op_qemu_st_i32:
3207        return C_O0_I2(L, L);
3208
3209    case INDEX_op_deposit_i32:
3210    case INDEX_op_deposit_i64:
3211        return C_O1_I2(r, rZ, r);
3212
3213    case INDEX_op_movcond_i32:
3214    case INDEX_op_movcond_i64:
3215        return (HAVE_FACILITY(LOAD_ON_COND2)
3216                ? C_O1_I4(r, r, ri, rI, 0)
3217                : C_O1_I4(r, r, ri, r, 0));
3218
3219    case INDEX_op_div2_i32:
3220    case INDEX_op_div2_i64:
3221    case INDEX_op_divu2_i32:
3222    case INDEX_op_divu2_i64:
3223        return C_O2_I3(b, a, 0, 1, r);
3224
3225    case INDEX_op_mulu2_i64:
3226        return C_O2_I2(b, a, 0, r);
3227
3228    case INDEX_op_add2_i32:
3229    case INDEX_op_sub2_i32:
3230        return (HAVE_FACILITY(EXT_IMM)
3231                ? C_O2_I4(r, r, 0, 1, ri, r)
3232                : C_O2_I4(r, r, 0, 1, r, r));
3233
3234    case INDEX_op_add2_i64:
3235    case INDEX_op_sub2_i64:
3236        return (HAVE_FACILITY(EXT_IMM)
3237                ? C_O2_I4(r, r, 0, 1, rA, r)
3238                : C_O2_I4(r, r, 0, 1, r, r));
3239
3240    case INDEX_op_st_vec:
3241        return C_O0_I2(v, r);
3242    case INDEX_op_ld_vec:
3243    case INDEX_op_dupm_vec:
3244        return C_O1_I1(v, r);
3245    case INDEX_op_dup_vec:
3246        return C_O1_I1(v, vr);
3247    case INDEX_op_abs_vec:
3248    case INDEX_op_neg_vec:
3249    case INDEX_op_not_vec:
3250    case INDEX_op_rotli_vec:
3251    case INDEX_op_sari_vec:
3252    case INDEX_op_shli_vec:
3253    case INDEX_op_shri_vec:
3254    case INDEX_op_s390_vuph_vec:
3255    case INDEX_op_s390_vupl_vec:
3256        return C_O1_I1(v, v);
3257    case INDEX_op_add_vec:
3258    case INDEX_op_sub_vec:
3259    case INDEX_op_and_vec:
3260    case INDEX_op_andc_vec:
3261    case INDEX_op_or_vec:
3262    case INDEX_op_orc_vec:
3263    case INDEX_op_xor_vec:
3264    case INDEX_op_nand_vec:
3265    case INDEX_op_nor_vec:
3266    case INDEX_op_eqv_vec:
3267    case INDEX_op_cmp_vec:
3268    case INDEX_op_mul_vec:
3269    case INDEX_op_rotlv_vec:
3270    case INDEX_op_rotrv_vec:
3271    case INDEX_op_shlv_vec:
3272    case INDEX_op_shrv_vec:
3273    case INDEX_op_sarv_vec:
3274    case INDEX_op_smax_vec:
3275    case INDEX_op_smin_vec:
3276    case INDEX_op_umax_vec:
3277    case INDEX_op_umin_vec:
3278    case INDEX_op_s390_vpks_vec:
3279        return C_O1_I2(v, v, v);
3280    case INDEX_op_rotls_vec:
3281    case INDEX_op_shls_vec:
3282    case INDEX_op_shrs_vec:
3283    case INDEX_op_sars_vec:
3284        return C_O1_I2(v, v, r);
3285    case INDEX_op_bitsel_vec:
3286        return C_O1_I3(v, v, v, v);
3287
3288    default:
3289        g_assert_not_reached();
3290    }
3291}
3292
3293/*
3294 * Mainline glibc added HWCAP_S390_VX before it was kernel abi.
3295 * Some distros have fixed this up locally, others have not.
3296 */
3297#ifndef HWCAP_S390_VXRS
3298#define HWCAP_S390_VXRS 2048
3299#endif
3300
3301static void query_s390_facilities(void)
3302{
3303    unsigned long hwcap = qemu_getauxval(AT_HWCAP);
3304
3305    /* Is STORE FACILITY LIST EXTENDED available?  Honestly, I believe this
3306       is present on all 64-bit systems, but let's check for it anyway.  */
3307    if (hwcap & HWCAP_S390_STFLE) {
3308        register int r0 __asm__("0") = ARRAY_SIZE(s390_facilities) - 1;
3309        register void *r1 __asm__("1") = s390_facilities;
3310
3311        /* stfle 0(%r1) */
3312        asm volatile(".word 0xb2b0,0x1000"
3313                     : "=r"(r0) : "r"(r0), "r"(r1) : "memory", "cc");
3314    }
3315
3316    /*
3317     * Use of vector registers requires os support beyond the facility bit.
3318     * If the kernel does not advertise support, disable the facility bits.
3319     * There is nothing else we currently care about in the 3rd word, so
3320     * disable VECTOR with one store.
3321     */
3322    if (!(hwcap & HWCAP_S390_VXRS)) {
3323        s390_facilities[2] = 0;
3324    }
3325}
3326
3327static void tcg_target_init(TCGContext *s)
3328{
3329    query_s390_facilities();
3330
3331    tcg_target_available_regs[TCG_TYPE_I32] = 0xffff;
3332    tcg_target_available_regs[TCG_TYPE_I64] = 0xffff;
3333    if (HAVE_FACILITY(VECTOR)) {
3334        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3335        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
3336    }
3337
3338    tcg_target_call_clobber_regs = 0;
3339    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
3340    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R1);
3341    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
3342    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
3343    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
3344    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
3345    /* The r6 register is technically call-saved, but it's also a parameter
3346       register, so it can get killed by setup for the qemu_st helper.  */
3347    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
3348    /* The return register can be considered call-clobbered.  */
3349    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14);
3350
3351    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
3352    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
3353    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
3354    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
3355    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
3356    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
3357    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
3358    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
3359    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
3360    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
3361    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
3362    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
3363    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V20);
3364    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V21);
3365    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V22);
3366    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V23);
3367    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V24);
3368    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V25);
3369    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V26);
3370    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V27);
3371    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V28);
3372    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V29);
3373    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V30);
3374    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V31);
3375
3376    s->reserved_regs = 0;
3377    tcg_regset_set_reg(s->reserved_regs, TCG_TMP0);
3378    /* XXX many insns can't be used with R0, so we better avoid it for now */
3379    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);
3380    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
3381    if (USE_REG_TB) {
3382        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);
3383    }
3384}
3385
3386#define FRAME_SIZE  ((int)(TCG_TARGET_CALL_STACK_OFFSET          \
3387                           + TCG_STATIC_CALL_ARGS_SIZE           \
3388                           + CPU_TEMP_BUF_NLONGS * sizeof(long)))
3389
3390static void tcg_target_qemu_prologue(TCGContext *s)
3391{
3392    /* stmg %r6,%r15,48(%r15) (save registers) */
3393    tcg_out_insn(s, RXY, STMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 48);
3394
3395    /* aghi %r15,-frame_size */
3396    tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -FRAME_SIZE);
3397
3398    tcg_set_frame(s, TCG_REG_CALL_STACK,
3399                  TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET,
3400                  CPU_TEMP_BUF_NLONGS * sizeof(long));
3401
3402#ifndef CONFIG_SOFTMMU
3403    if (guest_base >= 0x80000) {
3404        tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
3405        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
3406    }
3407#endif
3408
3409    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
3410    if (USE_REG_TB) {
3411        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB,
3412                    tcg_target_call_iarg_regs[1]);
3413    }
3414
3415    /* br %r3 (go to TB) */
3416    tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, tcg_target_call_iarg_regs[1]);
3417
3418    /*
3419     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
3420     * and fall through to the rest of the epilogue.
3421     */
3422    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
3423    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, 0);
3424
3425    /* TB epilogue */
3426    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
3427
3428    /* lmg %r6,%r15,fs+48(%r15) (restore registers) */
3429    tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15,
3430                 FRAME_SIZE + 48);
3431
3432    /* br %r14 (return) */
3433    tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14);
3434}
3435
3436static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3437{
3438    memset(p, 0x07, count * sizeof(tcg_insn_unit));
3439}
3440
3441typedef struct {
3442    DebugFrameHeader h;
3443    uint8_t fde_def_cfa[4];
3444    uint8_t fde_reg_ofs[18];
3445} DebugFrame;
3446
3447/* We're expecting a 2 byte uleb128 encoded value.  */
3448QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
3449
3450#define ELF_HOST_MACHINE  EM_S390
3451
3452static const DebugFrame debug_frame = {
3453    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3454    .h.cie.id = -1,
3455    .h.cie.version = 1,
3456    .h.cie.code_align = 1,
3457    .h.cie.data_align = 8,                /* sleb128 8 */
3458    .h.cie.return_column = TCG_REG_R14,
3459
3460    /* Total FDE size does not include the "len" member.  */
3461    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3462
3463    .fde_def_cfa = {
3464        12, TCG_REG_CALL_STACK,         /* DW_CFA_def_cfa %r15, ... */
3465        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3466        (FRAME_SIZE >> 7)
3467    },
3468    .fde_reg_ofs = {
3469        0x86, 6,                        /* DW_CFA_offset, %r6, 48 */
3470        0x87, 7,                        /* DW_CFA_offset, %r7, 56 */
3471        0x88, 8,                        /* DW_CFA_offset, %r8, 64 */
3472        0x89, 9,                        /* DW_CFA_offset, %r92, 72 */
3473        0x8a, 10,                       /* DW_CFA_offset, %r10, 80 */
3474        0x8b, 11,                       /* DW_CFA_offset, %r11, 88 */
3475        0x8c, 12,                       /* DW_CFA_offset, %r12, 96 */
3476        0x8d, 13,                       /* DW_CFA_offset, %r13, 104 */
3477        0x8e, 14,                       /* DW_CFA_offset, %r14, 112 */
3478    }
3479};
3480
3481void tcg_register_jit(const void *buf, size_t buf_size)
3482{
3483    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3484}
3485