xref: /openbmc/qemu/tcg/s390x/tcg-target.c.inc (revision ae77bbe5747dc655bed213006798f9b07e2f79bf)
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2009 Ulrich Hecht <uli@suse.de>
 * Copyright (c) 2009 Alexander Graf <agraf@suse.de>
 * Copyright (c) 2010 Richard Henderson <rth@twiddle.net>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* We only support generating code for 64-bit mode.  */
#if TCG_TARGET_REG_BITS != 64
#error "unsupported code generation mode"
#endif

#include "../tcg-pool.c.inc"
#include "elf.h"

/* ??? The translation blocks produced by TCG are generally small enough to
   be entirely reachable with a 16-bit displacement.  Leaving the option for
   a 32-bit displacement here Just In Case.  */
#define USE_LONG_BRANCHES 0

#define TCG_CT_CONST_S16   0x100
#define TCG_CT_CONST_S32   0x200
#define TCG_CT_CONST_S33   0x400
#define TCG_CT_CONST_ZERO  0x800

#define ALL_GENERAL_REGS     MAKE_64BIT_MASK(0, 16)
#define ALL_VECTOR_REGS      MAKE_64BIT_MASK(32, 32)

/*
 * For softmmu, we need to avoid conflicts with the first 3
 * argument registers to perform the tlb lookup, and to call
 * the helper function.
 */
#ifdef CONFIG_SOFTMMU
#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_R2, 3)
#else
#define SOFTMMU_RESERVE_REGS 0
#endif


/* In several places within the instruction set, 0 means "no register"
   rather than TCG_REG_R0.  */
#define TCG_REG_NONE    0

/* A scratch register that may be used throughout the backend.  */
#define TCG_TMP0        TCG_REG_R1

/* A scratch register that holds a pointer to the beginning of the TB.
   We don't need this when we have pc-relative loads with the general
   instructions extension facility.  */
#define TCG_REG_TB      TCG_REG_R12
#define USE_REG_TB      (!HAVE_FACILITY(GEN_INST_EXT))

#ifndef CONFIG_SOFTMMU
#define TCG_GUEST_BASE_REG TCG_REG_R13
#endif

/* All of the following instructions are prefixed with their instruction
   format, and are defined as 8- or 16-bit quantities, even when the two
   halves of the 16-bit quantity may appear 32 bits apart in the insn.
   This makes it easy to copy the values from the tables in Appendix B.  */
typedef enum S390Opcode {
    RIL_AFI     = 0xc209,
    RIL_AGFI    = 0xc208,
    RIL_ALFI    = 0xc20b,
    RIL_ALGFI   = 0xc20a,
    RIL_BRASL   = 0xc005,
    RIL_BRCL    = 0xc004,
    RIL_CFI     = 0xc20d,
    RIL_CGFI    = 0xc20c,
    RIL_CLFI    = 0xc20f,
    RIL_CLGFI   = 0xc20e,
    RIL_CLRL    = 0xc60f,
    RIL_CLGRL   = 0xc60a,
    RIL_CRL     = 0xc60d,
    RIL_CGRL    = 0xc608,
    RIL_IIHF    = 0xc008,
    RIL_IILF    = 0xc009,
    RIL_LARL    = 0xc000,
    RIL_LGFI    = 0xc001,
    RIL_LGRL    = 0xc408,
    RIL_LLIHF   = 0xc00e,
    RIL_LLILF   = 0xc00f,
    RIL_LRL     = 0xc40d,
    RIL_MSFI    = 0xc201,
    RIL_MSGFI   = 0xc200,
    RIL_NIHF    = 0xc00a,
    RIL_NILF    = 0xc00b,
    RIL_OIHF    = 0xc00c,
    RIL_OILF    = 0xc00d,
    RIL_SLFI    = 0xc205,
    RIL_SLGFI   = 0xc204,
    RIL_XIHF    = 0xc006,
    RIL_XILF    = 0xc007,

    RI_AGHI     = 0xa70b,
    RI_AHI      = 0xa70a,
    RI_BRC      = 0xa704,
    RI_CHI      = 0xa70e,
    RI_CGHI     = 0xa70f,
    RI_IIHH     = 0xa500,
    RI_IIHL     = 0xa501,
    RI_IILH     = 0xa502,
    RI_IILL     = 0xa503,
    RI_LGHI     = 0xa709,
    RI_LLIHH    = 0xa50c,
    RI_LLIHL    = 0xa50d,
    RI_LLILH    = 0xa50e,
    RI_LLILL    = 0xa50f,
    RI_MGHI     = 0xa70d,
    RI_MHI      = 0xa70c,
    RI_NIHH     = 0xa504,
    RI_NIHL     = 0xa505,
    RI_NILH     = 0xa506,
    RI_NILL     = 0xa507,
    RI_OIHH     = 0xa508,
    RI_OIHL     = 0xa509,
    RI_OILH     = 0xa50a,
    RI_OILL     = 0xa50b,

    RIE_CGIJ    = 0xec7c,
    RIE_CGRJ    = 0xec64,
    RIE_CIJ     = 0xec7e,
    RIE_CLGRJ   = 0xec65,
    RIE_CLIJ    = 0xec7f,
    RIE_CLGIJ   = 0xec7d,
    RIE_CLRJ    = 0xec77,
    RIE_CRJ     = 0xec76,
    RIE_LOCGHI  = 0xec46,
    RIE_RISBG   = 0xec55,

    RRE_AGR     = 0xb908,
    RRE_ALGR    = 0xb90a,
    RRE_ALCR    = 0xb998,
    RRE_ALCGR   = 0xb988,
    RRE_CGR     = 0xb920,
    RRE_CLGR    = 0xb921,
    RRE_DLGR    = 0xb987,
    RRE_DLR     = 0xb997,
    RRE_DSGFR   = 0xb91d,
    RRE_DSGR    = 0xb90d,
    RRE_FLOGR   = 0xb983,
    RRE_LGBR    = 0xb906,
    RRE_LCGR    = 0xb903,
    RRE_LGFR    = 0xb914,
    RRE_LGHR    = 0xb907,
    RRE_LGR     = 0xb904,
    RRE_LLGCR   = 0xb984,
    RRE_LLGFR   = 0xb916,
    RRE_LLGHR   = 0xb985,
    RRE_LRVR    = 0xb91f,
    RRE_LRVGR   = 0xb90f,
    RRE_LTGR    = 0xb902,
    RRE_MLGR    = 0xb986,
    RRE_MSGR    = 0xb90c,
    RRE_MSR     = 0xb252,
    RRE_NGR     = 0xb980,
    RRE_OGR     = 0xb981,
    RRE_SGR     = 0xb909,
    RRE_SLGR    = 0xb90b,
    RRE_SLBR    = 0xb999,
    RRE_SLBGR   = 0xb989,
    RRE_XGR     = 0xb982,

    RRF_LOCR    = 0xb9f2,
    RRF_LOCGR   = 0xb9e2,
    RRF_NRK     = 0xb9f4,
    RRF_NGRK    = 0xb9e4,
    RRF_ORK     = 0xb9f6,
    RRF_OGRK    = 0xb9e6,
    RRF_SRK     = 0xb9f9,
    RRF_SGRK    = 0xb9e9,
    RRF_SLRK    = 0xb9fb,
    RRF_SLGRK   = 0xb9eb,
    RRF_XRK     = 0xb9f7,
    RRF_XGRK    = 0xb9e7,

    RR_AR       = 0x1a,
    RR_ALR      = 0x1e,
    RR_BASR     = 0x0d,
    RR_BCR      = 0x07,
    RR_CLR      = 0x15,
    RR_CR       = 0x19,
    RR_DR       = 0x1d,
    RR_LCR      = 0x13,
    RR_LR       = 0x18,
    RR_LTR      = 0x12,
    RR_NR       = 0x14,
    RR_OR       = 0x16,
    RR_SR       = 0x1b,
    RR_SLR      = 0x1f,
    RR_XR       = 0x17,

    RSY_RLL     = 0xeb1d,
    RSY_RLLG    = 0xeb1c,
    RSY_SLLG    = 0xeb0d,
    RSY_SLLK    = 0xebdf,
    RSY_SRAG    = 0xeb0a,
    RSY_SRAK    = 0xebdc,
    RSY_SRLG    = 0xeb0c,
    RSY_SRLK    = 0xebde,

    RS_SLL      = 0x89,
    RS_SRA      = 0x8a,
    RS_SRL      = 0x88,

    RXY_AG      = 0xe308,
    RXY_AY      = 0xe35a,
    RXY_CG      = 0xe320,
    RXY_CLG     = 0xe321,
    RXY_CLY     = 0xe355,
    RXY_CY      = 0xe359,
    RXY_LAY     = 0xe371,
    RXY_LB      = 0xe376,
    RXY_LG      = 0xe304,
    RXY_LGB     = 0xe377,
    RXY_LGF     = 0xe314,
    RXY_LGH     = 0xe315,
    RXY_LHY     = 0xe378,
    RXY_LLGC    = 0xe390,
    RXY_LLGF    = 0xe316,
    RXY_LLGH    = 0xe391,
    RXY_LMG     = 0xeb04,
    RXY_LRV     = 0xe31e,
    RXY_LRVG    = 0xe30f,
    RXY_LRVH    = 0xe31f,
    RXY_LY      = 0xe358,
    RXY_NG      = 0xe380,
    RXY_OG      = 0xe381,
    RXY_STCY    = 0xe372,
    RXY_STG     = 0xe324,
    RXY_STHY    = 0xe370,
    RXY_STMG    = 0xeb24,
    RXY_STRV    = 0xe33e,
    RXY_STRVG   = 0xe32f,
    RXY_STRVH   = 0xe33f,
    RXY_STY     = 0xe350,
    RXY_XG      = 0xe382,

    RX_A        = 0x5a,
    RX_C        = 0x59,
    RX_L        = 0x58,
    RX_LA       = 0x41,
    RX_LH       = 0x48,
    RX_ST       = 0x50,
    RX_STC      = 0x42,
    RX_STH      = 0x40,

    VRIa_VGBM   = 0xe744,
    VRIa_VREPI  = 0xe745,
    VRIb_VGM    = 0xe746,
    VRIc_VREP   = 0xe74d,

    VRRa_VLC    = 0xe7de,
    VRRa_VLP    = 0xe7df,
    VRRa_VLR    = 0xe756,
    VRRc_VA     = 0xe7f3,
    VRRc_VCEQ   = 0xe7f8,   /* we leave the m5 cs field 0 */
    VRRc_VCH    = 0xe7fb,   /* " */
    VRRc_VCHL   = 0xe7f9,   /* " */
    VRRc_VN     = 0xe768,
    VRRc_VNC    = 0xe769,
    VRRc_VNO    = 0xe76b,
    VRRc_VO     = 0xe76a,
    VRRc_VOC    = 0xe76f,
    VRRc_VS     = 0xe7f7,
    VRRc_VX     = 0xe76d,
    VRRf_VLVGP  = 0xe762,

    VRSb_VLVG   = 0xe722,
    VRSc_VLGV   = 0xe721,

    VRX_VL      = 0xe706,
    VRX_VLLEZ   = 0xe704,
    VRX_VLREP   = 0xe705,
    VRX_VST     = 0xe70e,
    VRX_VSTEF   = 0xe70b,
    VRX_VSTEG   = 0xe70a,

    NOP         = 0x0707,
} S390Opcode;
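
/*
 * Worked example of the split encoding (added for clarity): RIE_RISBG is
 * 0xec55, a 6-byte insn whose opcode halves live in bytes 0 and 5.  The
 * emitters below place (op & 0xff00) in the first halfword and (op & 0xff)
 * in the last, so RISBG is emitted as ec .. .. .. .. 55.
 */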

#ifdef CONFIG_DEBUG_TCG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
    "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6",  "%v7",
    "%v8",  "%v9",  "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
    "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
    "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
};
#endif

/* Since R6 is a potential argument register, choose it last of the
   call-saved registers.  Likewise prefer the call-clobbered registers
   in reverse order to maximize the chance of avoiding the arguments.  */
static const int tcg_target_reg_alloc_order[] = {
    /* Call saved registers.  */
    TCG_REG_R13,
    TCG_REG_R12,
    TCG_REG_R11,
    TCG_REG_R10,
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_R7,
    TCG_REG_R6,
    /* Call clobbered registers.  */
    TCG_REG_R14,
    TCG_REG_R0,
    TCG_REG_R1,
    /* Argument registers, in reverse order of allocation.  */
    TCG_REG_R5,
    TCG_REG_R4,
    TCG_REG_R3,
    TCG_REG_R2,

    /* V8-V15 are call saved, and omitted. */
    TCG_REG_V0,
    TCG_REG_V1,
    TCG_REG_V2,
    TCG_REG_V3,
    TCG_REG_V4,
    TCG_REG_V5,
    TCG_REG_V6,
    TCG_REG_V7,
    TCG_REG_V16,
    TCG_REG_V17,
    TCG_REG_V18,
    TCG_REG_V19,
    TCG_REG_V20,
    TCG_REG_V21,
    TCG_REG_V22,
    TCG_REG_V23,
    TCG_REG_V24,
    TCG_REG_V25,
    TCG_REG_V26,
    TCG_REG_V27,
    TCG_REG_V28,
    TCG_REG_V29,
    TCG_REG_V30,
    TCG_REG_V31,
};

static const int tcg_target_call_iarg_regs[] = {
    TCG_REG_R2,
    TCG_REG_R3,
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
};

static const int tcg_target_call_oarg_regs[] = {
    TCG_REG_R2,
};

#define S390_CC_EQ      8
#define S390_CC_LT      4
#define S390_CC_GT      2
#define S390_CC_OV      1
#define S390_CC_NE      (S390_CC_LT | S390_CC_GT)
#define S390_CC_LE      (S390_CC_LT | S390_CC_EQ)
#define S390_CC_GE      (S390_CC_GT | S390_CC_EQ)
#define S390_CC_NEVER   0
#define S390_CC_ALWAYS  15
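
/*
 * Worked example (added for clarity): these values are 4-bit branch masks,
 * one bit per condition code, with CC0 as the most significant bit.
 * S390_CC_NE is LT | GT = 6, i.e. "branch when CC is 1 or 2", and
 * S390_CC_ALWAYS (15) accepts any CC, turning BRC into an unconditional
 * branch.
 */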

/* Condition codes that result from a COMPARE and COMPARE LOGICAL.  */
static const uint8_t tcg_cond_to_s390_cond[] = {
    [TCG_COND_EQ]  = S390_CC_EQ,
    [TCG_COND_NE]  = S390_CC_NE,
    [TCG_COND_LT]  = S390_CC_LT,
    [TCG_COND_LE]  = S390_CC_LE,
    [TCG_COND_GT]  = S390_CC_GT,
    [TCG_COND_GE]  = S390_CC_GE,
    [TCG_COND_LTU] = S390_CC_LT,
    [TCG_COND_LEU] = S390_CC_LE,
    [TCG_COND_GTU] = S390_CC_GT,
    [TCG_COND_GEU] = S390_CC_GE,
};

/* Condition codes that result from a LOAD AND TEST.  Here, we have no
   unsigned instruction variation; however, since the test is against zero,
   we can remap the outcomes appropriately: e.g. "x < 0 unsigned" is never
   true and "x >= 0 unsigned" is always true.  */
static const uint8_t tcg_cond_to_ltr_cond[] = {
    [TCG_COND_EQ]  = S390_CC_EQ,
    [TCG_COND_NE]  = S390_CC_NE,
    [TCG_COND_LT]  = S390_CC_LT,
    [TCG_COND_LE]  = S390_CC_LE,
    [TCG_COND_GT]  = S390_CC_GT,
    [TCG_COND_GE]  = S390_CC_GE,
    [TCG_COND_LTU] = S390_CC_NEVER,
    [TCG_COND_LEU] = S390_CC_EQ,
    [TCG_COND_GTU] = S390_CC_NE,
    [TCG_COND_GEU] = S390_CC_ALWAYS,
};

#ifdef CONFIG_SOFTMMU
static void * const qemu_ld_helpers[(MO_SSIZE | MO_BSWAP) + 1] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_SB]   = helper_ret_ldsb_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LESW] = helper_le_ldsw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LESL] = helper_le_ldsl_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BESW] = helper_be_ldsw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BESL] = helper_be_ldsl_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
};

static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};
#endif

static const tcg_insn_unit *tb_ret_addr;
uint64_t s390_facilities[3];

static inline bool is_general_reg(TCGReg r)
{
    return r <= TCG_REG_R15;
}

static inline bool is_vector_reg(TCGReg r)
{
    return r >= TCG_REG_V0 && r <= TCG_REG_V31;
}

static bool patch_reloc(tcg_insn_unit *src_rw, int type,
                        intptr_t value, intptr_t addend)
{
    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
    intptr_t pcrel2;
    uint32_t old;

    value += addend;
    pcrel2 = (tcg_insn_unit *)value - src_rx;

    switch (type) {
    case R_390_PC16DBL:
        if (pcrel2 == (int16_t)pcrel2) {
            tcg_patch16(src_rw, pcrel2);
            return true;
        }
        break;
    case R_390_PC32DBL:
        if (pcrel2 == (int32_t)pcrel2) {
            tcg_patch32(src_rw, pcrel2);
            return true;
        }
        break;
    case R_390_20:
        if (value == sextract64(value, 0, 20)) {
            old = *(uint32_t *)src_rw & 0xf00000ff;
            old |= ((value & 0xfff) << 16) | ((value & 0xff000) >> 4);
            tcg_patch32(src_rw, old);
            return true;
        }
        break;
    default:
        g_assert_not_reached();
    }
    return false;
}
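
/*
 * Worked example for R_390_20 (added for clarity): a 20-bit displacement
 * is stored as DL (low 12 bits) and DH (high 8 bits) in separate fields of
 * the patched 32-bit word.  For value 0x23456, DL = 0x456 lands at bits
 * 16..27 and DH = 0x23 at bits 8..15, while the mask 0xf00000ff preserves
 * the base-register nibble and the low opcode byte around them.
 */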

/* Test if a constant matches the constraint. */
static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
{
    if (ct & TCG_CT_CONST) {
        return 1;
    }

    if (type == TCG_TYPE_I32) {
        val = (int32_t)val;
    }

    /* The following are mutually exclusive.  */
    if (ct & TCG_CT_CONST_S16) {
        return val == (int16_t)val;
    } else if (ct & TCG_CT_CONST_S32) {
        return val == (int32_t)val;
    } else if (ct & TCG_CT_CONST_S33) {
        return val >= -0xffffffffll && val <= 0xffffffffll;
    } else if (ct & TCG_CT_CONST_ZERO) {
        return val == 0;
    }

    return 0;
}

/* Emit instructions according to the given instruction format.  */

static void tcg_out_insn_RR(TCGContext *s, S390Opcode op, TCGReg r1, TCGReg r2)
{
    tcg_out16(s, (op << 8) | (r1 << 4) | r2);
}

static void tcg_out_insn_RRE(TCGContext *s, S390Opcode op,
                             TCGReg r1, TCGReg r2)
{
    tcg_out32(s, (op << 16) | (r1 << 4) | r2);
}

static void tcg_out_insn_RRF(TCGContext *s, S390Opcode op,
                             TCGReg r1, TCGReg r2, int m3)
{
    tcg_out32(s, (op << 16) | (m3 << 12) | (r1 << 4) | r2);
}

static void tcg_out_insn_RI(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
{
    tcg_out32(s, (op << 16) | (r1 << 20) | (i2 & 0xffff));
}

static void tcg_out_insn_RIE(TCGContext *s, S390Opcode op, TCGReg r1,
                             int i2, int m3)
{
    tcg_out16(s, (op & 0xff00) | (r1 << 4) | m3);
    tcg_out32(s, (i2 << 16) | (op & 0xff));
}

static void tcg_out_insn_RIL(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
{
    tcg_out16(s, op | (r1 << 4));
    tcg_out32(s, i2);
}

static void tcg_out_insn_RS(TCGContext *s, S390Opcode op, TCGReg r1,
                            TCGReg b2, TCGReg r3, int disp)
{
    tcg_out32(s, (op << 24) | (r1 << 20) | (r3 << 16) | (b2 << 12)
              | (disp & 0xfff));
}

static void tcg_out_insn_RSY(TCGContext *s, S390Opcode op, TCGReg r1,
                             TCGReg b2, TCGReg r3, int disp)
{
    tcg_out16(s, (op & 0xff00) | (r1 << 4) | r3);
    tcg_out32(s, (op & 0xff) | (b2 << 28)
              | ((disp & 0xfff) << 16) | ((disp & 0xff000) >> 4));
}

#define tcg_out_insn_RX   tcg_out_insn_RS
#define tcg_out_insn_RXY  tcg_out_insn_RSY

static int RXB(TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
{
    /*
     * Shift bit 4 of each regno to its corresponding bit of RXB.
     * RXB itself begins at bit 8 of the instruction so 8 - 4 = 4
     * is the left-shift of the 4th operand.
     */
    return ((v1 & 0x10) << (4 + 3))
         | ((v2 & 0x10) << (4 + 2))
         | ((v3 & 0x10) << (4 + 1))
         | ((v4 & 0x10) << (4 + 0));
}
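
/*
 * Worked example (added for clarity): for v1 = %v20 (regno 20 = 0x14),
 * bit 4 is set, so RXB(20, 0, 0, 0) = 0x10 << 7 = 0x800.  OR'ed into the
 * final halfword of the insn, that sets the first RXB bit, while the 4-bit
 * register field itself holds only the low nibble, 20 & 0xf = 4.
 */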

static void tcg_out_insn_VRIa(TCGContext *s, S390Opcode op,
                              TCGReg v1, uint16_t i2, int m3)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4));
    tcg_out16(s, i2);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
}

static void tcg_out_insn_VRIb(TCGContext *s, S390Opcode op,
                              TCGReg v1, uint8_t i2, uint8_t i3, int m4)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4));
    tcg_out16(s, (i2 << 8) | (i3 & 0xff));
    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
}

static void tcg_out_insn_VRIc(TCGContext *s, S390Opcode op,
                              TCGReg v1, uint16_t i2, TCGReg v3, int m4)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(is_vector_reg(v3));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v3 & 0xf));
    tcg_out16(s, i2);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, v3, 0) | (m4 << 12));
}

static void tcg_out_insn_VRRa(TCGContext *s, S390Opcode op,
                              TCGReg v1, TCGReg v2, int m3)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(is_vector_reg(v2));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
    tcg_out32(s, (op & 0x00ff) | RXB(v1, v2, 0, 0) | (m3 << 12));
}

static void tcg_out_insn_VRRc(TCGContext *s, S390Opcode op,
                              TCGReg v1, TCGReg v2, TCGReg v3, int m4)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(is_vector_reg(v2));
    tcg_debug_assert(is_vector_reg(v3));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
    tcg_out16(s, v3 << 12);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, 0) | (m4 << 12));
}

static void tcg_out_insn_VRRf(TCGContext *s, S390Opcode op,
                              TCGReg v1, TCGReg r2, TCGReg r3)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(is_general_reg(r2));
    tcg_debug_assert(is_general_reg(r3));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r2);
    tcg_out16(s, r3 << 12);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0));
}

static void tcg_out_insn_VRSb(TCGContext *s, S390Opcode op, TCGReg v1,
                              intptr_t d2, TCGReg b2, TCGReg r3, int m4)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
    tcg_debug_assert(is_general_reg(b2));
    tcg_debug_assert(is_general_reg(r3));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r3);
    tcg_out16(s, b2 << 12 | d2);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
}

static void tcg_out_insn_VRSc(TCGContext *s, S390Opcode op, TCGReg r1,
                              intptr_t d2, TCGReg b2, TCGReg v3, int m4)
{
    tcg_debug_assert(is_general_reg(r1));
    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
    tcg_debug_assert(is_general_reg(b2));
    tcg_debug_assert(is_vector_reg(v3));
    tcg_out16(s, (op & 0xff00) | (r1 << 4) | (v3 & 0xf));
    tcg_out16(s, b2 << 12 | d2);
    tcg_out16(s, (op & 0x00ff) | RXB(0, 0, v3, 0) | (m4 << 12));
}

static void tcg_out_insn_VRX(TCGContext *s, S390Opcode op, TCGReg v1,
                             TCGReg b2, TCGReg x2, intptr_t d2, int m3)
{
    tcg_debug_assert(is_vector_reg(v1));
    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
    tcg_debug_assert(is_general_reg(x2));
    tcg_debug_assert(is_general_reg(b2));
    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | x2);
    tcg_out16(s, (b2 << 12) | d2);
    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
}

/* Emit an opcode with "type-checking" of the format.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(FMT,_),OP), ## __VA_ARGS__)


/* emit 64-bit shifts */
static void tcg_out_sh64(TCGContext* s, S390Opcode op, TCGReg dest,
                         TCGReg src, TCGReg sh_reg, int sh_imm)
{
    tcg_out_insn_RSY(s, op, dest, sh_reg, src, sh_imm);
}

/* emit 32-bit shifts */
static void tcg_out_sh32(TCGContext* s, S390Opcode op, TCGReg dest,
                         TCGReg sh_reg, int sh_imm)
{
    tcg_out_insn_RS(s, op, dest, sh_reg, 0, sh_imm);
}

static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
{
    if (src == dst) {
        return true;
    }
    switch (type) {
    case TCG_TYPE_I32:
        if (likely(is_general_reg(dst) && is_general_reg(src))) {
            tcg_out_insn(s, RR, LR, dst, src);
            break;
        }
        /* fallthru */

    case TCG_TYPE_I64:
        if (likely(is_general_reg(dst))) {
            if (likely(is_general_reg(src))) {
                tcg_out_insn(s, RRE, LGR, dst, src);
            } else {
                tcg_out_insn(s, VRSc, VLGV, dst, 0, 0, src, 3);
            }
            break;
        } else if (is_general_reg(src)) {
            tcg_out_insn(s, VRSb, VLVG, dst, 0, 0, src, 3);
            break;
        }
        /* fallthru */

    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
        tcg_out_insn(s, VRRa, VLR, dst, src, 0);
        break;

    default:
        g_assert_not_reached();
    }
    return true;
}

static const S390Opcode lli_insns[4] = {
    RI_LLILL, RI_LLILH, RI_LLIHL, RI_LLIHH
};

static bool maybe_out_small_movi(TCGContext *s, TCGType type,
                                 TCGReg ret, tcg_target_long sval)
{
    tcg_target_ulong uval = sval;
    int i;

    if (type == TCG_TYPE_I32) {
        uval = (uint32_t)sval;
        sval = (int32_t)sval;
    }

    /* Try all 32-bit insns that can load it in one go.  */
    if (sval >= -0x8000 && sval < 0x8000) {
        tcg_out_insn(s, RI, LGHI, ret, sval);
        return true;
    }

    for (i = 0; i < 4; i++) {
        tcg_target_long mask = 0xffffull << i*16;
        if ((uval & mask) == uval) {
            tcg_out_insn_RI(s, lli_insns[i], ret, uval >> i*16);
            return true;
        }
    }

    return false;
}
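
/*
 * Worked example (added for clarity): 0x00120000 has all of its set bits
 * in halfword 1, so the loop above selects lli_insns[1] (LLILH) with
 * immediate 0x0012; a value like 0x00010002 spans two halfwords, fails
 * every mask test, and is left for the multi-insn paths below.
 */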

/* load a register with an immediate value */
static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
                             tcg_target_long sval, bool in_prologue)
{
    tcg_target_ulong uval;

    /* Try all 32-bit insns that can load it in one go.  */
    if (maybe_out_small_movi(s, type, ret, sval)) {
        return;
    }

    uval = sval;
    if (type == TCG_TYPE_I32) {
        uval = (uint32_t)sval;
        sval = (int32_t)sval;
    }

    /* Try all 48-bit insns that can load it in one go.  */
    if (HAVE_FACILITY(EXT_IMM)) {
        if (sval == (int32_t)sval) {
            tcg_out_insn(s, RIL, LGFI, ret, sval);
            return;
        }
        if (uval <= 0xffffffff) {
            tcg_out_insn(s, RIL, LLILF, ret, uval);
            return;
        }
        if ((uval & 0xffffffff) == 0) {
            tcg_out_insn(s, RIL, LLIHF, ret, uval >> 32);
            return;
        }
    }

    /* Try for PC-relative address load.  For odd addresses,
       attempt to use an offset from the start of the TB.  */
    if ((sval & 1) == 0) {
        ptrdiff_t off = tcg_pcrel_diff(s, (void *)sval) >> 1;
        if (off == (int32_t)off) {
            tcg_out_insn(s, RIL, LARL, ret, off);
            return;
        }
    } else if (USE_REG_TB && !in_prologue) {
        ptrdiff_t off = tcg_tbrel_diff(s, (void *)sval);
        if (off == sextract64(off, 0, 20)) {
            /* This is certain to be an address within TB, and therefore
               OFF will be negative; don't try RX_LA.  */
            tcg_out_insn(s, RXY, LAY, ret, TCG_REG_TB, TCG_REG_NONE, off);
            return;
        }
    }

    /* A 32-bit unsigned value can be loaded in 2 insns.  And given
       that LLILL, LLIHL, LLILF above did not succeed, we know that
       both insns are required.  */
    if (uval <= 0xffffffff) {
        tcg_out_insn(s, RI, LLILL, ret, uval);
        tcg_out_insn(s, RI, IILH, ret, uval >> 16);
        return;
    }

    /* Otherwise, stuff it in the constant pool.  */
    if (HAVE_FACILITY(GEN_INST_EXT)) {
        tcg_out_insn(s, RIL, LGRL, ret, 0);
        new_pool_label(s, sval, R_390_PC32DBL, s->code_ptr - 2, 2);
    } else if (USE_REG_TB && !in_prologue) {
        tcg_out_insn(s, RXY, LG, ret, TCG_REG_TB, TCG_REG_NONE, 0);
        new_pool_label(s, sval, R_390_20, s->code_ptr - 2,
                       tcg_tbrel_diff(s, NULL));
    } else {
        TCGReg base = ret ? ret : TCG_TMP0;
        tcg_out_insn(s, RIL, LARL, base, 0);
        new_pool_label(s, sval, R_390_PC32DBL, s->code_ptr - 2, 2);
        tcg_out_insn(s, RXY, LG, ret, base, TCG_REG_NONE, 0);
    }
}
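
/*
 * Worked example (added for clarity): without the extended-immediate
 * facility, and when the PC-relative cases do not apply, 0x12345678 is
 * built by the 2-insn pair above as LLILL 0x5678 (which zeroes the other
 * three halfwords) followed by IILH 0x1234 (which inserts into bits
 * 16..31 only, counting from the LSB).
 */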

static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long sval)
{
    tcg_out_movi_int(s, type, ret, sval, false);
}

/* Emit a load/store type instruction.  Inputs are:
   DATA:     The register to be loaded or stored.
   BASE+OFS: The effective address.
   OPC_RX:   If the operation has an RX format opcode (e.g. STC), otherwise 0.
   OPC_RXY:  The RXY format opcode for the operation (e.g. STCY).  */

static void tcg_out_mem(TCGContext *s, S390Opcode opc_rx, S390Opcode opc_rxy,
                        TCGReg data, TCGReg base, TCGReg index,
                        tcg_target_long ofs)
{
    if (ofs < -0x80000 || ofs >= 0x80000) {
        /* Combine the low 20 bits of the offset with the actual load insn;
           the high 44 bits must come from an immediate load.  */
        tcg_target_long low = ((ofs & 0xfffff) ^ 0x80000) - 0x80000;
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - low);
        ofs = low;

        /* If we were already given an index register, add it in.  */
        if (index != TCG_REG_NONE) {
            tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
        }
        index = TCG_TMP0;
    }

    if (opc_rx && ofs >= 0 && ofs < 0x1000) {
        tcg_out_insn_RX(s, opc_rx, data, base, index, ofs);
    } else {
        tcg_out_insn_RXY(s, opc_rxy, data, base, index, ofs);
    }
}
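
/*
 * Worked example (added for clarity): the xor/subtract trick above
 * sign-extends the low 20 bits of the offset.  For ofs = 0x123456 it
 * yields low = 0x23456 and an immediate load of 0x100000 into TCG_TMP0;
 * for ofs = 0xfffff the low bits look negative, so low = -1 and the movi
 * instead loads 0x100000.
 */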

static void tcg_out_vrx_mem(TCGContext *s, S390Opcode opc_vrx,
                            TCGReg data, TCGReg base, TCGReg index,
                            tcg_target_long ofs, int m3)
{
    if (ofs < 0 || ofs >= 0x1000) {
        if (ofs >= -0x80000 && ofs < 0x80000) {
            tcg_out_insn(s, RXY, LAY, TCG_TMP0, base, index, ofs);
            base = TCG_TMP0;
            index = TCG_REG_NONE;
            ofs = 0;
        } else {
            tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs);
            if (index != TCG_REG_NONE) {
                tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
            }
            index = TCG_TMP0;
            ofs = 0;
        }
    }
    tcg_out_insn_VRX(s, opc_vrx, data, base, index, ofs, m3);
}

/* load data without address translation or endianness conversion */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data,
                       TCGReg base, intptr_t ofs)
{
    switch (type) {
    case TCG_TYPE_I32:
        if (likely(is_general_reg(data))) {
            tcg_out_mem(s, RX_L, RXY_LY, data, base, TCG_REG_NONE, ofs);
            break;
        }
        tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_32);
        break;

    case TCG_TYPE_I64:
        if (likely(is_general_reg(data))) {
            tcg_out_mem(s, 0, RXY_LG, data, base, TCG_REG_NONE, ofs);
            break;
        }
        /* fallthru */

    case TCG_TYPE_V64:
        tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_64);
        break;

    case TCG_TYPE_V128:
        /* Hint quadword aligned.  */
        tcg_out_vrx_mem(s, VRX_VL, data, base, TCG_REG_NONE, ofs, 4);
        break;

    default:
        g_assert_not_reached();
    }
}

static void tcg_out_st(TCGContext *s, TCGType type, TCGReg data,
                       TCGReg base, intptr_t ofs)
{
    switch (type) {
    case TCG_TYPE_I32:
        if (likely(is_general_reg(data))) {
            tcg_out_mem(s, RX_ST, RXY_STY, data, base, TCG_REG_NONE, ofs);
        } else {
            tcg_out_vrx_mem(s, VRX_VSTEF, data, base, TCG_REG_NONE, ofs, 1);
        }
        break;

    case TCG_TYPE_I64:
        if (likely(is_general_reg(data))) {
            tcg_out_mem(s, 0, RXY_STG, data, base, TCG_REG_NONE, ofs);
            break;
        }
        /* fallthru */

    case TCG_TYPE_V64:
        tcg_out_vrx_mem(s, VRX_VSTEG, data, base, TCG_REG_NONE, ofs, 0);
        break;

    case TCG_TYPE_V128:
        /* Hint quadword aligned.  */
        tcg_out_vrx_mem(s, VRX_VST, data, base, TCG_REG_NONE, ofs, 4);
        break;

    default:
        g_assert_not_reached();
    }
}

static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                               TCGReg base, intptr_t ofs)
{
    return false;
}

/* load data from an absolute host address */
static void tcg_out_ld_abs(TCGContext *s, TCGType type,
                           TCGReg dest, const void *abs)
{
    intptr_t addr = (intptr_t)abs;

    if (HAVE_FACILITY(GEN_INST_EXT) && !(addr & 1)) {
        ptrdiff_t disp = tcg_pcrel_diff(s, abs) >> 1;
        if (disp == (int32_t)disp) {
            if (type == TCG_TYPE_I32) {
                tcg_out_insn(s, RIL, LRL, dest, disp);
            } else {
                tcg_out_insn(s, RIL, LGRL, dest, disp);
            }
            return;
        }
    }
    if (USE_REG_TB) {
        ptrdiff_t disp = tcg_tbrel_diff(s, abs);
        if (disp == sextract64(disp, 0, 20)) {
            tcg_out_ld(s, type, dest, TCG_REG_TB, disp);
            return;
        }
    }

    tcg_out_movi(s, TCG_TYPE_PTR, dest, addr & ~0xffff);
    tcg_out_ld(s, type, dest, dest, addr & 0xffff);
}

static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src,
                                 int msb, int lsb, int ofs, int z)
{
    /* Format RIE-f */
    tcg_out16(s, (RIE_RISBG & 0xff00) | (dest << 4) | src);
    tcg_out16(s, (msb << 8) | (z << 7) | lsb);
    tcg_out16(s, (ofs << 8) | (RIE_RISBG & 0xff));
}

static void tgen_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
    if (HAVE_FACILITY(EXT_IMM)) {
        tcg_out_insn(s, RRE, LGBR, dest, src);
        return;
    }

    if (type == TCG_TYPE_I32) {
        if (dest == src) {
            tcg_out_sh32(s, RS_SLL, dest, TCG_REG_NONE, 24);
        } else {
            tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 24);
        }
        tcg_out_sh32(s, RS_SRA, dest, TCG_REG_NONE, 24);
    } else {
        tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 56);
        tcg_out_sh64(s, RSY_SRAG, dest, dest, TCG_REG_NONE, 56);
    }
}

static void tgen_ext8u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
    if (HAVE_FACILITY(EXT_IMM)) {
        tcg_out_insn(s, RRE, LLGCR, dest, src);
        return;
    }

    if (dest == src) {
        tcg_out_movi(s, type, TCG_TMP0, 0xff);
        src = TCG_TMP0;
    } else {
        tcg_out_movi(s, type, dest, 0xff);
    }
    if (type == TCG_TYPE_I32) {
        tcg_out_insn(s, RR, NR, dest, src);
    } else {
        tcg_out_insn(s, RRE, NGR, dest, src);
    }
}

static void tgen_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
    if (HAVE_FACILITY(EXT_IMM)) {
        tcg_out_insn(s, RRE, LGHR, dest, src);
        return;
    }

    if (type == TCG_TYPE_I32) {
        if (dest == src) {
            tcg_out_sh32(s, RS_SLL, dest, TCG_REG_NONE, 16);
        } else {
            tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 16);
        }
        tcg_out_sh32(s, RS_SRA, dest, TCG_REG_NONE, 16);
    } else {
        tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 48);
        tcg_out_sh64(s, RSY_SRAG, dest, dest, TCG_REG_NONE, 48);
    }
}

static void tgen_ext16u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
    if (HAVE_FACILITY(EXT_IMM)) {
        tcg_out_insn(s, RRE, LLGHR, dest, src);
        return;
    }

    if (dest == src) {
        tcg_out_movi(s, type, TCG_TMP0, 0xffff);
        src = TCG_TMP0;
    } else {
        tcg_out_movi(s, type, dest, 0xffff);
    }
    if (type == TCG_TYPE_I32) {
        tcg_out_insn(s, RR, NR, dest, src);
    } else {
        tcg_out_insn(s, RRE, NGR, dest, src);
    }
}

static inline void tgen_ext32s(TCGContext *s, TCGReg dest, TCGReg src)
{
    tcg_out_insn(s, RRE, LGFR, dest, src);
}

static inline void tgen_ext32u(TCGContext *s, TCGReg dest, TCGReg src)
{
    tcg_out_insn(s, RRE, LLGFR, dest, src);
}

/* Accept bit patterns like these:
    0....01....1
    1....10....0
    1..10..01..1
    0..01..10..0
   Copied from gcc sources.  */
static inline bool risbg_mask(uint64_t c)
{
    uint64_t lsb;
    /* We don't change the number of transitions by inverting,
       so make sure we start with the LSB zero.  */
    if (c & 1) {
        c = ~c;
    }
    /* Reject all zeros or all ones.  */
    if (c == 0) {
        return false;
    }
    /* Find the first transition.  */
    lsb = c & -c;
    /* Invert to look for a second transition.  */
    c = ~c;
    /* Erase the first transition.  */
    c &= -lsb;
    /* Find the second transition, if any.  */
    lsb = c & -c;
    /* Match if all the bits are 1's, or if c is zero.  */
    return c == -lsb;
}
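
/*
 * Worked examples (added for clarity): 0x0ff0 has a single contiguous run
 * of ones and is accepted, as is its wrap-around complement
 * 0xfffffffffffff00f; 0xf0f0 has two separate runs, which leaves a third
 * transition behind, so it is rejected.
 */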

static void tgen_andi_risbg(TCGContext *s, TCGReg out, TCGReg in, uint64_t val)
{
    int msb, lsb;
    if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
        /* Achieve wraparound by swapping msb and lsb.  */
        msb = 64 - ctz64(~val);
        lsb = clz64(~val) - 1;
    } else {
        msb = clz64(val);
        lsb = 63 - ctz64(val);
    }
    tcg_out_risbg(s, out, in, msb, lsb, 0, 1);
}
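
/*
 * Worked example (added for clarity): val = 0xffff00000000ffff has both
 * the top and bottom bits set, so the zeros form the excluded middle:
 * ~val = 0x0000ffffffff0000, giving msb = 64 - 16 = 48 and
 * lsb = 16 - 1 = 15.  With msb > lsb, RISBG selects the wrap-around bit
 * range 48..63,0..15 in the instruction's MSB-first numbering.
 */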

static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
{
    static const S390Opcode ni_insns[4] = {
        RI_NILL, RI_NILH, RI_NIHL, RI_NIHH
    };
    static const S390Opcode nif_insns[2] = {
        RIL_NILF, RIL_NIHF
    };
    uint64_t valid = (type == TCG_TYPE_I32 ? 0xffffffffull : -1ull);
    int i;

    /* Look for the zero-extensions.  */
    if ((val & valid) == 0xffffffff) {
        tgen_ext32u(s, dest, dest);
        return;
    }
    if (HAVE_FACILITY(EXT_IMM)) {
        if ((val & valid) == 0xff) {
            tgen_ext8u(s, TCG_TYPE_I64, dest, dest);
            return;
        }
        if ((val & valid) == 0xffff) {
            tgen_ext16u(s, TCG_TYPE_I64, dest, dest);
            return;
        }
    }

    /* Try all 32-bit insns that can perform it in one go.  */
    for (i = 0; i < 4; i++) {
        tcg_target_ulong mask = ~(0xffffull << i*16);
        if (((val | ~valid) & mask) == mask) {
            tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
            return;
        }
    }

    /* Try all 48-bit insns that can perform it in one go.  */
    if (HAVE_FACILITY(EXT_IMM)) {
        for (i = 0; i < 2; i++) {
            tcg_target_ulong mask = ~(0xffffffffull << i*32);
            if (((val | ~valid) & mask) == mask) {
                tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32);
                return;
            }
        }
    }
    if (HAVE_FACILITY(GEN_INST_EXT) && risbg_mask(val)) {
        tgen_andi_risbg(s, dest, dest, val);
        return;
    }

    /* Use the constant pool if USE_REG_TB, but not for small constants.  */
    if (USE_REG_TB) {
        if (!maybe_out_small_movi(s, type, TCG_TMP0, val)) {
            tcg_out_insn(s, RXY, NG, dest, TCG_REG_TB, TCG_REG_NONE, 0);
            new_pool_label(s, val & valid, R_390_20, s->code_ptr - 2,
                           tcg_tbrel_diff(s, NULL));
            return;
        }
    } else {
        tcg_out_movi(s, type, TCG_TMP0, val);
    }
    if (type == TCG_TYPE_I32) {
        tcg_out_insn(s, RR, NR, dest, TCG_TMP0);
    } else {
        tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0);
    }
}

static void tgen_ori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
{
    static const S390Opcode oi_insns[4] = {
        RI_OILL, RI_OILH, RI_OIHL, RI_OIHH
    };
    static const S390Opcode oif_insns[2] = {
        RIL_OILF, RIL_OIHF
    };

    int i;

    /* Look for no-op.  */
    if (unlikely(val == 0)) {
        return;
    }

    /* Try all 32-bit insns that can perform it in one go.  */
    for (i = 0; i < 4; i++) {
        tcg_target_ulong mask = (0xffffull << i*16);
        if ((val & mask) != 0 && (val & ~mask) == 0) {
            tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16);
            return;
        }
    }

    /* Try all 48-bit insns that can perform it in one go.  */
    if (HAVE_FACILITY(EXT_IMM)) {
        for (i = 0; i < 2; i++) {
            tcg_target_ulong mask = (0xffffffffull << i*32);
            if ((val & mask) != 0 && (val & ~mask) == 0) {
                tcg_out_insn_RIL(s, oif_insns[i], dest, val >> i*32);
                return;
            }
        }
    }

    /* Use the constant pool if USE_REG_TB, but not for small constants.  */
    if (maybe_out_small_movi(s, type, TCG_TMP0, val)) {
        if (type == TCG_TYPE_I32) {
            tcg_out_insn(s, RR, OR, dest, TCG_TMP0);
        } else {
            tcg_out_insn(s, RRE, OGR, dest, TCG_TMP0);
        }
    } else if (USE_REG_TB) {
        tcg_out_insn(s, RXY, OG, dest, TCG_REG_TB, TCG_REG_NONE, 0);
        new_pool_label(s, val, R_390_20, s->code_ptr - 2,
                       tcg_tbrel_diff(s, NULL));
    } else {
        /* Perform the OR via sequential modifications to the high and
           low parts.  Do this via recursion to handle 16-bit vs 32-bit
           masks in each half.  */
        tcg_debug_assert(HAVE_FACILITY(EXT_IMM));
        tgen_ori(s, type, dest, val & 0x00000000ffffffffull);
        tgen_ori(s, type, dest, val & 0xffffffff00000000ull);
    }
}

static void tgen_xori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
{
    /* Try all 48-bit insns that can perform it in one go.  */
    if (HAVE_FACILITY(EXT_IMM)) {
        if ((val & 0xffffffff00000000ull) == 0) {
            tcg_out_insn(s, RIL, XILF, dest, val);
            return;
        }
        if ((val & 0x00000000ffffffffull) == 0) {
            tcg_out_insn(s, RIL, XIHF, dest, val >> 32);
            return;
        }
    }

    /* Use the constant pool if USE_REG_TB, but not for small constants.  */
    if (maybe_out_small_movi(s, type, TCG_TMP0, val)) {
        if (type == TCG_TYPE_I32) {
            tcg_out_insn(s, RR, XR, dest, TCG_TMP0);
        } else {
            tcg_out_insn(s, RRE, XGR, dest, TCG_TMP0);
        }
    } else if (USE_REG_TB) {
        tcg_out_insn(s, RXY, XG, dest, TCG_REG_TB, TCG_REG_NONE, 0);
        new_pool_label(s, val, R_390_20, s->code_ptr - 2,
                       tcg_tbrel_diff(s, NULL));
    } else {
        /* Perform the xor by parts.  */
        tcg_debug_assert(HAVE_FACILITY(EXT_IMM));
        if (val & 0xffffffff) {
            tcg_out_insn(s, RIL, XILF, dest, val);
        }
        if (val > 0xffffffff) {
            tcg_out_insn(s, RIL, XIHF, dest, val >> 32);
        }
    }
}

static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
                    TCGArg c2, bool c2const, bool need_carry)
{
    bool is_unsigned = is_unsigned_cond(c);
    S390Opcode op;

    if (c2const) {
        if (c2 == 0) {
            if (!(is_unsigned && need_carry)) {
                if (type == TCG_TYPE_I32) {
                    tcg_out_insn(s, RR, LTR, r1, r1);
                } else {
                    tcg_out_insn(s, RRE, LTGR, r1, r1);
                }
                return tcg_cond_to_ltr_cond[c];
            }
        }

        if (!is_unsigned && c2 == (int16_t)c2) {
            op = (type == TCG_TYPE_I32 ? RI_CHI : RI_CGHI);
            tcg_out_insn_RI(s, op, r1, c2);
            goto exit;
        }

        if (HAVE_FACILITY(EXT_IMM)) {
            if (type == TCG_TYPE_I32) {
                op = (is_unsigned ? RIL_CLFI : RIL_CFI);
                tcg_out_insn_RIL(s, op, r1, c2);
                goto exit;
            } else if (c2 == (is_unsigned
                              ? (TCGArg)(uint32_t)c2
                              : (TCGArg)(int32_t)c2)) {
                op = (is_unsigned ? RIL_CLGFI : RIL_CGFI);
                tcg_out_insn_RIL(s, op, r1, c2);
                goto exit;
            }
        }

        /* Use the constant pool, but not for small constants.  */
        if (maybe_out_small_movi(s, type, TCG_TMP0, c2)) {
            c2 = TCG_TMP0;
            /* fall through to reg-reg */
        } else if (USE_REG_TB) {
            if (type == TCG_TYPE_I32) {
                op = (is_unsigned ? RXY_CLY : RXY_CY);
                tcg_out_insn_RXY(s, op, r1, TCG_REG_TB, TCG_REG_NONE, 0);
                new_pool_label(s, (uint32_t)c2, R_390_20, s->code_ptr - 2,
                               4 - tcg_tbrel_diff(s, NULL));
            } else {
                op = (is_unsigned ? RXY_CLG : RXY_CG);
                tcg_out_insn_RXY(s, op, r1, TCG_REG_TB, TCG_REG_NONE, 0);
                new_pool_label(s, c2, R_390_20, s->code_ptr - 2,
                               tcg_tbrel_diff(s, NULL));
            }
            goto exit;
        } else {
            if (type == TCG_TYPE_I32) {
                op = (is_unsigned ? RIL_CLRL : RIL_CRL);
                tcg_out_insn_RIL(s, op, r1, 0);
                new_pool_label(s, (uint32_t)c2, R_390_PC32DBL,
                               s->code_ptr - 2, 2 + 4);
            } else {
                op = (is_unsigned ? RIL_CLGRL : RIL_CGRL);
                tcg_out_insn_RIL(s, op, r1, 0);
                new_pool_label(s, c2, R_390_PC32DBL, s->code_ptr - 2, 2);
            }
            goto exit;
        }
    }

    if (type == TCG_TYPE_I32) {
        op = (is_unsigned ? RR_CLR : RR_CR);
        tcg_out_insn_RR(s, op, r1, c2);
    } else {
        op = (is_unsigned ? RRE_CLGR : RRE_CGR);
        tcg_out_insn_RRE(s, op, r1, c2);
    }

 exit:
    return tcg_cond_to_s390_cond[c];
}

static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
                         TCGReg dest, TCGReg c1, TCGArg c2, int c2const)
{
    int cc;
    bool have_loc;

    /* With LOC2, we can always emit the minimum 3 insns.  */
    if (HAVE_FACILITY(LOAD_ON_COND2)) {
        /* Emit: d = 0, d = (cc ? 1 : d).  */
        cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
        tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
        tcg_out_insn(s, RIE, LOCGHI, dest, 1, cc);
        return;
    }

    have_loc = HAVE_FACILITY(LOAD_ON_COND);

    /* For HAVE_LOC, only the paths through GTU/GT/LEU/LE are smaller.  */
 restart:
    switch (cond) {
    case TCG_COND_NE:
        /* X != 0 is X > 0.  */
        if (c2const && c2 == 0) {
            cond = TCG_COND_GTU;
        } else {
            break;
        }
        /* fallthru */

    case TCG_COND_GTU:
    case TCG_COND_GT:
        /* The result of a compare has CC=2 for GT and CC=3 unused.
           ADD LOGICAL WITH CARRY considers (CC & 2) the carry bit.  */
        tgen_cmp(s, type, cond, c1, c2, c2const, true);
        tcg_out_movi(s, type, dest, 0);
        tcg_out_insn(s, RRE, ALCGR, dest, dest);
        return;

    case TCG_COND_EQ:
        /* X == 0 is X <= 0.  */
        if (c2const && c2 == 0) {
            cond = TCG_COND_LEU;
        } else {
            break;
        }
        /* fallthru */

    case TCG_COND_LEU:
    case TCG_COND_LE:
        /* As above, but we're looking for borrow, or !carry.
           The second insn computes d - d - borrow, or -1 for true
           and 0 for false.  So we must mask to 1 bit afterward.  */
        tgen_cmp(s, type, cond, c1, c2, c2const, true);
        tcg_out_insn(s, RRE, SLBGR, dest, dest);
        tgen_andi(s, type, dest, 1);
        return;

    case TCG_COND_GEU:
    case TCG_COND_LTU:
    case TCG_COND_LT:
    case TCG_COND_GE:
        /* Swap operands so that we can use LEU/GTU/GT/LE.  */
        if (c2const) {
            if (have_loc) {
                break;
            }
            tcg_out_movi(s, type, TCG_TMP0, c2);
            c2 = c1;
            c2const = 0;
            c1 = TCG_TMP0;
        } else {
            TCGReg t = c1;
            c1 = c2;
            c2 = t;
        }
        cond = tcg_swap_cond(cond);
        goto restart;

    default:
        g_assert_not_reached();
    }

    cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
    if (have_loc) {
        /* Emit: d = 0, t = 1, d = (cc ? t : d).  */
        tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
        tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 1);
        tcg_out_insn(s, RRF, LOCGR, dest, TCG_TMP0, cc);
    } else {
        /* Emit: d = 1; if (cc) goto over; d = 0; over:  */
        tcg_out_movi(s, type, dest, 1);
        tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1);
        tcg_out_movi(s, type, dest, 0);
    }
}
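
/*
 * Worked example of the carry trick above (added for clarity): for GTU,
 * COMPARE LOGICAL sets CC=2 when the first operand is high, so ALCGR
 * computes dest = 0 + 0 + carry = 1.  For LEU, SLBGR computes
 * dest = dest - dest - borrow, i.e. -1 when the condition holds, which
 * the final "and 1" reduces to the expected 0/1 setcond result.
 */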

static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest,
                         TCGReg c1, TCGArg c2, int c2const,
                         TCGArg v3, int v3const)
{
    int cc;
    if (HAVE_FACILITY(LOAD_ON_COND)) {
        cc = tgen_cmp(s, type, c, c1, c2, c2const, false);
        if (v3const) {
            tcg_out_insn(s, RIE, LOCGHI, dest, v3, cc);
        } else {
            tcg_out_insn(s, RRF, LOCGR, dest, v3, cc);
        }
    } else {
        c = tcg_invert_cond(c);
        cc = tgen_cmp(s, type, c, c1, c2, c2const, false);

        /* Emit: if (cc) goto over; dest = r3; over:  */
        tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1);
        tcg_out_insn(s, RRE, LGR, dest, v3);
    }
}

static void tgen_clz(TCGContext *s, TCGReg dest, TCGReg a1,
                     TCGArg a2, int a2const)
{
    /* Since this sets both R and R+1, we have no choice but to store the
       result into R0, allowing R1 == TCG_TMP0 to be clobbered as well.  */
    QEMU_BUILD_BUG_ON(TCG_TMP0 != TCG_REG_R1);
    tcg_out_insn(s, RRE, FLOGR, TCG_REG_R0, a1);

    if (a2const && a2 == 64) {
        tcg_out_mov(s, TCG_TYPE_I64, dest, TCG_REG_R0);
    } else {
        if (a2const) {
            tcg_out_movi(s, TCG_TYPE_I64, dest, a2);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dest, a2);
        }
        if (HAVE_FACILITY(LOAD_ON_COND)) {
            /* Emit: if (one bit found) dest = r0.  */
            tcg_out_insn(s, RRF, LOCGR, dest, TCG_REG_R0, 2);
        } else {
            /* Emit: if (no one bit found) goto over; dest = r0; over:  */
            tcg_out_insn(s, RI, BRC, 8, (4 + 4) >> 1);
            tcg_out_insn(s, RRE, LGR, dest, TCG_REG_R0);
        }
    }
}

static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
                         int ofs, int len, int z)
{
    int lsb = (63 - ofs);
    int msb = lsb - (len - 1);
    tcg_out_risbg(s, dest, src, msb, lsb, ofs, z);
}
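
/*
 * Worked example (added for clarity): depositing a 16-bit field at ofs 32
 * gives lsb = 63 - 32 = 31 and msb = 31 - 15 = 16 in RISBG's MSB-first
 * bit numbering, i.e. value bits 32..47; the rotate count "ofs" brings
 * the low bits of src into position before insertion.
 */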

static void tgen_extract(TCGContext *s, TCGReg dest, TCGReg src,
                         int ofs, int len)
{
    tcg_out_risbg(s, dest, src, 64 - len, 63, 64 - ofs, 1);
}

static void tgen_gotoi(TCGContext *s, int cc, const tcg_insn_unit *dest)
{
    ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
    if (off == (int16_t)off) {
        tcg_out_insn(s, RI, BRC, cc, off);
    } else if (off == (int32_t)off) {
        tcg_out_insn(s, RIL, BRCL, cc, off);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
        tcg_out_insn(s, RR, BCR, cc, TCG_TMP0);
    }
}

static void tgen_branch(TCGContext *s, int cc, TCGLabel *l)
{
    if (l->has_value) {
        tgen_gotoi(s, cc, l->u.value_ptr);
    } else if (USE_LONG_BRANCHES) {
        tcg_out16(s, RIL_BRCL | (cc << 4));
        tcg_out_reloc(s, s->code_ptr, R_390_PC32DBL, l, 2);
        s->code_ptr += 2;
    } else {
        tcg_out16(s, RI_BRC | (cc << 4));
        tcg_out_reloc(s, s->code_ptr, R_390_PC16DBL, l, 2);
        s->code_ptr += 1;
    }
}

static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc,
                                TCGReg r1, TCGReg r2, TCGLabel *l)
{
    tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
    tcg_out16(s, (opc & 0xff00) | (r1 << 4) | r2);
    tcg_out16(s, 0);
    tcg_out16(s, cc << 12 | (opc & 0xff));
}

static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc,
                                    TCGReg r1, int i2, TCGLabel *l)
{
    tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
    tcg_out16(s, (opc & 0xff00) | (r1 << 4) | cc);
    tcg_out16(s, 0);
    tcg_out16(s, (i2 << 8) | (opc & 0xff));
}

static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
                        TCGReg r1, TCGArg c2, int c2const, TCGLabel *l)
{
    int cc;

    if (HAVE_FACILITY(GEN_INST_EXT)) {
        bool is_unsigned = is_unsigned_cond(c);
        bool in_range;
        S390Opcode opc;

        cc = tcg_cond_to_s390_cond[c];

        if (!c2const) {
            opc = (type == TCG_TYPE_I32
                   ? (is_unsigned ? RIE_CLRJ : RIE_CRJ)
                   : (is_unsigned ? RIE_CLGRJ : RIE_CGRJ));
            tgen_compare_branch(s, opc, cc, r1, c2, l);
            return;
        }

        /* COMPARE IMMEDIATE AND BRANCH RELATIVE has an 8-bit immediate field.
           If the immediate we've been given does not fit that range, we'll
           fall back to separate compare and branch instructions using the
           larger comparison range afforded by COMPARE IMMEDIATE.  */
        if (type == TCG_TYPE_I32) {
            if (is_unsigned) {
                opc = RIE_CLIJ;
                in_range = (uint32_t)c2 == (uint8_t)c2;
            } else {
                opc = RIE_CIJ;
                in_range = (int32_t)c2 == (int8_t)c2;
            }
        } else {
            if (is_unsigned) {
                opc = RIE_CLGIJ;
                in_range = (uint64_t)c2 == (uint8_t)c2;
            } else {
                opc = RIE_CGIJ;
                in_range = (int64_t)c2 == (int8_t)c2;
            }
        }
        if (in_range) {
            tgen_compare_imm_branch(s, opc, cc, r1, c2, l);
            return;
        }
    }

    cc = tgen_cmp(s, type, c, r1, c2, c2const, false);
    tgen_branch(s, cc, l);
}

static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest)
{
    ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
    if (off == (int32_t)off) {
        tcg_out_insn(s, RIL, BRASL, TCG_REG_R14, off);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
        tcg_out_insn(s, RR, BASR, TCG_REG_R14, TCG_TMP0);
    }
}

static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,
                                   TCGReg base, TCGReg index, int disp)
{
    switch (opc & (MO_SSIZE | MO_BSWAP)) {
    case MO_UB:
        tcg_out_insn(s, RXY, LLGC, data, base, index, disp);
        break;
    case MO_SB:
        tcg_out_insn(s, RXY, LGB, data, base, index, disp);
        break;

    case MO_UW | MO_BSWAP:
        /* swapped unsigned halfword load with upper bits zeroed */
        tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
        tgen_ext16u(s, TCG_TYPE_I64, data, data);
        break;
    case MO_UW:
        tcg_out_insn(s, RXY, LLGH, data, base, index, disp);
        break;

    case MO_SW | MO_BSWAP:
        /* swapped sign-extended halfword load */
        tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
        tgen_ext16s(s, TCG_TYPE_I64, data, data);
        break;
    case MO_SW:
        tcg_out_insn(s, RXY, LGH, data, base, index, disp);
        break;

    case MO_UL | MO_BSWAP:
        /* swapped unsigned int load with upper bits zeroed */
        tcg_out_insn(s, RXY, LRV, data, base, index, disp);
        tgen_ext32u(s, data, data);
        break;
    case MO_UL:
        tcg_out_insn(s, RXY, LLGF, data, base, index, disp);
        break;

    case MO_SL | MO_BSWAP:
        /* swapped sign-extended int load */
        tcg_out_insn(s, RXY, LRV, data, base, index, disp);
        tgen_ext32s(s, data, data);
        break;
    case MO_SL:
        tcg_out_insn(s, RXY, LGF, data, base, index, disp);
        break;

    case MO_Q | MO_BSWAP:
        tcg_out_insn(s, RXY, LRVG, data, base, index, disp);
        break;
    case MO_Q:
        tcg_out_insn(s, RXY, LG, data, base, index, disp);
        break;

    default:
        tcg_abort();
    }
}

static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data,
                                   TCGReg base, TCGReg index, int disp)
{
    switch (opc & (MO_SIZE | MO_BSWAP)) {
    case MO_UB:
        if (disp >= 0 && disp < 0x1000) {
            tcg_out_insn(s, RX, STC, data, base, index, disp);
        } else {
            tcg_out_insn(s, RXY, STCY, data, base, index, disp);
        }
        break;

    case MO_UW | MO_BSWAP:
        tcg_out_insn(s, RXY, STRVH, data, base, index, disp);
        break;
    case MO_UW:
        if (disp >= 0 && disp < 0x1000) {
            tcg_out_insn(s, RX, STH, data, base, index, disp);
        } else {
            tcg_out_insn(s, RXY, STHY, data, base, index, disp);
        }
        break;

    case MO_UL | MO_BSWAP:
        tcg_out_insn(s, RXY, STRV, data, base, index, disp);
        break;
    case MO_UL:
        if (disp >= 0 && disp < 0x1000) {
            tcg_out_insn(s, RX, ST, data, base, index, disp);
        } else {
            tcg_out_insn(s, RXY, STY, data, base, index, disp);
        }
        break;

    case MO_Q | MO_BSWAP:
        tcg_out_insn(s, RXY, STRVG, data, base, index, disp);
        break;
    case MO_Q:
        tcg_out_insn(s, RXY, STG, data, base, index, disp);
        break;

    default:
        tcg_abort();
    }
}

#if defined(CONFIG_SOFTMMU)
#include "../tcg-ldst.c.inc"

/* We're expecting to use a 20-bit negative offset on the tlb memory ops.  */
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19));

1772/* Load and compare a TLB entry, leaving the flags set.  Loads the TLB
1773   addend into R2.  Returns a register with the sanitized guest address.  */
1774static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
1775                               int mem_index, bool is_ld)
1776{
1777    unsigned s_bits = opc & MO_SIZE;
1778    unsigned a_bits = get_alignment_bits(opc);
1779    unsigned s_mask = (1 << s_bits) - 1;
1780    unsigned a_mask = (1 << a_bits) - 1;
1781    int fast_off = TLB_MASK_TABLE_OFS(mem_index);
1782    int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
1783    int table_off = fast_off + offsetof(CPUTLBDescFast, table);
1784    int ofs, a_off;
1785    uint64_t tlb_mask;
1786
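    /* Compute the CPUTLBEntry address:
       R2 = table + ((addr >> (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)) & mask),
       where mask and table come from the CPUTLBDescFast for mem_index.  */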
1787    tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE,
1788                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1789    tcg_out_insn(s, RXY, NG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, mask_off);
1790    tcg_out_insn(s, RXY, AG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, table_off);
1791
1792    /* For aligned accesses, we check the first byte and include the alignment
1793       bits within the address.  For unaligned access, we check that we don't
1794       cross pages using the address of the last byte of the access.  */
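    /* E.g. an 8-byte access with 4-byte alignment has s_mask = 7 and
       a_mask = 3, so a_off = 4: adding 4 changes the page number of any
       access that crosses a page, while the a_mask bits kept in tlb_mask
       make any misaligned address miscompare against the entry.  */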
1795    a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
1796    tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1797    if (HAVE_FACILITY(GEN_INST_EXT) && a_off == 0) {
1798        tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask);
1799    } else {
1800        tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off);
1801        tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask);
1802    }
1803
1804    if (is_ld) {
1805        ofs = offsetof(CPUTLBEntry, addr_read);
1806    } else {
1807        ofs = offsetof(CPUTLBEntry, addr_write);
1808    }
1809    if (TARGET_LONG_BITS == 32) {
1810        tcg_out_insn(s, RX, C, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
1811    } else {
1812        tcg_out_insn(s, RXY, CG, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
1813    }
1814
1815    tcg_out_insn(s, RXY, LG, TCG_REG_R2, TCG_REG_R2, TCG_REG_NONE,
1816                 offsetof(CPUTLBEntry, addend));
1817
1818    if (TARGET_LONG_BITS == 32) {
1819        tgen_ext32u(s, TCG_REG_R3, addr_reg);
1820        return TCG_REG_R3;
1821    }
1822    return addr_reg;
1823}
1824
1825static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
1826                                TCGReg data, TCGReg addr,
1827                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1828{
1829    TCGLabelQemuLdst *label = new_ldst_label(s);
1830
1831    label->is_ld = is_ld;
1832    label->oi = oi;
1833    label->datalo_reg = data;
1834    label->addrlo_reg = addr;
1835    label->raddr = tcg_splitwx_to_rx(raddr);
1836    label->label_ptr[0] = label_ptr;
1837}
1838
1839static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1840{
1841    TCGReg addr_reg = lb->addrlo_reg;
1842    TCGReg data_reg = lb->datalo_reg;
1843    MemOpIdx oi = lb->oi;
1844    MemOp opc = get_memop(oi);
1845
1846    if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
1847                     (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1848        return false;
1849    }
1850
1851    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
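    /* Arguments go in R2..R5 per the ABI.  For 32-bit guests, the
       zero-extended address is already in R3, left there by
       tcg_out_tlb_read, so no move is needed in that case.  */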
1852    if (TARGET_LONG_BITS == 64) {
1853        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
1854    }
1855    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi);
1856    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr);
1857    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]);
1858    tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
1859
1860    tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1861    return true;
1862}
1863
1864static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1865{
1866    TCGReg addr_reg = lb->addrlo_reg;
1867    TCGReg data_reg = lb->datalo_reg;
1868    MemOpIdx oi = lb->oi;
1869    MemOp opc = get_memop(oi);
1870
1871    if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
1872                     (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1873        return false;
1874    }
1875
1876    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
1877    if (TARGET_LONG_BITS == 64) {
1878        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
1879    }
1880    switch (opc & MO_SIZE) {
1881    case MO_UB:
1882        tgen_ext8u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
1883        break;
1884    case MO_UW:
1885        tgen_ext16u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
1886        break;
1887    case MO_UL:
1888        tgen_ext32u(s, TCG_REG_R4, data_reg);
1889        break;
1890    case MO_Q:
1891        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
1892        break;
1893    default:
1894        tcg_abort();
1895    }
1896    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi);
1897    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr);
1898    tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1899
1900    tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1901    return true;
1902}
1903#else
1904static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg,
1905                                  TCGReg *index_reg, tcg_target_long *disp)
1906{
1907    if (TARGET_LONG_BITS == 32) {
1908        tgen_ext32u(s, TCG_TMP0, *addr_reg);
1909        *addr_reg = TCG_TMP0;
1910    }
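    /* A guest_base below 0x80000 fits the positive half of the 20-bit
       signed RXY displacement and can be folded into each access;
       larger bases use the register set up in the prologue.  */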
1911    if (guest_base < 0x80000) {
1912        *index_reg = TCG_REG_NONE;
1913        *disp = guest_base;
1914    } else {
1915        *index_reg = TCG_GUEST_BASE_REG;
1916        *disp = 0;
1917    }
1918}
1919#endif /* CONFIG_SOFTMMU */
1920
1921static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
1922                            MemOpIdx oi)
1923{
1924    MemOp opc = get_memop(oi);
1925#ifdef CONFIG_SOFTMMU
1926    unsigned mem_index = get_mmuidx(oi);
1927    tcg_insn_unit *label_ptr;
1928    TCGReg base_reg;
1929
1930    base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1);
1931
1932    tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
1933    label_ptr = s->code_ptr;
1934    s->code_ptr += 1;
1935
1936    tcg_out_qemu_ld_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
1937
1938    add_qemu_ldst_label(s, 1, oi, data_reg, addr_reg, s->code_ptr, label_ptr);
1939#else
1940    TCGReg index_reg;
1941    tcg_target_long disp;
1942
1943    tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp);
1944    tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, index_reg, disp);
1945#endif
1946}
1947
1948static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
1949                            MemOpIdx oi)
1950{
1951    MemOp opc = get_memop(oi);
1952#ifdef CONFIG_SOFTMMU
1953    unsigned mem_index = get_mmuidx(oi);
1954    tcg_insn_unit *label_ptr;
1955    TCGReg base_reg;
1956
1957    base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0);
1958
1959    tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
1960    label_ptr = s->code_ptr;
1961    s->code_ptr += 1;
1962
1963    tcg_out_qemu_st_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
1964
1965    add_qemu_ldst_label(s, 0, oi, data_reg, addr_reg, s->code_ptr, label_ptr);
1966#else
1967    TCGReg index_reg;
1968    tcg_target_long disp;
1969
1970    tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp);
1971    tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, index_reg, disp);
1972#endif
1973}
1974
1975# define OP_32_64(x) \
1976        case glue(glue(INDEX_op_,x),_i32): \
1977        case glue(glue(INDEX_op_,x),_i64)
1978
1979static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1980                              const TCGArg args[TCG_MAX_OP_ARGS],
1981                              const int const_args[TCG_MAX_OP_ARGS])
1982{
1983    S390Opcode op, op2;
1984    TCGArg a0, a1, a2;
1985
1986    switch (opc) {
1987    case INDEX_op_exit_tb:
1988        /* Reuse the zeroing that exists for goto_ptr.  */
1989        a0 = args[0];
1990        if (a0 == 0) {
1991            tgen_gotoi(s, S390_CC_ALWAYS, tcg_code_gen_epilogue);
1992        } else {
1993            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, a0);
1994            tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr);
1995        }
1996        break;
1997
1998    case INDEX_op_goto_tb:
1999        a0 = args[0];
2000        if (s->tb_jmp_insn_offset) {
2001            /*
2002             * branch displacement must be aligned for atomic patching;
2003             * see if we need to add extra nop before branch
2004             */
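            /* BRCL is six bytes: a 2-byte opcode/mask halfword followed
               by a 4-byte halfword displacement; the nop, when needed,
               puts that displacement on a 4-byte boundary.  */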
2005            if (!QEMU_PTR_IS_ALIGNED(s->code_ptr + 1, 4)) {
2006                tcg_out16(s, NOP);
2007            }
2008            tcg_debug_assert(!USE_REG_TB);
2009            tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4));
2010            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
2011            s->code_ptr += 2;
2012        } else {
2013            /* load address stored at s->tb_jmp_target_addr + a0 */
2014            tcg_out_ld_abs(s, TCG_TYPE_PTR, TCG_REG_TB,
2015                           tcg_splitwx_to_rx(s->tb_jmp_target_addr + a0));
2016            /* and go there */
2017            tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_TB);
2018        }
2019        set_jmp_reset_offset(s, a0);
2020
2021        /* For the unlinked path of goto_tb, we need to reset
2022           TCG_REG_TB to the beginning of this TB.  */
2023        if (USE_REG_TB) {
2024            int ofs = -tcg_current_code_size(s);
2025            /* All TBs are restricted to 64KiB by the unwind info. */
2026            tcg_debug_assert(ofs == sextract64(ofs, 0, 20));
2027            tcg_out_insn(s, RXY, LAY, TCG_REG_TB,
2028                         TCG_REG_TB, TCG_REG_NONE, ofs);
2029        }
2030        break;
2031
2032    case INDEX_op_goto_ptr:
2033        a0 = args[0];
2034        if (USE_REG_TB) {
2035            tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, a0);
2036        }
2037        tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, a0);
2038        break;
2039
2040    OP_32_64(ld8u):
2041        /* ??? LLC (RXY format) is only present with the extended-immediate
2042           facility, whereas LLGC is always present.  */
2043        tcg_out_mem(s, 0, RXY_LLGC, args[0], args[1], TCG_REG_NONE, args[2]);
2044        break;
2045
2046    OP_32_64(ld8s):
2047        /* ??? LB is no smaller than LGB, so there is no point in using it.  */
2048        tcg_out_mem(s, 0, RXY_LGB, args[0], args[1], TCG_REG_NONE, args[2]);
2049        break;
2050
2051    OP_32_64(ld16u):
2052        /* ??? LLH (RXY format) is only present with the extended-immediate
2053           facility, whereas LLGH is always present.  */
2054        tcg_out_mem(s, 0, RXY_LLGH, args[0], args[1], TCG_REG_NONE, args[2]);
2055        break;
2056
2057    case INDEX_op_ld16s_i32:
2058        tcg_out_mem(s, RX_LH, RXY_LHY, args[0], args[1], TCG_REG_NONE, args[2]);
2059        break;
2060
2061    case INDEX_op_ld_i32:
2062        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2063        break;
2064
2065    OP_32_64(st8):
2066        tcg_out_mem(s, RX_STC, RXY_STCY, args[0], args[1],
2067                    TCG_REG_NONE, args[2]);
2068        break;
2069
2070    OP_32_64(st16):
2071        tcg_out_mem(s, RX_STH, RXY_STHY, args[0], args[1],
2072                    TCG_REG_NONE, args[2]);
2073        break;
2074
2075    case INDEX_op_st_i32:
2076        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2077        break;
2078
2079    case INDEX_op_add_i32:
2080        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2081        if (const_args[2]) {
2082        do_addi_32:
2083            if (a0 == a1) {
2084                if (a2 == (int16_t)a2) {
2085                    tcg_out_insn(s, RI, AHI, a0, a2);
2086                    break;
2087                }
2088                if (HAVE_FACILITY(EXT_IMM)) {
2089                    tcg_out_insn(s, RIL, AFI, a0, a2);
2090                    break;
2091                }
2092            }
2093            tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
2094        } else if (a0 == a1) {
2095            tcg_out_insn(s, RR, AR, a0, a2);
2096        } else {
2097            tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
2098        }
2099        break;
2100    case INDEX_op_sub_i32:
2101        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2102        if (const_args[2]) {
2103            a2 = -a2;
2104            goto do_addi_32;
2105        } else if (a0 == a1) {
2106            tcg_out_insn(s, RR, SR, a0, a2);
2107        } else {
2108            tcg_out_insn(s, RRF, SRK, a0, a1, a2);
2109        }
2110        break;
2111
2112    case INDEX_op_and_i32:
2113        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2114        if (const_args[2]) {
2115            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2116            tgen_andi(s, TCG_TYPE_I32, a0, a2);
2117        } else if (a0 == a1) {
2118            tcg_out_insn(s, RR, NR, a0, a2);
2119        } else {
2120            tcg_out_insn(s, RRF, NRK, a0, a1, a2);
2121        }
2122        break;
2123    case INDEX_op_or_i32:
2124        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2125        if (const_args[2]) {
2126            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2127            tgen_ori(s, TCG_TYPE_I32, a0, a2);
2128        } else if (a0 == a1) {
2129            tcg_out_insn(s, RR, OR, a0, a2);
2130        } else {
2131            tcg_out_insn(s, RRF, ORK, a0, a1, a2);
2132        }
2133        break;
2134    case INDEX_op_xor_i32:
2135        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2136        if (const_args[2]) {
2137            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2138            tgen_xori(s, TCG_TYPE_I32, a0, a2);
2139        } else if (a0 == a1) {
2140            tcg_out_insn(s, RR, XR, a0, a2);
2141        } else {
2142            tcg_out_insn(s, RRF, XRK, a0, a1, a2);
2143        }
2144        break;
2145
2146    case INDEX_op_neg_i32:
2147        tcg_out_insn(s, RR, LCR, args[0], args[1]);
2148        break;
2149
2150    case INDEX_op_mul_i32:
2151        if (const_args[2]) {
2152            if ((int32_t)args[2] == (int16_t)args[2]) {
2153                tcg_out_insn(s, RI, MHI, args[0], args[2]);
2154            } else {
2155                tcg_out_insn(s, RIL, MSFI, args[0], args[2]);
2156            }
2157        } else {
2158            tcg_out_insn(s, RRE, MSR, args[0], args[2]);
2159        }
2160        break;
2161
2162    case INDEX_op_div2_i32:
2163        tcg_out_insn(s, RR, DR, TCG_REG_R2, args[4]);
2164        break;
2165    case INDEX_op_divu2_i32:
2166        tcg_out_insn(s, RRE, DLR, TCG_REG_R2, args[4]);
2167        break;
2168
2169    case INDEX_op_shl_i32:
2170        op = RS_SLL;
2171        op2 = RSY_SLLK;
2172    do_shift32:
2173        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2174        if (a0 == a1) {
2175            if (const_args[2]) {
2176                tcg_out_sh32(s, op, a0, TCG_REG_NONE, a2);
2177            } else {
2178                tcg_out_sh32(s, op, a0, a2, 0);
2179            }
2180        } else {
2181            /* Using tcg_out_sh64 here for the format; it is a 32-bit shift.  */
2182            if (const_args[2]) {
2183                tcg_out_sh64(s, op2, a0, a1, TCG_REG_NONE, a2);
2184            } else {
2185                tcg_out_sh64(s, op2, a0, a1, a2, 0);
2186            }
2187        }
2188        break;
2189    case INDEX_op_shr_i32:
2190        op = RS_SRL;
2191        op2 = RSY_SRLK;
2192        goto do_shift32;
2193    case INDEX_op_sar_i32:
2194        op = RS_SRA;
2195        op2 = RSY_SRAK;
2196        goto do_shift32;
2197
2198    case INDEX_op_rotl_i32:
2199        /* ??? Using tcg_out_sh64 here for the format; it is a 32-bit rol.  */
2200        if (const_args[2]) {
2201            tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_REG_NONE, args[2]);
2202        } else {
2203            tcg_out_sh64(s, RSY_RLL, args[0], args[1], args[2], 0);
2204        }
2205        break;
2206    case INDEX_op_rotr_i32:
2207        if (const_args[2]) {
2208            tcg_out_sh64(s, RSY_RLL, args[0], args[1],
2209                         TCG_REG_NONE, (32 - args[2]) & 31);
2210        } else {
2211            tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
2212            tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_TMP0, 0);
2213        }
2214        break;
2215
2216    case INDEX_op_ext8s_i32:
2217        tgen_ext8s(s, TCG_TYPE_I32, args[0], args[1]);
2218        break;
2219    case INDEX_op_ext16s_i32:
2220        tgen_ext16s(s, TCG_TYPE_I32, args[0], args[1]);
2221        break;
2222    case INDEX_op_ext8u_i32:
2223        tgen_ext8u(s, TCG_TYPE_I32, args[0], args[1]);
2224        break;
2225    case INDEX_op_ext16u_i32:
2226        tgen_ext16u(s, TCG_TYPE_I32, args[0], args[1]);
2227        break;
2228
2229    case INDEX_op_bswap16_i32:
2230        a0 = args[0], a1 = args[1], a2 = args[2];
2231        tcg_out_insn(s, RRE, LRVR, a0, a1);
2232        if (a2 & TCG_BSWAP_OS) {
2233            tcg_out_sh32(s, RS_SRA, a0, TCG_REG_NONE, 16);
2234        } else {
2235            tcg_out_sh32(s, RS_SRL, a0, TCG_REG_NONE, 16);
2236        }
2237        break;
2238    case INDEX_op_bswap16_i64:
2239        a0 = args[0], a1 = args[1], a2 = args[2];
2240        tcg_out_insn(s, RRE, LRVGR, a0, a1);
2241        if (a2 & TCG_BSWAP_OS) {
2242            tcg_out_sh64(s, RSY_SRAG, a0, a0, TCG_REG_NONE, 48);
2243        } else {
2244            tcg_out_sh64(s, RSY_SRLG, a0, a0, TCG_REG_NONE, 48);
2245        }
2246        break;
2247
2248    case INDEX_op_bswap32_i32:
2249        tcg_out_insn(s, RRE, LRVR, args[0], args[1]);
2250        break;
2251    case INDEX_op_bswap32_i64:
2252        a0 = args[0], a1 = args[1], a2 = args[2];
2253        tcg_out_insn(s, RRE, LRVR, a0, a1);
2254        if (a2 & TCG_BSWAP_OS) {
2255            tgen_ext32s(s, a0, a0);
2256        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2257            tgen_ext32u(s, a0, a0);
2258        }
2259        break;
2260
2261    case INDEX_op_add2_i32:
2262        if (const_args[4]) {
2263            tcg_out_insn(s, RIL, ALFI, args[0], args[4]);
2264        } else {
2265            tcg_out_insn(s, RR, ALR, args[0], args[4]);
2266        }
2267        tcg_out_insn(s, RRE, ALCR, args[1], args[5]);
2268        break;
2269    case INDEX_op_sub2_i32:
2270        if (const_args[4]) {
2271            tcg_out_insn(s, RIL, SLFI, args[0], args[4]);
2272        } else {
2273            tcg_out_insn(s, RR, SLR, args[0], args[4]);
2274        }
2275        tcg_out_insn(s, RRE, SLBR, args[1], args[5]);
2276        break;
2277
2278    case INDEX_op_br:
2279        tgen_branch(s, S390_CC_ALWAYS, arg_label(args[0]));
2280        break;
2281
2282    case INDEX_op_brcond_i32:
2283        tgen_brcond(s, TCG_TYPE_I32, args[2], args[0],
2284                    args[1], const_args[1], arg_label(args[3]));
2285        break;
2286    case INDEX_op_setcond_i32:
2287        tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1],
2288                     args[2], const_args[2]);
2289        break;
2290    case INDEX_op_movcond_i32:
2291        tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1],
2292                     args[2], const_args[2], args[3], const_args[3]);
2293        break;
2294
2295    case INDEX_op_qemu_ld_i32:
2296        /* ??? Technically we can use a non-extending instruction.  */
2297    case INDEX_op_qemu_ld_i64:
2298        tcg_out_qemu_ld(s, args[0], args[1], args[2]);
2299        break;
2300    case INDEX_op_qemu_st_i32:
2301    case INDEX_op_qemu_st_i64:
2302        tcg_out_qemu_st(s, args[0], args[1], args[2]);
2303        break;
2304
2305    case INDEX_op_ld16s_i64:
2306        tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]);
2307        break;
2308    case INDEX_op_ld32u_i64:
2309        tcg_out_mem(s, 0, RXY_LLGF, args[0], args[1], TCG_REG_NONE, args[2]);
2310        break;
2311    case INDEX_op_ld32s_i64:
2312        tcg_out_mem(s, 0, RXY_LGF, args[0], args[1], TCG_REG_NONE, args[2]);
2313        break;
2314    case INDEX_op_ld_i64:
2315        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2316        break;
2317
2318    case INDEX_op_st32_i64:
2319        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2320        break;
2321    case INDEX_op_st_i64:
2322        tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2323        break;
2324
2325    case INDEX_op_add_i64:
2326        a0 = args[0], a1 = args[1], a2 = args[2];
2327        if (const_args[2]) {
2328        do_addi_64:
2329            if (a0 == a1) {
2330                if (a2 == (int16_t)a2) {
2331                    tcg_out_insn(s, RI, AGHI, a0, a2);
2332                    break;
2333                }
2334                if (HAVE_FACILITY(EXT_IMM)) {
2335                    if (a2 == (int32_t)a2) {
2336                        tcg_out_insn(s, RIL, AGFI, a0, a2);
2337                        break;
2338                    } else if (a2 == (uint32_t)a2) {
2339                        tcg_out_insn(s, RIL, ALGFI, a0, a2);
2340                        break;
2341                    } else if (-a2 == (uint32_t)-a2) {
2342                        tcg_out_insn(s, RIL, SLGFI, a0, -a2);
2343                        break;
2344                    }
2345                }
2346            }
2347            tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
2348        } else if (a0 == a1) {
2349            tcg_out_insn(s, RRE, AGR, a0, a2);
2350        } else {
2351            tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
2352        }
2353        break;
2354    case INDEX_op_sub_i64:
2355        a0 = args[0], a1 = args[1], a2 = args[2];
2356        if (const_args[2]) {
2357            a2 = -a2;
2358            goto do_addi_64;
2359        } else if (a0 == a1) {
2360            tcg_out_insn(s, RRE, SGR, a0, a2);
2361        } else {
2362            tcg_out_insn(s, RRF, SGRK, a0, a1, a2);
2363        }
2364        break;
2365
2366    case INDEX_op_and_i64:
2367        a0 = args[0], a1 = args[1], a2 = args[2];
2368        if (const_args[2]) {
2369            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2370            tgen_andi(s, TCG_TYPE_I64, a0, a2);
2371        } else if (a0 == a1) {
2372            tcg_out_insn(s, RRE, NGR, a0, a2);
2373        } else {
2374            tcg_out_insn(s, RRF, NGRK, a0, a1, a2);
2375        }
2376        break;
2377    case INDEX_op_or_i64:
2378        a0 = args[0], a1 = args[1], a2 = args[2];
2379        if (const_args[2]) {
2380            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2381            tgen_ori(s, TCG_TYPE_I64, a0, a2);
2382        } else if (a0 == a1) {
2383            tcg_out_insn(s, RRE, OGR, a0, a2);
2384        } else {
2385            tcg_out_insn(s, RRF, OGRK, a0, a1, a2);
2386        }
2387        break;
2388    case INDEX_op_xor_i64:
2389        a0 = args[0], a1 = args[1], a2 = args[2];
2390        if (const_args[2]) {
2391            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2392            tgen_xori(s, TCG_TYPE_I64, a0, a2);
2393        } else if (a0 == a1) {
2394            tcg_out_insn(s, RRE, XGR, a0, a2);
2395        } else {
2396            tcg_out_insn(s, RRF, XGRK, a0, a1, a2);
2397        }
2398        break;
2399
2400    case INDEX_op_neg_i64:
2401        tcg_out_insn(s, RRE, LCGR, args[0], args[1]);
2402        break;
2403    case INDEX_op_bswap64_i64:
2404        tcg_out_insn(s, RRE, LRVGR, args[0], args[1]);
2405        break;
2406
2407    case INDEX_op_mul_i64:
2408        if (const_args[2]) {
2409            if (args[2] == (int16_t)args[2]) {
2410                tcg_out_insn(s, RI, MGHI, args[0], args[2]);
2411            } else {
2412                tcg_out_insn(s, RIL, MSGFI, args[0], args[2]);
2413            }
2414        } else {
2415            tcg_out_insn(s, RRE, MSGR, args[0], args[2]);
2416        }
2417        break;
2418
2419    case INDEX_op_div2_i64:
2420        /* ??? We get an unnecessary sign-extension of the dividend
2421           into R3 with this definition, but since we always produce
2422           both quotient and remainder anyway, using INDEX_op_div_i64
2423           instead would require jumping through even more hoops.  */
2424        tcg_out_insn(s, RRE, DSGR, TCG_REG_R2, args[4]);
2425        break;
2426    case INDEX_op_divu2_i64:
2427        tcg_out_insn(s, RRE, DLGR, TCG_REG_R2, args[4]);
2428        break;
2429    case INDEX_op_mulu2_i64:
2430        tcg_out_insn(s, RRE, MLGR, TCG_REG_R2, args[3]);
2431        break;
2432
2433    case INDEX_op_shl_i64:
2434        op = RSY_SLLG;
2435    do_shift64:
2436        if (const_args[2]) {
2437            tcg_out_sh64(s, op, args[0], args[1], TCG_REG_NONE, args[2]);
2438        } else {
2439            tcg_out_sh64(s, op, args[0], args[1], args[2], 0);
2440        }
2441        break;
2442    case INDEX_op_shr_i64:
2443        op = RSY_SRLG;
2444        goto do_shift64;
2445    case INDEX_op_sar_i64:
2446        op = RSY_SRAG;
2447        goto do_shift64;
2448
2449    case INDEX_op_rotl_i64:
2450        if (const_args[2]) {
2451            tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
2452                         TCG_REG_NONE, args[2]);
2453        } else {
2454            tcg_out_sh64(s, RSY_RLLG, args[0], args[1], args[2], 0);
2455        }
2456        break;
2457    case INDEX_op_rotr_i64:
2458        if (const_args[2]) {
2459            tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
2460                         TCG_REG_NONE, (64 - args[2]) & 63);
2461        } else {
2462            /* We can use the smaller 32-bit negate because only the
2463               low 6 bits are examined for the rotate.  */
2464            tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
2465            tcg_out_sh64(s, RSY_RLLG, args[0], args[1], TCG_TMP0, 0);
2466        }
2467        break;
2468
2469    case INDEX_op_ext8s_i64:
2470        tgen_ext8s(s, TCG_TYPE_I64, args[0], args[1]);
2471        break;
2472    case INDEX_op_ext16s_i64:
2473        tgen_ext16s(s, TCG_TYPE_I64, args[0], args[1]);
2474        break;
2475    case INDEX_op_ext_i32_i64:
2476    case INDEX_op_ext32s_i64:
2477        tgen_ext32s(s, args[0], args[1]);
2478        break;
2479    case INDEX_op_ext8u_i64:
2480        tgen_ext8u(s, TCG_TYPE_I64, args[0], args[1]);
2481        break;
2482    case INDEX_op_ext16u_i64:
2483        tgen_ext16u(s, TCG_TYPE_I64, args[0], args[1]);
2484        break;
2485    case INDEX_op_extu_i32_i64:
2486    case INDEX_op_ext32u_i64:
2487        tgen_ext32u(s, args[0], args[1]);
2488        break;
2489
2490    case INDEX_op_add2_i64:
2491        if (const_args[4]) {
2492            if ((int64_t)args[4] >= 0) {
2493                tcg_out_insn(s, RIL, ALGFI, args[0], args[4]);
2494            } else {
2495                tcg_out_insn(s, RIL, SLGFI, args[0], -args[4]);
2496            }
2497        } else {
2498            tcg_out_insn(s, RRE, ALGR, args[0], args[4]);
2499        }
2500        tcg_out_insn(s, RRE, ALCGR, args[1], args[5]);
2501        break;
2502    case INDEX_op_sub2_i64:
2503        if (const_args[4]) {
2504            if ((int64_t)args[4] >= 0) {
2505                tcg_out_insn(s, RIL, SLGFI, args[0], args[4]);
2506            } else {
2507                tcg_out_insn(s, RIL, ALGFI, args[0], -args[4]);
2508            }
2509        } else {
2510            tcg_out_insn(s, RRE, SLGR, args[0], args[4]);
2511        }
2512        tcg_out_insn(s, RRE, SLBGR, args[1], args[5]);
2513        break;
2514
2515    case INDEX_op_brcond_i64:
2516        tgen_brcond(s, TCG_TYPE_I64, args[2], args[0],
2517                    args[1], const_args[1], arg_label(args[3]));
2518        break;
2519    case INDEX_op_setcond_i64:
2520        tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1],
2521                     args[2], const_args[2]);
2522        break;
2523    case INDEX_op_movcond_i64:
2524        tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1],
2525                     args[2], const_args[2], args[3], const_args[3]);
2526        break;
2527
2528    OP_32_64(deposit):
2529        a0 = args[0], a1 = args[1], a2 = args[2];
2530        if (const_args[1]) {
2531            tgen_deposit(s, a0, a2, args[3], args[4], 1);
2532        } else {
2533            /* Since we can't support "0Z" as a constraint, we allow a1 in
2534               any register.  Fix things up as if it were a matching constraint.  */
2535            if (a0 != a1) {
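                /* TCG_TYPE_I32 == 0 and TCG_TYPE_I64 == 1, so the boolean
                   comparison below yields the correct move width.  */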
2536                TCGType type = (opc == INDEX_op_deposit_i64);
2537                if (a0 == a2) {
2538                    tcg_out_mov(s, type, TCG_TMP0, a2);
2539                    a2 = TCG_TMP0;
2540                }
2541                tcg_out_mov(s, type, a0, a1);
2542            }
2543            tgen_deposit(s, a0, a2, args[3], args[4], 0);
2544        }
2545        break;
2546
2547    OP_32_64(extract):
2548        tgen_extract(s, args[0], args[1], args[2], args[3]);
2549        break;
2550
2551    case INDEX_op_clz_i64:
2552        tgen_clz(s, args[0], args[1], args[2], const_args[2]);
2553        break;
2554
2555    case INDEX_op_mb:
2556        /* The host memory model is quite strong; only store-load reordering
2557           is observable, so only TCG_MO_ST_LD needs serialization.  */
2558        if (args[0] & TCG_MO_ST_LD) {
2559            tcg_out_insn(s, RR, BCR, HAVE_FACILITY(FAST_BCR_SER) ? 14 : 15, 0);
2560        }
2561        break;
2562
2563    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2564    case INDEX_op_mov_i64:
2565    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2566    default:
2567        tcg_abort();
2568    }
2569}
2570
2571static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
2572                            TCGReg dst, TCGReg src)
2573{
2574    if (is_general_reg(src)) {
2575        /* Replicate general register into two MO_64. */
2576        tcg_out_insn(s, VRRf, VLVGP, dst, src, src);
2577        if (vece == MO_64) {
2578            return true;
2579        }
2580    }
2581
2582    /*
2583     * Recall that the "standard" integer, within a vector, is the
2584     * rightmost element of the leftmost doubleword, a-la VLLEZ.
2585     */
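    /* (8 >> vece) - 1 is the index of that element: 7 for MO_8, 3 for
       MO_16, 1 for MO_32 and 0 for MO_64.  */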
2586    tcg_out_insn(s, VRIc, VREP, dst, (8 >> vece) - 1, src, vece);
2587    return true;
2588}
2589
2590static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
2591                             TCGReg dst, TCGReg base, intptr_t offset)
2592{
2593    tcg_out_vrx_mem(s, VRX_VLREP, dst, base, TCG_REG_NONE, offset, vece);
2594    return true;
2595}
2596
2597static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
2598                             TCGReg dst, int64_t val)
2599{
2600    int i, mask, msb, lsb;
2601
2602    /* Look for int16_t elements.  */
2603    if (vece <= MO_16 ||
2604        (vece == MO_32 ? (int32_t)val : val) == (int16_t)val) {
2605        tcg_out_insn(s, VRIa, VREPI, dst, val, vece);
2606        return;
2607    }
2608
2609    /* Look for bit masks.  */
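    /* E.g. val = 0x00ffff00 gives msb = 8 and lsb = 23, a contiguous run
       of ones from bit 8 through bit 23 (IBM bit numbering) that VECTOR
       GENERATE MASK produces directly.  */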
2610    if (vece == MO_32) {
2611        if (risbg_mask((int32_t)val)) {
2612            /* Handle wraparound by swapping msb and lsb.  */
2613            if ((val & 0x80000001u) == 0x80000001u) {
2614                msb = 32 - ctz32(~val);
2615                lsb = clz32(~val) - 1;
2616            } else {
2617                msb = clz32(val);
2618                lsb = 31 - ctz32(val);
2619            }
2620            tcg_out_insn(s, VRIb, VGM, dst, lsb, msb, MO_32);
2621            return;
2622        }
2623    } else {
2624        if (risbg_mask(val)) {
2625            /* Handle wraparound by swapping msb and lsb.  */
2626            if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
2628                msb = 64 - ctz64(~val);
2629                lsb = clz64(~val) - 1;
2630            } else {
2631                msb = clz64(val);
2632                lsb = 63 - ctz64(val);
2633            }
2634            tcg_out_insn(s, VRIb, VGM, dst, lsb, msb, MO_64);
2635            return;
2636        }
2637    }
2638
2639    /* Look for all bytes 0x00 or 0xff.  */
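    /* E.g. val = 0x00ff00ff00ff00ff yields mask = 0x55; the multiply by
       0x0101 replicates it into both doublewords so that VGBM expands
       one mask bit into each of the 16 vector bytes.  */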
2640    for (i = mask = 0; i < 8; i++) {
2641        uint8_t byte = val >> (i * 8);
2642        if (byte == 0xff) {
2643            mask |= 1 << i;
2644        } else if (byte != 0) {
2645            break;
2646        }
2647    }
2648    if (i == 8) {
2649        tcg_out_insn(s, VRIa, VGBM, dst, mask * 0x0101, 0);
2650        return;
2651    }
2652
2653    /* Otherwise, stuff it in the constant pool.  */
2654    tcg_out_insn(s, RIL, LARL, TCG_TMP0, 0);
2655    new_pool_label(s, val, R_390_PC32DBL, s->code_ptr - 2, 2);
2656    tcg_out_insn(s, VRX, VLREP, dst, TCG_TMP0, TCG_REG_NONE, 0, MO_64);
2657}
2658
2659static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2660                           unsigned vecl, unsigned vece,
2661                           const TCGArg *args, const int *const_args)
2662{
2663    TCGType type = vecl + TCG_TYPE_V64;
2664    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
2665
2666    switch (opc) {
2667    case INDEX_op_ld_vec:
2668        tcg_out_ld(s, type, a0, a1, a2);
2669        break;
2670    case INDEX_op_st_vec:
2671        tcg_out_st(s, type, a0, a1, a2);
2672        break;
2673    case INDEX_op_dupm_vec:
2674        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2675        break;
2676
2677    case INDEX_op_abs_vec:
2678        tcg_out_insn(s, VRRa, VLP, a0, a1, vece);
2679        break;
2680    case INDEX_op_neg_vec:
2681        tcg_out_insn(s, VRRa, VLC, a0, a1, vece);
2682        break;
2683    case INDEX_op_not_vec:
2684        tcg_out_insn(s, VRRc, VNO, a0, a1, a1, 0);
2685        break;
2686
2687    case INDEX_op_add_vec:
2688        tcg_out_insn(s, VRRc, VA, a0, a1, a2, vece);
2689        break;
2690    case INDEX_op_sub_vec:
2691        tcg_out_insn(s, VRRc, VS, a0, a1, a2, vece);
2692        break;
2693    case INDEX_op_and_vec:
2694        tcg_out_insn(s, VRRc, VN, a0, a1, a2, 0);
2695        break;
2696    case INDEX_op_andc_vec:
2697        tcg_out_insn(s, VRRc, VNC, a0, a1, a2, 0);
2698        break;
2699    case INDEX_op_or_vec:
2700        tcg_out_insn(s, VRRc, VO, a0, a1, a2, 0);
2701        break;
2702    case INDEX_op_orc_vec:
2703        tcg_out_insn(s, VRRc, VOC, a0, a1, a2, 0);
2704        break;
2705    case INDEX_op_xor_vec:
2706        tcg_out_insn(s, VRRc, VX, a0, a1, a2, 0);
2707        break;
2708
2709    case INDEX_op_cmp_vec:
2710        switch ((TCGCond)args[3]) {
2711        case TCG_COND_EQ:
2712            tcg_out_insn(s, VRRc, VCEQ, a0, a1, a2, vece);
2713            break;
2714        case TCG_COND_GT:
2715            tcg_out_insn(s, VRRc, VCH, a0, a1, a2, vece);
2716            break;
2717        case TCG_COND_GTU:
2718            tcg_out_insn(s, VRRc, VCHL, a0, a1, a2, vece);
2719            break;
2720        default:
2721            g_assert_not_reached();
2722        }
2723        break;
2724
2725    case INDEX_op_mov_vec:   /* Always emitted via tcg_out_mov.  */
2726    case INDEX_op_dup_vec:   /* Always emitted via tcg_out_dup_vec.  */
2727    default:
2728        g_assert_not_reached();
2729    }
2730}
2731
2732int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2733{
2734    switch (opc) {
2735    case INDEX_op_abs_vec:
2736    case INDEX_op_add_vec:
2737    case INDEX_op_and_vec:
2738    case INDEX_op_andc_vec:
2739    case INDEX_op_neg_vec:
2740    case INDEX_op_not_vec:
2741    case INDEX_op_or_vec:
2742    case INDEX_op_orc_vec:
2743    case INDEX_op_sub_vec:
2744    case INDEX_op_xor_vec:
2745        return 1;
2746    case INDEX_op_cmp_vec:
2747        return -1;
2748    default:
2749        return 0;
2750    }
2751}
2752
2753static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0,
2754                                 TCGv_vec v1, TCGv_vec v2, TCGCond cond)
2755{
2756    bool need_swap = false, need_inv = false;
2757
2758    switch (cond) {
2759    case TCG_COND_EQ:
2760    case TCG_COND_GT:
2761    case TCG_COND_GTU:
2762        break;
2763    case TCG_COND_NE:
2764    case TCG_COND_LE:
2765    case TCG_COND_LEU:
2766        need_inv = true;
2767        break;
2768    case TCG_COND_LT:
2769    case TCG_COND_LTU:
2770        need_swap = true;
2771        break;
2772    case TCG_COND_GE:
2773    case TCG_COND_GEU:
2774        need_swap = need_inv = true;
2775        break;
2776    default:
2777        g_assert_not_reached();
2778    }
2779
2780    if (need_inv) {
2781        cond = tcg_invert_cond(cond);
2782    }
2783    if (need_swap) {
2784        TCGv_vec t1;
2785        t1 = v1, v1 = v2, v2 = t1;
2786        cond = tcg_swap_cond(cond);
2787    }
2788
2789    vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
2790              tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
2791
2792    return need_inv;
2793}
2794
2795static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
2796                           TCGv_vec v1, TCGv_vec v2, TCGCond cond)
2797{
2798    if (expand_vec_cmp_noinv(type, vece, v0, v1, v2, cond)) {
2799        tcg_gen_not_vec(vece, v0, v0);
2800    }
2801}
2802
2803void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2804                       TCGArg a0, ...)
2805{
2806    va_list va;
2807    TCGv_vec v0, v1, v2;
2808
2809    va_start(va, a0);
2810    v0 = temp_tcgv_vec(arg_temp(a0));
2811    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2812    v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2813
2814    switch (opc) {
2815    case INDEX_op_cmp_vec:
2816        expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
2817        break;
2818
2819    default:
2820        g_assert_not_reached();
2821    }
2822    va_end(va);
2823}
2824
2825static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
2826{
2827    switch (op) {
2828    case INDEX_op_goto_ptr:
2829        return C_O0_I1(r);
2830
2831    case INDEX_op_ld8u_i32:
2832    case INDEX_op_ld8u_i64:
2833    case INDEX_op_ld8s_i32:
2834    case INDEX_op_ld8s_i64:
2835    case INDEX_op_ld16u_i32:
2836    case INDEX_op_ld16u_i64:
2837    case INDEX_op_ld16s_i32:
2838    case INDEX_op_ld16s_i64:
2839    case INDEX_op_ld_i32:
2840    case INDEX_op_ld32u_i64:
2841    case INDEX_op_ld32s_i64:
2842    case INDEX_op_ld_i64:
2843        return C_O1_I1(r, r);
2844
2845    case INDEX_op_st8_i32:
2846    case INDEX_op_st8_i64:
2847    case INDEX_op_st16_i32:
2848    case INDEX_op_st16_i64:
2849    case INDEX_op_st_i32:
2850    case INDEX_op_st32_i64:
2851    case INDEX_op_st_i64:
2852        return C_O0_I2(r, r);
2853
2854    case INDEX_op_add_i32:
2855    case INDEX_op_add_i64:
2856    case INDEX_op_shl_i64:
2857    case INDEX_op_shr_i64:
2858    case INDEX_op_sar_i64:
2859    case INDEX_op_rotl_i32:
2860    case INDEX_op_rotl_i64:
2861    case INDEX_op_rotr_i32:
2862    case INDEX_op_rotr_i64:
2863    case INDEX_op_clz_i64:
2864    case INDEX_op_setcond_i32:
2865    case INDEX_op_setcond_i64:
2866        return C_O1_I2(r, r, ri);
2867
2868    case INDEX_op_sub_i32:
2869    case INDEX_op_sub_i64:
2870    case INDEX_op_and_i32:
2871    case INDEX_op_and_i64:
2872    case INDEX_op_or_i32:
2873    case INDEX_op_or_i64:
2874    case INDEX_op_xor_i32:
2875    case INDEX_op_xor_i64:
2876        return (HAVE_FACILITY(DISTINCT_OPS)
2877                ? C_O1_I2(r, r, ri)
2878                : C_O1_I2(r, 0, ri));
2879
2880    case INDEX_op_mul_i32:
2881        /* If we have the general-instruction-extensions facility, we have
2882           MULTIPLY SINGLE IMMEDIATE with a signed 32-bit immediate;
2883           otherwise only MULTIPLY HALFWORD IMMEDIATE, with a signed 16-bit one.  */
2884        return (HAVE_FACILITY(GEN_INST_EXT)
2885                ? C_O1_I2(r, 0, ri)
2886                : C_O1_I2(r, 0, rI));
2887
2888    case INDEX_op_mul_i64:
2889        return (HAVE_FACILITY(GEN_INST_EXT)
2890                ? C_O1_I2(r, 0, rJ)
2891                : C_O1_I2(r, 0, rI));
2892
2893    case INDEX_op_shl_i32:
2894    case INDEX_op_shr_i32:
2895    case INDEX_op_sar_i32:
2896        return (HAVE_FACILITY(DISTINCT_OPS)
2897                ? C_O1_I2(r, r, ri)
2898                : C_O1_I2(r, 0, ri));
2899
2900    case INDEX_op_brcond_i32:
2901    case INDEX_op_brcond_i64:
2902        return C_O0_I2(r, ri);
2903
2904    case INDEX_op_bswap16_i32:
2905    case INDEX_op_bswap16_i64:
2906    case INDEX_op_bswap32_i32:
2907    case INDEX_op_bswap32_i64:
2908    case INDEX_op_bswap64_i64:
2909    case INDEX_op_neg_i32:
2910    case INDEX_op_neg_i64:
2911    case INDEX_op_ext8s_i32:
2912    case INDEX_op_ext8s_i64:
2913    case INDEX_op_ext8u_i32:
2914    case INDEX_op_ext8u_i64:
2915    case INDEX_op_ext16s_i32:
2916    case INDEX_op_ext16s_i64:
2917    case INDEX_op_ext16u_i32:
2918    case INDEX_op_ext16u_i64:
2919    case INDEX_op_ext32s_i64:
2920    case INDEX_op_ext32u_i64:
2921    case INDEX_op_ext_i32_i64:
2922    case INDEX_op_extu_i32_i64:
2923    case INDEX_op_extract_i32:
2924    case INDEX_op_extract_i64:
2925        return C_O1_I1(r, r);
2926
2927    case INDEX_op_qemu_ld_i32:
2928    case INDEX_op_qemu_ld_i64:
2929        return C_O1_I1(r, L);
2930    case INDEX_op_qemu_st_i64:
2931    case INDEX_op_qemu_st_i32:
2932        return C_O0_I2(L, L);
2933
2934    case INDEX_op_deposit_i32:
2935    case INDEX_op_deposit_i64:
2936        return C_O1_I2(r, rZ, r);
2937
2938    case INDEX_op_movcond_i32:
2939    case INDEX_op_movcond_i64:
2940        return (HAVE_FACILITY(LOAD_ON_COND2)
2941                ? C_O1_I4(r, r, ri, rI, 0)
2942                : C_O1_I4(r, r, ri, r, 0));
2943
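    /* The divide instructions use the implicit R2/R3 pair, remainder in
       R2 and quotient in R3; the 'a' and 'b' constraint letters pin the
       outputs to exactly those registers.  */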
2944    case INDEX_op_div2_i32:
2945    case INDEX_op_div2_i64:
2946    case INDEX_op_divu2_i32:
2947    case INDEX_op_divu2_i64:
2948        return C_O2_I3(b, a, 0, 1, r);
2949
2950    case INDEX_op_mulu2_i64:
2951        return C_O2_I2(b, a, 0, r);
2952
2953    case INDEX_op_add2_i32:
2954    case INDEX_op_sub2_i32:
2955        return (HAVE_FACILITY(EXT_IMM)
2956                ? C_O2_I4(r, r, 0, 1, ri, r)
2957                : C_O2_I4(r, r, 0, 1, r, r));
2958
2959    case INDEX_op_add2_i64:
2960    case INDEX_op_sub2_i64:
2961        return (HAVE_FACILITY(EXT_IMM)
2962                ? C_O2_I4(r, r, 0, 1, rA, r)
2963                : C_O2_I4(r, r, 0, 1, r, r));
2964
2965    case INDEX_op_st_vec:
2966        return C_O0_I2(v, r);
2967    case INDEX_op_ld_vec:
2968    case INDEX_op_dupm_vec:
2969        return C_O1_I1(v, r);
2970    case INDEX_op_dup_vec:
2971        return C_O1_I1(v, vr);
2972    case INDEX_op_abs_vec:
2973    case INDEX_op_neg_vec:
2974    case INDEX_op_not_vec:
2975        return C_O1_I1(v, v);
2976    case INDEX_op_add_vec:
2977    case INDEX_op_sub_vec:
2978    case INDEX_op_and_vec:
2979    case INDEX_op_andc_vec:
2980    case INDEX_op_or_vec:
2981    case INDEX_op_orc_vec:
2982    case INDEX_op_xor_vec:
2983    case INDEX_op_cmp_vec:
2984        return C_O1_I2(v, v, v);
2985
2986    default:
2987        g_assert_not_reached();
2988    }
2989}
2990
2991/*
2992 * Mainline glibc added HWCAP_S390_VX before it was part of the kernel ABI.
2993 * Some distros have fixed this up locally, others have not.
2994 */
2995#ifndef HWCAP_S390_VXRS
2996#define HWCAP_S390_VXRS 2048
2997#endif
2998
2999static void query_s390_facilities(void)
3000{
3001    unsigned long hwcap = qemu_getauxval(AT_HWCAP);
3002
3003    /* Is STORE FACILITY LIST EXTENDED available?  Honestly, I believe this
3004       is present on all 64-bit systems, but let's check for it anyway.  */
3005    if (hwcap & HWCAP_S390_STFLE) {
3006        register int r0 __asm__("0") = ARRAY_SIZE(s390_facilities) - 1;
3007        register void *r1 __asm__("1") = s390_facilities;
3008
3009        /* stfle 0(%r1) */
3010        asm volatile(".word 0xb2b0,0x1000"
3011                     : "=r"(r0) : "r"(r0), "r"(r1) : "memory", "cc");
3012    }
3013
3014    /*
3015     * Use of vector registers requires os support beyond the facility bit.
3016     * If the kernel does not advertise support, disable the facility bits.
3017     * There is nothing else we currently care about in the 3rd word, so
3018     * disable VECTOR with one store.
3019     */
3020    if (!(hwcap & HWCAP_S390_VXRS)) {
3021        s390_facilities[2] = 0;
3022    }
3023}
3024
3025static void tcg_target_init(TCGContext *s)
3026{
3027    query_s390_facilities();
3028
3029    tcg_target_available_regs[TCG_TYPE_I32] = 0xffff;
3030    tcg_target_available_regs[TCG_TYPE_I64] = 0xffff;
3031    if (HAVE_FACILITY(VECTOR)) {
3032        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3033        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
3034    }
3035
3036    tcg_target_call_clobber_regs = 0;
3037    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
3038    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R1);
3039    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
3040    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
3041    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
3042    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
3043    /* The r6 register is technically call-saved, but it's also a parameter
3044       register, so it can get killed by setup for the qemu_st helper.  */
3045    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
3046    /* The return register can be considered call-clobbered.  */
3047    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14);
3048
3049    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
3050    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
3051    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
3052    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
3053    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
3054    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
3055    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
3056    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
3057    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
3058    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
3059    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
3060    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
3061    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V20);
3062    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V21);
3063    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V22);
3064    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V23);
3065    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V24);
3066    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V25);
3067    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V26);
3068    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V27);
3069    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V28);
3070    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V29);
3071    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V30);
3072    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V31);
3073
3074    s->reserved_regs = 0;
3075    tcg_regset_set_reg(s->reserved_regs, TCG_TMP0);
3076    /* XXX many insns can't be used with R0, so we'd better avoid it for now */
3077    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);
3078    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
3079    if (USE_REG_TB) {
3080        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);
3081    }
3082}
3083
3084#define FRAME_SIZE  ((int)(TCG_TARGET_CALL_STACK_OFFSET          \
3085                           + TCG_STATIC_CALL_ARGS_SIZE           \
3086                           + CPU_TEMP_BUF_NLONGS * sizeof(long)))
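/* The frame is the ABI-mandated 160-byte register save area, space for
   the outgoing arguments of helper calls, and the TCG temp buffer.  */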
3087
3088static void tcg_target_qemu_prologue(TCGContext *s)
3089{
3090    /* stmg %r6,%r15,48(%r15) (save registers) */
3091    tcg_out_insn(s, RXY, STMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 48);
3092
3093    /* aghi %r15,-frame_size */
3094    tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -FRAME_SIZE);
3095
3096    tcg_set_frame(s, TCG_REG_CALL_STACK,
3097                  TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET,
3098                  CPU_TEMP_BUF_NLONGS * sizeof(long));
3099
3100#ifndef CONFIG_SOFTMMU
3101    if (guest_base >= 0x80000) {
3102        tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
3103        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
3104    }
3105#endif
3106
3107    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
3108    if (USE_REG_TB) {
3109        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB,
3110                    tcg_target_call_iarg_regs[1]);
3111    }
3112
3113    /* br %r3 (go to TB) */
3114    tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, tcg_target_call_iarg_regs[1]);
3115
3116    /*
3117     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
3118     * and fall through to the rest of the epilogue.
3119     */
3120    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
3121    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, 0);
3122
3123    /* TB epilogue */
3124    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
3125
3126    /* lmg %r6,%r15,fs+48(%r15) (restore registers) */
3127    tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15,
3128                 FRAME_SIZE + 48);
3129
3130    /* br %r14 (return) */
3131    tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14);
3132}
3133
3134static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3135{
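    /* Each 0x0707 halfword is "bcr 0,%r7": a branch with a zero condition
       mask never branches, so this is a valid 2-byte nop filler.  */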
3136    memset(p, 0x07, count * sizeof(tcg_insn_unit));
3137}
3138
3139typedef struct {
3140    DebugFrameHeader h;
3141    uint8_t fde_def_cfa[4];
3142    uint8_t fde_reg_ofs[18];
3143} DebugFrame;
3144
3145/* We're expecting a 2-byte uleb128-encoded value.  */
3146QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
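/* As uleb128, e.g. a frame size of 0x1a0 encodes as the two bytes 0xa0 0x03:
   the low seven bits with the continuation bit set, then the remainder.  */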
3147
3148#define ELF_HOST_MACHINE  EM_S390
3149
3150static const DebugFrame debug_frame = {
3151    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3152    .h.cie.id = -1,
3153    .h.cie.version = 1,
3154    .h.cie.code_align = 1,
3155    .h.cie.data_align = 8,                /* sleb128 8 */
3156    .h.cie.return_column = TCG_REG_R14,
3157
3158    /* Total FDE size does not include the "len" member.  */
3159    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3160
3161    .fde_def_cfa = {
3162        12, TCG_REG_CALL_STACK,         /* DW_CFA_def_cfa %r15, ... */
3163        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3164        (FRAME_SIZE >> 7)
3165    },
3166    .fde_reg_ofs = {
3167        0x86, 6,                        /* DW_CFA_offset, %r6, 48 */
3168        0x87, 7,                        /* DW_CFA_offset, %r7, 56 */
3169        0x88, 8,                        /* DW_CFA_offset, %r8, 64 */
3170        0x89, 9,                        /* DW_CFA_offset, %r9, 72 */
3171        0x8a, 10,                       /* DW_CFA_offset, %r10, 80 */
3172        0x8b, 11,                       /* DW_CFA_offset, %r11, 88 */
3173        0x8c, 12,                       /* DW_CFA_offset, %r12, 96 */
3174        0x8d, 13,                       /* DW_CFA_offset, %r13, 104 */
3175        0x8e, 14,                       /* DW_CFA_offset, %r14, 112 */
3176    }
3177};
3178
3179void tcg_register_jit(const void *buf, size_t buf_size)
3180{
3181    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3182}
3183