xref: /openbmc/qemu/tcg/s390x/tcg-target.c.inc (revision dc688246)
1/*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2009 Ulrich Hecht <uli@suse.de>
5 * Copyright (c) 2009 Alexander Graf <agraf@suse.de>
6 * Copyright (c) 2010 Richard Henderson <rth@twiddle.net>
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to deal
10 * in the Software without restriction, including without limitation the rights
11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 * copies of the Software, and to permit persons to whom the Software is
13 * furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 * THE SOFTWARE.
25 */
26
27/* We only support generating code for 64-bit mode.  */
28#if TCG_TARGET_REG_BITS != 64
29#error "unsupported code generation mode"
30#endif
31
32#include "../tcg-ldst.c.inc"
33#include "../tcg-pool.c.inc"
34#include "elf.h"
35
/*
 * Backend-specific constant constraint flags, tested against the 'ct'
 * argument of tcg_target_const_match().
 */
#define TCG_CT_CONST_S16        (1 << 8)   /* value fits in int16_t */
#define TCG_CT_CONST_S32        (1 << 9)   /* value fits in int32_t */
#define TCG_CT_CONST_S33        (1 << 10)  /* -0xffffffff <= value <= 0xffffffff */
#define TCG_CT_CONST_ZERO       (1 << 11)  /* value is exactly zero */
#define TCG_CT_CONST_P32        (1 << 12)  /* only one 32-bit half is non-zero */
#define TCG_CT_CONST_INV        (1 << 13)  /* invert value before P32/INVRISBG tests */
#define TCG_CT_CONST_INVRISBG   (1 << 14)  /* ~value is accepted by risbg_mask() */

/* Register-set masks: bits 0-15 and bits 32-63 of a 64-bit register set. */
#define ALL_GENERAL_REGS     MAKE_64BIT_MASK(0, 16)
#define ALL_VECTOR_REGS      MAKE_64BIT_MASK(32, 32)
46
/*
 * For softmmu, we need to avoid conflicts with the first 3
 * argument registers to perform the tlb lookup, and to call
 * the helper function.
 *
 * The mask starts at TCG_REG_R2, the first entry of
 * tcg_target_call_iarg_regs[].  Without softmmu nothing is reserved.
 */
#ifdef CONFIG_SOFTMMU
#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_R2, 3)
#else
#define SOFTMMU_RESERVE_REGS 0
#endif
57
58
/* In several places within the instruction set, 0 means "no register"
   rather than TCG_REG_R0.  */
#define TCG_REG_NONE    0

/* A scratch register that may be used throughout the backend.  */
#define TCG_TMP0        TCG_REG_R1

/* Only defined for non-softmmu builds.  */
#ifndef CONFIG_SOFTMMU
#define TCG_GUEST_BASE_REG TCG_REG_R13
#endif
69
/* All of the following instructions are prefixed with their instruction
   format, and are defined as 8- or 16-bit quantities, even when the two
   halves of the 16-bit quantity may appear 32 bits apart in the insn.
   This makes it easy to copy the values from the tables in Appendix B.

   The format prefix of each name selects the matching tcg_out_insn_<FMT>
   emitter through the tcg_out_insn() macro.  */
typedef enum S390Opcode {
    RIL_AFI     = 0xc209,
    RIL_AGFI    = 0xc208,
    RIL_ALFI    = 0xc20b,
    RIL_ALGFI   = 0xc20a,
    RIL_BRASL   = 0xc005,
    RIL_BRCL    = 0xc004,
    RIL_CFI     = 0xc20d,
    RIL_CGFI    = 0xc20c,
    RIL_CLFI    = 0xc20f,
    RIL_CLGFI   = 0xc20e,
    RIL_CLRL    = 0xc60f,
    RIL_CLGRL   = 0xc60a,
    RIL_CRL     = 0xc60d,
    RIL_CGRL    = 0xc608,
    RIL_IIHF    = 0xc008,
    RIL_IILF    = 0xc009,
    RIL_LARL    = 0xc000,
    RIL_LGFI    = 0xc001,
    RIL_LGRL    = 0xc408,
    RIL_LLIHF   = 0xc00e,
    RIL_LLILF   = 0xc00f,
    RIL_LRL     = 0xc40d,
    RIL_MSFI    = 0xc201,
    RIL_MSGFI   = 0xc200,
    RIL_NIHF    = 0xc00a,
    RIL_NILF    = 0xc00b,
    RIL_OIHF    = 0xc00c,
    RIL_OILF    = 0xc00d,
    RIL_SLFI    = 0xc205,
    RIL_SLGFI   = 0xc204,
    RIL_XIHF    = 0xc006,
    RIL_XILF    = 0xc007,

    RI_AGHI     = 0xa70b,
    RI_AHI      = 0xa70a,
    RI_BRC      = 0xa704,
    RI_CHI      = 0xa70e,
    RI_CGHI     = 0xa70f,
    RI_IIHH     = 0xa500,
    RI_IIHL     = 0xa501,
    RI_IILH     = 0xa502,
    RI_IILL     = 0xa503,
    RI_LGHI     = 0xa709,
    RI_LLIHH    = 0xa50c,
    RI_LLIHL    = 0xa50d,
    RI_LLILH    = 0xa50e,
    RI_LLILL    = 0xa50f,
    RI_MGHI     = 0xa70d,
    RI_MHI      = 0xa70c,
    RI_NIHH     = 0xa504,
    RI_NIHL     = 0xa505,
    RI_NILH     = 0xa506,
    RI_NILL     = 0xa507,
    RI_OIHH     = 0xa508,
    RI_OIHL     = 0xa509,
    RI_OILH     = 0xa50a,
    RI_OILL     = 0xa50b,
    RI_TMLL     = 0xa701,

    RIEb_CGRJ    = 0xec64,
    RIEb_CLGRJ   = 0xec65,
    RIEb_CLRJ    = 0xec77,
    RIEb_CRJ     = 0xec76,

    RIEc_CGIJ    = 0xec7c,
    RIEc_CIJ     = 0xec7e,
    RIEc_CLGIJ   = 0xec7d,
    RIEc_CLIJ    = 0xec7f,

    RIEf_RISBG   = 0xec55,

    RIEg_LOCGHI  = 0xec46,

    RRE_AGR     = 0xb908,
    RRE_ALGR    = 0xb90a,
    RRE_ALCR    = 0xb998,
    RRE_ALCGR   = 0xb988,
    RRE_CGR     = 0xb920,
    RRE_CLGR    = 0xb921,
    RRE_DLGR    = 0xb987,
    RRE_DLR     = 0xb997,
    RRE_DSGFR   = 0xb91d,
    RRE_DSGR    = 0xb90d,
    RRE_FLOGR   = 0xb983,
    RRE_LGBR    = 0xb906,
    RRE_LCGR    = 0xb903,
    RRE_LGFR    = 0xb914,
    RRE_LGHR    = 0xb907,
    RRE_LGR     = 0xb904,
    RRE_LLGCR   = 0xb984,
    RRE_LLGFR   = 0xb916,
    RRE_LLGHR   = 0xb985,
    RRE_LRVR    = 0xb91f,
    RRE_LRVGR   = 0xb90f,
    RRE_LTGR    = 0xb902,
    RRE_MLGR    = 0xb986,
    RRE_MSGR    = 0xb90c,
    RRE_MSR     = 0xb252,
    RRE_NGR     = 0xb980,
    RRE_OGR     = 0xb981,
    RRE_SGR     = 0xb909,
    RRE_SLGR    = 0xb90b,
    RRE_SLBR    = 0xb999,
    RRE_SLBGR   = 0xb989,
    RRE_XGR     = 0xb982,

    RRFa_MGRK   = 0xb9ec,
    RRFa_MSRKC  = 0xb9fd,
    RRFa_MSGRKC = 0xb9ed,
    RRFa_NCRK   = 0xb9f5,
    RRFa_NCGRK  = 0xb9e5,
    RRFa_NNRK   = 0xb974,
    RRFa_NNGRK  = 0xb964,
    RRFa_NORK   = 0xb976,
    RRFa_NOGRK  = 0xb966,
    RRFa_NRK    = 0xb9f4,
    RRFa_NGRK   = 0xb9e4,
    RRFa_NXRK   = 0xb977,
    RRFa_NXGRK  = 0xb967,
    RRFa_OCRK   = 0xb975,
    RRFa_OCGRK  = 0xb965,
    RRFa_ORK    = 0xb9f6,
    RRFa_OGRK   = 0xb9e6,
    RRFa_SRK    = 0xb9f9,
    RRFa_SGRK   = 0xb9e9,
    RRFa_SLRK   = 0xb9fb,
    RRFa_SLGRK  = 0xb9eb,
    RRFa_XRK    = 0xb9f7,
    RRFa_XGRK   = 0xb9e7,

    RRFam_SELGR = 0xb9e3,

    RRFc_LOCR   = 0xb9f2,
    RRFc_LOCGR  = 0xb9e2,
    RRFc_POPCNT = 0xb9e1,

    RR_AR       = 0x1a,
    RR_ALR      = 0x1e,
    RR_BASR     = 0x0d,
    RR_BCR      = 0x07,
    RR_CLR      = 0x15,
    RR_CR       = 0x19,
    RR_DR       = 0x1d,
    RR_LCR      = 0x13,
    RR_LR       = 0x18,
    RR_LTR      = 0x12,
    RR_NR       = 0x14,
    RR_OR       = 0x16,
    RR_SR       = 0x1b,
    RR_SLR      = 0x1f,
    RR_XR       = 0x17,

    RSY_RLL     = 0xeb1d,
    RSY_RLLG    = 0xeb1c,
    RSY_SLLG    = 0xeb0d,
    RSY_SLLK    = 0xebdf,
    RSY_SRAG    = 0xeb0a,
    RSY_SRAK    = 0xebdc,
    RSY_SRLG    = 0xeb0c,
    RSY_SRLK    = 0xebde,

    RS_SLL      = 0x89,
    RS_SRA      = 0x8a,
    RS_SRL      = 0x88,

    RXY_AG      = 0xe308,
    RXY_AY      = 0xe35a,
    RXY_CG      = 0xe320,
    RXY_CLG     = 0xe321,
    RXY_CLY     = 0xe355,
    RXY_CY      = 0xe359,
    RXY_LAY     = 0xe371,
    RXY_LB      = 0xe376,
    RXY_LG      = 0xe304,
    RXY_LGB     = 0xe377,
    RXY_LGF     = 0xe314,
    RXY_LGH     = 0xe315,
    RXY_LHY     = 0xe378,
    RXY_LLGC    = 0xe390,
    RXY_LLGF    = 0xe316,
    RXY_LLGH    = 0xe391,
    RXY_LMG     = 0xeb04,
    RXY_LRV     = 0xe31e,
    RXY_LRVG    = 0xe30f,
    RXY_LRVH    = 0xe31f,
    RXY_LY      = 0xe358,
    RXY_NG      = 0xe380,
    RXY_OG      = 0xe381,
    RXY_STCY    = 0xe372,
    RXY_STG     = 0xe324,
    RXY_STHY    = 0xe370,
    RXY_STMG    = 0xeb24,
    RXY_STRV    = 0xe33e,
    RXY_STRVG   = 0xe32f,
    RXY_STRVH   = 0xe33f,
    RXY_STY     = 0xe350,
    RXY_XG      = 0xe382,

    RX_A        = 0x5a,
    RX_C        = 0x59,
    RX_L        = 0x58,
    RX_LA       = 0x41,
    RX_LH       = 0x48,
    RX_ST       = 0x50,
    RX_STC      = 0x42,
    RX_STH      = 0x40,

    VRIa_VGBM   = 0xe744,
    VRIa_VREPI  = 0xe745,
    VRIb_VGM    = 0xe746,
    VRIc_VREP   = 0xe74d,

    VRRa_VLC    = 0xe7de,
    VRRa_VLP    = 0xe7df,
    VRRa_VLR    = 0xe756,
    VRRc_VA     = 0xe7f3,
    VRRc_VCEQ   = 0xe7f8,   /* we leave the m5 cs field 0 */
    VRRc_VCH    = 0xe7fb,   /* " */
    VRRc_VCHL   = 0xe7f9,   /* " */
    VRRc_VERLLV = 0xe773,
    VRRc_VESLV  = 0xe770,
    VRRc_VESRAV = 0xe77a,
    VRRc_VESRLV = 0xe778,
    VRRc_VML    = 0xe7a2,
    VRRc_VMN    = 0xe7fe,
    VRRc_VMNL   = 0xe7fc,
    VRRc_VMX    = 0xe7ff,
    VRRc_VMXL   = 0xe7fd,
    VRRc_VN     = 0xe768,
    VRRc_VNC    = 0xe769,
    VRRc_VNN    = 0xe76e,
    VRRc_VNO    = 0xe76b,
    VRRc_VNX    = 0xe76c,
    VRRc_VO     = 0xe76a,
    VRRc_VOC    = 0xe76f,
    VRRc_VPKS   = 0xe797,   /* we leave the m5 cs field 0 */
    VRRc_VS     = 0xe7f7,
    VRRa_VUPH   = 0xe7d7,
    VRRa_VUPL   = 0xe7d6,
    VRRc_VX     = 0xe76d,
    VRRe_VSEL   = 0xe78d,
    VRRf_VLVGP  = 0xe762,

    VRSa_VERLL  = 0xe733,
    VRSa_VESL   = 0xe730,
    VRSa_VESRA  = 0xe73a,
    VRSa_VESRL  = 0xe738,
    VRSb_VLVG   = 0xe722,
    VRSc_VLGV   = 0xe721,

    VRX_VL      = 0xe706,
    VRX_VLLEZ   = 0xe704,
    VRX_VLREP   = 0xe705,
    VRX_VST     = 0xe70e,
    VRX_VSTEF   = 0xe70b,
    VRX_VSTEG   = 0xe70a,

    /*
     * Unlike the entries above, this is a complete 16-bit instruction
     * rather than a bare opcode: it is the RR encoding of RR_BCR (0x07)
     * with a first field of 0 and a second field of 7.
     */
    NOP         = 0x0707,
} S390Opcode;
334
#ifdef CONFIG_DEBUG_TCG
/*
 * Printable register names, indexed by register number.  Slots 0-15 are
 * the general registers, slots 16-31 have no name, and slots 32-63 are
 * the vector registers.
 */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    /* 0 - 15: general registers */
    "%r0",  "%r1",  "%r2",  "%r3",
    "%r4",  "%r5",  "%r6",  "%r7",
    "%r8",  "%r9",  "%r10", "%r11",
    "%r12", "%r13", "%r14", "%r15",
    /* 16 - 31: unnamed */
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    /* 32 - 63: vector registers */
    "%v0",  "%v1",  "%v2",  "%v3",
    "%v4",  "%v5",  "%v6",  "%v7",
    "%v8",  "%v9",  "%v10", "%v11",
    "%v12", "%v13", "%v14", "%v15",
    "%v16", "%v17", "%v18", "%v19",
    "%v20", "%v21", "%v22", "%v23",
    "%v24", "%v25", "%v26", "%v27",
    "%v28", "%v29", "%v30", "%v31",
};
#endif
346
347/* Since R6 is a potential argument register, choose it last of the
348   call-saved registers.  Likewise prefer the call-clobbered registers
349   in reverse order to maximize the chance of avoiding the arguments.  */
350static const int tcg_target_reg_alloc_order[] = {
351    /* Call saved registers.  */
352    TCG_REG_R13,
353    TCG_REG_R12,
354    TCG_REG_R11,
355    TCG_REG_R10,
356    TCG_REG_R9,
357    TCG_REG_R8,
358    TCG_REG_R7,
359    TCG_REG_R6,
360    /* Call clobbered registers.  */
361    TCG_REG_R14,
362    TCG_REG_R0,
363    TCG_REG_R1,
364    /* Argument registers, in reverse order of allocation.  */
365    TCG_REG_R5,
366    TCG_REG_R4,
367    TCG_REG_R3,
368    TCG_REG_R2,
369
370    /* V8-V15 are call saved, and omitted. */
371    TCG_REG_V0,
372    TCG_REG_V1,
373    TCG_REG_V2,
374    TCG_REG_V3,
375    TCG_REG_V4,
376    TCG_REG_V5,
377    TCG_REG_V6,
378    TCG_REG_V7,
379    TCG_REG_V16,
380    TCG_REG_V17,
381    TCG_REG_V18,
382    TCG_REG_V19,
383    TCG_REG_V20,
384    TCG_REG_V21,
385    TCG_REG_V22,
386    TCG_REG_V23,
387    TCG_REG_V24,
388    TCG_REG_V25,
389    TCG_REG_V26,
390    TCG_REG_V27,
391    TCG_REG_V28,
392    TCG_REG_V29,
393    TCG_REG_V30,
394    TCG_REG_V31,
395};
396
397static const int tcg_target_call_iarg_regs[] = {
398    TCG_REG_R2,
399    TCG_REG_R3,
400    TCG_REG_R4,
401    TCG_REG_R5,
402    TCG_REG_R6,
403};
404
405static const int tcg_target_call_oarg_regs[] = {
406    TCG_REG_R2,
407};
408
/*
 * Condition masks.  The four basic values are single bits; the composite
 * conditions are built by OR-ing them together.  NEVER selects no bit and
 * ALWAYS selects all four.
 */
#define S390_CC_EQ      8
#define S390_CC_LT      4
#define S390_CC_GT      2
#define S390_CC_OV      1
#define S390_CC_NE      (S390_CC_LT | S390_CC_GT)
#define S390_CC_LE      (S390_CC_LT | S390_CC_EQ)
#define S390_CC_GE      (S390_CC_GT | S390_CC_EQ)
#define S390_CC_NEVER   0
#define S390_CC_ALWAYS  15
418
419/* Condition codes that result from a COMPARE and COMPARE LOGICAL.  */
420static const uint8_t tcg_cond_to_s390_cond[] = {
421    [TCG_COND_EQ]  = S390_CC_EQ,
422    [TCG_COND_NE]  = S390_CC_NE,
423    [TCG_COND_LT]  = S390_CC_LT,
424    [TCG_COND_LE]  = S390_CC_LE,
425    [TCG_COND_GT]  = S390_CC_GT,
426    [TCG_COND_GE]  = S390_CC_GE,
427    [TCG_COND_LTU] = S390_CC_LT,
428    [TCG_COND_LEU] = S390_CC_LE,
429    [TCG_COND_GTU] = S390_CC_GT,
430    [TCG_COND_GEU] = S390_CC_GE,
431};
432
433/* Condition codes that result from a LOAD AND TEST.  Here, we have no
434   unsigned instruction variation, however since the test is vs zero we
435   can re-map the outcomes appropriately.  */
436static const uint8_t tcg_cond_to_ltr_cond[] = {
437    [TCG_COND_EQ]  = S390_CC_EQ,
438    [TCG_COND_NE]  = S390_CC_NE,
439    [TCG_COND_LT]  = S390_CC_LT,
440    [TCG_COND_LE]  = S390_CC_LE,
441    [TCG_COND_GT]  = S390_CC_GT,
442    [TCG_COND_GE]  = S390_CC_GE,
443    [TCG_COND_LTU] = S390_CC_NEVER,
444    [TCG_COND_LEU] = S390_CC_EQ,
445    [TCG_COND_GTU] = S390_CC_NE,
446    [TCG_COND_GEU] = S390_CC_ALWAYS,
447};
448
#ifdef CONFIG_SOFTMMU
/*
 * Load helper functions, indexed by the memory operation's size, sign and
 * byte-swap bits.  Slots without an initializer are NULL.
 */
static void * const qemu_ld_helpers[(MO_SSIZE | MO_BSWAP) + 1] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_SB]   = helper_ret_ldsb_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LESW] = helper_le_ldsw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LESL] = helper_le_ldsl_mmu,
    [MO_LEUQ] = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BESW] = helper_be_ldsw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BESL] = helper_be_ldsl_mmu,
    [MO_BEUQ] = helper_be_ldq_mmu,
};

/*
 * Store helper functions, indexed by the memory operation's size and
 * byte-swap bits (the table has no sign bit, unlike the load table).
 */
static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEUQ] = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEUQ] = helper_be_stq_mmu,
};
#endif
475
/* NOTE(review): presumably the return address for translation blocks; it is
   set and used outside this section - confirm.  */
static const tcg_insn_unit *tb_ret_addr;
/* Not static, so it has external linkage.  NOTE(review): presumably the
   host facility bits, filled in elsewhere - confirm.  */
uint64_t s390_facilities[3];
478
479static inline bool is_general_reg(TCGReg r)
480{
481    return r <= TCG_REG_R15;
482}
483
484static inline bool is_vector_reg(TCGReg r)
485{
486    return r >= TCG_REG_V0 && r <= TCG_REG_V31;
487}
488
489static bool patch_reloc(tcg_insn_unit *src_rw, int type,
490                        intptr_t value, intptr_t addend)
491{
492    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
493    intptr_t pcrel2;
494    uint32_t old;
495
496    value += addend;
497    pcrel2 = (tcg_insn_unit *)value - src_rx;
498
499    switch (type) {
500    case R_390_PC16DBL:
501        if (pcrel2 == (int16_t)pcrel2) {
502            tcg_patch16(src_rw, pcrel2);
503            return true;
504        }
505        break;
506    case R_390_PC32DBL:
507        if (pcrel2 == (int32_t)pcrel2) {
508            tcg_patch32(src_rw, pcrel2);
509            return true;
510        }
511        break;
512    case R_390_20:
513        if (value == sextract64(value, 0, 20)) {
514            old = *(uint32_t *)src_rw & 0xf00000ff;
515            old |= ((value & 0xfff) << 16) | ((value & 0xff000) >> 4);
516            tcg_patch32(src_rw, old);
517            return true;
518        }
519        break;
520    default:
521        g_assert_not_reached();
522    }
523    return false;
524}
525
/*
 * If every set bit of @val lies within a single aligned 16-bit field,
 * return the index (0..3, counting from the least significant) of the
 * lowest such field; zero therefore yields 0.  Otherwise return -1.
 */
static int is_const_p16(uint64_t val)
{
    int slot = 0;

    while (slot < 4) {
        uint64_t outside = val & ~(0xffffull << (slot * 16));

        if (outside == 0) {
            return slot;
        }
        slot++;
    }
    return -1;
}
536
/*
 * Return 0 if the upper 32 bits of @val are clear (this includes zero),
 * 1 if only the lower 32 bits are clear, and -1 if both halves are
 * non-zero.
 */
static int is_const_p32(uint64_t val)
{
    uint64_t hi = val & 0xffffffff00000000ull;
    uint64_t lo = val & 0x00000000ffffffffull;

    if (hi == 0) {
        return 0;
    }
    return lo == 0 ? 1 : -1;
}
547
/*
 * Return true for masks made of one contiguous run of ones, where the run
 * may wrap around from bit 63 to bit 0:
 *  0....01....1
 *  1....10....0
 *  1..10..01..1
 *  0..01..10..0
 * All-zeros and all-ones are rejected.  Copied from gcc sources.
 */
static bool risbg_mask(uint64_t c)
{
    uint64_t work, first, rest, second;

    /* Inverting keeps the number of transitions; normalize to LSB == 0.  */
    work = (c & 1) ? ~c : c;

    /* All zeros or all ones: no transition at all.  */
    if (work == 0) {
        return false;
    }

    /* Lowest set bit marks the first 0 -> 1 transition.  */
    first = work & -work;

    /* Invert and drop everything below the first transition, so that a
       second transition, if present, shows up as the lowest set bit.  */
    rest = ~work & -first;
    second = rest & -rest;

    /* Accept if rest is zero or is solid ones from the second transition
       upwards.  */
    return rest == -second;
}
579
580/* Test if a constant matches the constraint. */
581static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
582{
583    if (ct & TCG_CT_CONST) {
584        return 1;
585    }
586
587    if (type == TCG_TYPE_I32) {
588        val = (int32_t)val;
589    }
590
591    /* The following are mutually exclusive.  */
592    if (ct & TCG_CT_CONST_S16) {
593        return val == (int16_t)val;
594    } else if (ct & TCG_CT_CONST_S32) {
595        return val == (int32_t)val;
596    } else if (ct & TCG_CT_CONST_S33) {
597        return val >= -0xffffffffll && val <= 0xffffffffll;
598    } else if (ct & TCG_CT_CONST_ZERO) {
599        return val == 0;
600    }
601
602    if (ct & TCG_CT_CONST_INV) {
603        val = ~val;
604    }
605    /*
606     * Note that is_const_p16 is a subset of is_const_p32,
607     * so we don't need both constraints.
608     */
609    if ((ct & TCG_CT_CONST_P32) && is_const_p32(val) >= 0) {
610        return true;
611    }
612    if ((ct & TCG_CT_CONST_INVRISBG) && risbg_mask(~val)) {
613        return true;
614    }
615
616    return 0;
617}
618
619/* Emit instructions according to the given instruction format.  */
620
621static void tcg_out_insn_RR(TCGContext *s, S390Opcode op, TCGReg r1, TCGReg r2)
622{
623    tcg_out16(s, (op << 8) | (r1 << 4) | r2);
624}
625
626static void tcg_out_insn_RRE(TCGContext *s, S390Opcode op,
627                             TCGReg r1, TCGReg r2)
628{
629    tcg_out32(s, (op << 16) | (r1 << 4) | r2);
630}
631
632/* RRF-a without the m4 field */
633static void tcg_out_insn_RRFa(TCGContext *s, S390Opcode op,
634                              TCGReg r1, TCGReg r2, TCGReg r3)
635{
636    tcg_out32(s, (op << 16) | (r3 << 12) | (r1 << 4) | r2);
637}
638
639/* RRF-a with the m4 field */
640static void tcg_out_insn_RRFam(TCGContext *s, S390Opcode op,
641                               TCGReg r1, TCGReg r2, TCGReg r3, int m4)
642{
643    tcg_out32(s, (op << 16) | (r3 << 12) | (m4 << 8) | (r1 << 4) | r2);
644}
645
646static void tcg_out_insn_RRFc(TCGContext *s, S390Opcode op,
647                              TCGReg r1, TCGReg r2, int m3)
648{
649    tcg_out32(s, (op << 16) | (m3 << 12) | (r1 << 4) | r2);
650}
651
652static void tcg_out_insn_RI(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
653{
654    tcg_out32(s, (op << 16) | (r1 << 20) | (i2 & 0xffff));
655}
656
657static void tcg_out_insn_RIEg(TCGContext *s, S390Opcode op, TCGReg r1,
658                             int i2, int m3)
659{
660    tcg_out16(s, (op & 0xff00) | (r1 << 4) | m3);
661    tcg_out32(s, (i2 << 16) | (op & 0xff));
662}
663
664static void tcg_out_insn_RIL(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
665{
666    tcg_out16(s, op | (r1 << 4));
667    tcg_out32(s, i2);
668}
669
670static void tcg_out_insn_RS(TCGContext *s, S390Opcode op, TCGReg r1,
671                            TCGReg b2, TCGReg r3, int disp)
672{
673    tcg_out32(s, (op << 24) | (r1 << 20) | (r3 << 16) | (b2 << 12)
674              | (disp & 0xfff));
675}
676
677static void tcg_out_insn_RSY(TCGContext *s, S390Opcode op, TCGReg r1,
678                             TCGReg b2, TCGReg r3, int disp)
679{
680    tcg_out16(s, (op & 0xff00) | (r1 << 4) | r3);
681    tcg_out32(s, (op & 0xff) | (b2 << 28)
682              | ((disp & 0xfff) << 16) | ((disp & 0xff000) >> 4));
683}
684
/*
 * The RX and RXY emitters reuse the RS and RSY emitters: the index
 * register is passed in the position of the r3 argument.
 */
#define tcg_out_insn_RX   tcg_out_insn_RS
#define tcg_out_insn_RXY  tcg_out_insn_RSY
687
688static int RXB(TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
689{
690    /*
691     * Shift bit 4 of each regno to its corresponding bit of RXB.
692     * RXB itself begins at bit 8 of the instruction so 8 - 4 = 4
693     * is the left-shift of the 4th operand.
694     */
695    return ((v1 & 0x10) << (4 + 3))
696         | ((v2 & 0x10) << (4 + 2))
697         | ((v3 & 0x10) << (4 + 1))
698         | ((v4 & 0x10) << (4 + 0));
699}
700
701static void tcg_out_insn_VRIa(TCGContext *s, S390Opcode op,
702                              TCGReg v1, uint16_t i2, int m3)
703{
704    tcg_debug_assert(is_vector_reg(v1));
705    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4));
706    tcg_out16(s, i2);
707    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
708}
709
710static void tcg_out_insn_VRIb(TCGContext *s, S390Opcode op,
711                              TCGReg v1, uint8_t i2, uint8_t i3, int m4)
712{
713    tcg_debug_assert(is_vector_reg(v1));
714    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4));
715    tcg_out16(s, (i2 << 8) | (i3 & 0xff));
716    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
717}
718
719static void tcg_out_insn_VRIc(TCGContext *s, S390Opcode op,
720                              TCGReg v1, uint16_t i2, TCGReg v3, int m4)
721{
722    tcg_debug_assert(is_vector_reg(v1));
723    tcg_debug_assert(is_vector_reg(v3));
724    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v3 & 0xf));
725    tcg_out16(s, i2);
726    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, v3, 0) | (m4 << 12));
727}
728
729static void tcg_out_insn_VRRa(TCGContext *s, S390Opcode op,
730                              TCGReg v1, TCGReg v2, int m3)
731{
732    tcg_debug_assert(is_vector_reg(v1));
733    tcg_debug_assert(is_vector_reg(v2));
734    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
735    tcg_out32(s, (op & 0x00ff) | RXB(v1, v2, 0, 0) | (m3 << 12));
736}
737
738static void tcg_out_insn_VRRc(TCGContext *s, S390Opcode op,
739                              TCGReg v1, TCGReg v2, TCGReg v3, int m4)
740{
741    tcg_debug_assert(is_vector_reg(v1));
742    tcg_debug_assert(is_vector_reg(v2));
743    tcg_debug_assert(is_vector_reg(v3));
744    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
745    tcg_out16(s, v3 << 12);
746    tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, 0) | (m4 << 12));
747}
748
749static void tcg_out_insn_VRRe(TCGContext *s, S390Opcode op,
750                              TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
751{
752    tcg_debug_assert(is_vector_reg(v1));
753    tcg_debug_assert(is_vector_reg(v2));
754    tcg_debug_assert(is_vector_reg(v3));
755    tcg_debug_assert(is_vector_reg(v4));
756    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
757    tcg_out16(s, v3 << 12);
758    tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, v4) | (v4 << 12));
759}
760
761static void tcg_out_insn_VRRf(TCGContext *s, S390Opcode op,
762                              TCGReg v1, TCGReg r2, TCGReg r3)
763{
764    tcg_debug_assert(is_vector_reg(v1));
765    tcg_debug_assert(is_general_reg(r2));
766    tcg_debug_assert(is_general_reg(r3));
767    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r2);
768    tcg_out16(s, r3 << 12);
769    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0));
770}
771
772static void tcg_out_insn_VRSa(TCGContext *s, S390Opcode op, TCGReg v1,
773                              intptr_t d2, TCGReg b2, TCGReg v3, int m4)
774{
775    tcg_debug_assert(is_vector_reg(v1));
776    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
777    tcg_debug_assert(is_general_reg(b2));
778    tcg_debug_assert(is_vector_reg(v3));
779    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v3 & 0xf));
780    tcg_out16(s, b2 << 12 | d2);
781    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, v3, 0) | (m4 << 12));
782}
783
784static void tcg_out_insn_VRSb(TCGContext *s, S390Opcode op, TCGReg v1,
785                              intptr_t d2, TCGReg b2, TCGReg r3, int m4)
786{
787    tcg_debug_assert(is_vector_reg(v1));
788    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
789    tcg_debug_assert(is_general_reg(b2));
790    tcg_debug_assert(is_general_reg(r3));
791    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r3);
792    tcg_out16(s, b2 << 12 | d2);
793    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
794}
795
796static void tcg_out_insn_VRSc(TCGContext *s, S390Opcode op, TCGReg r1,
797                              intptr_t d2, TCGReg b2, TCGReg v3, int m4)
798{
799    tcg_debug_assert(is_general_reg(r1));
800    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
801    tcg_debug_assert(is_general_reg(b2));
802    tcg_debug_assert(is_vector_reg(v3));
803    tcg_out16(s, (op & 0xff00) | (r1 << 4) | (v3 & 0xf));
804    tcg_out16(s, b2 << 12 | d2);
805    tcg_out16(s, (op & 0x00ff) | RXB(0, 0, v3, 0) | (m4 << 12));
806}
807
808static void tcg_out_insn_VRX(TCGContext *s, S390Opcode op, TCGReg v1,
809                             TCGReg b2, TCGReg x2, intptr_t d2, int m3)
810{
811    tcg_debug_assert(is_vector_reg(v1));
812    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
813    tcg_debug_assert(is_general_reg(x2));
814    tcg_debug_assert(is_general_reg(b2));
815    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | x2);
816    tcg_out16(s, (b2 << 12) | d2);
817    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
818}
819
/* Emit an opcode with "type-checking" of the format.
   FMT and OP are pasted together twice: tcg_out_insn_<FMT> names the
   emitter and <FMT>_<OP> names the S390Opcode constant, so an opcode can
   only be emitted through the emitter of its own format.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(FMT,_),OP), ## __VA_ARGS__)
823
824
/* emit 64-bit shifts
   Uses the RSY emitter: dest is r1, src is r3, and the shift amount is
   given by base register sh_reg plus displacement sh_imm.  */
static void tcg_out_sh64(TCGContext* s, S390Opcode op, TCGReg dest,
                         TCGReg src, TCGReg sh_reg, int sh_imm)
{
    tcg_out_insn_RSY(s, op, dest, sh_reg, src, sh_imm);
}
831
/* emit 32-bit shifts
   Uses the RS emitter with the r3 field set to 0; there is no separate
   source register argument.  The shift amount is given by base register
   sh_reg plus displacement sh_imm.  */
static void tcg_out_sh32(TCGContext* s, S390Opcode op, TCGReg dest,
                         TCGReg sh_reg, int sh_imm)
{
    tcg_out_insn_RS(s, op, dest, sh_reg, 0, sh_imm);
}
838
839static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
840{
841    if (src == dst) {
842        return true;
843    }
844    switch (type) {
845    case TCG_TYPE_I32:
846        if (likely(is_general_reg(dst) && is_general_reg(src))) {
847            tcg_out_insn(s, RR, LR, dst, src);
848            break;
849        }
850        /* fallthru */
851
852    case TCG_TYPE_I64:
853        if (likely(is_general_reg(dst))) {
854            if (likely(is_general_reg(src))) {
855                tcg_out_insn(s, RRE, LGR, dst, src);
856            } else {
857                tcg_out_insn(s, VRSc, VLGV, dst, 0, 0, src, 3);
858            }
859            break;
860        } else if (is_general_reg(src)) {
861            tcg_out_insn(s, VRSb, VLVG, dst, 0, 0, src, 3);
862            break;
863        }
864        /* fallthru */
865
866    case TCG_TYPE_V64:
867    case TCG_TYPE_V128:
868        tcg_out_insn(s, VRRa, VLR, dst, src, 0);
869        break;
870
871    default:
872        g_assert_not_reached();
873    }
874    return true;
875}
876
/*
 * Opcode tables used by tcg_out_movi(), indexed by the position of the
 * field being set, counting from the least significant:
 *   li_insns[i]  - RI opcode selected for 16-bit field i
 *   oi_insns[i]  - RI opcode selected for 16-bit field i
 *   lif_insns[i] - RIL opcode selected for 32-bit half i
 */
static const S390Opcode li_insns[4] = {
    RI_LLILL, RI_LLILH, RI_LLIHL, RI_LLIHH
};
static const S390Opcode oi_insns[4] = {
    RI_OILL, RI_OILH, RI_OIHL, RI_OIHH
};
static const S390Opcode lif_insns[2] = {
    RIL_LLILF, RIL_LLIHF,
};
886
/* load a register with an immediate value
 *
 * Emits code that sets register @ret to constant @sval.  For
 * TCG_TYPE_I32 only the low 32 bits of @sval are significant.  The
 * attempts are made in this order:
 *   1. a single 32-bit instruction (LGHI, or one of li_insns[]);
 *   2. a single 48-bit instruction (LGFI, or one of lif_insns[]);
 *   3. a PC-relative LARL, followed by AGHI 1 when the value is odd;
 *   4. two instructions: one for the low 32 bits, one OR-ing in the high
 *      32 bits.
 */
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long sval)
{
    tcg_target_ulong uval = sval;
    ptrdiff_t pc_off;
    int i;

    /* Keep a zero-extended (uval) and a sign-extended (sval) view. */
    if (type == TCG_TYPE_I32) {
        uval = (uint32_t)sval;
        sval = (int32_t)sval;
    }

    /* Try all 32-bit insns that can load it in one go.  */
    if (sval >= -0x8000 && sval < 0x8000) {
        tcg_out_insn(s, RI, LGHI, ret, sval);
        return;
    }

    /* Value confined to one 16-bit field: use the li_insns[] entry for it. */
    i = is_const_p16(uval);
    if (i >= 0) {
        tcg_out_insn_RI(s, li_insns[i], ret, uval >> (i * 16));
        return;
    }

    /* Try all 48-bit insns that can load it in one go.  */
    if (sval == (int32_t)sval) {
        tcg_out_insn(s, RIL, LGFI, ret, sval);
        return;
    }

    /* Value confined to one 32-bit half: use the lif_insns[] entry for it. */
    i = is_const_p32(uval);
    if (i >= 0) {
        tcg_out_insn_RIL(s, lif_insns[i], ret, uval >> (i * 32));
        return;
    }

    /* Try for PC-relative address load.  For odd addresses, add one. */
    pc_off = tcg_pcrel_diff(s, (void *)sval) >> 1;
    if (pc_off == (int32_t)pc_off) {
        tcg_out_insn(s, RIL, LARL, ret, pc_off);
        if (sval & 1) {
            tcg_out_insn(s, RI, AGHI, ret, 1);
        }
        return;
    }

    /* Otherwise, load it by parts. */
    /* Low 32 bits first; the RI emitter keeps only 16 bits of its operand. */
    i = is_const_p16((uint32_t)uval);
    if (i >= 0) {
        tcg_out_insn_RI(s, li_insns[i], ret, uval >> (i * 16));
    } else {
        tcg_out_insn(s, RIL, LLILF, ret, uval);
    }
    /* Then OR in the high 32 bits; i is 0 or 1 here, hence the "+ 2" to
       select the high-half entries of oi_insns[].  */
    uval >>= 32;
    i = is_const_p16(uval);
    if (i >= 0) {
        tcg_out_insn_RI(s, oi_insns[i + 2], ret, uval >> (i * 16));
    } else {
        tcg_out_insn(s, RIL, OIHF, ret, uval);
    }
}
949
950/* Emit a load/store type instruction.  Inputs are:
951   DATA:     The register to be loaded or stored.
952   BASE+OFS: The effective address.
953   OPC_RX:   If the operation has an RX format opcode (e.g. STC), otherwise 0.
954   OPC_RXY:  The RXY format opcode for the operation (e.g. STCY).  */
955
956static void tcg_out_mem(TCGContext *s, S390Opcode opc_rx, S390Opcode opc_rxy,
957                        TCGReg data, TCGReg base, TCGReg index,
958                        tcg_target_long ofs)
959{
960    if (ofs < -0x80000 || ofs >= 0x80000) {
961        /* Combine the low 20 bits of the offset with the actual load insn;
962           the high 44 bits must come from an immediate load.  */
963        tcg_target_long low = ((ofs & 0xfffff) ^ 0x80000) - 0x80000;
964        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - low);
965        ofs = low;
966
967        /* If we were already given an index register, add it in.  */
968        if (index != TCG_REG_NONE) {
969            tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
970        }
971        index = TCG_TMP0;
972    }
973
974    if (opc_rx && ofs >= 0 && ofs < 0x1000) {
975        tcg_out_insn_RX(s, opc_rx, data, base, index, ofs);
976    } else {
977        tcg_out_insn_RXY(s, opc_rxy, data, base, index, ofs);
978    }
979}
980
981static void tcg_out_vrx_mem(TCGContext *s, S390Opcode opc_vrx,
982                            TCGReg data, TCGReg base, TCGReg index,
983                            tcg_target_long ofs, int m3)
984{
985    if (ofs < 0 || ofs >= 0x1000) {
986        if (ofs >= -0x80000 && ofs < 0x80000) {
987            tcg_out_insn(s, RXY, LAY, TCG_TMP0, base, index, ofs);
988            base = TCG_TMP0;
989            index = TCG_REG_NONE;
990            ofs = 0;
991        } else {
992            tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs);
993            if (index != TCG_REG_NONE) {
994                tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
995            }
996            index = TCG_TMP0;
997            ofs = 0;
998        }
999    }
1000    tcg_out_insn_VRX(s, opc_vrx, data, base, index, ofs, m3);
1001}
1002
1003/* load data without address translation or endianness conversion */
1004static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data,
1005                       TCGReg base, intptr_t ofs)
1006{
1007    switch (type) {
1008    case TCG_TYPE_I32:
1009        if (likely(is_general_reg(data))) {
1010            tcg_out_mem(s, RX_L, RXY_LY, data, base, TCG_REG_NONE, ofs);
1011            break;
1012        }
1013        tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_32);
1014        break;
1015
1016    case TCG_TYPE_I64:
1017        if (likely(is_general_reg(data))) {
1018            tcg_out_mem(s, 0, RXY_LG, data, base, TCG_REG_NONE, ofs);
1019            break;
1020        }
1021        /* fallthru */
1022
1023    case TCG_TYPE_V64:
1024        tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_64);
1025        break;
1026
1027    case TCG_TYPE_V128:
1028        /* Hint quadword aligned.  */
1029        tcg_out_vrx_mem(s, VRX_VL, data, base, TCG_REG_NONE, ofs, 4);
1030        break;
1031
1032    default:
1033        g_assert_not_reached();
1034    }
1035}
1036
1037static void tcg_out_st(TCGContext *s, TCGType type, TCGReg data,
1038                       TCGReg base, intptr_t ofs)
1039{
1040    switch (type) {
1041    case TCG_TYPE_I32:
1042        if (likely(is_general_reg(data))) {
1043            tcg_out_mem(s, RX_ST, RXY_STY, data, base, TCG_REG_NONE, ofs);
1044        } else {
1045            tcg_out_vrx_mem(s, VRX_VSTEF, data, base, TCG_REG_NONE, ofs, 1);
1046        }
1047        break;
1048
1049    case TCG_TYPE_I64:
1050        if (likely(is_general_reg(data))) {
1051            tcg_out_mem(s, 0, RXY_STG, data, base, TCG_REG_NONE, ofs);
1052            break;
1053        }
1054        /* fallthru */
1055
1056    case TCG_TYPE_V64:
1057        tcg_out_vrx_mem(s, VRX_VSTEG, data, base, TCG_REG_NONE, ofs, 0);
1058        break;
1059
1060    case TCG_TYPE_V128:
1061        /* Hint quadword aligned.  */
1062        tcg_out_vrx_mem(s, VRX_VST, data, base, TCG_REG_NONE, ofs, 4);
1063        break;
1064
1065    default:
1066        g_assert_not_reached();
1067    }
1068}
1069
1070static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1071                               TCGReg base, intptr_t ofs)
1072{
1073    return false;
1074}
1075
1076static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src,
1077                                 int msb, int lsb, int ofs, int z)
1078{
1079    /* Format RIE-f */
1080    tcg_out16(s, (RIEf_RISBG & 0xff00) | (dest << 4) | src);
1081    tcg_out16(s, (msb << 8) | (z << 7) | lsb);
1082    tcg_out16(s, (ofs << 8) | (RIEf_RISBG & 0xff));
1083}
1084
1085static void tgen_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1086{
1087    tcg_out_insn(s, RRE, LGBR, dest, src);
1088}
1089
1090static void tgen_ext8u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1091{
1092    tcg_out_insn(s, RRE, LLGCR, dest, src);
1093}
1094
1095static void tgen_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1096{
1097    tcg_out_insn(s, RRE, LGHR, dest, src);
1098}
1099
1100static void tgen_ext16u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1101{
1102    tcg_out_insn(s, RRE, LLGHR, dest, src);
1103}
1104
1105static inline void tgen_ext32s(TCGContext *s, TCGReg dest, TCGReg src)
1106{
1107    tcg_out_insn(s, RRE, LGFR, dest, src);
1108}
1109
1110static inline void tgen_ext32u(TCGContext *s, TCGReg dest, TCGReg src)
1111{
1112    tcg_out_insn(s, RRE, LLGFR, dest, src);
1113}
1114
1115static void tgen_andi_risbg(TCGContext *s, TCGReg out, TCGReg in, uint64_t val)
1116{
1117    int msb, lsb;
1118    if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
1119        /* Achieve wraparound by swapping msb and lsb.  */
1120        msb = 64 - ctz64(~val);
1121        lsb = clz64(~val) - 1;
1122    } else {
1123        msb = clz64(val);
1124        lsb = 63 - ctz64(val);
1125    }
1126    tcg_out_risbg(s, out, in, msb, lsb, 0, 1);
1127}
1128
1129static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
1130{
1131    static const S390Opcode ni_insns[4] = {
1132        RI_NILL, RI_NILH, RI_NIHL, RI_NIHH
1133    };
1134    static const S390Opcode nif_insns[2] = {
1135        RIL_NILF, RIL_NIHF
1136    };
1137    uint64_t valid = (type == TCG_TYPE_I32 ? 0xffffffffull : -1ull);
1138    int i;
1139
1140    /* Look for the zero-extensions.  */
1141    if ((val & valid) == 0xffffffff) {
1142        tgen_ext32u(s, dest, dest);
1143        return;
1144    }
1145    if ((val & valid) == 0xff) {
1146        tgen_ext8u(s, TCG_TYPE_I64, dest, dest);
1147        return;
1148    }
1149    if ((val & valid) == 0xffff) {
1150        tgen_ext16u(s, TCG_TYPE_I64, dest, dest);
1151        return;
1152    }
1153
1154    i = is_const_p16(~val & valid);
1155    if (i >= 0) {
1156        tcg_out_insn_RI(s, ni_insns[i], dest, val >> (i * 16));
1157        return;
1158    }
1159
1160    i = is_const_p32(~val & valid);
1161    tcg_debug_assert(i == 0 || type != TCG_TYPE_I32);
1162    if (i >= 0) {
1163        tcg_out_insn_RIL(s, nif_insns[i], dest, val >> (i * 32));
1164        return;
1165    }
1166
1167    if (risbg_mask(val)) {
1168        tgen_andi_risbg(s, dest, dest, val);
1169        return;
1170    }
1171
1172    g_assert_not_reached();
1173}
1174
1175static void tgen_ori(TCGContext *s, TCGReg dest, uint64_t val)
1176{
1177    static const S390Opcode oif_insns[2] = {
1178        RIL_OILF, RIL_OIHF
1179    };
1180
1181    int i;
1182
1183    i = is_const_p16(val);
1184    if (i >= 0) {
1185        tcg_out_insn_RI(s, oi_insns[i], dest, val >> (i * 16));
1186        return;
1187    }
1188
1189    i = is_const_p32(val);
1190    if (i >= 0) {
1191        tcg_out_insn_RIL(s, oif_insns[i], dest, val >> (i * 32));
1192        return;
1193    }
1194
1195    g_assert_not_reached();
1196}
1197
1198static void tgen_xori(TCGContext *s, TCGReg dest, uint64_t val)
1199{
1200    switch (is_const_p32(val)) {
1201    case 0:
1202        tcg_out_insn(s, RIL, XILF, dest, val);
1203        break;
1204    case 1:
1205        tcg_out_insn(s, RIL, XIHF, dest, val >> 32);
1206        break;
1207    default:
1208        g_assert_not_reached();
1209    }
1210}
1211
1212static int tgen_cmp2(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
1213                     TCGArg c2, bool c2const, bool need_carry, int *inv_cc)
1214{
1215    bool is_unsigned = is_unsigned_cond(c);
1216    TCGCond inv_c = tcg_invert_cond(c);
1217    S390Opcode op;
1218
1219    if (c2const) {
1220        if (c2 == 0) {
1221            if (!(is_unsigned && need_carry)) {
1222                if (type == TCG_TYPE_I32) {
1223                    tcg_out_insn(s, RR, LTR, r1, r1);
1224                } else {
1225                    tcg_out_insn(s, RRE, LTGR, r1, r1);
1226                }
1227                *inv_cc = tcg_cond_to_ltr_cond[inv_c];
1228                return tcg_cond_to_ltr_cond[c];
1229            }
1230        }
1231
1232        if (!is_unsigned && c2 == (int16_t)c2) {
1233            op = (type == TCG_TYPE_I32 ? RI_CHI : RI_CGHI);
1234            tcg_out_insn_RI(s, op, r1, c2);
1235            goto exit;
1236        }
1237
1238        if (type == TCG_TYPE_I32) {
1239            op = (is_unsigned ? RIL_CLFI : RIL_CFI);
1240            tcg_out_insn_RIL(s, op, r1, c2);
1241            goto exit;
1242        }
1243
1244        /*
1245         * Constraints are for a signed 33-bit operand, which is a
1246         * convenient superset of this signed/unsigned test.
1247         */
1248        if (c2 == (is_unsigned ? (TCGArg)(uint32_t)c2 : (TCGArg)(int32_t)c2)) {
1249            op = (is_unsigned ? RIL_CLGFI : RIL_CGFI);
1250            tcg_out_insn_RIL(s, op, r1, c2);
1251            goto exit;
1252        }
1253
1254        /* Load everything else into a register. */
1255        tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, c2);
1256        c2 = TCG_TMP0;
1257    }
1258
1259    if (type == TCG_TYPE_I32) {
1260        op = (is_unsigned ? RR_CLR : RR_CR);
1261        tcg_out_insn_RR(s, op, r1, c2);
1262    } else {
1263        op = (is_unsigned ? RRE_CLGR : RRE_CGR);
1264        tcg_out_insn_RRE(s, op, r1, c2);
1265    }
1266
1267 exit:
1268    *inv_cc = tcg_cond_to_s390_cond[inv_c];
1269    return tcg_cond_to_s390_cond[c];
1270}
1271
1272static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
1273                    TCGArg c2, bool c2const, bool need_carry)
1274{
1275    int inv_cc;
1276    return tgen_cmp2(s, type, c, r1, c2, c2const, need_carry, &inv_cc);
1277}
1278
1279static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
1280                         TCGReg dest, TCGReg c1, TCGArg c2, int c2const)
1281{
1282    int cc;
1283
1284    /* With LOC2, we can always emit the minimum 3 insns.  */
1285    if (HAVE_FACILITY(LOAD_ON_COND2)) {
1286        /* Emit: d = 0, d = (cc ? 1 : d).  */
1287        cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
1288        tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
1289        tcg_out_insn(s, RIEg, LOCGHI, dest, 1, cc);
1290        return;
1291    }
1292
1293 restart:
1294    switch (cond) {
1295    case TCG_COND_NE:
1296        /* X != 0 is X > 0.  */
1297        if (c2const && c2 == 0) {
1298            cond = TCG_COND_GTU;
1299        } else {
1300            break;
1301        }
1302        /* fallthru */
1303
1304    case TCG_COND_GTU:
1305    case TCG_COND_GT:
1306        /* The result of a compare has CC=2 for GT and CC=3 unused.
1307           ADD LOGICAL WITH CARRY considers (CC & 2) the carry bit.  */
1308        tgen_cmp(s, type, cond, c1, c2, c2const, true);
1309        tcg_out_movi(s, type, dest, 0);
1310        tcg_out_insn(s, RRE, ALCGR, dest, dest);
1311        return;
1312
1313    case TCG_COND_EQ:
1314        /* X == 0 is X <= 0.  */
1315        if (c2const && c2 == 0) {
1316            cond = TCG_COND_LEU;
1317        } else {
1318            break;
1319        }
1320        /* fallthru */
1321
1322    case TCG_COND_LEU:
1323    case TCG_COND_LE:
1324        /* As above, but we're looking for borrow, or !carry.
1325           The second insn computes d - d - borrow, or -1 for true
1326           and 0 for false.  So we must mask to 1 bit afterward.  */
1327        tgen_cmp(s, type, cond, c1, c2, c2const, true);
1328        tcg_out_insn(s, RRE, SLBGR, dest, dest);
1329        tgen_andi(s, type, dest, 1);
1330        return;
1331
1332    case TCG_COND_GEU:
1333    case TCG_COND_LTU:
1334    case TCG_COND_LT:
1335    case TCG_COND_GE:
1336        /* Swap operands so that we can use LEU/GTU/GT/LE.  */
1337        if (!c2const) {
1338            TCGReg t = c1;
1339            c1 = c2;
1340            c2 = t;
1341            cond = tcg_swap_cond(cond);
1342            goto restart;
1343        }
1344        break;
1345
1346    default:
1347        g_assert_not_reached();
1348    }
1349
1350    cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
1351    /* Emit: d = 0, t = 1, d = (cc ? t : d).  */
1352    tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
1353    tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 1);
1354    tcg_out_insn(s, RRFc, LOCGR, dest, TCG_TMP0, cc);
1355}
1356
1357static void tgen_movcond_int(TCGContext *s, TCGType type, TCGReg dest,
1358                             TCGArg v3, int v3const, TCGReg v4,
1359                             int cc, int inv_cc)
1360{
1361    TCGReg src;
1362
1363    if (v3const) {
1364        if (dest == v4) {
1365            if (HAVE_FACILITY(LOAD_ON_COND2)) {
1366                /* Emit: if (cc) dest = v3. */
1367                tcg_out_insn(s, RIEg, LOCGHI, dest, v3, cc);
1368                return;
1369            }
1370            tcg_out_insn(s, RI, LGHI, TCG_TMP0, v3);
1371            src = TCG_TMP0;
1372        } else {
1373            /* LGR+LOCGHI is larger than LGHI+LOCGR. */
1374            tcg_out_insn(s, RI, LGHI, dest, v3);
1375            cc = inv_cc;
1376            src = v4;
1377        }
1378    } else {
1379        if (HAVE_FACILITY(MISC_INSN_EXT3)) {
1380            /* Emit: dest = cc ? v3 : v4. */
1381            tcg_out_insn(s, RRFam, SELGR, dest, v3, v4, cc);
1382            return;
1383        }
1384        if (dest == v4) {
1385            src = v3;
1386        } else {
1387            tcg_out_mov(s, type, dest, v3);
1388            cc = inv_cc;
1389            src = v4;
1390        }
1391    }
1392
1393    /* Emit: if (cc) dest = src. */
1394    tcg_out_insn(s, RRFc, LOCGR, dest, src, cc);
1395}
1396
1397static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest,
1398                         TCGReg c1, TCGArg c2, int c2const,
1399                         TCGArg v3, int v3const, TCGReg v4)
1400{
1401    int cc, inv_cc;
1402
1403    cc = tgen_cmp2(s, type, c, c1, c2, c2const, false, &inv_cc);
1404    tgen_movcond_int(s, type, dest, v3, v3const, v4, cc, inv_cc);
1405}
1406
1407static void tgen_clz(TCGContext *s, TCGReg dest, TCGReg a1,
1408                     TCGArg a2, int a2const)
1409{
1410    /* Since this sets both R and R+1, we have no choice but to store the
1411       result into R0, allowing R1 == TCG_TMP0 to be clobbered as well.  */
1412    QEMU_BUILD_BUG_ON(TCG_TMP0 != TCG_REG_R1);
1413    tcg_out_insn(s, RRE, FLOGR, TCG_REG_R0, a1);
1414
1415    if (a2const && a2 == 64) {
1416        tcg_out_mov(s, TCG_TYPE_I64, dest, TCG_REG_R0);
1417        return;
1418    }
1419
1420    /*
1421     * Conditions from FLOGR are:
1422     *   2 -> one bit found
1423     *   8 -> no one bit found
1424     */
1425    tgen_movcond_int(s, TCG_TYPE_I64, dest, a2, a2const, TCG_REG_R0, 8, 2);
1426}
1427
1428static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1429{
1430    /* With MIE3, and bit 0 of m4 set, we get the complete result. */
1431    if (HAVE_FACILITY(MISC_INSN_EXT3)) {
1432        if (type == TCG_TYPE_I32) {
1433            tgen_ext32u(s, dest, src);
1434            src = dest;
1435        }
1436        tcg_out_insn(s, RRFc, POPCNT, dest, src, 8);
1437        return;
1438    }
1439
1440    /* Without MIE3, each byte gets the count of bits for the byte. */
1441    tcg_out_insn(s, RRFc, POPCNT, dest, src, 0);
1442
1443    /* Multiply to sum each byte at the top of the word. */
1444    if (type == TCG_TYPE_I32) {
1445        tcg_out_insn(s, RIL, MSFI, dest, 0x01010101);
1446        tcg_out_sh32(s, RS_SRL, dest, TCG_REG_NONE, 24);
1447    } else {
1448        tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 0x0101010101010101ull);
1449        tcg_out_insn(s, RRE, MSGR, dest, TCG_TMP0);
1450        tcg_out_sh64(s, RSY_SRLG, dest, dest, TCG_REG_NONE, 56);
1451    }
1452}
1453
1454static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
1455                         int ofs, int len, int z)
1456{
1457    int lsb = (63 - ofs);
1458    int msb = lsb - (len - 1);
1459    tcg_out_risbg(s, dest, src, msb, lsb, ofs, z);
1460}
1461
1462static void tgen_extract(TCGContext *s, TCGReg dest, TCGReg src,
1463                         int ofs, int len)
1464{
1465    tcg_out_risbg(s, dest, src, 64 - len, 63, 64 - ofs, 1);
1466}
1467
1468static void tgen_gotoi(TCGContext *s, int cc, const tcg_insn_unit *dest)
1469{
1470    ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
1471    if (off == (int16_t)off) {
1472        tcg_out_insn(s, RI, BRC, cc, off);
1473    } else if (off == (int32_t)off) {
1474        tcg_out_insn(s, RIL, BRCL, cc, off);
1475    } else {
1476        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
1477        tcg_out_insn(s, RR, BCR, cc, TCG_TMP0);
1478    }
1479}
1480
1481static void tgen_branch(TCGContext *s, int cc, TCGLabel *l)
1482{
1483    if (l->has_value) {
1484        tgen_gotoi(s, cc, l->u.value_ptr);
1485    } else {
1486        tcg_out16(s, RI_BRC | (cc << 4));
1487        tcg_out_reloc(s, s->code_ptr, R_390_PC16DBL, l, 2);
1488        s->code_ptr += 1;
1489    }
1490}
1491
1492static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc,
1493                                TCGReg r1, TCGReg r2, TCGLabel *l)
1494{
1495    tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
1496    /* Format RIE-b */
1497    tcg_out16(s, (opc & 0xff00) | (r1 << 4) | r2);
1498    tcg_out16(s, 0);
1499    tcg_out16(s, cc << 12 | (opc & 0xff));
1500}
1501
1502static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc,
1503                                    TCGReg r1, int i2, TCGLabel *l)
1504{
1505    tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
1506    /* Format RIE-c */
1507    tcg_out16(s, (opc & 0xff00) | (r1 << 4) | cc);
1508    tcg_out16(s, 0);
1509    tcg_out16(s, (i2 << 8) | (opc & 0xff));
1510}
1511
1512static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
1513                        TCGReg r1, TCGArg c2, int c2const, TCGLabel *l)
1514{
1515    int cc;
1516    bool is_unsigned = is_unsigned_cond(c);
1517    bool in_range;
1518    S390Opcode opc;
1519
1520    cc = tcg_cond_to_s390_cond[c];
1521
1522    if (!c2const) {
1523        opc = (type == TCG_TYPE_I32
1524               ? (is_unsigned ? RIEb_CLRJ : RIEb_CRJ)
1525               : (is_unsigned ? RIEb_CLGRJ : RIEb_CGRJ));
1526        tgen_compare_branch(s, opc, cc, r1, c2, l);
1527        return;
1528    }
1529
1530    /*
1531     * COMPARE IMMEDIATE AND BRANCH RELATIVE has an 8-bit immediate field.
1532     * If the immediate we've been given does not fit that range, we'll
1533     * fall back to separate compare and branch instructions using the
1534     * larger comparison range afforded by COMPARE IMMEDIATE.
1535     */
1536    if (type == TCG_TYPE_I32) {
1537        if (is_unsigned) {
1538            opc = RIEc_CLIJ;
1539            in_range = (uint32_t)c2 == (uint8_t)c2;
1540        } else {
1541            opc = RIEc_CIJ;
1542            in_range = (int32_t)c2 == (int8_t)c2;
1543        }
1544    } else {
1545        if (is_unsigned) {
1546            opc = RIEc_CLGIJ;
1547            in_range = (uint64_t)c2 == (uint8_t)c2;
1548        } else {
1549            opc = RIEc_CGIJ;
1550            in_range = (int64_t)c2 == (int8_t)c2;
1551        }
1552    }
1553    if (in_range) {
1554        tgen_compare_imm_branch(s, opc, cc, r1, c2, l);
1555        return;
1556    }
1557
1558    cc = tgen_cmp(s, type, c, r1, c2, c2const, false);
1559    tgen_branch(s, cc, l);
1560}
1561
1562static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *dest)
1563{
1564    ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
1565    if (off == (int32_t)off) {
1566        tcg_out_insn(s, RIL, BRASL, TCG_REG_R14, off);
1567    } else {
1568        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
1569        tcg_out_insn(s, RR, BASR, TCG_REG_R14, TCG_TMP0);
1570    }
1571}
1572
1573static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest,
1574                         const TCGHelperInfo *info)
1575{
1576    tcg_out_call_int(s, dest);
1577}
1578
1579static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,
1580                                   TCGReg base, TCGReg index, int disp)
1581{
1582    switch (opc & (MO_SSIZE | MO_BSWAP)) {
1583    case MO_UB:
1584        tcg_out_insn(s, RXY, LLGC, data, base, index, disp);
1585        break;
1586    case MO_SB:
1587        tcg_out_insn(s, RXY, LGB, data, base, index, disp);
1588        break;
1589
1590    case MO_UW | MO_BSWAP:
1591        /* swapped unsigned halfword load with upper bits zeroed */
1592        tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
1593        tgen_ext16u(s, TCG_TYPE_I64, data, data);
1594        break;
1595    case MO_UW:
1596        tcg_out_insn(s, RXY, LLGH, data, base, index, disp);
1597        break;
1598
1599    case MO_SW | MO_BSWAP:
1600        /* swapped sign-extended halfword load */
1601        tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
1602        tgen_ext16s(s, TCG_TYPE_I64, data, data);
1603        break;
1604    case MO_SW:
1605        tcg_out_insn(s, RXY, LGH, data, base, index, disp);
1606        break;
1607
1608    case MO_UL | MO_BSWAP:
1609        /* swapped unsigned int load with upper bits zeroed */
1610        tcg_out_insn(s, RXY, LRV, data, base, index, disp);
1611        tgen_ext32u(s, data, data);
1612        break;
1613    case MO_UL:
1614        tcg_out_insn(s, RXY, LLGF, data, base, index, disp);
1615        break;
1616
1617    case MO_SL | MO_BSWAP:
1618        /* swapped sign-extended int load */
1619        tcg_out_insn(s, RXY, LRV, data, base, index, disp);
1620        tgen_ext32s(s, data, data);
1621        break;
1622    case MO_SL:
1623        tcg_out_insn(s, RXY, LGF, data, base, index, disp);
1624        break;
1625
1626    case MO_UQ | MO_BSWAP:
1627        tcg_out_insn(s, RXY, LRVG, data, base, index, disp);
1628        break;
1629    case MO_UQ:
1630        tcg_out_insn(s, RXY, LG, data, base, index, disp);
1631        break;
1632
1633    default:
1634        tcg_abort();
1635    }
1636}
1637
1638static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data,
1639                                   TCGReg base, TCGReg index, int disp)
1640{
1641    switch (opc & (MO_SIZE | MO_BSWAP)) {
1642    case MO_UB:
1643        if (disp >= 0 && disp < 0x1000) {
1644            tcg_out_insn(s, RX, STC, data, base, index, disp);
1645        } else {
1646            tcg_out_insn(s, RXY, STCY, data, base, index, disp);
1647        }
1648        break;
1649
1650    case MO_UW | MO_BSWAP:
1651        tcg_out_insn(s, RXY, STRVH, data, base, index, disp);
1652        break;
1653    case MO_UW:
1654        if (disp >= 0 && disp < 0x1000) {
1655            tcg_out_insn(s, RX, STH, data, base, index, disp);
1656        } else {
1657            tcg_out_insn(s, RXY, STHY, data, base, index, disp);
1658        }
1659        break;
1660
1661    case MO_UL | MO_BSWAP:
1662        tcg_out_insn(s, RXY, STRV, data, base, index, disp);
1663        break;
1664    case MO_UL:
1665        if (disp >= 0 && disp < 0x1000) {
1666            tcg_out_insn(s, RX, ST, data, base, index, disp);
1667        } else {
1668            tcg_out_insn(s, RXY, STY, data, base, index, disp);
1669        }
1670        break;
1671
1672    case MO_UQ | MO_BSWAP:
1673        tcg_out_insn(s, RXY, STRVG, data, base, index, disp);
1674        break;
1675    case MO_UQ:
1676        tcg_out_insn(s, RXY, STG, data, base, index, disp);
1677        break;
1678
1679    default:
1680        tcg_abort();
1681    }
1682}
1683
1684#if defined(CONFIG_SOFTMMU)
1685/* We're expecting to use a 20-bit negative offset on the tlb memory ops.  */
1686QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1687QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19));
1688
1689/* Load and compare a TLB entry, leaving the flags set.  Loads the TLB
1690   addend into R2.  Returns a register with the santitized guest address.  */
1691static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
1692                               int mem_index, bool is_ld)
1693{
1694    unsigned s_bits = opc & MO_SIZE;
1695    unsigned a_bits = get_alignment_bits(opc);
1696    unsigned s_mask = (1 << s_bits) - 1;
1697    unsigned a_mask = (1 << a_bits) - 1;
1698    int fast_off = TLB_MASK_TABLE_OFS(mem_index);
1699    int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
1700    int table_off = fast_off + offsetof(CPUTLBDescFast, table);
1701    int ofs, a_off;
1702    uint64_t tlb_mask;
1703
1704    tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE,
1705                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1706    tcg_out_insn(s, RXY, NG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, mask_off);
1707    tcg_out_insn(s, RXY, AG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, table_off);
1708
1709    /* For aligned accesses, we check the first byte and include the alignment
1710       bits within the address.  For unaligned access, we check that we don't
1711       cross pages using the address of the last byte of the access.  */
1712    a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
1713    tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1714    if (a_off == 0) {
1715        tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask);
1716    } else {
1717        tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off);
1718        tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask);
1719    }
1720
1721    if (is_ld) {
1722        ofs = offsetof(CPUTLBEntry, addr_read);
1723    } else {
1724        ofs = offsetof(CPUTLBEntry, addr_write);
1725    }
1726    if (TARGET_LONG_BITS == 32) {
1727        tcg_out_insn(s, RX, C, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
1728    } else {
1729        tcg_out_insn(s, RXY, CG, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
1730    }
1731
1732    tcg_out_insn(s, RXY, LG, TCG_REG_R2, TCG_REG_R2, TCG_REG_NONE,
1733                 offsetof(CPUTLBEntry, addend));
1734
1735    if (TARGET_LONG_BITS == 32) {
1736        tgen_ext32u(s, TCG_REG_R3, addr_reg);
1737        return TCG_REG_R3;
1738    }
1739    return addr_reg;
1740}
1741
1742static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
1743                                TCGReg data, TCGReg addr,
1744                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1745{
1746    TCGLabelQemuLdst *label = new_ldst_label(s);
1747
1748    label->is_ld = is_ld;
1749    label->oi = oi;
1750    label->datalo_reg = data;
1751    label->addrlo_reg = addr;
1752    label->raddr = tcg_splitwx_to_rx(raddr);
1753    label->label_ptr[0] = label_ptr;
1754}
1755
1756static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1757{
1758    TCGReg addr_reg = lb->addrlo_reg;
1759    TCGReg data_reg = lb->datalo_reg;
1760    MemOpIdx oi = lb->oi;
1761    MemOp opc = get_memop(oi);
1762
1763    if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
1764                     (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1765        return false;
1766    }
1767
1768    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
1769    if (TARGET_LONG_BITS == 64) {
1770        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
1771    }
1772    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi);
1773    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr);
1774    tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]);
1775    tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
1776
1777    tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1778    return true;
1779}
1780
1781static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1782{
1783    TCGReg addr_reg = lb->addrlo_reg;
1784    TCGReg data_reg = lb->datalo_reg;
1785    MemOpIdx oi = lb->oi;
1786    MemOp opc = get_memop(oi);
1787
1788    if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
1789                     (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1790        return false;
1791    }
1792
1793    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
1794    if (TARGET_LONG_BITS == 64) {
1795        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
1796    }
1797    switch (opc & MO_SIZE) {
1798    case MO_UB:
1799        tgen_ext8u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
1800        break;
1801    case MO_UW:
1802        tgen_ext16u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
1803        break;
1804    case MO_UL:
1805        tgen_ext32u(s, TCG_REG_R4, data_reg);
1806        break;
1807    case MO_UQ:
1808        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
1809        break;
1810    default:
1811        tcg_abort();
1812    }
1813    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi);
1814    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr);
1815    tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1816
1817    tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1818    return true;
1819}
1820#else
1821static void tcg_out_test_alignment(TCGContext *s, bool is_ld,
1822                                   TCGReg addrlo, unsigned a_bits)
1823{
1824    unsigned a_mask = (1 << a_bits) - 1;
1825    TCGLabelQemuLdst *l = new_ldst_label(s);
1826
1827    l->is_ld = is_ld;
1828    l->addrlo_reg = addrlo;
1829
1830    /* We are expecting a_bits to max out at 7, much lower than TMLL. */
1831    tcg_debug_assert(a_bits < 16);
1832    tcg_out_insn(s, RI, TMLL, addrlo, a_mask);
1833
1834    tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */
1835    l->label_ptr[0] = s->code_ptr;
1836    s->code_ptr += 1;
1837
1838    l->raddr = tcg_splitwx_to_rx(s->code_ptr);
1839}
1840
1841static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
1842{
1843    if (!patch_reloc(l->label_ptr[0], R_390_PC16DBL,
1844                     (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1845        return false;
1846    }
1847
1848    tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R3, l->addrlo_reg);
1849    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
1850
1851    /* "Tail call" to the helper, with the return address back inline. */
1852    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R14, (uintptr_t)l->raddr);
1853    tgen_gotoi(s, S390_CC_ALWAYS, (const void *)(l->is_ld ? helper_unaligned_ld
1854                                                 : helper_unaligned_st));
1855    return true;
1856}
1857
1858static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1859{
1860    return tcg_out_fail_alignment(s, l);
1861}
1862
1863static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1864{
1865    return tcg_out_fail_alignment(s, l);
1866}
1867
1868static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg,
1869                                  TCGReg *index_reg, tcg_target_long *disp)
1870{
1871    if (TARGET_LONG_BITS == 32) {
1872        tgen_ext32u(s, TCG_TMP0, *addr_reg);
1873        *addr_reg = TCG_TMP0;
1874    }
1875    if (guest_base < 0x80000) {
1876        *index_reg = TCG_REG_NONE;
1877        *disp = guest_base;
1878    } else {
1879        *index_reg = TCG_GUEST_BASE_REG;
1880        *disp = 0;
1881    }
1882}
1883#endif /* CONFIG_SOFTMMU */
1884
1885static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
1886                            MemOpIdx oi)
1887{
1888    MemOp opc = get_memop(oi);
1889#ifdef CONFIG_SOFTMMU
1890    unsigned mem_index = get_mmuidx(oi);
1891    tcg_insn_unit *label_ptr;
1892    TCGReg base_reg;
1893
1894    base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1);
1895
1896    tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
1897    label_ptr = s->code_ptr;
1898    s->code_ptr += 1;
1899
1900    tcg_out_qemu_ld_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
1901
1902    add_qemu_ldst_label(s, 1, oi, data_reg, addr_reg, s->code_ptr, label_ptr);
1903#else
1904    TCGReg index_reg;
1905    tcg_target_long disp;
1906    unsigned a_bits = get_alignment_bits(opc);
1907
1908    if (a_bits) {
1909        tcg_out_test_alignment(s, true, addr_reg, a_bits);
1910    }
1911    tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp);
1912    tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, index_reg, disp);
1913#endif
1914}
1915
1916static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
1917                            MemOpIdx oi)
1918{
1919    MemOp opc = get_memop(oi);
1920#ifdef CONFIG_SOFTMMU
1921    unsigned mem_index = get_mmuidx(oi);
1922    tcg_insn_unit *label_ptr;
1923    TCGReg base_reg;
1924
1925    base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0);
1926
1927    tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
1928    label_ptr = s->code_ptr;
1929    s->code_ptr += 1;
1930
1931    tcg_out_qemu_st_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
1932
1933    add_qemu_ldst_label(s, 0, oi, data_reg, addr_reg, s->code_ptr, label_ptr);
1934#else
1935    TCGReg index_reg;
1936    tcg_target_long disp;
1937    unsigned a_bits = get_alignment_bits(opc);
1938
1939    if (a_bits) {
1940        tcg_out_test_alignment(s, false, addr_reg, a_bits);
1941    }
1942    tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp);
1943    tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, index_reg, disp);
1944#endif
1945}
1946
1947static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
1948{
1949    /* Reuse the zeroing that exists for goto_ptr.  */
1950    if (a0 == 0) {
1951        tgen_gotoi(s, S390_CC_ALWAYS, tcg_code_gen_epilogue);
1952    } else {
1953        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, a0);
1954        tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr);
1955    }
1956}
1957
1958static void tcg_out_goto_tb(TCGContext *s, int which)
1959{
1960    /*
1961     * Branch displacement must be aligned for atomic patching;
1962     * see if we need to add extra nop before branch
1963     */
1964    if (!QEMU_PTR_IS_ALIGNED(s->code_ptr + 1, 4)) {
1965        tcg_out16(s, NOP);
1966    }
1967    tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4));
1968    set_jmp_insn_offset(s, which);
1969    s->code_ptr += 2;
1970    set_jmp_reset_offset(s, which);
1971}
1972
1973void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
1974                              uintptr_t jmp_rx, uintptr_t jmp_rw)
1975{
1976    if (!HAVE_FACILITY(GEN_INST_EXT)) {
1977        return;
1978    }
1979    /* patch the branch destination */
1980    uintptr_t addr = tb->jmp_target_addr[n];
1981    intptr_t disp = addr - (jmp_rx - 2);
1982    qatomic_set((int32_t *)jmp_rw, disp / 2);
1983    /* no need to flush icache explicitly */
1984}
1985
/*
 * Expand to the two case labels INDEX_op_<name>_i32 and
 * INDEX_op_<name>_i64.  The colon of the second label is supplied at the
 * point of use, as in "OP_32_64(ld8u):".
 */
# define OP_32_64(name) \
        case glue(glue(INDEX_op_, name), _i32): \
        case glue(glue(INDEX_op_, name), _i64)
1989
/*
 * Emit host code for one non-vector TCG opcode.
 *
 * @s:          code generation context that receives the emitted insns
 * @opc:        opcode to translate; selects the switch case below
 * @args:       operands of the op; depending on the opcode an entry is a
 *              register number, an immediate, a condition or a label
 * @const_args: per-operand flag, nonzero when the matching args[] entry
 *              holds a constant rather than a register
 *
 * Opcodes that have dedicated emitters (mov, call, exit_tb, goto_tb) and
 * any opcode without a case below end up in tcg_abort().
 */
1990static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1991                              const TCGArg args[TCG_MAX_OP_ARGS],
1992                              const int const_args[TCG_MAX_OP_ARGS])
1993{
1994    S390Opcode op, op2;
1995    TCGArg a0, a1, a2;
1996
1997    switch (opc) {
1998    case INDEX_op_goto_ptr:
1999        a0 = args[0];
2000        tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, a0);
2001        break;
2002
2003    OP_32_64(ld8u):
2004        /* ??? LLC (RXY format) is only present with the extended-immediate
2005           facility, whereas LLGC is always present.  */
2006        tcg_out_mem(s, 0, RXY_LLGC, args[0], args[1], TCG_REG_NONE, args[2]);
2007        break;
2008
2009    OP_32_64(ld8s):
2010        /* ??? LB is no smaller than LGB, so no point to using it.  */
2011        tcg_out_mem(s, 0, RXY_LGB, args[0], args[1], TCG_REG_NONE, args[2]);
2012        break;
2013
2014    OP_32_64(ld16u):
2015        /* ??? LLH (RXY format) is only present with the extended-immediate
2016           facility, whereas LLGH is always present.  */
2017        tcg_out_mem(s, 0, RXY_LLGH, args[0], args[1], TCG_REG_NONE, args[2]);
2018        break;
2019
2020    case INDEX_op_ld16s_i32:
2021        tcg_out_mem(s, RX_LH, RXY_LHY, args[0], args[1], TCG_REG_NONE, args[2]);
2022        break;
2023
2024    case INDEX_op_ld_i32:
2025        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2026        break;
2027
2028    OP_32_64(st8):
2029        tcg_out_mem(s, RX_STC, RXY_STCY, args[0], args[1],
2030                    TCG_REG_NONE, args[2]);
2031        break;
2032
2033    OP_32_64(st16):
2034        tcg_out_mem(s, RX_STH, RXY_STHY, args[0], args[1],
2035                    TCG_REG_NONE, args[2]);
2036        break;
2037
2038    case INDEX_op_st_i32:
2039        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2040        break;
2041
2042    case INDEX_op_add_i32:
2043        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2044        if (const_args[2]) {
        /* Also entered from sub_i32 below, with a2 already negated.  */
2045        do_addi_32:
2046            if (a0 == a1) {
2047                if (a2 == (int16_t)a2) {
2048                    tcg_out_insn(s, RI, AHI, a0, a2);
2049                    break;
2050                }
2051                tcg_out_insn(s, RIL, AFI, a0, a2);
2052                break;
2053            }
            /* Distinct destination: base + displacement via LA/LAY.  */
2054            tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
2055        } else if (a0 == a1) {
2056            tcg_out_insn(s, RR, AR, a0, a2);
2057        } else {
2058            tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
2059        }
2060        break;
2061    case INDEX_op_sub_i32:
2062        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2063        if (const_args[2]) {
            /* Subtracting a constant is adding its negation.  */
2064            a2 = -a2;
2065            goto do_addi_32;
2066        } else if (a0 == a1) {
2067            tcg_out_insn(s, RR, SR, a0, a2);
2068        } else {
2069            tcg_out_insn(s, RRFa, SRK, a0, a1, a2);
2070        }
2071        break;
2072
2073    case INDEX_op_and_i32:
2074        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2075        if (const_args[2]) {
2076            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2077            tgen_andi(s, TCG_TYPE_I32, a0, a2);
2078        } else if (a0 == a1) {
2079            tcg_out_insn(s, RR, NR, a0, a2);
2080        } else {
2081            tcg_out_insn(s, RRFa, NRK, a0, a1, a2);
2082        }
2083        break;
2084    case INDEX_op_or_i32:
2085        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2086        if (const_args[2]) {
2087            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2088            tgen_ori(s, a0, a2);
2089        } else if (a0 == a1) {
2090            tcg_out_insn(s, RR, OR, a0, a2);
2091        } else {
2092            tcg_out_insn(s, RRFa, ORK, a0, a1, a2);
2093        }
2094        break;
2095    case INDEX_op_xor_i32:
2096        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2097        if (const_args[2]) {
2098            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2099            tcg_out_insn(s, RIL, XILF, a0, a2);
2100        } else if (a0 == a1) {
2101            tcg_out_insn(s, RR, XR, args[0], args[2]);
2102        } else {
2103            tcg_out_insn(s, RRFa, XRK, a0, a1, a2);
2104        }
2105        break;
2106
    /*
     * andc/orc/eqv with a constant operand: copy a1 into a0, then apply
     * the plain and/or/xor immediate form with the constant inverted.
     */
2107    case INDEX_op_andc_i32:
2108        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2109        if (const_args[2]) {
2110            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2111            tgen_andi(s, TCG_TYPE_I32, a0, (uint32_t)~a2);
2112	} else {
2113            tcg_out_insn(s, RRFa, NCRK, a0, a1, a2);
2114	}
2115        break;
2116    case INDEX_op_orc_i32:
2117        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2118        if (const_args[2]) {
2119            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2120            tgen_ori(s, a0, (uint32_t)~a2);
2121        } else {
2122            tcg_out_insn(s, RRFa, OCRK, a0, a1, a2);
2123        }
2124        break;
2125    case INDEX_op_eqv_i32:
2126        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2127        if (const_args[2]) {
2128            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2129            tcg_out_insn(s, RIL, XILF, a0, ~a2);
2130        } else {
2131            tcg_out_insn(s, RRFa, NXRK, a0, a1, a2);
2132        }
2133        break;
2134    case INDEX_op_nand_i32:
2135        tcg_out_insn(s, RRFa, NNRK, args[0], args[1], args[2]);
2136        break;
2137    case INDEX_op_nor_i32:
2138        tcg_out_insn(s, RRFa, NORK, args[0], args[1], args[2]);
2139        break;
2140
2141    case INDEX_op_neg_i32:
2142        tcg_out_insn(s, RR, LCR, args[0], args[1]);
2143        break;
2144    case INDEX_op_not_i32:
        /* NOT is emitted as NOR of the source with itself.  */
2145        tcg_out_insn(s, RRFa, NORK, args[0], args[1], args[1]);
2146        break;
2147
2148    case INDEX_op_mul_i32:
2149        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2150        if (const_args[2]) {
2151            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2152            if (a2 == (int16_t)a2) {
2153                tcg_out_insn(s, RI, MHI, a0, a2);
2154            } else {
2155                tcg_out_insn(s, RIL, MSFI, a0, a2);
2156            }
2157        } else if (a0 == a1) {
2158            tcg_out_insn(s, RRE, MSR, a0, a2);
2159        } else {
2160            tcg_out_insn(s, RRFa, MSRKC, a0, a1, a2);
2161        }
2162        break;
2163
    /*
     * For the div2/divu2 cases the asserts pin the operands to a register
     * pair: outputs alias the matching inputs, args[1] is even and args[0]
     * is the register right after it.  The insn names only args[1].
     */
2164    case INDEX_op_div2_i32:
2165        tcg_debug_assert(args[0] == args[2]);
2166        tcg_debug_assert(args[1] == args[3]);
2167        tcg_debug_assert((args[1] & 1) == 0);
2168        tcg_debug_assert(args[0] == args[1] + 1);
2169        tcg_out_insn(s, RR, DR, args[1], args[4]);
2170        break;
2171    case INDEX_op_divu2_i32:
2172        tcg_debug_assert(args[0] == args[2]);
2173        tcg_debug_assert(args[1] == args[3]);
2174        tcg_debug_assert((args[1] & 1) == 0);
2175        tcg_debug_assert(args[0] == args[1] + 1);
2176        tcg_out_insn(s, RRE, DLR, args[1], args[4]);
2177        break;
2178
2179    case INDEX_op_shl_i32:
2180        op = RS_SLL;
2181        op2 = RSY_SLLK;
    /*
     * Shared tail for the 32-bit shifts: op is used when source and
     * destination coincide, op2 is the three-operand variant.
     */
2182    do_shift32:
2183        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2184        if (a0 == a1) {
2185            if (const_args[2]) {
2186                tcg_out_sh32(s, op, a0, TCG_REG_NONE, a2);
2187            } else {
2188                tcg_out_sh32(s, op, a0, a2, 0);
2189            }
2190        } else {
2191            /* Using tcg_out_sh64 here for the format; it is a 32-bit shift.  */
2192            if (const_args[2]) {
2193                tcg_out_sh64(s, op2, a0, a1, TCG_REG_NONE, a2);
2194            } else {
2195                tcg_out_sh64(s, op2, a0, a1, a2, 0);
2196            }
2197        }
2198        break;
2199    case INDEX_op_shr_i32:
2200        op = RS_SRL;
2201        op2 = RSY_SRLK;
2202        goto do_shift32;
2203    case INDEX_op_sar_i32:
2204        op = RS_SRA;
2205        op2 = RSY_SRAK;
2206        goto do_shift32;
2207
2208    case INDEX_op_rotl_i32:
2209        /* ??? Using tcg_out_sh64 here for the format; it is a 32-bit rol.  */
2210        if (const_args[2]) {
2211            tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_REG_NONE, args[2]);
2212        } else {
2213            tcg_out_sh64(s, RSY_RLL, args[0], args[1], args[2], 0);
2214        }
2215        break;
2216    case INDEX_op_rotr_i32:
        /*
         * Rotate right is emitted as rotate left: by (32 - count) & 31
         * for a constant, otherwise by the negated count in TCG_TMP0.
         */
2217        if (const_args[2]) {
2218            tcg_out_sh64(s, RSY_RLL, args[0], args[1],
2219                         TCG_REG_NONE, (32 - args[2]) & 31);
2220        } else {
2221            tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
2222            tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_TMP0, 0);
2223        }
2224        break;
2225
2226    case INDEX_op_ext8s_i32:
2227        tgen_ext8s(s, TCG_TYPE_I32, args[0], args[1]);
2228        break;
2229    case INDEX_op_ext16s_i32:
2230        tgen_ext16s(s, TCG_TYPE_I32, args[0], args[1]);
2231        break;
2232    case INDEX_op_ext8u_i32:
2233        tgen_ext8u(s, TCG_TYPE_I32, args[0], args[1]);
2234        break;
2235    case INDEX_op_ext16u_i32:
2236        tgen_ext16u(s, TCG_TYPE_I32, args[0], args[1]);
2237        break;
2238
    /*
     * bswap16: reverse the whole register, then shift right by the
     * register width minus 16.  The shift is arithmetic when the
     * TCG_BSWAP_OS flag is set in args[2] and logical otherwise.
     */
2239    case INDEX_op_bswap16_i32:
2240        a0 = args[0], a1 = args[1], a2 = args[2];
2241        tcg_out_insn(s, RRE, LRVR, a0, a1);
2242        if (a2 & TCG_BSWAP_OS) {
2243            tcg_out_sh32(s, RS_SRA, a0, TCG_REG_NONE, 16);
2244        } else {
2245            tcg_out_sh32(s, RS_SRL, a0, TCG_REG_NONE, 16);
2246        }
2247        break;
2248    case INDEX_op_bswap16_i64:
2249        a0 = args[0], a1 = args[1], a2 = args[2];
2250        tcg_out_insn(s, RRE, LRVGR, a0, a1);
2251        if (a2 & TCG_BSWAP_OS) {
2252            tcg_out_sh64(s, RSY_SRAG, a0, a0, TCG_REG_NONE, 48);
2253        } else {
2254            tcg_out_sh64(s, RSY_SRLG, a0, a0, TCG_REG_NONE, 48);
2255        }
2256        break;
2257
2258    case INDEX_op_bswap32_i32:
2259        tcg_out_insn(s, RRE, LRVR, args[0], args[1]);
2260        break;
2261    case INDEX_op_bswap32_i64:
2262        a0 = args[0], a1 = args[1], a2 = args[2];
2263        tcg_out_insn(s, RRE, LRVR, a0, a1);
        /*
         * Sign-extend when TCG_BSWAP_OS is set; zero-extend only when
         * TCG_BSWAP_OZ is set without TCG_BSWAP_IZ; otherwise no extension.
         */
2264        if (a2 & TCG_BSWAP_OS) {
2265            tgen_ext32s(s, a0, a0);
2266        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2267            tgen_ext32u(s, a0, a0);
2268        }
2269        break;
2270
    /*
     * add2/sub2: the first insn combines args[4] into args[0], the second
     * combines args[5] into args[1].  NOTE(review): the second insn is
     * presumably the carry/borrow-consuming form (ALCR/SLBR) - confirm
     * against the ISA reference.
     */
2271    case INDEX_op_add2_i32:
2272        if (const_args[4]) {
2273            tcg_out_insn(s, RIL, ALFI, args[0], args[4]);
2274        } else {
2275            tcg_out_insn(s, RR, ALR, args[0], args[4]);
2276        }
2277        tcg_out_insn(s, RRE, ALCR, args[1], args[5]);
2278        break;
2279    case INDEX_op_sub2_i32:
2280        if (const_args[4]) {
2281            tcg_out_insn(s, RIL, SLFI, args[0], args[4]);
2282        } else {
2283            tcg_out_insn(s, RR, SLR, args[0], args[4]);
2284        }
2285        tcg_out_insn(s, RRE, SLBR, args[1], args[5]);
2286        break;
2287
2288    case INDEX_op_br:
2289        tgen_branch(s, S390_CC_ALWAYS, arg_label(args[0]));
2290        break;
2291
2292    case INDEX_op_brcond_i32:
2293        tgen_brcond(s, TCG_TYPE_I32, args[2], args[0],
2294                    args[1], const_args[1], arg_label(args[3]));
2295        break;
2296    case INDEX_op_setcond_i32:
2297        tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1],
2298                     args[2], const_args[2]);
2299        break;
2300    case INDEX_op_movcond_i32:
2301        tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1],
2302                     args[2], const_args[2], args[3], const_args[3], args[4]);
2303        break;
2304
2305    case INDEX_op_qemu_ld_i32:
2306        /* ??? Technically we can use a non-extending instruction.  */
2307    case INDEX_op_qemu_ld_i64:
2308        tcg_out_qemu_ld(s, args[0], args[1], args[2]);
2309        break;
2310    case INDEX_op_qemu_st_i32:
2311    case INDEX_op_qemu_st_i64:
2312        tcg_out_qemu_st(s, args[0], args[1], args[2]);
2313        break;
2314
2315    case INDEX_op_ld16s_i64:
2316        tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]);
2317        break;
2318    case INDEX_op_ld32u_i64:
2319        tcg_out_mem(s, 0, RXY_LLGF, args[0], args[1], TCG_REG_NONE, args[2]);
2320        break;
2321    case INDEX_op_ld32s_i64:
2322        tcg_out_mem(s, 0, RXY_LGF, args[0], args[1], TCG_REG_NONE, args[2]);
2323        break;
2324    case INDEX_op_ld_i64:
2325        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2326        break;
2327
2328    case INDEX_op_st32_i64:
2329        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2330        break;
2331    case INDEX_op_st_i64:
2332        tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2333        break;
2334
2335    case INDEX_op_add_i64:
2336        a0 = args[0], a1 = args[1], a2 = args[2];
2337        if (const_args[2]) {
        /* Also entered from sub_i64 below, with a2 already negated.  */
2338        do_addi_64:
2339            if (a0 == a1) {
                /*
                 * Try the immediate forms in turn: signed 16-bit, signed
                 * 32-bit, unsigned 32-bit add, then unsigned 32-bit
                 * subtract of the negated value.  If none fits, fall out
                 * of this block to the LA/LAY form below.
                 */
2340                if (a2 == (int16_t)a2) {
2341                    tcg_out_insn(s, RI, AGHI, a0, a2);
2342                    break;
2343                }
2344                if (a2 == (int32_t)a2) {
2345                    tcg_out_insn(s, RIL, AGFI, a0, a2);
2346                    break;
2347                }
2348                if (a2 == (uint32_t)a2) {
2349                    tcg_out_insn(s, RIL, ALGFI, a0, a2);
2350                    break;
2351                }
2352                if (-a2 == (uint32_t)-a2) {
2353                    tcg_out_insn(s, RIL, SLGFI, a0, -a2);
2354                    break;
2355                }
2356            }
2357            tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
2358        } else if (a0 == a1) {
2359            tcg_out_insn(s, RRE, AGR, a0, a2);
2360        } else {
2361            tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
2362        }
2363        break;
2364    case INDEX_op_sub_i64:
2365        a0 = args[0], a1 = args[1], a2 = args[2];
2366        if (const_args[2]) {
            /* Subtracting a constant is adding its negation.  */
2367            a2 = -a2;
2368            goto do_addi_64;
2369        } else {
2370            tcg_out_insn(s, RRFa, SGRK, a0, a1, a2);
2371        }
2372        break;
2373
2374    case INDEX_op_and_i64:
2375        a0 = args[0], a1 = args[1], a2 = args[2];
2376        if (const_args[2]) {
2377            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2378            tgen_andi(s, TCG_TYPE_I64, args[0], args[2]);
2379        } else {
2380            tcg_out_insn(s, RRFa, NGRK, a0, a1, a2);
2381        }
2382        break;
2383    case INDEX_op_or_i64:
2384        a0 = args[0], a1 = args[1], a2 = args[2];
2385        if (const_args[2]) {
2386            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2387            tgen_ori(s, a0, a2);
2388        } else {
2389            tcg_out_insn(s, RRFa, OGRK, a0, a1, a2);
2390        }
2391        break;
2392    case INDEX_op_xor_i64:
2393        a0 = args[0], a1 = args[1], a2 = args[2];
2394        if (const_args[2]) {
2395            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2396            tgen_xori(s, a0, a2);
2397        } else {
2398            tcg_out_insn(s, RRFa, XGRK, a0, a1, a2);
2399        }
2400        break;
2401
    /*
     * andc/orc/eqv with a constant operand: copy a1 into a0, then apply
     * the plain and/or/xor immediate helper with the constant inverted.
     */
2402    case INDEX_op_andc_i64:
2403        a0 = args[0], a1 = args[1], a2 = args[2];
2404        if (const_args[2]) {
2405            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2406            tgen_andi(s, TCG_TYPE_I64, a0, ~a2);
2407        } else {
2408            tcg_out_insn(s, RRFa, NCGRK, a0, a1, a2);
2409        }
2410        break;
2411    case INDEX_op_orc_i64:
2412        a0 = args[0], a1 = args[1], a2 = args[2];
2413        if (const_args[2]) {
2414            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2415            tgen_ori(s, a0, ~a2);
2416        } else {
2417            tcg_out_insn(s, RRFa, OCGRK, a0, a1, a2);
2418        }
2419        break;
2420    case INDEX_op_eqv_i64:
2421        a0 = args[0], a1 = args[1], a2 = args[2];
2422        if (const_args[2]) {
2423            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2424            tgen_xori(s, a0, ~a2);
2425        } else {
2426            tcg_out_insn(s, RRFa, NXGRK, a0, a1, a2);
2427        }
2428        break;
2429    case INDEX_op_nand_i64:
2430        tcg_out_insn(s, RRFa, NNGRK, args[0], args[1], args[2]);
2431        break;
2432    case INDEX_op_nor_i64:
2433        tcg_out_insn(s, RRFa, NOGRK, args[0], args[1], args[2]);
2434        break;
2435
2436    case INDEX_op_neg_i64:
2437        tcg_out_insn(s, RRE, LCGR, args[0], args[1]);
2438        break;
2439    case INDEX_op_not_i64:
        /* NOT is emitted as NOR of the source with itself.  */
2440        tcg_out_insn(s, RRFa, NOGRK, args[0], args[1], args[1]);
2441        break;
2442    case INDEX_op_bswap64_i64:
2443        tcg_out_insn(s, RRE, LRVGR, args[0], args[1]);
2444        break;
2445
2446    case INDEX_op_mul_i64:
2447        a0 = args[0], a1 = args[1], a2 = args[2];
2448        if (const_args[2]) {
2449            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2450            if (a2 == (int16_t)a2) {
2451                tcg_out_insn(s, RI, MGHI, a0, a2);
2452            } else {
2453                tcg_out_insn(s, RIL, MSGFI, a0, a2);
2454            }
2455        } else if (a0 == a1) {
2456            tcg_out_insn(s, RRE, MSGR, a0, a2);
2457        } else {
2458            tcg_out_insn(s, RRFa, MSGRKC, a0, a1, a2);
2459        }
2460        break;
2461
2462    case INDEX_op_div2_i64:
2463        /*
2464         * ??? We get an unnecessary sign-extension of the dividend
2465         * into op0 with this definition, but as we do in fact always
2466         * produce both quotient and remainder using INDEX_op_div_i64
2467         * instead requires jumping through even more hoops.
2468         */
2469        tcg_debug_assert(args[0] == args[2]);
2470        tcg_debug_assert(args[1] == args[3]);
2471        tcg_debug_assert((args[1] & 1) == 0);
2472        tcg_debug_assert(args[0] == args[1] + 1);
2473        tcg_out_insn(s, RRE, DSGR, args[1], args[4]);
2474        break;
2475    case INDEX_op_divu2_i64:
2476        tcg_debug_assert(args[0] == args[2]);
2477        tcg_debug_assert(args[1] == args[3]);
2478        tcg_debug_assert((args[1] & 1) == 0);
2479        tcg_debug_assert(args[0] == args[1] + 1);
2480        tcg_out_insn(s, RRE, DLGR, args[1], args[4]);
2481        break;
    /*
     * mulu2/muls2 likewise require args[1] even and args[0] the register
     * after it; mulu2 additionally requires args[0] to alias args[2].
     */
2482    case INDEX_op_mulu2_i64:
2483        tcg_debug_assert(args[0] == args[2]);
2484        tcg_debug_assert((args[1] & 1) == 0);
2485        tcg_debug_assert(args[0] == args[1] + 1);
2486        tcg_out_insn(s, RRE, MLGR, args[1], args[3]);
2487        break;
2488    case INDEX_op_muls2_i64:
2489        tcg_debug_assert((args[1] & 1) == 0);
2490        tcg_debug_assert(args[0] == args[1] + 1);
2491        tcg_out_insn(s, RRFa, MGRK, args[1], args[2], args[3]);
2492        break;
2493
2494    case INDEX_op_shl_i64:
2495        op = RSY_SLLG;
    /* Shared tail for the 64-bit shifts; op selects the instruction.  */
2496    do_shift64:
2497        if (const_args[2]) {
2498            tcg_out_sh64(s, op, args[0], args[1], TCG_REG_NONE, args[2]);
2499        } else {
2500            tcg_out_sh64(s, op, args[0], args[1], args[2], 0);
2501        }
2502        break;
2503    case INDEX_op_shr_i64:
2504        op = RSY_SRLG;
2505        goto do_shift64;
2506    case INDEX_op_sar_i64:
2507        op = RSY_SRAG;
2508        goto do_shift64;
2509
2510    case INDEX_op_rotl_i64:
2511        if (const_args[2]) {
2512            tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
2513                         TCG_REG_NONE, args[2]);
2514        } else {
2515            tcg_out_sh64(s, RSY_RLLG, args[0], args[1], args[2], 0);
2516        }
2517        break;
2518    case INDEX_op_rotr_i64:
        /* Rotate right is emitted as rotate left by the negated count.  */
2519        if (const_args[2]) {
2520            tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
2521                         TCG_REG_NONE, (64 - args[2]) & 63);
2522        } else {
2523            /* We can use the smaller 32-bit negate because only the
2524               low 6 bits are examined for the rotate.  */
2525            tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
2526            tcg_out_sh64(s, RSY_RLLG, args[0], args[1], TCG_TMP0, 0);
2527        }
2528        break;
2529
2530    case INDEX_op_ext8s_i64:
2531        tgen_ext8s(s, TCG_TYPE_I64, args[0], args[1]);
2532        break;
2533    case INDEX_op_ext16s_i64:
2534        tgen_ext16s(s, TCG_TYPE_I64, args[0], args[1]);
2535        break;
2536    case INDEX_op_ext_i32_i64:
2537    case INDEX_op_ext32s_i64:
2538        tgen_ext32s(s, args[0], args[1]);
2539        break;
2540    case INDEX_op_ext8u_i64:
2541        tgen_ext8u(s, TCG_TYPE_I64, args[0], args[1]);
2542        break;
2543    case INDEX_op_ext16u_i64:
2544        tgen_ext16u(s, TCG_TYPE_I64, args[0], args[1]);
2545        break;
2546    case INDEX_op_extu_i32_i64:
2547    case INDEX_op_ext32u_i64:
2548        tgen_ext32u(s, args[0], args[1]);
2549        break;
2550
    /*
     * 64-bit add2/sub2 with a constant low part: a negative constant is
     * handled by emitting the opposite operation on its negation
     * (ALGFI <-> SLGFI).
     */
2551    case INDEX_op_add2_i64:
2552        if (const_args[4]) {
2553            if ((int64_t)args[4] >= 0) {
2554                tcg_out_insn(s, RIL, ALGFI, args[0], args[4]);
2555            } else {
2556                tcg_out_insn(s, RIL, SLGFI, args[0], -args[4]);
2557            }
2558        } else {
2559            tcg_out_insn(s, RRE, ALGR, args[0], args[4]);
2560        }
2561        tcg_out_insn(s, RRE, ALCGR, args[1], args[5]);
2562        break;
2563    case INDEX_op_sub2_i64:
2564        if (const_args[4]) {
2565            if ((int64_t)args[4] >= 0) {
2566                tcg_out_insn(s, RIL, SLGFI, args[0], args[4]);
2567            } else {
2568                tcg_out_insn(s, RIL, ALGFI, args[0], -args[4]);
2569            }
2570        } else {
2571            tcg_out_insn(s, RRE, SLGR, args[0], args[4]);
2572        }
2573        tcg_out_insn(s, RRE, SLBGR, args[1], args[5]);
2574        break;
2575
2576    case INDEX_op_brcond_i64:
2577        tgen_brcond(s, TCG_TYPE_I64, args[2], args[0],
2578                    args[1], const_args[1], arg_label(args[3]));
2579        break;
2580    case INDEX_op_setcond_i64:
2581        tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1],
2582                     args[2], const_args[2]);
2583        break;
2584    case INDEX_op_movcond_i64:
2585        tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1],
2586                     args[2], const_args[2], args[3], const_args[3], args[4]);
2587        break;
2588
2589    OP_32_64(deposit):
2590        a0 = args[0], a1 = args[1], a2 = args[2];
2591        if (const_args[1]) {
2592            tgen_deposit(s, a0, a2, args[3], args[4], 1);
2593        } else {
2594            /* Since we can't support "0Z" as a constraint, we allow a1 in
2595               any register.  Fix things up as if a matching constraint.  */
2596            if (a0 != a1) {
                /*
                 * The comparison yields 0 for deposit_i32 and 1 for
                 * deposit_i64.  NOTE(review): this relies on TCG_TYPE_I32
                 * and TCG_TYPE_I64 having exactly those values - confirm.
                 */
2597                TCGType type = (opc == INDEX_op_deposit_i64);
                /* Save a2 first if the copy into a0 would overwrite it.  */
2598                if (a0 == a2) {
2599                    tcg_out_mov(s, type, TCG_TMP0, a2);
2600                    a2 = TCG_TMP0;
2601                }
2602                tcg_out_mov(s, type, a0, a1);
2603            }
2604            tgen_deposit(s, a0, a2, args[3], args[4], 0);
2605        }
2606        break;
2607
2608    OP_32_64(extract):
2609        tgen_extract(s, args[0], args[1], args[2], args[3]);
2610        break;
2611
2612    case INDEX_op_clz_i64:
2613        tgen_clz(s, args[0], args[1], args[2], const_args[2]);
2614        break;
2615
2616    case INDEX_op_ctpop_i32:
2617        tgen_ctpop(s, TCG_TYPE_I32, args[0], args[1]);
2618        break;
2619    case INDEX_op_ctpop_i64:
2620        tgen_ctpop(s, TCG_TYPE_I64, args[0], args[1]);
2621        break;
2622
2623    case INDEX_op_mb:
2624        /* The host memory model is quite strong, we simply need to
2625           serialize the instruction stream.  */
        /* Only a barrier that includes TCG_MO_ST_LD emits anything.  */
2626        if (args[0] & TCG_MO_ST_LD) {
2627            /* fast-bcr-serialization facility (45) is present */
2628            tcg_out_insn(s, RR, BCR, 14, 0);
2629        }
2630        break;
2631
2632    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2633    case INDEX_op_mov_i64:
2634    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2635    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
2636    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
2637    default:
2638        tcg_abort();
2639    }
2640}
2641
2642static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
2643                            TCGReg dst, TCGReg src)
2644{
2645    if (is_general_reg(src)) {
2646        /* Replicate general register into two MO_64. */
2647        tcg_out_insn(s, VRRf, VLVGP, dst, src, src);
2648        if (vece == MO_64) {
2649            return true;
2650        }
2651        src = dst;
2652    }
2653
2654    /*
2655     * Recall that the "standard" integer, within a vector, is the
2656     * rightmost element of the leftmost doubleword, a-la VLLEZ.
2657     */
2658    tcg_out_insn(s, VRIc, VREP, dst, (8 >> vece) - 1, src, vece);
2659    return true;
2660}
2661
2662static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
2663                             TCGReg dst, TCGReg base, intptr_t offset)
2664{
2665    tcg_out_vrx_mem(s, VRX_VLREP, dst, base, TCG_REG_NONE, offset, vece);
2666    return true;
2667}
2668
2669static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
2670                             TCGReg dst, int64_t val)
2671{
2672    int i, mask, msb, lsb;
2673
2674    /* Look for int16_t elements.  */
2675    if (vece <= MO_16 ||
2676        (vece == MO_32 ? (int32_t)val : val) == (int16_t)val) {
2677        tcg_out_insn(s, VRIa, VREPI, dst, val, vece);
2678        return;
2679    }
2680
2681    /* Look for bit masks.  */
2682    if (vece == MO_32) {
2683        if (risbg_mask((int32_t)val)) {
2684            /* Handle wraparound by swapping msb and lsb.  */
2685            if ((val & 0x80000001u) == 0x80000001u) {
2686                msb = 32 - ctz32(~val);
2687                lsb = clz32(~val) - 1;
2688            } else {
2689                msb = clz32(val);
2690                lsb = 31 - ctz32(val);
2691            }
2692            tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_32);
2693            return;
2694        }
2695    } else {
2696        if (risbg_mask(val)) {
2697            /* Handle wraparound by swapping msb and lsb.  */
2698            if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
2699                /* Handle wraparound by swapping msb and lsb.  */
2700                msb = 64 - ctz64(~val);
2701                lsb = clz64(~val) - 1;
2702            } else {
2703                msb = clz64(val);
2704                lsb = 63 - ctz64(val);
2705            }
2706            tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_64);
2707            return;
2708        }
2709    }
2710
2711    /* Look for all bytes 0x00 or 0xff.  */
2712    for (i = mask = 0; i < 8; i++) {
2713        uint8_t byte = val >> (i * 8);
2714        if (byte == 0xff) {
2715            mask |= 1 << i;
2716        } else if (byte != 0) {
2717            break;
2718        }
2719    }
2720    if (i == 8) {
2721        tcg_out_insn(s, VRIa, VGBM, dst, mask * 0x0101, 0);
2722        return;
2723    }
2724
2725    /* Otherwise, stuff it in the constant pool.  */
2726    tcg_out_insn(s, RIL, LARL, TCG_TMP0, 0);
2727    new_pool_label(s, val, R_390_PC32DBL, s->code_ptr - 2, 2);
2728    tcg_out_insn(s, VRX, VLREP, dst, TCG_TMP0, TCG_REG_NONE, 0, MO_64);
2729}
2730
2731static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2732                           unsigned vecl, unsigned vece,
2733                           const TCGArg args[TCG_MAX_OP_ARGS],
2734                           const int const_args[TCG_MAX_OP_ARGS])
2735{
2736    TCGType type = vecl + TCG_TYPE_V64;
2737    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
2738
2739    switch (opc) {
2740    case INDEX_op_ld_vec:
2741        tcg_out_ld(s, type, a0, a1, a2);
2742        break;
2743    case INDEX_op_st_vec:
2744        tcg_out_st(s, type, a0, a1, a2);
2745        break;
2746    case INDEX_op_dupm_vec:
2747        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2748        break;
2749
2750    case INDEX_op_abs_vec:
2751        tcg_out_insn(s, VRRa, VLP, a0, a1, vece);
2752        break;
2753    case INDEX_op_neg_vec:
2754        tcg_out_insn(s, VRRa, VLC, a0, a1, vece);
2755        break;
2756    case INDEX_op_not_vec:
2757        tcg_out_insn(s, VRRc, VNO, a0, a1, a1, 0);
2758        break;
2759
2760    case INDEX_op_add_vec:
2761        tcg_out_insn(s, VRRc, VA, a0, a1, a2, vece);
2762        break;
2763    case INDEX_op_sub_vec:
2764        tcg_out_insn(s, VRRc, VS, a0, a1, a2, vece);
2765        break;
2766    case INDEX_op_and_vec:
2767        tcg_out_insn(s, VRRc, VN, a0, a1, a2, 0);
2768        break;
2769    case INDEX_op_andc_vec:
2770        tcg_out_insn(s, VRRc, VNC, a0, a1, a2, 0);
2771        break;
2772    case INDEX_op_mul_vec:
2773        tcg_out_insn(s, VRRc, VML, a0, a1, a2, vece);
2774        break;
2775    case INDEX_op_or_vec:
2776        tcg_out_insn(s, VRRc, VO, a0, a1, a2, 0);
2777        break;
2778    case INDEX_op_orc_vec:
2779        tcg_out_insn(s, VRRc, VOC, a0, a1, a2, 0);
2780        break;
2781    case INDEX_op_xor_vec:
2782        tcg_out_insn(s, VRRc, VX, a0, a1, a2, 0);
2783        break;
2784    case INDEX_op_nand_vec:
2785        tcg_out_insn(s, VRRc, VNN, a0, a1, a2, 0);
2786        break;
2787    case INDEX_op_nor_vec:
2788        tcg_out_insn(s, VRRc, VNO, a0, a1, a2, 0);
2789        break;
2790    case INDEX_op_eqv_vec:
2791        tcg_out_insn(s, VRRc, VNX, a0, a1, a2, 0);
2792        break;
2793
2794    case INDEX_op_shli_vec:
2795        tcg_out_insn(s, VRSa, VESL, a0, a2, TCG_REG_NONE, a1, vece);
2796        break;
2797    case INDEX_op_shri_vec:
2798        tcg_out_insn(s, VRSa, VESRL, a0, a2, TCG_REG_NONE, a1, vece);
2799        break;
2800    case INDEX_op_sari_vec:
2801        tcg_out_insn(s, VRSa, VESRA, a0, a2, TCG_REG_NONE, a1, vece);
2802        break;
2803    case INDEX_op_rotli_vec:
2804        tcg_out_insn(s, VRSa, VERLL, a0, a2, TCG_REG_NONE, a1, vece);
2805        break;
2806    case INDEX_op_shls_vec:
2807        tcg_out_insn(s, VRSa, VESL, a0, 0, a2, a1, vece);
2808        break;
2809    case INDEX_op_shrs_vec:
2810        tcg_out_insn(s, VRSa, VESRL, a0, 0, a2, a1, vece);
2811        break;
2812    case INDEX_op_sars_vec:
2813        tcg_out_insn(s, VRSa, VESRA, a0, 0, a2, a1, vece);
2814        break;
2815    case INDEX_op_rotls_vec:
2816        tcg_out_insn(s, VRSa, VERLL, a0, 0, a2, a1, vece);
2817        break;
2818    case INDEX_op_shlv_vec:
2819        tcg_out_insn(s, VRRc, VESLV, a0, a1, a2, vece);
2820        break;
2821    case INDEX_op_shrv_vec:
2822        tcg_out_insn(s, VRRc, VESRLV, a0, a1, a2, vece);
2823        break;
2824    case INDEX_op_sarv_vec:
2825        tcg_out_insn(s, VRRc, VESRAV, a0, a1, a2, vece);
2826        break;
2827    case INDEX_op_rotlv_vec:
2828        tcg_out_insn(s, VRRc, VERLLV, a0, a1, a2, vece);
2829        break;
2830
2831    case INDEX_op_smin_vec:
2832        tcg_out_insn(s, VRRc, VMN, a0, a1, a2, vece);
2833        break;
2834    case INDEX_op_smax_vec:
2835        tcg_out_insn(s, VRRc, VMX, a0, a1, a2, vece);
2836        break;
2837    case INDEX_op_umin_vec:
2838        tcg_out_insn(s, VRRc, VMNL, a0, a1, a2, vece);
2839        break;
2840    case INDEX_op_umax_vec:
2841        tcg_out_insn(s, VRRc, VMXL, a0, a1, a2, vece);
2842        break;
2843
2844    case INDEX_op_bitsel_vec:
2845        tcg_out_insn(s, VRRe, VSEL, a0, a2, args[3], a1);
2846        break;
2847
2848    case INDEX_op_cmp_vec:
2849        switch ((TCGCond)args[3]) {
2850        case TCG_COND_EQ:
2851            tcg_out_insn(s, VRRc, VCEQ, a0, a1, a2, vece);
2852            break;
2853        case TCG_COND_GT:
2854            tcg_out_insn(s, VRRc, VCH, a0, a1, a2, vece);
2855            break;
2856        case TCG_COND_GTU:
2857            tcg_out_insn(s, VRRc, VCHL, a0, a1, a2, vece);
2858            break;
2859        default:
2860            g_assert_not_reached();
2861        }
2862        break;
2863
2864    case INDEX_op_s390_vuph_vec:
2865        tcg_out_insn(s, VRRa, VUPH, a0, a1, vece);
2866        break;
2867    case INDEX_op_s390_vupl_vec:
2868        tcg_out_insn(s, VRRa, VUPL, a0, a1, vece);
2869        break;
2870    case INDEX_op_s390_vpks_vec:
2871        tcg_out_insn(s, VRRc, VPKS, a0, a1, a2, vece);
2872        break;
2873
2874    case INDEX_op_mov_vec:   /* Always emitted via tcg_out_mov.  */
2875    case INDEX_op_dup_vec:   /* Always emitted via tcg_out_dup_vec.  */
2876    default:
2877        g_assert_not_reached();
2878    }
2879}
2880
2881int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2882{
2883    switch (opc) {
2884    case INDEX_op_abs_vec:
2885    case INDEX_op_add_vec:
2886    case INDEX_op_and_vec:
2887    case INDEX_op_andc_vec:
2888    case INDEX_op_bitsel_vec:
2889    case INDEX_op_eqv_vec:
2890    case INDEX_op_nand_vec:
2891    case INDEX_op_neg_vec:
2892    case INDEX_op_nor_vec:
2893    case INDEX_op_not_vec:
2894    case INDEX_op_or_vec:
2895    case INDEX_op_orc_vec:
2896    case INDEX_op_rotli_vec:
2897    case INDEX_op_rotls_vec:
2898    case INDEX_op_rotlv_vec:
2899    case INDEX_op_sari_vec:
2900    case INDEX_op_sars_vec:
2901    case INDEX_op_sarv_vec:
2902    case INDEX_op_shli_vec:
2903    case INDEX_op_shls_vec:
2904    case INDEX_op_shlv_vec:
2905    case INDEX_op_shri_vec:
2906    case INDEX_op_shrs_vec:
2907    case INDEX_op_shrv_vec:
2908    case INDEX_op_smax_vec:
2909    case INDEX_op_smin_vec:
2910    case INDEX_op_sub_vec:
2911    case INDEX_op_umax_vec:
2912    case INDEX_op_umin_vec:
2913    case INDEX_op_xor_vec:
2914        return 1;
2915    case INDEX_op_cmp_vec:
2916    case INDEX_op_cmpsel_vec:
2917    case INDEX_op_rotrv_vec:
2918        return -1;
2919    case INDEX_op_mul_vec:
2920        return vece < MO_64;
2921    case INDEX_op_ssadd_vec:
2922    case INDEX_op_sssub_vec:
2923        return vece < MO_64 ? -1 : 0;
2924    default:
2925        return 0;
2926    }
2927}
2928
2929static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0,
2930                                 TCGv_vec v1, TCGv_vec v2, TCGCond cond)
2931{
2932    bool need_swap = false, need_inv = false;
2933
2934    switch (cond) {
2935    case TCG_COND_EQ:
2936    case TCG_COND_GT:
2937    case TCG_COND_GTU:
2938        break;
2939    case TCG_COND_NE:
2940    case TCG_COND_LE:
2941    case TCG_COND_LEU:
2942        need_inv = true;
2943        break;
2944    case TCG_COND_LT:
2945    case TCG_COND_LTU:
2946        need_swap = true;
2947        break;
2948    case TCG_COND_GE:
2949    case TCG_COND_GEU:
2950        need_swap = need_inv = true;
2951        break;
2952    default:
2953        g_assert_not_reached();
2954    }
2955
2956    if (need_inv) {
2957        cond = tcg_invert_cond(cond);
2958    }
2959    if (need_swap) {
2960        TCGv_vec t1;
2961        t1 = v1, v1 = v2, v2 = t1;
2962        cond = tcg_swap_cond(cond);
2963    }
2964
2965    vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
2966              tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
2967
2968    return need_inv;
2969}
2970
2971static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
2972                           TCGv_vec v1, TCGv_vec v2, TCGCond cond)
2973{
2974    if (expand_vec_cmp_noinv(type, vece, v0, v1, v2, cond)) {
2975        tcg_gen_not_vec(vece, v0, v0);
2976    }
2977}
2978
2979static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGv_vec v0,
2980                              TCGv_vec c1, TCGv_vec c2,
2981                              TCGv_vec v3, TCGv_vec v4, TCGCond cond)
2982{
2983    TCGv_vec t = tcg_temp_new_vec(type);
2984
2985    if (expand_vec_cmp_noinv(type, vece, t, c1, c2, cond)) {
2986        /* Invert the sense of the compare by swapping arguments.  */
2987        tcg_gen_bitsel_vec(vece, v0, t, v4, v3);
2988    } else {
2989        tcg_gen_bitsel_vec(vece, v0, t, v3, v4);
2990    }
2991    tcg_temp_free_vec(t);
2992}
2993
2994static void expand_vec_sat(TCGType type, unsigned vece, TCGv_vec v0,
2995                           TCGv_vec v1, TCGv_vec v2, TCGOpcode add_sub_opc)
2996{
2997    TCGv_vec h1 = tcg_temp_new_vec(type);
2998    TCGv_vec h2 = tcg_temp_new_vec(type);
2999    TCGv_vec l1 = tcg_temp_new_vec(type);
3000    TCGv_vec l2 = tcg_temp_new_vec(type);
3001
3002    tcg_debug_assert (vece < MO_64);
3003
3004    /* Unpack with sign-extension. */
3005    vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
3006              tcgv_vec_arg(h1), tcgv_vec_arg(v1));
3007    vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
3008              tcgv_vec_arg(h2), tcgv_vec_arg(v2));
3009
3010    vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
3011              tcgv_vec_arg(l1), tcgv_vec_arg(v1));
3012    vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
3013              tcgv_vec_arg(l2), tcgv_vec_arg(v2));
3014
3015    /* Arithmetic on a wider element size. */
3016    vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(h1),
3017              tcgv_vec_arg(h1), tcgv_vec_arg(h2));
3018    vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(l1),
3019              tcgv_vec_arg(l1), tcgv_vec_arg(l2));
3020
3021    /* Pack with saturation. */
3022    vec_gen_3(INDEX_op_s390_vpks_vec, type, vece + 1,
3023              tcgv_vec_arg(v0), tcgv_vec_arg(h1), tcgv_vec_arg(l1));
3024
3025    tcg_temp_free_vec(h1);
3026    tcg_temp_free_vec(h2);
3027    tcg_temp_free_vec(l1);
3028    tcg_temp_free_vec(l2);
3029}
3030
3031void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
3032                       TCGArg a0, ...)
3033{
3034    va_list va;
3035    TCGv_vec v0, v1, v2, v3, v4, t0;
3036
3037    va_start(va, a0);
3038    v0 = temp_tcgv_vec(arg_temp(a0));
3039    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3040    v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3041
3042    switch (opc) {
3043    case INDEX_op_cmp_vec:
3044        expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
3045        break;
3046
3047    case INDEX_op_cmpsel_vec:
3048        v3 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3049        v4 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3050        expand_vec_cmpsel(type, vece, v0, v1, v2, v3, v4, va_arg(va, TCGArg));
3051        break;
3052
3053    case INDEX_op_rotrv_vec:
3054        t0 = tcg_temp_new_vec(type);
3055        tcg_gen_neg_vec(vece, t0, v2);
3056        tcg_gen_rotlv_vec(vece, v0, v1, t0);
3057        tcg_temp_free_vec(t0);
3058        break;
3059
3060    case INDEX_op_ssadd_vec:
3061        expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_add_vec);
3062        break;
3063    case INDEX_op_sssub_vec:
3064        expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_sub_vec);
3065        break;
3066
3067    default:
3068        g_assert_not_reached();
3069    }
3070    va_end(va);
3071}
3072
3073static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
3074{
3075    switch (op) {
3076    case INDEX_op_goto_ptr:
3077        return C_O0_I1(r);
3078
3079    case INDEX_op_ld8u_i32:
3080    case INDEX_op_ld8u_i64:
3081    case INDEX_op_ld8s_i32:
3082    case INDEX_op_ld8s_i64:
3083    case INDEX_op_ld16u_i32:
3084    case INDEX_op_ld16u_i64:
3085    case INDEX_op_ld16s_i32:
3086    case INDEX_op_ld16s_i64:
3087    case INDEX_op_ld_i32:
3088    case INDEX_op_ld32u_i64:
3089    case INDEX_op_ld32s_i64:
3090    case INDEX_op_ld_i64:
3091        return C_O1_I1(r, r);
3092
3093    case INDEX_op_st8_i32:
3094    case INDEX_op_st8_i64:
3095    case INDEX_op_st16_i32:
3096    case INDEX_op_st16_i64:
3097    case INDEX_op_st_i32:
3098    case INDEX_op_st32_i64:
3099    case INDEX_op_st_i64:
3100        return C_O0_I2(r, r);
3101
3102    case INDEX_op_add_i32:
3103    case INDEX_op_add_i64:
3104    case INDEX_op_shl_i64:
3105    case INDEX_op_shr_i64:
3106    case INDEX_op_sar_i64:
3107    case INDEX_op_rotl_i32:
3108    case INDEX_op_rotl_i64:
3109    case INDEX_op_rotr_i32:
3110    case INDEX_op_rotr_i64:
3111    case INDEX_op_setcond_i32:
3112        return C_O1_I2(r, r, ri);
3113    case INDEX_op_setcond_i64:
3114        return C_O1_I2(r, r, rA);
3115
3116    case INDEX_op_clz_i64:
3117        return C_O1_I2(r, r, rI);
3118
3119    case INDEX_op_sub_i32:
3120    case INDEX_op_sub_i64:
3121    case INDEX_op_and_i32:
3122    case INDEX_op_or_i32:
3123    case INDEX_op_xor_i32:
3124        return C_O1_I2(r, r, ri);
3125    case INDEX_op_and_i64:
3126        return C_O1_I2(r, r, rNKR);
3127    case INDEX_op_or_i64:
3128    case INDEX_op_xor_i64:
3129        return C_O1_I2(r, r, rK);
3130
3131    case INDEX_op_andc_i32:
3132    case INDEX_op_orc_i32:
3133    case INDEX_op_eqv_i32:
3134        return C_O1_I2(r, r, ri);
3135    case INDEX_op_andc_i64:
3136        return C_O1_I2(r, r, rKR);
3137    case INDEX_op_orc_i64:
3138    case INDEX_op_eqv_i64:
3139        return C_O1_I2(r, r, rNK);
3140
3141    case INDEX_op_nand_i32:
3142    case INDEX_op_nand_i64:
3143    case INDEX_op_nor_i32:
3144    case INDEX_op_nor_i64:
3145        return C_O1_I2(r, r, r);
3146
3147    case INDEX_op_mul_i32:
3148        return (HAVE_FACILITY(MISC_INSN_EXT2)
3149                ? C_O1_I2(r, r, ri)
3150                : C_O1_I2(r, 0, ri));
3151    case INDEX_op_mul_i64:
3152        return (HAVE_FACILITY(MISC_INSN_EXT2)
3153                ? C_O1_I2(r, r, rJ)
3154                : C_O1_I2(r, 0, rJ));
3155
3156    case INDEX_op_shl_i32:
3157    case INDEX_op_shr_i32:
3158    case INDEX_op_sar_i32:
3159        return C_O1_I2(r, r, ri);
3160
3161    case INDEX_op_brcond_i32:
3162        return C_O0_I2(r, ri);
3163    case INDEX_op_brcond_i64:
3164        return C_O0_I2(r, rA);
3165
3166    case INDEX_op_bswap16_i32:
3167    case INDEX_op_bswap16_i64:
3168    case INDEX_op_bswap32_i32:
3169    case INDEX_op_bswap32_i64:
3170    case INDEX_op_bswap64_i64:
3171    case INDEX_op_neg_i32:
3172    case INDEX_op_neg_i64:
3173    case INDEX_op_not_i32:
3174    case INDEX_op_not_i64:
3175    case INDEX_op_ext8s_i32:
3176    case INDEX_op_ext8s_i64:
3177    case INDEX_op_ext8u_i32:
3178    case INDEX_op_ext8u_i64:
3179    case INDEX_op_ext16s_i32:
3180    case INDEX_op_ext16s_i64:
3181    case INDEX_op_ext16u_i32:
3182    case INDEX_op_ext16u_i64:
3183    case INDEX_op_ext32s_i64:
3184    case INDEX_op_ext32u_i64:
3185    case INDEX_op_ext_i32_i64:
3186    case INDEX_op_extu_i32_i64:
3187    case INDEX_op_extract_i32:
3188    case INDEX_op_extract_i64:
3189    case INDEX_op_ctpop_i32:
3190    case INDEX_op_ctpop_i64:
3191        return C_O1_I1(r, r);
3192
3193    case INDEX_op_qemu_ld_i32:
3194    case INDEX_op_qemu_ld_i64:
3195        return C_O1_I1(r, L);
3196    case INDEX_op_qemu_st_i64:
3197    case INDEX_op_qemu_st_i32:
3198        return C_O0_I2(L, L);
3199
3200    case INDEX_op_deposit_i32:
3201    case INDEX_op_deposit_i64:
3202        return C_O1_I2(r, rZ, r);
3203
3204    case INDEX_op_movcond_i32:
3205        return C_O1_I4(r, r, ri, rI, r);
3206    case INDEX_op_movcond_i64:
3207        return C_O1_I4(r, r, rA, rI, r);
3208
3209    case INDEX_op_div2_i32:
3210    case INDEX_op_div2_i64:
3211    case INDEX_op_divu2_i32:
3212    case INDEX_op_divu2_i64:
3213        return C_O2_I3(o, m, 0, 1, r);
3214
3215    case INDEX_op_mulu2_i64:
3216        return C_O2_I2(o, m, 0, r);
3217    case INDEX_op_muls2_i64:
3218        return C_O2_I2(o, m, r, r);
3219
3220    case INDEX_op_add2_i32:
3221    case INDEX_op_sub2_i32:
3222        return C_O2_I4(r, r, 0, 1, ri, r);
3223
3224    case INDEX_op_add2_i64:
3225    case INDEX_op_sub2_i64:
3226        return C_O2_I4(r, r, 0, 1, rA, r);
3227
3228    case INDEX_op_st_vec:
3229        return C_O0_I2(v, r);
3230    case INDEX_op_ld_vec:
3231    case INDEX_op_dupm_vec:
3232        return C_O1_I1(v, r);
3233    case INDEX_op_dup_vec:
3234        return C_O1_I1(v, vr);
3235    case INDEX_op_abs_vec:
3236    case INDEX_op_neg_vec:
3237    case INDEX_op_not_vec:
3238    case INDEX_op_rotli_vec:
3239    case INDEX_op_sari_vec:
3240    case INDEX_op_shli_vec:
3241    case INDEX_op_shri_vec:
3242    case INDEX_op_s390_vuph_vec:
3243    case INDEX_op_s390_vupl_vec:
3244        return C_O1_I1(v, v);
3245    case INDEX_op_add_vec:
3246    case INDEX_op_sub_vec:
3247    case INDEX_op_and_vec:
3248    case INDEX_op_andc_vec:
3249    case INDEX_op_or_vec:
3250    case INDEX_op_orc_vec:
3251    case INDEX_op_xor_vec:
3252    case INDEX_op_nand_vec:
3253    case INDEX_op_nor_vec:
3254    case INDEX_op_eqv_vec:
3255    case INDEX_op_cmp_vec:
3256    case INDEX_op_mul_vec:
3257    case INDEX_op_rotlv_vec:
3258    case INDEX_op_rotrv_vec:
3259    case INDEX_op_shlv_vec:
3260    case INDEX_op_shrv_vec:
3261    case INDEX_op_sarv_vec:
3262    case INDEX_op_smax_vec:
3263    case INDEX_op_smin_vec:
3264    case INDEX_op_umax_vec:
3265    case INDEX_op_umin_vec:
3266    case INDEX_op_s390_vpks_vec:
3267        return C_O1_I2(v, v, v);
3268    case INDEX_op_rotls_vec:
3269    case INDEX_op_shls_vec:
3270    case INDEX_op_shrs_vec:
3271    case INDEX_op_sars_vec:
3272        return C_O1_I2(v, v, r);
3273    case INDEX_op_bitsel_vec:
3274        return C_O1_I3(v, v, v, v);
3275
3276    default:
3277        g_assert_not_reached();
3278    }
3279}
3280
/*
 * Mainline glibc added HWCAP_S390_VX before it was kernel ABI.
 * Some distros have fixed this up locally, others have not.
 * Supply HWCAP_S390_VXRS ourselves when the system headers do not
 * define it, so that query_s390_facilities() can always test the bit.
 */
#ifndef HWCAP_S390_VXRS
#define HWCAP_S390_VXRS 2048
#endif
3288
3289static void query_s390_facilities(void)
3290{
3291    unsigned long hwcap = qemu_getauxval(AT_HWCAP);
3292    const char *which;
3293
3294    /* Is STORE FACILITY LIST EXTENDED available?  Honestly, I believe this
3295       is present on all 64-bit systems, but let's check for it anyway.  */
3296    if (hwcap & HWCAP_S390_STFLE) {
3297        register int r0 __asm__("0") = ARRAY_SIZE(s390_facilities) - 1;
3298        register void *r1 __asm__("1") = s390_facilities;
3299
3300        /* stfle 0(%r1) */
3301        asm volatile(".word 0xb2b0,0x1000"
3302                     : "=r"(r0) : "r"(r0), "r"(r1) : "memory", "cc");
3303    }
3304
3305    /*
3306     * Use of vector registers requires os support beyond the facility bit.
3307     * If the kernel does not advertise support, disable the facility bits.
3308     * There is nothing else we currently care about in the 3rd word, so
3309     * disable VECTOR with one store.
3310     */
3311    if (!(hwcap & HWCAP_S390_VXRS)) {
3312        s390_facilities[2] = 0;
3313    }
3314
3315    /*
3316     * Minimum supported cpu revision is z196.
3317     * Check for all required facilities.
3318     * ZARCH_ACTIVE is done via preprocessor check for 64-bit.
3319     */
3320    if (!HAVE_FACILITY(LONG_DISP)) {
3321        which = "long-displacement";
3322        goto fail;
3323    }
3324    if (!HAVE_FACILITY(EXT_IMM)) {
3325        which = "extended-immediate";
3326        goto fail;
3327    }
3328    if (!HAVE_FACILITY(GEN_INST_EXT)) {
3329        which = "general-instructions-extension";
3330        goto fail;
3331    }
3332    /*
3333     * Facility 45 is a big bin that contains: distinct-operands,
3334     * fast-BCR-serialization, high-word, population-count,
3335     * interlocked-access-1, and load/store-on-condition-1
3336     */
3337    if (!HAVE_FACILITY(45)) {
3338        which = "45";
3339        goto fail;
3340    }
3341    return;
3342
3343 fail:
3344    error_report("%s: missing required facility %s", __func__, which);
3345    exit(EXIT_FAILURE);
3346}
3347
3348static void tcg_target_init(TCGContext *s)
3349{
3350    query_s390_facilities();
3351
3352    tcg_target_available_regs[TCG_TYPE_I32] = 0xffff;
3353    tcg_target_available_regs[TCG_TYPE_I64] = 0xffff;
3354    if (HAVE_FACILITY(VECTOR)) {
3355        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3356        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
3357    }
3358
3359    tcg_target_call_clobber_regs = 0;
3360    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
3361    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R1);
3362    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
3363    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
3364    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
3365    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
3366    /* The r6 register is technically call-saved, but it's also a parameter
3367       register, so it can get killed by setup for the qemu_st helper.  */
3368    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
3369    /* The return register can be considered call-clobbered.  */
3370    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14);
3371
3372    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
3373    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
3374    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
3375    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
3376    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
3377    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
3378    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
3379    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
3380    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
3381    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
3382    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
3383    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
3384    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V20);
3385    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V21);
3386    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V22);
3387    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V23);
3388    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V24);
3389    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V25);
3390    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V26);
3391    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V27);
3392    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V28);
3393    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V29);
3394    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V30);
3395    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V31);
3396
3397    s->reserved_regs = 0;
3398    tcg_regset_set_reg(s->reserved_regs, TCG_TMP0);
3399    /* XXX many insns can't be used with R0, so we better avoid it for now */
3400    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);
3401    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
3402}
3403
/*
 * Size in bytes of the stack frame that tcg_target_qemu_prologue() allocates:
 * the call-stack offset area, the static call-argument area, and the TCG
 * temporary buffer of CPU_TEMP_BUF_NLONGS longs.
 */
#define FRAME_SIZE  ((int)(TCG_TARGET_CALL_STACK_OFFSET          \
                           + TCG_STATIC_CALL_ARGS_SIZE           \
                           + CPU_TEMP_BUF_NLONGS * sizeof(long)))
3407
/*
 * Emit the code that enters and leaves generated translation blocks.
 *
 * Entry: save %r6-%r15, allocate FRAME_SIZE bytes of stack, tell TCG where
 * its temporary buffer lives in that frame, copy the first call argument
 * into TCG_AREG0 and branch to the address in the second call argument.
 *
 * Exit: two entry points are recorded.  tcg_code_gen_epilogue is the
 * goto_ptr return path, which first sets the return value (%r2) to 0;
 * tb_ret_addr is the common epilogue that restores %r6-%r15 and returns
 * through %r14.
 */
static void tcg_target_qemu_prologue(TCGContext *s)
{
    /* stmg %r6,%r15,48(%r15) (save registers) */
    tcg_out_insn(s, RXY, STMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 48);

    /* aghi %r15,-frame_size */
    tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -FRAME_SIZE);

    /* The TCG temp buffer sits above the call-stack and call-args areas. */
    tcg_set_frame(s, TCG_REG_CALL_STACK,
                  TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

#ifndef CONFIG_SOFTMMU
    /*
     * Without CONFIG_SOFTMMU, a guest_base at or above 0x80000 is kept in a
     * dedicated register, which is then withheld from allocation.
     * NOTE(review): the threshold presumably matches what still fits in an
     * instruction displacement -- confirm.
     */
    if (guest_base >= 0x80000) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
    }
#endif

    /* First call argument becomes TCG_AREG0. */
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);

    /* br %r3 (go to TB) */
    tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, tcg_target_call_iarg_regs[1]);

    /*
     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
     * and fall through to the rest of the epilogue.
     */
    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, 0);

    /* TB epilogue */
    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);

    /* lmg %r6,%r15,fs+48(%r15) (restore registers) */
    tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15,
                 FRAME_SIZE + 48);

    /* br %r14 (return) */
    tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14);
}
3449
3450static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3451{
3452    memset(p, 0x07, count * sizeof(tcg_insn_unit));
3453}
3454
/*
 * Layout of the unwind information handed to tcg_register_jit_int():
 * the common header followed by the raw call-frame instruction bytes
 * for this backend's prologue.
 */
typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];   /* DW_CFA_def_cfa with a 2-byte uleb128 offset */
    uint8_t fde_reg_ofs[18];  /* nine 2-byte DW_CFA_offset entries, %r6-%r14 */
} DebugFrame;
3460
/*
 * We're expecting a 2 byte uleb128 encoded value: debug_frame stores
 * FRAME_SIZE as two 7-bit groups, so it must fit in 14 bits.
 */
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

/* ELF machine type for this host. */
#define ELF_HOST_MACHINE  EM_S390
3465
/*
 * Unwind description of the frame set up by tcg_target_qemu_prologue():
 * the CFA is %r15 + FRAME_SIZE, and %r6-%r14 are recorded at the offsets
 * given in the per-register comments below.
 */
static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 8,                /* sleb128 8 */
    .h.cie.return_column = TCG_REG_R14,

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, TCG_REG_CALL_STACK,         /* DW_CFA_def_cfa %r15, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        /* The second byte of each entry is the offset in data_align units. */
        0x86, 6,                        /* DW_CFA_offset, %r6, 48 */
        0x87, 7,                        /* DW_CFA_offset, %r7, 56 */
        0x88, 8,                        /* DW_CFA_offset, %r8, 64 */
        0x89, 9,                        /* DW_CFA_offset, %r9, 72 */
        0x8a, 10,                       /* DW_CFA_offset, %r10, 80 */
        0x8b, 11,                       /* DW_CFA_offset, %r11, 88 */
        0x8c, 12,                       /* DW_CFA_offset, %r12, 96 */
        0x8d, 13,                       /* DW_CFA_offset, %r13, 104 */
        0x8e, 14,                       /* DW_CFA_offset, %r14, 112 */
    }
};
3494
3495void tcg_register_jit(const void *buf, size_t buf_size)
3496{
3497    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3498}
3499