xref: /openbmc/qemu/tcg/s390x/tcg-target.c.inc (revision 48805df9c22a0700fba4b3b548fafaa21726ca68)
1/*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2009 Ulrich Hecht <uli@suse.de>
5 * Copyright (c) 2009 Alexander Graf <agraf@suse.de>
6 * Copyright (c) 2010 Richard Henderson <rth@twiddle.net>
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to deal
10 * in the Software without restriction, including without limitation the rights
11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 * copies of the Software, and to permit persons to whom the Software is
13 * furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 * THE SOFTWARE.
25 */
26
27/* We only support generating code for 64-bit mode.  */
28#if TCG_TARGET_REG_BITS != 64
29#error "unsupported code generation mode"
30#endif
31
32#include "../tcg-ldst.c.inc"
33#include "../tcg-pool.c.inc"
34#include "elf.h"
35
/*
 * Backend-specific constant-operand constraint flags.  They are tested
 * against the "ct" argument of tcg_target_const_match().
 */
#define TCG_CT_CONST_S16        (1 << 8)   /* value fits in int16_t */
#define TCG_CT_CONST_S32        (1 << 9)   /* value fits in int32_t */
#define TCG_CT_CONST_S33        (1 << 10)  /* -0xffffffff <= value <= 0xffffffff */
#define TCG_CT_CONST_ZERO       (1 << 11)  /* value is zero */
#define TCG_CT_CONST_P32        (1 << 12)  /* one of the two 32-bit halves is zero */
#define TCG_CT_CONST_INV        (1 << 13)  /* invert value before the P32/INVRISBG tests */
#define TCG_CT_CONST_INVRISBG   (1 << 14)  /* complement of value passes risbg_mask() */

/*
 * Register-class masks over TCG register numbers.
 * NOTE(review): MAKE_64BIT_MASK is presumably (shift, length), i.e.
 * bits 0-15 for the general and bits 32-63 for the vector registers.
 */
#define ALL_GENERAL_REGS     MAKE_64BIT_MASK(0, 16)
#define ALL_VECTOR_REGS      MAKE_64BIT_MASK(32, 32)
46
/*
 * For softmmu, we need to avoid conflicts with the first 3
 * argument registers to perform the tlb lookup, and to call
 * the helper function.  The mask therefore starts at TCG_REG_R2,
 * the first entry of tcg_target_call_iarg_regs[], and covers three
 * registers.  Without softmmu nothing is reserved.
 */
#ifdef CONFIG_SOFTMMU
#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_R2, 3)
#else
#define SOFTMMU_RESERVE_REGS 0
#endif
57
58
/* In several places within the instruction set, 0 means "no register"
   rather than TCG_REG_R0.  */
#define TCG_REG_NONE    0

/* A scratch register that may be used throughout the backend.  */
#define TCG_TMP0        TCG_REG_R1

/* Only defined for non-softmmu builds.
   NOTE(review): presumably holds the guest base address -- confirm at
   the use sites, which are outside this part of the file.  */
#ifndef CONFIG_SOFTMMU
#define TCG_GUEST_BASE_REG TCG_REG_R13
#endif
69
/* All of the following instructions are prefixed with their instruction
   format, and are defined as 8- or 16-bit quantities, even when the two
   halves of the 16-bit quantity may appear 32 bits apart in the insn.
   This makes it easy to copy the values from the tables in Appendix B.

   The format prefix is also what the tcg_out_insn() macro pastes
   together to select the matching tcg_out_insn_<FMT>() emitter.  */
typedef enum S390Opcode {
    /* RIL format. */
    RIL_AFI     = 0xc209,
    RIL_AGFI    = 0xc208,
    RIL_ALFI    = 0xc20b,
    RIL_ALGFI   = 0xc20a,
    RIL_BRASL   = 0xc005,
    RIL_BRCL    = 0xc004,
    RIL_CFI     = 0xc20d,
    RIL_CGFI    = 0xc20c,
    RIL_CLFI    = 0xc20f,
    RIL_CLGFI   = 0xc20e,
    RIL_CLRL    = 0xc60f,
    RIL_CLGRL   = 0xc60a,
    RIL_CRL     = 0xc60d,
    RIL_CGRL    = 0xc608,
    RIL_IIHF    = 0xc008,
    RIL_IILF    = 0xc009,
    RIL_LARL    = 0xc000,
    RIL_LGFI    = 0xc001,
    RIL_LGRL    = 0xc408,
    RIL_LLIHF   = 0xc00e,
    RIL_LLILF   = 0xc00f,
    RIL_LRL     = 0xc40d,
    RIL_MSFI    = 0xc201,
    RIL_MSGFI   = 0xc200,
    RIL_NIHF    = 0xc00a,
    RIL_NILF    = 0xc00b,
    RIL_OIHF    = 0xc00c,
    RIL_OILF    = 0xc00d,
    RIL_SLFI    = 0xc205,
    RIL_SLGFI   = 0xc204,
    RIL_XIHF    = 0xc006,
    RIL_XILF    = 0xc007,

    /* RI format. */
    RI_AGHI     = 0xa70b,
    RI_AHI      = 0xa70a,
    RI_BRC      = 0xa704,
    RI_CHI      = 0xa70e,
    RI_CGHI     = 0xa70f,
    RI_IIHH     = 0xa500,
    RI_IIHL     = 0xa501,
    RI_IILH     = 0xa502,
    RI_IILL     = 0xa503,
    RI_LGHI     = 0xa709,
    RI_LLIHH    = 0xa50c,
    RI_LLIHL    = 0xa50d,
    RI_LLILH    = 0xa50e,
    RI_LLILL    = 0xa50f,
    RI_MGHI     = 0xa70d,
    RI_MHI      = 0xa70c,
    RI_NIHH     = 0xa504,
    RI_NIHL     = 0xa505,
    RI_NILH     = 0xa506,
    RI_NILL     = 0xa507,
    RI_OIHH     = 0xa508,
    RI_OIHL     = 0xa509,
    RI_OILH     = 0xa50a,
    RI_OILL     = 0xa50b,
    RI_TMLL     = 0xa701,

    /* RIE-b format. */
    RIEb_CGRJ    = 0xec64,
    RIEb_CLGRJ   = 0xec65,
    RIEb_CLRJ    = 0xec77,
    RIEb_CRJ     = 0xec76,

    /* RIE-c format. */
    RIEc_CGIJ    = 0xec7c,
    RIEc_CIJ     = 0xec7e,
    RIEc_CLGIJ   = 0xec7d,
    RIEc_CLIJ    = 0xec7f,

    /* RIE-f format. */
    RIEf_RISBG   = 0xec55,

    /* RIE-g format. */
    RIEg_LOCGHI  = 0xec46,

    /* RRE format. */
    RRE_AGR     = 0xb908,
    RRE_ALGR    = 0xb90a,
    RRE_ALCR    = 0xb998,
    RRE_ALCGR   = 0xb988,
    RRE_CGR     = 0xb920,
    RRE_CLGR    = 0xb921,
    RRE_DLGR    = 0xb987,
    RRE_DLR     = 0xb997,
    RRE_DSGFR   = 0xb91d,
    RRE_DSGR    = 0xb90d,
    RRE_FLOGR   = 0xb983,
    RRE_LGBR    = 0xb906,
    RRE_LCGR    = 0xb903,
    RRE_LGFR    = 0xb914,
    RRE_LGHR    = 0xb907,
    RRE_LGR     = 0xb904,
    RRE_LLGCR   = 0xb984,
    RRE_LLGFR   = 0xb916,
    RRE_LLGHR   = 0xb985,
    RRE_LRVR    = 0xb91f,
    RRE_LRVGR   = 0xb90f,
    RRE_LTGR    = 0xb902,
    RRE_MLGR    = 0xb986,
    RRE_MSGR    = 0xb90c,
    RRE_MSR     = 0xb252,
    RRE_NGR     = 0xb980,
    RRE_OGR     = 0xb981,
    RRE_SGR     = 0xb909,
    RRE_SLGR    = 0xb90b,
    RRE_SLBR    = 0xb999,
    RRE_SLBGR   = 0xb989,
    RRE_XGR     = 0xb982,

    /* RRF-a format, emitted without an m4 field. */
    RRFa_MGRK   = 0xb9ec,
    RRFa_MSRKC  = 0xb9fd,
    RRFa_MSGRKC = 0xb9ed,
    RRFa_NCRK   = 0xb9f5,
    RRFa_NCGRK  = 0xb9e5,
    RRFa_NNRK   = 0xb974,
    RRFa_NNGRK  = 0xb964,
    RRFa_NORK   = 0xb976,
    RRFa_NOGRK  = 0xb966,
    RRFa_NRK    = 0xb9f4,
    RRFa_NGRK   = 0xb9e4,
    RRFa_NXRK   = 0xb977,
    RRFa_NXGRK  = 0xb967,
    RRFa_OCRK   = 0xb975,
    RRFa_OCGRK  = 0xb965,
    RRFa_ORK    = 0xb9f6,
    RRFa_OGRK   = 0xb9e6,
    RRFa_SRK    = 0xb9f9,
    RRFa_SGRK   = 0xb9e9,
    RRFa_SLRK   = 0xb9fb,
    RRFa_SLGRK  = 0xb9eb,
    RRFa_XRK    = 0xb9f7,
    RRFa_XGRK   = 0xb9e7,

    /* RRF-a format, emitted with an m4 field. */
    RRFam_SELGR = 0xb9e3,

    /* RRF-c format. */
    RRFc_LOCR   = 0xb9f2,
    RRFc_LOCGR  = 0xb9e2,
    RRFc_POPCNT = 0xb9e1,

    /* RR format (8-bit opcodes). */
    RR_AR       = 0x1a,
    RR_ALR      = 0x1e,
    RR_BASR     = 0x0d,
    RR_BCR      = 0x07,
    RR_CLR      = 0x15,
    RR_CR       = 0x19,
    RR_DR       = 0x1d,
    RR_LCR      = 0x13,
    RR_LR       = 0x18,
    RR_LTR      = 0x12,
    RR_NR       = 0x14,
    RR_OR       = 0x16,
    RR_SR       = 0x1b,
    RR_SLR      = 0x1f,
    RR_XR       = 0x17,

    /* RSY format. */
    RSY_RLL     = 0xeb1d,
    RSY_RLLG    = 0xeb1c,
    RSY_SLLG    = 0xeb0d,
    RSY_SLLK    = 0xebdf,
    RSY_SRAG    = 0xeb0a,
    RSY_SRAK    = 0xebdc,
    RSY_SRLG    = 0xeb0c,
    RSY_SRLK    = 0xebde,

    /* RS format (8-bit opcodes). */
    RS_SLL      = 0x89,
    RS_SRA      = 0x8a,
    RS_SRL      = 0x88,

    /* RXY format.  NOTE(review): LMG and STMG carry the 0xeb prefix of
       the RSY_ group; tcg_out_insn_RXY is an alias of the RSY emitter,
       so both namings reach the same encoder.  */
    RXY_AG      = 0xe308,
    RXY_AY      = 0xe35a,
    RXY_CG      = 0xe320,
    RXY_CLG     = 0xe321,
    RXY_CLY     = 0xe355,
    RXY_CY      = 0xe359,
    RXY_LAY     = 0xe371,
    RXY_LB      = 0xe376,
    RXY_LG      = 0xe304,
    RXY_LGB     = 0xe377,
    RXY_LGF     = 0xe314,
    RXY_LGH     = 0xe315,
    RXY_LHY     = 0xe378,
    RXY_LLGC    = 0xe390,
    RXY_LLGF    = 0xe316,
    RXY_LLGH    = 0xe391,
    RXY_LMG     = 0xeb04,
    RXY_LRV     = 0xe31e,
    RXY_LRVG    = 0xe30f,
    RXY_LRVH    = 0xe31f,
    RXY_LY      = 0xe358,
    RXY_NG      = 0xe380,
    RXY_OG      = 0xe381,
    RXY_STCY    = 0xe372,
    RXY_STG     = 0xe324,
    RXY_STHY    = 0xe370,
    RXY_STMG    = 0xeb24,
    RXY_STRV    = 0xe33e,
    RXY_STRVG   = 0xe32f,
    RXY_STRVH   = 0xe33f,
    RXY_STY     = 0xe350,
    RXY_XG      = 0xe382,

    /* RX format (8-bit opcodes). */
    RX_A        = 0x5a,
    RX_C        = 0x59,
    RX_L        = 0x58,
    RX_LA       = 0x41,
    RX_LH       = 0x48,
    RX_ST       = 0x50,
    RX_STC      = 0x42,
    RX_STH      = 0x40,

    /* VRI formats. */
    VRIa_VGBM   = 0xe744,
    VRIa_VREPI  = 0xe745,
    VRIb_VGM    = 0xe746,
    VRIc_VREP   = 0xe74d,

    /* VRR formats. */
    VRRa_VLC    = 0xe7de,
    VRRa_VLP    = 0xe7df,
    VRRa_VLR    = 0xe756,
    VRRc_VA     = 0xe7f3,
    VRRc_VCEQ   = 0xe7f8,   /* we leave the m5 cs field 0 */
    VRRc_VCH    = 0xe7fb,   /* " */
    VRRc_VCHL   = 0xe7f9,   /* " */
    VRRc_VERLLV = 0xe773,
    VRRc_VESLV  = 0xe770,
    VRRc_VESRAV = 0xe77a,
    VRRc_VESRLV = 0xe778,
    VRRc_VML    = 0xe7a2,
    VRRc_VMN    = 0xe7fe,
    VRRc_VMNL   = 0xe7fc,
    VRRc_VMX    = 0xe7ff,
    VRRc_VMXL   = 0xe7fd,
    VRRc_VN     = 0xe768,
    VRRc_VNC    = 0xe769,
    VRRc_VNN    = 0xe76e,
    VRRc_VNO    = 0xe76b,
    VRRc_VNX    = 0xe76c,
    VRRc_VO     = 0xe76a,
    VRRc_VOC    = 0xe76f,
    VRRc_VPKS   = 0xe797,   /* we leave the m5 cs field 0 */
    VRRc_VS     = 0xe7f7,
    VRRa_VUPH   = 0xe7d7,
    VRRa_VUPL   = 0xe7d6,
    VRRc_VX     = 0xe76d,
    VRRe_VSEL   = 0xe78d,
    VRRf_VLVGP  = 0xe762,

    /* VRS formats. */
    VRSa_VERLL  = 0xe733,
    VRSa_VESL   = 0xe730,
    VRSa_VESRA  = 0xe73a,
    VRSa_VESRL  = 0xe738,
    VRSb_VLVG   = 0xe722,
    VRSc_VLGV   = 0xe721,

    /* VRX format. */
    VRX_VL      = 0xe706,
    VRX_VLLEZ   = 0xe704,
    VRX_VLREP   = 0xe705,
    VRX_VST     = 0xe70e,
    VRX_VSTEF   = 0xe70b,
    VRX_VSTEG   = 0xe70a,

    /* No format prefix; a bare 16-bit value. */
    NOP         = 0x0707,
} S390Opcode;
334
#ifdef CONFIG_DEBUG_TCG
/*
 * Printable register names, indexed by register number.  Entries 0-15
 * name the general registers, entries 16-31 are unnamed (null), and
 * entries 32-63 name the vector registers.  Only built for debug.
 */
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
    "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
    "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6",  "%v7",
    "%v8",  "%v9",  "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
    "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
    "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
};
#endif
346
/* Since R6 is a potential argument register, choose it last of the
   call-saved registers.  Likewise prefer the call-clobbered registers
   in reverse order to maximize the chance of avoiding the arguments.

   R15 does not appear in the list at all, and neither do V8-V15.  */
static const int tcg_target_reg_alloc_order[] = {
    /* Call saved registers.  */
    TCG_REG_R13,
    TCG_REG_R12,
    TCG_REG_R11,
    TCG_REG_R10,
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_R7,
    TCG_REG_R6,
    /* Call clobbered registers.  */
    TCG_REG_R14,
    TCG_REG_R0,
    TCG_REG_R1,
    /* Argument registers, in reverse order of allocation.  */
    TCG_REG_R5,
    TCG_REG_R4,
    TCG_REG_R3,
    TCG_REG_R2,

    /* V8-V15 are call saved, and omitted. */
    TCG_REG_V0,
    TCG_REG_V1,
    TCG_REG_V2,
    TCG_REG_V3,
    TCG_REG_V4,
    TCG_REG_V5,
    TCG_REG_V6,
    TCG_REG_V7,
    TCG_REG_V16,
    TCG_REG_V17,
    TCG_REG_V18,
    TCG_REG_V19,
    TCG_REG_V20,
    TCG_REG_V21,
    TCG_REG_V22,
    TCG_REG_V23,
    TCG_REG_V24,
    TCG_REG_V25,
    TCG_REG_V26,
    TCG_REG_V27,
    TCG_REG_V28,
    TCG_REG_V29,
    TCG_REG_V30,
    TCG_REG_V31,
};
396
/* The five registers R2-R6, in ascending order.
   NOTE(review): presumably the integer argument registers for helper
   calls, in argument order -- the consumer is outside this file part.  */
static const int tcg_target_call_iarg_regs[] = {
    TCG_REG_R2,
    TCG_REG_R3,
    TCG_REG_R4,
    TCG_REG_R5,
    TCG_REG_R6,
};
404
405static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
406{
407    tcg_debug_assert(kind == TCG_CALL_RET_NORMAL);
408    tcg_debug_assert(slot == 0);
409    return TCG_REG_R2;
410}
411
/*
 * Condition masks.  The four basic outcomes are single bits; the
 * composite conditions are the OR of the basic bits they accept.
 * NEVER selects no outcome and ALWAYS selects all four.
 */
#define S390_CC_EQ      8
#define S390_CC_LT      4
#define S390_CC_GT      2
#define S390_CC_OV      1
#define S390_CC_NE      (S390_CC_LT | S390_CC_GT)
#define S390_CC_LE      (S390_CC_LT | S390_CC_EQ)
#define S390_CC_GE      (S390_CC_GT | S390_CC_EQ)
#define S390_CC_NEVER   0
#define S390_CC_ALWAYS  15
421
/* Condition codes that result from a COMPARE and COMPARE LOGICAL.
   Indexed by TCGCond.  Each unsigned condition maps to the same mask as
   its signed counterpart; presumably the caller selects signedness by
   choosing between the signed and the logical compare insn.  */
static const uint8_t tcg_cond_to_s390_cond[] = {
    [TCG_COND_EQ]  = S390_CC_EQ,
    [TCG_COND_NE]  = S390_CC_NE,
    [TCG_COND_LT]  = S390_CC_LT,
    [TCG_COND_LE]  = S390_CC_LE,
    [TCG_COND_GT]  = S390_CC_GT,
    [TCG_COND_GE]  = S390_CC_GE,
    [TCG_COND_LTU] = S390_CC_LT,
    [TCG_COND_LEU] = S390_CC_LE,
    [TCG_COND_GTU] = S390_CC_GT,
    [TCG_COND_GEU] = S390_CC_GE,
};
435
/* Condition codes that result from a LOAD AND TEST.  Here, we have no
   unsigned instruction variation, however since the test is vs zero we
   can re-map the outcomes appropriately.  Indexed by TCGCond.  */
static const uint8_t tcg_cond_to_ltr_cond[] = {
    [TCG_COND_EQ]  = S390_CC_EQ,
    [TCG_COND_NE]  = S390_CC_NE,
    [TCG_COND_LT]  = S390_CC_LT,
    [TCG_COND_LE]  = S390_CC_LE,
    [TCG_COND_GT]  = S390_CC_GT,
    [TCG_COND_GE]  = S390_CC_GE,
    [TCG_COND_LTU] = S390_CC_NEVER,   /* unsigned x < 0 is never true */
    [TCG_COND_LEU] = S390_CC_EQ,      /* unsigned x <= 0 means x == 0 */
    [TCG_COND_GTU] = S390_CC_NE,      /* unsigned x > 0 means x != 0 */
    [TCG_COND_GEU] = S390_CC_ALWAYS,  /* unsigned x >= 0 is always true */
};
451
#ifdef CONFIG_SOFTMMU
/*
 * Load helper functions, indexed by the MemOp size, sign and byte-swap
 * bits.  Combinations without a designated initializer stay null;
 * there is no sign-extending 64-bit entry.
 */
static void * const qemu_ld_helpers[(MO_SSIZE | MO_BSWAP) + 1] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_SB]   = helper_ret_ldsb_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LESW] = helper_le_ldsw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LESL] = helper_le_ldsl_mmu,
    [MO_LEUQ] = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BESW] = helper_be_ldsw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BESL] = helper_be_ldsl_mmu,
    [MO_BEUQ] = helper_be_ldq_mmu,
};

/*
 * Store helper functions, indexed by the MemOp size and byte-swap bits
 * (stores have no signed variants).
 */
static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEUQ] = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEUQ] = helper_be_stq_mmu,
};
#endif
478
/* NOTE(review): not assigned or read in this part of the file;
   presumably the code address that translated blocks return to.  */
static const tcg_insn_unit *tb_ret_addr;
/* Non-static, so visible to other translation units.
   NOTE(review): presumably the host facility bits -- confirm where it
   is filled in.  */
uint64_t s390_facilities[3];
481
482static inline bool is_general_reg(TCGReg r)
483{
484    return r <= TCG_REG_R15;
485}
486
487static inline bool is_vector_reg(TCGReg r)
488{
489    return r >= TCG_REG_V0 && r <= TCG_REG_V31;
490}
491
492static bool patch_reloc(tcg_insn_unit *src_rw, int type,
493                        intptr_t value, intptr_t addend)
494{
495    const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
496    intptr_t pcrel2;
497    uint32_t old;
498
499    value += addend;
500    pcrel2 = (tcg_insn_unit *)value - src_rx;
501
502    switch (type) {
503    case R_390_PC16DBL:
504        if (pcrel2 == (int16_t)pcrel2) {
505            tcg_patch16(src_rw, pcrel2);
506            return true;
507        }
508        break;
509    case R_390_PC32DBL:
510        if (pcrel2 == (int32_t)pcrel2) {
511            tcg_patch32(src_rw, pcrel2);
512            return true;
513        }
514        break;
515    case R_390_20:
516        if (value == sextract64(value, 0, 20)) {
517            old = *(uint32_t *)src_rw & 0xf00000ff;
518            old |= ((value & 0xfff) << 16) | ((value & 0xff000) >> 4);
519            tcg_patch32(src_rw, old);
520            return true;
521        }
522        break;
523    default:
524        g_assert_not_reached();
525    }
526    return false;
527}
528
/*
 * If every set bit of VAL lies within a single aligned 16-bit field,
 * return the index (0..3, counted from the least significant end) of
 * the lowest such field; otherwise return -1.  Zero yields 0.
 */
static int is_const_p16(uint64_t val)
{
    int slot = 0;

    while (slot < 4) {
        uint64_t outside = val & ~(0xffffull << (slot * 16));

        if (outside == 0) {
            return slot;
        }
        slot++;
    }
    return -1;
}
539
/*
 * Return 0 when the upper 32 bits of VAL are zero, 1 when only the
 * lower 32 bits are zero, and -1 when both halves have bits set.
 */
static int is_const_p32(uint64_t val)
{
    const uint32_t high = val >> 32;
    const uint32_t low = val;

    if (high == 0) {
        return 0;
    }
    return low == 0 ? 1 : -1;
}
550
/*
 * Accept bit patterns like these:
 *  0....01....1
 *  1....10....0
 *  1..10..01..1
 *  0..01..10..0
 * that is, a single run of ones, possibly wrapping around the ends of
 * the 64-bit word.  All-zeros and all-ones are rejected.
 * Copied from gcc sources.
 */
static bool risbg_mask(uint64_t c)
{
    /* Inverting keeps the number of transitions, so normalise the
       pattern to have its least significant bit clear.  */
    uint64_t bits = (c & 1) ? ~c : c;
    uint64_t edge;

    /* All zeros or all ones: no transition at all.  */
    if (bits == 0) {
        return false;
    }

    /* Lowest set bit marks the first transition.  */
    edge = bits & -bits;

    /* Invert and discard everything below the first transition, so the
       next set bit (if any) marks the second transition.  */
    bits = ~bits & -edge;
    edge = bits & -bits;

    /* Match if nothing is left, or if what is left is solid ones up to
       the top bit.  */
    return bits == -edge;
}
582
583/* Test if a constant matches the constraint. */
584static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
585{
586    if (ct & TCG_CT_CONST) {
587        return 1;
588    }
589
590    if (type == TCG_TYPE_I32) {
591        val = (int32_t)val;
592    }
593
594    /* The following are mutually exclusive.  */
595    if (ct & TCG_CT_CONST_S16) {
596        return val == (int16_t)val;
597    } else if (ct & TCG_CT_CONST_S32) {
598        return val == (int32_t)val;
599    } else if (ct & TCG_CT_CONST_S33) {
600        return val >= -0xffffffffll && val <= 0xffffffffll;
601    } else if (ct & TCG_CT_CONST_ZERO) {
602        return val == 0;
603    }
604
605    if (ct & TCG_CT_CONST_INV) {
606        val = ~val;
607    }
608    /*
609     * Note that is_const_p16 is a subset of is_const_p32,
610     * so we don't need both constraints.
611     */
612    if ((ct & TCG_CT_CONST_P32) && is_const_p32(val) >= 0) {
613        return true;
614    }
615    if ((ct & TCG_CT_CONST_INVRISBG) && risbg_mask(~val)) {
616        return true;
617    }
618
619    return 0;
620}
621
622/* Emit instructions according to the given instruction format.  */
623
624static void tcg_out_insn_RR(TCGContext *s, S390Opcode op, TCGReg r1, TCGReg r2)
625{
626    tcg_out16(s, (op << 8) | (r1 << 4) | r2);
627}
628
629static void tcg_out_insn_RRE(TCGContext *s, S390Opcode op,
630                             TCGReg r1, TCGReg r2)
631{
632    tcg_out32(s, (op << 16) | (r1 << 4) | r2);
633}
634
635/* RRF-a without the m4 field */
636static void tcg_out_insn_RRFa(TCGContext *s, S390Opcode op,
637                              TCGReg r1, TCGReg r2, TCGReg r3)
638{
639    tcg_out32(s, (op << 16) | (r3 << 12) | (r1 << 4) | r2);
640}
641
642/* RRF-a with the m4 field */
643static void tcg_out_insn_RRFam(TCGContext *s, S390Opcode op,
644                               TCGReg r1, TCGReg r2, TCGReg r3, int m4)
645{
646    tcg_out32(s, (op << 16) | (r3 << 12) | (m4 << 8) | (r1 << 4) | r2);
647}
648
649static void tcg_out_insn_RRFc(TCGContext *s, S390Opcode op,
650                              TCGReg r1, TCGReg r2, int m3)
651{
652    tcg_out32(s, (op << 16) | (m3 << 12) | (r1 << 4) | r2);
653}
654
655static void tcg_out_insn_RI(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
656{
657    tcg_out32(s, (op << 16) | (r1 << 20) | (i2 & 0xffff));
658}
659
660static void tcg_out_insn_RIEg(TCGContext *s, S390Opcode op, TCGReg r1,
661                             int i2, int m3)
662{
663    tcg_out16(s, (op & 0xff00) | (r1 << 4) | m3);
664    tcg_out32(s, (i2 << 16) | (op & 0xff));
665}
666
667static void tcg_out_insn_RIL(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
668{
669    tcg_out16(s, op | (r1 << 4));
670    tcg_out32(s, i2);
671}
672
673static void tcg_out_insn_RS(TCGContext *s, S390Opcode op, TCGReg r1,
674                            TCGReg b2, TCGReg r3, int disp)
675{
676    tcg_out32(s, (op << 24) | (r1 << 20) | (r3 << 16) | (b2 << 12)
677              | (disp & 0xfff));
678}
679
680static void tcg_out_insn_RSY(TCGContext *s, S390Opcode op, TCGReg r1,
681                             TCGReg b2, TCGReg r3, int disp)
682{
683    tcg_out16(s, (op & 0xff00) | (r1 << 4) | r3);
684    tcg_out32(s, (op & 0xff) | (b2 << 28)
685              | ((disp & 0xfff) << 16) | ((disp & 0xff000) >> 4));
686}
687
/* The RX and RXY emitters are plain aliases of the RS and RSY ones: the
   argument lists are identical, with the index register passed in the
   position of R3.  */
#define tcg_out_insn_RX   tcg_out_insn_RS
#define tcg_out_insn_RXY  tcg_out_insn_RSY
690
691static int RXB(TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
692{
693    /*
694     * Shift bit 4 of each regno to its corresponding bit of RXB.
695     * RXB itself begins at bit 8 of the instruction so 8 - 4 = 4
696     * is the left-shift of the 4th operand.
697     */
698    return ((v1 & 0x10) << (4 + 3))
699         | ((v2 & 0x10) << (4 + 2))
700         | ((v3 & 0x10) << (4 + 1))
701         | ((v4 & 0x10) << (4 + 0));
702}
703
704static void tcg_out_insn_VRIa(TCGContext *s, S390Opcode op,
705                              TCGReg v1, uint16_t i2, int m3)
706{
707    tcg_debug_assert(is_vector_reg(v1));
708    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4));
709    tcg_out16(s, i2);
710    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
711}
712
713static void tcg_out_insn_VRIb(TCGContext *s, S390Opcode op,
714                              TCGReg v1, uint8_t i2, uint8_t i3, int m4)
715{
716    tcg_debug_assert(is_vector_reg(v1));
717    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4));
718    tcg_out16(s, (i2 << 8) | (i3 & 0xff));
719    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
720}
721
722static void tcg_out_insn_VRIc(TCGContext *s, S390Opcode op,
723                              TCGReg v1, uint16_t i2, TCGReg v3, int m4)
724{
725    tcg_debug_assert(is_vector_reg(v1));
726    tcg_debug_assert(is_vector_reg(v3));
727    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v3 & 0xf));
728    tcg_out16(s, i2);
729    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, v3, 0) | (m4 << 12));
730}
731
732static void tcg_out_insn_VRRa(TCGContext *s, S390Opcode op,
733                              TCGReg v1, TCGReg v2, int m3)
734{
735    tcg_debug_assert(is_vector_reg(v1));
736    tcg_debug_assert(is_vector_reg(v2));
737    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
738    tcg_out32(s, (op & 0x00ff) | RXB(v1, v2, 0, 0) | (m3 << 12));
739}
740
741static void tcg_out_insn_VRRc(TCGContext *s, S390Opcode op,
742                              TCGReg v1, TCGReg v2, TCGReg v3, int m4)
743{
744    tcg_debug_assert(is_vector_reg(v1));
745    tcg_debug_assert(is_vector_reg(v2));
746    tcg_debug_assert(is_vector_reg(v3));
747    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
748    tcg_out16(s, v3 << 12);
749    tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, 0) | (m4 << 12));
750}
751
752static void tcg_out_insn_VRRe(TCGContext *s, S390Opcode op,
753                              TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
754{
755    tcg_debug_assert(is_vector_reg(v1));
756    tcg_debug_assert(is_vector_reg(v2));
757    tcg_debug_assert(is_vector_reg(v3));
758    tcg_debug_assert(is_vector_reg(v4));
759    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
760    tcg_out16(s, v3 << 12);
761    tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, v4) | (v4 << 12));
762}
763
764static void tcg_out_insn_VRRf(TCGContext *s, S390Opcode op,
765                              TCGReg v1, TCGReg r2, TCGReg r3)
766{
767    tcg_debug_assert(is_vector_reg(v1));
768    tcg_debug_assert(is_general_reg(r2));
769    tcg_debug_assert(is_general_reg(r3));
770    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r2);
771    tcg_out16(s, r3 << 12);
772    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0));
773}
774
775static void tcg_out_insn_VRSa(TCGContext *s, S390Opcode op, TCGReg v1,
776                              intptr_t d2, TCGReg b2, TCGReg v3, int m4)
777{
778    tcg_debug_assert(is_vector_reg(v1));
779    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
780    tcg_debug_assert(is_general_reg(b2));
781    tcg_debug_assert(is_vector_reg(v3));
782    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v3 & 0xf));
783    tcg_out16(s, b2 << 12 | d2);
784    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, v3, 0) | (m4 << 12));
785}
786
787static void tcg_out_insn_VRSb(TCGContext *s, S390Opcode op, TCGReg v1,
788                              intptr_t d2, TCGReg b2, TCGReg r3, int m4)
789{
790    tcg_debug_assert(is_vector_reg(v1));
791    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
792    tcg_debug_assert(is_general_reg(b2));
793    tcg_debug_assert(is_general_reg(r3));
794    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r3);
795    tcg_out16(s, b2 << 12 | d2);
796    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
797}
798
799static void tcg_out_insn_VRSc(TCGContext *s, S390Opcode op, TCGReg r1,
800                              intptr_t d2, TCGReg b2, TCGReg v3, int m4)
801{
802    tcg_debug_assert(is_general_reg(r1));
803    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
804    tcg_debug_assert(is_general_reg(b2));
805    tcg_debug_assert(is_vector_reg(v3));
806    tcg_out16(s, (op & 0xff00) | (r1 << 4) | (v3 & 0xf));
807    tcg_out16(s, b2 << 12 | d2);
808    tcg_out16(s, (op & 0x00ff) | RXB(0, 0, v3, 0) | (m4 << 12));
809}
810
811static void tcg_out_insn_VRX(TCGContext *s, S390Opcode op, TCGReg v1,
812                             TCGReg b2, TCGReg x2, intptr_t d2, int m3)
813{
814    tcg_debug_assert(is_vector_reg(v1));
815    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
816    tcg_debug_assert(is_general_reg(x2));
817    tcg_debug_assert(is_general_reg(b2));
818    tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | x2);
819    tcg_out16(s, (b2 << 12) | d2);
820    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
821}
822
/* Emit an opcode with "type-checking" of the format.
   tcg_out_insn(s, FMT, OP, args...) expands to
   tcg_out_insn_FMT(s, FMT_OP, args...), so an opcode can only be paired
   with the emitter of the format it is declared under in S390Opcode.  */
#define tcg_out_insn(S, FMT, OP, ...) \
    glue(tcg_out_insn_,FMT)(S, glue(glue(FMT,_),OP), ## __VA_ARGS__)
826
827
828/* emit 64-bit shifts */
829static void tcg_out_sh64(TCGContext* s, S390Opcode op, TCGReg dest,
830                         TCGReg src, TCGReg sh_reg, int sh_imm)
831{
832    tcg_out_insn_RSY(s, op, dest, sh_reg, src, sh_imm);
833}
834
835/* emit 32-bit shifts */
836static void tcg_out_sh32(TCGContext* s, S390Opcode op, TCGReg dest,
837                         TCGReg sh_reg, int sh_imm)
838{
839    tcg_out_insn_RS(s, op, dest, sh_reg, 0, sh_imm);
840}
841
842static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
843{
844    if (src == dst) {
845        return true;
846    }
847    switch (type) {
848    case TCG_TYPE_I32:
849        if (likely(is_general_reg(dst) && is_general_reg(src))) {
850            tcg_out_insn(s, RR, LR, dst, src);
851            break;
852        }
853        /* fallthru */
854
855    case TCG_TYPE_I64:
856        if (likely(is_general_reg(dst))) {
857            if (likely(is_general_reg(src))) {
858                tcg_out_insn(s, RRE, LGR, dst, src);
859            } else {
860                tcg_out_insn(s, VRSc, VLGV, dst, 0, 0, src, 3);
861            }
862            break;
863        } else if (is_general_reg(src)) {
864            tcg_out_insn(s, VRSb, VLVG, dst, 0, 0, src, 3);
865            break;
866        }
867        /* fallthru */
868
869    case TCG_TYPE_V64:
870    case TCG_TYPE_V128:
871        tcg_out_insn(s, VRRa, VLR, dst, src, 0);
872        break;
873
874    default:
875        g_assert_not_reached();
876    }
877    return true;
878}
879
/* RI-format LLIxx opcodes, indexed by 16-bit field number as returned
   by is_const_p16() (0 = least significant field).  */
static const S390Opcode li_insns[4] = {
    RI_LLILL, RI_LLILH, RI_LLIHL, RI_LLIHH
};
/* RI-format OIxx opcodes, indexed the same way as li_insns.  */
static const S390Opcode oi_insns[4] = {
    RI_OILL, RI_OILH, RI_OIHL, RI_OIHH
};
/* RIL-format LLIxF opcodes, indexed by 32-bit half as returned by
   is_const_p32() (0 = low half, 1 = high half).  */
static const S390Opcode lif_insns[2] = {
    RIL_LLILF, RIL_LLIHF,
};
889
890/* load a register with an immediate value */
891static void tcg_out_movi(TCGContext *s, TCGType type,
892                         TCGReg ret, tcg_target_long sval)
893{
894    tcg_target_ulong uval = sval;
895    ptrdiff_t pc_off;
896    int i;
897
898    if (type == TCG_TYPE_I32) {
899        uval = (uint32_t)sval;
900        sval = (int32_t)sval;
901    }
902
903    /* Try all 32-bit insns that can load it in one go.  */
904    if (sval >= -0x8000 && sval < 0x8000) {
905        tcg_out_insn(s, RI, LGHI, ret, sval);
906        return;
907    }
908
909    i = is_const_p16(uval);
910    if (i >= 0) {
911        tcg_out_insn_RI(s, li_insns[i], ret, uval >> (i * 16));
912        return;
913    }
914
915    /* Try all 48-bit insns that can load it in one go.  */
916    if (sval == (int32_t)sval) {
917        tcg_out_insn(s, RIL, LGFI, ret, sval);
918        return;
919    }
920
921    i = is_const_p32(uval);
922    if (i >= 0) {
923        tcg_out_insn_RIL(s, lif_insns[i], ret, uval >> (i * 32));
924        return;
925    }
926
927    /* Try for PC-relative address load.  For odd addresses, add one. */
928    pc_off = tcg_pcrel_diff(s, (void *)sval) >> 1;
929    if (pc_off == (int32_t)pc_off) {
930        tcg_out_insn(s, RIL, LARL, ret, pc_off);
931        if (sval & 1) {
932            tcg_out_insn(s, RI, AGHI, ret, 1);
933        }
934        return;
935    }
936
937    /* Otherwise, load it by parts. */
938    i = is_const_p16((uint32_t)uval);
939    if (i >= 0) {
940        tcg_out_insn_RI(s, li_insns[i], ret, uval >> (i * 16));
941    } else {
942        tcg_out_insn(s, RIL, LLILF, ret, uval);
943    }
944    uval >>= 32;
945    i = is_const_p16(uval);
946    if (i >= 0) {
947        tcg_out_insn_RI(s, oi_insns[i + 2], ret, uval >> (i * 16));
948    } else {
949        tcg_out_insn(s, RIL, OIHF, ret, uval);
950    }
951}
952
953/* Emit a load/store type instruction.  Inputs are:
954   DATA:     The register to be loaded or stored.
955   BASE+OFS: The effective address.
956   OPC_RX:   If the operation has an RX format opcode (e.g. STC), otherwise 0.
957   OPC_RXY:  The RXY format opcode for the operation (e.g. STCY).  */
958
959static void tcg_out_mem(TCGContext *s, S390Opcode opc_rx, S390Opcode opc_rxy,
960                        TCGReg data, TCGReg base, TCGReg index,
961                        tcg_target_long ofs)
962{
963    if (ofs < -0x80000 || ofs >= 0x80000) {
964        /* Combine the low 20 bits of the offset with the actual load insn;
965           the high 44 bits must come from an immediate load.  */
966        tcg_target_long low = ((ofs & 0xfffff) ^ 0x80000) - 0x80000;
967        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - low);
968        ofs = low;
969
970        /* If we were already given an index register, add it in.  */
971        if (index != TCG_REG_NONE) {
972            tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
973        }
974        index = TCG_TMP0;
975    }
976
977    if (opc_rx && ofs >= 0 && ofs < 0x1000) {
978        tcg_out_insn_RX(s, opc_rx, data, base, index, ofs);
979    } else {
980        tcg_out_insn_RXY(s, opc_rxy, data, base, index, ofs);
981    }
982}
983
984static void tcg_out_vrx_mem(TCGContext *s, S390Opcode opc_vrx,
985                            TCGReg data, TCGReg base, TCGReg index,
986                            tcg_target_long ofs, int m3)
987{
988    if (ofs < 0 || ofs >= 0x1000) {
989        if (ofs >= -0x80000 && ofs < 0x80000) {
990            tcg_out_insn(s, RXY, LAY, TCG_TMP0, base, index, ofs);
991            base = TCG_TMP0;
992            index = TCG_REG_NONE;
993            ofs = 0;
994        } else {
995            tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs);
996            if (index != TCG_REG_NONE) {
997                tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
998            }
999            index = TCG_TMP0;
1000            ofs = 0;
1001        }
1002    }
1003    tcg_out_insn_VRX(s, opc_vrx, data, base, index, ofs, m3);
1004}
1005
1006/* load data without address translation or endianness conversion */
1007static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data,
1008                       TCGReg base, intptr_t ofs)
1009{
1010    switch (type) {
1011    case TCG_TYPE_I32:
1012        if (likely(is_general_reg(data))) {
1013            tcg_out_mem(s, RX_L, RXY_LY, data, base, TCG_REG_NONE, ofs);
1014            break;
1015        }
1016        tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_32);
1017        break;
1018
1019    case TCG_TYPE_I64:
1020        if (likely(is_general_reg(data))) {
1021            tcg_out_mem(s, 0, RXY_LG, data, base, TCG_REG_NONE, ofs);
1022            break;
1023        }
1024        /* fallthru */
1025
1026    case TCG_TYPE_V64:
1027        tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_64);
1028        break;
1029
1030    case TCG_TYPE_V128:
1031        /* Hint quadword aligned.  */
1032        tcg_out_vrx_mem(s, VRX_VL, data, base, TCG_REG_NONE, ofs, 4);
1033        break;
1034
1035    default:
1036        g_assert_not_reached();
1037    }
1038}
1039
1040static void tcg_out_st(TCGContext *s, TCGType type, TCGReg data,
1041                       TCGReg base, intptr_t ofs)
1042{
1043    switch (type) {
1044    case TCG_TYPE_I32:
1045        if (likely(is_general_reg(data))) {
1046            tcg_out_mem(s, RX_ST, RXY_STY, data, base, TCG_REG_NONE, ofs);
1047        } else {
1048            tcg_out_vrx_mem(s, VRX_VSTEF, data, base, TCG_REG_NONE, ofs, 1);
1049        }
1050        break;
1051
1052    case TCG_TYPE_I64:
1053        if (likely(is_general_reg(data))) {
1054            tcg_out_mem(s, 0, RXY_STG, data, base, TCG_REG_NONE, ofs);
1055            break;
1056        }
1057        /* fallthru */
1058
1059    case TCG_TYPE_V64:
1060        tcg_out_vrx_mem(s, VRX_VSTEG, data, base, TCG_REG_NONE, ofs, 0);
1061        break;
1062
1063    case TCG_TYPE_V128:
1064        /* Hint quadword aligned.  */
1065        tcg_out_vrx_mem(s, VRX_VST, data, base, TCG_REG_NONE, ofs, 4);
1066        break;
1067
1068    default:
1069        g_assert_not_reached();
1070    }
1071}
1072
1073static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
1074                               TCGReg base, intptr_t ofs)
1075{
1076    return false;
1077}
1078
1079static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
1080                             tcg_target_long imm)
1081{
1082    /* This function is only used for passing structs by reference. */
1083    tcg_out_mem(s, RX_LA, RXY_LAY, rd, rs, TCG_REG_NONE, imm);
1084}
1085
1086static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src,
1087                                 int msb, int lsb, int ofs, int z)
1088{
1089    /* Format RIE-f */
1090    tcg_out16(s, (RIEf_RISBG & 0xff00) | (dest << 4) | src);
1091    tcg_out16(s, (msb << 8) | (z << 7) | lsb);
1092    tcg_out16(s, (ofs << 8) | (RIEf_RISBG & 0xff));
1093}
1094
1095static void tgen_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1096{
1097    tcg_out_insn(s, RRE, LGBR, dest, src);
1098}
1099
1100static void tgen_ext8u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1101{
1102    tcg_out_insn(s, RRE, LLGCR, dest, src);
1103}
1104
1105static void tgen_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1106{
1107    tcg_out_insn(s, RRE, LGHR, dest, src);
1108}
1109
1110static void tgen_ext16u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1111{
1112    tcg_out_insn(s, RRE, LLGHR, dest, src);
1113}
1114
1115static inline void tgen_ext32s(TCGContext *s, TCGReg dest, TCGReg src)
1116{
1117    tcg_out_insn(s, RRE, LGFR, dest, src);
1118}
1119
1120static inline void tgen_ext32u(TCGContext *s, TCGReg dest, TCGReg src)
1121{
1122    tcg_out_insn(s, RRE, LLGFR, dest, src);
1123}
1124
1125static void tgen_andi_risbg(TCGContext *s, TCGReg out, TCGReg in, uint64_t val)
1126{
1127    int msb, lsb;
1128    if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
1129        /* Achieve wraparound by swapping msb and lsb.  */
1130        msb = 64 - ctz64(~val);
1131        lsb = clz64(~val) - 1;
1132    } else {
1133        msb = clz64(val);
1134        lsb = 63 - ctz64(val);
1135    }
1136    tcg_out_risbg(s, out, in, msb, lsb, 0, 1);
1137}
1138
1139static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
1140{
1141    static const S390Opcode ni_insns[4] = {
1142        RI_NILL, RI_NILH, RI_NIHL, RI_NIHH
1143    };
1144    static const S390Opcode nif_insns[2] = {
1145        RIL_NILF, RIL_NIHF
1146    };
1147    uint64_t valid = (type == TCG_TYPE_I32 ? 0xffffffffull : -1ull);
1148    int i;
1149
1150    /* Look for the zero-extensions.  */
1151    if ((val & valid) == 0xffffffff) {
1152        tgen_ext32u(s, dest, dest);
1153        return;
1154    }
1155    if ((val & valid) == 0xff) {
1156        tgen_ext8u(s, TCG_TYPE_I64, dest, dest);
1157        return;
1158    }
1159    if ((val & valid) == 0xffff) {
1160        tgen_ext16u(s, TCG_TYPE_I64, dest, dest);
1161        return;
1162    }
1163
1164    i = is_const_p16(~val & valid);
1165    if (i >= 0) {
1166        tcg_out_insn_RI(s, ni_insns[i], dest, val >> (i * 16));
1167        return;
1168    }
1169
1170    i = is_const_p32(~val & valid);
1171    tcg_debug_assert(i == 0 || type != TCG_TYPE_I32);
1172    if (i >= 0) {
1173        tcg_out_insn_RIL(s, nif_insns[i], dest, val >> (i * 32));
1174        return;
1175    }
1176
1177    if (risbg_mask(val)) {
1178        tgen_andi_risbg(s, dest, dest, val);
1179        return;
1180    }
1181
1182    g_assert_not_reached();
1183}
1184
1185static void tgen_ori(TCGContext *s, TCGReg dest, uint64_t val)
1186{
1187    static const S390Opcode oif_insns[2] = {
1188        RIL_OILF, RIL_OIHF
1189    };
1190
1191    int i;
1192
1193    i = is_const_p16(val);
1194    if (i >= 0) {
1195        tcg_out_insn_RI(s, oi_insns[i], dest, val >> (i * 16));
1196        return;
1197    }
1198
1199    i = is_const_p32(val);
1200    if (i >= 0) {
1201        tcg_out_insn_RIL(s, oif_insns[i], dest, val >> (i * 32));
1202        return;
1203    }
1204
1205    g_assert_not_reached();
1206}
1207
1208static void tgen_xori(TCGContext *s, TCGReg dest, uint64_t val)
1209{
1210    switch (is_const_p32(val)) {
1211    case 0:
1212        tcg_out_insn(s, RIL, XILF, dest, val);
1213        break;
1214    case 1:
1215        tcg_out_insn(s, RIL, XIHF, dest, val >> 32);
1216        break;
1217    default:
1218        g_assert_not_reached();
1219    }
1220}
1221
1222static int tgen_cmp2(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
1223                     TCGArg c2, bool c2const, bool need_carry, int *inv_cc)
1224{
1225    bool is_unsigned = is_unsigned_cond(c);
1226    TCGCond inv_c = tcg_invert_cond(c);
1227    S390Opcode op;
1228
1229    if (c2const) {
1230        if (c2 == 0) {
1231            if (!(is_unsigned && need_carry)) {
1232                if (type == TCG_TYPE_I32) {
1233                    tcg_out_insn(s, RR, LTR, r1, r1);
1234                } else {
1235                    tcg_out_insn(s, RRE, LTGR, r1, r1);
1236                }
1237                *inv_cc = tcg_cond_to_ltr_cond[inv_c];
1238                return tcg_cond_to_ltr_cond[c];
1239            }
1240        }
1241
1242        if (!is_unsigned && c2 == (int16_t)c2) {
1243            op = (type == TCG_TYPE_I32 ? RI_CHI : RI_CGHI);
1244            tcg_out_insn_RI(s, op, r1, c2);
1245            goto exit;
1246        }
1247
1248        if (type == TCG_TYPE_I32) {
1249            op = (is_unsigned ? RIL_CLFI : RIL_CFI);
1250            tcg_out_insn_RIL(s, op, r1, c2);
1251            goto exit;
1252        }
1253
1254        /*
1255         * Constraints are for a signed 33-bit operand, which is a
1256         * convenient superset of this signed/unsigned test.
1257         */
1258        if (c2 == (is_unsigned ? (TCGArg)(uint32_t)c2 : (TCGArg)(int32_t)c2)) {
1259            op = (is_unsigned ? RIL_CLGFI : RIL_CGFI);
1260            tcg_out_insn_RIL(s, op, r1, c2);
1261            goto exit;
1262        }
1263
1264        /* Load everything else into a register. */
1265        tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, c2);
1266        c2 = TCG_TMP0;
1267    }
1268
1269    if (type == TCG_TYPE_I32) {
1270        op = (is_unsigned ? RR_CLR : RR_CR);
1271        tcg_out_insn_RR(s, op, r1, c2);
1272    } else {
1273        op = (is_unsigned ? RRE_CLGR : RRE_CGR);
1274        tcg_out_insn_RRE(s, op, r1, c2);
1275    }
1276
1277 exit:
1278    *inv_cc = tcg_cond_to_s390_cond[inv_c];
1279    return tcg_cond_to_s390_cond[c];
1280}
1281
1282static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
1283                    TCGArg c2, bool c2const, bool need_carry)
1284{
1285    int inv_cc;
1286    return tgen_cmp2(s, type, c, r1, c2, c2const, need_carry, &inv_cc);
1287}
1288
1289static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
1290                         TCGReg dest, TCGReg c1, TCGArg c2, int c2const)
1291{
1292    int cc;
1293
1294    /* With LOC2, we can always emit the minimum 3 insns.  */
1295    if (HAVE_FACILITY(LOAD_ON_COND2)) {
1296        /* Emit: d = 0, d = (cc ? 1 : d).  */
1297        cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
1298        tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
1299        tcg_out_insn(s, RIEg, LOCGHI, dest, 1, cc);
1300        return;
1301    }
1302
1303 restart:
1304    switch (cond) {
1305    case TCG_COND_NE:
1306        /* X != 0 is X > 0.  */
1307        if (c2const && c2 == 0) {
1308            cond = TCG_COND_GTU;
1309        } else {
1310            break;
1311        }
1312        /* fallthru */
1313
1314    case TCG_COND_GTU:
1315    case TCG_COND_GT:
1316        /* The result of a compare has CC=2 for GT and CC=3 unused.
1317           ADD LOGICAL WITH CARRY considers (CC & 2) the carry bit.  */
1318        tgen_cmp(s, type, cond, c1, c2, c2const, true);
1319        tcg_out_movi(s, type, dest, 0);
1320        tcg_out_insn(s, RRE, ALCGR, dest, dest);
1321        return;
1322
1323    case TCG_COND_EQ:
1324        /* X == 0 is X <= 0.  */
1325        if (c2const && c2 == 0) {
1326            cond = TCG_COND_LEU;
1327        } else {
1328            break;
1329        }
1330        /* fallthru */
1331
1332    case TCG_COND_LEU:
1333    case TCG_COND_LE:
1334        /* As above, but we're looking for borrow, or !carry.
1335           The second insn computes d - d - borrow, or -1 for true
1336           and 0 for false.  So we must mask to 1 bit afterward.  */
1337        tgen_cmp(s, type, cond, c1, c2, c2const, true);
1338        tcg_out_insn(s, RRE, SLBGR, dest, dest);
1339        tgen_andi(s, type, dest, 1);
1340        return;
1341
1342    case TCG_COND_GEU:
1343    case TCG_COND_LTU:
1344    case TCG_COND_LT:
1345    case TCG_COND_GE:
1346        /* Swap operands so that we can use LEU/GTU/GT/LE.  */
1347        if (!c2const) {
1348            TCGReg t = c1;
1349            c1 = c2;
1350            c2 = t;
1351            cond = tcg_swap_cond(cond);
1352            goto restart;
1353        }
1354        break;
1355
1356    default:
1357        g_assert_not_reached();
1358    }
1359
1360    cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
1361    /* Emit: d = 0, t = 1, d = (cc ? t : d).  */
1362    tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
1363    tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 1);
1364    tcg_out_insn(s, RRFc, LOCGR, dest, TCG_TMP0, cc);
1365}
1366
1367static void tgen_movcond_int(TCGContext *s, TCGType type, TCGReg dest,
1368                             TCGArg v3, int v3const, TCGReg v4,
1369                             int cc, int inv_cc)
1370{
1371    TCGReg src;
1372
1373    if (v3const) {
1374        if (dest == v4) {
1375            if (HAVE_FACILITY(LOAD_ON_COND2)) {
1376                /* Emit: if (cc) dest = v3. */
1377                tcg_out_insn(s, RIEg, LOCGHI, dest, v3, cc);
1378                return;
1379            }
1380            tcg_out_insn(s, RI, LGHI, TCG_TMP0, v3);
1381            src = TCG_TMP0;
1382        } else {
1383            /* LGR+LOCGHI is larger than LGHI+LOCGR. */
1384            tcg_out_insn(s, RI, LGHI, dest, v3);
1385            cc = inv_cc;
1386            src = v4;
1387        }
1388    } else {
1389        if (HAVE_FACILITY(MISC_INSN_EXT3)) {
1390            /* Emit: dest = cc ? v3 : v4. */
1391            tcg_out_insn(s, RRFam, SELGR, dest, v3, v4, cc);
1392            return;
1393        }
1394        if (dest == v4) {
1395            src = v3;
1396        } else {
1397            tcg_out_mov(s, type, dest, v3);
1398            cc = inv_cc;
1399            src = v4;
1400        }
1401    }
1402
1403    /* Emit: if (cc) dest = src. */
1404    tcg_out_insn(s, RRFc, LOCGR, dest, src, cc);
1405}
1406
1407static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest,
1408                         TCGReg c1, TCGArg c2, int c2const,
1409                         TCGArg v3, int v3const, TCGReg v4)
1410{
1411    int cc, inv_cc;
1412
1413    cc = tgen_cmp2(s, type, c, c1, c2, c2const, false, &inv_cc);
1414    tgen_movcond_int(s, type, dest, v3, v3const, v4, cc, inv_cc);
1415}
1416
1417static void tgen_clz(TCGContext *s, TCGReg dest, TCGReg a1,
1418                     TCGArg a2, int a2const)
1419{
1420    /* Since this sets both R and R+1, we have no choice but to store the
1421       result into R0, allowing R1 == TCG_TMP0 to be clobbered as well.  */
1422    QEMU_BUILD_BUG_ON(TCG_TMP0 != TCG_REG_R1);
1423    tcg_out_insn(s, RRE, FLOGR, TCG_REG_R0, a1);
1424
1425    if (a2const && a2 == 64) {
1426        tcg_out_mov(s, TCG_TYPE_I64, dest, TCG_REG_R0);
1427        return;
1428    }
1429
1430    /*
1431     * Conditions from FLOGR are:
1432     *   2 -> one bit found
1433     *   8 -> no one bit found
1434     */
1435    tgen_movcond_int(s, TCG_TYPE_I64, dest, a2, a2const, TCG_REG_R0, 8, 2);
1436}
1437
1438static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1439{
1440    /* With MIE3, and bit 0 of m4 set, we get the complete result. */
1441    if (HAVE_FACILITY(MISC_INSN_EXT3)) {
1442        if (type == TCG_TYPE_I32) {
1443            tgen_ext32u(s, dest, src);
1444            src = dest;
1445        }
1446        tcg_out_insn(s, RRFc, POPCNT, dest, src, 8);
1447        return;
1448    }
1449
1450    /* Without MIE3, each byte gets the count of bits for the byte. */
1451    tcg_out_insn(s, RRFc, POPCNT, dest, src, 0);
1452
1453    /* Multiply to sum each byte at the top of the word. */
1454    if (type == TCG_TYPE_I32) {
1455        tcg_out_insn(s, RIL, MSFI, dest, 0x01010101);
1456        tcg_out_sh32(s, RS_SRL, dest, TCG_REG_NONE, 24);
1457    } else {
1458        tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 0x0101010101010101ull);
1459        tcg_out_insn(s, RRE, MSGR, dest, TCG_TMP0);
1460        tcg_out_sh64(s, RSY_SRLG, dest, dest, TCG_REG_NONE, 56);
1461    }
1462}
1463
1464static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
1465                         int ofs, int len, int z)
1466{
1467    int lsb = (63 - ofs);
1468    int msb = lsb - (len - 1);
1469    tcg_out_risbg(s, dest, src, msb, lsb, ofs, z);
1470}
1471
1472static void tgen_extract(TCGContext *s, TCGReg dest, TCGReg src,
1473                         int ofs, int len)
1474{
1475    tcg_out_risbg(s, dest, src, 64 - len, 63, 64 - ofs, 1);
1476}
1477
1478static void tgen_gotoi(TCGContext *s, int cc, const tcg_insn_unit *dest)
1479{
1480    ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
1481    if (off == (int16_t)off) {
1482        tcg_out_insn(s, RI, BRC, cc, off);
1483    } else if (off == (int32_t)off) {
1484        tcg_out_insn(s, RIL, BRCL, cc, off);
1485    } else {
1486        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
1487        tcg_out_insn(s, RR, BCR, cc, TCG_TMP0);
1488    }
1489}
1490
1491static void tgen_branch(TCGContext *s, int cc, TCGLabel *l)
1492{
1493    if (l->has_value) {
1494        tgen_gotoi(s, cc, l->u.value_ptr);
1495    } else {
1496        tcg_out16(s, RI_BRC | (cc << 4));
1497        tcg_out_reloc(s, s->code_ptr, R_390_PC16DBL, l, 2);
1498        s->code_ptr += 1;
1499    }
1500}
1501
1502static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc,
1503                                TCGReg r1, TCGReg r2, TCGLabel *l)
1504{
1505    tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
1506    /* Format RIE-b */
1507    tcg_out16(s, (opc & 0xff00) | (r1 << 4) | r2);
1508    tcg_out16(s, 0);
1509    tcg_out16(s, cc << 12 | (opc & 0xff));
1510}
1511
1512static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc,
1513                                    TCGReg r1, int i2, TCGLabel *l)
1514{
1515    tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
1516    /* Format RIE-c */
1517    tcg_out16(s, (opc & 0xff00) | (r1 << 4) | cc);
1518    tcg_out16(s, 0);
1519    tcg_out16(s, (i2 << 8) | (opc & 0xff));
1520}
1521
1522static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
1523                        TCGReg r1, TCGArg c2, int c2const, TCGLabel *l)
1524{
1525    int cc;
1526    bool is_unsigned = is_unsigned_cond(c);
1527    bool in_range;
1528    S390Opcode opc;
1529
1530    cc = tcg_cond_to_s390_cond[c];
1531
1532    if (!c2const) {
1533        opc = (type == TCG_TYPE_I32
1534               ? (is_unsigned ? RIEb_CLRJ : RIEb_CRJ)
1535               : (is_unsigned ? RIEb_CLGRJ : RIEb_CGRJ));
1536        tgen_compare_branch(s, opc, cc, r1, c2, l);
1537        return;
1538    }
1539
1540    /*
1541     * COMPARE IMMEDIATE AND BRANCH RELATIVE has an 8-bit immediate field.
1542     * If the immediate we've been given does not fit that range, we'll
1543     * fall back to separate compare and branch instructions using the
1544     * larger comparison range afforded by COMPARE IMMEDIATE.
1545     */
1546    if (type == TCG_TYPE_I32) {
1547        if (is_unsigned) {
1548            opc = RIEc_CLIJ;
1549            in_range = (uint32_t)c2 == (uint8_t)c2;
1550        } else {
1551            opc = RIEc_CIJ;
1552            in_range = (int32_t)c2 == (int8_t)c2;
1553        }
1554    } else {
1555        if (is_unsigned) {
1556            opc = RIEc_CLGIJ;
1557            in_range = (uint64_t)c2 == (uint8_t)c2;
1558        } else {
1559            opc = RIEc_CGIJ;
1560            in_range = (int64_t)c2 == (int8_t)c2;
1561        }
1562    }
1563    if (in_range) {
1564        tgen_compare_imm_branch(s, opc, cc, r1, c2, l);
1565        return;
1566    }
1567
1568    cc = tgen_cmp(s, type, c, r1, c2, c2const, false);
1569    tgen_branch(s, cc, l);
1570}
1571
1572static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *dest)
1573{
1574    ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
1575    if (off == (int32_t)off) {
1576        tcg_out_insn(s, RIL, BRASL, TCG_REG_R14, off);
1577    } else {
1578        tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
1579        tcg_out_insn(s, RR, BASR, TCG_REG_R14, TCG_TMP0);
1580    }
1581}
1582
1583static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest,
1584                         const TCGHelperInfo *info)
1585{
1586    tcg_out_call_int(s, dest);
1587}
1588
1589static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,
1590                                   TCGReg base, TCGReg index, int disp)
1591{
1592    switch (opc & (MO_SSIZE | MO_BSWAP)) {
1593    case MO_UB:
1594        tcg_out_insn(s, RXY, LLGC, data, base, index, disp);
1595        break;
1596    case MO_SB:
1597        tcg_out_insn(s, RXY, LGB, data, base, index, disp);
1598        break;
1599
1600    case MO_UW | MO_BSWAP:
1601        /* swapped unsigned halfword load with upper bits zeroed */
1602        tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
1603        tgen_ext16u(s, TCG_TYPE_I64, data, data);
1604        break;
1605    case MO_UW:
1606        tcg_out_insn(s, RXY, LLGH, data, base, index, disp);
1607        break;
1608
1609    case MO_SW | MO_BSWAP:
1610        /* swapped sign-extended halfword load */
1611        tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
1612        tgen_ext16s(s, TCG_TYPE_I64, data, data);
1613        break;
1614    case MO_SW:
1615        tcg_out_insn(s, RXY, LGH, data, base, index, disp);
1616        break;
1617
1618    case MO_UL | MO_BSWAP:
1619        /* swapped unsigned int load with upper bits zeroed */
1620        tcg_out_insn(s, RXY, LRV, data, base, index, disp);
1621        tgen_ext32u(s, data, data);
1622        break;
1623    case MO_UL:
1624        tcg_out_insn(s, RXY, LLGF, data, base, index, disp);
1625        break;
1626
1627    case MO_SL | MO_BSWAP:
1628        /* swapped sign-extended int load */
1629        tcg_out_insn(s, RXY, LRV, data, base, index, disp);
1630        tgen_ext32s(s, data, data);
1631        break;
1632    case MO_SL:
1633        tcg_out_insn(s, RXY, LGF, data, base, index, disp);
1634        break;
1635
1636    case MO_UQ | MO_BSWAP:
1637        tcg_out_insn(s, RXY, LRVG, data, base, index, disp);
1638        break;
1639    case MO_UQ:
1640        tcg_out_insn(s, RXY, LG, data, base, index, disp);
1641        break;
1642
1643    default:
1644        tcg_abort();
1645    }
1646}
1647
1648static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data,
1649                                   TCGReg base, TCGReg index, int disp)
1650{
1651    switch (opc & (MO_SIZE | MO_BSWAP)) {
1652    case MO_UB:
1653        if (disp >= 0 && disp < 0x1000) {
1654            tcg_out_insn(s, RX, STC, data, base, index, disp);
1655        } else {
1656            tcg_out_insn(s, RXY, STCY, data, base, index, disp);
1657        }
1658        break;
1659
1660    case MO_UW | MO_BSWAP:
1661        tcg_out_insn(s, RXY, STRVH, data, base, index, disp);
1662        break;
1663    case MO_UW:
1664        if (disp >= 0 && disp < 0x1000) {
1665            tcg_out_insn(s, RX, STH, data, base, index, disp);
1666        } else {
1667            tcg_out_insn(s, RXY, STHY, data, base, index, disp);
1668        }
1669        break;
1670
1671    case MO_UL | MO_BSWAP:
1672        tcg_out_insn(s, RXY, STRV, data, base, index, disp);
1673        break;
1674    case MO_UL:
1675        if (disp >= 0 && disp < 0x1000) {
1676            tcg_out_insn(s, RX, ST, data, base, index, disp);
1677        } else {
1678            tcg_out_insn(s, RXY, STY, data, base, index, disp);
1679        }
1680        break;
1681
1682    case MO_UQ | MO_BSWAP:
1683        tcg_out_insn(s, RXY, STRVG, data, base, index, disp);
1684        break;
1685    case MO_UQ:
1686        tcg_out_insn(s, RXY, STG, data, base, index, disp);
1687        break;
1688
1689    default:
1690        tcg_abort();
1691    }
1692}
1693
1694#if defined(CONFIG_SOFTMMU)
1695/* We're expecting to use a 20-bit negative offset on the tlb memory ops.  */
1696QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1697QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19));
1698
1699/* Load and compare a TLB entry, leaving the flags set.  Loads the TLB
1700   addend into R2.  Returns a register with the santitized guest address.  */
1701static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
1702                               int mem_index, bool is_ld)
1703{
1704    unsigned s_bits = opc & MO_SIZE;
1705    unsigned a_bits = get_alignment_bits(opc);
1706    unsigned s_mask = (1 << s_bits) - 1;
1707    unsigned a_mask = (1 << a_bits) - 1;
1708    int fast_off = TLB_MASK_TABLE_OFS(mem_index);
1709    int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
1710    int table_off = fast_off + offsetof(CPUTLBDescFast, table);
1711    int ofs, a_off;
1712    uint64_t tlb_mask;
1713
1714    tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE,
1715                 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1716    tcg_out_insn(s, RXY, NG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, mask_off);
1717    tcg_out_insn(s, RXY, AG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, table_off);
1718
1719    /* For aligned accesses, we check the first byte and include the alignment
1720       bits within the address.  For unaligned access, we check that we don't
1721       cross pages using the address of the last byte of the access.  */
1722    a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
1723    tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1724    if (a_off == 0) {
1725        tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask);
1726    } else {
1727        tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off);
1728        tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask);
1729    }
1730
1731    if (is_ld) {
1732        ofs = offsetof(CPUTLBEntry, addr_read);
1733    } else {
1734        ofs = offsetof(CPUTLBEntry, addr_write);
1735    }
1736    if (TARGET_LONG_BITS == 32) {
1737        tcg_out_insn(s, RX, C, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
1738    } else {
1739        tcg_out_insn(s, RXY, CG, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
1740    }
1741
1742    tcg_out_insn(s, RXY, LG, TCG_REG_R2, TCG_REG_R2, TCG_REG_NONE,
1743                 offsetof(CPUTLBEntry, addend));
1744
1745    if (TARGET_LONG_BITS == 32) {
1746        tgen_ext32u(s, TCG_REG_R3, addr_reg);
1747        return TCG_REG_R3;
1748    }
1749    return addr_reg;
1750}
1751
1752static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
1753                                TCGReg data, TCGReg addr,
1754                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1755{
1756    TCGLabelQemuLdst *label = new_ldst_label(s);
1757
1758    label->is_ld = is_ld;
1759    label->oi = oi;
1760    label->datalo_reg = data;
1761    label->addrlo_reg = addr;
1762    label->raddr = tcg_splitwx_to_rx(raddr);
1763    label->label_ptr[0] = label_ptr;
1764}
1765
1766static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1767{
1768    TCGReg addr_reg = lb->addrlo_reg;
1769    TCGReg data_reg = lb->datalo_reg;
1770    MemOpIdx oi = lb->oi;
1771    MemOp opc = get_memop(oi);
1772
1773    if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
1774                     (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1775        return false;
1776    }
1777
1778    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
1779    if (TARGET_LONG_BITS == 64) {
1780        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
1781    }
1782    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi);
1783    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr);
1784    tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]);
1785    tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
1786
1787    tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1788    return true;
1789}
1790
1791static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1792{
1793    TCGReg addr_reg = lb->addrlo_reg;
1794    TCGReg data_reg = lb->datalo_reg;
1795    MemOpIdx oi = lb->oi;
1796    MemOp opc = get_memop(oi);
1797
1798    if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
1799                     (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1800        return false;
1801    }
1802
1803    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
1804    if (TARGET_LONG_BITS == 64) {
1805        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
1806    }
1807    switch (opc & MO_SIZE) {
1808    case MO_UB:
1809        tgen_ext8u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
1810        break;
1811    case MO_UW:
1812        tgen_ext16u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
1813        break;
1814    case MO_UL:
1815        tgen_ext32u(s, TCG_REG_R4, data_reg);
1816        break;
1817    case MO_UQ:
1818        tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
1819        break;
1820    default:
1821        tcg_abort();
1822    }
1823    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi);
1824    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr);
1825    tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1826
1827    tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1828    return true;
1829}
1830#else
1831static void tcg_out_test_alignment(TCGContext *s, bool is_ld,
1832                                   TCGReg addrlo, unsigned a_bits)
1833{
1834    unsigned a_mask = (1 << a_bits) - 1;
1835    TCGLabelQemuLdst *l = new_ldst_label(s);
1836
1837    l->is_ld = is_ld;
1838    l->addrlo_reg = addrlo;
1839
1840    /* We are expecting a_bits to max out at 7, much lower than TMLL. */
1841    tcg_debug_assert(a_bits < 16);
1842    tcg_out_insn(s, RI, TMLL, addrlo, a_mask);
1843
1844    tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */
1845    l->label_ptr[0] = s->code_ptr;
1846    s->code_ptr += 1;
1847
1848    l->raddr = tcg_splitwx_to_rx(s->code_ptr);
1849}
1850
1851static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
1852{
1853    if (!patch_reloc(l->label_ptr[0], R_390_PC16DBL,
1854                     (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1855        return false;
1856    }
1857
1858    tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R3, l->addrlo_reg);
1859    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
1860
1861    /* "Tail call" to the helper, with the return address back inline. */
1862    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R14, (uintptr_t)l->raddr);
1863    tgen_gotoi(s, S390_CC_ALWAYS, (const void *)(l->is_ld ? helper_unaligned_ld
1864                                                 : helper_unaligned_st));
1865    return true;
1866}
1867
1868static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1869{
1870    return tcg_out_fail_alignment(s, l);
1871}
1872
1873static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1874{
1875    return tcg_out_fail_alignment(s, l);
1876}
1877
1878static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg,
1879                                  TCGReg *index_reg, tcg_target_long *disp)
1880{
1881    if (TARGET_LONG_BITS == 32) {
1882        tgen_ext32u(s, TCG_TMP0, *addr_reg);
1883        *addr_reg = TCG_TMP0;
1884    }
1885    if (guest_base < 0x80000) {
1886        *index_reg = TCG_REG_NONE;
1887        *disp = guest_base;
1888    } else {
1889        *index_reg = TCG_GUEST_BASE_REG;
1890        *disp = 0;
1891    }
1892}
1893#endif /* CONFIG_SOFTMMU */
1894
1895static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
1896                            MemOpIdx oi)
1897{
1898    MemOp opc = get_memop(oi);
1899#ifdef CONFIG_SOFTMMU
1900    unsigned mem_index = get_mmuidx(oi);
1901    tcg_insn_unit *label_ptr;
1902    TCGReg base_reg;
1903
1904    base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1);
1905
1906    tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
1907    label_ptr = s->code_ptr;
1908    s->code_ptr += 1;
1909
1910    tcg_out_qemu_ld_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
1911
1912    add_qemu_ldst_label(s, 1, oi, data_reg, addr_reg, s->code_ptr, label_ptr);
1913#else
1914    TCGReg index_reg;
1915    tcg_target_long disp;
1916    unsigned a_bits = get_alignment_bits(opc);
1917
1918    if (a_bits) {
1919        tcg_out_test_alignment(s, true, addr_reg, a_bits);
1920    }
1921    tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp);
1922    tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, index_reg, disp);
1923#endif
1924}
1925
1926static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
1927                            MemOpIdx oi)
1928{
1929    MemOp opc = get_memop(oi);
1930#ifdef CONFIG_SOFTMMU
1931    unsigned mem_index = get_mmuidx(oi);
1932    tcg_insn_unit *label_ptr;
1933    TCGReg base_reg;
1934
1935    base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0);
1936
1937    tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
1938    label_ptr = s->code_ptr;
1939    s->code_ptr += 1;
1940
1941    tcg_out_qemu_st_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
1942
1943    add_qemu_ldst_label(s, 0, oi, data_reg, addr_reg, s->code_ptr, label_ptr);
1944#else
1945    TCGReg index_reg;
1946    tcg_target_long disp;
1947    unsigned a_bits = get_alignment_bits(opc);
1948
1949    if (a_bits) {
1950        tcg_out_test_alignment(s, false, addr_reg, a_bits);
1951    }
1952    tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp);
1953    tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, index_reg, disp);
1954#endif
1955}
1956
1957static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
1958{
1959    /* Reuse the zeroing that exists for goto_ptr.  */
1960    if (a0 == 0) {
1961        tgen_gotoi(s, S390_CC_ALWAYS, tcg_code_gen_epilogue);
1962    } else {
1963        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, a0);
1964        tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr);
1965    }
1966}
1967
1968static void tcg_out_goto_tb(TCGContext *s, int which)
1969{
1970    /*
1971     * Branch displacement must be aligned for atomic patching;
1972     * see if we need to add extra nop before branch
1973     */
1974    if (!QEMU_PTR_IS_ALIGNED(s->code_ptr + 1, 4)) {
1975        tcg_out16(s, NOP);
1976    }
1977    tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4));
1978    set_jmp_insn_offset(s, which);
1979    s->code_ptr += 2;
1980    set_jmp_reset_offset(s, which);
1981}
1982
1983void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
1984                              uintptr_t jmp_rx, uintptr_t jmp_rw)
1985{
1986    if (!HAVE_FACILITY(GEN_INST_EXT)) {
1987        return;
1988    }
1989    /* patch the branch destination */
1990    uintptr_t addr = tb->jmp_target_addr[n];
1991    intptr_t disp = addr - (jmp_rx - 2);
1992    qatomic_set((int32_t *)jmp_rw, disp / 2);
1993    /* no need to flush icache explicitly */
1994}
1995
/*
 * Expand to the two case labels for the _i32 and _i64 flavours of
 * opcode X.  The colon after the second label is written at the point
 * of use, e.g. "OP_32_64(ld8u):".
 */
#define OP_32_64(x)                          \
    case glue(glue(INDEX_op_, x), _i32):     \
    case glue(glue(INDEX_op_, x), _i64)
1999
2000static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
2001                              const TCGArg args[TCG_MAX_OP_ARGS],
2002                              const int const_args[TCG_MAX_OP_ARGS])
2003{
2004    S390Opcode op, op2;
2005    TCGArg a0, a1, a2;
2006
2007    switch (opc) {
2008    case INDEX_op_goto_ptr:
2009        a0 = args[0];
2010        tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, a0);
2011        break;
2012
2013    OP_32_64(ld8u):
2014        /* ??? LLC (RXY format) is only present with the extended-immediate
2015           facility, whereas LLGC is always present.  */
2016        tcg_out_mem(s, 0, RXY_LLGC, args[0], args[1], TCG_REG_NONE, args[2]);
2017        break;
2018
2019    OP_32_64(ld8s):
2020        /* ??? LB is no smaller than LGB, so no point to using it.  */
2021        tcg_out_mem(s, 0, RXY_LGB, args[0], args[1], TCG_REG_NONE, args[2]);
2022        break;
2023
2024    OP_32_64(ld16u):
2025        /* ??? LLH (RXY format) is only present with the extended-immediate
2026           facility, whereas LLGH is always present.  */
2027        tcg_out_mem(s, 0, RXY_LLGH, args[0], args[1], TCG_REG_NONE, args[2]);
2028        break;
2029
2030    case INDEX_op_ld16s_i32:
2031        tcg_out_mem(s, RX_LH, RXY_LHY, args[0], args[1], TCG_REG_NONE, args[2]);
2032        break;
2033
2034    case INDEX_op_ld_i32:
2035        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2036        break;
2037
2038    OP_32_64(st8):
2039        tcg_out_mem(s, RX_STC, RXY_STCY, args[0], args[1],
2040                    TCG_REG_NONE, args[2]);
2041        break;
2042
2043    OP_32_64(st16):
2044        tcg_out_mem(s, RX_STH, RXY_STHY, args[0], args[1],
2045                    TCG_REG_NONE, args[2]);
2046        break;
2047
2048    case INDEX_op_st_i32:
2049        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2050        break;
2051
2052    case INDEX_op_add_i32:
2053        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2054        if (const_args[2]) {
2055        do_addi_32:
2056            if (a0 == a1) {
2057                if (a2 == (int16_t)a2) {
2058                    tcg_out_insn(s, RI, AHI, a0, a2);
2059                    break;
2060                }
2061                tcg_out_insn(s, RIL, AFI, a0, a2);
2062                break;
2063            }
2064            tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
2065        } else if (a0 == a1) {
2066            tcg_out_insn(s, RR, AR, a0, a2);
2067        } else {
2068            tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
2069        }
2070        break;
2071    case INDEX_op_sub_i32:
2072        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2073        if (const_args[2]) {
2074            a2 = -a2;
2075            goto do_addi_32;
2076        } else if (a0 == a1) {
2077            tcg_out_insn(s, RR, SR, a0, a2);
2078        } else {
2079            tcg_out_insn(s, RRFa, SRK, a0, a1, a2);
2080        }
2081        break;
2082
2083    case INDEX_op_and_i32:
2084        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2085        if (const_args[2]) {
2086            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2087            tgen_andi(s, TCG_TYPE_I32, a0, a2);
2088        } else if (a0 == a1) {
2089            tcg_out_insn(s, RR, NR, a0, a2);
2090        } else {
2091            tcg_out_insn(s, RRFa, NRK, a0, a1, a2);
2092        }
2093        break;
2094    case INDEX_op_or_i32:
2095        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2096        if (const_args[2]) {
2097            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2098            tgen_ori(s, a0, a2);
2099        } else if (a0 == a1) {
2100            tcg_out_insn(s, RR, OR, a0, a2);
2101        } else {
2102            tcg_out_insn(s, RRFa, ORK, a0, a1, a2);
2103        }
2104        break;
2105    case INDEX_op_xor_i32:
2106        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2107        if (const_args[2]) {
2108            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2109            tcg_out_insn(s, RIL, XILF, a0, a2);
2110        } else if (a0 == a1) {
2111            tcg_out_insn(s, RR, XR, args[0], args[2]);
2112        } else {
2113            tcg_out_insn(s, RRFa, XRK, a0, a1, a2);
2114        }
2115        break;
2116
2117    case INDEX_op_andc_i32:
2118        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2119        if (const_args[2]) {
2120            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2121            tgen_andi(s, TCG_TYPE_I32, a0, (uint32_t)~a2);
2122	} else {
2123            tcg_out_insn(s, RRFa, NCRK, a0, a1, a2);
2124	}
2125        break;
2126    case INDEX_op_orc_i32:
2127        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2128        if (const_args[2]) {
2129            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2130            tgen_ori(s, a0, (uint32_t)~a2);
2131        } else {
2132            tcg_out_insn(s, RRFa, OCRK, a0, a1, a2);
2133        }
2134        break;
2135    case INDEX_op_eqv_i32:
2136        a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
2137        if (const_args[2]) {
2138            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2139            tcg_out_insn(s, RIL, XILF, a0, ~a2);
2140        } else {
2141            tcg_out_insn(s, RRFa, NXRK, a0, a1, a2);
2142        }
2143        break;
2144    case INDEX_op_nand_i32:
2145        tcg_out_insn(s, RRFa, NNRK, args[0], args[1], args[2]);
2146        break;
2147    case INDEX_op_nor_i32:
2148        tcg_out_insn(s, RRFa, NORK, args[0], args[1], args[2]);
2149        break;
2150
2151    case INDEX_op_neg_i32:
2152        tcg_out_insn(s, RR, LCR, args[0], args[1]);
2153        break;
2154    case INDEX_op_not_i32:
2155        tcg_out_insn(s, RRFa, NORK, args[0], args[1], args[1]);
2156        break;
2157
2158    case INDEX_op_mul_i32:
2159        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2160        if (const_args[2]) {
2161            tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
2162            if (a2 == (int16_t)a2) {
2163                tcg_out_insn(s, RI, MHI, a0, a2);
2164            } else {
2165                tcg_out_insn(s, RIL, MSFI, a0, a2);
2166            }
2167        } else if (a0 == a1) {
2168            tcg_out_insn(s, RRE, MSR, a0, a2);
2169        } else {
2170            tcg_out_insn(s, RRFa, MSRKC, a0, a1, a2);
2171        }
2172        break;
2173
2174    case INDEX_op_div2_i32:
2175        tcg_debug_assert(args[0] == args[2]);
2176        tcg_debug_assert(args[1] == args[3]);
2177        tcg_debug_assert((args[1] & 1) == 0);
2178        tcg_debug_assert(args[0] == args[1] + 1);
2179        tcg_out_insn(s, RR, DR, args[1], args[4]);
2180        break;
2181    case INDEX_op_divu2_i32:
2182        tcg_debug_assert(args[0] == args[2]);
2183        tcg_debug_assert(args[1] == args[3]);
2184        tcg_debug_assert((args[1] & 1) == 0);
2185        tcg_debug_assert(args[0] == args[1] + 1);
2186        tcg_out_insn(s, RRE, DLR, args[1], args[4]);
2187        break;
2188
2189    case INDEX_op_shl_i32:
2190        op = RS_SLL;
2191        op2 = RSY_SLLK;
2192    do_shift32:
2193        a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2194        if (a0 == a1) {
2195            if (const_args[2]) {
2196                tcg_out_sh32(s, op, a0, TCG_REG_NONE, a2);
2197            } else {
2198                tcg_out_sh32(s, op, a0, a2, 0);
2199            }
2200        } else {
2201            /* Using tcg_out_sh64 here for the format; it is a 32-bit shift.  */
2202            if (const_args[2]) {
2203                tcg_out_sh64(s, op2, a0, a1, TCG_REG_NONE, a2);
2204            } else {
2205                tcg_out_sh64(s, op2, a0, a1, a2, 0);
2206            }
2207        }
2208        break;
2209    case INDEX_op_shr_i32:
2210        op = RS_SRL;
2211        op2 = RSY_SRLK;
2212        goto do_shift32;
2213    case INDEX_op_sar_i32:
2214        op = RS_SRA;
2215        op2 = RSY_SRAK;
2216        goto do_shift32;
2217
2218    case INDEX_op_rotl_i32:
2219        /* ??? Using tcg_out_sh64 here for the format; it is a 32-bit rol.  */
2220        if (const_args[2]) {
2221            tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_REG_NONE, args[2]);
2222        } else {
2223            tcg_out_sh64(s, RSY_RLL, args[0], args[1], args[2], 0);
2224        }
2225        break;
2226    case INDEX_op_rotr_i32:
2227        if (const_args[2]) {
2228            tcg_out_sh64(s, RSY_RLL, args[0], args[1],
2229                         TCG_REG_NONE, (32 - args[2]) & 31);
2230        } else {
2231            tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
2232            tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_TMP0, 0);
2233        }
2234        break;
2235
2236    case INDEX_op_ext8s_i32:
2237        tgen_ext8s(s, TCG_TYPE_I32, args[0], args[1]);
2238        break;
2239    case INDEX_op_ext16s_i32:
2240        tgen_ext16s(s, TCG_TYPE_I32, args[0], args[1]);
2241        break;
2242    case INDEX_op_ext8u_i32:
2243        tgen_ext8u(s, TCG_TYPE_I32, args[0], args[1]);
2244        break;
2245    case INDEX_op_ext16u_i32:
2246        tgen_ext16u(s, TCG_TYPE_I32, args[0], args[1]);
2247        break;
2248
2249    case INDEX_op_bswap16_i32:
2250        a0 = args[0], a1 = args[1], a2 = args[2];
2251        tcg_out_insn(s, RRE, LRVR, a0, a1);
2252        if (a2 & TCG_BSWAP_OS) {
2253            tcg_out_sh32(s, RS_SRA, a0, TCG_REG_NONE, 16);
2254        } else {
2255            tcg_out_sh32(s, RS_SRL, a0, TCG_REG_NONE, 16);
2256        }
2257        break;
2258    case INDEX_op_bswap16_i64:
2259        a0 = args[0], a1 = args[1], a2 = args[2];
2260        tcg_out_insn(s, RRE, LRVGR, a0, a1);
2261        if (a2 & TCG_BSWAP_OS) {
2262            tcg_out_sh64(s, RSY_SRAG, a0, a0, TCG_REG_NONE, 48);
2263        } else {
2264            tcg_out_sh64(s, RSY_SRLG, a0, a0, TCG_REG_NONE, 48);
2265        }
2266        break;
2267
2268    case INDEX_op_bswap32_i32:
2269        tcg_out_insn(s, RRE, LRVR, args[0], args[1]);
2270        break;
2271    case INDEX_op_bswap32_i64:
2272        a0 = args[0], a1 = args[1], a2 = args[2];
2273        tcg_out_insn(s, RRE, LRVR, a0, a1);
2274        if (a2 & TCG_BSWAP_OS) {
2275            tgen_ext32s(s, a0, a0);
2276        } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2277            tgen_ext32u(s, a0, a0);
2278        }
2279        break;
2280
2281    case INDEX_op_add2_i32:
2282        if (const_args[4]) {
2283            tcg_out_insn(s, RIL, ALFI, args[0], args[4]);
2284        } else {
2285            tcg_out_insn(s, RR, ALR, args[0], args[4]);
2286        }
2287        tcg_out_insn(s, RRE, ALCR, args[1], args[5]);
2288        break;
2289    case INDEX_op_sub2_i32:
2290        if (const_args[4]) {
2291            tcg_out_insn(s, RIL, SLFI, args[0], args[4]);
2292        } else {
2293            tcg_out_insn(s, RR, SLR, args[0], args[4]);
2294        }
2295        tcg_out_insn(s, RRE, SLBR, args[1], args[5]);
2296        break;
2297
2298    case INDEX_op_br:
2299        tgen_branch(s, S390_CC_ALWAYS, arg_label(args[0]));
2300        break;
2301
2302    case INDEX_op_brcond_i32:
2303        tgen_brcond(s, TCG_TYPE_I32, args[2], args[0],
2304                    args[1], const_args[1], arg_label(args[3]));
2305        break;
2306    case INDEX_op_setcond_i32:
2307        tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1],
2308                     args[2], const_args[2]);
2309        break;
2310    case INDEX_op_movcond_i32:
2311        tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1],
2312                     args[2], const_args[2], args[3], const_args[3], args[4]);
2313        break;
2314
2315    case INDEX_op_qemu_ld_i32:
2316        /* ??? Technically we can use a non-extending instruction.  */
2317    case INDEX_op_qemu_ld_i64:
2318        tcg_out_qemu_ld(s, args[0], args[1], args[2]);
2319        break;
2320    case INDEX_op_qemu_st_i32:
2321    case INDEX_op_qemu_st_i64:
2322        tcg_out_qemu_st(s, args[0], args[1], args[2]);
2323        break;
2324
2325    case INDEX_op_ld16s_i64:
2326        tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]);
2327        break;
2328    case INDEX_op_ld32u_i64:
2329        tcg_out_mem(s, 0, RXY_LLGF, args[0], args[1], TCG_REG_NONE, args[2]);
2330        break;
2331    case INDEX_op_ld32s_i64:
2332        tcg_out_mem(s, 0, RXY_LGF, args[0], args[1], TCG_REG_NONE, args[2]);
2333        break;
2334    case INDEX_op_ld_i64:
2335        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2336        break;
2337
2338    case INDEX_op_st32_i64:
2339        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2340        break;
2341    case INDEX_op_st_i64:
2342        tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2343        break;
2344
2345    case INDEX_op_add_i64:
2346        a0 = args[0], a1 = args[1], a2 = args[2];
2347        if (const_args[2]) {
2348        do_addi_64:
2349            if (a0 == a1) {
2350                if (a2 == (int16_t)a2) {
2351                    tcg_out_insn(s, RI, AGHI, a0, a2);
2352                    break;
2353                }
2354                if (a2 == (int32_t)a2) {
2355                    tcg_out_insn(s, RIL, AGFI, a0, a2);
2356                    break;
2357                }
2358                if (a2 == (uint32_t)a2) {
2359                    tcg_out_insn(s, RIL, ALGFI, a0, a2);
2360                    break;
2361                }
2362                if (-a2 == (uint32_t)-a2) {
2363                    tcg_out_insn(s, RIL, SLGFI, a0, -a2);
2364                    break;
2365                }
2366            }
2367            tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
2368        } else if (a0 == a1) {
2369            tcg_out_insn(s, RRE, AGR, a0, a2);
2370        } else {
2371            tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
2372        }
2373        break;
2374    case INDEX_op_sub_i64:
2375        a0 = args[0], a1 = args[1], a2 = args[2];
2376        if (const_args[2]) {
2377            a2 = -a2;
2378            goto do_addi_64;
2379        } else {
2380            tcg_out_insn(s, RRFa, SGRK, a0, a1, a2);
2381        }
2382        break;
2383
2384    case INDEX_op_and_i64:
2385        a0 = args[0], a1 = args[1], a2 = args[2];
2386        if (const_args[2]) {
2387            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2388            tgen_andi(s, TCG_TYPE_I64, args[0], args[2]);
2389        } else {
2390            tcg_out_insn(s, RRFa, NGRK, a0, a1, a2);
2391        }
2392        break;
2393    case INDEX_op_or_i64:
2394        a0 = args[0], a1 = args[1], a2 = args[2];
2395        if (const_args[2]) {
2396            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2397            tgen_ori(s, a0, a2);
2398        } else {
2399            tcg_out_insn(s, RRFa, OGRK, a0, a1, a2);
2400        }
2401        break;
2402    case INDEX_op_xor_i64:
2403        a0 = args[0], a1 = args[1], a2 = args[2];
2404        if (const_args[2]) {
2405            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2406            tgen_xori(s, a0, a2);
2407        } else {
2408            tcg_out_insn(s, RRFa, XGRK, a0, a1, a2);
2409        }
2410        break;
2411
2412    case INDEX_op_andc_i64:
2413        a0 = args[0], a1 = args[1], a2 = args[2];
2414        if (const_args[2]) {
2415            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2416            tgen_andi(s, TCG_TYPE_I64, a0, ~a2);
2417        } else {
2418            tcg_out_insn(s, RRFa, NCGRK, a0, a1, a2);
2419        }
2420        break;
2421    case INDEX_op_orc_i64:
2422        a0 = args[0], a1 = args[1], a2 = args[2];
2423        if (const_args[2]) {
2424            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2425            tgen_ori(s, a0, ~a2);
2426        } else {
2427            tcg_out_insn(s, RRFa, OCGRK, a0, a1, a2);
2428        }
2429        break;
2430    case INDEX_op_eqv_i64:
2431        a0 = args[0], a1 = args[1], a2 = args[2];
2432        if (const_args[2]) {
2433            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2434            tgen_xori(s, a0, ~a2);
2435        } else {
2436            tcg_out_insn(s, RRFa, NXGRK, a0, a1, a2);
2437        }
2438        break;
2439    case INDEX_op_nand_i64:
2440        tcg_out_insn(s, RRFa, NNGRK, args[0], args[1], args[2]);
2441        break;
2442    case INDEX_op_nor_i64:
2443        tcg_out_insn(s, RRFa, NOGRK, args[0], args[1], args[2]);
2444        break;
2445
2446    case INDEX_op_neg_i64:
2447        tcg_out_insn(s, RRE, LCGR, args[0], args[1]);
2448        break;
2449    case INDEX_op_not_i64:
2450        tcg_out_insn(s, RRFa, NOGRK, args[0], args[1], args[1]);
2451        break;
2452    case INDEX_op_bswap64_i64:
2453        tcg_out_insn(s, RRE, LRVGR, args[0], args[1]);
2454        break;
2455
2456    case INDEX_op_mul_i64:
2457        a0 = args[0], a1 = args[1], a2 = args[2];
2458        if (const_args[2]) {
2459            tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2460            if (a2 == (int16_t)a2) {
2461                tcg_out_insn(s, RI, MGHI, a0, a2);
2462            } else {
2463                tcg_out_insn(s, RIL, MSGFI, a0, a2);
2464            }
2465        } else if (a0 == a1) {
2466            tcg_out_insn(s, RRE, MSGR, a0, a2);
2467        } else {
2468            tcg_out_insn(s, RRFa, MSGRKC, a0, a1, a2);
2469        }
2470        break;
2471
2472    case INDEX_op_div2_i64:
2473        /*
2474         * ??? We get an unnecessary sign-extension of the dividend
2475         * into op0 with this definition, but as we do in fact always
2476         * produce both quotient and remainder using INDEX_op_div_i64
2477         * instead requires jumping through even more hoops.
2478         */
2479        tcg_debug_assert(args[0] == args[2]);
2480        tcg_debug_assert(args[1] == args[3]);
2481        tcg_debug_assert((args[1] & 1) == 0);
2482        tcg_debug_assert(args[0] == args[1] + 1);
2483        tcg_out_insn(s, RRE, DSGR, args[1], args[4]);
2484        break;
2485    case INDEX_op_divu2_i64:
2486        tcg_debug_assert(args[0] == args[2]);
2487        tcg_debug_assert(args[1] == args[3]);
2488        tcg_debug_assert((args[1] & 1) == 0);
2489        tcg_debug_assert(args[0] == args[1] + 1);
2490        tcg_out_insn(s, RRE, DLGR, args[1], args[4]);
2491        break;
2492    case INDEX_op_mulu2_i64:
2493        tcg_debug_assert(args[0] == args[2]);
2494        tcg_debug_assert((args[1] & 1) == 0);
2495        tcg_debug_assert(args[0] == args[1] + 1);
2496        tcg_out_insn(s, RRE, MLGR, args[1], args[3]);
2497        break;
2498    case INDEX_op_muls2_i64:
2499        tcg_debug_assert((args[1] & 1) == 0);
2500        tcg_debug_assert(args[0] == args[1] + 1);
2501        tcg_out_insn(s, RRFa, MGRK, args[1], args[2], args[3]);
2502        break;
2503
2504    case INDEX_op_shl_i64:
2505        op = RSY_SLLG;
2506    do_shift64:
2507        if (const_args[2]) {
2508            tcg_out_sh64(s, op, args[0], args[1], TCG_REG_NONE, args[2]);
2509        } else {
2510            tcg_out_sh64(s, op, args[0], args[1], args[2], 0);
2511        }
2512        break;
2513    case INDEX_op_shr_i64:
2514        op = RSY_SRLG;
2515        goto do_shift64;
2516    case INDEX_op_sar_i64:
2517        op = RSY_SRAG;
2518        goto do_shift64;
2519
2520    case INDEX_op_rotl_i64:
2521        if (const_args[2]) {
2522            tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
2523                         TCG_REG_NONE, args[2]);
2524        } else {
2525            tcg_out_sh64(s, RSY_RLLG, args[0], args[1], args[2], 0);
2526        }
2527        break;
2528    case INDEX_op_rotr_i64:
2529        if (const_args[2]) {
2530            tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
2531                         TCG_REG_NONE, (64 - args[2]) & 63);
2532        } else {
2533            /* We can use the smaller 32-bit negate because only the
2534               low 6 bits are examined for the rotate.  */
2535            tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
2536            tcg_out_sh64(s, RSY_RLLG, args[0], args[1], TCG_TMP0, 0);
2537        }
2538        break;
2539
2540    case INDEX_op_ext8s_i64:
2541        tgen_ext8s(s, TCG_TYPE_I64, args[0], args[1]);
2542        break;
2543    case INDEX_op_ext16s_i64:
2544        tgen_ext16s(s, TCG_TYPE_I64, args[0], args[1]);
2545        break;
2546    case INDEX_op_ext_i32_i64:
2547    case INDEX_op_ext32s_i64:
2548        tgen_ext32s(s, args[0], args[1]);
2549        break;
2550    case INDEX_op_ext8u_i64:
2551        tgen_ext8u(s, TCG_TYPE_I64, args[0], args[1]);
2552        break;
2553    case INDEX_op_ext16u_i64:
2554        tgen_ext16u(s, TCG_TYPE_I64, args[0], args[1]);
2555        break;
2556    case INDEX_op_extu_i32_i64:
2557    case INDEX_op_ext32u_i64:
2558        tgen_ext32u(s, args[0], args[1]);
2559        break;
2560
2561    case INDEX_op_add2_i64:
2562        if (const_args[4]) {
2563            if ((int64_t)args[4] >= 0) {
2564                tcg_out_insn(s, RIL, ALGFI, args[0], args[4]);
2565            } else {
2566                tcg_out_insn(s, RIL, SLGFI, args[0], -args[4]);
2567            }
2568        } else {
2569            tcg_out_insn(s, RRE, ALGR, args[0], args[4]);
2570        }
2571        tcg_out_insn(s, RRE, ALCGR, args[1], args[5]);
2572        break;
2573    case INDEX_op_sub2_i64:
2574        if (const_args[4]) {
2575            if ((int64_t)args[4] >= 0) {
2576                tcg_out_insn(s, RIL, SLGFI, args[0], args[4]);
2577            } else {
2578                tcg_out_insn(s, RIL, ALGFI, args[0], -args[4]);
2579            }
2580        } else {
2581            tcg_out_insn(s, RRE, SLGR, args[0], args[4]);
2582        }
2583        tcg_out_insn(s, RRE, SLBGR, args[1], args[5]);
2584        break;
2585
2586    case INDEX_op_brcond_i64:
2587        tgen_brcond(s, TCG_TYPE_I64, args[2], args[0],
2588                    args[1], const_args[1], arg_label(args[3]));
2589        break;
2590    case INDEX_op_setcond_i64:
2591        tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1],
2592                     args[2], const_args[2]);
2593        break;
2594    case INDEX_op_movcond_i64:
2595        tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1],
2596                     args[2], const_args[2], args[3], const_args[3], args[4]);
2597        break;
2598
2599    OP_32_64(deposit):
2600        a0 = args[0], a1 = args[1], a2 = args[2];
2601        if (const_args[1]) {
2602            tgen_deposit(s, a0, a2, args[3], args[4], 1);
2603        } else {
2604            /* Since we can't support "0Z" as a constraint, we allow a1 in
2605               any register.  Fix things up as if a matching constraint.  */
2606            if (a0 != a1) {
2607                TCGType type = (opc == INDEX_op_deposit_i64);
2608                if (a0 == a2) {
2609                    tcg_out_mov(s, type, TCG_TMP0, a2);
2610                    a2 = TCG_TMP0;
2611                }
2612                tcg_out_mov(s, type, a0, a1);
2613            }
2614            tgen_deposit(s, a0, a2, args[3], args[4], 0);
2615        }
2616        break;
2617
2618    OP_32_64(extract):
2619        tgen_extract(s, args[0], args[1], args[2], args[3]);
2620        break;
2621
2622    case INDEX_op_clz_i64:
2623        tgen_clz(s, args[0], args[1], args[2], const_args[2]);
2624        break;
2625
2626    case INDEX_op_ctpop_i32:
2627        tgen_ctpop(s, TCG_TYPE_I32, args[0], args[1]);
2628        break;
2629    case INDEX_op_ctpop_i64:
2630        tgen_ctpop(s, TCG_TYPE_I64, args[0], args[1]);
2631        break;
2632
2633    case INDEX_op_mb:
2634        /* The host memory model is quite strong, we simply need to
2635           serialize the instruction stream.  */
2636        if (args[0] & TCG_MO_ST_LD) {
2637            /* fast-bcr-serialization facility (45) is present */
2638            tcg_out_insn(s, RR, BCR, 14, 0);
2639        }
2640        break;
2641
2642    case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
2643    case INDEX_op_mov_i64:
2644    case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
2645    case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
2646    case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
2647    default:
2648        tcg_abort();
2649    }
2650}
2651
2652static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
2653                            TCGReg dst, TCGReg src)
2654{
2655    if (is_general_reg(src)) {
2656        /* Replicate general register into two MO_64. */
2657        tcg_out_insn(s, VRRf, VLVGP, dst, src, src);
2658        if (vece == MO_64) {
2659            return true;
2660        }
2661        src = dst;
2662    }
2663
2664    /*
2665     * Recall that the "standard" integer, within a vector, is the
2666     * rightmost element of the leftmost doubleword, a-la VLLEZ.
2667     */
2668    tcg_out_insn(s, VRIc, VREP, dst, (8 >> vece) - 1, src, vece);
2669    return true;
2670}
2671
2672static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
2673                             TCGReg dst, TCGReg base, intptr_t offset)
2674{
2675    tcg_out_vrx_mem(s, VRX_VLREP, dst, base, TCG_REG_NONE, offset, vece);
2676    return true;
2677}
2678
2679static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
2680                             TCGReg dst, int64_t val)
2681{
2682    int i, mask, msb, lsb;
2683
2684    /* Look for int16_t elements.  */
2685    if (vece <= MO_16 ||
2686        (vece == MO_32 ? (int32_t)val : val) == (int16_t)val) {
2687        tcg_out_insn(s, VRIa, VREPI, dst, val, vece);
2688        return;
2689    }
2690
2691    /* Look for bit masks.  */
2692    if (vece == MO_32) {
2693        if (risbg_mask((int32_t)val)) {
2694            /* Handle wraparound by swapping msb and lsb.  */
2695            if ((val & 0x80000001u) == 0x80000001u) {
2696                msb = 32 - ctz32(~val);
2697                lsb = clz32(~val) - 1;
2698            } else {
2699                msb = clz32(val);
2700                lsb = 31 - ctz32(val);
2701            }
2702            tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_32);
2703            return;
2704        }
2705    } else {
2706        if (risbg_mask(val)) {
2707            /* Handle wraparound by swapping msb and lsb.  */
2708            if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
2709                /* Handle wraparound by swapping msb and lsb.  */
2710                msb = 64 - ctz64(~val);
2711                lsb = clz64(~val) - 1;
2712            } else {
2713                msb = clz64(val);
2714                lsb = 63 - ctz64(val);
2715            }
2716            tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_64);
2717            return;
2718        }
2719    }
2720
2721    /* Look for all bytes 0x00 or 0xff.  */
2722    for (i = mask = 0; i < 8; i++) {
2723        uint8_t byte = val >> (i * 8);
2724        if (byte == 0xff) {
2725            mask |= 1 << i;
2726        } else if (byte != 0) {
2727            break;
2728        }
2729    }
2730    if (i == 8) {
2731        tcg_out_insn(s, VRIa, VGBM, dst, mask * 0x0101, 0);
2732        return;
2733    }
2734
2735    /* Otherwise, stuff it in the constant pool.  */
2736    tcg_out_insn(s, RIL, LARL, TCG_TMP0, 0);
2737    new_pool_label(s, val, R_390_PC32DBL, s->code_ptr - 2, 2);
2738    tcg_out_insn(s, VRX, VLREP, dst, TCG_TMP0, TCG_REG_NONE, 0, MO_64);
2739}
2740
2741static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2742                           unsigned vecl, unsigned vece,
2743                           const TCGArg args[TCG_MAX_OP_ARGS],
2744                           const int const_args[TCG_MAX_OP_ARGS])
2745{
2746    TCGType type = vecl + TCG_TYPE_V64;
2747    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
2748
2749    switch (opc) {
2750    case INDEX_op_ld_vec:
2751        tcg_out_ld(s, type, a0, a1, a2);
2752        break;
2753    case INDEX_op_st_vec:
2754        tcg_out_st(s, type, a0, a1, a2);
2755        break;
2756    case INDEX_op_dupm_vec:
2757        tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2758        break;
2759
2760    case INDEX_op_abs_vec:
2761        tcg_out_insn(s, VRRa, VLP, a0, a1, vece);
2762        break;
2763    case INDEX_op_neg_vec:
2764        tcg_out_insn(s, VRRa, VLC, a0, a1, vece);
2765        break;
2766    case INDEX_op_not_vec:
2767        tcg_out_insn(s, VRRc, VNO, a0, a1, a1, 0);
2768        break;
2769
2770    case INDEX_op_add_vec:
2771        tcg_out_insn(s, VRRc, VA, a0, a1, a2, vece);
2772        break;
2773    case INDEX_op_sub_vec:
2774        tcg_out_insn(s, VRRc, VS, a0, a1, a2, vece);
2775        break;
2776    case INDEX_op_and_vec:
2777        tcg_out_insn(s, VRRc, VN, a0, a1, a2, 0);
2778        break;
2779    case INDEX_op_andc_vec:
2780        tcg_out_insn(s, VRRc, VNC, a0, a1, a2, 0);
2781        break;
2782    case INDEX_op_mul_vec:
2783        tcg_out_insn(s, VRRc, VML, a0, a1, a2, vece);
2784        break;
2785    case INDEX_op_or_vec:
2786        tcg_out_insn(s, VRRc, VO, a0, a1, a2, 0);
2787        break;
2788    case INDEX_op_orc_vec:
2789        tcg_out_insn(s, VRRc, VOC, a0, a1, a2, 0);
2790        break;
2791    case INDEX_op_xor_vec:
2792        tcg_out_insn(s, VRRc, VX, a0, a1, a2, 0);
2793        break;
2794    case INDEX_op_nand_vec:
2795        tcg_out_insn(s, VRRc, VNN, a0, a1, a2, 0);
2796        break;
2797    case INDEX_op_nor_vec:
2798        tcg_out_insn(s, VRRc, VNO, a0, a1, a2, 0);
2799        break;
2800    case INDEX_op_eqv_vec:
2801        tcg_out_insn(s, VRRc, VNX, a0, a1, a2, 0);
2802        break;
2803
2804    case INDEX_op_shli_vec:
2805        tcg_out_insn(s, VRSa, VESL, a0, a2, TCG_REG_NONE, a1, vece);
2806        break;
2807    case INDEX_op_shri_vec:
2808        tcg_out_insn(s, VRSa, VESRL, a0, a2, TCG_REG_NONE, a1, vece);
2809        break;
2810    case INDEX_op_sari_vec:
2811        tcg_out_insn(s, VRSa, VESRA, a0, a2, TCG_REG_NONE, a1, vece);
2812        break;
2813    case INDEX_op_rotli_vec:
2814        tcg_out_insn(s, VRSa, VERLL, a0, a2, TCG_REG_NONE, a1, vece);
2815        break;
2816    case INDEX_op_shls_vec:
2817        tcg_out_insn(s, VRSa, VESL, a0, 0, a2, a1, vece);
2818        break;
2819    case INDEX_op_shrs_vec:
2820        tcg_out_insn(s, VRSa, VESRL, a0, 0, a2, a1, vece);
2821        break;
2822    case INDEX_op_sars_vec:
2823        tcg_out_insn(s, VRSa, VESRA, a0, 0, a2, a1, vece);
2824        break;
2825    case INDEX_op_rotls_vec:
2826        tcg_out_insn(s, VRSa, VERLL, a0, 0, a2, a1, vece);
2827        break;
2828    case INDEX_op_shlv_vec:
2829        tcg_out_insn(s, VRRc, VESLV, a0, a1, a2, vece);
2830        break;
2831    case INDEX_op_shrv_vec:
2832        tcg_out_insn(s, VRRc, VESRLV, a0, a1, a2, vece);
2833        break;
2834    case INDEX_op_sarv_vec:
2835        tcg_out_insn(s, VRRc, VESRAV, a0, a1, a2, vece);
2836        break;
2837    case INDEX_op_rotlv_vec:
2838        tcg_out_insn(s, VRRc, VERLLV, a0, a1, a2, vece);
2839        break;
2840
2841    case INDEX_op_smin_vec:
2842        tcg_out_insn(s, VRRc, VMN, a0, a1, a2, vece);
2843        break;
2844    case INDEX_op_smax_vec:
2845        tcg_out_insn(s, VRRc, VMX, a0, a1, a2, vece);
2846        break;
2847    case INDEX_op_umin_vec:
2848        tcg_out_insn(s, VRRc, VMNL, a0, a1, a2, vece);
2849        break;
2850    case INDEX_op_umax_vec:
2851        tcg_out_insn(s, VRRc, VMXL, a0, a1, a2, vece);
2852        break;
2853
2854    case INDEX_op_bitsel_vec:
2855        tcg_out_insn(s, VRRe, VSEL, a0, a2, args[3], a1);
2856        break;
2857
2858    case INDEX_op_cmp_vec:
2859        switch ((TCGCond)args[3]) {
2860        case TCG_COND_EQ:
2861            tcg_out_insn(s, VRRc, VCEQ, a0, a1, a2, vece);
2862            break;
2863        case TCG_COND_GT:
2864            tcg_out_insn(s, VRRc, VCH, a0, a1, a2, vece);
2865            break;
2866        case TCG_COND_GTU:
2867            tcg_out_insn(s, VRRc, VCHL, a0, a1, a2, vece);
2868            break;
2869        default:
2870            g_assert_not_reached();
2871        }
2872        break;
2873
2874    case INDEX_op_s390_vuph_vec:
2875        tcg_out_insn(s, VRRa, VUPH, a0, a1, vece);
2876        break;
2877    case INDEX_op_s390_vupl_vec:
2878        tcg_out_insn(s, VRRa, VUPL, a0, a1, vece);
2879        break;
2880    case INDEX_op_s390_vpks_vec:
2881        tcg_out_insn(s, VRRc, VPKS, a0, a1, a2, vece);
2882        break;
2883
2884    case INDEX_op_mov_vec:   /* Always emitted via tcg_out_mov.  */
2885    case INDEX_op_dup_vec:   /* Always emitted via tcg_out_dup_vec.  */
2886    default:
2887        g_assert_not_reached();
2888    }
2889}
2890
2891int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2892{
2893    switch (opc) {
2894    case INDEX_op_abs_vec:
2895    case INDEX_op_add_vec:
2896    case INDEX_op_and_vec:
2897    case INDEX_op_andc_vec:
2898    case INDEX_op_bitsel_vec:
2899    case INDEX_op_eqv_vec:
2900    case INDEX_op_nand_vec:
2901    case INDEX_op_neg_vec:
2902    case INDEX_op_nor_vec:
2903    case INDEX_op_not_vec:
2904    case INDEX_op_or_vec:
2905    case INDEX_op_orc_vec:
2906    case INDEX_op_rotli_vec:
2907    case INDEX_op_rotls_vec:
2908    case INDEX_op_rotlv_vec:
2909    case INDEX_op_sari_vec:
2910    case INDEX_op_sars_vec:
2911    case INDEX_op_sarv_vec:
2912    case INDEX_op_shli_vec:
2913    case INDEX_op_shls_vec:
2914    case INDEX_op_shlv_vec:
2915    case INDEX_op_shri_vec:
2916    case INDEX_op_shrs_vec:
2917    case INDEX_op_shrv_vec:
2918    case INDEX_op_smax_vec:
2919    case INDEX_op_smin_vec:
2920    case INDEX_op_sub_vec:
2921    case INDEX_op_umax_vec:
2922    case INDEX_op_umin_vec:
2923    case INDEX_op_xor_vec:
2924        return 1;
2925    case INDEX_op_cmp_vec:
2926    case INDEX_op_cmpsel_vec:
2927    case INDEX_op_rotrv_vec:
2928        return -1;
2929    case INDEX_op_mul_vec:
2930        return vece < MO_64;
2931    case INDEX_op_ssadd_vec:
2932    case INDEX_op_sssub_vec:
2933        return vece < MO_64 ? -1 : 0;
2934    default:
2935        return 0;
2936    }
2937}
2938
2939static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0,
2940                                 TCGv_vec v1, TCGv_vec v2, TCGCond cond)
2941{
2942    bool need_swap = false, need_inv = false;
2943
2944    switch (cond) {
2945    case TCG_COND_EQ:
2946    case TCG_COND_GT:
2947    case TCG_COND_GTU:
2948        break;
2949    case TCG_COND_NE:
2950    case TCG_COND_LE:
2951    case TCG_COND_LEU:
2952        need_inv = true;
2953        break;
2954    case TCG_COND_LT:
2955    case TCG_COND_LTU:
2956        need_swap = true;
2957        break;
2958    case TCG_COND_GE:
2959    case TCG_COND_GEU:
2960        need_swap = need_inv = true;
2961        break;
2962    default:
2963        g_assert_not_reached();
2964    }
2965
2966    if (need_inv) {
2967        cond = tcg_invert_cond(cond);
2968    }
2969    if (need_swap) {
2970        TCGv_vec t1;
2971        t1 = v1, v1 = v2, v2 = t1;
2972        cond = tcg_swap_cond(cond);
2973    }
2974
2975    vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
2976              tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
2977
2978    return need_inv;
2979}
2980
2981static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
2982                           TCGv_vec v1, TCGv_vec v2, TCGCond cond)
2983{
2984    if (expand_vec_cmp_noinv(type, vece, v0, v1, v2, cond)) {
2985        tcg_gen_not_vec(vece, v0, v0);
2986    }
2987}
2988
2989static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGv_vec v0,
2990                              TCGv_vec c1, TCGv_vec c2,
2991                              TCGv_vec v3, TCGv_vec v4, TCGCond cond)
2992{
2993    TCGv_vec t = tcg_temp_new_vec(type);
2994
2995    if (expand_vec_cmp_noinv(type, vece, t, c1, c2, cond)) {
2996        /* Invert the sense of the compare by swapping arguments.  */
2997        tcg_gen_bitsel_vec(vece, v0, t, v4, v3);
2998    } else {
2999        tcg_gen_bitsel_vec(vece, v0, t, v3, v4);
3000    }
3001    tcg_temp_free_vec(t);
3002}
3003
3004static void expand_vec_sat(TCGType type, unsigned vece, TCGv_vec v0,
3005                           TCGv_vec v1, TCGv_vec v2, TCGOpcode add_sub_opc)
3006{
3007    TCGv_vec h1 = tcg_temp_new_vec(type);
3008    TCGv_vec h2 = tcg_temp_new_vec(type);
3009    TCGv_vec l1 = tcg_temp_new_vec(type);
3010    TCGv_vec l2 = tcg_temp_new_vec(type);
3011
3012    tcg_debug_assert (vece < MO_64);
3013
3014    /* Unpack with sign-extension. */
3015    vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
3016              tcgv_vec_arg(h1), tcgv_vec_arg(v1));
3017    vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
3018              tcgv_vec_arg(h2), tcgv_vec_arg(v2));
3019
3020    vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
3021              tcgv_vec_arg(l1), tcgv_vec_arg(v1));
3022    vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
3023              tcgv_vec_arg(l2), tcgv_vec_arg(v2));
3024
3025    /* Arithmetic on a wider element size. */
3026    vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(h1),
3027              tcgv_vec_arg(h1), tcgv_vec_arg(h2));
3028    vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(l1),
3029              tcgv_vec_arg(l1), tcgv_vec_arg(l2));
3030
3031    /* Pack with saturation. */
3032    vec_gen_3(INDEX_op_s390_vpks_vec, type, vece + 1,
3033              tcgv_vec_arg(v0), tcgv_vec_arg(h1), tcgv_vec_arg(l1));
3034
3035    tcg_temp_free_vec(h1);
3036    tcg_temp_free_vec(h2);
3037    tcg_temp_free_vec(l1);
3038    tcg_temp_free_vec(l2);
3039}
3040
3041void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
3042                       TCGArg a0, ...)
3043{
3044    va_list va;
3045    TCGv_vec v0, v1, v2, v3, v4, t0;
3046
3047    va_start(va, a0);
3048    v0 = temp_tcgv_vec(arg_temp(a0));
3049    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3050    v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3051
3052    switch (opc) {
3053    case INDEX_op_cmp_vec:
3054        expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
3055        break;
3056
3057    case INDEX_op_cmpsel_vec:
3058        v3 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3059        v4 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
3060        expand_vec_cmpsel(type, vece, v0, v1, v2, v3, v4, va_arg(va, TCGArg));
3061        break;
3062
3063    case INDEX_op_rotrv_vec:
3064        t0 = tcg_temp_new_vec(type);
3065        tcg_gen_neg_vec(vece, t0, v2);
3066        tcg_gen_rotlv_vec(vece, v0, v1, t0);
3067        tcg_temp_free_vec(t0);
3068        break;
3069
3070    case INDEX_op_ssadd_vec:
3071        expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_add_vec);
3072        break;
3073    case INDEX_op_sssub_vec:
3074        expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_sub_vec);
3075        break;
3076
3077    default:
3078        g_assert_not_reached();
3079    }
3080    va_end(va);
3081}
3082
3083static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
3084{
3085    switch (op) {
3086    case INDEX_op_goto_ptr:
3087        return C_O0_I1(r);
3088
3089    case INDEX_op_ld8u_i32:
3090    case INDEX_op_ld8u_i64:
3091    case INDEX_op_ld8s_i32:
3092    case INDEX_op_ld8s_i64:
3093    case INDEX_op_ld16u_i32:
3094    case INDEX_op_ld16u_i64:
3095    case INDEX_op_ld16s_i32:
3096    case INDEX_op_ld16s_i64:
3097    case INDEX_op_ld_i32:
3098    case INDEX_op_ld32u_i64:
3099    case INDEX_op_ld32s_i64:
3100    case INDEX_op_ld_i64:
3101        return C_O1_I1(r, r);
3102
3103    case INDEX_op_st8_i32:
3104    case INDEX_op_st8_i64:
3105    case INDEX_op_st16_i32:
3106    case INDEX_op_st16_i64:
3107    case INDEX_op_st_i32:
3108    case INDEX_op_st32_i64:
3109    case INDEX_op_st_i64:
3110        return C_O0_I2(r, r);
3111
3112    case INDEX_op_add_i32:
3113    case INDEX_op_add_i64:
3114    case INDEX_op_shl_i64:
3115    case INDEX_op_shr_i64:
3116    case INDEX_op_sar_i64:
3117    case INDEX_op_rotl_i32:
3118    case INDEX_op_rotl_i64:
3119    case INDEX_op_rotr_i32:
3120    case INDEX_op_rotr_i64:
3121    case INDEX_op_setcond_i32:
3122        return C_O1_I2(r, r, ri);
3123    case INDEX_op_setcond_i64:
3124        return C_O1_I2(r, r, rA);
3125
3126    case INDEX_op_clz_i64:
3127        return C_O1_I2(r, r, rI);
3128
3129    case INDEX_op_sub_i32:
3130    case INDEX_op_sub_i64:
3131    case INDEX_op_and_i32:
3132    case INDEX_op_or_i32:
3133    case INDEX_op_xor_i32:
3134        return C_O1_I2(r, r, ri);
3135    case INDEX_op_and_i64:
3136        return C_O1_I2(r, r, rNKR);
3137    case INDEX_op_or_i64:
3138    case INDEX_op_xor_i64:
3139        return C_O1_I2(r, r, rK);
3140
3141    case INDEX_op_andc_i32:
3142    case INDEX_op_orc_i32:
3143    case INDEX_op_eqv_i32:
3144        return C_O1_I2(r, r, ri);
3145    case INDEX_op_andc_i64:
3146        return C_O1_I2(r, r, rKR);
3147    case INDEX_op_orc_i64:
3148    case INDEX_op_eqv_i64:
3149        return C_O1_I2(r, r, rNK);
3150
3151    case INDEX_op_nand_i32:
3152    case INDEX_op_nand_i64:
3153    case INDEX_op_nor_i32:
3154    case INDEX_op_nor_i64:
3155        return C_O1_I2(r, r, r);
3156
3157    case INDEX_op_mul_i32:
3158        return (HAVE_FACILITY(MISC_INSN_EXT2)
3159                ? C_O1_I2(r, r, ri)
3160                : C_O1_I2(r, 0, ri));
3161    case INDEX_op_mul_i64:
3162        return (HAVE_FACILITY(MISC_INSN_EXT2)
3163                ? C_O1_I2(r, r, rJ)
3164                : C_O1_I2(r, 0, rJ));
3165
3166    case INDEX_op_shl_i32:
3167    case INDEX_op_shr_i32:
3168    case INDEX_op_sar_i32:
3169        return C_O1_I2(r, r, ri);
3170
3171    case INDEX_op_brcond_i32:
3172        return C_O0_I2(r, ri);
3173    case INDEX_op_brcond_i64:
3174        return C_O0_I2(r, rA);
3175
3176    case INDEX_op_bswap16_i32:
3177    case INDEX_op_bswap16_i64:
3178    case INDEX_op_bswap32_i32:
3179    case INDEX_op_bswap32_i64:
3180    case INDEX_op_bswap64_i64:
3181    case INDEX_op_neg_i32:
3182    case INDEX_op_neg_i64:
3183    case INDEX_op_not_i32:
3184    case INDEX_op_not_i64:
3185    case INDEX_op_ext8s_i32:
3186    case INDEX_op_ext8s_i64:
3187    case INDEX_op_ext8u_i32:
3188    case INDEX_op_ext8u_i64:
3189    case INDEX_op_ext16s_i32:
3190    case INDEX_op_ext16s_i64:
3191    case INDEX_op_ext16u_i32:
3192    case INDEX_op_ext16u_i64:
3193    case INDEX_op_ext32s_i64:
3194    case INDEX_op_ext32u_i64:
3195    case INDEX_op_ext_i32_i64:
3196    case INDEX_op_extu_i32_i64:
3197    case INDEX_op_extract_i32:
3198    case INDEX_op_extract_i64:
3199    case INDEX_op_ctpop_i32:
3200    case INDEX_op_ctpop_i64:
3201        return C_O1_I1(r, r);
3202
3203    case INDEX_op_qemu_ld_i32:
3204    case INDEX_op_qemu_ld_i64:
3205        return C_O1_I1(r, L);
3206    case INDEX_op_qemu_st_i64:
3207    case INDEX_op_qemu_st_i32:
3208        return C_O0_I2(L, L);
3209
3210    case INDEX_op_deposit_i32:
3211    case INDEX_op_deposit_i64:
3212        return C_O1_I2(r, rZ, r);
3213
3214    case INDEX_op_movcond_i32:
3215        return C_O1_I4(r, r, ri, rI, r);
3216    case INDEX_op_movcond_i64:
3217        return C_O1_I4(r, r, rA, rI, r);
3218
3219    case INDEX_op_div2_i32:
3220    case INDEX_op_div2_i64:
3221    case INDEX_op_divu2_i32:
3222    case INDEX_op_divu2_i64:
3223        return C_O2_I3(o, m, 0, 1, r);
3224
3225    case INDEX_op_mulu2_i64:
3226        return C_O2_I2(o, m, 0, r);
3227    case INDEX_op_muls2_i64:
3228        return C_O2_I2(o, m, r, r);
3229
3230    case INDEX_op_add2_i32:
3231    case INDEX_op_sub2_i32:
3232        return C_O2_I4(r, r, 0, 1, ri, r);
3233
3234    case INDEX_op_add2_i64:
3235    case INDEX_op_sub2_i64:
3236        return C_O2_I4(r, r, 0, 1, rA, r);
3237
3238    case INDEX_op_st_vec:
3239        return C_O0_I2(v, r);
3240    case INDEX_op_ld_vec:
3241    case INDEX_op_dupm_vec:
3242        return C_O1_I1(v, r);
3243    case INDEX_op_dup_vec:
3244        return C_O1_I1(v, vr);
3245    case INDEX_op_abs_vec:
3246    case INDEX_op_neg_vec:
3247    case INDEX_op_not_vec:
3248    case INDEX_op_rotli_vec:
3249    case INDEX_op_sari_vec:
3250    case INDEX_op_shli_vec:
3251    case INDEX_op_shri_vec:
3252    case INDEX_op_s390_vuph_vec:
3253    case INDEX_op_s390_vupl_vec:
3254        return C_O1_I1(v, v);
3255    case INDEX_op_add_vec:
3256    case INDEX_op_sub_vec:
3257    case INDEX_op_and_vec:
3258    case INDEX_op_andc_vec:
3259    case INDEX_op_or_vec:
3260    case INDEX_op_orc_vec:
3261    case INDEX_op_xor_vec:
3262    case INDEX_op_nand_vec:
3263    case INDEX_op_nor_vec:
3264    case INDEX_op_eqv_vec:
3265    case INDEX_op_cmp_vec:
3266    case INDEX_op_mul_vec:
3267    case INDEX_op_rotlv_vec:
3268    case INDEX_op_rotrv_vec:
3269    case INDEX_op_shlv_vec:
3270    case INDEX_op_shrv_vec:
3271    case INDEX_op_sarv_vec:
3272    case INDEX_op_smax_vec:
3273    case INDEX_op_smin_vec:
3274    case INDEX_op_umax_vec:
3275    case INDEX_op_umin_vec:
3276    case INDEX_op_s390_vpks_vec:
3277        return C_O1_I2(v, v, v);
3278    case INDEX_op_rotls_vec:
3279    case INDEX_op_shls_vec:
3280    case INDEX_op_shrs_vec:
3281    case INDEX_op_sars_vec:
3282        return C_O1_I2(v, v, r);
3283    case INDEX_op_bitsel_vec:
3284        return C_O1_I3(v, v, v, v);
3285
3286    default:
3287        g_assert_not_reached();
3288    }
3289}
3290
/*
 * Mainline glibc added HWCAP_S390_VX before it was kernel ABI.
 * Some distros have fixed this up locally, others have not, so supply
 * a fallback definition of HWCAP_S390_VXRS when the headers lack one.
 */
#if !defined(HWCAP_S390_VXRS)
#define HWCAP_S390_VXRS 2048
#endif
3298
3299static void query_s390_facilities(void)
3300{
3301    unsigned long hwcap = qemu_getauxval(AT_HWCAP);
3302    const char *which;
3303
3304    /* Is STORE FACILITY LIST EXTENDED available?  Honestly, I believe this
3305       is present on all 64-bit systems, but let's check for it anyway.  */
3306    if (hwcap & HWCAP_S390_STFLE) {
3307        register int r0 __asm__("0") = ARRAY_SIZE(s390_facilities) - 1;
3308        register void *r1 __asm__("1") = s390_facilities;
3309
3310        /* stfle 0(%r1) */
3311        asm volatile(".word 0xb2b0,0x1000"
3312                     : "=r"(r0) : "r"(r0), "r"(r1) : "memory", "cc");
3313    }
3314
3315    /*
3316     * Use of vector registers requires os support beyond the facility bit.
3317     * If the kernel does not advertise support, disable the facility bits.
3318     * There is nothing else we currently care about in the 3rd word, so
3319     * disable VECTOR with one store.
3320     */
3321    if (!(hwcap & HWCAP_S390_VXRS)) {
3322        s390_facilities[2] = 0;
3323    }
3324
3325    /*
3326     * Minimum supported cpu revision is z196.
3327     * Check for all required facilities.
3328     * ZARCH_ACTIVE is done via preprocessor check for 64-bit.
3329     */
3330    if (!HAVE_FACILITY(LONG_DISP)) {
3331        which = "long-displacement";
3332        goto fail;
3333    }
3334    if (!HAVE_FACILITY(EXT_IMM)) {
3335        which = "extended-immediate";
3336        goto fail;
3337    }
3338    if (!HAVE_FACILITY(GEN_INST_EXT)) {
3339        which = "general-instructions-extension";
3340        goto fail;
3341    }
3342    /*
3343     * Facility 45 is a big bin that contains: distinct-operands,
3344     * fast-BCR-serialization, high-word, population-count,
3345     * interlocked-access-1, and load/store-on-condition-1
3346     */
3347    if (!HAVE_FACILITY(45)) {
3348        which = "45";
3349        goto fail;
3350    }
3351    return;
3352
3353 fail:
3354    error_report("%s: missing required facility %s", __func__, which);
3355    exit(EXIT_FAILURE);
3356}
3357
3358static void tcg_target_init(TCGContext *s)
3359{
3360    query_s390_facilities();
3361
3362    tcg_target_available_regs[TCG_TYPE_I32] = 0xffff;
3363    tcg_target_available_regs[TCG_TYPE_I64] = 0xffff;
3364    if (HAVE_FACILITY(VECTOR)) {
3365        tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3366        tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
3367    }
3368
3369    tcg_target_call_clobber_regs = 0;
3370    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
3371    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R1);
3372    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
3373    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
3374    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
3375    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
3376    /* The r6 register is technically call-saved, but it's also a parameter
3377       register, so it can get killed by setup for the qemu_st helper.  */
3378    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
3379    /* The return register can be considered call-clobbered.  */
3380    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14);
3381
3382    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
3383    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
3384    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
3385    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
3386    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
3387    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
3388    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
3389    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
3390    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
3391    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
3392    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
3393    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
3394    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V20);
3395    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V21);
3396    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V22);
3397    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V23);
3398    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V24);
3399    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V25);
3400    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V26);
3401    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V27);
3402    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V28);
3403    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V29);
3404    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V30);
3405    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V31);
3406
3407    s->reserved_regs = 0;
3408    tcg_regset_set_reg(s->reserved_regs, TCG_TMP0);
3409    /* XXX many insns can't be used with R0, so we better avoid it for now */
3410    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);
3411    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
3412}
3413
/*
 * Stack frame size, in bytes, as an int: the call stack offset, the
 * static call argument area, and the CPU temp buffer.
 */
#define FRAME_SIZE                                          \
    ((int)(TCG_TARGET_CALL_STACK_OFFSET                     \
           + TCG_STATIC_CALL_ARGS_SIZE                      \
           + CPU_TEMP_BUF_NLONGS * sizeof(long)))
3417
3418static void tcg_target_qemu_prologue(TCGContext *s)
3419{
3420    /* stmg %r6,%r15,48(%r15) (save registers) */
3421    tcg_out_insn(s, RXY, STMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 48);
3422
3423    /* aghi %r15,-frame_size */
3424    tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -FRAME_SIZE);
3425
3426    tcg_set_frame(s, TCG_REG_CALL_STACK,
3427                  TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET,
3428                  CPU_TEMP_BUF_NLONGS * sizeof(long));
3429
3430#ifndef CONFIG_SOFTMMU
3431    if (guest_base >= 0x80000) {
3432        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
3433        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
3434    }
3435#endif
3436
3437    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
3438
3439    /* br %r3 (go to TB) */
3440    tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, tcg_target_call_iarg_regs[1]);
3441
3442    /*
3443     * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
3444     * and fall through to the rest of the epilogue.
3445     */
3446    tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
3447    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, 0);
3448
3449    /* TB epilogue */
3450    tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
3451
3452    /* lmg %r6,%r15,fs+48(%r15) (restore registers) */
3453    tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15,
3454                 FRAME_SIZE + 48);
3455
3456    /* br %r14 (return) */
3457    tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14);
3458}
3459
3460static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3461{
3462    memset(p, 0x07, count * sizeof(tcg_insn_unit));
3463}
3464
3465typedef struct {
3466    DebugFrameHeader h;
3467    uint8_t fde_def_cfa[4];
3468    uint8_t fde_reg_ofs[18];
3469} DebugFrame;
3470
3471/* We're expecting a 2 byte uleb128 encoded value.  */
3472QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
3473
3474#define ELF_HOST_MACHINE  EM_S390
3475
3476static const DebugFrame debug_frame = {
3477    .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3478    .h.cie.id = -1,
3479    .h.cie.version = 1,
3480    .h.cie.code_align = 1,
3481    .h.cie.data_align = 8,                /* sleb128 8 */
3482    .h.cie.return_column = TCG_REG_R14,
3483
3484    /* Total FDE size does not include the "len" member.  */
3485    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3486
3487    .fde_def_cfa = {
3488        12, TCG_REG_CALL_STACK,         /* DW_CFA_def_cfa %r15, ... */
3489        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
3490        (FRAME_SIZE >> 7)
3491    },
3492    .fde_reg_ofs = {
3493        0x86, 6,                        /* DW_CFA_offset, %r6, 48 */
3494        0x87, 7,                        /* DW_CFA_offset, %r7, 56 */
3495        0x88, 8,                        /* DW_CFA_offset, %r8, 64 */
3496        0x89, 9,                        /* DW_CFA_offset, %r92, 72 */
3497        0x8a, 10,                       /* DW_CFA_offset, %r10, 80 */
3498        0x8b, 11,                       /* DW_CFA_offset, %r11, 88 */
3499        0x8c, 12,                       /* DW_CFA_offset, %r12, 96 */
3500        0x8d, 13,                       /* DW_CFA_offset, %r13, 104 */
3501        0x8e, 14,                       /* DW_CFA_offset, %r14, 112 */
3502    }
3503};
3504
3505void tcg_register_jit(const void *buf, size_t buf_size)
3506{
3507    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
3508}
3509