xref: /openbmc/qemu/tcg/tci.c (revision 07d5d502f2b4a8eedda3c6bdfcab31dc36d1d1d5)
1 /*
2  * Tiny Code Interpreter for QEMU
3  *
4  * Copyright (c) 2009, 2011, 2016 Stefan Weil
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "tcg/tcg.h"
22 #include "tcg/helper-info.h"
23 #include "tcg/tcg-ldst.h"
24 #include "disas/dis-asm.h"
25 #include "tcg-has.h"
26 #include <ffi.h>
27 
28 
29 #define ctpop_tr    glue(ctpop, TCG_TARGET_REG_BITS)
30 #define extract_tr  glue(extract, TCG_TARGET_REG_BITS)
31 
32 /*
33  * Enable TCI assertions only when debugging TCG (and without NDEBUG defined).
34  * Without assertions, the interpreter runs much faster.
35  */
36 #if defined(CONFIG_DEBUG_TCG)
37 # define tci_assert(cond) assert(cond)
38 #else
39 # define tci_assert(cond) ((void)(cond))
40 #endif
41 
42 __thread uintptr_t tci_tb_ptr;
43 
44 static void tci_write_reg64(tcg_target_ulong *regs, uint32_t high_index,
45                             uint32_t low_index, uint64_t value)
46 {
47     regs[low_index] = (uint32_t)value;
48     regs[high_index] = value >> 32;
49 }
50 
/*
 * Combine two 32-bit halves into one 64-bit value: @high becomes
 * bits 63..32 and @low becomes bits 31..0.  Since @low is zero
 * extended, OR-ing is equivalent to the arithmetic sum.
 */
static uint64_t tci_uint64(uint32_t high, uint32_t low)
{
    uint64_t hi = high;

    return (hi << 32) | low;
}
56 
57 /*
58  * Load sets of arguments all at once.  The naming convention is:
59  *   tci_args_<arguments>
60  * where arguments is a sequence of
61  *
62  *   b = immediate (bit position)
63  *   c = condition (TCGCond)
64  *   i = immediate (uint32_t)
65  *   I = immediate (tcg_target_ulong)
66  *   l = label or pointer
67  *   m = immediate (MemOpIdx)
68  *   n = immediate (call return length)
69  *   r = register
70  *   s = signed ldst offset
71  */
72 
/*
 * Decode an 'l' (label/pointer) operand: a signed 20-bit displacement
 * at bit 12, relative to @tb_ptr.  A zero displacement decodes to NULL.
 */
static void tci_args_l(uint32_t insn, const void *tb_ptr, void **l0)
{
    int displacement = sextract32(insn, 12, 20);

    if (displacement == 0) {
        *l0 = NULL;
    } else {
        *l0 = (void *)tb_ptr + displacement;
    }
}
78 
/* Decode a single register operand from bits 11..8 of @insn. */
static void tci_args_r(uint32_t insn, TCGReg *r0)
{
    *r0 = extract32(insn, 8, 4);
}
83 
/*
 * Decode 'n' (call return length, bits 11..8) plus 'l' (a signed
 * 20-bit displacement at bit 12, applied to @tb_ptr).  Unlike
 * tci_args_l, a zero displacement is not special-cased here.
 */
static void tci_args_nl(uint32_t insn, const void *tb_ptr,
                        uint8_t *n0, void **l1)
{
    int displacement = sextract32(insn, 12, 20);

    *n0 = extract32(insn, 8, 4);
    *l1 = (void *)tb_ptr + displacement;
}
90 
91 static void tci_args_rl(uint32_t insn, const void *tb_ptr,
92                         TCGReg *r0, void **l1)
93 {
94     *r0 = extract32(insn, 8, 4);
95     *l1 = sextract32(insn, 12, 20) + (void *)tb_ptr;
96 }
97 
98 static void tci_args_rr(uint32_t insn, TCGReg *r0, TCGReg *r1)
99 {
100     *r0 = extract32(insn, 8, 4);
101     *r1 = extract32(insn, 12, 4);
102 }
103 
104 static void tci_args_ri(uint32_t insn, TCGReg *r0, tcg_target_ulong *i1)
105 {
106     *r0 = extract32(insn, 8, 4);
107     *i1 = sextract32(insn, 12, 20);
108 }
109 
/*
 * Decode two registers (bits 11..8 and 15..12) and a 16-bit
 * MemOpIdx immediate in the upper half of @insn.
 */
static void tci_args_rrm(uint32_t insn, TCGReg *r0,
                         TCGReg *r1, MemOpIdx *m2)
{
    *r0 = extract32(insn, 8, 4);
    *r1 = extract32(insn, 12, 4);
    *m2 = extract32(insn, 16, 16);
}
117 
118 static void tci_args_rrr(uint32_t insn, TCGReg *r0, TCGReg *r1, TCGReg *r2)
119 {
120     *r0 = extract32(insn, 8, 4);
121     *r1 = extract32(insn, 12, 4);
122     *r2 = extract32(insn, 16, 4);
123 }
124 
/*
 * Decode two registers (bits 11..8 and 15..12) and a signed 16-bit
 * load/store offset in the upper half of @insn.
 */
static void tci_args_rrs(uint32_t insn, TCGReg *r0, TCGReg *r1, int32_t *i2)
{
    *r0 = extract32(insn, 8, 4);
    *r1 = extract32(insn, 12, 4);
    *i2 = sextract32(insn, 16, 16);
}
131 
/*
 * Decode two registers plus two 6-bit bit-position immediates
 * (used by extract/sextract: position and length).
 */
static void tci_args_rrbb(uint32_t insn, TCGReg *r0, TCGReg *r1,
                          uint8_t *i2, uint8_t *i3)
{
    *r0 = extract32(insn, 8, 4);
    *r1 = extract32(insn, 12, 4);
    *i2 = extract32(insn, 16, 6);
    *i3 = extract32(insn, 22, 6);
}
140 
/* Decode three registers plus a 4-bit TCGCond (used by setcond). */
static void tci_args_rrrc(uint32_t insn,
                          TCGReg *r0, TCGReg *r1, TCGReg *r2, TCGCond *c3)
{
    *r0 = extract32(insn, 8, 4);
    *r1 = extract32(insn, 12, 4);
    *r2 = extract32(insn, 16, 4);
    *c3 = extract32(insn, 20, 4);
}
149 
/*
 * Decode three registers plus two 6-bit bit-position immediates
 * (used by deposit: position and length).
 */
static void tci_args_rrrbb(uint32_t insn, TCGReg *r0, TCGReg *r1,
                           TCGReg *r2, uint8_t *i3, uint8_t *i4)
{
    *r0 = extract32(insn, 8, 4);
    *r1 = extract32(insn, 12, 4);
    *r2 = extract32(insn, 16, 4);
    *i3 = extract32(insn, 20, 6);
    *i4 = extract32(insn, 26, 6);
}
159 
/* Decode four register operands, packed in 4-bit fields from bit 8. */
static void tci_args_rrrr(uint32_t insn,
                          TCGReg *r0, TCGReg *r1, TCGReg *r2, TCGReg *r3)
{
    *r0 = extract32(insn, 8, 4);
    *r1 = extract32(insn, 12, 4);
    *r2 = extract32(insn, 16, 4);
    *r3 = extract32(insn, 20, 4);
}
168 
/*
 * Decode five registers plus a 4-bit TCGCond in the top nibble
 * (used by movcond and setcond2).
 */
static void tci_args_rrrrrc(uint32_t insn, TCGReg *r0, TCGReg *r1,
                            TCGReg *r2, TCGReg *r3, TCGReg *r4, TCGCond *c5)
{
    *r0 = extract32(insn, 8, 4);
    *r1 = extract32(insn, 12, 4);
    *r2 = extract32(insn, 16, 4);
    *r3 = extract32(insn, 20, 4);
    *r4 = extract32(insn, 24, 4);
    *c5 = extract32(insn, 28, 4);
}
179 
/* Decode six register operands, filling the entire 32-bit insn word. */
static void tci_args_rrrrrr(uint32_t insn, TCGReg *r0, TCGReg *r1,
                            TCGReg *r2, TCGReg *r3, TCGReg *r4, TCGReg *r5)
{
    *r0 = extract32(insn, 8, 4);
    *r1 = extract32(insn, 12, 4);
    *r2 = extract32(insn, 16, 4);
    *r3 = extract32(insn, 20, 4);
    *r4 = extract32(insn, 24, 4);
    *r5 = extract32(insn, 28, 4);
}
190 
191 static bool tci_compare32(uint32_t u0, uint32_t u1, TCGCond condition)
192 {
193     bool result = false;
194     int32_t i0 = u0;
195     int32_t i1 = u1;
196     switch (condition) {
197     case TCG_COND_EQ:
198         result = (u0 == u1);
199         break;
200     case TCG_COND_NE:
201         result = (u0 != u1);
202         break;
203     case TCG_COND_LT:
204         result = (i0 < i1);
205         break;
206     case TCG_COND_GE:
207         result = (i0 >= i1);
208         break;
209     case TCG_COND_LE:
210         result = (i0 <= i1);
211         break;
212     case TCG_COND_GT:
213         result = (i0 > i1);
214         break;
215     case TCG_COND_LTU:
216         result = (u0 < u1);
217         break;
218     case TCG_COND_GEU:
219         result = (u0 >= u1);
220         break;
221     case TCG_COND_LEU:
222         result = (u0 <= u1);
223         break;
224     case TCG_COND_GTU:
225         result = (u0 > u1);
226         break;
227     case TCG_COND_TSTEQ:
228         result = (u0 & u1) == 0;
229         break;
230     case TCG_COND_TSTNE:
231         result = (u0 & u1) != 0;
232         break;
233     default:
234         g_assert_not_reached();
235     }
236     return result;
237 }
238 
239 static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition)
240 {
241     bool result = false;
242     int64_t i0 = u0;
243     int64_t i1 = u1;
244     switch (condition) {
245     case TCG_COND_EQ:
246         result = (u0 == u1);
247         break;
248     case TCG_COND_NE:
249         result = (u0 != u1);
250         break;
251     case TCG_COND_LT:
252         result = (i0 < i1);
253         break;
254     case TCG_COND_GE:
255         result = (i0 >= i1);
256         break;
257     case TCG_COND_LE:
258         result = (i0 <= i1);
259         break;
260     case TCG_COND_GT:
261         result = (i0 > i1);
262         break;
263     case TCG_COND_LTU:
264         result = (u0 < u1);
265         break;
266     case TCG_COND_GEU:
267         result = (u0 >= u1);
268         break;
269     case TCG_COND_LEU:
270         result = (u0 <= u1);
271         break;
272     case TCG_COND_GTU:
273         result = (u0 > u1);
274         break;
275     case TCG_COND_TSTEQ:
276         result = (u0 & u1) == 0;
277         break;
278     case TCG_COND_TSTNE:
279         result = (u0 & u1) != 0;
280         break;
281     default:
282         g_assert_not_reached();
283     }
284     return result;
285 }
286 
/*
 * Perform a guest load for the interpreter by dispatching on the
 * size/sign bits of @oi to the appropriate slow-path helper.
 * @tb_ptr serves as the "return address" the helpers use for
 * unwinding/exception reporting.  Signed sub-word loads return the
 * value sign extended into 64 bits.
 */
static uint64_t tci_qemu_ld(CPUArchState *env, uint64_t taddr,
                            MemOpIdx oi, const void *tb_ptr)
{
    MemOp mop = get_memop(oi);
    uintptr_t ra = (uintptr_t)tb_ptr;

    switch (mop & MO_SSIZE) {
    case MO_UB:
        return helper_ldub_mmu(env, taddr, oi, ra);
    case MO_SB:
        return helper_ldsb_mmu(env, taddr, oi, ra);
    case MO_UW:
        return helper_lduw_mmu(env, taddr, oi, ra);
    case MO_SW:
        return helper_ldsw_mmu(env, taddr, oi, ra);
    case MO_UL:
        return helper_ldul_mmu(env, taddr, oi, ra);
    case MO_SL:
        return helper_ldsl_mmu(env, taddr, oi, ra);
    case MO_UQ:
        return helper_ldq_mmu(env, taddr, oi, ra);
    default:
        g_assert_not_reached();
    }
}
312 
/*
 * Perform a guest store for the interpreter by dispatching on the
 * size bits of @oi to the appropriate slow-path helper.  Only MO_SIZE
 * matters here (stores have no signedness).  @tb_ptr serves as the
 * "return address" the helpers use for unwinding/exception reporting.
 */
static void tci_qemu_st(CPUArchState *env, uint64_t taddr, uint64_t val,
                        MemOpIdx oi, const void *tb_ptr)
{
    MemOp mop = get_memop(oi);
    uintptr_t ra = (uintptr_t)tb_ptr;

    switch (mop & MO_SIZE) {
    case MO_UB:
        helper_stb_mmu(env, taddr, val, oi, ra);
        break;
    case MO_UW:
        helper_stw_mmu(env, taddr, val, oi, ra);
        break;
    case MO_UL:
        helper_stl_mmu(env, taddr, val, oi, ra);
        break;
    case MO_UQ:
        helper_stq_mmu(env, taddr, val, oi, ra);
        break;
    default:
        g_assert_not_reached();
    }
}
336 
337 #if TCG_TARGET_REG_BITS == 64
338 # define CASE_32_64(x) \
339         case glue(glue(INDEX_op_, x), _i64): \
340         case glue(glue(INDEX_op_, x), _i32):
341 # define CASE_64(x) \
342         case glue(glue(INDEX_op_, x), _i64):
343 #else
344 # define CASE_32_64(x) \
345         case glue(glue(INDEX_op_, x), _i32):
346 # define CASE_64(x)
347 #endif
348 
349 /* Interpret pseudo code in tb. */
350 /*
351  * Disable CFI checks.
352  * One possible operation in the pseudo code is a call to binary code.
353  * Therefore, disable CFI checks in the interpreter function
354  */
355 uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
356                                             const void *v_tb_ptr)
357 {
358     const uint32_t *tb_ptr = v_tb_ptr;
359     tcg_target_ulong regs[TCG_TARGET_NB_REGS];
360     uint64_t stack[(TCG_STATIC_CALL_ARGS_SIZE + TCG_STATIC_FRAME_SIZE)
361                    / sizeof(uint64_t)];
362 
363     regs[TCG_AREG0] = (tcg_target_ulong)env;
364     regs[TCG_REG_CALL_STACK] = (uintptr_t)stack;
365     tci_assert(tb_ptr);
366 
367     for (;;) {
368         uint32_t insn;
369         TCGOpcode opc;
370         TCGReg r0, r1, r2, r3, r4, r5;
371         tcg_target_ulong t1;
372         TCGCond condition;
373         uint8_t pos, len;
374         uint32_t tmp32;
375         uint64_t tmp64, taddr;
376         uint64_t T1, T2;
377         MemOpIdx oi;
378         int32_t ofs;
379         void *ptr;
380 
381         insn = *tb_ptr++;
382         opc = extract32(insn, 0, 8);
383 
384         switch (opc) {
385         case INDEX_op_call:
386             {
387                 void *call_slots[MAX_CALL_IARGS];
388                 ffi_cif *cif;
389                 void *func;
390                 unsigned i, s, n;
391 
392                 tci_args_nl(insn, tb_ptr, &len, &ptr);
393                 func = ((void **)ptr)[0];
394                 cif = ((void **)ptr)[1];
395 
396                 n = cif->nargs;
397                 for (i = s = 0; i < n; ++i) {
398                     ffi_type *t = cif->arg_types[i];
399                     call_slots[i] = &stack[s];
400                     s += DIV_ROUND_UP(t->size, 8);
401                 }
402 
403                 /* Helper functions may need to access the "return address" */
404                 tci_tb_ptr = (uintptr_t)tb_ptr;
405                 ffi_call(cif, func, stack, call_slots);
406             }
407 
408             switch (len) {
409             case 0: /* void */
410                 break;
411             case 1: /* uint32_t */
412                 /*
413                  * The result winds up "left-aligned" in the stack[0] slot.
414                  * Note that libffi has an odd special case in that it will
415                  * always widen an integral result to ffi_arg.
416                  */
417                 if (sizeof(ffi_arg) == 8) {
418                     regs[TCG_REG_R0] = (uint32_t)stack[0];
419                 } else {
420                     regs[TCG_REG_R0] = *(uint32_t *)stack;
421                 }
422                 break;
423             case 2: /* uint64_t */
424                 /*
425                  * For TCG_TARGET_REG_BITS == 32, the register pair
426                  * must stay in host memory order.
427                  */
428                 memcpy(&regs[TCG_REG_R0], stack, 8);
429                 break;
430             case 3: /* Int128 */
431                 memcpy(&regs[TCG_REG_R0], stack, 16);
432                 break;
433             default:
434                 g_assert_not_reached();
435             }
436             break;
437 
438         case INDEX_op_br:
439             tci_args_l(insn, tb_ptr, &ptr);
440             tb_ptr = ptr;
441             continue;
442 #if TCG_TARGET_REG_BITS == 32
443         case INDEX_op_setcond2_i32:
444             tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &condition);
445             T1 = tci_uint64(regs[r2], regs[r1]);
446             T2 = tci_uint64(regs[r4], regs[r3]);
447             regs[r0] = tci_compare64(T1, T2, condition);
448             break;
449 #elif TCG_TARGET_REG_BITS == 64
450         case INDEX_op_setcond:
451             tci_args_rrrc(insn, &r0, &r1, &r2, &condition);
452             regs[r0] = tci_compare64(regs[r1], regs[r2], condition);
453             break;
454         case INDEX_op_movcond:
455             tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &condition);
456             tmp32 = tci_compare64(regs[r1], regs[r2], condition);
457             regs[r0] = regs[tmp32 ? r3 : r4];
458             break;
459 #endif
460         case INDEX_op_mov:
461             tci_args_rr(insn, &r0, &r1);
462             regs[r0] = regs[r1];
463             break;
464         case INDEX_op_tci_movi:
465             tci_args_ri(insn, &r0, &t1);
466             regs[r0] = t1;
467             break;
468         case INDEX_op_tci_movl:
469             tci_args_rl(insn, tb_ptr, &r0, &ptr);
470             regs[r0] = *(tcg_target_ulong *)ptr;
471             break;
472 
473             /* Load/store operations (32 bit). */
474 
475         CASE_32_64(ld8u)
476             tci_args_rrs(insn, &r0, &r1, &ofs);
477             ptr = (void *)(regs[r1] + ofs);
478             regs[r0] = *(uint8_t *)ptr;
479             break;
480         CASE_32_64(ld8s)
481             tci_args_rrs(insn, &r0, &r1, &ofs);
482             ptr = (void *)(regs[r1] + ofs);
483             regs[r0] = *(int8_t *)ptr;
484             break;
485         CASE_32_64(ld16u)
486             tci_args_rrs(insn, &r0, &r1, &ofs);
487             ptr = (void *)(regs[r1] + ofs);
488             regs[r0] = *(uint16_t *)ptr;
489             break;
490         CASE_32_64(ld16s)
491             tci_args_rrs(insn, &r0, &r1, &ofs);
492             ptr = (void *)(regs[r1] + ofs);
493             regs[r0] = *(int16_t *)ptr;
494             break;
495         case INDEX_op_ld_i32:
496         CASE_64(ld32u)
497             tci_args_rrs(insn, &r0, &r1, &ofs);
498             ptr = (void *)(regs[r1] + ofs);
499             regs[r0] = *(uint32_t *)ptr;
500             break;
501         CASE_32_64(st8)
502             tci_args_rrs(insn, &r0, &r1, &ofs);
503             ptr = (void *)(regs[r1] + ofs);
504             *(uint8_t *)ptr = regs[r0];
505             break;
506         CASE_32_64(st16)
507             tci_args_rrs(insn, &r0, &r1, &ofs);
508             ptr = (void *)(regs[r1] + ofs);
509             *(uint16_t *)ptr = regs[r0];
510             break;
511         case INDEX_op_st_i32:
512         CASE_64(st32)
513             tci_args_rrs(insn, &r0, &r1, &ofs);
514             ptr = (void *)(regs[r1] + ofs);
515             *(uint32_t *)ptr = regs[r0];
516             break;
517 
518             /* Arithmetic operations (mixed 32/64 bit). */
519 
520         case INDEX_op_add:
521             tci_args_rrr(insn, &r0, &r1, &r2);
522             regs[r0] = regs[r1] + regs[r2];
523             break;
524         case INDEX_op_sub:
525             tci_args_rrr(insn, &r0, &r1, &r2);
526             regs[r0] = regs[r1] - regs[r2];
527             break;
528         case INDEX_op_mul:
529             tci_args_rrr(insn, &r0, &r1, &r2);
530             regs[r0] = regs[r1] * regs[r2];
531             break;
532         case INDEX_op_and:
533             tci_args_rrr(insn, &r0, &r1, &r2);
534             regs[r0] = regs[r1] & regs[r2];
535             break;
536         case INDEX_op_or:
537             tci_args_rrr(insn, &r0, &r1, &r2);
538             regs[r0] = regs[r1] | regs[r2];
539             break;
540         case INDEX_op_xor:
541             tci_args_rrr(insn, &r0, &r1, &r2);
542             regs[r0] = regs[r1] ^ regs[r2];
543             break;
544         case INDEX_op_andc:
545             tci_args_rrr(insn, &r0, &r1, &r2);
546             regs[r0] = regs[r1] & ~regs[r2];
547             break;
548         case INDEX_op_orc:
549             tci_args_rrr(insn, &r0, &r1, &r2);
550             regs[r0] = regs[r1] | ~regs[r2];
551             break;
552         case INDEX_op_eqv:
553             tci_args_rrr(insn, &r0, &r1, &r2);
554             regs[r0] = ~(regs[r1] ^ regs[r2]);
555             break;
556         case INDEX_op_nand:
557             tci_args_rrr(insn, &r0, &r1, &r2);
558             regs[r0] = ~(regs[r1] & regs[r2]);
559             break;
560         case INDEX_op_nor:
561             tci_args_rrr(insn, &r0, &r1, &r2);
562             regs[r0] = ~(regs[r1] | regs[r2]);
563             break;
564         case INDEX_op_neg:
565             tci_args_rr(insn, &r0, &r1);
566             regs[r0] = -regs[r1];
567             break;
568         case INDEX_op_not:
569             tci_args_rr(insn, &r0, &r1);
570             regs[r0] = ~regs[r1];
571             break;
572         case INDEX_op_ctpop:
573             tci_args_rr(insn, &r0, &r1);
574             regs[r0] = ctpop_tr(regs[r1]);
575             break;
576         case INDEX_op_muls2:
577             tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
578 #if TCG_TARGET_REG_BITS == 32
579             tmp64 = (int64_t)(int32_t)regs[r2] * (int32_t)regs[r3];
580             tci_write_reg64(regs, r1, r0, tmp64);
581 #else
582             muls64(&regs[r0], &regs[r1], regs[r2], regs[r3]);
583 #endif
584             break;
585         case INDEX_op_mulu2:
586             tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
587 #if TCG_TARGET_REG_BITS == 32
588             tmp64 = (uint64_t)(uint32_t)regs[r2] * (uint32_t)regs[r3];
589             tci_write_reg64(regs, r1, r0, tmp64);
590 #else
591             mulu64(&regs[r0], &regs[r1], regs[r2], regs[r3]);
592 #endif
593             break;
594 
595             /* Arithmetic operations (32 bit). */
596 
597         case INDEX_op_tci_divs32:
598             tci_args_rrr(insn, &r0, &r1, &r2);
599             regs[r0] = (int32_t)regs[r1] / (int32_t)regs[r2];
600             break;
601         case INDEX_op_tci_divu32:
602             tci_args_rrr(insn, &r0, &r1, &r2);
603             regs[r0] = (uint32_t)regs[r1] / (uint32_t)regs[r2];
604             break;
605         case INDEX_op_tci_rems32:
606             tci_args_rrr(insn, &r0, &r1, &r2);
607             regs[r0] = (int32_t)regs[r1] % (int32_t)regs[r2];
608             break;
609         case INDEX_op_tci_remu32:
610             tci_args_rrr(insn, &r0, &r1, &r2);
611             regs[r0] = (uint32_t)regs[r1] % (uint32_t)regs[r2];
612             break;
613         case INDEX_op_tci_clz32:
614             tci_args_rrr(insn, &r0, &r1, &r2);
615             tmp32 = regs[r1];
616             regs[r0] = tmp32 ? clz32(tmp32) : regs[r2];
617             break;
618         case INDEX_op_tci_ctz32:
619             tci_args_rrr(insn, &r0, &r1, &r2);
620             tmp32 = regs[r1];
621             regs[r0] = tmp32 ? ctz32(tmp32) : regs[r2];
622             break;
623         case INDEX_op_tci_setcond32:
624             tci_args_rrrc(insn, &r0, &r1, &r2, &condition);
625             regs[r0] = tci_compare32(regs[r1], regs[r2], condition);
626             break;
627         case INDEX_op_tci_movcond32:
628             tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &condition);
629             tmp32 = tci_compare32(regs[r1], regs[r2], condition);
630             regs[r0] = regs[tmp32 ? r3 : r4];
631             break;
632 
633             /* Shift/rotate operations. */
634 
635         case INDEX_op_shl:
636             tci_args_rrr(insn, &r0, &r1, &r2);
637             regs[r0] = regs[r1] << (regs[r2] % TCG_TARGET_REG_BITS);
638             break;
639         case INDEX_op_shr:
640             tci_args_rrr(insn, &r0, &r1, &r2);
641             regs[r0] = regs[r1] >> (regs[r2] % TCG_TARGET_REG_BITS);
642             break;
643         case INDEX_op_sar:
644             tci_args_rrr(insn, &r0, &r1, &r2);
645             regs[r0] = ((tcg_target_long)regs[r1]
646                         >> (regs[r2] % TCG_TARGET_REG_BITS));
647             break;
648         case INDEX_op_tci_rotl32:
649             tci_args_rrr(insn, &r0, &r1, &r2);
650             regs[r0] = rol32(regs[r1], regs[r2] & 31);
651             break;
652         case INDEX_op_tci_rotr32:
653             tci_args_rrr(insn, &r0, &r1, &r2);
654             regs[r0] = ror32(regs[r1], regs[r2] & 31);
655             break;
656         case INDEX_op_deposit_i32:
657             tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len);
658             regs[r0] = deposit32(regs[r1], pos, len, regs[r2]);
659             break;
660         case INDEX_op_extract:
661             tci_args_rrbb(insn, &r0, &r1, &pos, &len);
662             regs[r0] = extract_tr(regs[r1], pos, len);
663             break;
664         case INDEX_op_sextract_i32:
665             tci_args_rrbb(insn, &r0, &r1, &pos, &len);
666             regs[r0] = sextract32(regs[r1], pos, len);
667             break;
668         case INDEX_op_brcond:
669             tci_args_rl(insn, tb_ptr, &r0, &ptr);
670             if (regs[r0]) {
671                 tb_ptr = ptr;
672             }
673             break;
674 #if TCG_TARGET_REG_BITS == 32 || TCG_TARGET_HAS_add2_i32
675         case INDEX_op_add2_i32:
676             tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
677             T1 = tci_uint64(regs[r3], regs[r2]);
678             T2 = tci_uint64(regs[r5], regs[r4]);
679             tci_write_reg64(regs, r1, r0, T1 + T2);
680             break;
681 #endif
682 #if TCG_TARGET_REG_BITS == 32 || TCG_TARGET_HAS_sub2_i32
683         case INDEX_op_sub2_i32:
684             tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
685             T1 = tci_uint64(regs[r3], regs[r2]);
686             T2 = tci_uint64(regs[r5], regs[r4]);
687             tci_write_reg64(regs, r1, r0, T1 - T2);
688             break;
689 #endif
690         case INDEX_op_bswap16:
691             tci_args_rr(insn, &r0, &r1);
692             regs[r0] = bswap16(regs[r1]);
693             break;
694         case INDEX_op_bswap32:
695             tci_args_rr(insn, &r0, &r1);
696             regs[r0] = bswap32(regs[r1]);
697             break;
698 #if TCG_TARGET_REG_BITS == 64
699             /* Load/store operations (64 bit). */
700 
701         case INDEX_op_ld32s_i64:
702             tci_args_rrs(insn, &r0, &r1, &ofs);
703             ptr = (void *)(regs[r1] + ofs);
704             regs[r0] = *(int32_t *)ptr;
705             break;
706         case INDEX_op_ld_i64:
707             tci_args_rrs(insn, &r0, &r1, &ofs);
708             ptr = (void *)(regs[r1] + ofs);
709             regs[r0] = *(uint64_t *)ptr;
710             break;
711         case INDEX_op_st_i64:
712             tci_args_rrs(insn, &r0, &r1, &ofs);
713             ptr = (void *)(regs[r1] + ofs);
714             *(uint64_t *)ptr = regs[r0];
715             break;
716 
717             /* Arithmetic operations (64 bit). */
718 
719         case INDEX_op_divs:
720             tci_args_rrr(insn, &r0, &r1, &r2);
721             regs[r0] = (int64_t)regs[r1] / (int64_t)regs[r2];
722             break;
723         case INDEX_op_divu:
724             tci_args_rrr(insn, &r0, &r1, &r2);
725             regs[r0] = (uint64_t)regs[r1] / (uint64_t)regs[r2];
726             break;
727         case INDEX_op_rems:
728             tci_args_rrr(insn, &r0, &r1, &r2);
729             regs[r0] = (int64_t)regs[r1] % (int64_t)regs[r2];
730             break;
731         case INDEX_op_remu:
732             tci_args_rrr(insn, &r0, &r1, &r2);
733             regs[r0] = (uint64_t)regs[r1] % (uint64_t)regs[r2];
734             break;
735         case INDEX_op_clz:
736             tci_args_rrr(insn, &r0, &r1, &r2);
737             regs[r0] = regs[r1] ? clz64(regs[r1]) : regs[r2];
738             break;
739         case INDEX_op_ctz:
740             tci_args_rrr(insn, &r0, &r1, &r2);
741             regs[r0] = regs[r1] ? ctz64(regs[r1]) : regs[r2];
742             break;
743 #if TCG_TARGET_HAS_add2_i64
744         case INDEX_op_add2_i64:
745             tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
746             T1 = regs[r2] + regs[r4];
747             T2 = regs[r3] + regs[r5] + (T1 < regs[r2]);
748             regs[r0] = T1;
749             regs[r1] = T2;
750             break;
751 #endif
752 #if TCG_TARGET_HAS_add2_i64
753         case INDEX_op_sub2_i64:
754             tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
755             T1 = regs[r2] - regs[r4];
756             T2 = regs[r3] - regs[r5] - (regs[r2] < regs[r4]);
757             regs[r0] = T1;
758             regs[r1] = T2;
759             break;
760 #endif
761 
762             /* Shift/rotate operations (64 bit). */
763 
764         case INDEX_op_rotl:
765             tci_args_rrr(insn, &r0, &r1, &r2);
766             regs[r0] = rol64(regs[r1], regs[r2] & 63);
767             break;
768         case INDEX_op_rotr:
769             tci_args_rrr(insn, &r0, &r1, &r2);
770             regs[r0] = ror64(regs[r1], regs[r2] & 63);
771             break;
772         case INDEX_op_deposit_i64:
773             tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len);
774             regs[r0] = deposit64(regs[r1], pos, len, regs[r2]);
775             break;
776         case INDEX_op_sextract_i64:
777             tci_args_rrbb(insn, &r0, &r1, &pos, &len);
778             regs[r0] = sextract64(regs[r1], pos, len);
779             break;
780         case INDEX_op_ext_i32_i64:
781             tci_args_rr(insn, &r0, &r1);
782             regs[r0] = (int32_t)regs[r1];
783             break;
784         case INDEX_op_extu_i32_i64:
785             tci_args_rr(insn, &r0, &r1);
786             regs[r0] = (uint32_t)regs[r1];
787             break;
788         case INDEX_op_bswap64:
789             tci_args_rr(insn, &r0, &r1);
790             regs[r0] = bswap64(regs[r1]);
791             break;
792 #endif /* TCG_TARGET_REG_BITS == 64 */
793 
794             /* QEMU specific operations. */
795 
796         case INDEX_op_exit_tb:
797             tci_args_l(insn, tb_ptr, &ptr);
798             return (uintptr_t)ptr;
799 
800         case INDEX_op_goto_tb:
801             tci_args_l(insn, tb_ptr, &ptr);
802             tb_ptr = *(void **)ptr;
803             break;
804 
805         case INDEX_op_goto_ptr:
806             tci_args_r(insn, &r0);
807             ptr = (void *)regs[r0];
808             if (!ptr) {
809                 return 0;
810             }
811             tb_ptr = ptr;
812             break;
813 
814         case INDEX_op_qemu_ld_i32:
815             tci_args_rrm(insn, &r0, &r1, &oi);
816             taddr = regs[r1];
817             regs[r0] = tci_qemu_ld(env, taddr, oi, tb_ptr);
818             break;
819 
820         case INDEX_op_qemu_ld_i64:
821             if (TCG_TARGET_REG_BITS == 64) {
822                 tci_args_rrm(insn, &r0, &r1, &oi);
823                 taddr = regs[r1];
824             } else {
825                 tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
826                 taddr = regs[r2];
827                 oi = regs[r3];
828             }
829             tmp64 = tci_qemu_ld(env, taddr, oi, tb_ptr);
830             if (TCG_TARGET_REG_BITS == 32) {
831                 tci_write_reg64(regs, r1, r0, tmp64);
832             } else {
833                 regs[r0] = tmp64;
834             }
835             break;
836 
837         case INDEX_op_qemu_st_i32:
838             tci_args_rrm(insn, &r0, &r1, &oi);
839             taddr = regs[r1];
840             tci_qemu_st(env, taddr, regs[r0], oi, tb_ptr);
841             break;
842 
843         case INDEX_op_qemu_st_i64:
844             if (TCG_TARGET_REG_BITS == 64) {
845                 tci_args_rrm(insn, &r0, &r1, &oi);
846                 tmp64 = regs[r0];
847                 taddr = regs[r1];
848             } else {
849                 tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
850                 tmp64 = tci_uint64(regs[r1], regs[r0]);
851                 taddr = regs[r2];
852                 oi = regs[r3];
853             }
854             tci_qemu_st(env, taddr, tmp64, oi, tb_ptr);
855             break;
856 
857         case INDEX_op_mb:
858             /* Ensure ordering for all kinds */
859             smp_mb();
860             break;
861         default:
862             g_assert_not_reached();
863         }
864     }
865 }
866 
867 /*
868  * Disassembler that matches the interpreter
869  */
870 
/* Return the symbolic name of TCI register @r for disassembly output. */
static const char *str_r(TCGReg r)
{
    static const char regs[TCG_TARGET_NB_REGS][4] = {
        "r0", "r1", "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
        "r8", "r9", "r10", "r11", "r12", "r13", "env", "sp"
    };

    /* The table above hard-codes R14 == env and R15 == call stack. */
    QEMU_BUILD_BUG_ON(TCG_AREG0 != TCG_REG_R14);
    QEMU_BUILD_BUG_ON(TCG_REG_CALL_STACK != TCG_REG_R15);

    assert((unsigned)r < TCG_TARGET_NB_REGS);
    return regs[r];
}
884 
/*
 * Return the symbolic name of condition @c for disassembly output.
 * The second assert rejects any 4-bit encoding with no name in the
 * table (designated initializers leave unnamed slots zero-filled).
 */
static const char *str_c(TCGCond c)
{
    static const char cond[16][8] = {
        [TCG_COND_NEVER] = "never",
        [TCG_COND_ALWAYS] = "always",
        [TCG_COND_EQ] = "eq",
        [TCG_COND_NE] = "ne",
        [TCG_COND_LT] = "lt",
        [TCG_COND_GE] = "ge",
        [TCG_COND_LE] = "le",
        [TCG_COND_GT] = "gt",
        [TCG_COND_LTU] = "ltu",
        [TCG_COND_GEU] = "geu",
        [TCG_COND_LEU] = "leu",
        [TCG_COND_GTU] = "gtu",
        [TCG_COND_TSTEQ] = "tsteq",
        [TCG_COND_TSTNE] = "tstne",
    };

    assert((unsigned)c < ARRAY_SIZE(cond));
    assert(cond[c][0] != 0);
    return cond[c];
}
908 
909 /* Disassemble TCI bytecode. */
910 int print_insn_tci(bfd_vma addr, disassemble_info *info)
911 {
912     const uint32_t *tb_ptr = (const void *)(uintptr_t)addr;
913     const TCGOpDef *def;
914     const char *op_name;
915     uint32_t insn;
916     TCGOpcode op;
917     TCGReg r0, r1, r2, r3, r4, r5;
918     tcg_target_ulong i1;
919     int32_t s2;
920     TCGCond c;
921     MemOpIdx oi;
922     uint8_t pos, len;
923     void *ptr;
924 
925     /* TCI is always the host, so we don't need to load indirect. */
926     insn = *tb_ptr++;
927 
928     info->fprintf_func(info->stream, "%08x  ", insn);
929 
930     op = extract32(insn, 0, 8);
931     def = &tcg_op_defs[op];
932     op_name = def->name;
933 
934     switch (op) {
935     case INDEX_op_br:
936     case INDEX_op_exit_tb:
937     case INDEX_op_goto_tb:
938         tci_args_l(insn, tb_ptr, &ptr);
939         info->fprintf_func(info->stream, "%-12s  %p", op_name, ptr);
940         break;
941 
942     case INDEX_op_goto_ptr:
943         tci_args_r(insn, &r0);
944         info->fprintf_func(info->stream, "%-12s  %s", op_name, str_r(r0));
945         break;
946 
947     case INDEX_op_call:
948         tci_args_nl(insn, tb_ptr, &len, &ptr);
949         info->fprintf_func(info->stream, "%-12s  %d, %p", op_name, len, ptr);
950         break;
951 
952     case INDEX_op_brcond:
953         tci_args_rl(insn, tb_ptr, &r0, &ptr);
954         info->fprintf_func(info->stream, "%-12s  %s, 0, ne, %p",
955                            op_name, str_r(r0), ptr);
956         break;
957 
958     case INDEX_op_setcond:
959     case INDEX_op_tci_setcond32:
960         tci_args_rrrc(insn, &r0, &r1, &r2, &c);
961         info->fprintf_func(info->stream, "%-12s  %s, %s, %s, %s",
962                            op_name, str_r(r0), str_r(r1), str_r(r2), str_c(c));
963         break;
964 
965     case INDEX_op_tci_movi:
966         tci_args_ri(insn, &r0, &i1);
967         info->fprintf_func(info->stream, "%-12s  %s, 0x%" TCG_PRIlx,
968                            op_name, str_r(r0), i1);
969         break;
970 
971     case INDEX_op_tci_movl:
972         tci_args_rl(insn, tb_ptr, &r0, &ptr);
973         info->fprintf_func(info->stream, "%-12s  %s, %p",
974                            op_name, str_r(r0), ptr);
975         break;
976 
977     case INDEX_op_ld8u_i32:
978     case INDEX_op_ld8u_i64:
979     case INDEX_op_ld8s_i32:
980     case INDEX_op_ld8s_i64:
981     case INDEX_op_ld16u_i32:
982     case INDEX_op_ld16u_i64:
983     case INDEX_op_ld16s_i32:
984     case INDEX_op_ld16s_i64:
985     case INDEX_op_ld32u_i64:
986     case INDEX_op_ld32s_i64:
987     case INDEX_op_ld_i32:
988     case INDEX_op_ld_i64:
989     case INDEX_op_st8_i32:
990     case INDEX_op_st8_i64:
991     case INDEX_op_st16_i32:
992     case INDEX_op_st16_i64:
993     case INDEX_op_st32_i64:
994     case INDEX_op_st_i32:
995     case INDEX_op_st_i64:
996         tci_args_rrs(insn, &r0, &r1, &s2);
997         info->fprintf_func(info->stream, "%-12s  %s, %s, %d",
998                            op_name, str_r(r0), str_r(r1), s2);
999         break;
1000 
1001     case INDEX_op_bswap16:
1002     case INDEX_op_bswap32:
1003     case INDEX_op_ctpop:
1004     case INDEX_op_mov:
1005     case INDEX_op_neg:
1006     case INDEX_op_not:
1007     case INDEX_op_ext_i32_i64:
1008     case INDEX_op_extu_i32_i64:
1009     case INDEX_op_bswap64:
1010         tci_args_rr(insn, &r0, &r1);
1011         info->fprintf_func(info->stream, "%-12s  %s, %s",
1012                            op_name, str_r(r0), str_r(r1));
1013         break;
1014 
1015     case INDEX_op_add:
1016     case INDEX_op_and:
1017     case INDEX_op_andc:
1018     case INDEX_op_clz:
1019     case INDEX_op_ctz:
1020     case INDEX_op_divs:
1021     case INDEX_op_divu:
1022     case INDEX_op_eqv:
1023     case INDEX_op_mul:
1024     case INDEX_op_nand:
1025     case INDEX_op_nor:
1026     case INDEX_op_or:
1027     case INDEX_op_orc:
1028     case INDEX_op_rems:
1029     case INDEX_op_remu:
1030     case INDEX_op_rotl:
1031     case INDEX_op_rotr:
1032     case INDEX_op_sar:
1033     case INDEX_op_shl:
1034     case INDEX_op_shr:
1035     case INDEX_op_sub:
1036     case INDEX_op_xor:
1037     case INDEX_op_tci_ctz32:
1038     case INDEX_op_tci_clz32:
1039     case INDEX_op_tci_divs32:
1040     case INDEX_op_tci_divu32:
1041     case INDEX_op_tci_rems32:
1042     case INDEX_op_tci_remu32:
1043     case INDEX_op_tci_rotl32:
1044     case INDEX_op_tci_rotr32:
1045         tci_args_rrr(insn, &r0, &r1, &r2);
1046         info->fprintf_func(info->stream, "%-12s  %s, %s, %s",
1047                            op_name, str_r(r0), str_r(r1), str_r(r2));
1048         break;
1049 
1050     case INDEX_op_deposit_i32:
1051     case INDEX_op_deposit_i64:
1052         tci_args_rrrbb(insn, &r0, &r1, &r2, &pos, &len);
1053         info->fprintf_func(info->stream, "%-12s  %s, %s, %s, %d, %d",
1054                            op_name, str_r(r0), str_r(r1), str_r(r2), pos, len);
1055         break;
1056 
1057     case INDEX_op_extract:
1058     case INDEX_op_sextract_i32:
1059     case INDEX_op_sextract_i64:
1060         tci_args_rrbb(insn, &r0, &r1, &pos, &len);
1061         info->fprintf_func(info->stream, "%-12s  %s,%s,%d,%d",
1062                            op_name, str_r(r0), str_r(r1), pos, len);
1063         break;
1064 
1065     case INDEX_op_tci_movcond32:
1066     case INDEX_op_movcond:
1067     case INDEX_op_setcond2_i32:
1068         tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &c);
1069         info->fprintf_func(info->stream, "%-12s  %s, %s, %s, %s, %s, %s",
1070                            op_name, str_r(r0), str_r(r1), str_r(r2),
1071                            str_r(r3), str_r(r4), str_c(c));
1072         break;
1073 
1074     case INDEX_op_muls2:
1075     case INDEX_op_mulu2:
1076         tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
1077         info->fprintf_func(info->stream, "%-12s  %s, %s, %s, %s",
1078                            op_name, str_r(r0), str_r(r1),
1079                            str_r(r2), str_r(r3));
1080         break;
1081 
1082     case INDEX_op_add2_i32:
1083     case INDEX_op_add2_i64:
1084     case INDEX_op_sub2_i32:
1085     case INDEX_op_sub2_i64:
1086         tci_args_rrrrrr(insn, &r0, &r1, &r2, &r3, &r4, &r5);
1087         info->fprintf_func(info->stream, "%-12s  %s, %s, %s, %s, %s, %s",
1088                            op_name, str_r(r0), str_r(r1), str_r(r2),
1089                            str_r(r3), str_r(r4), str_r(r5));
1090         break;
1091 
1092     case INDEX_op_qemu_ld_i64:
1093     case INDEX_op_qemu_st_i64:
1094         if (TCG_TARGET_REG_BITS == 32) {
1095             tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
1096             info->fprintf_func(info->stream, "%-12s  %s, %s, %s, %s",
1097                                op_name, str_r(r0), str_r(r1),
1098                                str_r(r2), str_r(r3));
1099             break;
1100         }
1101         /* fall through */
1102     case INDEX_op_qemu_ld_i32:
1103     case INDEX_op_qemu_st_i32:
1104         tci_args_rrm(insn, &r0, &r1, &oi);
1105         info->fprintf_func(info->stream, "%-12s  %s, %s, %x",
1106                            op_name, str_r(r0), str_r(r1), oi);
1107         break;
1108 
1109     case 0:
1110         /* tcg_out_nop_fill uses zeros */
1111         if (insn == 0) {
1112             info->fprintf_func(info->stream, "align");
1113             break;
1114         }
1115         /* fall through */
1116 
1117     default:
1118         info->fprintf_func(info->stream, "illegal opcode %d", op);
1119         break;
1120     }
1121 
1122     return sizeof(insn);
1123 }
1124