xref: /openbmc/qemu/target/sh4/translate.c (revision 1580b897)
1 /*
2  *  SH4 translation
3  *
4  *  Copyright (c) 2005 Samuel Tardieu
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #define DEBUG_DISAS
21 
22 #include "qemu/osdep.h"
23 #include "cpu.h"
24 #include "disas/disas.h"
25 #include "exec/exec-all.h"
26 #include "tcg/tcg-op.h"
27 #include "exec/cpu_ldst.h"
28 #include "exec/helper-proto.h"
29 #include "exec/helper-gen.h"
30 #include "exec/translator.h"
31 #include "exec/log.h"
32 #include "qemu/qemu-print.h"
33 
34 
/*
 * Translation-time state for the SH4 front end.  One instance is threaded
 * through every gen_* helper and through the opcode decoder in this file.
 */
35 typedef struct DisasContext {
36     DisasContextBase base; /* generic state: pc_next, tb, is_jmp, singlestep_enabled */
37 
38     uint32_t tbflags;  /* should stay unmodified during the TB translation */
39     uint32_t envflags; /* should stay in sync with env->flags using TCG ops */
40     int memidx;        /* index argument handed to tcg_gen_qemu_ld/st_i32 */
41     int gbank;         /* XORed into general register numbers by REG()/ALTREG() */
42     int fbank;         /* XORed into FP register numbers by FREG() and the fpr64 helpers */
43     uint32_t delayed_pc; /* delay-slot branch target, or (uint32_t)-1 if not statically known */
44     uint32_t features; /* feature bits, tested against SH_FEATURE_SH4A by CHECK_SH4A */
45 
46     uint16_t opcode;   /* 16-bit instruction word being decoded (see the Bx_y macros) */
47 
48     bool has_movcal;   /* while set, the decoder discards the movca.l backup on the
                            first opcode that is neither ocbi nor movca.l, then clears
                            it; where it gets set is outside this view */
49 } DisasContext;
50 
/*
 * IS_USER(ctx): non-zero when the code being translated is unprivileged.
 * User-only builds are always unprivileged; otherwise the SR_MD bit of the
 * TB flags is tested, and a clear bit means user mode.  CHECK_PRIVILEGED
 * relies on this to reject privileged opcodes.
 */
51 #if defined(CONFIG_USER_ONLY)
52 #define IS_USER(ctx) 1
53 #else
54 #define IS_USER(ctx) (!(ctx->tbflags & (1u << SR_MD)))
55 #endif
56 
57 /* Target-specific values for ctx->base.is_jmp.  */
58 /* We want to exit back to the cpu loop for some reason.
      Usually this is to recognize interrupts immediately.
      In the code visible here it is also set after opcodes that change
      state the translator has cached (rte, frchg, fschg, fpchg).  */
60 #define DISAS_STOP    DISAS_TARGET_0
61 
62 /* global register indexes */
/*
 * All of these are TCG globals backed by CPUSH4State fields and are created
 * once in sh4_translate_init().
 *
 * cpu_gregs[0..23] map env->gregs[0..23] (R0-R7 of bank 0, R8-R15, R0-R7 of
 * bank 1).  Entries 24..31 are copies of entries 8..15, so a register number
 * XORed with 0x10 (see ALTREG) still resolves R8-R15 to the same global.
 *
 * The status register is kept split: cpu_sr holds SR with the Q, M and T
 * bits cleared, while cpu_sr_q/cpu_sr_m/cpu_sr_t each hold one of those bits
 * as a 0/1 value (see gen_write_sr).
 */
63 static TCGv cpu_gregs[32];
64 static TCGv cpu_sr, cpu_sr_m, cpu_sr_q, cpu_sr_t;
65 static TCGv cpu_pc, cpu_ssr, cpu_spc, cpu_gbr;
66 static TCGv cpu_vbr, cpu_sgr, cpu_dbr, cpu_mach, cpu_macl;
67 static TCGv cpu_pr, cpu_fpscr, cpu_fpul;
68 static TCGv cpu_lock_addr, cpu_lock_value;
69 static TCGv cpu_fregs[32];
70 
71 /* internal register indexes */
/* Translator bookkeeping mirrored in env: flags, delayed_pc, delayed_cond. */
72 static TCGv cpu_flags, cpu_delayed_pc, cpu_delayed_cond;
73 
74 #include "exec/gen-icount.h"
75 
76 void sh4_translate_init(void)
77 {
78     int i;
79     static const char * const gregnames[24] = {
80         "R0_BANK0", "R1_BANK0", "R2_BANK0", "R3_BANK0",
81         "R4_BANK0", "R5_BANK0", "R6_BANK0", "R7_BANK0",
82         "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15",
83         "R0_BANK1", "R1_BANK1", "R2_BANK1", "R3_BANK1",
84         "R4_BANK1", "R5_BANK1", "R6_BANK1", "R7_BANK1"
85     };
86     static const char * const fregnames[32] = {
87          "FPR0_BANK0",  "FPR1_BANK0",  "FPR2_BANK0",  "FPR3_BANK0",
88          "FPR4_BANK0",  "FPR5_BANK0",  "FPR6_BANK0",  "FPR7_BANK0",
89          "FPR8_BANK0",  "FPR9_BANK0", "FPR10_BANK0", "FPR11_BANK0",
90         "FPR12_BANK0", "FPR13_BANK0", "FPR14_BANK0", "FPR15_BANK0",
91          "FPR0_BANK1",  "FPR1_BANK1",  "FPR2_BANK1",  "FPR3_BANK1",
92          "FPR4_BANK1",  "FPR5_BANK1",  "FPR6_BANK1",  "FPR7_BANK1",
93          "FPR8_BANK1",  "FPR9_BANK1", "FPR10_BANK1", "FPR11_BANK1",
94         "FPR12_BANK1", "FPR13_BANK1", "FPR14_BANK1", "FPR15_BANK1",
95     };
96 
97     for (i = 0; i < 24; i++) {
98         cpu_gregs[i] = tcg_global_mem_new_i32(cpu_env,
99                                               offsetof(CPUSH4State, gregs[i]),
100                                               gregnames[i]);
101     }
102     memcpy(cpu_gregs + 24, cpu_gregs + 8, 8 * sizeof(TCGv));
103 
104     cpu_pc = tcg_global_mem_new_i32(cpu_env,
105                                     offsetof(CPUSH4State, pc), "PC");
106     cpu_sr = tcg_global_mem_new_i32(cpu_env,
107                                     offsetof(CPUSH4State, sr), "SR");
108     cpu_sr_m = tcg_global_mem_new_i32(cpu_env,
109                                       offsetof(CPUSH4State, sr_m), "SR_M");
110     cpu_sr_q = tcg_global_mem_new_i32(cpu_env,
111                                       offsetof(CPUSH4State, sr_q), "SR_Q");
112     cpu_sr_t = tcg_global_mem_new_i32(cpu_env,
113                                       offsetof(CPUSH4State, sr_t), "SR_T");
114     cpu_ssr = tcg_global_mem_new_i32(cpu_env,
115                                      offsetof(CPUSH4State, ssr), "SSR");
116     cpu_spc = tcg_global_mem_new_i32(cpu_env,
117                                      offsetof(CPUSH4State, spc), "SPC");
118     cpu_gbr = tcg_global_mem_new_i32(cpu_env,
119                                      offsetof(CPUSH4State, gbr), "GBR");
120     cpu_vbr = tcg_global_mem_new_i32(cpu_env,
121                                      offsetof(CPUSH4State, vbr), "VBR");
122     cpu_sgr = tcg_global_mem_new_i32(cpu_env,
123                                      offsetof(CPUSH4State, sgr), "SGR");
124     cpu_dbr = tcg_global_mem_new_i32(cpu_env,
125                                      offsetof(CPUSH4State, dbr), "DBR");
126     cpu_mach = tcg_global_mem_new_i32(cpu_env,
127                                       offsetof(CPUSH4State, mach), "MACH");
128     cpu_macl = tcg_global_mem_new_i32(cpu_env,
129                                       offsetof(CPUSH4State, macl), "MACL");
130     cpu_pr = tcg_global_mem_new_i32(cpu_env,
131                                     offsetof(CPUSH4State, pr), "PR");
132     cpu_fpscr = tcg_global_mem_new_i32(cpu_env,
133                                        offsetof(CPUSH4State, fpscr), "FPSCR");
134     cpu_fpul = tcg_global_mem_new_i32(cpu_env,
135                                       offsetof(CPUSH4State, fpul), "FPUL");
136 
137     cpu_flags = tcg_global_mem_new_i32(cpu_env,
138 				       offsetof(CPUSH4State, flags), "_flags_");
139     cpu_delayed_pc = tcg_global_mem_new_i32(cpu_env,
140 					    offsetof(CPUSH4State, delayed_pc),
141 					    "_delayed_pc_");
142     cpu_delayed_cond = tcg_global_mem_new_i32(cpu_env,
143                                               offsetof(CPUSH4State,
144                                                        delayed_cond),
145                                               "_delayed_cond_");
146     cpu_lock_addr = tcg_global_mem_new_i32(cpu_env,
147                                            offsetof(CPUSH4State, lock_addr),
148                                            "_lock_addr_");
149     cpu_lock_value = tcg_global_mem_new_i32(cpu_env,
150                                             offsetof(CPUSH4State, lock_value),
151                                             "_lock_value_");
152 
153     for (i = 0; i < 32; i++)
154         cpu_fregs[i] = tcg_global_mem_new_i32(cpu_env,
155                                               offsetof(CPUSH4State, fregs[i]),
156                                               fregnames[i]);
157 }
158 
159 void superh_cpu_dump_state(CPUState *cs, FILE *f, int flags)
160 {
161     SuperHCPU *cpu = SUPERH_CPU(cs);
162     CPUSH4State *env = &cpu->env;
163     int i;
164 
165     qemu_fprintf(f, "pc=0x%08x sr=0x%08x pr=0x%08x fpscr=0x%08x\n",
166                  env->pc, cpu_read_sr(env), env->pr, env->fpscr);
167     qemu_fprintf(f, "spc=0x%08x ssr=0x%08x gbr=0x%08x vbr=0x%08x\n",
168                  env->spc, env->ssr, env->gbr, env->vbr);
169     qemu_fprintf(f, "sgr=0x%08x dbr=0x%08x delayed_pc=0x%08x fpul=0x%08x\n",
170                  env->sgr, env->dbr, env->delayed_pc, env->fpul);
171     for (i = 0; i < 24; i += 4) {
172         qemu_printf("r%d=0x%08x r%d=0x%08x r%d=0x%08x r%d=0x%08x\n",
173 		    i, env->gregs[i], i + 1, env->gregs[i + 1],
174 		    i + 2, env->gregs[i + 2], i + 3, env->gregs[i + 3]);
175     }
176     if (env->flags & DELAY_SLOT) {
177         qemu_printf("in delay slot (delayed_pc=0x%08x)\n",
178 		    env->delayed_pc);
179     } else if (env->flags & DELAY_SLOT_CONDITIONAL) {
180         qemu_printf("in conditional delay slot (delayed_pc=0x%08x)\n",
181 		    env->delayed_pc);
182     } else if (env->flags & DELAY_SLOT_RTE) {
183         qemu_fprintf(f, "in rte delay slot (delayed_pc=0x%08x)\n",
184                      env->delayed_pc);
185     }
186 }
187 
188 static void gen_read_sr(TCGv dst)
189 {
190     TCGv t0 = tcg_temp_new();
191     tcg_gen_shli_i32(t0, cpu_sr_q, SR_Q);
192     tcg_gen_or_i32(dst, dst, t0);
193     tcg_gen_shli_i32(t0, cpu_sr_m, SR_M);
194     tcg_gen_or_i32(dst, dst, t0);
195     tcg_gen_shli_i32(t0, cpu_sr_t, SR_T);
196     tcg_gen_or_i32(dst, cpu_sr, t0);
197     tcg_temp_free_i32(t0);
198 }
199 
200 static void gen_write_sr(TCGv src)
201 {
202     tcg_gen_andi_i32(cpu_sr, src,
203                      ~((1u << SR_Q) | (1u << SR_M) | (1u << SR_T)));
204     tcg_gen_extract_i32(cpu_sr_q, src, SR_Q, 1);
205     tcg_gen_extract_i32(cpu_sr_m, src, SR_M, 1);
206     tcg_gen_extract_i32(cpu_sr_t, src, SR_T, 1);
207 }
208 
209 static inline void gen_save_cpu_state(DisasContext *ctx, bool save_pc)
210 {
211     if (save_pc) {
212         tcg_gen_movi_i32(cpu_pc, ctx->base.pc_next);
213     }
214     if (ctx->delayed_pc != (uint32_t) -1) {
215         tcg_gen_movi_i32(cpu_delayed_pc, ctx->delayed_pc);
216     }
217     if ((ctx->tbflags & TB_FLAG_ENVFLAGS_MASK) != ctx->envflags) {
218         tcg_gen_movi_i32(cpu_flags, ctx->envflags);
219     }
220 }
221 
222 static inline bool use_exit_tb(DisasContext *ctx)
223 {
224     return (ctx->tbflags & GUSA_EXCLUSIVE) != 0;
225 }
226 
227 static bool use_goto_tb(DisasContext *ctx, target_ulong dest)
228 {
229     if (use_exit_tb(ctx)) {
230         return false;
231     }
232     return translator_use_goto_tb(&ctx->base, dest);
233 }
234 
235 static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
236 {
237     if (use_goto_tb(ctx, dest)) {
238         tcg_gen_goto_tb(n);
239         tcg_gen_movi_i32(cpu_pc, dest);
240         tcg_gen_exit_tb(ctx->base.tb, n);
241     } else {
242         tcg_gen_movi_i32(cpu_pc, dest);
243         if (ctx->base.singlestep_enabled) {
244             gen_helper_debug(cpu_env);
245         } else if (use_exit_tb(ctx)) {
246             tcg_gen_exit_tb(NULL, 0);
247         } else {
248             tcg_gen_lookup_and_goto_ptr();
249         }
250     }
251     ctx->base.is_jmp = DISAS_NORETURN;
252 }
253 
254 static void gen_jump(DisasContext * ctx)
255 {
256     if (ctx->delayed_pc == -1) {
257 	/* Target is not statically known, it comes necessarily from a
258 	   delayed jump as immediate jump are conditinal jumps */
259 	tcg_gen_mov_i32(cpu_pc, cpu_delayed_pc);
260         tcg_gen_discard_i32(cpu_delayed_pc);
261         if (ctx->base.singlestep_enabled) {
262             gen_helper_debug(cpu_env);
263         } else if (use_exit_tb(ctx)) {
264             tcg_gen_exit_tb(NULL, 0);
265         } else {
266             tcg_gen_lookup_and_goto_ptr();
267         }
268         ctx->base.is_jmp = DISAS_NORETURN;
269     } else {
270 	gen_goto_tb(ctx, 0, ctx->delayed_pc);
271     }
272 }
273 
274 /* Immediate conditional jump (bt or bf) */
275 static void gen_conditional_jump(DisasContext *ctx, target_ulong dest,
276                                  bool jump_if_true)
277 {
278     TCGLabel *l1 = gen_new_label();
279     TCGCond cond_not_taken = jump_if_true ? TCG_COND_EQ : TCG_COND_NE;
280 
281     if (ctx->tbflags & GUSA_EXCLUSIVE) {
282         /* When in an exclusive region, we must continue to the end.
283            Therefore, exit the region on a taken branch, but otherwise
284            fall through to the next instruction.  */
285         tcg_gen_brcondi_i32(cond_not_taken, cpu_sr_t, 0, l1);
286         tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~GUSA_MASK);
287         /* Note that this won't actually use a goto_tb opcode because we
288            disallow it in use_goto_tb, but it handles exit + singlestep.  */
289         gen_goto_tb(ctx, 0, dest);
290         gen_set_label(l1);
291         ctx->base.is_jmp = DISAS_NEXT;
292         return;
293     }
294 
295     gen_save_cpu_state(ctx, false);
296     tcg_gen_brcondi_i32(cond_not_taken, cpu_sr_t, 0, l1);
297     gen_goto_tb(ctx, 0, dest);
298     gen_set_label(l1);
299     gen_goto_tb(ctx, 1, ctx->base.pc_next + 2);
300     ctx->base.is_jmp = DISAS_NORETURN;
301 }
302 
303 /* Delayed conditional jump (bt or bf) */
304 static void gen_delayed_conditional_jump(DisasContext * ctx)
305 {
306     TCGLabel *l1 = gen_new_label();
307     TCGv ds = tcg_temp_new();
308 
309     tcg_gen_mov_i32(ds, cpu_delayed_cond);
310     tcg_gen_discard_i32(cpu_delayed_cond);
311 
312     if (ctx->tbflags & GUSA_EXCLUSIVE) {
313         /* When in an exclusive region, we must continue to the end.
314            Therefore, exit the region on a taken branch, but otherwise
315            fall through to the next instruction.  */
316         tcg_gen_brcondi_i32(TCG_COND_EQ, ds, 0, l1);
317 
318         /* Leave the gUSA region.  */
319         tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~GUSA_MASK);
320         gen_jump(ctx);
321 
322         gen_set_label(l1);
323         ctx->base.is_jmp = DISAS_NEXT;
324         return;
325     }
326 
327     tcg_gen_brcondi_i32(TCG_COND_NE, ds, 0, l1);
328     gen_goto_tb(ctx, 1, ctx->base.pc_next + 2);
329     gen_set_label(l1);
330     gen_jump(ctx);
331 }
332 
333 static inline void gen_load_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
334 {
335     /* We have already signaled illegal instruction for odd Dr.  */
336     tcg_debug_assert((reg & 1) == 0);
337     reg ^= ctx->fbank;
338     tcg_gen_concat_i32_i64(t, cpu_fregs[reg + 1], cpu_fregs[reg]);
339 }
340 
341 static inline void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
342 {
343     /* We have already signaled illegal instruction for odd Dr.  */
344     tcg_debug_assert((reg & 1) == 0);
345     reg ^= ctx->fbank;
346     tcg_gen_extr_i64_i32(cpu_fregs[reg + 1], cpu_fregs[reg], t);
347 }
348 
/*
 * Opcode field accessors.  Every macro in this group reads a variable named
 * ctx that must be in scope at the point of use.  Bh_l yields bits h..l of
 * ctx->opcode as an unsigned field; an "s" suffix means the field is
 * sign-extended (B7_0s from 8 bits, B11_0s from 12 bits).
 */
349 #define B3_0 (ctx->opcode & 0xf)
350 #define B6_4 ((ctx->opcode >> 4) & 0x7)
351 #define B7_4 ((ctx->opcode >> 4) & 0xf)
352 #define B7_0 (ctx->opcode & 0xff)
353 #define B7_0s ((int32_t) (int8_t) (ctx->opcode & 0xff))
354 #define B11_0s (ctx->opcode & 0x800 ? 0xfffff000 | (ctx->opcode & 0xfff) : \
355   (ctx->opcode & 0xfff))
356 #define B11_8 ((ctx->opcode >> 8) & 0xf)
357 #define B15_12 ((ctx->opcode >> 12) & 0xf)
358 
/*
 * Register selectors.  REG(x) is general register x after XORing in the
 * current bank (ctx->gbank); ALTREG(x) additionally flips bit 4 of the
 * index, which selects the other bank's copy of R0-R7 in cpu_gregs.
 * FREG(x) is FP register x after XORing in ctx->fbank.
 */
359 #define REG(x)     cpu_gregs[(x) ^ ctx->gbank]
360 #define ALTREG(x)  cpu_gregs[(x) ^ ctx->gbank ^ 0x10]
361 #define FREG(x)    cpu_fregs[(x) ^ ctx->fbank]
362 
/* Move bit 0 of x up to bit 4 and keep bits 3..1 in place. */
363 #define XHACK(x) ((((x) & 1 ) << 4) | ((x) & 0xe))
364 
/*
 * Decode-time guards.  Each CHECK_* macro expands to a complete if
 * statement that jumps to a label (do_illegal_slot, do_illegal or
 * do_fpu_disabled) which the enclosing function is expected to provide.
 * They are written at their use sites without a trailing semicolon, so
 * their form must stay a bare statement.
 */
/* Reject the opcode when any DELAY_SLOT_MASK bit is set in envflags. */
365 #define CHECK_NOT_DELAY_SLOT \
366     if (ctx->envflags & DELAY_SLOT_MASK) {  \
367         goto do_illegal_slot;               \
368     }
369 
/* Reject the opcode when translating user-mode code. */
370 #define CHECK_PRIVILEGED \
371     if (IS_USER(ctx)) {                     \
372         goto do_illegal;                    \
373     }
374 
/* Branch to do_fpu_disabled when the SR_FD bit is set in the TB flags. */
375 #define CHECK_FPU_ENABLED \
376     if (ctx->tbflags & (1u << SR_FD)) {     \
377         goto do_fpu_disabled;               \
378     }
379 
/* Require FPSCR_PR to be clear in the TB flags. */
380 #define CHECK_FPSCR_PR_0 \
381     if (ctx->tbflags & FPSCR_PR) {          \
382         goto do_illegal;                    \
383     }
384 
/* Require FPSCR_PR to be set in the TB flags. */
385 #define CHECK_FPSCR_PR_1 \
386     if (!(ctx->tbflags & FPSCR_PR)) {       \
387         goto do_illegal;                    \
388     }
389 
/* Require the SH_FEATURE_SH4A feature bit. */
390 #define CHECK_SH4A \
391     if (!(ctx->features & SH_FEATURE_SH4A)) { \
392         goto do_illegal;                      \
393     }
394 
395 static void _decode_opc(DisasContext * ctx)
396 {
397     /* This code tries to make movcal emulation sufficiently
398        accurate for Linux purposes.  This instruction writes
399        memory, and prior to that, always allocates a cache line.
400        It is used in two contexts:
401        - in memcpy, where data is copied in blocks, the first write
402        of to a block uses movca.l for performance.
403        - in arch/sh/mm/cache-sh4.c, movcal.l + ocbi combination is used
404        to flush the cache. Here, the data written by movcal.l is never
405        written to memory, and the data written is just bogus.
406 
407        To simulate this, we simulate movcal.l, we store the value to memory,
408        but we also remember the previous content. If we see ocbi, we check
409        if movcal.l for that address was done previously. If so, the write should
410        not have hit the memory, so we restore the previous content.
411        When we see an instruction that is neither movca.l
412        nor ocbi, the previous content is discarded.
413 
414        To optimize, we only try to flush stores when we're at the start of
415        TB, or if we already saw movca.l in this TB and did not flush stores
416        yet.  */
417     if (ctx->has_movcal)
418 	{
419 	  int opcode = ctx->opcode & 0xf0ff;
420 	  if (opcode != 0x0093 /* ocbi */
421 	      && opcode != 0x00c3 /* movca.l */)
422 	      {
423                   gen_helper_discard_movcal_backup(cpu_env);
424 		  ctx->has_movcal = 0;
425 	      }
426 	}
427 
428 #if 0
429     fprintf(stderr, "Translating opcode 0x%04x\n", ctx->opcode);
430 #endif
431 
432     switch (ctx->opcode) {
433     case 0x0019:		/* div0u */
434         tcg_gen_movi_i32(cpu_sr_m, 0);
435         tcg_gen_movi_i32(cpu_sr_q, 0);
436         tcg_gen_movi_i32(cpu_sr_t, 0);
437 	return;
438     case 0x000b:		/* rts */
439 	CHECK_NOT_DELAY_SLOT
440 	tcg_gen_mov_i32(cpu_delayed_pc, cpu_pr);
441         ctx->envflags |= DELAY_SLOT;
442 	ctx->delayed_pc = (uint32_t) - 1;
443 	return;
444     case 0x0028:		/* clrmac */
445 	tcg_gen_movi_i32(cpu_mach, 0);
446 	tcg_gen_movi_i32(cpu_macl, 0);
447 	return;
448     case 0x0048:		/* clrs */
449         tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_S));
450 	return;
451     case 0x0008:		/* clrt */
452         tcg_gen_movi_i32(cpu_sr_t, 0);
453 	return;
454     case 0x0038:		/* ldtlb */
455 	CHECK_PRIVILEGED
456         gen_helper_ldtlb(cpu_env);
457 	return;
458     case 0x002b:		/* rte */
459 	CHECK_PRIVILEGED
460 	CHECK_NOT_DELAY_SLOT
461         gen_write_sr(cpu_ssr);
462 	tcg_gen_mov_i32(cpu_delayed_pc, cpu_spc);
463         ctx->envflags |= DELAY_SLOT_RTE;
464 	ctx->delayed_pc = (uint32_t) - 1;
465         ctx->base.is_jmp = DISAS_STOP;
466 	return;
467     case 0x0058:		/* sets */
468         tcg_gen_ori_i32(cpu_sr, cpu_sr, (1u << SR_S));
469 	return;
470     case 0x0018:		/* sett */
471         tcg_gen_movi_i32(cpu_sr_t, 1);
472 	return;
473     case 0xfbfd:		/* frchg */
474         CHECK_FPSCR_PR_0
475 	tcg_gen_xori_i32(cpu_fpscr, cpu_fpscr, FPSCR_FR);
476         ctx->base.is_jmp = DISAS_STOP;
477 	return;
478     case 0xf3fd:		/* fschg */
479         CHECK_FPSCR_PR_0
480         tcg_gen_xori_i32(cpu_fpscr, cpu_fpscr, FPSCR_SZ);
481         ctx->base.is_jmp = DISAS_STOP;
482 	return;
483     case 0xf7fd:                /* fpchg */
484         CHECK_SH4A
485         tcg_gen_xori_i32(cpu_fpscr, cpu_fpscr, FPSCR_PR);
486         ctx->base.is_jmp = DISAS_STOP;
487         return;
488     case 0x0009:		/* nop */
489 	return;
490     case 0x001b:		/* sleep */
491 	CHECK_PRIVILEGED
492         tcg_gen_movi_i32(cpu_pc, ctx->base.pc_next + 2);
493         gen_helper_sleep(cpu_env);
494 	return;
495     }
496 
497     switch (ctx->opcode & 0xf000) {
498     case 0x1000:		/* mov.l Rm,@(disp,Rn) */
499 	{
500 	    TCGv addr = tcg_temp_new();
501 	    tcg_gen_addi_i32(addr, REG(B11_8), B3_0 * 4);
502             tcg_gen_qemu_st_i32(REG(B7_4), addr, ctx->memidx, MO_TEUL);
503 	    tcg_temp_free(addr);
504 	}
505 	return;
506     case 0x5000:		/* mov.l @(disp,Rm),Rn */
507 	{
508 	    TCGv addr = tcg_temp_new();
509 	    tcg_gen_addi_i32(addr, REG(B7_4), B3_0 * 4);
510             tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, MO_TESL);
511 	    tcg_temp_free(addr);
512 	}
513 	return;
514     case 0xe000:		/* mov #imm,Rn */
515 #ifdef CONFIG_USER_ONLY
516         /* Detect the start of a gUSA region.  If so, update envflags
517            and end the TB.  This will allow us to see the end of the
518            region (stored in R0) in the next TB.  */
519         if (B11_8 == 15 && B7_0s < 0 &&
520             (tb_cflags(ctx->base.tb) & CF_PARALLEL)) {
521             ctx->envflags = deposit32(ctx->envflags, GUSA_SHIFT, 8, B7_0s);
522             ctx->base.is_jmp = DISAS_STOP;
523         }
524 #endif
525 	tcg_gen_movi_i32(REG(B11_8), B7_0s);
526 	return;
527     case 0x9000:		/* mov.w @(disp,PC),Rn */
528 	{
529             TCGv addr = tcg_const_i32(ctx->base.pc_next + 4 + B7_0 * 2);
530             tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, MO_TESW);
531 	    tcg_temp_free(addr);
532 	}
533 	return;
534     case 0xd000:		/* mov.l @(disp,PC),Rn */
535 	{
536             TCGv addr = tcg_const_i32((ctx->base.pc_next + 4 + B7_0 * 4) & ~3);
537             tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, MO_TESL);
538 	    tcg_temp_free(addr);
539 	}
540 	return;
541     case 0x7000:		/* add #imm,Rn */
542 	tcg_gen_addi_i32(REG(B11_8), REG(B11_8), B7_0s);
543 	return;
544     case 0xa000:		/* bra disp */
545 	CHECK_NOT_DELAY_SLOT
546         ctx->delayed_pc = ctx->base.pc_next + 4 + B11_0s * 2;
547         ctx->envflags |= DELAY_SLOT;
548 	return;
549     case 0xb000:		/* bsr disp */
550 	CHECK_NOT_DELAY_SLOT
551         tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
552         ctx->delayed_pc = ctx->base.pc_next + 4 + B11_0s * 2;
553         ctx->envflags |= DELAY_SLOT;
554 	return;
555     }
556 
557     switch (ctx->opcode & 0xf00f) {
558     case 0x6003:		/* mov Rm,Rn */
559 	tcg_gen_mov_i32(REG(B11_8), REG(B7_4));
560 	return;
561     case 0x2000:		/* mov.b Rm,@Rn */
562         tcg_gen_qemu_st_i32(REG(B7_4), REG(B11_8), ctx->memidx, MO_UB);
563 	return;
564     case 0x2001:		/* mov.w Rm,@Rn */
565         tcg_gen_qemu_st_i32(REG(B7_4), REG(B11_8), ctx->memidx, MO_TEUW);
566 	return;
567     case 0x2002:		/* mov.l Rm,@Rn */
568         tcg_gen_qemu_st_i32(REG(B7_4), REG(B11_8), ctx->memidx, MO_TEUL);
569 	return;
570     case 0x6000:		/* mov.b @Rm,Rn */
571         tcg_gen_qemu_ld_i32(REG(B11_8), REG(B7_4), ctx->memidx, MO_SB);
572 	return;
573     case 0x6001:		/* mov.w @Rm,Rn */
574         tcg_gen_qemu_ld_i32(REG(B11_8), REG(B7_4), ctx->memidx, MO_TESW);
575 	return;
576     case 0x6002:		/* mov.l @Rm,Rn */
577         tcg_gen_qemu_ld_i32(REG(B11_8), REG(B7_4), ctx->memidx, MO_TESL);
578 	return;
579     case 0x2004:		/* mov.b Rm,@-Rn */
580 	{
581 	    TCGv addr = tcg_temp_new();
582 	    tcg_gen_subi_i32(addr, REG(B11_8), 1);
583             /* might cause re-execution */
584             tcg_gen_qemu_st_i32(REG(B7_4), addr, ctx->memidx, MO_UB);
585 	    tcg_gen_mov_i32(REG(B11_8), addr);			/* modify register status */
586 	    tcg_temp_free(addr);
587 	}
588 	return;
589     case 0x2005:		/* mov.w Rm,@-Rn */
590 	{
591 	    TCGv addr = tcg_temp_new();
592 	    tcg_gen_subi_i32(addr, REG(B11_8), 2);
593             tcg_gen_qemu_st_i32(REG(B7_4), addr, ctx->memidx, MO_TEUW);
594 	    tcg_gen_mov_i32(REG(B11_8), addr);
595 	    tcg_temp_free(addr);
596 	}
597 	return;
598     case 0x2006:		/* mov.l Rm,@-Rn */
599 	{
600 	    TCGv addr = tcg_temp_new();
601 	    tcg_gen_subi_i32(addr, REG(B11_8), 4);
602             tcg_gen_qemu_st_i32(REG(B7_4), addr, ctx->memidx, MO_TEUL);
603 	    tcg_gen_mov_i32(REG(B11_8), addr);
604         tcg_temp_free(addr);
605 	}
606 	return;
607     case 0x6004:		/* mov.b @Rm+,Rn */
608         tcg_gen_qemu_ld_i32(REG(B11_8), REG(B7_4), ctx->memidx, MO_SB);
609 	if ( B11_8 != B7_4 )
610 		tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 1);
611 	return;
612     case 0x6005:		/* mov.w @Rm+,Rn */
613         tcg_gen_qemu_ld_i32(REG(B11_8), REG(B7_4), ctx->memidx, MO_TESW);
614 	if ( B11_8 != B7_4 )
615 		tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 2);
616 	return;
617     case 0x6006:		/* mov.l @Rm+,Rn */
618         tcg_gen_qemu_ld_i32(REG(B11_8), REG(B7_4), ctx->memidx, MO_TESL);
619 	if ( B11_8 != B7_4 )
620 		tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 4);
621 	return;
622     case 0x0004:		/* mov.b Rm,@(R0,Rn) */
623 	{
624 	    TCGv addr = tcg_temp_new();
625 	    tcg_gen_add_i32(addr, REG(B11_8), REG(0));
626             tcg_gen_qemu_st_i32(REG(B7_4), addr, ctx->memidx, MO_UB);
627 	    tcg_temp_free(addr);
628 	}
629 	return;
630     case 0x0005:		/* mov.w Rm,@(R0,Rn) */
631 	{
632 	    TCGv addr = tcg_temp_new();
633 	    tcg_gen_add_i32(addr, REG(B11_8), REG(0));
634             tcg_gen_qemu_st_i32(REG(B7_4), addr, ctx->memidx, MO_TEUW);
635 	    tcg_temp_free(addr);
636 	}
637 	return;
638     case 0x0006:		/* mov.l Rm,@(R0,Rn) */
639 	{
640 	    TCGv addr = tcg_temp_new();
641 	    tcg_gen_add_i32(addr, REG(B11_8), REG(0));
642             tcg_gen_qemu_st_i32(REG(B7_4), addr, ctx->memidx, MO_TEUL);
643 	    tcg_temp_free(addr);
644 	}
645 	return;
646     case 0x000c:		/* mov.b @(R0,Rm),Rn */
647 	{
648 	    TCGv addr = tcg_temp_new();
649 	    tcg_gen_add_i32(addr, REG(B7_4), REG(0));
650             tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, MO_SB);
651 	    tcg_temp_free(addr);
652 	}
653 	return;
654     case 0x000d:		/* mov.w @(R0,Rm),Rn */
655 	{
656 	    TCGv addr = tcg_temp_new();
657 	    tcg_gen_add_i32(addr, REG(B7_4), REG(0));
658             tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, MO_TESW);
659 	    tcg_temp_free(addr);
660 	}
661 	return;
662     case 0x000e:		/* mov.l @(R0,Rm),Rn */
663 	{
664 	    TCGv addr = tcg_temp_new();
665 	    tcg_gen_add_i32(addr, REG(B7_4), REG(0));
666             tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, MO_TESL);
667 	    tcg_temp_free(addr);
668 	}
669 	return;
670     case 0x6008:		/* swap.b Rm,Rn */
671 	{
672             TCGv low = tcg_temp_new();
673             tcg_gen_bswap16_i32(low, REG(B7_4), 0);
674             tcg_gen_deposit_i32(REG(B11_8), REG(B7_4), low, 0, 16);
675 	    tcg_temp_free(low);
676 	}
677 	return;
678     case 0x6009:		/* swap.w Rm,Rn */
679         tcg_gen_rotli_i32(REG(B11_8), REG(B7_4), 16);
680 	return;
681     case 0x200d:		/* xtrct Rm,Rn */
682 	{
683 	    TCGv high, low;
684 	    high = tcg_temp_new();
685 	    tcg_gen_shli_i32(high, REG(B7_4), 16);
686 	    low = tcg_temp_new();
687 	    tcg_gen_shri_i32(low, REG(B11_8), 16);
688 	    tcg_gen_or_i32(REG(B11_8), high, low);
689 	    tcg_temp_free(low);
690 	    tcg_temp_free(high);
691 	}
692 	return;
693     case 0x300c:		/* add Rm,Rn */
694 	tcg_gen_add_i32(REG(B11_8), REG(B11_8), REG(B7_4));
695 	return;
696     case 0x300e:		/* addc Rm,Rn */
697         {
698             TCGv t0, t1;
699             t0 = tcg_const_tl(0);
700             t1 = tcg_temp_new();
701             tcg_gen_add2_i32(t1, cpu_sr_t, cpu_sr_t, t0, REG(B7_4), t0);
702             tcg_gen_add2_i32(REG(B11_8), cpu_sr_t,
703                              REG(B11_8), t0, t1, cpu_sr_t);
704             tcg_temp_free(t0);
705             tcg_temp_free(t1);
706         }
707 	return;
708     case 0x300f:		/* addv Rm,Rn */
709         {
710             TCGv t0, t1, t2;
711             t0 = tcg_temp_new();
712             tcg_gen_add_i32(t0, REG(B7_4), REG(B11_8));
713             t1 = tcg_temp_new();
714             tcg_gen_xor_i32(t1, t0, REG(B11_8));
715             t2 = tcg_temp_new();
716             tcg_gen_xor_i32(t2, REG(B7_4), REG(B11_8));
717             tcg_gen_andc_i32(cpu_sr_t, t1, t2);
718             tcg_temp_free(t2);
719             tcg_gen_shri_i32(cpu_sr_t, cpu_sr_t, 31);
720             tcg_temp_free(t1);
721             tcg_gen_mov_i32(REG(B7_4), t0);
722             tcg_temp_free(t0);
723         }
724 	return;
725     case 0x2009:		/* and Rm,Rn */
726 	tcg_gen_and_i32(REG(B11_8), REG(B11_8), REG(B7_4));
727 	return;
728     case 0x3000:		/* cmp/eq Rm,Rn */
729         tcg_gen_setcond_i32(TCG_COND_EQ, cpu_sr_t, REG(B11_8), REG(B7_4));
730 	return;
731     case 0x3003:		/* cmp/ge Rm,Rn */
732         tcg_gen_setcond_i32(TCG_COND_GE, cpu_sr_t, REG(B11_8), REG(B7_4));
733 	return;
734     case 0x3007:		/* cmp/gt Rm,Rn */
735         tcg_gen_setcond_i32(TCG_COND_GT, cpu_sr_t, REG(B11_8), REG(B7_4));
736 	return;
737     case 0x3006:		/* cmp/hi Rm,Rn */
738         tcg_gen_setcond_i32(TCG_COND_GTU, cpu_sr_t, REG(B11_8), REG(B7_4));
739 	return;
740     case 0x3002:		/* cmp/hs Rm,Rn */
741         tcg_gen_setcond_i32(TCG_COND_GEU, cpu_sr_t, REG(B11_8), REG(B7_4));
742 	return;
743     case 0x200c:		/* cmp/str Rm,Rn */
744 	{
745 	    TCGv cmp1 = tcg_temp_new();
746 	    TCGv cmp2 = tcg_temp_new();
747             tcg_gen_xor_i32(cmp2, REG(B7_4), REG(B11_8));
748             tcg_gen_subi_i32(cmp1, cmp2, 0x01010101);
749             tcg_gen_andc_i32(cmp1, cmp1, cmp2);
750             tcg_gen_andi_i32(cmp1, cmp1, 0x80808080);
751             tcg_gen_setcondi_i32(TCG_COND_NE, cpu_sr_t, cmp1, 0);
752 	    tcg_temp_free(cmp2);
753 	    tcg_temp_free(cmp1);
754 	}
755 	return;
756     case 0x2007:		/* div0s Rm,Rn */
757         tcg_gen_shri_i32(cpu_sr_q, REG(B11_8), 31);         /* SR_Q */
758         tcg_gen_shri_i32(cpu_sr_m, REG(B7_4), 31);          /* SR_M */
759         tcg_gen_xor_i32(cpu_sr_t, cpu_sr_q, cpu_sr_m);      /* SR_T */
760 	return;
761     case 0x3004:		/* div1 Rm,Rn */
762         {
763             TCGv t0 = tcg_temp_new();
764             TCGv t1 = tcg_temp_new();
765             TCGv t2 = tcg_temp_new();
766             TCGv zero = tcg_const_i32(0);
767 
768             /* shift left arg1, saving the bit being pushed out and inserting
769                T on the right */
770             tcg_gen_shri_i32(t0, REG(B11_8), 31);
771             tcg_gen_shli_i32(REG(B11_8), REG(B11_8), 1);
772             tcg_gen_or_i32(REG(B11_8), REG(B11_8), cpu_sr_t);
773 
774             /* Add or subtract arg0 from arg1 depending if Q == M. To avoid
775                using 64-bit temps, we compute arg0's high part from q ^ m, so
776                that it is 0x00000000 when adding the value or 0xffffffff when
777                subtracting it. */
778             tcg_gen_xor_i32(t1, cpu_sr_q, cpu_sr_m);
779             tcg_gen_subi_i32(t1, t1, 1);
780             tcg_gen_neg_i32(t2, REG(B7_4));
781             tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, zero, REG(B7_4), t2);
782             tcg_gen_add2_i32(REG(B11_8), t1, REG(B11_8), zero, t2, t1);
783 
784             /* compute T and Q depending on carry */
785             tcg_gen_andi_i32(t1, t1, 1);
786             tcg_gen_xor_i32(t1, t1, t0);
787             tcg_gen_xori_i32(cpu_sr_t, t1, 1);
788             tcg_gen_xor_i32(cpu_sr_q, cpu_sr_m, t1);
789 
790             tcg_temp_free(zero);
791             tcg_temp_free(t2);
792             tcg_temp_free(t1);
793             tcg_temp_free(t0);
794         }
795 	return;
796     case 0x300d:		/* dmuls.l Rm,Rn */
797         tcg_gen_muls2_i32(cpu_macl, cpu_mach, REG(B7_4), REG(B11_8));
798 	return;
799     case 0x3005:		/* dmulu.l Rm,Rn */
800         tcg_gen_mulu2_i32(cpu_macl, cpu_mach, REG(B7_4), REG(B11_8));
801 	return;
802     case 0x600e:		/* exts.b Rm,Rn */
803 	tcg_gen_ext8s_i32(REG(B11_8), REG(B7_4));
804 	return;
805     case 0x600f:		/* exts.w Rm,Rn */
806 	tcg_gen_ext16s_i32(REG(B11_8), REG(B7_4));
807 	return;
808     case 0x600c:		/* extu.b Rm,Rn */
809 	tcg_gen_ext8u_i32(REG(B11_8), REG(B7_4));
810 	return;
811     case 0x600d:		/* extu.w Rm,Rn */
812 	tcg_gen_ext16u_i32(REG(B11_8), REG(B7_4));
813 	return;
814     case 0x000f:		/* mac.l @Rm+,@Rn+ */
815 	{
816 	    TCGv arg0, arg1;
817 	    arg0 = tcg_temp_new();
818             tcg_gen_qemu_ld_i32(arg0, REG(B7_4), ctx->memidx, MO_TESL);
819 	    arg1 = tcg_temp_new();
820             tcg_gen_qemu_ld_i32(arg1, REG(B11_8), ctx->memidx, MO_TESL);
821             gen_helper_macl(cpu_env, arg0, arg1);
822 	    tcg_temp_free(arg1);
823 	    tcg_temp_free(arg0);
824 	    tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 4);
825 	    tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
826 	}
827 	return;
828     case 0x400f:		/* mac.w @Rm+,@Rn+ */
829 	{
830 	    TCGv arg0, arg1;
831 	    arg0 = tcg_temp_new();
832             tcg_gen_qemu_ld_i32(arg0, REG(B7_4), ctx->memidx, MO_TESL);
833 	    arg1 = tcg_temp_new();
834             tcg_gen_qemu_ld_i32(arg1, REG(B11_8), ctx->memidx, MO_TESL);
835             gen_helper_macw(cpu_env, arg0, arg1);
836 	    tcg_temp_free(arg1);
837 	    tcg_temp_free(arg0);
838 	    tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 2);
839 	    tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 2);
840 	}
841 	return;
842     case 0x0007:		/* mul.l Rm,Rn */
843 	tcg_gen_mul_i32(cpu_macl, REG(B7_4), REG(B11_8));
844 	return;
845     case 0x200f:		/* muls.w Rm,Rn */
846 	{
847 	    TCGv arg0, arg1;
848 	    arg0 = tcg_temp_new();
849 	    tcg_gen_ext16s_i32(arg0, REG(B7_4));
850 	    arg1 = tcg_temp_new();
851 	    tcg_gen_ext16s_i32(arg1, REG(B11_8));
852 	    tcg_gen_mul_i32(cpu_macl, arg0, arg1);
853 	    tcg_temp_free(arg1);
854 	    tcg_temp_free(arg0);
855 	}
856 	return;
857     case 0x200e:		/* mulu.w Rm,Rn */
858 	{
859 	    TCGv arg0, arg1;
860 	    arg0 = tcg_temp_new();
861 	    tcg_gen_ext16u_i32(arg0, REG(B7_4));
862 	    arg1 = tcg_temp_new();
863 	    tcg_gen_ext16u_i32(arg1, REG(B11_8));
864 	    tcg_gen_mul_i32(cpu_macl, arg0, arg1);
865 	    tcg_temp_free(arg1);
866 	    tcg_temp_free(arg0);
867 	}
868 	return;
869     case 0x600b:		/* neg Rm,Rn */
870 	tcg_gen_neg_i32(REG(B11_8), REG(B7_4));
871 	return;
872     case 0x600a:		/* negc Rm,Rn */
873         {
874             TCGv t0 = tcg_const_i32(0);
875             tcg_gen_add2_i32(REG(B11_8), cpu_sr_t,
876                              REG(B7_4), t0, cpu_sr_t, t0);
877             tcg_gen_sub2_i32(REG(B11_8), cpu_sr_t,
878                              t0, t0, REG(B11_8), cpu_sr_t);
879             tcg_gen_andi_i32(cpu_sr_t, cpu_sr_t, 1);
880             tcg_temp_free(t0);
881         }
882 	return;
883     case 0x6007:		/* not Rm,Rn */
884 	tcg_gen_not_i32(REG(B11_8), REG(B7_4));
885 	return;
886     case 0x200b:		/* or Rm,Rn */
887 	tcg_gen_or_i32(REG(B11_8), REG(B11_8), REG(B7_4));
888 	return;
    case 0x400c:		/* shad Rm,Rn */
	/*
	 * Dynamic arithmetic shift of Rn by the signed amount in Rm:
	 * Rm >= 0 shifts left by (Rm & 0x1f); Rm < 0 shifts right
	 * arithmetically by 32 - (Rm & 0x1f), i.e. by 1..32 bits.
	 * Both candidate results are computed and one is selected.
	 */
	{
            TCGv t0 = tcg_temp_new();
            TCGv t1 = tcg_temp_new();
            TCGv t2 = tcg_temp_new();

            /* t0 = low five bits of the shift amount */
            tcg_gen_andi_i32(t0, REG(B7_4), 0x1f);

            /* positive case: shift to the left */
            tcg_gen_shl_i32(t1, REG(B11_8), t0);

            /* negative case: shift to the right in two steps to
               correctly handle the -32 case */
            /* t0 ^ 0x1f == 31 - t0; the extra shift by 1 below brings the
               total to 32 - t0 without ever issuing a 32-bit shift count */
            tcg_gen_xori_i32(t0, t0, 0x1f);
            tcg_gen_sar_i32(t2, REG(B11_8), t0);
            tcg_gen_sari_i32(t2, t2, 1);

            /* select between the two cases */
            /* t0 is reused as the constant 0 for the signed Rm >= 0 test */
            tcg_gen_movi_i32(t0, 0);
            tcg_gen_movcond_i32(TCG_COND_GE, REG(B11_8), REG(B7_4), t0, t1, t2);

            tcg_temp_free(t0);
            tcg_temp_free(t1);
            tcg_temp_free(t2);
	}
	return;
    case 0x400d:		/* shld Rm,Rn */
	/*
	 * Dynamic logical shift of Rn by the signed amount in Rm:
	 * Rm >= 0 shifts left by (Rm & 0x1f); Rm < 0 shifts right
	 * logically by 32 - (Rm & 0x1f), i.e. by 1..32 bits.
	 * Both candidate results are computed and one is selected.
	 */
	{
            TCGv t0 = tcg_temp_new();
            TCGv t1 = tcg_temp_new();
            TCGv t2 = tcg_temp_new();

            /* t0 = low five bits of the shift amount */
            tcg_gen_andi_i32(t0, REG(B7_4), 0x1f);

            /* positive case: shift to the left */
            tcg_gen_shl_i32(t1, REG(B11_8), t0);

            /* negative case: shift to the right in two steps to
               correctly handle the -32 case */
            /* t0 ^ 0x1f == 31 - t0; the extra shift by 1 below brings the
               total to 32 - t0 without ever issuing a 32-bit shift count */
            tcg_gen_xori_i32(t0, t0, 0x1f);
            tcg_gen_shr_i32(t2, REG(B11_8), t0);
            tcg_gen_shri_i32(t2, t2, 1);

            /* select between the two cases */
            /* t0 is reused as the constant 0 for the signed Rm >= 0 test */
            tcg_gen_movi_i32(t0, 0);
            tcg_gen_movcond_i32(TCG_COND_GE, REG(B11_8), REG(B7_4), t0, t1, t2);

            tcg_temp_free(t0);
            tcg_temp_free(t1);
            tcg_temp_free(t2);
	}
	return;
941     case 0x3008:		/* sub Rm,Rn */
942 	tcg_gen_sub_i32(REG(B11_8), REG(B11_8), REG(B7_4));
943 	return;
944     case 0x300a:		/* subc Rm,Rn */
945         {
946             TCGv t0, t1;
947             t0 = tcg_const_tl(0);
948             t1 = tcg_temp_new();
949             tcg_gen_add2_i32(t1, cpu_sr_t, cpu_sr_t, t0, REG(B7_4), t0);
950             tcg_gen_sub2_i32(REG(B11_8), cpu_sr_t,
951                              REG(B11_8), t0, t1, cpu_sr_t);
952             tcg_gen_andi_i32(cpu_sr_t, cpu_sr_t, 1);
953             tcg_temp_free(t0);
954             tcg_temp_free(t1);
955         }
956 	return;
957     case 0x300b:		/* subv Rm,Rn */
958         {
959             TCGv t0, t1, t2;
960             t0 = tcg_temp_new();
961             tcg_gen_sub_i32(t0, REG(B11_8), REG(B7_4));
962             t1 = tcg_temp_new();
963             tcg_gen_xor_i32(t1, t0, REG(B7_4));
964             t2 = tcg_temp_new();
965             tcg_gen_xor_i32(t2, REG(B11_8), REG(B7_4));
966             tcg_gen_and_i32(t1, t1, t2);
967             tcg_temp_free(t2);
968             tcg_gen_shri_i32(cpu_sr_t, t1, 31);
969             tcg_temp_free(t1);
970             tcg_gen_mov_i32(REG(B11_8), t0);
971             tcg_temp_free(t0);
972         }
973 	return;
974     case 0x2008:		/* tst Rm,Rn */
975 	{
976 	    TCGv val = tcg_temp_new();
977 	    tcg_gen_and_i32(val, REG(B7_4), REG(B11_8));
978             tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, val, 0);
979 	    tcg_temp_free(val);
980 	}
981 	return;
982     case 0x200a:		/* xor Rm,Rn */
983 	tcg_gen_xor_i32(REG(B11_8), REG(B11_8), REG(B7_4));
984 	return;
985     case 0xf00c: /* fmov {F,D,X}Rm,{F,D,X}Rn - FPSCR: Nothing */
986 	CHECK_FPU_ENABLED
987         if (ctx->tbflags & FPSCR_SZ) {
988             int xsrc = XHACK(B7_4);
989             int xdst = XHACK(B11_8);
990             tcg_gen_mov_i32(FREG(xdst), FREG(xsrc));
991             tcg_gen_mov_i32(FREG(xdst + 1), FREG(xsrc + 1));
992 	} else {
993             tcg_gen_mov_i32(FREG(B11_8), FREG(B7_4));
994 	}
995 	return;
996     case 0xf00a: /* fmov {F,D,X}Rm,@Rn - FPSCR: Nothing */
997 	CHECK_FPU_ENABLED
998         if (ctx->tbflags & FPSCR_SZ) {
999             TCGv_i64 fp = tcg_temp_new_i64();
1000             gen_load_fpr64(ctx, fp, XHACK(B7_4));
1001             tcg_gen_qemu_st_i64(fp, REG(B11_8), ctx->memidx, MO_TEQ);
1002             tcg_temp_free_i64(fp);
1003 	} else {
1004             tcg_gen_qemu_st_i32(FREG(B7_4), REG(B11_8), ctx->memidx, MO_TEUL);
1005 	}
1006 	return;
1007     case 0xf008: /* fmov @Rm,{F,D,X}Rn - FPSCR: Nothing */
1008 	CHECK_FPU_ENABLED
1009         if (ctx->tbflags & FPSCR_SZ) {
1010             TCGv_i64 fp = tcg_temp_new_i64();
1011             tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx, MO_TEQ);
1012             gen_store_fpr64(ctx, fp, XHACK(B11_8));
1013             tcg_temp_free_i64(fp);
1014 	} else {
1015             tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx, MO_TEUL);
1016 	}
1017 	return;
1018     case 0xf009: /* fmov @Rm+,{F,D,X}Rn - FPSCR: Nothing */
1019 	CHECK_FPU_ENABLED
1020         if (ctx->tbflags & FPSCR_SZ) {
1021             TCGv_i64 fp = tcg_temp_new_i64();
1022             tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx, MO_TEQ);
1023             gen_store_fpr64(ctx, fp, XHACK(B11_8));
1024             tcg_temp_free_i64(fp);
1025             tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 8);
1026 	} else {
1027             tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx, MO_TEUL);
1028 	    tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 4);
1029 	}
1030 	return;
1031     case 0xf00b: /* fmov {F,D,X}Rm,@-Rn - FPSCR: Nothing */
1032 	CHECK_FPU_ENABLED
1033         {
1034             TCGv addr = tcg_temp_new_i32();
1035             if (ctx->tbflags & FPSCR_SZ) {
1036                 TCGv_i64 fp = tcg_temp_new_i64();
1037                 gen_load_fpr64(ctx, fp, XHACK(B7_4));
1038                 tcg_gen_subi_i32(addr, REG(B11_8), 8);
1039                 tcg_gen_qemu_st_i64(fp, addr, ctx->memidx, MO_TEQ);
1040                 tcg_temp_free_i64(fp);
1041             } else {
1042                 tcg_gen_subi_i32(addr, REG(B11_8), 4);
1043                 tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, MO_TEUL);
1044             }
1045             tcg_gen_mov_i32(REG(B11_8), addr);
1046             tcg_temp_free(addr);
1047         }
1048 	return;
    case 0xf006: /* fmov @(R0,Rm),{F,D,X}Rn - FPSCR: Nothing */
	CHECK_FPU_ENABLED
	{
	    /* Indexed load: the effective address is Rm + R0. */
	    TCGv addr = tcg_temp_new_i32();
	    tcg_gen_add_i32(addr, REG(B7_4), REG(0));
            if (ctx->tbflags & FPSCR_SZ) {
                /* FPSCR.SZ set in the TB flags: transfer 64 bits at once. */
                TCGv_i64 fp = tcg_temp_new_i64();
                tcg_gen_qemu_ld_i64(fp, addr, ctx->memidx, MO_TEQ);
                gen_store_fpr64(ctx, fp, XHACK(B11_8));
                tcg_temp_free_i64(fp);
	    } else {
                /* Otherwise a single 32-bit load straight into FRn. */
                tcg_gen_qemu_ld_i32(FREG(B11_8), addr, ctx->memidx, MO_TEUL);
	    }
	    tcg_temp_free(addr);
	}
	return;
    case 0xf007: /* fmov {F,D,X}Rm,@(R0,Rn) - FPSCR: Nothing */
	CHECK_FPU_ENABLED
	{
	    /* Indexed store: the effective address is Rn + R0. */
	    TCGv addr = tcg_temp_new();
	    tcg_gen_add_i32(addr, REG(B11_8), REG(0));
            if (ctx->tbflags & FPSCR_SZ) {
                /* FPSCR.SZ set in the TB flags: transfer 64 bits at once. */
                TCGv_i64 fp = tcg_temp_new_i64();
                gen_load_fpr64(ctx, fp, XHACK(B7_4));
                tcg_gen_qemu_st_i64(fp, addr, ctx->memidx, MO_TEQ);
                tcg_temp_free_i64(fp);
	    } else {
                /* Otherwise a single 32-bit store straight from FRm. */
                tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, MO_TEUL);
	    }
	    tcg_temp_free(addr);
	}
	return;
1081     case 0xf000: /* fadd Rm,Rn - FPSCR: R[PR,Enable.O/U/I]/W[Cause,Flag] */
1082     case 0xf001: /* fsub Rm,Rn - FPSCR: R[PR,Enable.O/U/I]/W[Cause,Flag] */
1083     case 0xf002: /* fmul Rm,Rn - FPSCR: R[PR,Enable.O/U/I]/W[Cause,Flag] */
1084     case 0xf003: /* fdiv Rm,Rn - FPSCR: R[PR,Enable.O/U/I]/W[Cause,Flag] */
1085     case 0xf004: /* fcmp/eq Rm,Rn - FPSCR: R[PR,Enable.V]/W[Cause,Flag] */
1086     case 0xf005: /* fcmp/gt Rm,Rn - FPSCR: R[PR,Enable.V]/W[Cause,Flag] */
1087 	{
1088 	    CHECK_FPU_ENABLED
1089             if (ctx->tbflags & FPSCR_PR) {
1090                 TCGv_i64 fp0, fp1;
1091 
1092                 if (ctx->opcode & 0x0110) {
1093                     goto do_illegal;
1094                 }
1095 		fp0 = tcg_temp_new_i64();
1096 		fp1 = tcg_temp_new_i64();
1097                 gen_load_fpr64(ctx, fp0, B11_8);
1098                 gen_load_fpr64(ctx, fp1, B7_4);
1099                 switch (ctx->opcode & 0xf00f) {
1100                 case 0xf000:		/* fadd Rm,Rn */
1101                     gen_helper_fadd_DT(fp0, cpu_env, fp0, fp1);
1102                     break;
1103                 case 0xf001:		/* fsub Rm,Rn */
1104                     gen_helper_fsub_DT(fp0, cpu_env, fp0, fp1);
1105                     break;
1106                 case 0xf002:		/* fmul Rm,Rn */
1107                     gen_helper_fmul_DT(fp0, cpu_env, fp0, fp1);
1108                     break;
1109                 case 0xf003:		/* fdiv Rm,Rn */
1110                     gen_helper_fdiv_DT(fp0, cpu_env, fp0, fp1);
1111                     break;
1112                 case 0xf004:		/* fcmp/eq Rm,Rn */
1113                     gen_helper_fcmp_eq_DT(cpu_sr_t, cpu_env, fp0, fp1);
1114                     return;
1115                 case 0xf005:		/* fcmp/gt Rm,Rn */
1116                     gen_helper_fcmp_gt_DT(cpu_sr_t, cpu_env, fp0, fp1);
1117                     return;
1118                 }
1119                 gen_store_fpr64(ctx, fp0, B11_8);
1120                 tcg_temp_free_i64(fp0);
1121                 tcg_temp_free_i64(fp1);
1122 	    } else {
1123                 switch (ctx->opcode & 0xf00f) {
1124                 case 0xf000:		/* fadd Rm,Rn */
1125                     gen_helper_fadd_FT(FREG(B11_8), cpu_env,
1126                                        FREG(B11_8), FREG(B7_4));
1127                     break;
1128                 case 0xf001:		/* fsub Rm,Rn */
1129                     gen_helper_fsub_FT(FREG(B11_8), cpu_env,
1130                                        FREG(B11_8), FREG(B7_4));
1131                     break;
1132                 case 0xf002:		/* fmul Rm,Rn */
1133                     gen_helper_fmul_FT(FREG(B11_8), cpu_env,
1134                                        FREG(B11_8), FREG(B7_4));
1135                     break;
1136                 case 0xf003:		/* fdiv Rm,Rn */
1137                     gen_helper_fdiv_FT(FREG(B11_8), cpu_env,
1138                                        FREG(B11_8), FREG(B7_4));
1139                     break;
1140                 case 0xf004:		/* fcmp/eq Rm,Rn */
1141                     gen_helper_fcmp_eq_FT(cpu_sr_t, cpu_env,
1142                                           FREG(B11_8), FREG(B7_4));
1143                     return;
1144                 case 0xf005:		/* fcmp/gt Rm,Rn */
1145                     gen_helper_fcmp_gt_FT(cpu_sr_t, cpu_env,
1146                                           FREG(B11_8), FREG(B7_4));
1147                     return;
1148                 }
1149 	    }
1150 	}
1151 	return;
1152     case 0xf00e: /* fmac FR0,RM,Rn */
1153         CHECK_FPU_ENABLED
1154         CHECK_FPSCR_PR_0
1155         gen_helper_fmac_FT(FREG(B11_8), cpu_env,
1156                            FREG(0), FREG(B7_4), FREG(B11_8));
1157         return;
1158     }
1159 
1160     switch (ctx->opcode & 0xff00) {
1161     case 0xc900:		/* and #imm,R0 */
1162 	tcg_gen_andi_i32(REG(0), REG(0), B7_0);
1163 	return;
1164     case 0xcd00:		/* and.b #imm,@(R0,GBR) */
1165 	{
1166 	    TCGv addr, val;
1167 	    addr = tcg_temp_new();
1168 	    tcg_gen_add_i32(addr, REG(0), cpu_gbr);
1169 	    val = tcg_temp_new();
1170             tcg_gen_qemu_ld_i32(val, addr, ctx->memidx, MO_UB);
1171 	    tcg_gen_andi_i32(val, val, B7_0);
1172             tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_UB);
1173 	    tcg_temp_free(val);
1174 	    tcg_temp_free(addr);
1175 	}
1176 	return;
1177     case 0x8b00:		/* bf label */
1178 	CHECK_NOT_DELAY_SLOT
1179         gen_conditional_jump(ctx, ctx->base.pc_next + 4 + B7_0s * 2, false);
1180 	return;
1181     case 0x8f00:		/* bf/s label */
1182 	CHECK_NOT_DELAY_SLOT
1183         tcg_gen_xori_i32(cpu_delayed_cond, cpu_sr_t, 1);
1184         ctx->delayed_pc = ctx->base.pc_next + 4 + B7_0s * 2;
1185         ctx->envflags |= DELAY_SLOT_CONDITIONAL;
1186 	return;
1187     case 0x8900:		/* bt label */
1188 	CHECK_NOT_DELAY_SLOT
1189         gen_conditional_jump(ctx, ctx->base.pc_next + 4 + B7_0s * 2, true);
1190 	return;
1191     case 0x8d00:		/* bt/s label */
1192 	CHECK_NOT_DELAY_SLOT
1193         tcg_gen_mov_i32(cpu_delayed_cond, cpu_sr_t);
1194         ctx->delayed_pc = ctx->base.pc_next + 4 + B7_0s * 2;
1195         ctx->envflags |= DELAY_SLOT_CONDITIONAL;
1196 	return;
1197     case 0x8800:		/* cmp/eq #imm,R0 */
1198         tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, REG(0), B7_0s);
1199 	return;
1200     case 0xc400:		/* mov.b @(disp,GBR),R0 */
1201 	{
1202 	    TCGv addr = tcg_temp_new();
1203 	    tcg_gen_addi_i32(addr, cpu_gbr, B7_0);
1204             tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_SB);
1205 	    tcg_temp_free(addr);
1206 	}
1207 	return;
1208     case 0xc500:		/* mov.w @(disp,GBR),R0 */
1209 	{
1210 	    TCGv addr = tcg_temp_new();
1211 	    tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 2);
1212             tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESW);
1213 	    tcg_temp_free(addr);
1214 	}
1215 	return;
1216     case 0xc600:		/* mov.l @(disp,GBR),R0 */
1217 	{
1218 	    TCGv addr = tcg_temp_new();
1219 	    tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 4);
1220             tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESL);
1221 	    tcg_temp_free(addr);
1222 	}
1223 	return;
1224     case 0xc000:		/* mov.b R0,@(disp,GBR) */
1225 	{
1226 	    TCGv addr = tcg_temp_new();
1227 	    tcg_gen_addi_i32(addr, cpu_gbr, B7_0);
1228             tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_UB);
1229 	    tcg_temp_free(addr);
1230 	}
1231 	return;
1232     case 0xc100:		/* mov.w R0,@(disp,GBR) */
1233 	{
1234 	    TCGv addr = tcg_temp_new();
1235 	    tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 2);
1236             tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUW);
1237 	    tcg_temp_free(addr);
1238 	}
1239 	return;
1240     case 0xc200:		/* mov.l R0,@(disp,GBR) */
1241 	{
1242 	    TCGv addr = tcg_temp_new();
1243 	    tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 4);
1244             tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUL);
1245 	    tcg_temp_free(addr);
1246 	}
1247 	return;
1248     case 0x8000:		/* mov.b R0,@(disp,Rn) */
1249 	{
1250 	    TCGv addr = tcg_temp_new();
1251 	    tcg_gen_addi_i32(addr, REG(B7_4), B3_0);
1252             tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_UB);
1253 	    tcg_temp_free(addr);
1254 	}
1255 	return;
1256     case 0x8100:		/* mov.w R0,@(disp,Rn) */
1257 	{
1258 	    TCGv addr = tcg_temp_new();
1259 	    tcg_gen_addi_i32(addr, REG(B7_4), B3_0 * 2);
1260             tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUW);
1261 	    tcg_temp_free(addr);
1262 	}
1263 	return;
1264     case 0x8400:		/* mov.b @(disp,Rn),R0 */
1265 	{
1266 	    TCGv addr = tcg_temp_new();
1267 	    tcg_gen_addi_i32(addr, REG(B7_4), B3_0);
1268             tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_SB);
1269 	    tcg_temp_free(addr);
1270 	}
1271 	return;
1272     case 0x8500:		/* mov.w @(disp,Rn),R0 */
1273 	{
1274 	    TCGv addr = tcg_temp_new();
1275 	    tcg_gen_addi_i32(addr, REG(B7_4), B3_0 * 2);
1276             tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESW);
1277 	    tcg_temp_free(addr);
1278 	}
1279 	return;
1280     case 0xc700:		/* mova @(disp,PC),R0 */
1281         tcg_gen_movi_i32(REG(0), ((ctx->base.pc_next & 0xfffffffc) +
1282                                   4 + B7_0 * 4) & ~3);
1283 	return;
1284     case 0xcb00:		/* or #imm,R0 */
1285 	tcg_gen_ori_i32(REG(0), REG(0), B7_0);
1286 	return;
1287     case 0xcf00:		/* or.b #imm,@(R0,GBR) */
1288 	{
1289 	    TCGv addr, val;
1290 	    addr = tcg_temp_new();
1291 	    tcg_gen_add_i32(addr, REG(0), cpu_gbr);
1292 	    val = tcg_temp_new();
1293             tcg_gen_qemu_ld_i32(val, addr, ctx->memidx, MO_UB);
1294 	    tcg_gen_ori_i32(val, val, B7_0);
1295             tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_UB);
1296 	    tcg_temp_free(val);
1297 	    tcg_temp_free(addr);
1298 	}
1299 	return;
1300     case 0xc300:		/* trapa #imm */
1301 	{
1302 	    TCGv imm;
1303 	    CHECK_NOT_DELAY_SLOT
1304             gen_save_cpu_state(ctx, true);
1305 	    imm = tcg_const_i32(B7_0);
1306             gen_helper_trapa(cpu_env, imm);
1307 	    tcg_temp_free(imm);
1308             ctx->base.is_jmp = DISAS_NORETURN;
1309 	}
1310 	return;
1311     case 0xc800:		/* tst #imm,R0 */
1312 	{
1313 	    TCGv val = tcg_temp_new();
1314 	    tcg_gen_andi_i32(val, REG(0), B7_0);
1315             tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, val, 0);
1316 	    tcg_temp_free(val);
1317 	}
1318 	return;
1319     case 0xcc00:		/* tst.b #imm,@(R0,GBR) */
1320 	{
1321 	    TCGv val = tcg_temp_new();
1322 	    tcg_gen_add_i32(val, REG(0), cpu_gbr);
1323             tcg_gen_qemu_ld_i32(val, val, ctx->memidx, MO_UB);
1324 	    tcg_gen_andi_i32(val, val, B7_0);
1325             tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, val, 0);
1326 	    tcg_temp_free(val);
1327 	}
1328 	return;
1329     case 0xca00:		/* xor #imm,R0 */
1330 	tcg_gen_xori_i32(REG(0), REG(0), B7_0);
1331 	return;
1332     case 0xce00:		/* xor.b #imm,@(R0,GBR) */
1333 	{
1334 	    TCGv addr, val;
1335 	    addr = tcg_temp_new();
1336 	    tcg_gen_add_i32(addr, REG(0), cpu_gbr);
1337 	    val = tcg_temp_new();
1338             tcg_gen_qemu_ld_i32(val, addr, ctx->memidx, MO_UB);
1339 	    tcg_gen_xori_i32(val, val, B7_0);
1340             tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_UB);
1341 	    tcg_temp_free(val);
1342 	    tcg_temp_free(addr);
1343 	}
1344 	return;
1345     }
1346 
1347     switch (ctx->opcode & 0xf08f) {
1348     case 0x408e:		/* ldc Rm,Rn_BANK */
1349 	CHECK_PRIVILEGED
1350 	tcg_gen_mov_i32(ALTREG(B6_4), REG(B11_8));
1351 	return;
1352     case 0x4087:		/* ldc.l @Rm+,Rn_BANK */
1353 	CHECK_PRIVILEGED
1354         tcg_gen_qemu_ld_i32(ALTREG(B6_4), REG(B11_8), ctx->memidx, MO_TESL);
1355 	tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
1356 	return;
1357     case 0x0082:		/* stc Rm_BANK,Rn */
1358 	CHECK_PRIVILEGED
1359 	tcg_gen_mov_i32(REG(B11_8), ALTREG(B6_4));
1360 	return;
1361     case 0x4083:		/* stc.l Rm_BANK,@-Rn */
1362 	CHECK_PRIVILEGED
1363 	{
1364 	    TCGv addr = tcg_temp_new();
1365 	    tcg_gen_subi_i32(addr, REG(B11_8), 4);
1366             tcg_gen_qemu_st_i32(ALTREG(B6_4), addr, ctx->memidx, MO_TEUL);
1367 	    tcg_gen_mov_i32(REG(B11_8), addr);
1368 	    tcg_temp_free(addr);
1369 	}
1370 	return;
1371     }
1372 
1373     switch (ctx->opcode & 0xf0ff) {
1374     case 0x0023:		/* braf Rn */
1375 	CHECK_NOT_DELAY_SLOT
1376         tcg_gen_addi_i32(cpu_delayed_pc, REG(B11_8), ctx->base.pc_next + 4);
1377         ctx->envflags |= DELAY_SLOT;
1378 	ctx->delayed_pc = (uint32_t) - 1;
1379 	return;
1380     case 0x0003:		/* bsrf Rn */
1381 	CHECK_NOT_DELAY_SLOT
1382         tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
1383 	tcg_gen_add_i32(cpu_delayed_pc, REG(B11_8), cpu_pr);
1384         ctx->envflags |= DELAY_SLOT;
1385 	ctx->delayed_pc = (uint32_t) - 1;
1386 	return;
1387     case 0x4015:		/* cmp/pl Rn */
1388         tcg_gen_setcondi_i32(TCG_COND_GT, cpu_sr_t, REG(B11_8), 0);
1389 	return;
1390     case 0x4011:		/* cmp/pz Rn */
1391         tcg_gen_setcondi_i32(TCG_COND_GE, cpu_sr_t, REG(B11_8), 0);
1392 	return;
1393     case 0x4010:		/* dt Rn */
1394 	tcg_gen_subi_i32(REG(B11_8), REG(B11_8), 1);
1395         tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, REG(B11_8), 0);
1396 	return;
1397     case 0x402b:		/* jmp @Rn */
1398 	CHECK_NOT_DELAY_SLOT
1399 	tcg_gen_mov_i32(cpu_delayed_pc, REG(B11_8));
1400         ctx->envflags |= DELAY_SLOT;
1401 	ctx->delayed_pc = (uint32_t) - 1;
1402 	return;
1403     case 0x400b:		/* jsr @Rn */
1404 	CHECK_NOT_DELAY_SLOT
1405         tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
1406 	tcg_gen_mov_i32(cpu_delayed_pc, REG(B11_8));
1407         ctx->envflags |= DELAY_SLOT;
1408 	ctx->delayed_pc = (uint32_t) - 1;
1409 	return;
1410     case 0x400e:		/* ldc Rm,SR */
1411 	CHECK_PRIVILEGED
1412         {
1413             TCGv val = tcg_temp_new();
1414             tcg_gen_andi_i32(val, REG(B11_8), 0x700083f3);
1415             gen_write_sr(val);
1416             tcg_temp_free(val);
1417             ctx->base.is_jmp = DISAS_STOP;
1418         }
1419 	return;
1420     case 0x4007:		/* ldc.l @Rm+,SR */
1421 	CHECK_PRIVILEGED
1422 	{
1423 	    TCGv val = tcg_temp_new();
1424             tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx, MO_TESL);
1425             tcg_gen_andi_i32(val, val, 0x700083f3);
1426             gen_write_sr(val);
1427 	    tcg_temp_free(val);
1428 	    tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
1429             ctx->base.is_jmp = DISAS_STOP;
1430 	}
1431 	return;
1432     case 0x0002:		/* stc SR,Rn */
1433 	CHECK_PRIVILEGED
1434         gen_read_sr(REG(B11_8));
1435 	return;
1436     case 0x4003:		/* stc SR,@-Rn */
1437 	CHECK_PRIVILEGED
1438 	{
1439 	    TCGv addr = tcg_temp_new();
1440             TCGv val = tcg_temp_new();
1441 	    tcg_gen_subi_i32(addr, REG(B11_8), 4);
1442             gen_read_sr(val);
1443             tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL);
1444 	    tcg_gen_mov_i32(REG(B11_8), addr);
1445             tcg_temp_free(val);
1446 	    tcg_temp_free(addr);
1447 	}
1448 	return;
1449 #define LD(reg,ldnum,ldpnum,prechk)		\
1450   case ldnum:							\
1451     prechk    							\
1452     tcg_gen_mov_i32 (cpu_##reg, REG(B11_8));			\
1453     return;							\
1454   case ldpnum:							\
1455     prechk    							\
1456     tcg_gen_qemu_ld_i32(cpu_##reg, REG(B11_8), ctx->memidx, MO_TESL); \
1457     tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);		\
1458     return;
1459 #define ST(reg,stnum,stpnum,prechk)		\
1460   case stnum:							\
1461     prechk    							\
1462     tcg_gen_mov_i32 (REG(B11_8), cpu_##reg);			\
1463     return;							\
1464   case stpnum:							\
1465     prechk    							\
1466     {								\
1467 	TCGv addr = tcg_temp_new();				\
1468 	tcg_gen_subi_i32(addr, REG(B11_8), 4);			\
1469         tcg_gen_qemu_st_i32(cpu_##reg, addr, ctx->memidx, MO_TEUL); \
1470 	tcg_gen_mov_i32(REG(B11_8), addr);			\
1471 	tcg_temp_free(addr);					\
1472     }								\
1473     return;
1474 #define LDST(reg,ldnum,ldpnum,stnum,stpnum,prechk)		\
1475 	LD(reg,ldnum,ldpnum,prechk)				\
1476 	ST(reg,stnum,stpnum,prechk)
1477 	LDST(gbr,  0x401e, 0x4017, 0x0012, 0x4013, {})
1478 	LDST(vbr,  0x402e, 0x4027, 0x0022, 0x4023, CHECK_PRIVILEGED)
1479 	LDST(ssr,  0x403e, 0x4037, 0x0032, 0x4033, CHECK_PRIVILEGED)
1480 	LDST(spc,  0x404e, 0x4047, 0x0042, 0x4043, CHECK_PRIVILEGED)
1481 	ST(sgr,  0x003a, 0x4032, CHECK_PRIVILEGED)
1482         LD(sgr,  0x403a, 0x4036, CHECK_PRIVILEGED CHECK_SH4A)
1483 	LDST(dbr,  0x40fa, 0x40f6, 0x00fa, 0x40f2, CHECK_PRIVILEGED)
1484 	LDST(mach, 0x400a, 0x4006, 0x000a, 0x4002, {})
1485 	LDST(macl, 0x401a, 0x4016, 0x001a, 0x4012, {})
1486 	LDST(pr,   0x402a, 0x4026, 0x002a, 0x4022, {})
1487 	LDST(fpul, 0x405a, 0x4056, 0x005a, 0x4052, {CHECK_FPU_ENABLED})
1488     case 0x406a:		/* lds Rm,FPSCR */
1489 	CHECK_FPU_ENABLED
1490         gen_helper_ld_fpscr(cpu_env, REG(B11_8));
1491         ctx->base.is_jmp = DISAS_STOP;
1492 	return;
1493     case 0x4066:		/* lds.l @Rm+,FPSCR */
1494 	CHECK_FPU_ENABLED
1495 	{
1496 	    TCGv addr = tcg_temp_new();
1497             tcg_gen_qemu_ld_i32(addr, REG(B11_8), ctx->memidx, MO_TESL);
1498 	    tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
1499             gen_helper_ld_fpscr(cpu_env, addr);
1500 	    tcg_temp_free(addr);
1501             ctx->base.is_jmp = DISAS_STOP;
1502 	}
1503 	return;
1504     case 0x006a:		/* sts FPSCR,Rn */
1505 	CHECK_FPU_ENABLED
1506 	tcg_gen_andi_i32(REG(B11_8), cpu_fpscr, 0x003fffff);
1507 	return;
1508     case 0x4062:		/* sts FPSCR,@-Rn */
1509 	CHECK_FPU_ENABLED
1510 	{
1511 	    TCGv addr, val;
1512 	    val = tcg_temp_new();
1513 	    tcg_gen_andi_i32(val, cpu_fpscr, 0x003fffff);
1514 	    addr = tcg_temp_new();
1515 	    tcg_gen_subi_i32(addr, REG(B11_8), 4);
1516             tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL);
1517 	    tcg_gen_mov_i32(REG(B11_8), addr);
1518 	    tcg_temp_free(addr);
1519 	    tcg_temp_free(val);
1520 	}
1521 	return;
1522     case 0x00c3:		/* movca.l R0,@Rm */
1523         {
1524             TCGv val = tcg_temp_new();
1525             tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx, MO_TEUL);
1526             gen_helper_movcal(cpu_env, REG(B11_8), val);
1527             tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
1528             tcg_temp_free(val);
1529         }
1530         ctx->has_movcal = 1;
1531 	return;
1532     case 0x40a9:                /* movua.l @Rm,R0 */
1533         CHECK_SH4A
1534         /* Load non-boundary-aligned data */
1535         tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx,
1536                             MO_TEUL | MO_UNALN);
1537         return;
1538     case 0x40e9:                /* movua.l @Rm+,R0 */
1539         CHECK_SH4A
1540         /* Load non-boundary-aligned data */
1541         tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx,
1542                             MO_TEUL | MO_UNALN);
1543         tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
1544         return;
1545     case 0x0029:		/* movt Rn */
1546         tcg_gen_mov_i32(REG(B11_8), cpu_sr_t);
1547 	return;
1548     case 0x0073:
1549         /* MOVCO.L
1550          *     LDST -> T
1551          *     If (T == 1) R0 -> (Rn)
1552          *     0 -> LDST
1553          *
1554          * The above description doesn't work in a parallel context.
1555          * Since we currently support no smp boards, this implies user-mode.
1556          * But we can still support the official mechanism while user-mode
1557          * is single-threaded.  */
1558         CHECK_SH4A
1559         {
1560             TCGLabel *fail = gen_new_label();
1561             TCGLabel *done = gen_new_label();
1562 
1563             if ((tb_cflags(ctx->base.tb) & CF_PARALLEL)) {
1564                 TCGv tmp;
1565 
1566                 tcg_gen_brcond_i32(TCG_COND_NE, REG(B11_8),
1567                                    cpu_lock_addr, fail);
1568                 tmp = tcg_temp_new();
1569                 tcg_gen_atomic_cmpxchg_i32(tmp, REG(B11_8), cpu_lock_value,
1570                                            REG(0), ctx->memidx, MO_TEUL);
1571                 tcg_gen_setcond_i32(TCG_COND_EQ, cpu_sr_t, tmp, cpu_lock_value);
1572                 tcg_temp_free(tmp);
1573             } else {
1574                 tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_lock_addr, -1, fail);
1575                 tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
1576                 tcg_gen_movi_i32(cpu_sr_t, 1);
1577             }
1578             tcg_gen_br(done);
1579 
1580             gen_set_label(fail);
1581             tcg_gen_movi_i32(cpu_sr_t, 0);
1582 
1583             gen_set_label(done);
1584             tcg_gen_movi_i32(cpu_lock_addr, -1);
1585         }
1586         return;
1587     case 0x0063:
1588         /* MOVLI.L @Rm,R0
1589          *     1 -> LDST
1590          *     (Rm) -> R0
1591          *     When interrupt/exception
1592          *     occurred 0 -> LDST
1593          *
1594          * In a parallel context, we must also save the loaded value
1595          * for use with the cmpxchg that we'll use with movco.l.  */
1596         CHECK_SH4A
1597         if ((tb_cflags(ctx->base.tb) & CF_PARALLEL)) {
1598             TCGv tmp = tcg_temp_new();
1599             tcg_gen_mov_i32(tmp, REG(B11_8));
1600             tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TESL);
1601             tcg_gen_mov_i32(cpu_lock_value, REG(0));
1602             tcg_gen_mov_i32(cpu_lock_addr, tmp);
1603             tcg_temp_free(tmp);
1604         } else {
1605             tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TESL);
1606             tcg_gen_movi_i32(cpu_lock_addr, 0);
1607         }
1608         return;
1609     case 0x0093:		/* ocbi @Rn */
1610 	{
1611             gen_helper_ocbi(cpu_env, REG(B11_8));
1612 	}
1613 	return;
1614     case 0x00a3:		/* ocbp @Rn */
1615     case 0x00b3:		/* ocbwb @Rn */
1616         /* These instructions are supposed to do nothing in case of
1617            a cache miss. Given that we only partially emulate caches
1618            it is safe to simply ignore them. */
1619 	return;
1620     case 0x0083:		/* pref @Rn */
1621 	return;
1622     case 0x00d3:		/* prefi @Rn */
1623         CHECK_SH4A
1624         return;
1625     case 0x00e3:		/* icbi @Rn */
1626         CHECK_SH4A
1627         return;
1628     case 0x00ab:		/* synco */
1629         CHECK_SH4A
1630         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
1631         return;
1632     case 0x4024:		/* rotcl Rn */
1633 	{
1634 	    TCGv tmp = tcg_temp_new();
1635             tcg_gen_mov_i32(tmp, cpu_sr_t);
1636             tcg_gen_shri_i32(cpu_sr_t, REG(B11_8), 31);
1637 	    tcg_gen_shli_i32(REG(B11_8), REG(B11_8), 1);
1638             tcg_gen_or_i32(REG(B11_8), REG(B11_8), tmp);
1639 	    tcg_temp_free(tmp);
1640 	}
1641 	return;
1642     case 0x4025:		/* rotcr Rn */
1643 	{
1644 	    TCGv tmp = tcg_temp_new();
1645             tcg_gen_shli_i32(tmp, cpu_sr_t, 31);
1646             tcg_gen_andi_i32(cpu_sr_t, REG(B11_8), 1);
1647 	    tcg_gen_shri_i32(REG(B11_8), REG(B11_8), 1);
1648             tcg_gen_or_i32(REG(B11_8), REG(B11_8), tmp);
1649 	    tcg_temp_free(tmp);
1650 	}
1651 	return;
1652     case 0x4004:		/* rotl Rn */
1653 	tcg_gen_rotli_i32(REG(B11_8), REG(B11_8), 1);
1654         tcg_gen_andi_i32(cpu_sr_t, REG(B11_8), 0);
1655 	return;
1656     case 0x4005:		/* rotr Rn */
1657         tcg_gen_andi_i32(cpu_sr_t, REG(B11_8), 0);
1658 	tcg_gen_rotri_i32(REG(B11_8), REG(B11_8), 1);
1659 	return;
1660     case 0x4000:		/* shll Rn */
1661     case 0x4020:		/* shal Rn */
1662         tcg_gen_shri_i32(cpu_sr_t, REG(B11_8), 31);
1663 	tcg_gen_shli_i32(REG(B11_8), REG(B11_8), 1);
1664 	return;
1665     case 0x4021:		/* shar Rn */
1666         tcg_gen_andi_i32(cpu_sr_t, REG(B11_8), 1);
1667 	tcg_gen_sari_i32(REG(B11_8), REG(B11_8), 1);
1668 	return;
1669     case 0x4001:		/* shlr Rn */
1670         tcg_gen_andi_i32(cpu_sr_t, REG(B11_8), 1);
1671 	tcg_gen_shri_i32(REG(B11_8), REG(B11_8), 1);
1672 	return;
1673     case 0x4008:		/* shll2 Rn */
1674 	tcg_gen_shli_i32(REG(B11_8), REG(B11_8), 2);
1675 	return;
1676     case 0x4018:		/* shll8 Rn */
1677 	tcg_gen_shli_i32(REG(B11_8), REG(B11_8), 8);
1678 	return;
1679     case 0x4028:		/* shll16 Rn */
1680 	tcg_gen_shli_i32(REG(B11_8), REG(B11_8), 16);
1681 	return;
1682     case 0x4009:		/* shlr2 Rn */
1683 	tcg_gen_shri_i32(REG(B11_8), REG(B11_8), 2);
1684 	return;
1685     case 0x4019:		/* shlr8 Rn */
1686 	tcg_gen_shri_i32(REG(B11_8), REG(B11_8), 8);
1687 	return;
1688     case 0x4029:		/* shlr16 Rn */
1689 	tcg_gen_shri_i32(REG(B11_8), REG(B11_8), 16);
1690 	return;
1691     case 0x401b:		/* tas.b @Rn */
1692         {
1693             TCGv val = tcg_const_i32(0x80);
1694             tcg_gen_atomic_fetch_or_i32(val, REG(B11_8), val,
1695                                         ctx->memidx, MO_UB);
1696             tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, val, 0);
1697             tcg_temp_free(val);
1698         }
1699         return;
1700     case 0xf00d: /* fsts FPUL,FRn - FPSCR: Nothing */
1701 	CHECK_FPU_ENABLED
1702         tcg_gen_mov_i32(FREG(B11_8), cpu_fpul);
1703 	return;
1704     case 0xf01d: /* flds FRm,FPUL - FPSCR: Nothing */
1705 	CHECK_FPU_ENABLED
1706         tcg_gen_mov_i32(cpu_fpul, FREG(B11_8));
1707 	return;
1708     case 0xf02d: /* float FPUL,FRn/DRn - FPSCR: R[PR,Enable.I]/W[Cause,Flag] */
1709 	CHECK_FPU_ENABLED
1710         if (ctx->tbflags & FPSCR_PR) {
1711 	    TCGv_i64 fp;
1712             if (ctx->opcode & 0x0100) {
1713                 goto do_illegal;
1714             }
1715 	    fp = tcg_temp_new_i64();
1716             gen_helper_float_DT(fp, cpu_env, cpu_fpul);
1717             gen_store_fpr64(ctx, fp, B11_8);
1718 	    tcg_temp_free_i64(fp);
1719 	}
1720 	else {
1721             gen_helper_float_FT(FREG(B11_8), cpu_env, cpu_fpul);
1722 	}
1723 	return;
1724     case 0xf03d: /* ftrc FRm/DRm,FPUL - FPSCR: R[PR,Enable.V]/W[Cause,Flag] */
1725 	CHECK_FPU_ENABLED
1726         if (ctx->tbflags & FPSCR_PR) {
1727 	    TCGv_i64 fp;
1728             if (ctx->opcode & 0x0100) {
1729                 goto do_illegal;
1730             }
1731 	    fp = tcg_temp_new_i64();
1732             gen_load_fpr64(ctx, fp, B11_8);
1733             gen_helper_ftrc_DT(cpu_fpul, cpu_env, fp);
1734 	    tcg_temp_free_i64(fp);
1735 	}
1736 	else {
1737             gen_helper_ftrc_FT(cpu_fpul, cpu_env, FREG(B11_8));
1738 	}
1739 	return;
1740     case 0xf04d: /* fneg FRn/DRn - FPSCR: Nothing */
1741 	CHECK_FPU_ENABLED
1742         tcg_gen_xori_i32(FREG(B11_8), FREG(B11_8), 0x80000000);
1743 	return;
1744     case 0xf05d: /* fabs FRn/DRn - FPCSR: Nothing */
1745 	CHECK_FPU_ENABLED
1746         tcg_gen_andi_i32(FREG(B11_8), FREG(B11_8), 0x7fffffff);
1747 	return;
1748     case 0xf06d: /* fsqrt FRn */
1749 	CHECK_FPU_ENABLED
1750         if (ctx->tbflags & FPSCR_PR) {
1751             if (ctx->opcode & 0x0100) {
1752                 goto do_illegal;
1753             }
1754 	    TCGv_i64 fp = tcg_temp_new_i64();
1755             gen_load_fpr64(ctx, fp, B11_8);
1756             gen_helper_fsqrt_DT(fp, cpu_env, fp);
1757             gen_store_fpr64(ctx, fp, B11_8);
1758 	    tcg_temp_free_i64(fp);
1759 	} else {
1760             gen_helper_fsqrt_FT(FREG(B11_8), cpu_env, FREG(B11_8));
1761 	}
1762 	return;
1763     case 0xf07d: /* fsrra FRn */
1764 	CHECK_FPU_ENABLED
1765         CHECK_FPSCR_PR_0
1766         gen_helper_fsrra_FT(FREG(B11_8), cpu_env, FREG(B11_8));
1767 	break;
1768     case 0xf08d: /* fldi0 FRn - FPSCR: R[PR] */
1769 	CHECK_FPU_ENABLED
1770         CHECK_FPSCR_PR_0
1771         tcg_gen_movi_i32(FREG(B11_8), 0);
1772         return;
1773     case 0xf09d: /* fldi1 FRn - FPSCR: R[PR] */
1774 	CHECK_FPU_ENABLED
1775         CHECK_FPSCR_PR_0
1776         tcg_gen_movi_i32(FREG(B11_8), 0x3f800000);
1777         return;
1778     case 0xf0ad: /* fcnvsd FPUL,DRn */
1779 	CHECK_FPU_ENABLED
1780 	{
1781 	    TCGv_i64 fp = tcg_temp_new_i64();
1782             gen_helper_fcnvsd_FT_DT(fp, cpu_env, cpu_fpul);
1783             gen_store_fpr64(ctx, fp, B11_8);
1784 	    tcg_temp_free_i64(fp);
1785 	}
1786 	return;
1787     case 0xf0bd: /* fcnvds DRn,FPUL */
1788 	CHECK_FPU_ENABLED
1789 	{
1790 	    TCGv_i64 fp = tcg_temp_new_i64();
1791             gen_load_fpr64(ctx, fp, B11_8);
1792             gen_helper_fcnvds_DT_FT(cpu_fpul, cpu_env, fp);
1793 	    tcg_temp_free_i64(fp);
1794 	}
1795 	return;
1796     case 0xf0ed: /* fipr FVm,FVn */
1797         CHECK_FPU_ENABLED
1798         CHECK_FPSCR_PR_1
1799         {
1800             TCGv m = tcg_const_i32((ctx->opcode >> 8) & 3);
1801             TCGv n = tcg_const_i32((ctx->opcode >> 10) & 3);
1802             gen_helper_fipr(cpu_env, m, n);
1803             tcg_temp_free(m);
1804             tcg_temp_free(n);
1805             return;
1806         }
1807         break;
1808     case 0xf0fd: /* ftrv XMTRX,FVn */
1809         CHECK_FPU_ENABLED
1810         CHECK_FPSCR_PR_1
1811         {
1812             if ((ctx->opcode & 0x0300) != 0x0100) {
1813                 goto do_illegal;
1814             }
1815             TCGv n = tcg_const_i32((ctx->opcode >> 10) & 3);
1816             gen_helper_ftrv(cpu_env, n);
1817             tcg_temp_free(n);
1818             return;
1819         }
1820         break;
1821     }
1822 #if 0
1823     fprintf(stderr, "unknown instruction 0x%04x at pc 0x%08x\n",
1824             ctx->opcode, ctx->base.pc_next);
1825     fflush(stderr);
1826 #endif
1827  do_illegal:
1828     if (ctx->envflags & DELAY_SLOT_MASK) {
1829  do_illegal_slot:
1830         gen_save_cpu_state(ctx, true);
1831         gen_helper_raise_slot_illegal_instruction(cpu_env);
1832     } else {
1833         gen_save_cpu_state(ctx, true);
1834         gen_helper_raise_illegal_instruction(cpu_env);
1835     }
1836     ctx->base.is_jmp = DISAS_NORETURN;
1837     return;
1838 
1839  do_fpu_disabled:
1840     gen_save_cpu_state(ctx, true);
1841     if (ctx->envflags & DELAY_SLOT_MASK) {
1842         gen_helper_raise_slot_fpu_disable(cpu_env);
1843     } else {
1844         gen_helper_raise_fpu_disable(cpu_env);
1845     }
1846     ctx->base.is_jmp = DISAS_NORETURN;
1847     return;
1848 }
1849 
1850 static void decode_opc(DisasContext * ctx)
1851 {
1852     uint32_t old_flags = ctx->envflags;
1853 
1854     _decode_opc(ctx);
1855 
1856     if (old_flags & DELAY_SLOT_MASK) {
1857         /* go out of the delay slot */
1858         ctx->envflags &= ~DELAY_SLOT_MASK;
1859 
1860         /* When in an exclusive region, we must continue to the end
1861            for conditional branches.  */
1862         if (ctx->tbflags & GUSA_EXCLUSIVE
1863             && old_flags & DELAY_SLOT_CONDITIONAL) {
1864             gen_delayed_conditional_jump(ctx);
1865             return;
1866         }
1867         /* Otherwise this is probably an invalid gUSA region.
1868            Drop the GUSA bits so the next TB doesn't see them.  */
1869         ctx->envflags &= ~GUSA_MASK;
1870 
1871         tcg_gen_movi_i32(cpu_flags, ctx->envflags);
1872         if (old_flags & DELAY_SLOT_CONDITIONAL) {
1873 	    gen_delayed_conditional_jump(ctx);
1874         } else {
1875             gen_jump(ctx);
1876 	}
1877     }
1878 }
1879 
1880 #ifdef CONFIG_USER_ONLY
1881 /* For uniprocessors, SH4 uses optimistic restartable atomic sequences.
1882    Upon an interrupt, a real kernel would simply notice magic values in
1883    the registers and reset the PC to the start of the sequence.
1884 
1885    For QEMU, we cannot do this in quite the same way.  Instead, we notice
1886    the normal start of such a sequence (mov #-x,r15).  While we can handle
1887    any sequence via cpu_exec_step_atomic, we can recognize the "normal"
1888    sequences and transform them into atomic operations as seen by the host.
1889 */
1890 static void decode_gusa(DisasContext *ctx, CPUSH4State *env)
1891 {
1892     uint16_t insns[5];
1893     int ld_adr, ld_dst, ld_mop;
1894     int op_dst, op_src, op_opc;
1895     int mv_src, mt_dst, st_src, st_mop;
1896     TCGv op_arg;
1897     uint32_t pc = ctx->base.pc_next;
1898     uint32_t pc_end = ctx->base.tb->cs_base;
1899     int max_insns = (pc_end - pc) / 2;
1900     int i;
1901 
1902     /* The state machine below will consume only a few insns.
1903        If there are more than that in a region, fail now.  */
1904     if (max_insns > ARRAY_SIZE(insns)) {
1905         goto fail;
1906     }
1907 
1908     /* Read all of the insns for the region.  */
1909     for (i = 0; i < max_insns; ++i) {
1910         insns[i] = translator_lduw(env, pc + i * 2);
1911     }
1912 
1913     ld_adr = ld_dst = ld_mop = -1;
1914     mv_src = -1;
1915     op_dst = op_src = op_opc = -1;
1916     mt_dst = -1;
1917     st_src = st_mop = -1;
1918     op_arg = NULL;
1919     i = 0;
1920 
1921 #define NEXT_INSN \
1922     do { if (i >= max_insns) goto fail; ctx->opcode = insns[i++]; } while (0)
1923 
1924     /*
1925      * Expect a load to begin the region.
1926      */
1927     NEXT_INSN;
1928     switch (ctx->opcode & 0xf00f) {
1929     case 0x6000: /* mov.b @Rm,Rn */
1930         ld_mop = MO_SB;
1931         break;
1932     case 0x6001: /* mov.w @Rm,Rn */
1933         ld_mop = MO_TESW;
1934         break;
1935     case 0x6002: /* mov.l @Rm,Rn */
1936         ld_mop = MO_TESL;
1937         break;
1938     default:
1939         goto fail;
1940     }
1941     ld_adr = B7_4;
1942     ld_dst = B11_8;
1943     if (ld_adr == ld_dst) {
1944         goto fail;
1945     }
1946     /* Unless we see a mov, any two-operand operation must use ld_dst.  */
1947     op_dst = ld_dst;
1948 
1949     /*
1950      * Expect an optional register move.
1951      */
1952     NEXT_INSN;
1953     switch (ctx->opcode & 0xf00f) {
1954     case 0x6003: /* mov Rm,Rn */
1955         /*
1956          * Here we want to recognize ld_dst being saved for later consumption,
1957          * or for another input register being copied so that ld_dst need not
1958          * be clobbered during the operation.
1959          */
1960         op_dst = B11_8;
1961         mv_src = B7_4;
1962         if (op_dst == ld_dst) {
1963             /* Overwriting the load output.  */
1964             goto fail;
1965         }
1966         if (mv_src != ld_dst) {
1967             /* Copying a new input; constrain op_src to match the load.  */
1968             op_src = ld_dst;
1969         }
1970         break;
1971 
1972     default:
1973         /* Put back and re-examine as operation.  */
1974         --i;
1975     }
1976 
1977     /*
1978      * Expect the operation.
1979      */
1980     NEXT_INSN;
1981     switch (ctx->opcode & 0xf00f) {
1982     case 0x300c: /* add Rm,Rn */
1983         op_opc = INDEX_op_add_i32;
1984         goto do_reg_op;
1985     case 0x2009: /* and Rm,Rn */
1986         op_opc = INDEX_op_and_i32;
1987         goto do_reg_op;
1988     case 0x200a: /* xor Rm,Rn */
1989         op_opc = INDEX_op_xor_i32;
1990         goto do_reg_op;
1991     case 0x200b: /* or Rm,Rn */
1992         op_opc = INDEX_op_or_i32;
1993     do_reg_op:
1994         /* The operation register should be as expected, and the
1995            other input cannot depend on the load.  */
1996         if (op_dst != B11_8) {
1997             goto fail;
1998         }
1999         if (op_src < 0) {
2000             /* Unconstrainted input.  */
2001             op_src = B7_4;
2002         } else if (op_src == B7_4) {
2003             /* Constrained input matched load.  All operations are
2004                commutative; "swap" them by "moving" the load output
2005                to the (implicit) first argument and the move source
2006                to the (explicit) second argument.  */
2007             op_src = mv_src;
2008         } else {
2009             goto fail;
2010         }
2011         op_arg = REG(op_src);
2012         break;
2013 
2014     case 0x6007: /* not Rm,Rn */
2015         if (ld_dst != B7_4 || mv_src >= 0) {
2016             goto fail;
2017         }
2018         op_dst = B11_8;
2019         op_opc = INDEX_op_xor_i32;
2020         op_arg = tcg_const_i32(-1);
2021         break;
2022 
2023     case 0x7000 ... 0x700f: /* add #imm,Rn */
2024         if (op_dst != B11_8 || mv_src >= 0) {
2025             goto fail;
2026         }
2027         op_opc = INDEX_op_add_i32;
2028         op_arg = tcg_const_i32(B7_0s);
2029         break;
2030 
2031     case 0x3000: /* cmp/eq Rm,Rn */
2032         /* Looking for the middle of a compare-and-swap sequence,
2033            beginning with the compare.  Operands can be either order,
2034            but with only one overlapping the load.  */
2035         if ((ld_dst == B11_8) + (ld_dst == B7_4) != 1 || mv_src >= 0) {
2036             goto fail;
2037         }
2038         op_opc = INDEX_op_setcond_i32;  /* placeholder */
2039         op_src = (ld_dst == B11_8 ? B7_4 : B11_8);
2040         op_arg = REG(op_src);
2041 
2042         NEXT_INSN;
2043         switch (ctx->opcode & 0xff00) {
2044         case 0x8b00: /* bf label */
2045         case 0x8f00: /* bf/s label */
2046             if (pc + (i + 1 + B7_0s) * 2 != pc_end) {
2047                 goto fail;
2048             }
2049             if ((ctx->opcode & 0xff00) == 0x8b00) { /* bf label */
2050                 break;
2051             }
2052             /* We're looking to unconditionally modify Rn with the
2053                result of the comparison, within the delay slot of
2054                the branch.  This is used by older gcc.  */
2055             NEXT_INSN;
2056             if ((ctx->opcode & 0xf0ff) == 0x0029) { /* movt Rn */
2057                 mt_dst = B11_8;
2058             } else {
2059                 goto fail;
2060             }
2061             break;
2062 
2063         default:
2064             goto fail;
2065         }
2066         break;
2067 
2068     case 0x2008: /* tst Rm,Rn */
2069         /* Looking for a compare-and-swap against zero.  */
2070         if (ld_dst != B11_8 || ld_dst != B7_4 || mv_src >= 0) {
2071             goto fail;
2072         }
2073         op_opc = INDEX_op_setcond_i32;
2074         op_arg = tcg_const_i32(0);
2075 
2076         NEXT_INSN;
2077         if ((ctx->opcode & 0xff00) != 0x8900 /* bt label */
2078             || pc + (i + 1 + B7_0s) * 2 != pc_end) {
2079             goto fail;
2080         }
2081         break;
2082 
2083     default:
2084         /* Put back and re-examine as store.  */
2085         --i;
2086     }
2087 
2088     /*
2089      * Expect the store.
2090      */
2091     /* The store must be the last insn.  */
2092     if (i != max_insns - 1) {
2093         goto fail;
2094     }
2095     NEXT_INSN;
2096     switch (ctx->opcode & 0xf00f) {
2097     case 0x2000: /* mov.b Rm,@Rn */
2098         st_mop = MO_UB;
2099         break;
2100     case 0x2001: /* mov.w Rm,@Rn */
2101         st_mop = MO_UW;
2102         break;
2103     case 0x2002: /* mov.l Rm,@Rn */
2104         st_mop = MO_UL;
2105         break;
2106     default:
2107         goto fail;
2108     }
2109     /* The store must match the load.  */
2110     if (ld_adr != B11_8 || st_mop != (ld_mop & MO_SIZE)) {
2111         goto fail;
2112     }
2113     st_src = B7_4;
2114 
2115 #undef NEXT_INSN
2116 
2117     /*
2118      * Emit the operation.
2119      */
2120     switch (op_opc) {
2121     case -1:
2122         /* No operation found.  Look for exchange pattern.  */
2123         if (st_src == ld_dst || mv_src >= 0) {
2124             goto fail;
2125         }
2126         tcg_gen_atomic_xchg_i32(REG(ld_dst), REG(ld_adr), REG(st_src),
2127                                 ctx->memidx, ld_mop);
2128         break;
2129 
2130     case INDEX_op_add_i32:
2131         if (op_dst != st_src) {
2132             goto fail;
2133         }
2134         if (op_dst == ld_dst && st_mop == MO_UL) {
2135             tcg_gen_atomic_add_fetch_i32(REG(ld_dst), REG(ld_adr),
2136                                          op_arg, ctx->memidx, ld_mop);
2137         } else {
2138             tcg_gen_atomic_fetch_add_i32(REG(ld_dst), REG(ld_adr),
2139                                          op_arg, ctx->memidx, ld_mop);
2140             if (op_dst != ld_dst) {
2141                 /* Note that mop sizes < 4 cannot use add_fetch
2142                    because it won't carry into the higher bits.  */
2143                 tcg_gen_add_i32(REG(op_dst), REG(ld_dst), op_arg);
2144             }
2145         }
2146         break;
2147 
2148     case INDEX_op_and_i32:
2149         if (op_dst != st_src) {
2150             goto fail;
2151         }
2152         if (op_dst == ld_dst) {
2153             tcg_gen_atomic_and_fetch_i32(REG(ld_dst), REG(ld_adr),
2154                                          op_arg, ctx->memidx, ld_mop);
2155         } else {
2156             tcg_gen_atomic_fetch_and_i32(REG(ld_dst), REG(ld_adr),
2157                                          op_arg, ctx->memidx, ld_mop);
2158             tcg_gen_and_i32(REG(op_dst), REG(ld_dst), op_arg);
2159         }
2160         break;
2161 
2162     case INDEX_op_or_i32:
2163         if (op_dst != st_src) {
2164             goto fail;
2165         }
2166         if (op_dst == ld_dst) {
2167             tcg_gen_atomic_or_fetch_i32(REG(ld_dst), REG(ld_adr),
2168                                         op_arg, ctx->memidx, ld_mop);
2169         } else {
2170             tcg_gen_atomic_fetch_or_i32(REG(ld_dst), REG(ld_adr),
2171                                         op_arg, ctx->memidx, ld_mop);
2172             tcg_gen_or_i32(REG(op_dst), REG(ld_dst), op_arg);
2173         }
2174         break;
2175 
2176     case INDEX_op_xor_i32:
2177         if (op_dst != st_src) {
2178             goto fail;
2179         }
2180         if (op_dst == ld_dst) {
2181             tcg_gen_atomic_xor_fetch_i32(REG(ld_dst), REG(ld_adr),
2182                                          op_arg, ctx->memidx, ld_mop);
2183         } else {
2184             tcg_gen_atomic_fetch_xor_i32(REG(ld_dst), REG(ld_adr),
2185                                          op_arg, ctx->memidx, ld_mop);
2186             tcg_gen_xor_i32(REG(op_dst), REG(ld_dst), op_arg);
2187         }
2188         break;
2189 
2190     case INDEX_op_setcond_i32:
2191         if (st_src == ld_dst) {
2192             goto fail;
2193         }
2194         tcg_gen_atomic_cmpxchg_i32(REG(ld_dst), REG(ld_adr), op_arg,
2195                                    REG(st_src), ctx->memidx, ld_mop);
2196         tcg_gen_setcond_i32(TCG_COND_EQ, cpu_sr_t, REG(ld_dst), op_arg);
2197         if (mt_dst >= 0) {
2198             tcg_gen_mov_i32(REG(mt_dst), cpu_sr_t);
2199         }
2200         break;
2201 
2202     default:
2203         g_assert_not_reached();
2204     }
2205 
2206     /* If op_src is not a valid register, then op_arg was a constant.  */
2207     if (op_src < 0 && op_arg) {
2208         tcg_temp_free_i32(op_arg);
2209     }
2210 
2211     /* The entire region has been translated.  */
2212     ctx->envflags &= ~GUSA_MASK;
2213     ctx->base.pc_next = pc_end;
2214     ctx->base.num_insns += max_insns - 1;
2215     return;
2216 
2217  fail:
2218     qemu_log_mask(LOG_UNIMP, "Unrecognized gUSA sequence %08x-%08x\n",
2219                   pc, pc_end);
2220 
2221     /* Restart with the EXCLUSIVE bit set, within a TB run via
2222        cpu_exec_step_atomic holding the exclusive lock.  */
2223     ctx->envflags |= GUSA_EXCLUSIVE;
2224     gen_save_cpu_state(ctx, false);
2225     gen_helper_exclusive(cpu_env);
2226     ctx->base.is_jmp = DISAS_NORETURN;
2227 
2228     /* We're not executing an instruction, but we must report one for the
2229        purposes of accounting within the TB.  We might as well report the
2230        entire region consumed via ctx->base.pc_next so that it's immediately
2231        available in the disassembly dump.  */
2232     ctx->base.pc_next = pc_end;
2233     ctx->base.num_insns += max_insns - 1;
2234 }
2235 #endif
2236 
2237 static void sh4_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
2238 {
2239     DisasContext *ctx = container_of(dcbase, DisasContext, base);
2240     CPUSH4State *env = cs->env_ptr;
2241     uint32_t tbflags;
2242     int bound;
2243 
2244     ctx->tbflags = tbflags = ctx->base.tb->flags;
2245     ctx->envflags = tbflags & TB_FLAG_ENVFLAGS_MASK;
2246     ctx->memidx = (tbflags & (1u << SR_MD)) == 0 ? 1 : 0;
2247     /* We don't know if the delayed pc came from a dynamic or static branch,
2248        so assume it is a dynamic branch.  */
2249     ctx->delayed_pc = -1; /* use delayed pc from env pointer */
2250     ctx->features = env->features;
2251     ctx->has_movcal = (tbflags & TB_FLAG_PENDING_MOVCA);
2252     ctx->gbank = ((tbflags & (1 << SR_MD)) &&
2253                   (tbflags & (1 << SR_RB))) * 0x10;
2254     ctx->fbank = tbflags & FPSCR_FR ? 0x10 : 0;
2255 
2256     if (tbflags & GUSA_MASK) {
2257         uint32_t pc = ctx->base.pc_next;
2258         uint32_t pc_end = ctx->base.tb->cs_base;
2259         int backup = sextract32(ctx->tbflags, GUSA_SHIFT, 8);
2260         int max_insns = (pc_end - pc) / 2;
2261 
2262         if (pc != pc_end + backup || max_insns < 2) {
2263             /* This is a malformed gUSA region.  Don't do anything special,
2264                since the interpreter is likely to get confused.  */
2265             ctx->envflags &= ~GUSA_MASK;
2266         } else if (tbflags & GUSA_EXCLUSIVE) {
2267             /* Regardless of single-stepping or the end of the page,
2268                we must complete execution of the gUSA region while
2269                holding the exclusive lock.  */
2270             ctx->base.max_insns = max_insns;
2271             return;
2272         }
2273     }
2274 
2275     /* Since the ISA is fixed-width, we can bound by the number
2276        of instructions remaining on the page.  */
2277     bound = -(ctx->base.pc_next | TARGET_PAGE_MASK) / 2;
2278     ctx->base.max_insns = MIN(ctx->base.max_insns, bound);
2279 }
2280 
2281 static void sh4_tr_tb_start(DisasContextBase *dcbase, CPUState *cs)
2282 {
2283 }
2284 
2285 static void sh4_tr_insn_start(DisasContextBase *dcbase, CPUState *cs)
2286 {
2287     DisasContext *ctx = container_of(dcbase, DisasContext, base);
2288 
2289     tcg_gen_insn_start(ctx->base.pc_next, ctx->envflags);
2290 }
2291 
2292 static bool sh4_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cs,
2293                                     const CPUBreakpoint *bp)
2294 {
2295     DisasContext *ctx = container_of(dcbase, DisasContext, base);
2296 
2297     /* We have hit a breakpoint - make sure PC is up-to-date */
2298     gen_save_cpu_state(ctx, true);
2299     gen_helper_debug(cpu_env);
2300     ctx->base.is_jmp = DISAS_NORETURN;
2301     /* The address covered by the breakpoint must be included in
2302        [tb->pc, tb->pc + tb->size) in order to for it to be
2303        properly cleared -- thus we increment the PC here so that
2304        the logic setting tb->size below does the right thing.  */
2305     ctx->base.pc_next += 2;
2306     return true;
2307 }
2308 
2309 static void sh4_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
2310 {
2311     CPUSH4State *env = cs->env_ptr;
2312     DisasContext *ctx = container_of(dcbase, DisasContext, base);
2313 
2314 #ifdef CONFIG_USER_ONLY
2315     if (unlikely(ctx->envflags & GUSA_MASK)
2316         && !(ctx->envflags & GUSA_EXCLUSIVE)) {
2317         /* We're in an gUSA region, and we have not already fallen
2318            back on using an exclusive region.  Attempt to parse the
2319            region into a single supported atomic operation.  Failure
2320            is handled within the parser by raising an exception to
2321            retry using an exclusive region.  */
2322         decode_gusa(ctx, env);
2323         return;
2324     }
2325 #endif
2326 
2327     ctx->opcode = translator_lduw(env, ctx->base.pc_next);
2328     decode_opc(ctx);
2329     ctx->base.pc_next += 2;
2330 }
2331 
2332 static void sh4_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
2333 {
2334     DisasContext *ctx = container_of(dcbase, DisasContext, base);
2335 
2336     if (ctx->tbflags & GUSA_EXCLUSIVE) {
2337         /* Ending the region of exclusivity.  Clear the bits.  */
2338         ctx->envflags &= ~GUSA_MASK;
2339     }
2340 
2341     switch (ctx->base.is_jmp) {
2342     case DISAS_STOP:
2343         gen_save_cpu_state(ctx, true);
2344         if (ctx->base.singlestep_enabled) {
2345             gen_helper_debug(cpu_env);
2346         } else {
2347             tcg_gen_exit_tb(NULL, 0);
2348         }
2349         break;
2350     case DISAS_NEXT:
2351     case DISAS_TOO_MANY:
2352         gen_save_cpu_state(ctx, false);
2353         gen_goto_tb(ctx, 0, ctx->base.pc_next);
2354         break;
2355     case DISAS_NORETURN:
2356         break;
2357     default:
2358         g_assert_not_reached();
2359     }
2360 }
2361 
2362 static void sh4_tr_disas_log(const DisasContextBase *dcbase, CPUState *cs)
2363 {
2364     qemu_log("IN:\n");  /* , lookup_symbol(dcbase->pc_first)); */
2365     log_target_disas(cs, dcbase->pc_first, dcbase->tb->size);
2366 }
2367 
2368 static const TranslatorOps sh4_tr_ops = {
2369     .init_disas_context = sh4_tr_init_disas_context,
2370     .tb_start           = sh4_tr_tb_start,
2371     .insn_start         = sh4_tr_insn_start,
2372     .breakpoint_check   = sh4_tr_breakpoint_check,
2373     .translate_insn     = sh4_tr_translate_insn,
2374     .tb_stop            = sh4_tr_tb_stop,
2375     .disas_log          = sh4_tr_disas_log,
2376 };
2377 
2378 void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
2379 {
2380     DisasContext ctx;
2381 
2382     translator_loop(&sh4_tr_ops, &ctx.base, cs, tb, max_insns);
2383 }
2384 
2385 void restore_state_to_opc(CPUSH4State *env, TranslationBlock *tb,
2386                           target_ulong *data)
2387 {
2388     env->pc = data[0];
2389     env->flags = data[1];
2390     /* Theoretically delayed_pc should also be restored. In practice the
2391        branch instruction is re-executed after exception, so the delayed
2392        branch target will be recomputed. */
2393 }
2394