xref: /openbmc/qemu/target/sh4/translate.c (revision f2b90109)
1 /*
2  *  SH4 translation
3  *
4  *  Copyright (c) 2005 Samuel Tardieu
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #define DEBUG_DISAS
21 
22 #include "qemu/osdep.h"
23 #include "cpu.h"
24 #include "disas/disas.h"
25 #include "exec/exec-all.h"
26 #include "tcg/tcg-op.h"
27 #include "exec/cpu_ldst.h"
28 #include "exec/helper-proto.h"
29 #include "exec/helper-gen.h"
30 #include "exec/translator.h"
31 #include "exec/log.h"
32 #include "qemu/qemu-print.h"
33 
34 
/* Per-TB translation state, discarded when the TB is finished. */
typedef struct DisasContext {
    DisasContextBase base;

    uint32_t tbflags;  /* should stay unmodified during the TB translation */
    uint32_t envflags; /* should stay in sync with env->flags using TCG ops */
    int memidx;        /* MMU index used for guest memory accesses */
    int gbank;         /* XOR offset selecting the active general-register bank (see REG) */
    int fbank;         /* XOR offset selecting the active FP-register bank (see FREG) */
    uint32_t delayed_pc; /* static branch target, or (uint32_t)-1 if not known */
    uint32_t features; /* CPU feature bits, e.g. SH_FEATURE_SH4A */

    uint16_t opcode;   /* raw 16-bit opcode currently being translated */

    bool has_movcal;   /* a movca.l store backup may still be live (see _decode_opc) */
} DisasContext;
50 
#if defined(CONFIG_USER_ONLY)
#define IS_USER(ctx) 1
/*
 * Use the macro parameter instead of silently capturing `ctx` from the
 * call site; all existing callers pass `ctx`, so behavior is unchanged.
 */
#define UNALIGN(C)   ((C)->tbflags & TB_FLAG_UNALIGN ? MO_UNALN : MO_ALIGN)
#else
#define IS_USER(ctx) (!(ctx->tbflags & (1u << SR_MD)))
#define UNALIGN(C)   0
#endif
58 
/* Target-specific values for ctx->base.is_jmp.  */
/* We want to exit back to the cpu loop for some reason.
   Usually this is to recognize interrupts immediately.  */
#define DISAS_STOP    DISAS_TARGET_0

/* global register indexes */
/* R0..R7 of both banks, R8..R15; entries 24..31 alias 8..15 (set up in
   sh4_translate_init), so REG()'s bank XOR always lands on a valid TCGv. */
static TCGv cpu_gregs[32];
/* SR is kept split: cpu_sr holds all bits except Q/M/T, which live in
   their own globals (reassembled by gen_read_sr / gen_write_sr). */
static TCGv cpu_sr, cpu_sr_m, cpu_sr_q, cpu_sr_t;
static TCGv cpu_pc, cpu_ssr, cpu_spc, cpu_gbr;
static TCGv cpu_vbr, cpu_sgr, cpu_dbr, cpu_mach, cpu_macl;
static TCGv cpu_pr, cpu_fpscr, cpu_fpul;
static TCGv cpu_lock_addr, cpu_lock_value;
static TCGv cpu_fregs[32];

/* internal register indexes */
static TCGv cpu_flags, cpu_delayed_pc, cpu_delayed_cond;

#include "exec/gen-icount.h"
77 
78 void sh4_translate_init(void)
79 {
80     int i;
81     static const char * const gregnames[24] = {
82         "R0_BANK0", "R1_BANK0", "R2_BANK0", "R3_BANK0",
83         "R4_BANK0", "R5_BANK0", "R6_BANK0", "R7_BANK0",
84         "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15",
85         "R0_BANK1", "R1_BANK1", "R2_BANK1", "R3_BANK1",
86         "R4_BANK1", "R5_BANK1", "R6_BANK1", "R7_BANK1"
87     };
88     static const char * const fregnames[32] = {
89          "FPR0_BANK0",  "FPR1_BANK0",  "FPR2_BANK0",  "FPR3_BANK0",
90          "FPR4_BANK0",  "FPR5_BANK0",  "FPR6_BANK0",  "FPR7_BANK0",
91          "FPR8_BANK0",  "FPR9_BANK0", "FPR10_BANK0", "FPR11_BANK0",
92         "FPR12_BANK0", "FPR13_BANK0", "FPR14_BANK0", "FPR15_BANK0",
93          "FPR0_BANK1",  "FPR1_BANK1",  "FPR2_BANK1",  "FPR3_BANK1",
94          "FPR4_BANK1",  "FPR5_BANK1",  "FPR6_BANK1",  "FPR7_BANK1",
95          "FPR8_BANK1",  "FPR9_BANK1", "FPR10_BANK1", "FPR11_BANK1",
96         "FPR12_BANK1", "FPR13_BANK1", "FPR14_BANK1", "FPR15_BANK1",
97     };
98 
99     for (i = 0; i < 24; i++) {
100         cpu_gregs[i] = tcg_global_mem_new_i32(cpu_env,
101                                               offsetof(CPUSH4State, gregs[i]),
102                                               gregnames[i]);
103     }
104     memcpy(cpu_gregs + 24, cpu_gregs + 8, 8 * sizeof(TCGv));
105 
106     cpu_pc = tcg_global_mem_new_i32(cpu_env,
107                                     offsetof(CPUSH4State, pc), "PC");
108     cpu_sr = tcg_global_mem_new_i32(cpu_env,
109                                     offsetof(CPUSH4State, sr), "SR");
110     cpu_sr_m = tcg_global_mem_new_i32(cpu_env,
111                                       offsetof(CPUSH4State, sr_m), "SR_M");
112     cpu_sr_q = tcg_global_mem_new_i32(cpu_env,
113                                       offsetof(CPUSH4State, sr_q), "SR_Q");
114     cpu_sr_t = tcg_global_mem_new_i32(cpu_env,
115                                       offsetof(CPUSH4State, sr_t), "SR_T");
116     cpu_ssr = tcg_global_mem_new_i32(cpu_env,
117                                      offsetof(CPUSH4State, ssr), "SSR");
118     cpu_spc = tcg_global_mem_new_i32(cpu_env,
119                                      offsetof(CPUSH4State, spc), "SPC");
120     cpu_gbr = tcg_global_mem_new_i32(cpu_env,
121                                      offsetof(CPUSH4State, gbr), "GBR");
122     cpu_vbr = tcg_global_mem_new_i32(cpu_env,
123                                      offsetof(CPUSH4State, vbr), "VBR");
124     cpu_sgr = tcg_global_mem_new_i32(cpu_env,
125                                      offsetof(CPUSH4State, sgr), "SGR");
126     cpu_dbr = tcg_global_mem_new_i32(cpu_env,
127                                      offsetof(CPUSH4State, dbr), "DBR");
128     cpu_mach = tcg_global_mem_new_i32(cpu_env,
129                                       offsetof(CPUSH4State, mach), "MACH");
130     cpu_macl = tcg_global_mem_new_i32(cpu_env,
131                                       offsetof(CPUSH4State, macl), "MACL");
132     cpu_pr = tcg_global_mem_new_i32(cpu_env,
133                                     offsetof(CPUSH4State, pr), "PR");
134     cpu_fpscr = tcg_global_mem_new_i32(cpu_env,
135                                        offsetof(CPUSH4State, fpscr), "FPSCR");
136     cpu_fpul = tcg_global_mem_new_i32(cpu_env,
137                                       offsetof(CPUSH4State, fpul), "FPUL");
138 
139     cpu_flags = tcg_global_mem_new_i32(cpu_env,
140 				       offsetof(CPUSH4State, flags), "_flags_");
141     cpu_delayed_pc = tcg_global_mem_new_i32(cpu_env,
142 					    offsetof(CPUSH4State, delayed_pc),
143 					    "_delayed_pc_");
144     cpu_delayed_cond = tcg_global_mem_new_i32(cpu_env,
145                                               offsetof(CPUSH4State,
146                                                        delayed_cond),
147                                               "_delayed_cond_");
148     cpu_lock_addr = tcg_global_mem_new_i32(cpu_env,
149                                            offsetof(CPUSH4State, lock_addr),
150                                            "_lock_addr_");
151     cpu_lock_value = tcg_global_mem_new_i32(cpu_env,
152                                             offsetof(CPUSH4State, lock_value),
153                                             "_lock_value_");
154 
155     for (i = 0; i < 32; i++)
156         cpu_fregs[i] = tcg_global_mem_new_i32(cpu_env,
157                                               offsetof(CPUSH4State, fregs[i]),
158                                               fregnames[i]);
159 }
160 
161 void superh_cpu_dump_state(CPUState *cs, FILE *f, int flags)
162 {
163     SuperHCPU *cpu = SUPERH_CPU(cs);
164     CPUSH4State *env = &cpu->env;
165     int i;
166 
167     qemu_fprintf(f, "pc=0x%08x sr=0x%08x pr=0x%08x fpscr=0x%08x\n",
168                  env->pc, cpu_read_sr(env), env->pr, env->fpscr);
169     qemu_fprintf(f, "spc=0x%08x ssr=0x%08x gbr=0x%08x vbr=0x%08x\n",
170                  env->spc, env->ssr, env->gbr, env->vbr);
171     qemu_fprintf(f, "sgr=0x%08x dbr=0x%08x delayed_pc=0x%08x fpul=0x%08x\n",
172                  env->sgr, env->dbr, env->delayed_pc, env->fpul);
173     for (i = 0; i < 24; i += 4) {
174         qemu_printf("r%d=0x%08x r%d=0x%08x r%d=0x%08x r%d=0x%08x\n",
175 		    i, env->gregs[i], i + 1, env->gregs[i + 1],
176 		    i + 2, env->gregs[i + 2], i + 3, env->gregs[i + 3]);
177     }
178     if (env->flags & TB_FLAG_DELAY_SLOT) {
179         qemu_printf("in delay slot (delayed_pc=0x%08x)\n",
180 		    env->delayed_pc);
181     } else if (env->flags & TB_FLAG_DELAY_SLOT_COND) {
182         qemu_printf("in conditional delay slot (delayed_pc=0x%08x)\n",
183 		    env->delayed_pc);
184     } else if (env->flags & TB_FLAG_DELAY_SLOT_RTE) {
185         qemu_fprintf(f, "in rte delay slot (delayed_pc=0x%08x)\n",
186                      env->delayed_pc);
187     }
188 }
189 
190 static void gen_read_sr(TCGv dst)
191 {
192     TCGv t0 = tcg_temp_new();
193     tcg_gen_shli_i32(t0, cpu_sr_q, SR_Q);
194     tcg_gen_or_i32(dst, dst, t0);
195     tcg_gen_shli_i32(t0, cpu_sr_m, SR_M);
196     tcg_gen_or_i32(dst, dst, t0);
197     tcg_gen_shli_i32(t0, cpu_sr_t, SR_T);
198     tcg_gen_or_i32(dst, cpu_sr, t0);
199 }
200 
/*
 * Scatter an architectural SR value @src into the split representation:
 * cpu_sr keeps every bit except Q/M/T, which are extracted into their
 * own one-bit globals.
 */
static void gen_write_sr(TCGv src)
{
    tcg_gen_andi_i32(cpu_sr, src,
                     ~((1u << SR_Q) | (1u << SR_M) | (1u << SR_T)));
    tcg_gen_extract_i32(cpu_sr_q, src, SR_Q, 1);
    tcg_gen_extract_i32(cpu_sr_m, src, SR_M, 1);
    tcg_gen_extract_i32(cpu_sr_t, src, SR_T, 1);
}
209 
/*
 * Flush cached translation-time state back to the CPU globals, e.g.
 * before raising an exception or calling a helper that may need it.
 * @save_pc: also store the current instruction address into cpu_pc.
 */
static inline void gen_save_cpu_state(DisasContext *ctx, bool save_pc)
{
    if (save_pc) {
        tcg_gen_movi_i32(cpu_pc, ctx->base.pc_next);
    }
    /* (uint32_t)-1 means the delayed branch target is not statically known. */
    if (ctx->delayed_pc != (uint32_t) -1) {
        tcg_gen_movi_i32(cpu_delayed_pc, ctx->delayed_pc);
    }
    /* Skip the store when env->flags already matches ctx->envflags. */
    if ((ctx->tbflags & TB_FLAG_ENVFLAGS_MASK) != ctx->envflags) {
        tcg_gen_movi_i32(cpu_flags, ctx->envflags);
    }
}
222 
223 static inline bool use_exit_tb(DisasContext *ctx)
224 {
225     return (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) != 0;
226 }
227 
228 static bool use_goto_tb(DisasContext *ctx, target_ulong dest)
229 {
230     if (use_exit_tb(ctx)) {
231         return false;
232     }
233     return translator_use_goto_tb(&ctx->base, dest);
234 }
235 
/*
 * Emit a jump to @dest, ending the TB.  Uses direct TB chaining via
 * goto_tb slot @n when permitted; otherwise stores the target PC and
 * either exits to the cpu loop (exclusive region) or attempts an
 * indirect TB lookup.
 */
static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
{
    if (use_goto_tb(ctx, dest)) {
        tcg_gen_goto_tb(n);
        tcg_gen_movi_i32(cpu_pc, dest);
        tcg_gen_exit_tb(ctx->base.tb, n);
    } else {
        tcg_gen_movi_i32(cpu_pc, dest);
        if (use_exit_tb(ctx)) {
            /* gUSA exclusive region: must return to the cpu loop. */
            tcg_gen_exit_tb(NULL, 0);
        } else {
            tcg_gen_lookup_and_goto_ptr();
        }
    }
    ctx->base.is_jmp = DISAS_NORETURN;
}
252 
/* Emit the jump for a (possibly delayed) branch whose slot is done. */
static void gen_jump(DisasContext * ctx)
{
    if (ctx->delayed_pc == -1) {
        /* Target is not statically known; it comes necessarily from a
           delayed jump, as immediate jumps are conditional jumps. */
        tcg_gen_mov_i32(cpu_pc, cpu_delayed_pc);
        tcg_gen_discard_i32(cpu_delayed_pc);
        if (use_exit_tb(ctx)) {
            tcg_gen_exit_tb(NULL, 0);
        } else {
            tcg_gen_lookup_and_goto_ptr();
        }
        ctx->base.is_jmp = DISAS_NORETURN;
    } else {
        /* Static target: normal TB-chaining path. */
        gen_goto_tb(ctx, 0, ctx->delayed_pc);
    }
}
270 
/* Immediate conditional jump (bt or bf).
 * @jump_if_true: branch taken when SR.T == 1 (bt) vs == 0 (bf). */
static void gen_conditional_jump(DisasContext *ctx, target_ulong dest,
                                 bool jump_if_true)
{
    TCGLabel *l1 = gen_new_label();
    /* Condition under which the branch is NOT taken (skip to l1). */
    TCGCond cond_not_taken = jump_if_true ? TCG_COND_EQ : TCG_COND_NE;

    if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
        /* When in an exclusive region, we must continue to the end.
           Therefore, exit the region on a taken branch, but otherwise
           fall through to the next instruction.  */
        tcg_gen_brcondi_i32(cond_not_taken, cpu_sr_t, 0, l1);
        tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~TB_FLAG_GUSA_MASK);
        /* Note that this won't actually use a goto_tb opcode because we
           disallow it in use_goto_tb, but it handles exit + singlestep.  */
        gen_goto_tb(ctx, 0, dest);
        gen_set_label(l1);
        ctx->base.is_jmp = DISAS_NEXT;
        return;
    }

    gen_save_cpu_state(ctx, false);
    tcg_gen_brcondi_i32(cond_not_taken, cpu_sr_t, 0, l1);
    gen_goto_tb(ctx, 0, dest);              /* taken path */
    gen_set_label(l1);
    gen_goto_tb(ctx, 1, ctx->base.pc_next + 2); /* fall-through path */
    ctx->base.is_jmp = DISAS_NORETURN;
}
299 
/* Delayed conditional jump (bt/s or bf/s): the decision was latched in
 * cpu_delayed_cond when the branch itself was translated; here, after
 * the delay slot, consume it and jump or fall through. */
static void gen_delayed_conditional_jump(DisasContext * ctx)
{
    TCGLabel *l1 = gen_new_label();
    TCGv ds = tcg_temp_new();

    /* Snapshot and invalidate the latched condition. */
    tcg_gen_mov_i32(ds, cpu_delayed_cond);
    tcg_gen_discard_i32(cpu_delayed_cond);

    if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
        /* When in an exclusive region, we must continue to the end.
           Therefore, exit the region on a taken branch, but otherwise
           fall through to the next instruction.  */
        tcg_gen_brcondi_i32(TCG_COND_EQ, ds, 0, l1);

        /* Leave the gUSA region.  */
        tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~TB_FLAG_GUSA_MASK);
        gen_jump(ctx);

        gen_set_label(l1);
        ctx->base.is_jmp = DISAS_NEXT;
        return;
    }

    /* Not taken: chain to the next sequential instruction. */
    tcg_gen_brcondi_i32(TCG_COND_NE, ds, 0, l1);
    gen_goto_tb(ctx, 1, ctx->base.pc_next + 2);
    gen_set_label(l1);
    gen_jump(ctx);
}
329 
/* Load the 64-bit double register DRn (@reg, even) into @t:
 * FPRn supplies the high 32 bits, FPRn+1 the low 32 bits. */
static inline void gen_load_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
{
    /* We have already signaled illegal instruction for odd Dr.  */
    tcg_debug_assert((reg & 1) == 0);
    reg ^= ctx->fbank;  /* select the active FP bank */
    tcg_gen_concat_i32_i64(t, cpu_fregs[reg + 1], cpu_fregs[reg]);
}
337 
/* Store the 64-bit value @t into double register DRn (@reg, even):
 * high 32 bits go to FPRn, low 32 bits to FPRn+1 (mirror of load). */
static inline void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
{
    /* We have already signaled illegal instruction for odd Dr.  */
    tcg_debug_assert((reg & 1) == 0);
    reg ^= ctx->fbank;  /* select the active FP bank */
    tcg_gen_extr_i64_i32(cpu_fregs[reg + 1], cpu_fregs[reg], t);
}
345 
/* Opcode field extraction helpers; ctx->opcode is the raw 16-bit insn. */
#define B3_0 (ctx->opcode & 0xf)            /* bits  3:0, unsigned */
#define B6_4 ((ctx->opcode >> 4) & 0x7)     /* bits  6:4, unsigned */
#define B7_4 ((ctx->opcode >> 4) & 0xf)     /* bits  7:4, unsigned */
#define B7_0 (ctx->opcode & 0xff)           /* bits  7:0, unsigned */
#define B7_0s ((int32_t) (int8_t) (ctx->opcode & 0xff)) /* bits 7:0, sign-extended */
/* bits 11:0, sign-extended (12-bit branch displacement) */
#define B11_0s (ctx->opcode & 0x800 ? 0xfffff000 | (ctx->opcode & 0xfff) : \
  (ctx->opcode & 0xfff))
#define B11_8 ((ctx->opcode >> 8) & 0xf)    /* bits 11:8, unsigned */
#define B15_12 ((ctx->opcode >> 12) & 0xf)  /* bits 15:12, unsigned */

#define REG(x)     cpu_gregs[(x) ^ ctx->gbank]          /* active-bank Rx */
#define ALTREG(x)  cpu_gregs[(x) ^ ctx->gbank ^ 0x10]   /* other-bank Rx */
#define FREG(x)    cpu_fregs[(x) ^ ctx->fbank]          /* active-bank FPRx */

/* Remap an XD register number onto the cpu_fregs index space. */
#define XHACK(x) ((((x) & 1 ) << 4) | ((x) & 0xe))
361 
/* Instruction-legality guards used inside _decode_opc; each jumps to the
   corresponding exception label on violation. */

/* Reject instructions that are illegal inside a delay slot. */
#define CHECK_NOT_DELAY_SLOT \
    if (ctx->envflags & TB_FLAG_DELAY_SLOT_MASK) {  \
        goto do_illegal_slot;                       \
    }

/* Reject privileged instructions in user mode. */
#define CHECK_PRIVILEGED \
    if (IS_USER(ctx)) {                     \
        goto do_illegal;                    \
    }

/* Reject FP instructions while SR.FD (FPU disable) is set. */
#define CHECK_FPU_ENABLED \
    if (ctx->tbflags & (1u << SR_FD)) {     \
        goto do_fpu_disabled;               \
    }

/* Instruction only valid in single-precision mode (FPSCR.PR == 0). */
#define CHECK_FPSCR_PR_0 \
    if (ctx->tbflags & FPSCR_PR) {          \
        goto do_illegal;                    \
    }

/* Instruction only valid in double-precision mode (FPSCR.PR == 1). */
#define CHECK_FPSCR_PR_1 \
    if (!(ctx->tbflags & FPSCR_PR)) {       \
        goto do_illegal;                    \
    }

/* Instruction only present on SH4A cores. */
#define CHECK_SH4A \
    if (!(ctx->features & SH_FEATURE_SH4A)) { \
        goto do_illegal;                      \
    }
391 
392 static void _decode_opc(DisasContext * ctx)
393 {
394     /* This code tries to make movcal emulation sufficiently
395        accurate for Linux purposes.  This instruction writes
396        memory, and prior to that, always allocates a cache line.
397        It is used in two contexts:
398        - in memcpy, where data is copied in blocks, the first write
399        of to a block uses movca.l for performance.
400        - in arch/sh/mm/cache-sh4.c, movcal.l + ocbi combination is used
401        to flush the cache. Here, the data written by movcal.l is never
402        written to memory, and the data written is just bogus.
403 
404        To simulate this, we simulate movcal.l, we store the value to memory,
405        but we also remember the previous content. If we see ocbi, we check
406        if movcal.l for that address was done previously. If so, the write should
407        not have hit the memory, so we restore the previous content.
408        When we see an instruction that is neither movca.l
409        nor ocbi, the previous content is discarded.
410 
411        To optimize, we only try to flush stores when we're at the start of
412        TB, or if we already saw movca.l in this TB and did not flush stores
413        yet.  */
414     if (ctx->has_movcal)
415 	{
416 	  int opcode = ctx->opcode & 0xf0ff;
417 	  if (opcode != 0x0093 /* ocbi */
418 	      && opcode != 0x00c3 /* movca.l */)
419 	      {
420                   gen_helper_discard_movcal_backup(cpu_env);
421 		  ctx->has_movcal = 0;
422 	      }
423 	}
424 
425 #if 0
426     fprintf(stderr, "Translating opcode 0x%04x\n", ctx->opcode);
427 #endif
428 
429     switch (ctx->opcode) {
430     case 0x0019:		/* div0u */
431         tcg_gen_movi_i32(cpu_sr_m, 0);
432         tcg_gen_movi_i32(cpu_sr_q, 0);
433         tcg_gen_movi_i32(cpu_sr_t, 0);
434 	return;
435     case 0x000b:		/* rts */
436 	CHECK_NOT_DELAY_SLOT
437 	tcg_gen_mov_i32(cpu_delayed_pc, cpu_pr);
438         ctx->envflags |= TB_FLAG_DELAY_SLOT;
439 	ctx->delayed_pc = (uint32_t) - 1;
440 	return;
441     case 0x0028:		/* clrmac */
442 	tcg_gen_movi_i32(cpu_mach, 0);
443 	tcg_gen_movi_i32(cpu_macl, 0);
444 	return;
445     case 0x0048:		/* clrs */
446         tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_S));
447 	return;
448     case 0x0008:		/* clrt */
449         tcg_gen_movi_i32(cpu_sr_t, 0);
450 	return;
451     case 0x0038:		/* ldtlb */
452 	CHECK_PRIVILEGED
453         gen_helper_ldtlb(cpu_env);
454 	return;
455     case 0x002b:		/* rte */
456 	CHECK_PRIVILEGED
457 	CHECK_NOT_DELAY_SLOT
458         gen_write_sr(cpu_ssr);
459 	tcg_gen_mov_i32(cpu_delayed_pc, cpu_spc);
460         ctx->envflags |= TB_FLAG_DELAY_SLOT_RTE;
461 	ctx->delayed_pc = (uint32_t) - 1;
462         ctx->base.is_jmp = DISAS_STOP;
463 	return;
464     case 0x0058:		/* sets */
465         tcg_gen_ori_i32(cpu_sr, cpu_sr, (1u << SR_S));
466 	return;
467     case 0x0018:		/* sett */
468         tcg_gen_movi_i32(cpu_sr_t, 1);
469 	return;
470     case 0xfbfd:		/* frchg */
471         CHECK_FPSCR_PR_0
472 	tcg_gen_xori_i32(cpu_fpscr, cpu_fpscr, FPSCR_FR);
473         ctx->base.is_jmp = DISAS_STOP;
474 	return;
475     case 0xf3fd:		/* fschg */
476         CHECK_FPSCR_PR_0
477         tcg_gen_xori_i32(cpu_fpscr, cpu_fpscr, FPSCR_SZ);
478         ctx->base.is_jmp = DISAS_STOP;
479 	return;
480     case 0xf7fd:                /* fpchg */
481         CHECK_SH4A
482         tcg_gen_xori_i32(cpu_fpscr, cpu_fpscr, FPSCR_PR);
483         ctx->base.is_jmp = DISAS_STOP;
484         return;
485     case 0x0009:		/* nop */
486 	return;
487     case 0x001b:		/* sleep */
488 	CHECK_PRIVILEGED
489         tcg_gen_movi_i32(cpu_pc, ctx->base.pc_next + 2);
490         gen_helper_sleep(cpu_env);
491 	return;
492     }
493 
494     switch (ctx->opcode & 0xf000) {
495     case 0x1000:		/* mov.l Rm,@(disp,Rn) */
496 	{
497 	    TCGv addr = tcg_temp_new();
498 	    tcg_gen_addi_i32(addr, REG(B11_8), B3_0 * 4);
499             tcg_gen_qemu_st_i32(REG(B7_4), addr, ctx->memidx,
500                                 MO_TEUL | UNALIGN(ctx));
501 	}
502 	return;
503     case 0x5000:		/* mov.l @(disp,Rm),Rn */
504 	{
505 	    TCGv addr = tcg_temp_new();
506 	    tcg_gen_addi_i32(addr, REG(B7_4), B3_0 * 4);
507             tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx,
508                                 MO_TESL | UNALIGN(ctx));
509 	}
510 	return;
511     case 0xe000:		/* mov #imm,Rn */
512 #ifdef CONFIG_USER_ONLY
513         /*
514          * Detect the start of a gUSA region (mov #-n, r15).
515          * If so, update envflags and end the TB.  This will allow us
516          * to see the end of the region (stored in R0) in the next TB.
517          */
518         if (B11_8 == 15 && B7_0s < 0 &&
519             (tb_cflags(ctx->base.tb) & CF_PARALLEL)) {
520             ctx->envflags =
521                 deposit32(ctx->envflags, TB_FLAG_GUSA_SHIFT, 8, B7_0s);
522             ctx->base.is_jmp = DISAS_STOP;
523         }
524 #endif
525 	tcg_gen_movi_i32(REG(B11_8), B7_0s);
526 	return;
527     case 0x9000:		/* mov.w @(disp,PC),Rn */
528 	{
529             TCGv addr = tcg_const_i32(ctx->base.pc_next + 4 + B7_0 * 2);
530             tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, MO_TESW);
531 	}
532 	return;
533     case 0xd000:		/* mov.l @(disp,PC),Rn */
534 	{
535             TCGv addr = tcg_const_i32((ctx->base.pc_next + 4 + B7_0 * 4) & ~3);
536             tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, MO_TESL);
537 	}
538 	return;
539     case 0x7000:		/* add #imm,Rn */
540 	tcg_gen_addi_i32(REG(B11_8), REG(B11_8), B7_0s);
541 	return;
542     case 0xa000:		/* bra disp */
543 	CHECK_NOT_DELAY_SLOT
544         ctx->delayed_pc = ctx->base.pc_next + 4 + B11_0s * 2;
545         ctx->envflags |= TB_FLAG_DELAY_SLOT;
546 	return;
547     case 0xb000:		/* bsr disp */
548 	CHECK_NOT_DELAY_SLOT
549         tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
550         ctx->delayed_pc = ctx->base.pc_next + 4 + B11_0s * 2;
551         ctx->envflags |= TB_FLAG_DELAY_SLOT;
552 	return;
553     }
554 
555     switch (ctx->opcode & 0xf00f) {
556     case 0x6003:		/* mov Rm,Rn */
557 	tcg_gen_mov_i32(REG(B11_8), REG(B7_4));
558 	return;
559     case 0x2000:		/* mov.b Rm,@Rn */
560         tcg_gen_qemu_st_i32(REG(B7_4), REG(B11_8), ctx->memidx, MO_UB);
561 	return;
562     case 0x2001:		/* mov.w Rm,@Rn */
563         tcg_gen_qemu_st_i32(REG(B7_4), REG(B11_8), ctx->memidx,
564                             MO_TEUW | UNALIGN(ctx));
565 	return;
566     case 0x2002:		/* mov.l Rm,@Rn */
567         tcg_gen_qemu_st_i32(REG(B7_4), REG(B11_8), ctx->memidx,
568                             MO_TEUL | UNALIGN(ctx));
569 	return;
570     case 0x6000:		/* mov.b @Rm,Rn */
571         tcg_gen_qemu_ld_i32(REG(B11_8), REG(B7_4), ctx->memidx, MO_SB);
572 	return;
573     case 0x6001:		/* mov.w @Rm,Rn */
574         tcg_gen_qemu_ld_i32(REG(B11_8), REG(B7_4), ctx->memidx,
575                             MO_TESW | UNALIGN(ctx));
576 	return;
577     case 0x6002:		/* mov.l @Rm,Rn */
578         tcg_gen_qemu_ld_i32(REG(B11_8), REG(B7_4), ctx->memidx,
579                             MO_TESL | UNALIGN(ctx));
580 	return;
581     case 0x2004:		/* mov.b Rm,@-Rn */
582 	{
583 	    TCGv addr = tcg_temp_new();
584 	    tcg_gen_subi_i32(addr, REG(B11_8), 1);
585             /* might cause re-execution */
586             tcg_gen_qemu_st_i32(REG(B7_4), addr, ctx->memidx, MO_UB);
587 	    tcg_gen_mov_i32(REG(B11_8), addr);			/* modify register status */
588 	}
589 	return;
590     case 0x2005:		/* mov.w Rm,@-Rn */
591 	{
592 	    TCGv addr = tcg_temp_new();
593 	    tcg_gen_subi_i32(addr, REG(B11_8), 2);
594             tcg_gen_qemu_st_i32(REG(B7_4), addr, ctx->memidx,
595                                 MO_TEUW | UNALIGN(ctx));
596 	    tcg_gen_mov_i32(REG(B11_8), addr);
597 	}
598 	return;
599     case 0x2006:		/* mov.l Rm,@-Rn */
600 	{
601 	    TCGv addr = tcg_temp_new();
602 	    tcg_gen_subi_i32(addr, REG(B11_8), 4);
603             tcg_gen_qemu_st_i32(REG(B7_4), addr, ctx->memidx,
604                                 MO_TEUL | UNALIGN(ctx));
605 	    tcg_gen_mov_i32(REG(B11_8), addr);
606 	}
607 	return;
608     case 0x6004:		/* mov.b @Rm+,Rn */
609         tcg_gen_qemu_ld_i32(REG(B11_8), REG(B7_4), ctx->memidx, MO_SB);
610 	if ( B11_8 != B7_4 )
611 		tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 1);
612 	return;
613     case 0x6005:		/* mov.w @Rm+,Rn */
614         tcg_gen_qemu_ld_i32(REG(B11_8), REG(B7_4), ctx->memidx,
615                             MO_TESW | UNALIGN(ctx));
616 	if ( B11_8 != B7_4 )
617 		tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 2);
618 	return;
619     case 0x6006:		/* mov.l @Rm+,Rn */
620         tcg_gen_qemu_ld_i32(REG(B11_8), REG(B7_4), ctx->memidx,
621                             MO_TESL | UNALIGN(ctx));
622 	if ( B11_8 != B7_4 )
623 		tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 4);
624 	return;
625     case 0x0004:		/* mov.b Rm,@(R0,Rn) */
626 	{
627 	    TCGv addr = tcg_temp_new();
628 	    tcg_gen_add_i32(addr, REG(B11_8), REG(0));
629             tcg_gen_qemu_st_i32(REG(B7_4), addr, ctx->memidx, MO_UB);
630 	}
631 	return;
632     case 0x0005:		/* mov.w Rm,@(R0,Rn) */
633 	{
634 	    TCGv addr = tcg_temp_new();
635 	    tcg_gen_add_i32(addr, REG(B11_8), REG(0));
636             tcg_gen_qemu_st_i32(REG(B7_4), addr, ctx->memidx,
637                                 MO_TEUW | UNALIGN(ctx));
638 	}
639 	return;
640     case 0x0006:		/* mov.l Rm,@(R0,Rn) */
641 	{
642 	    TCGv addr = tcg_temp_new();
643 	    tcg_gen_add_i32(addr, REG(B11_8), REG(0));
644             tcg_gen_qemu_st_i32(REG(B7_4), addr, ctx->memidx,
645                                 MO_TEUL | UNALIGN(ctx));
646 	}
647 	return;
648     case 0x000c:		/* mov.b @(R0,Rm),Rn */
649 	{
650 	    TCGv addr = tcg_temp_new();
651 	    tcg_gen_add_i32(addr, REG(B7_4), REG(0));
652             tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, MO_SB);
653 	}
654 	return;
655     case 0x000d:		/* mov.w @(R0,Rm),Rn */
656 	{
657 	    TCGv addr = tcg_temp_new();
658 	    tcg_gen_add_i32(addr, REG(B7_4), REG(0));
659             tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx,
660                                 MO_TESW | UNALIGN(ctx));
661 	}
662 	return;
663     case 0x000e:		/* mov.l @(R0,Rm),Rn */
664 	{
665 	    TCGv addr = tcg_temp_new();
666 	    tcg_gen_add_i32(addr, REG(B7_4), REG(0));
667             tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx,
668                                 MO_TESL | UNALIGN(ctx));
669 	}
670 	return;
671     case 0x6008:		/* swap.b Rm,Rn */
672 	{
673             TCGv low = tcg_temp_new();
674             tcg_gen_bswap16_i32(low, REG(B7_4), 0);
675             tcg_gen_deposit_i32(REG(B11_8), REG(B7_4), low, 0, 16);
676 	}
677 	return;
678     case 0x6009:		/* swap.w Rm,Rn */
679         tcg_gen_rotli_i32(REG(B11_8), REG(B7_4), 16);
680 	return;
681     case 0x200d:		/* xtrct Rm,Rn */
682 	{
683 	    TCGv high, low;
684 	    high = tcg_temp_new();
685 	    tcg_gen_shli_i32(high, REG(B7_4), 16);
686 	    low = tcg_temp_new();
687 	    tcg_gen_shri_i32(low, REG(B11_8), 16);
688 	    tcg_gen_or_i32(REG(B11_8), high, low);
689 	}
690 	return;
691     case 0x300c:		/* add Rm,Rn */
692 	tcg_gen_add_i32(REG(B11_8), REG(B11_8), REG(B7_4));
693 	return;
694     case 0x300e:		/* addc Rm,Rn */
695         {
696             TCGv t0, t1;
697             t0 = tcg_const_tl(0);
698             t1 = tcg_temp_new();
699             tcg_gen_add2_i32(t1, cpu_sr_t, cpu_sr_t, t0, REG(B7_4), t0);
700             tcg_gen_add2_i32(REG(B11_8), cpu_sr_t,
701                              REG(B11_8), t0, t1, cpu_sr_t);
702         }
703 	return;
704     case 0x300f:		/* addv Rm,Rn */
705         {
706             TCGv t0, t1, t2;
707             t0 = tcg_temp_new();
708             tcg_gen_add_i32(t0, REG(B7_4), REG(B11_8));
709             t1 = tcg_temp_new();
710             tcg_gen_xor_i32(t1, t0, REG(B11_8));
711             t2 = tcg_temp_new();
712             tcg_gen_xor_i32(t2, REG(B7_4), REG(B11_8));
713             tcg_gen_andc_i32(cpu_sr_t, t1, t2);
714             tcg_gen_shri_i32(cpu_sr_t, cpu_sr_t, 31);
715             tcg_gen_mov_i32(REG(B7_4), t0);
716         }
717 	return;
718     case 0x2009:		/* and Rm,Rn */
719 	tcg_gen_and_i32(REG(B11_8), REG(B11_8), REG(B7_4));
720 	return;
721     case 0x3000:		/* cmp/eq Rm,Rn */
722         tcg_gen_setcond_i32(TCG_COND_EQ, cpu_sr_t, REG(B11_8), REG(B7_4));
723 	return;
724     case 0x3003:		/* cmp/ge Rm,Rn */
725         tcg_gen_setcond_i32(TCG_COND_GE, cpu_sr_t, REG(B11_8), REG(B7_4));
726 	return;
727     case 0x3007:		/* cmp/gt Rm,Rn */
728         tcg_gen_setcond_i32(TCG_COND_GT, cpu_sr_t, REG(B11_8), REG(B7_4));
729 	return;
730     case 0x3006:		/* cmp/hi Rm,Rn */
731         tcg_gen_setcond_i32(TCG_COND_GTU, cpu_sr_t, REG(B11_8), REG(B7_4));
732 	return;
733     case 0x3002:		/* cmp/hs Rm,Rn */
734         tcg_gen_setcond_i32(TCG_COND_GEU, cpu_sr_t, REG(B11_8), REG(B7_4));
735 	return;
736     case 0x200c:		/* cmp/str Rm,Rn */
737 	{
738 	    TCGv cmp1 = tcg_temp_new();
739 	    TCGv cmp2 = tcg_temp_new();
740             tcg_gen_xor_i32(cmp2, REG(B7_4), REG(B11_8));
741             tcg_gen_subi_i32(cmp1, cmp2, 0x01010101);
742             tcg_gen_andc_i32(cmp1, cmp1, cmp2);
743             tcg_gen_andi_i32(cmp1, cmp1, 0x80808080);
744             tcg_gen_setcondi_i32(TCG_COND_NE, cpu_sr_t, cmp1, 0);
745 	}
746 	return;
747     case 0x2007:		/* div0s Rm,Rn */
748         tcg_gen_shri_i32(cpu_sr_q, REG(B11_8), 31);         /* SR_Q */
749         tcg_gen_shri_i32(cpu_sr_m, REG(B7_4), 31);          /* SR_M */
750         tcg_gen_xor_i32(cpu_sr_t, cpu_sr_q, cpu_sr_m);      /* SR_T */
751 	return;
752     case 0x3004:		/* div1 Rm,Rn */
753         {
754             TCGv t0 = tcg_temp_new();
755             TCGv t1 = tcg_temp_new();
756             TCGv t2 = tcg_temp_new();
757             TCGv zero = tcg_const_i32(0);
758 
759             /* shift left arg1, saving the bit being pushed out and inserting
760                T on the right */
761             tcg_gen_shri_i32(t0, REG(B11_8), 31);
762             tcg_gen_shli_i32(REG(B11_8), REG(B11_8), 1);
763             tcg_gen_or_i32(REG(B11_8), REG(B11_8), cpu_sr_t);
764 
765             /* Add or subtract arg0 from arg1 depending if Q == M. To avoid
766                using 64-bit temps, we compute arg0's high part from q ^ m, so
767                that it is 0x00000000 when adding the value or 0xffffffff when
768                subtracting it. */
769             tcg_gen_xor_i32(t1, cpu_sr_q, cpu_sr_m);
770             tcg_gen_subi_i32(t1, t1, 1);
771             tcg_gen_neg_i32(t2, REG(B7_4));
772             tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, zero, REG(B7_4), t2);
773             tcg_gen_add2_i32(REG(B11_8), t1, REG(B11_8), zero, t2, t1);
774 
775             /* compute T and Q depending on carry */
776             tcg_gen_andi_i32(t1, t1, 1);
777             tcg_gen_xor_i32(t1, t1, t0);
778             tcg_gen_xori_i32(cpu_sr_t, t1, 1);
779             tcg_gen_xor_i32(cpu_sr_q, cpu_sr_m, t1);
780         }
781 	return;
782     case 0x300d:		/* dmuls.l Rm,Rn */
783         tcg_gen_muls2_i32(cpu_macl, cpu_mach, REG(B7_4), REG(B11_8));
784 	return;
785     case 0x3005:		/* dmulu.l Rm,Rn */
786         tcg_gen_mulu2_i32(cpu_macl, cpu_mach, REG(B7_4), REG(B11_8));
787 	return;
788     case 0x600e:		/* exts.b Rm,Rn */
789 	tcg_gen_ext8s_i32(REG(B11_8), REG(B7_4));
790 	return;
791     case 0x600f:		/* exts.w Rm,Rn */
792 	tcg_gen_ext16s_i32(REG(B11_8), REG(B7_4));
793 	return;
794     case 0x600c:		/* extu.b Rm,Rn */
795 	tcg_gen_ext8u_i32(REG(B11_8), REG(B7_4));
796 	return;
797     case 0x600d:		/* extu.w Rm,Rn */
798 	tcg_gen_ext16u_i32(REG(B11_8), REG(B7_4));
799 	return;
800     case 0x000f:		/* mac.l @Rm+,@Rn+ */
801 	{
802 	    TCGv arg0, arg1;
803 	    arg0 = tcg_temp_new();
804             tcg_gen_qemu_ld_i32(arg0, REG(B7_4), ctx->memidx, MO_TESL);
805 	    arg1 = tcg_temp_new();
806             tcg_gen_qemu_ld_i32(arg1, REG(B11_8), ctx->memidx, MO_TESL);
807             gen_helper_macl(cpu_env, arg0, arg1);
808 	    tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 4);
809 	    tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
810 	}
811 	return;
812     case 0x400f:		/* mac.w @Rm+,@Rn+ */
813 	{
814 	    TCGv arg0, arg1;
815 	    arg0 = tcg_temp_new();
816             tcg_gen_qemu_ld_i32(arg0, REG(B7_4), ctx->memidx, MO_TESL);
817 	    arg1 = tcg_temp_new();
818             tcg_gen_qemu_ld_i32(arg1, REG(B11_8), ctx->memidx, MO_TESL);
819             gen_helper_macw(cpu_env, arg0, arg1);
820 	    tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 2);
821 	    tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 2);
822 	}
823 	return;
824     case 0x0007:		/* mul.l Rm,Rn */
825 	tcg_gen_mul_i32(cpu_macl, REG(B7_4), REG(B11_8));
826 	return;
827     case 0x200f:		/* muls.w Rm,Rn */
828 	{
829 	    TCGv arg0, arg1;
830 	    arg0 = tcg_temp_new();
831 	    tcg_gen_ext16s_i32(arg0, REG(B7_4));
832 	    arg1 = tcg_temp_new();
833 	    tcg_gen_ext16s_i32(arg1, REG(B11_8));
834 	    tcg_gen_mul_i32(cpu_macl, arg0, arg1);
835 	}
836 	return;
837     case 0x200e:		/* mulu.w Rm,Rn */
838 	{
839 	    TCGv arg0, arg1;
840 	    arg0 = tcg_temp_new();
841 	    tcg_gen_ext16u_i32(arg0, REG(B7_4));
842 	    arg1 = tcg_temp_new();
843 	    tcg_gen_ext16u_i32(arg1, REG(B11_8));
844 	    tcg_gen_mul_i32(cpu_macl, arg0, arg1);
845 	}
846 	return;
847     case 0x600b:		/* neg Rm,Rn */
848 	tcg_gen_neg_i32(REG(B11_8), REG(B7_4));
849 	return;
850     case 0x600a:		/* negc Rm,Rn */
851         {
852             TCGv t0 = tcg_const_i32(0);
853             tcg_gen_add2_i32(REG(B11_8), cpu_sr_t,
854                              REG(B7_4), t0, cpu_sr_t, t0);
855             tcg_gen_sub2_i32(REG(B11_8), cpu_sr_t,
856                              t0, t0, REG(B11_8), cpu_sr_t);
857             tcg_gen_andi_i32(cpu_sr_t, cpu_sr_t, 1);
858         }
859 	return;
860     case 0x6007:		/* not Rm,Rn */
861 	tcg_gen_not_i32(REG(B11_8), REG(B7_4));
862 	return;
863     case 0x200b:		/* or Rm,Rn */
864 	tcg_gen_or_i32(REG(B11_8), REG(B11_8), REG(B7_4));
865 	return;
866     case 0x400c:		/* shad Rm,Rn */
867 	{
868             TCGv t0 = tcg_temp_new();
869             TCGv t1 = tcg_temp_new();
870             TCGv t2 = tcg_temp_new();
871 
872             tcg_gen_andi_i32(t0, REG(B7_4), 0x1f);
873 
874             /* positive case: shift to the left */
875             tcg_gen_shl_i32(t1, REG(B11_8), t0);
876 
877             /* negative case: shift to the right in two steps to
878                correctly handle the -32 case */
879             tcg_gen_xori_i32(t0, t0, 0x1f);
880             tcg_gen_sar_i32(t2, REG(B11_8), t0);
881             tcg_gen_sari_i32(t2, t2, 1);
882 
883             /* select between the two cases */
884             tcg_gen_movi_i32(t0, 0);
885             tcg_gen_movcond_i32(TCG_COND_GE, REG(B11_8), REG(B7_4), t0, t1, t2);
886 	}
887 	return;
888     case 0x400d:		/* shld Rm,Rn */
889 	{
890             TCGv t0 = tcg_temp_new();
891             TCGv t1 = tcg_temp_new();
892             TCGv t2 = tcg_temp_new();
893 
894             tcg_gen_andi_i32(t0, REG(B7_4), 0x1f);
895 
896             /* positive case: shift to the left */
897             tcg_gen_shl_i32(t1, REG(B11_8), t0);
898 
899             /* negative case: shift to the right in two steps to
900                correctly handle the -32 case */
901             tcg_gen_xori_i32(t0, t0, 0x1f);
902             tcg_gen_shr_i32(t2, REG(B11_8), t0);
903             tcg_gen_shri_i32(t2, t2, 1);
904 
905             /* select between the two cases */
906             tcg_gen_movi_i32(t0, 0);
907             tcg_gen_movcond_i32(TCG_COND_GE, REG(B11_8), REG(B7_4), t0, t1, t2);
908 	}
909 	return;
910     case 0x3008:		/* sub Rm,Rn */
911 	tcg_gen_sub_i32(REG(B11_8), REG(B11_8), REG(B7_4));
912 	return;
913     case 0x300a:		/* subc Rm,Rn */
914         {
915             TCGv t0, t1;
916             t0 = tcg_const_tl(0);
917             t1 = tcg_temp_new();
918             tcg_gen_add2_i32(t1, cpu_sr_t, cpu_sr_t, t0, REG(B7_4), t0);
919             tcg_gen_sub2_i32(REG(B11_8), cpu_sr_t,
920                              REG(B11_8), t0, t1, cpu_sr_t);
921             tcg_gen_andi_i32(cpu_sr_t, cpu_sr_t, 1);
922         }
923 	return;
924     case 0x300b:		/* subv Rm,Rn */
925         {
926             TCGv t0, t1, t2;
927             t0 = tcg_temp_new();
928             tcg_gen_sub_i32(t0, REG(B11_8), REG(B7_4));
929             t1 = tcg_temp_new();
930             tcg_gen_xor_i32(t1, t0, REG(B7_4));
931             t2 = tcg_temp_new();
932             tcg_gen_xor_i32(t2, REG(B11_8), REG(B7_4));
933             tcg_gen_and_i32(t1, t1, t2);
934             tcg_gen_shri_i32(cpu_sr_t, t1, 31);
935             tcg_gen_mov_i32(REG(B11_8), t0);
936         }
937 	return;
938     case 0x2008:		/* tst Rm,Rn */
939 	{
940 	    TCGv val = tcg_temp_new();
941 	    tcg_gen_and_i32(val, REG(B7_4), REG(B11_8));
942             tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, val, 0);
943 	}
944 	return;
945     case 0x200a:		/* xor Rm,Rn */
946 	tcg_gen_xor_i32(REG(B11_8), REG(B11_8), REG(B7_4));
947 	return;
948     case 0xf00c: /* fmov {F,D,X}Rm,{F,D,X}Rn - FPSCR: Nothing */
949 	CHECK_FPU_ENABLED
950         if (ctx->tbflags & FPSCR_SZ) {
951             int xsrc = XHACK(B7_4);
952             int xdst = XHACK(B11_8);
953             tcg_gen_mov_i32(FREG(xdst), FREG(xsrc));
954             tcg_gen_mov_i32(FREG(xdst + 1), FREG(xsrc + 1));
955 	} else {
956             tcg_gen_mov_i32(FREG(B11_8), FREG(B7_4));
957 	}
958 	return;
959     case 0xf00a: /* fmov {F,D,X}Rm,@Rn - FPSCR: Nothing */
960 	CHECK_FPU_ENABLED
961         if (ctx->tbflags & FPSCR_SZ) {
962             TCGv_i64 fp = tcg_temp_new_i64();
963             gen_load_fpr64(ctx, fp, XHACK(B7_4));
964             tcg_gen_qemu_st_i64(fp, REG(B11_8), ctx->memidx, MO_TEUQ);
965 	} else {
966             tcg_gen_qemu_st_i32(FREG(B7_4), REG(B11_8), ctx->memidx, MO_TEUL);
967 	}
968 	return;
969     case 0xf008: /* fmov @Rm,{F,D,X}Rn - FPSCR: Nothing */
970 	CHECK_FPU_ENABLED
971         if (ctx->tbflags & FPSCR_SZ) {
972             TCGv_i64 fp = tcg_temp_new_i64();
973             tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx, MO_TEUQ);
974             gen_store_fpr64(ctx, fp, XHACK(B11_8));
975 	} else {
976             tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx, MO_TEUL);
977 	}
978 	return;
979     case 0xf009: /* fmov @Rm+,{F,D,X}Rn - FPSCR: Nothing */
980 	CHECK_FPU_ENABLED
981         if (ctx->tbflags & FPSCR_SZ) {
982             TCGv_i64 fp = tcg_temp_new_i64();
983             tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx, MO_TEUQ);
984             gen_store_fpr64(ctx, fp, XHACK(B11_8));
985             tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 8);
986 	} else {
987             tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx, MO_TEUL);
988 	    tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 4);
989 	}
990 	return;
991     case 0xf00b: /* fmov {F,D,X}Rm,@-Rn - FPSCR: Nothing */
992 	CHECK_FPU_ENABLED
993         {
994             TCGv addr = tcg_temp_new_i32();
995             if (ctx->tbflags & FPSCR_SZ) {
996                 TCGv_i64 fp = tcg_temp_new_i64();
997                 gen_load_fpr64(ctx, fp, XHACK(B7_4));
998                 tcg_gen_subi_i32(addr, REG(B11_8), 8);
999                 tcg_gen_qemu_st_i64(fp, addr, ctx->memidx, MO_TEUQ);
1000             } else {
1001                 tcg_gen_subi_i32(addr, REG(B11_8), 4);
1002                 tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, MO_TEUL);
1003             }
1004             tcg_gen_mov_i32(REG(B11_8), addr);
1005         }
1006 	return;
1007     case 0xf006: /* fmov @(R0,Rm),{F,D,X}Rn - FPSCR: Nothing */
1008 	CHECK_FPU_ENABLED
1009 	{
1010 	    TCGv addr = tcg_temp_new_i32();
1011 	    tcg_gen_add_i32(addr, REG(B7_4), REG(0));
1012             if (ctx->tbflags & FPSCR_SZ) {
1013                 TCGv_i64 fp = tcg_temp_new_i64();
1014                 tcg_gen_qemu_ld_i64(fp, addr, ctx->memidx, MO_TEUQ);
1015                 gen_store_fpr64(ctx, fp, XHACK(B11_8));
1016 	    } else {
1017                 tcg_gen_qemu_ld_i32(FREG(B11_8), addr, ctx->memidx, MO_TEUL);
1018 	    }
1019 	}
1020 	return;
1021     case 0xf007: /* fmov {F,D,X}Rm,@(R0,Rn) - FPSCR: Nothing */
1022 	CHECK_FPU_ENABLED
1023 	{
1024 	    TCGv addr = tcg_temp_new();
1025 	    tcg_gen_add_i32(addr, REG(B11_8), REG(0));
1026             if (ctx->tbflags & FPSCR_SZ) {
1027                 TCGv_i64 fp = tcg_temp_new_i64();
1028                 gen_load_fpr64(ctx, fp, XHACK(B7_4));
1029                 tcg_gen_qemu_st_i64(fp, addr, ctx->memidx, MO_TEUQ);
1030 	    } else {
1031                 tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, MO_TEUL);
1032 	    }
1033 	}
1034 	return;
1035     case 0xf000: /* fadd Rm,Rn - FPSCR: R[PR,Enable.O/U/I]/W[Cause,Flag] */
1036     case 0xf001: /* fsub Rm,Rn - FPSCR: R[PR,Enable.O/U/I]/W[Cause,Flag] */
1037     case 0xf002: /* fmul Rm,Rn - FPSCR: R[PR,Enable.O/U/I]/W[Cause,Flag] */
1038     case 0xf003: /* fdiv Rm,Rn - FPSCR: R[PR,Enable.O/U/I]/W[Cause,Flag] */
1039     case 0xf004: /* fcmp/eq Rm,Rn - FPSCR: R[PR,Enable.V]/W[Cause,Flag] */
1040     case 0xf005: /* fcmp/gt Rm,Rn - FPSCR: R[PR,Enable.V]/W[Cause,Flag] */
1041 	{
1042 	    CHECK_FPU_ENABLED
1043             if (ctx->tbflags & FPSCR_PR) {
1044                 TCGv_i64 fp0, fp1;
1045 
1046                 if (ctx->opcode & 0x0110) {
1047                     goto do_illegal;
1048                 }
1049 		fp0 = tcg_temp_new_i64();
1050 		fp1 = tcg_temp_new_i64();
1051                 gen_load_fpr64(ctx, fp0, B11_8);
1052                 gen_load_fpr64(ctx, fp1, B7_4);
1053                 switch (ctx->opcode & 0xf00f) {
1054                 case 0xf000:		/* fadd Rm,Rn */
1055                     gen_helper_fadd_DT(fp0, cpu_env, fp0, fp1);
1056                     break;
1057                 case 0xf001:		/* fsub Rm,Rn */
1058                     gen_helper_fsub_DT(fp0, cpu_env, fp0, fp1);
1059                     break;
1060                 case 0xf002:		/* fmul Rm,Rn */
1061                     gen_helper_fmul_DT(fp0, cpu_env, fp0, fp1);
1062                     break;
1063                 case 0xf003:		/* fdiv Rm,Rn */
1064                     gen_helper_fdiv_DT(fp0, cpu_env, fp0, fp1);
1065                     break;
1066                 case 0xf004:		/* fcmp/eq Rm,Rn */
1067                     gen_helper_fcmp_eq_DT(cpu_sr_t, cpu_env, fp0, fp1);
1068                     return;
1069                 case 0xf005:		/* fcmp/gt Rm,Rn */
1070                     gen_helper_fcmp_gt_DT(cpu_sr_t, cpu_env, fp0, fp1);
1071                     return;
1072                 }
1073                 gen_store_fpr64(ctx, fp0, B11_8);
1074 	    } else {
1075                 switch (ctx->opcode & 0xf00f) {
1076                 case 0xf000:		/* fadd Rm,Rn */
1077                     gen_helper_fadd_FT(FREG(B11_8), cpu_env,
1078                                        FREG(B11_8), FREG(B7_4));
1079                     break;
1080                 case 0xf001:		/* fsub Rm,Rn */
1081                     gen_helper_fsub_FT(FREG(B11_8), cpu_env,
1082                                        FREG(B11_8), FREG(B7_4));
1083                     break;
1084                 case 0xf002:		/* fmul Rm,Rn */
1085                     gen_helper_fmul_FT(FREG(B11_8), cpu_env,
1086                                        FREG(B11_8), FREG(B7_4));
1087                     break;
1088                 case 0xf003:		/* fdiv Rm,Rn */
1089                     gen_helper_fdiv_FT(FREG(B11_8), cpu_env,
1090                                        FREG(B11_8), FREG(B7_4));
1091                     break;
1092                 case 0xf004:		/* fcmp/eq Rm,Rn */
1093                     gen_helper_fcmp_eq_FT(cpu_sr_t, cpu_env,
1094                                           FREG(B11_8), FREG(B7_4));
1095                     return;
1096                 case 0xf005:		/* fcmp/gt Rm,Rn */
1097                     gen_helper_fcmp_gt_FT(cpu_sr_t, cpu_env,
1098                                           FREG(B11_8), FREG(B7_4));
1099                     return;
1100                 }
1101 	    }
1102 	}
1103 	return;
1104     case 0xf00e: /* fmac FR0,FRm,FRn */
1105         CHECK_FPU_ENABLED
1106         CHECK_FPSCR_PR_0
1107         gen_helper_fmac_FT(FREG(B11_8), cpu_env,
1108                            FREG(0), FREG(B7_4), FREG(B11_8));
1109         return;
1110     }
1111 
1112     switch (ctx->opcode & 0xff00) {
1113     case 0xc900:		/* and #imm,R0 */
1114 	tcg_gen_andi_i32(REG(0), REG(0), B7_0);
1115 	return;
1116     case 0xcd00:		/* and.b #imm,@(R0,GBR) */
1117 	{
1118 	    TCGv addr, val;
1119 	    addr = tcg_temp_new();
1120 	    tcg_gen_add_i32(addr, REG(0), cpu_gbr);
1121 	    val = tcg_temp_new();
1122             tcg_gen_qemu_ld_i32(val, addr, ctx->memidx, MO_UB);
1123 	    tcg_gen_andi_i32(val, val, B7_0);
1124             tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_UB);
1125 	}
1126 	return;
1127     case 0x8b00:		/* bf label */
1128 	CHECK_NOT_DELAY_SLOT
1129         gen_conditional_jump(ctx, ctx->base.pc_next + 4 + B7_0s * 2, false);
1130 	return;
1131     case 0x8f00:		/* bf/s label */
1132 	CHECK_NOT_DELAY_SLOT
1133         tcg_gen_xori_i32(cpu_delayed_cond, cpu_sr_t, 1);
1134         ctx->delayed_pc = ctx->base.pc_next + 4 + B7_0s * 2;
1135         ctx->envflags |= TB_FLAG_DELAY_SLOT_COND;
1136 	return;
1137     case 0x8900:		/* bt label */
1138 	CHECK_NOT_DELAY_SLOT
1139         gen_conditional_jump(ctx, ctx->base.pc_next + 4 + B7_0s * 2, true);
1140 	return;
1141     case 0x8d00:		/* bt/s label */
1142 	CHECK_NOT_DELAY_SLOT
1143         tcg_gen_mov_i32(cpu_delayed_cond, cpu_sr_t);
1144         ctx->delayed_pc = ctx->base.pc_next + 4 + B7_0s * 2;
1145         ctx->envflags |= TB_FLAG_DELAY_SLOT_COND;
1146 	return;
1147     case 0x8800:		/* cmp/eq #imm,R0 */
1148         tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, REG(0), B7_0s);
1149 	return;
1150     case 0xc400:		/* mov.b @(disp,GBR),R0 */
1151 	{
1152 	    TCGv addr = tcg_temp_new();
1153 	    tcg_gen_addi_i32(addr, cpu_gbr, B7_0);
1154             tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_SB);
1155 	}
1156 	return;
1157     case 0xc500:		/* mov.w @(disp,GBR),R0 */
1158 	{
1159 	    TCGv addr = tcg_temp_new();
1160 	    tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 2);
1161             tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESW);
1162 	}
1163 	return;
1164     case 0xc600:		/* mov.l @(disp,GBR),R0 */
1165 	{
1166 	    TCGv addr = tcg_temp_new();
1167 	    tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 4);
1168             tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESL);
1169 	}
1170 	return;
1171     case 0xc000:		/* mov.b R0,@(disp,GBR) */
1172 	{
1173 	    TCGv addr = tcg_temp_new();
1174 	    tcg_gen_addi_i32(addr, cpu_gbr, B7_0);
1175             tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_UB);
1176 	}
1177 	return;
1178     case 0xc100:		/* mov.w R0,@(disp,GBR) */
1179 	{
1180 	    TCGv addr = tcg_temp_new();
1181 	    tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 2);
1182             tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUW);
1183 	}
1184 	return;
1185     case 0xc200:		/* mov.l R0,@(disp,GBR) */
1186 	{
1187 	    TCGv addr = tcg_temp_new();
1188 	    tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 4);
1189             tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUL);
1190 	}
1191 	return;
1192     case 0x8000:		/* mov.b R0,@(disp,Rn) */
1193 	{
1194 	    TCGv addr = tcg_temp_new();
1195 	    tcg_gen_addi_i32(addr, REG(B7_4), B3_0);
1196             tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_UB);
1197 	}
1198 	return;
1199     case 0x8100:		/* mov.w R0,@(disp,Rn) */
1200 	{
1201 	    TCGv addr = tcg_temp_new();
1202 	    tcg_gen_addi_i32(addr, REG(B7_4), B3_0 * 2);
1203             tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx,
1204                                 MO_TEUW | UNALIGN(ctx));
1205 	}
1206 	return;
1207     case 0x8400:		/* mov.b @(disp,Rn),R0 */
1208 	{
1209 	    TCGv addr = tcg_temp_new();
1210 	    tcg_gen_addi_i32(addr, REG(B7_4), B3_0);
1211             tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_SB);
1212 	}
1213 	return;
1214     case 0x8500:		/* mov.w @(disp,Rn),R0 */
1215 	{
1216 	    TCGv addr = tcg_temp_new();
1217 	    tcg_gen_addi_i32(addr, REG(B7_4), B3_0 * 2);
1218             tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx,
1219                                 MO_TESW | UNALIGN(ctx));
1220 	}
1221 	return;
1222     case 0xc700:		/* mova @(disp,PC),R0 */
1223         tcg_gen_movi_i32(REG(0), ((ctx->base.pc_next & 0xfffffffc) +
1224                                   4 + B7_0 * 4) & ~3);
1225 	return;
1226     case 0xcb00:		/* or #imm,R0 */
1227 	tcg_gen_ori_i32(REG(0), REG(0), B7_0);
1228 	return;
1229     case 0xcf00:		/* or.b #imm,@(R0,GBR) */
1230 	{
1231 	    TCGv addr, val;
1232 	    addr = tcg_temp_new();
1233 	    tcg_gen_add_i32(addr, REG(0), cpu_gbr);
1234 	    val = tcg_temp_new();
1235             tcg_gen_qemu_ld_i32(val, addr, ctx->memidx, MO_UB);
1236 	    tcg_gen_ori_i32(val, val, B7_0);
1237             tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_UB);
1238 	}
1239 	return;
1240     case 0xc300:		/* trapa #imm */
1241 	{
1242 	    TCGv imm;
1243 	    CHECK_NOT_DELAY_SLOT
1244             gen_save_cpu_state(ctx, true);
1245 	    imm = tcg_const_i32(B7_0);
1246             gen_helper_trapa(cpu_env, imm);
1247             ctx->base.is_jmp = DISAS_NORETURN;
1248 	}
1249 	return;
1250     case 0xc800:		/* tst #imm,R0 */
1251 	{
1252 	    TCGv val = tcg_temp_new();
1253 	    tcg_gen_andi_i32(val, REG(0), B7_0);
1254             tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, val, 0);
1255 	}
1256 	return;
1257     case 0xcc00:		/* tst.b #imm,@(R0,GBR) */
1258 	{
1259 	    TCGv val = tcg_temp_new();
1260 	    tcg_gen_add_i32(val, REG(0), cpu_gbr);
1261             tcg_gen_qemu_ld_i32(val, val, ctx->memidx, MO_UB);
1262 	    tcg_gen_andi_i32(val, val, B7_0);
1263             tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, val, 0);
1264 	}
1265 	return;
1266     case 0xca00:		/* xor #imm,R0 */
1267 	tcg_gen_xori_i32(REG(0), REG(0), B7_0);
1268 	return;
1269     case 0xce00:		/* xor.b #imm,@(R0,GBR) */
1270 	{
1271 	    TCGv addr, val;
1272 	    addr = tcg_temp_new();
1273 	    tcg_gen_add_i32(addr, REG(0), cpu_gbr);
1274 	    val = tcg_temp_new();
1275             tcg_gen_qemu_ld_i32(val, addr, ctx->memidx, MO_UB);
1276 	    tcg_gen_xori_i32(val, val, B7_0);
1277             tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_UB);
1278 	}
1279 	return;
1280     }
1281 
1282     switch (ctx->opcode & 0xf08f) {
1283     case 0x408e:		/* ldc Rm,Rn_BANK */
1284 	CHECK_PRIVILEGED
1285 	tcg_gen_mov_i32(ALTREG(B6_4), REG(B11_8));
1286 	return;
1287     case 0x4087:		/* ldc.l @Rm+,Rn_BANK */
1288 	CHECK_PRIVILEGED
1289         tcg_gen_qemu_ld_i32(ALTREG(B6_4), REG(B11_8), ctx->memidx, MO_TESL);
1290 	tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
1291 	return;
1292     case 0x0082:		/* stc Rm_BANK,Rn */
1293 	CHECK_PRIVILEGED
1294 	tcg_gen_mov_i32(REG(B11_8), ALTREG(B6_4));
1295 	return;
1296     case 0x4083:		/* stc.l Rm_BANK,@-Rn */
1297 	CHECK_PRIVILEGED
1298 	{
1299 	    TCGv addr = tcg_temp_new();
1300 	    tcg_gen_subi_i32(addr, REG(B11_8), 4);
1301             tcg_gen_qemu_st_i32(ALTREG(B6_4), addr, ctx->memidx, MO_TEUL);
1302 	    tcg_gen_mov_i32(REG(B11_8), addr);
1303 	}
1304 	return;
1305     }
1306 
1307     switch (ctx->opcode & 0xf0ff) {
1308     case 0x0023:		/* braf Rn */
1309 	CHECK_NOT_DELAY_SLOT
1310         tcg_gen_addi_i32(cpu_delayed_pc, REG(B11_8), ctx->base.pc_next + 4);
1311         ctx->envflags |= TB_FLAG_DELAY_SLOT;
1312 	ctx->delayed_pc = (uint32_t) - 1;
1313 	return;
1314     case 0x0003:		/* bsrf Rn */
1315 	CHECK_NOT_DELAY_SLOT
1316         tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
1317 	tcg_gen_add_i32(cpu_delayed_pc, REG(B11_8), cpu_pr);
1318         ctx->envflags |= TB_FLAG_DELAY_SLOT;
1319 	ctx->delayed_pc = (uint32_t) - 1;
1320 	return;
1321     case 0x4015:		/* cmp/pl Rn */
1322         tcg_gen_setcondi_i32(TCG_COND_GT, cpu_sr_t, REG(B11_8), 0);
1323 	return;
1324     case 0x4011:		/* cmp/pz Rn */
1325         tcg_gen_setcondi_i32(TCG_COND_GE, cpu_sr_t, REG(B11_8), 0);
1326 	return;
1327     case 0x4010:		/* dt Rn */
1328 	tcg_gen_subi_i32(REG(B11_8), REG(B11_8), 1);
1329         tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, REG(B11_8), 0);
1330 	return;
1331     case 0x402b:		/* jmp @Rn */
1332 	CHECK_NOT_DELAY_SLOT
1333 	tcg_gen_mov_i32(cpu_delayed_pc, REG(B11_8));
1334         ctx->envflags |= TB_FLAG_DELAY_SLOT;
1335 	ctx->delayed_pc = (uint32_t) - 1;
1336 	return;
1337     case 0x400b:		/* jsr @Rn */
1338 	CHECK_NOT_DELAY_SLOT
1339         tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
1340 	tcg_gen_mov_i32(cpu_delayed_pc, REG(B11_8));
1341         ctx->envflags |= TB_FLAG_DELAY_SLOT;
1342 	ctx->delayed_pc = (uint32_t) - 1;
1343 	return;
1344     case 0x400e:		/* ldc Rm,SR */
1345 	CHECK_PRIVILEGED
1346         {
1347             TCGv val = tcg_temp_new();
1348             tcg_gen_andi_i32(val, REG(B11_8), 0x700083f3);
1349             gen_write_sr(val);
1350             ctx->base.is_jmp = DISAS_STOP;
1351         }
1352 	return;
1353     case 0x4007:		/* ldc.l @Rm+,SR */
1354 	CHECK_PRIVILEGED
1355 	{
1356 	    TCGv val = tcg_temp_new();
1357             tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx, MO_TESL);
1358             tcg_gen_andi_i32(val, val, 0x700083f3);
1359             gen_write_sr(val);
1360 	    tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
1361             ctx->base.is_jmp = DISAS_STOP;
1362 	}
1363 	return;
1364     case 0x0002:		/* stc SR,Rn */
1365 	CHECK_PRIVILEGED
1366         gen_read_sr(REG(B11_8));
1367 	return;
1368     case 0x4003:		/* stc SR,@-Rn */
1369 	CHECK_PRIVILEGED
1370 	{
1371 	    TCGv addr = tcg_temp_new();
1372             TCGv val = tcg_temp_new();
1373 	    tcg_gen_subi_i32(addr, REG(B11_8), 4);
1374             gen_read_sr(val);
1375             tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL);
1376 	    tcg_gen_mov_i32(REG(B11_8), addr);
1377 	}
1378 	return;
/* LD(reg, ldnum, ldpnum, prechk): expands to two case arms for loading a
 * control/system register <reg> —
 *   ldnum:  "ldc/lds Rm,<reg>"        plain register move into cpu_<reg>
 *   ldpnum: "ldc.l/lds.l @Rm+,<reg>"  32-bit memory load with post-increment
 * prechk injects a guard macro (CHECK_PRIVILEGED, CHECK_FPU_ENABLED, or {}). */
1379 #define LD(reg,ldnum,ldpnum,prechk)		\
1380   case ldnum:							\
1381     prechk    							\
1382     tcg_gen_mov_i32 (cpu_##reg, REG(B11_8));			\
1383     return;							\
1384   case ldpnum:							\
1385     prechk    							\
1386     tcg_gen_qemu_ld_i32(cpu_##reg, REG(B11_8), ctx->memidx, MO_TESL); \
1387     tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);		\
1388     return;
/* ST(reg, stnum, stpnum, prechk): the matching store arms —
 *   stnum:  "stc/sts <reg>,Rn"        plain register move out of cpu_<reg>
 *   stpnum: "stc.l/sts.l <reg>,@-Rn"  32-bit pre-decrement memory store; Rn is
 *           only updated after the store has issued (addr temp holds Rn-4). */
1389 #define ST(reg,stnum,stpnum,prechk)		\
1390   case stnum:							\
1391     prechk    							\
1392     tcg_gen_mov_i32 (REG(B11_8), cpu_##reg);			\
1393     return;							\
1394   case stpnum:							\
1395     prechk    							\
1396     {								\
1397 	TCGv addr = tcg_temp_new();				\
1398 	tcg_gen_subi_i32(addr, REG(B11_8), 4);			\
1399         tcg_gen_qemu_st_i32(cpu_##reg, addr, ctx->memidx, MO_TEUL); \
1400 	tcg_gen_mov_i32(REG(B11_8), addr);			\
1401     }								\
1402     return;
/* LDST: convenience wrapper emitting all four case arms (ld, ld.l, st, st.l)
 * for one register with a shared guard. */
1403 #define LDST(reg,ldnum,ldpnum,stnum,stpnum,prechk)		\
1404 	LD(reg,ldnum,ldpnum,prechk)				\
1405 	ST(reg,stnum,stpnum,prechk)
1406 	LDST(gbr,  0x401e, 0x4017, 0x0012, 0x4013, {})
1407 	LDST(vbr,  0x402e, 0x4027, 0x0022, 0x4023, CHECK_PRIVILEGED)
1408 	LDST(ssr,  0x403e, 0x4037, 0x0032, 0x4033, CHECK_PRIVILEGED)
1409 	LDST(spc,  0x404e, 0x4047, 0x0042, 0x4043, CHECK_PRIVILEGED)
1410 	ST(sgr,  0x003a, 0x4032, CHECK_PRIVILEGED)
1411         LD(sgr,  0x403a, 0x4036, CHECK_PRIVILEGED CHECK_SH4A)
1412 	LDST(dbr,  0x40fa, 0x40f6, 0x00fa, 0x40f2, CHECK_PRIVILEGED)
1413 	LDST(mach, 0x400a, 0x4006, 0x000a, 0x4002, {})
1414 	LDST(macl, 0x401a, 0x4016, 0x001a, 0x4012, {})
1415 	LDST(pr,   0x402a, 0x4026, 0x002a, 0x4022, {})
1416 	LDST(fpul, 0x405a, 0x4056, 0x005a, 0x4052, {CHECK_FPU_ENABLED})
1417     case 0x406a:		/* lds Rm,FPSCR */
1418 	CHECK_FPU_ENABLED
1419         gen_helper_ld_fpscr(cpu_env, REG(B11_8));
1420         ctx->base.is_jmp = DISAS_STOP;
1421 	return;
1422     case 0x4066:		/* lds.l @Rm+,FPSCR */
1423 	CHECK_FPU_ENABLED
1424 	{
1425 	    TCGv addr = tcg_temp_new();
1426             tcg_gen_qemu_ld_i32(addr, REG(B11_8), ctx->memidx, MO_TESL);
1427 	    tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
1428             gen_helper_ld_fpscr(cpu_env, addr);
1429             ctx->base.is_jmp = DISAS_STOP;
1430 	}
1431 	return;
1432     case 0x006a:		/* sts FPSCR,Rn */
1433 	CHECK_FPU_ENABLED
1434 	tcg_gen_andi_i32(REG(B11_8), cpu_fpscr, 0x003fffff);
1435 	return;
1436     case 0x4062:		/* sts FPSCR,@-Rn */
1437 	CHECK_FPU_ENABLED
1438 	{
1439 	    TCGv addr, val;
1440 	    val = tcg_temp_new();
1441 	    tcg_gen_andi_i32(val, cpu_fpscr, 0x003fffff);
1442 	    addr = tcg_temp_new();
1443 	    tcg_gen_subi_i32(addr, REG(B11_8), 4);
1444             tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL);
1445 	    tcg_gen_mov_i32(REG(B11_8), addr);
1446 	}
1447 	return;
1448     case 0x00c3:		/* movca.l R0,@Rm */
1449         {
1450             TCGv val = tcg_temp_new();
1451             tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx, MO_TEUL);
1452             gen_helper_movcal(cpu_env, REG(B11_8), val);
1453             tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
1454         }
1455         ctx->has_movcal = 1;
1456 	return;
1457     case 0x40a9:                /* movua.l @Rm,R0 */
1458         CHECK_SH4A
1459         /* Load non-boundary-aligned data */
1460         tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx,
1461                             MO_TEUL | MO_UNALN);
1462         return;
1463     case 0x40e9:                /* movua.l @Rm+,R0 */
1464         CHECK_SH4A
1465         /* Load non-boundary-aligned data */
1466         tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx,
1467                             MO_TEUL | MO_UNALN);
1468         tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
1469         return;
1470     case 0x0029:		/* movt Rn */
1471         tcg_gen_mov_i32(REG(B11_8), cpu_sr_t);
1472 	return;
1473     case 0x0073:
1474         /* MOVCO.L
1475          *     LDST -> T
1476          *     If (T == 1) R0 -> (Rn)
1477          *     0 -> LDST
1478          *
1479          * The above description doesn't work in a parallel context.
1480          * Since we currently support no smp boards, this implies user-mode.
1481          * But we can still support the official mechanism while user-mode
1482          * is single-threaded.  */
1483         CHECK_SH4A
1484         {
1485             TCGLabel *fail = gen_new_label();
1486             TCGLabel *done = gen_new_label();
1487 
1488             if ((tb_cflags(ctx->base.tb) & CF_PARALLEL)) {
1489                 TCGv tmp;
1490 
1491                 tcg_gen_brcond_i32(TCG_COND_NE, REG(B11_8),
1492                                    cpu_lock_addr, fail);
1493                 tmp = tcg_temp_new();
1494                 tcg_gen_atomic_cmpxchg_i32(tmp, REG(B11_8), cpu_lock_value,
1495                                            REG(0), ctx->memidx, MO_TEUL);
1496                 tcg_gen_setcond_i32(TCG_COND_EQ, cpu_sr_t, tmp, cpu_lock_value);
1497             } else {
1498                 tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_lock_addr, -1, fail);
1499                 tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
1500                 tcg_gen_movi_i32(cpu_sr_t, 1);
1501             }
1502             tcg_gen_br(done);
1503 
1504             gen_set_label(fail);
1505             tcg_gen_movi_i32(cpu_sr_t, 0);
1506 
1507             gen_set_label(done);
1508             tcg_gen_movi_i32(cpu_lock_addr, -1);
1509         }
1510         return;
1511     case 0x0063:
1512         /* MOVLI.L @Rm,R0
1513          *     1 -> LDST
1514          *     (Rm) -> R0
1515          *     When interrupt/exception
1516          *     occurred 0 -> LDST
1517          *
1518          * In a parallel context, we must also save the loaded value
1519          * for use with the cmpxchg that we'll use with movco.l.  */
1520         CHECK_SH4A
1521         if ((tb_cflags(ctx->base.tb) & CF_PARALLEL)) {
1522             TCGv tmp = tcg_temp_new();
1523             tcg_gen_mov_i32(tmp, REG(B11_8));
1524             tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TESL);
1525             tcg_gen_mov_i32(cpu_lock_value, REG(0));
1526             tcg_gen_mov_i32(cpu_lock_addr, tmp);
1527         } else {
1528             tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TESL);
1529             tcg_gen_movi_i32(cpu_lock_addr, 0);
1530         }
1531         return;
1532     case 0x0093:		/* ocbi @Rn */
1533 	{
1534             gen_helper_ocbi(cpu_env, REG(B11_8));
1535 	}
1536 	return;
1537     case 0x00a3:		/* ocbp @Rn */
1538     case 0x00b3:		/* ocbwb @Rn */
1539         /* These instructions are supposed to do nothing in case of
1540            a cache miss. Given that we only partially emulate caches
1541            it is safe to simply ignore them. */
1542 	return;
1543     case 0x0083:		/* pref @Rn */
1544 	return;
1545     case 0x00d3:		/* prefi @Rn */
1546         CHECK_SH4A
1547         return;
1548     case 0x00e3:		/* icbi @Rn */
1549         CHECK_SH4A
1550         return;
1551     case 0x00ab:		/* synco */
1552         CHECK_SH4A
1553         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
1554         return;
1555     case 0x4024:		/* rotcl Rn */
1556 	{
1557 	    TCGv tmp = tcg_temp_new();
1558             tcg_gen_mov_i32(tmp, cpu_sr_t);
1559             tcg_gen_shri_i32(cpu_sr_t, REG(B11_8), 31);
1560 	    tcg_gen_shli_i32(REG(B11_8), REG(B11_8), 1);
1561             tcg_gen_or_i32(REG(B11_8), REG(B11_8), tmp);
1562 	}
1563 	return;
1564     case 0x4025:		/* rotcr Rn */
1565 	{
1566 	    TCGv tmp = tcg_temp_new();
1567             tcg_gen_shli_i32(tmp, cpu_sr_t, 31);
1568             tcg_gen_andi_i32(cpu_sr_t, REG(B11_8), 1);
1569 	    tcg_gen_shri_i32(REG(B11_8), REG(B11_8), 1);
1570             tcg_gen_or_i32(REG(B11_8), REG(B11_8), tmp);
1571 	}
1572 	return;
1573     case 0x4004:		/* rotl Rn */
1574 	tcg_gen_rotli_i32(REG(B11_8), REG(B11_8), 1);
             /* T <- bit rotated out of the MSB; after the rotate that bit sits
              * in bit 0, so mask with 1.  The previous mask of 0 cleared T
              * unconditionally, which contradicts both the SH-4 ROTL spec and
              * the shll/shal arm below that captures the MSB into T. */
1575         tcg_gen_andi_i32(cpu_sr_t, REG(B11_8), 1);
1576 	return;
1577     case 0x4005:		/* rotr Rn */
             /* T <- original LSB, captured before the rotate (mask 1, not 0),
              * matching the shar/shlr arms that use andi ...,1. */
1578         tcg_gen_andi_i32(cpu_sr_t, REG(B11_8), 1);
1579 	tcg_gen_rotri_i32(REG(B11_8), REG(B11_8), 1);
1580 	return;
1581     case 0x4000:		/* shll Rn */
1582     case 0x4020:		/* shal Rn */
1583         tcg_gen_shri_i32(cpu_sr_t, REG(B11_8), 31);
1584 	tcg_gen_shli_i32(REG(B11_8), REG(B11_8), 1);
1585 	return;
1586     case 0x4021:		/* shar Rn */
1587         tcg_gen_andi_i32(cpu_sr_t, REG(B11_8), 1);
1588 	tcg_gen_sari_i32(REG(B11_8), REG(B11_8), 1);
1589 	return;
1590     case 0x4001:		/* shlr Rn */
1591         tcg_gen_andi_i32(cpu_sr_t, REG(B11_8), 1);
1592 	tcg_gen_shri_i32(REG(B11_8), REG(B11_8), 1);
1593 	return;
1594     case 0x4008:		/* shll2 Rn */
1595 	tcg_gen_shli_i32(REG(B11_8), REG(B11_8), 2);
1596 	return;
1597     case 0x4018:		/* shll8 Rn */
1598 	tcg_gen_shli_i32(REG(B11_8), REG(B11_8), 8);
1599 	return;
1600     case 0x4028:		/* shll16 Rn */
1601 	tcg_gen_shli_i32(REG(B11_8), REG(B11_8), 16);
1602 	return;
1603     case 0x4009:		/* shlr2 Rn */
1604 	tcg_gen_shri_i32(REG(B11_8), REG(B11_8), 2);
1605 	return;
1606     case 0x4019:		/* shlr8 Rn */
1607 	tcg_gen_shri_i32(REG(B11_8), REG(B11_8), 8);
1608 	return;
1609     case 0x4029:		/* shlr16 Rn */
1610 	tcg_gen_shri_i32(REG(B11_8), REG(B11_8), 16);
1611 	return;
1612     case 0x401b:		/* tas.b @Rn */
1613         {
1614             TCGv val = tcg_const_i32(0x80);
1615             tcg_gen_atomic_fetch_or_i32(val, REG(B11_8), val,
1616                                         ctx->memidx, MO_UB);
1617             tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, val, 0);
1618         }
1619         return;
1620     case 0xf00d: /* fsts FPUL,FRn - FPSCR: Nothing */
1621 	CHECK_FPU_ENABLED
1622         tcg_gen_mov_i32(FREG(B11_8), cpu_fpul);
1623 	return;
1624     case 0xf01d: /* flds FRm,FPUL - FPSCR: Nothing */
1625 	CHECK_FPU_ENABLED
1626         tcg_gen_mov_i32(cpu_fpul, FREG(B11_8));
1627 	return;
1628     case 0xf02d: /* float FPUL,FRn/DRn - FPSCR: R[PR,Enable.I]/W[Cause,Flag] */
1629 	CHECK_FPU_ENABLED
1630         if (ctx->tbflags & FPSCR_PR) {
1631 	    TCGv_i64 fp;
1632             if (ctx->opcode & 0x0100) {
1633                 goto do_illegal;
1634             }
1635 	    fp = tcg_temp_new_i64();
1636             gen_helper_float_DT(fp, cpu_env, cpu_fpul);
1637             gen_store_fpr64(ctx, fp, B11_8);
1638 	}
1639 	else {
1640             gen_helper_float_FT(FREG(B11_8), cpu_env, cpu_fpul);
1641 	}
1642 	return;
1643     case 0xf03d: /* ftrc FRm/DRm,FPUL - FPSCR: R[PR,Enable.V]/W[Cause,Flag] */
1644 	CHECK_FPU_ENABLED
1645         if (ctx->tbflags & FPSCR_PR) {
1646 	    TCGv_i64 fp;
1647             if (ctx->opcode & 0x0100) {
1648                 goto do_illegal;
1649             }
1650 	    fp = tcg_temp_new_i64();
1651             gen_load_fpr64(ctx, fp, B11_8);
1652             gen_helper_ftrc_DT(cpu_fpul, cpu_env, fp);
1653 	}
1654 	else {
1655             gen_helper_ftrc_FT(cpu_fpul, cpu_env, FREG(B11_8));
1656 	}
1657 	return;
1658     case 0xf04d: /* fneg FRn/DRn - FPSCR: Nothing */
1659 	CHECK_FPU_ENABLED
1660         tcg_gen_xori_i32(FREG(B11_8), FREG(B11_8), 0x80000000);
1661 	return;
1662     case 0xf05d: /* fabs FRn/DRn - FPCSR: Nothing */
1663 	CHECK_FPU_ENABLED
1664         tcg_gen_andi_i32(FREG(B11_8), FREG(B11_8), 0x7fffffff);
1665 	return;
1666     case 0xf06d: /* fsqrt FRn */
1667 	CHECK_FPU_ENABLED
1668         if (ctx->tbflags & FPSCR_PR) {
1669             if (ctx->opcode & 0x0100) {
1670                 goto do_illegal;
1671             }
1672 	    TCGv_i64 fp = tcg_temp_new_i64();
1673             gen_load_fpr64(ctx, fp, B11_8);
1674             gen_helper_fsqrt_DT(fp, cpu_env, fp);
1675             gen_store_fpr64(ctx, fp, B11_8);
1676 	} else {
1677             gen_helper_fsqrt_FT(FREG(B11_8), cpu_env, FREG(B11_8));
1678 	}
1679 	return;
1680     case 0xf07d: /* fsrra FRn */
1681 	CHECK_FPU_ENABLED
1682         CHECK_FPSCR_PR_0
1683         gen_helper_fsrra_FT(FREG(B11_8), cpu_env, FREG(B11_8));
1684 	break;
1685     case 0xf08d: /* fldi0 FRn - FPSCR: R[PR] */
1686 	CHECK_FPU_ENABLED
1687         CHECK_FPSCR_PR_0
1688         tcg_gen_movi_i32(FREG(B11_8), 0);
1689         return;
1690     case 0xf09d: /* fldi1 FRn - FPSCR: R[PR] */
1691 	CHECK_FPU_ENABLED
1692         CHECK_FPSCR_PR_0
1693         tcg_gen_movi_i32(FREG(B11_8), 0x3f800000);
1694         return;
1695     case 0xf0ad: /* fcnvsd FPUL,DRn */
1696 	CHECK_FPU_ENABLED
1697 	{
1698 	    TCGv_i64 fp = tcg_temp_new_i64();
1699             gen_helper_fcnvsd_FT_DT(fp, cpu_env, cpu_fpul);
1700             gen_store_fpr64(ctx, fp, B11_8);
1701 	}
1702 	return;
1703     case 0xf0bd: /* fcnvds DRn,FPUL */
1704 	CHECK_FPU_ENABLED
1705 	{
1706 	    TCGv_i64 fp = tcg_temp_new_i64();
1707             gen_load_fpr64(ctx, fp, B11_8);
1708             gen_helper_fcnvds_DT_FT(cpu_fpul, cpu_env, fp);
1709 	}
1710 	return;
1711     case 0xf0ed: /* fipr FVm,FVn */
1712         CHECK_FPU_ENABLED
1713         CHECK_FPSCR_PR_1
1714         {
1715             TCGv m = tcg_const_i32((ctx->opcode >> 8) & 3);
1716             TCGv n = tcg_const_i32((ctx->opcode >> 10) & 3);
1717             gen_helper_fipr(cpu_env, m, n);
1718             return;
1719         }
1720         break;
1721     case 0xf0fd: /* ftrv XMTRX,FVn */
1722         CHECK_FPU_ENABLED
1723         CHECK_FPSCR_PR_1
1724         {
1725             if ((ctx->opcode & 0x0300) != 0x0100) {
1726                 goto do_illegal;
1727             }
1728             TCGv n = tcg_const_i32((ctx->opcode >> 10) & 3);
1729             gen_helper_ftrv(cpu_env, n);
1730             return;
1731         }
1732         break;
1733     }
1734 #if 0
1735     fprintf(stderr, "unknown instruction 0x%04x at pc 0x%08x\n",
1736             ctx->opcode, ctx->base.pc_next);
1737     fflush(stderr);
1738 #endif
1739  do_illegal:
1740     if (ctx->envflags & TB_FLAG_DELAY_SLOT_MASK) {
1741  do_illegal_slot:
1742         gen_save_cpu_state(ctx, true);
1743         gen_helper_raise_slot_illegal_instruction(cpu_env);
1744     } else {
1745         gen_save_cpu_state(ctx, true);
1746         gen_helper_raise_illegal_instruction(cpu_env);
1747     }
1748     ctx->base.is_jmp = DISAS_NORETURN;
1749     return;
1750 
1751  do_fpu_disabled:
1752     gen_save_cpu_state(ctx, true);
1753     if (ctx->envflags & TB_FLAG_DELAY_SLOT_MASK) {
1754         gen_helper_raise_slot_fpu_disable(cpu_env);
1755     } else {
1756         gen_helper_raise_fpu_disable(cpu_env);
1757     }
1758     ctx->base.is_jmp = DISAS_NORETURN;
1759     return;
1760 }
1761 
1762 static void decode_opc(DisasContext * ctx)
1763 {
1764     uint32_t old_flags = ctx->envflags;
1765 
1766     _decode_opc(ctx);
1767 
1768     if (old_flags & TB_FLAG_DELAY_SLOT_MASK) {
1769         /* go out of the delay slot */
1770         ctx->envflags &= ~TB_FLAG_DELAY_SLOT_MASK;
1771 
1772         /* When in an exclusive region, we must continue to the end
1773            for conditional branches.  */
1774         if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE
1775             && old_flags & TB_FLAG_DELAY_SLOT_COND) {
1776             gen_delayed_conditional_jump(ctx);
1777             return;
1778         }
1779         /* Otherwise this is probably an invalid gUSA region.
1780            Drop the GUSA bits so the next TB doesn't see them.  */
1781         ctx->envflags &= ~TB_FLAG_GUSA_MASK;
1782 
1783         tcg_gen_movi_i32(cpu_flags, ctx->envflags);
1784         if (old_flags & TB_FLAG_DELAY_SLOT_COND) {
1785 	    gen_delayed_conditional_jump(ctx);
1786         } else {
1787             gen_jump(ctx);
1788 	}
1789     }
1790 }
1791 
1792 #ifdef CONFIG_USER_ONLY
1793 /* For uniprocessors, SH4 uses optimistic restartable atomic sequences.
1794    Upon an interrupt, a real kernel would simply notice magic values in
1795    the registers and reset the PC to the start of the sequence.
1796 
1797    For QEMU, we cannot do this in quite the same way.  Instead, we notice
1798    the normal start of such a sequence (mov #-x,r15).  While we can handle
1799    any sequence via cpu_exec_step_atomic, we can recognize the "normal"
1800    sequences and transform them into atomic operations as seen by the host.
1801 */
static void decode_gusa(DisasContext *ctx, CPUSH4State *env)
{
    uint16_t insns[5];                  /* raw opcodes of the gUSA region */
    int ld_adr, ld_dst, ld_mop;         /* leading load:  mov.{b,w,l} @Rm,Rn */
    int op_dst, op_src, op_opc;         /* middle ALU op (TCG opcode index) */
    int mv_src, mt_dst, st_src, st_mop; /* optional mov/movt, trailing store */
    TCGv op_arg;
    uint32_t pc = ctx->base.pc_next;
    uint32_t pc_end = ctx->base.tb->cs_base; /* region end recorded in cs_base */
    int max_insns = (pc_end - pc) / 2;
    int i;

    /* The state machine below will consume only a few insns.
       If there are more than that in a region, fail now.  */
    if (max_insns > ARRAY_SIZE(insns)) {
        goto fail;
    }

    /* Read all of the insns for the region.  */
    for (i = 0; i < max_insns; ++i) {
        insns[i] = translator_lduw(env, &ctx->base, pc + i * 2);
    }

    /* -1 means "not yet seen / unconstrained" throughout.  */
    ld_adr = ld_dst = ld_mop = -1;
    mv_src = -1;
    op_dst = op_src = op_opc = -1;
    mt_dst = -1;
    st_src = st_mop = -1;
    op_arg = NULL;
    i = 0;

#define NEXT_INSN \
    do { if (i >= max_insns) goto fail; ctx->opcode = insns[i++]; } while (0)

    /*
     * Expect a load to begin the region.
     */
    NEXT_INSN;
    switch (ctx->opcode & 0xf00f) {
    case 0x6000: /* mov.b @Rm,Rn */
        ld_mop = MO_SB;
        break;
    case 0x6001: /* mov.w @Rm,Rn */
        ld_mop = MO_TESW;
        break;
    case 0x6002: /* mov.l @Rm,Rn */
        ld_mop = MO_TESL;
        break;
    default:
        goto fail;
    }
    ld_adr = B7_4;
    ld_dst = B11_8;
    if (ld_adr == ld_dst) {
        goto fail;
    }
    /* Unless we see a mov, any two-operand operation must use ld_dst.  */
    op_dst = ld_dst;

    /*
     * Expect an optional register move.
     */
    NEXT_INSN;
    switch (ctx->opcode & 0xf00f) {
    case 0x6003: /* mov Rm,Rn */
        /*
         * Here we want to recognize ld_dst being saved for later consumption,
         * or for another input register being copied so that ld_dst need not
         * be clobbered during the operation.
         */
        op_dst = B11_8;
        mv_src = B7_4;
        if (op_dst == ld_dst) {
            /* Overwriting the load output.  */
            goto fail;
        }
        if (mv_src != ld_dst) {
            /* Copying a new input; constrain op_src to match the load.  */
            op_src = ld_dst;
        }
        break;

    default:
        /* Put back and re-examine as operation.  */
        --i;
    }

    /*
     * Expect the operation.
     */
    NEXT_INSN;
    switch (ctx->opcode & 0xf00f) {
    case 0x300c: /* add Rm,Rn */
        op_opc = INDEX_op_add_i32;
        goto do_reg_op;
    case 0x2009: /* and Rm,Rn */
        op_opc = INDEX_op_and_i32;
        goto do_reg_op;
    case 0x200a: /* xor Rm,Rn */
        op_opc = INDEX_op_xor_i32;
        goto do_reg_op;
    case 0x200b: /* or Rm,Rn */
        op_opc = INDEX_op_or_i32;
    do_reg_op:
        /* The operation register should be as expected, and the
           other input cannot depend on the load.  */
        if (op_dst != B11_8) {
            goto fail;
        }
        if (op_src < 0) {
            /* Unconstrained input.  */
            op_src = B7_4;
        } else if (op_src == B7_4) {
            /* Constrained input matched load.  All operations are
               commutative; "swap" them by "moving" the load output
               to the (implicit) first argument and the move source
               to the (explicit) second argument.  */
            op_src = mv_src;
        } else {
            goto fail;
        }
        op_arg = REG(op_src);
        break;

    case 0x6007: /* not Rm,Rn */
        if (ld_dst != B7_4 || mv_src >= 0) {
            goto fail;
        }
        op_dst = B11_8;
        /* Implement "not" as xor with all-ones.  */
        op_opc = INDEX_op_xor_i32;
        op_arg = tcg_const_i32(-1);
        break;

    case 0x7000 ... 0x700f: /* add #imm,Rn */
        if (op_dst != B11_8 || mv_src >= 0) {
            goto fail;
        }
        op_opc = INDEX_op_add_i32;
        op_arg = tcg_const_i32(B7_0s);
        break;

    case 0x3000: /* cmp/eq Rm,Rn */
        /* Looking for the middle of a compare-and-swap sequence,
           beginning with the compare.  Operands can be either order,
           but with only one overlapping the load.  */
        if ((ld_dst == B11_8) + (ld_dst == B7_4) != 1 || mv_src >= 0) {
            goto fail;
        }
        op_opc = INDEX_op_setcond_i32;  /* placeholder */
        op_src = (ld_dst == B11_8 ? B7_4 : B11_8);
        op_arg = REG(op_src);

        NEXT_INSN;
        switch (ctx->opcode & 0xff00) {
        case 0x8b00: /* bf label */
        case 0x8f00: /* bf/s label */
            /* The branch target must be exactly the end of the region.  */
            if (pc + (i + 1 + B7_0s) * 2 != pc_end) {
                goto fail;
            }
            if ((ctx->opcode & 0xff00) == 0x8b00) { /* bf label */
                break;
            }
            /* We're looking to unconditionally modify Rn with the
               result of the comparison, within the delay slot of
               the branch.  This is used by older gcc.  */
            NEXT_INSN;
            if ((ctx->opcode & 0xf0ff) == 0x0029) { /* movt Rn */
                mt_dst = B11_8;
            } else {
                goto fail;
            }
            break;

        default:
            goto fail;
        }
        break;

    case 0x2008: /* tst Rm,Rn */
        /* Looking for a compare-and-swap against zero.  */
        if (ld_dst != B11_8 || ld_dst != B7_4 || mv_src >= 0) {
            goto fail;
        }
        op_opc = INDEX_op_setcond_i32;
        op_arg = tcg_const_i32(0);

        NEXT_INSN;
        if ((ctx->opcode & 0xff00) != 0x8900 /* bt label */
            || pc + (i + 1 + B7_0s) * 2 != pc_end) {
            goto fail;
        }
        break;

    default:
        /* Put back and re-examine as store.  */
        --i;
    }

    /*
     * Expect the store.
     */
    /* The store must be the last insn.  */
    if (i != max_insns - 1) {
        goto fail;
    }
    NEXT_INSN;
    switch (ctx->opcode & 0xf00f) {
    case 0x2000: /* mov.b Rm,@Rn */
        st_mop = MO_UB;
        break;
    case 0x2001: /* mov.w Rm,@Rn */
        st_mop = MO_UW;
        break;
    case 0x2002: /* mov.l Rm,@Rn */
        st_mop = MO_UL;
        break;
    default:
        goto fail;
    }
    /* The store must match the load.  */
    if (ld_adr != B11_8 || st_mop != (ld_mop & MO_SIZE)) {
        goto fail;
    }
    st_src = B7_4;

#undef NEXT_INSN

    /*
     * Emit the operation.
     */
    switch (op_opc) {
    case -1:
        /* No operation found.  Look for exchange pattern.  */
        if (st_src == ld_dst || mv_src >= 0) {
            goto fail;
        }
        tcg_gen_atomic_xchg_i32(REG(ld_dst), REG(ld_adr), REG(st_src),
                                ctx->memidx, ld_mop);
        break;

    case INDEX_op_add_i32:
        if (op_dst != st_src) {
            goto fail;
        }
        if (op_dst == ld_dst && st_mop == MO_UL) {
            tcg_gen_atomic_add_fetch_i32(REG(ld_dst), REG(ld_adr),
                                         op_arg, ctx->memidx, ld_mop);
        } else {
            tcg_gen_atomic_fetch_add_i32(REG(ld_dst), REG(ld_adr),
                                         op_arg, ctx->memidx, ld_mop);
            if (op_dst != ld_dst) {
                /* Note that mop sizes < 4 cannot use add_fetch
                   because it won't carry into the higher bits.  */
                tcg_gen_add_i32(REG(op_dst), REG(ld_dst), op_arg);
            }
        }
        break;

    case INDEX_op_and_i32:
        if (op_dst != st_src) {
            goto fail;
        }
        if (op_dst == ld_dst) {
            tcg_gen_atomic_and_fetch_i32(REG(ld_dst), REG(ld_adr),
                                         op_arg, ctx->memidx, ld_mop);
        } else {
            tcg_gen_atomic_fetch_and_i32(REG(ld_dst), REG(ld_adr),
                                         op_arg, ctx->memidx, ld_mop);
            tcg_gen_and_i32(REG(op_dst), REG(ld_dst), op_arg);
        }
        break;

    case INDEX_op_or_i32:
        if (op_dst != st_src) {
            goto fail;
        }
        if (op_dst == ld_dst) {
            tcg_gen_atomic_or_fetch_i32(REG(ld_dst), REG(ld_adr),
                                        op_arg, ctx->memidx, ld_mop);
        } else {
            tcg_gen_atomic_fetch_or_i32(REG(ld_dst), REG(ld_adr),
                                        op_arg, ctx->memidx, ld_mop);
            tcg_gen_or_i32(REG(op_dst), REG(ld_dst), op_arg);
        }
        break;

    case INDEX_op_xor_i32:
        if (op_dst != st_src) {
            goto fail;
        }
        if (op_dst == ld_dst) {
            tcg_gen_atomic_xor_fetch_i32(REG(ld_dst), REG(ld_adr),
                                         op_arg, ctx->memidx, ld_mop);
        } else {
            tcg_gen_atomic_fetch_xor_i32(REG(ld_dst), REG(ld_adr),
                                         op_arg, ctx->memidx, ld_mop);
            tcg_gen_xor_i32(REG(op_dst), REG(ld_dst), op_arg);
        }
        break;

    case INDEX_op_setcond_i32:
        /* Compare-and-swap: cmpxchg against op_arg, then set T on match.  */
        if (st_src == ld_dst) {
            goto fail;
        }
        tcg_gen_atomic_cmpxchg_i32(REG(ld_dst), REG(ld_adr), op_arg,
                                   REG(st_src), ctx->memidx, ld_mop);
        tcg_gen_setcond_i32(TCG_COND_EQ, cpu_sr_t, REG(ld_dst), op_arg);
        if (mt_dst >= 0) {
            tcg_gen_mov_i32(REG(mt_dst), cpu_sr_t);
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* The entire region has been translated.  */
    ctx->envflags &= ~TB_FLAG_GUSA_MASK;
    ctx->base.pc_next = pc_end;
    ctx->base.num_insns += max_insns - 1;
    return;

 fail:
    qemu_log_mask(LOG_UNIMP, "Unrecognized gUSA sequence %08x-%08x\n",
                  pc, pc_end);

    /* Restart with the EXCLUSIVE bit set, within a TB run via
       cpu_exec_step_atomic holding the exclusive lock.  */
    ctx->envflags |= TB_FLAG_GUSA_EXCLUSIVE;
    gen_save_cpu_state(ctx, false);
    gen_helper_exclusive(cpu_env);
    ctx->base.is_jmp = DISAS_NORETURN;

    /* We're not executing an instruction, but we must report one for the
       purposes of accounting within the TB.  We might as well report the
       entire region consumed via ctx->base.pc_next so that it's immediately
       available in the disassembly dump.  */
    ctx->base.pc_next = pc_end;
    ctx->base.num_insns += max_insns - 1;
}
2142 #endif
2143 
2144 static void sh4_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
2145 {
2146     DisasContext *ctx = container_of(dcbase, DisasContext, base);
2147     CPUSH4State *env = cs->env_ptr;
2148     uint32_t tbflags;
2149     int bound;
2150 
2151     ctx->tbflags = tbflags = ctx->base.tb->flags;
2152     ctx->envflags = tbflags & TB_FLAG_ENVFLAGS_MASK;
2153     ctx->memidx = (tbflags & (1u << SR_MD)) == 0 ? 1 : 0;
2154     /* We don't know if the delayed pc came from a dynamic or static branch,
2155        so assume it is a dynamic branch.  */
2156     ctx->delayed_pc = -1; /* use delayed pc from env pointer */
2157     ctx->features = env->features;
2158     ctx->has_movcal = (tbflags & TB_FLAG_PENDING_MOVCA);
2159     ctx->gbank = ((tbflags & (1 << SR_MD)) &&
2160                   (tbflags & (1 << SR_RB))) * 0x10;
2161     ctx->fbank = tbflags & FPSCR_FR ? 0x10 : 0;
2162 
2163 #ifdef CONFIG_USER_ONLY
2164     if (tbflags & TB_FLAG_GUSA_MASK) {
2165         /* In gUSA exclusive region. */
2166         uint32_t pc = ctx->base.pc_next;
2167         uint32_t pc_end = ctx->base.tb->cs_base;
2168         int backup = sextract32(ctx->tbflags, TB_FLAG_GUSA_SHIFT, 8);
2169         int max_insns = (pc_end - pc) / 2;
2170 
2171         if (pc != pc_end + backup || max_insns < 2) {
2172             /* This is a malformed gUSA region.  Don't do anything special,
2173                since the interpreter is likely to get confused.  */
2174             ctx->envflags &= ~TB_FLAG_GUSA_MASK;
2175         } else if (tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
2176             /* Regardless of single-stepping or the end of the page,
2177                we must complete execution of the gUSA region while
2178                holding the exclusive lock.  */
2179             ctx->base.max_insns = max_insns;
2180             return;
2181         }
2182     }
2183 #endif
2184 
2185     /* Since the ISA is fixed-width, we can bound by the number
2186        of instructions remaining on the page.  */
2187     bound = -(ctx->base.pc_next | TARGET_PAGE_MASK) / 2;
2188     ctx->base.max_insns = MIN(ctx->base.max_insns, bound);
2189 }
2190 
/* No per-TB setup is required for SH4; this hook is intentionally empty. */
static void sh4_tr_tb_start(DisasContextBase *dcbase, CPUState *cs)
{
}
2194 
2195 static void sh4_tr_insn_start(DisasContextBase *dcbase, CPUState *cs)
2196 {
2197     DisasContext *ctx = container_of(dcbase, DisasContext, base);
2198 
2199     tcg_gen_insn_start(ctx->base.pc_next, ctx->envflags);
2200 }
2201 
2202 static void sh4_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
2203 {
2204     CPUSH4State *env = cs->env_ptr;
2205     DisasContext *ctx = container_of(dcbase, DisasContext, base);
2206 
2207 #ifdef CONFIG_USER_ONLY
2208     if (unlikely(ctx->envflags & TB_FLAG_GUSA_MASK)
2209         && !(ctx->envflags & TB_FLAG_GUSA_EXCLUSIVE)) {
2210         /* We're in an gUSA region, and we have not already fallen
2211            back on using an exclusive region.  Attempt to parse the
2212            region into a single supported atomic operation.  Failure
2213            is handled within the parser by raising an exception to
2214            retry using an exclusive region.  */
2215         decode_gusa(ctx, env);
2216         return;
2217     }
2218 #endif
2219 
2220     ctx->opcode = translator_lduw(env, &ctx->base, ctx->base.pc_next);
2221     decode_opc(ctx);
2222     ctx->base.pc_next += 2;
2223 }
2224 
2225 static void sh4_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
2226 {
2227     DisasContext *ctx = container_of(dcbase, DisasContext, base);
2228 
2229     if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
2230         /* Ending the region of exclusivity.  Clear the bits.  */
2231         ctx->envflags &= ~TB_FLAG_GUSA_MASK;
2232     }
2233 
2234     switch (ctx->base.is_jmp) {
2235     case DISAS_STOP:
2236         gen_save_cpu_state(ctx, true);
2237         tcg_gen_exit_tb(NULL, 0);
2238         break;
2239     case DISAS_NEXT:
2240     case DISAS_TOO_MANY:
2241         gen_save_cpu_state(ctx, false);
2242         gen_goto_tb(ctx, 0, ctx->base.pc_next);
2243         break;
2244     case DISAS_NORETURN:
2245         break;
2246     default:
2247         g_assert_not_reached();
2248     }
2249 }
2250 
/* Write the guest disassembly of the translated block to the QEMU log. */
static void sh4_tr_disas_log(const DisasContextBase *dcbase,
                             CPUState *cs, FILE *logfile)
{
    fprintf(logfile, "IN: %s\n", lookup_symbol(dcbase->pc_first));
    target_disas(logfile, cs, dcbase->pc_first, dcbase->tb->size);
}
2257 
/* Hooks wiring the SH4 front end into the generic translator loop. */
static const TranslatorOps sh4_tr_ops = {
    .init_disas_context = sh4_tr_init_disas_context,
    .tb_start           = sh4_tr_tb_start,
    .insn_start         = sh4_tr_insn_start,
    .translate_insn     = sh4_tr_translate_insn,
    .tb_stop            = sh4_tr_tb_stop,
    .disas_log          = sh4_tr_disas_log,
};
2266 
2267 void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns,
2268                            target_ulong pc, void *host_pc)
2269 {
2270     DisasContext ctx;
2271 
2272     translator_loop(cs, tb, max_insns, pc, host_pc, &sh4_tr_ops, &ctx.base);
2273 }
2274