/*
 *  SH4 translation
 *
 *  Copyright (c) 2005 Samuel Tardieu
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#define DEBUG_DISAS

#include "qemu/osdep.h"
#include "cpu.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg/tcg-op.h"
#include "exec/cpu_ldst.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/translator.h"
#include "exec/log.h"
#include "qemu/qemu-print.h"


typedef struct DisasContext {
    DisasContextBase base;

    uint32_t tbflags;  /* should stay unmodified during the TB translation */
    uint32_t envflags; /* should stay in sync with env->flags using TCG ops */
    int memidx;
    int gbank;
    int fbank;
    uint32_t delayed_pc;
    uint32_t features;

    uint16_t opcode;

    bool has_movcal;
} DisasContext;

#if defined(CONFIG_USER_ONLY)
#define IS_USER(ctx) 1
#else
#define IS_USER(ctx) (!(ctx->tbflags & (1u << SR_MD)))
#endif

/* Target-specific values for ctx->base.is_jmp.  */
/* We want to exit back to the cpu loop for some reason.
   Usually this is to recognize interrupts immediately.  */
#define DISAS_STOP    DISAS_TARGET_0

/* global register indexes */
static TCGv cpu_gregs[32];
static TCGv cpu_sr, cpu_sr_m, cpu_sr_q, cpu_sr_t;
static TCGv cpu_pc, cpu_ssr, cpu_spc, cpu_gbr;
static TCGv cpu_vbr, cpu_sgr, cpu_dbr, cpu_mach, cpu_macl;
static TCGv cpu_pr, cpu_fpscr, cpu_fpul;
static TCGv cpu_lock_addr, cpu_lock_value;
static TCGv cpu_fregs[32];

/* internal register indexes */
static TCGv cpu_flags, cpu_delayed_pc, cpu_delayed_cond;

#include "exec/gen-icount.h"

void sh4_translate_init(void)
{
    int i;
    static const char * const gregnames[24] = {
        "R0_BANK0", "R1_BANK0", "R2_BANK0", "R3_BANK0",
        "R4_BANK0", "R5_BANK0", "R6_BANK0", "R7_BANK0",
        "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15",
        "R0_BANK1", "R1_BANK1", "R2_BANK1", "R3_BANK1",
        "R4_BANK1", "R5_BANK1", "R6_BANK1", "R7_BANK1"
    };
    static const char * const fregnames[32] = {
         "FPR0_BANK0",  "FPR1_BANK0",  "FPR2_BANK0",  "FPR3_BANK0",
         "FPR4_BANK0",  "FPR5_BANK0",  "FPR6_BANK0",  "FPR7_BANK0",
         "FPR8_BANK0",  "FPR9_BANK0", "FPR10_BANK0", "FPR11_BANK0",
        "FPR12_BANK0", "FPR13_BANK0", "FPR14_BANK0", "FPR15_BANK0",
         "FPR0_BANK1",  "FPR1_BANK1",  "FPR2_BANK1",  "FPR3_BANK1",
         "FPR4_BANK1",  "FPR5_BANK1",  "FPR6_BANK1",  "FPR7_BANK1",
         "FPR8_BANK1",  "FPR9_BANK1", "FPR10_BANK1", "FPR11_BANK1",
        "FPR12_BANK1", "FPR13_BANK1", "FPR14_BANK1", "FPR15_BANK1",
    };

    for (i = 0; i < 24; i++) {
        cpu_gregs[i] = tcg_global_mem_new_i32(cpu_env,
                                              offsetof(CPUSH4State, gregs[i]),
                                              gregnames[i]);
    }
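    /* Entries 24..31 alias R8..R15, which are not banked; this lets
       REG() and ALTREG() below pick the active bank with a simple XOR
       of the register index. */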
    memcpy(cpu_gregs + 24, cpu_gregs + 8, 8 * sizeof(TCGv));

    cpu_pc = tcg_global_mem_new_i32(cpu_env,
                                    offsetof(CPUSH4State, pc), "PC");
    cpu_sr = tcg_global_mem_new_i32(cpu_env,
                                    offsetof(CPUSH4State, sr), "SR");
    cpu_sr_m = tcg_global_mem_new_i32(cpu_env,
                                      offsetof(CPUSH4State, sr_m), "SR_M");
    cpu_sr_q = tcg_global_mem_new_i32(cpu_env,
                                      offsetof(CPUSH4State, sr_q), "SR_Q");
    cpu_sr_t = tcg_global_mem_new_i32(cpu_env,
                                      offsetof(CPUSH4State, sr_t), "SR_T");
    cpu_ssr = tcg_global_mem_new_i32(cpu_env,
                                     offsetof(CPUSH4State, ssr), "SSR");
    cpu_spc = tcg_global_mem_new_i32(cpu_env,
                                     offsetof(CPUSH4State, spc), "SPC");
    cpu_gbr = tcg_global_mem_new_i32(cpu_env,
                                     offsetof(CPUSH4State, gbr), "GBR");
    cpu_vbr = tcg_global_mem_new_i32(cpu_env,
                                     offsetof(CPUSH4State, vbr), "VBR");
    cpu_sgr = tcg_global_mem_new_i32(cpu_env,
                                     offsetof(CPUSH4State, sgr), "SGR");
    cpu_dbr = tcg_global_mem_new_i32(cpu_env,
                                     offsetof(CPUSH4State, dbr), "DBR");
    cpu_mach = tcg_global_mem_new_i32(cpu_env,
                                      offsetof(CPUSH4State, mach), "MACH");
    cpu_macl = tcg_global_mem_new_i32(cpu_env,
                                      offsetof(CPUSH4State, macl), "MACL");
    cpu_pr = tcg_global_mem_new_i32(cpu_env,
                                    offsetof(CPUSH4State, pr), "PR");
    cpu_fpscr = tcg_global_mem_new_i32(cpu_env,
                                       offsetof(CPUSH4State, fpscr), "FPSCR");
    cpu_fpul = tcg_global_mem_new_i32(cpu_env,
                                      offsetof(CPUSH4State, fpul), "FPUL");

    cpu_flags = tcg_global_mem_new_i32(cpu_env,
                                       offsetof(CPUSH4State, flags), "_flags_");
    cpu_delayed_pc = tcg_global_mem_new_i32(cpu_env,
                                            offsetof(CPUSH4State, delayed_pc),
                                            "_delayed_pc_");
    cpu_delayed_cond = tcg_global_mem_new_i32(cpu_env,
                                              offsetof(CPUSH4State,
                                                       delayed_cond),
                                              "_delayed_cond_");
    cpu_lock_addr = tcg_global_mem_new_i32(cpu_env,
                                           offsetof(CPUSH4State, lock_addr),
                                           "_lock_addr_");
    cpu_lock_value = tcg_global_mem_new_i32(cpu_env,
                                            offsetof(CPUSH4State, lock_value),
                                            "_lock_value_");

    for (i = 0; i < 32; i++)
        cpu_fregs[i] = tcg_global_mem_new_i32(cpu_env,
                                              offsetof(CPUSH4State, fregs[i]),
                                              fregnames[i]);
}

void superh_cpu_dump_state(CPUState *cs, FILE *f, int flags)
{
    SuperHCPU *cpu = SUPERH_CPU(cs);
    CPUSH4State *env = &cpu->env;
    int i;

    qemu_fprintf(f, "pc=0x%08x sr=0x%08x pr=0x%08x fpscr=0x%08x\n",
                 env->pc, cpu_read_sr(env), env->pr, env->fpscr);
    qemu_fprintf(f, "spc=0x%08x ssr=0x%08x gbr=0x%08x vbr=0x%08x\n",
                 env->spc, env->ssr, env->gbr, env->vbr);
    qemu_fprintf(f, "sgr=0x%08x dbr=0x%08x delayed_pc=0x%08x fpul=0x%08x\n",
                 env->sgr, env->dbr, env->delayed_pc, env->fpul);
    for (i = 0; i < 24; i += 4) {
        qemu_fprintf(f, "r%d=0x%08x r%d=0x%08x r%d=0x%08x r%d=0x%08x\n",
                     i, env->gregs[i], i + 1, env->gregs[i + 1],
                     i + 2, env->gregs[i + 2], i + 3, env->gregs[i + 3]);
    }
    if (env->flags & DELAY_SLOT) {
        qemu_fprintf(f, "in delay slot (delayed_pc=0x%08x)\n",
                     env->delayed_pc);
    } else if (env->flags & DELAY_SLOT_CONDITIONAL) {
        qemu_fprintf(f, "in conditional delay slot (delayed_pc=0x%08x)\n",
                     env->delayed_pc);
    } else if (env->flags & DELAY_SLOT_RTE) {
        qemu_fprintf(f, "in rte delay slot (delayed_pc=0x%08x)\n",
                     env->delayed_pc);
    }
}

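/* SR is kept split across TCG globals: the Q, M and T flags live as
   0/1 values in cpu_sr_q, cpu_sr_m and cpu_sr_t, while cpu_sr holds
   the remaining bits.  The helpers below reassemble and scatter the
   architectural 32-bit SR value. */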
static void gen_read_sr(TCGv dst)
{
    TCGv t0 = tcg_temp_new();
    tcg_gen_shli_i32(t0, cpu_sr_q, SR_Q);
    tcg_gen_or_i32(dst, cpu_sr, t0);
    tcg_gen_shli_i32(t0, cpu_sr_m, SR_M);
    tcg_gen_or_i32(dst, dst, t0);
    tcg_gen_shli_i32(t0, cpu_sr_t, SR_T);
    tcg_gen_or_i32(dst, dst, t0);
    tcg_temp_free_i32(t0);
}

static void gen_write_sr(TCGv src)
{
    tcg_gen_andi_i32(cpu_sr, src,
                     ~((1u << SR_Q) | (1u << SR_M) | (1u << SR_T)));
    tcg_gen_extract_i32(cpu_sr_q, src, SR_Q, 1);
    tcg_gen_extract_i32(cpu_sr_m, src, SR_M, 1);
    tcg_gen_extract_i32(cpu_sr_t, src, SR_T, 1);
}

static inline void gen_save_cpu_state(DisasContext *ctx, bool save_pc)
{
    if (save_pc) {
        tcg_gen_movi_i32(cpu_pc, ctx->base.pc_next);
    }
    if (ctx->delayed_pc != (uint32_t) -1) {
        tcg_gen_movi_i32(cpu_delayed_pc, ctx->delayed_pc);
    }
    if ((ctx->tbflags & TB_FLAG_ENVFLAGS_MASK) != ctx->envflags) {
        tcg_gen_movi_i32(cpu_flags, ctx->envflags);
    }
}

static inline bool use_exit_tb(DisasContext *ctx)
{
    return (ctx->tbflags & GUSA_EXCLUSIVE) != 0;
}

static bool use_goto_tb(DisasContext *ctx, target_ulong dest)
{
    if (use_exit_tb(ctx)) {
        return false;
    }
    return translator_use_goto_tb(&ctx->base, dest);
}

static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
{
    if (use_goto_tb(ctx, dest)) {
        tcg_gen_goto_tb(n);
        tcg_gen_movi_i32(cpu_pc, dest);
        tcg_gen_exit_tb(ctx->base.tb, n);
    } else {
        tcg_gen_movi_i32(cpu_pc, dest);
        if (use_exit_tb(ctx)) {
            tcg_gen_exit_tb(NULL, 0);
        } else {
            tcg_gen_lookup_and_goto_ptr();
        }
    }
    ctx->base.is_jmp = DISAS_NORETURN;
}

static void gen_jump(DisasContext * ctx)
{
    if (ctx->delayed_pc == -1) {
        /* Target is not statically known; it necessarily comes from a
           delayed jump, since immediate jumps are conditional jumps.  */
        tcg_gen_mov_i32(cpu_pc, cpu_delayed_pc);
        tcg_gen_discard_i32(cpu_delayed_pc);
        if (use_exit_tb(ctx)) {
            tcg_gen_exit_tb(NULL, 0);
        } else {
            tcg_gen_lookup_and_goto_ptr();
        }
        ctx->base.is_jmp = DISAS_NORETURN;
    } else {
        gen_goto_tb(ctx, 0, ctx->delayed_pc);
    }
}

/* Immediate conditional jump (bt or bf) */
static void gen_conditional_jump(DisasContext *ctx, target_ulong dest,
                                 bool jump_if_true)
{
    TCGLabel *l1 = gen_new_label();
    TCGCond cond_not_taken = jump_if_true ? TCG_COND_EQ : TCG_COND_NE;

    if (ctx->tbflags & GUSA_EXCLUSIVE) {
        /* When in an exclusive region, we must continue to the end.
           Therefore, exit the region on a taken branch, but otherwise
           fall through to the next instruction.  */
        tcg_gen_brcondi_i32(cond_not_taken, cpu_sr_t, 0, l1);
        tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~GUSA_MASK);
        /* Note that this won't actually use a goto_tb opcode because we
           disallow it in use_goto_tb, but it handles exit + singlestep.  */
        gen_goto_tb(ctx, 0, dest);
        gen_set_label(l1);
        ctx->base.is_jmp = DISAS_NEXT;
        return;
    }

    gen_save_cpu_state(ctx, false);
    tcg_gen_brcondi_i32(cond_not_taken, cpu_sr_t, 0, l1);
    gen_goto_tb(ctx, 0, dest);
    gen_set_label(l1);
    gen_goto_tb(ctx, 1, ctx->base.pc_next + 2);
    ctx->base.is_jmp = DISAS_NORETURN;
}

/* Delayed conditional jump (bt or bf) */
static void gen_delayed_conditional_jump(DisasContext * ctx)
{
    TCGLabel *l1 = gen_new_label();
    TCGv ds = tcg_temp_new();

    tcg_gen_mov_i32(ds, cpu_delayed_cond);
    tcg_gen_discard_i32(cpu_delayed_cond);

    if (ctx->tbflags & GUSA_EXCLUSIVE) {
        /* When in an exclusive region, we must continue to the end.
           Therefore, exit the region on a taken branch, but otherwise
           fall through to the next instruction.  */
        tcg_gen_brcondi_i32(TCG_COND_EQ, ds, 0, l1);

        /* Leave the gUSA region.  */
        tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~GUSA_MASK);
        gen_jump(ctx);

        gen_set_label(l1);
        ctx->base.is_jmp = DISAS_NEXT;
        return;
    }

    tcg_gen_brcondi_i32(TCG_COND_NE, ds, 0, l1);
    gen_goto_tb(ctx, 1, ctx->base.pc_next + 2);
    gen_set_label(l1);
    gen_jump(ctx);
}

static inline void gen_load_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
{
    /* We have already signaled illegal instruction for odd Dr.  */
    tcg_debug_assert((reg & 1) == 0);
    reg ^= ctx->fbank;
    tcg_gen_concat_i32_i64(t, cpu_fregs[reg + 1], cpu_fregs[reg]);
}

static inline void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
{
    /* We have already signaled illegal instruction for odd Dr.  */
    tcg_debug_assert((reg & 1) == 0);
    reg ^= ctx->fbank;
    tcg_gen_extr_i64_i32(cpu_fregs[reg + 1], cpu_fregs[reg], t);
}

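/* Opcode field extractors: Bx_y is bits [x:y] of the 16-bit opcode,
   and an 's' suffix means the field is sign-extended.  For example,
   for opcode 0x30ac: B15_12 = 0x3, B11_8 = 0x0, B7_4 = 0xa and
   B3_0 = 0xc. */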
#define B3_0 (ctx->opcode & 0xf)
#define B6_4 ((ctx->opcode >> 4) & 0x7)
#define B7_4 ((ctx->opcode >> 4) & 0xf)
#define B7_0 (ctx->opcode & 0xff)
#define B7_0s ((int32_t) (int8_t) (ctx->opcode & 0xff))
#define B11_0s (ctx->opcode & 0x800 ? 0xfffff000 | (ctx->opcode & 0xfff) : \
  (ctx->opcode & 0xfff))
#define B11_8 ((ctx->opcode >> 8) & 0xf)
#define B15_12 ((ctx->opcode >> 12) & 0xf)

#define REG(x)     cpu_gregs[(x) ^ ctx->gbank]
#define ALTREG(x)  cpu_gregs[(x) ^ ctx->gbank ^ 0x10]
#define FREG(x)    cpu_fregs[(x) ^ ctx->fbank]

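/* XHACK() maps an XD/DR pair encoding onto an index into the flat
   32-entry cpu_fregs array: bit 0 (the bank selector when FPSCR.SZ
   is set) is moved up to bit 4, leaving an even base index. */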
#define XHACK(x) ((((x) & 1) << 4) | ((x) & 0xe))

#define CHECK_NOT_DELAY_SLOT \
    if (ctx->envflags & DELAY_SLOT_MASK) {  \
        goto do_illegal_slot;               \
    }

#define CHECK_PRIVILEGED \
    if (IS_USER(ctx)) {                     \
        goto do_illegal;                    \
    }

#define CHECK_FPU_ENABLED \
    if (ctx->tbflags & (1u << SR_FD)) {     \
        goto do_fpu_disabled;               \
    }

#define CHECK_FPSCR_PR_0 \
    if (ctx->tbflags & FPSCR_PR) {          \
        goto do_illegal;                    \
    }

#define CHECK_FPSCR_PR_1 \
    if (!(ctx->tbflags & FPSCR_PR)) {       \
        goto do_illegal;                    \
    }

#define CHECK_SH4A \
    if (!(ctx->features & SH_FEATURE_SH4A)) { \
        goto do_illegal;                      \
    }

static void _decode_opc(DisasContext * ctx)
{
    /* This code tries to make movca.l emulation sufficiently
       accurate for Linux purposes.  This instruction writes
       memory, and prior to that, always allocates a cache line.
       It is used in two contexts:
       - in memcpy, where data is copied in blocks, the first write
       to a block uses movca.l for performance.
       - in arch/sh/mm/cache-sh4.c, the movca.l + ocbi combination is
       used to flush the cache.  Here, the data written by movca.l is
       never written to memory, and the data written is just bogus.

       To simulate this, when we simulate movca.l we store the value
       to memory, but we also remember the previous content.  If we
       see ocbi, we check whether movca.l for that address was done
       previously.  If so, the write should not have hit the memory,
       so we restore the previous content.  When we see an instruction
       that is neither movca.l nor ocbi, the previous content is
       discarded.

       To optimize, we only try to flush stores when we're at the start
       of the TB, or if we already saw movca.l in this TB and did not
       flush stores yet.  */
    if (ctx->has_movcal) {
        int opcode = ctx->opcode & 0xf0ff;
        if (opcode != 0x0093 /* ocbi */
            && opcode != 0x00c3 /* movca.l */) {
            gen_helper_discard_movcal_backup(cpu_env);
            ctx->has_movcal = 0;
        }
    }

#if 0
    fprintf(stderr, "Translating opcode 0x%04x\n", ctx->opcode);
#endif

    switch (ctx->opcode) {
    case 0x0019:		/* div0u */
        tcg_gen_movi_i32(cpu_sr_m, 0);
        tcg_gen_movi_i32(cpu_sr_q, 0);
        tcg_gen_movi_i32(cpu_sr_t, 0);
        return;
    case 0x000b:		/* rts */
        CHECK_NOT_DELAY_SLOT
        tcg_gen_mov_i32(cpu_delayed_pc, cpu_pr);
        ctx->envflags |= DELAY_SLOT;
        ctx->delayed_pc = (uint32_t) - 1;
        return;
    case 0x0028:		/* clrmac */
        tcg_gen_movi_i32(cpu_mach, 0);
        tcg_gen_movi_i32(cpu_macl, 0);
        return;
    case 0x0048:		/* clrs */
        tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_S));
        return;
    case 0x0008:		/* clrt */
        tcg_gen_movi_i32(cpu_sr_t, 0);
        return;
    case 0x0038:		/* ldtlb */
        CHECK_PRIVILEGED
        gen_helper_ldtlb(cpu_env);
        return;
    case 0x002b:		/* rte */
        CHECK_PRIVILEGED
        CHECK_NOT_DELAY_SLOT
        gen_write_sr(cpu_ssr);
        tcg_gen_mov_i32(cpu_delayed_pc, cpu_spc);
        ctx->envflags |= DELAY_SLOT_RTE;
        ctx->delayed_pc = (uint32_t) - 1;
        ctx->base.is_jmp = DISAS_STOP;
        return;
    case 0x0058:		/* sets */
        tcg_gen_ori_i32(cpu_sr, cpu_sr, (1u << SR_S));
        return;
    case 0x0018:		/* sett */
        tcg_gen_movi_i32(cpu_sr_t, 1);
        return;
    case 0xfbfd:		/* frchg */
        CHECK_FPSCR_PR_0
        tcg_gen_xori_i32(cpu_fpscr, cpu_fpscr, FPSCR_FR);
        ctx->base.is_jmp = DISAS_STOP;
        return;
    case 0xf3fd:		/* fschg */
        CHECK_FPSCR_PR_0
        tcg_gen_xori_i32(cpu_fpscr, cpu_fpscr, FPSCR_SZ);
        ctx->base.is_jmp = DISAS_STOP;
        return;
    case 0xf7fd:                /* fpchg */
        CHECK_SH4A
        tcg_gen_xori_i32(cpu_fpscr, cpu_fpscr, FPSCR_PR);
        ctx->base.is_jmp = DISAS_STOP;
        return;
    case 0x0009:		/* nop */
        return;
    case 0x001b:		/* sleep */
        CHECK_PRIVILEGED
        tcg_gen_movi_i32(cpu_pc, ctx->base.pc_next + 2);
        gen_helper_sleep(cpu_env);
        return;
    }

    switch (ctx->opcode & 0xf000) {
    case 0x1000:		/* mov.l Rm,@(disp,Rn) */
        {
            TCGv addr = tcg_temp_new();
            tcg_gen_addi_i32(addr, REG(B11_8), B3_0 * 4);
            tcg_gen_qemu_st_i32(REG(B7_4), addr, ctx->memidx, MO_TEUL);
            tcg_temp_free(addr);
        }
        return;
    case 0x5000:		/* mov.l @(disp,Rm),Rn */
        {
            TCGv addr = tcg_temp_new();
            tcg_gen_addi_i32(addr, REG(B7_4), B3_0 * 4);
            tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, MO_TESL);
            tcg_temp_free(addr);
        }
        return;
    case 0xe000:		/* mov #imm,Rn */
#ifdef CONFIG_USER_ONLY
        /* Detect the start of a gUSA region.  If so, update envflags
           and end the TB.  This will allow us to see the end of the
           region (stored in R0) in the next TB.  */
        if (B11_8 == 15 && B7_0s < 0 &&
            (tb_cflags(ctx->base.tb) & CF_PARALLEL)) {
            ctx->envflags = deposit32(ctx->envflags, GUSA_SHIFT, 8, B7_0s);
            ctx->base.is_jmp = DISAS_STOP;
        }
#endif
        tcg_gen_movi_i32(REG(B11_8), B7_0s);
        return;
    case 0x9000:		/* mov.w @(disp,PC),Rn */
        {
            TCGv addr = tcg_const_i32(ctx->base.pc_next + 4 + B7_0 * 2);
            tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, MO_TESW);
            tcg_temp_free(addr);
        }
        return;
    case 0xd000:		/* mov.l @(disp,PC),Rn */
        {
            TCGv addr = tcg_const_i32((ctx->base.pc_next + 4 + B7_0 * 4) & ~3);
            tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, MO_TESL);
            tcg_temp_free(addr);
        }
        return;
    case 0x7000:		/* add #imm,Rn */
        tcg_gen_addi_i32(REG(B11_8), REG(B11_8), B7_0s);
        return;
    case 0xa000:		/* bra disp */
        CHECK_NOT_DELAY_SLOT
        ctx->delayed_pc = ctx->base.pc_next + 4 + B11_0s * 2;
        ctx->envflags |= DELAY_SLOT;
        return;
    case 0xb000:		/* bsr disp */
        CHECK_NOT_DELAY_SLOT
        tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
        ctx->delayed_pc = ctx->base.pc_next + 4 + B11_0s * 2;
        ctx->envflags |= DELAY_SLOT;
        return;
    }

    switch (ctx->opcode & 0xf00f) {
    case 0x6003:		/* mov Rm,Rn */
        tcg_gen_mov_i32(REG(B11_8), REG(B7_4));
        return;
    case 0x2000:		/* mov.b Rm,@Rn */
        tcg_gen_qemu_st_i32(REG(B7_4), REG(B11_8), ctx->memidx, MO_UB);
        return;
    case 0x2001:		/* mov.w Rm,@Rn */
        tcg_gen_qemu_st_i32(REG(B7_4), REG(B11_8), ctx->memidx, MO_TEUW);
        return;
    case 0x2002:		/* mov.l Rm,@Rn */
        tcg_gen_qemu_st_i32(REG(B7_4), REG(B11_8), ctx->memidx, MO_TEUL);
        return;
    case 0x6000:		/* mov.b @Rm,Rn */
        tcg_gen_qemu_ld_i32(REG(B11_8), REG(B7_4), ctx->memidx, MO_SB);
        return;
    case 0x6001:		/* mov.w @Rm,Rn */
        tcg_gen_qemu_ld_i32(REG(B11_8), REG(B7_4), ctx->memidx, MO_TESW);
        return;
    case 0x6002:		/* mov.l @Rm,Rn */
        tcg_gen_qemu_ld_i32(REG(B11_8), REG(B7_4), ctx->memidx, MO_TESL);
        return;
    case 0x2004:		/* mov.b Rm,@-Rn */
        {
            TCGv addr = tcg_temp_new();
            tcg_gen_subi_i32(addr, REG(B11_8), 1);
            /* might cause re-execution */
            tcg_gen_qemu_st_i32(REG(B7_4), addr, ctx->memidx, MO_UB);
            tcg_gen_mov_i32(REG(B11_8), addr);  /* modify register status */
            tcg_temp_free(addr);
        }
        return;
    case 0x2005:		/* mov.w Rm,@-Rn */
        {
            TCGv addr = tcg_temp_new();
            tcg_gen_subi_i32(addr, REG(B11_8), 2);
            tcg_gen_qemu_st_i32(REG(B7_4), addr, ctx->memidx, MO_TEUW);
            tcg_gen_mov_i32(REG(B11_8), addr);
            tcg_temp_free(addr);
        }
        return;
    case 0x2006:		/* mov.l Rm,@-Rn */
        {
            TCGv addr = tcg_temp_new();
            tcg_gen_subi_i32(addr, REG(B11_8), 4);
            tcg_gen_qemu_st_i32(REG(B7_4), addr, ctx->memidx, MO_TEUL);
            tcg_gen_mov_i32(REG(B11_8), addr);
            tcg_temp_free(addr);
        }
        return;
    case 0x6004:		/* mov.b @Rm+,Rn */
        tcg_gen_qemu_ld_i32(REG(B11_8), REG(B7_4), ctx->memidx, MO_SB);
        if (B11_8 != B7_4) {
            tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 1);
        }
        return;
    case 0x6005:		/* mov.w @Rm+,Rn */
        tcg_gen_qemu_ld_i32(REG(B11_8), REG(B7_4), ctx->memidx, MO_TESW);
        if (B11_8 != B7_4) {
            tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 2);
        }
        return;
    case 0x6006:		/* mov.l @Rm+,Rn */
        tcg_gen_qemu_ld_i32(REG(B11_8), REG(B7_4), ctx->memidx, MO_TESL);
        if (B11_8 != B7_4) {
            tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 4);
        }
        return;
    case 0x0004:		/* mov.b Rm,@(R0,Rn) */
        {
            TCGv addr = tcg_temp_new();
            tcg_gen_add_i32(addr, REG(B11_8), REG(0));
            tcg_gen_qemu_st_i32(REG(B7_4), addr, ctx->memidx, MO_UB);
            tcg_temp_free(addr);
        }
        return;
    case 0x0005:		/* mov.w Rm,@(R0,Rn) */
        {
            TCGv addr = tcg_temp_new();
            tcg_gen_add_i32(addr, REG(B11_8), REG(0));
            tcg_gen_qemu_st_i32(REG(B7_4), addr, ctx->memidx, MO_TEUW);
            tcg_temp_free(addr);
        }
        return;
    case 0x0006:		/* mov.l Rm,@(R0,Rn) */
        {
            TCGv addr = tcg_temp_new();
            tcg_gen_add_i32(addr, REG(B11_8), REG(0));
            tcg_gen_qemu_st_i32(REG(B7_4), addr, ctx->memidx, MO_TEUL);
            tcg_temp_free(addr);
        }
        return;
    case 0x000c:		/* mov.b @(R0,Rm),Rn */
        {
            TCGv addr = tcg_temp_new();
            tcg_gen_add_i32(addr, REG(B7_4), REG(0));
            tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, MO_SB);
            tcg_temp_free(addr);
        }
        return;
    case 0x000d:		/* mov.w @(R0,Rm),Rn */
        {
            TCGv addr = tcg_temp_new();
            tcg_gen_add_i32(addr, REG(B7_4), REG(0));
            tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, MO_TESW);
            tcg_temp_free(addr);
        }
        return;
    case 0x000e:		/* mov.l @(R0,Rm),Rn */
        {
            TCGv addr = tcg_temp_new();
            tcg_gen_add_i32(addr, REG(B7_4), REG(0));
            tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, MO_TESL);
            tcg_temp_free(addr);
        }
        return;
    case 0x6008:		/* swap.b Rm,Rn */
        {
            TCGv low = tcg_temp_new();
            tcg_gen_bswap16_i32(low, REG(B7_4), 0);
            tcg_gen_deposit_i32(REG(B11_8), REG(B7_4), low, 0, 16);
            tcg_temp_free(low);
        }
        return;
    case 0x6009:		/* swap.w Rm,Rn */
        tcg_gen_rotli_i32(REG(B11_8), REG(B7_4), 16);
        return;
    case 0x200d:		/* xtrct Rm,Rn */
        {
            TCGv high, low;
            high = tcg_temp_new();
            tcg_gen_shli_i32(high, REG(B7_4), 16);
            low = tcg_temp_new();
            tcg_gen_shri_i32(low, REG(B11_8), 16);
            tcg_gen_or_i32(REG(B11_8), high, low);
            tcg_temp_free(low);
            tcg_temp_free(high);
        }
        return;
    case 0x300c:		/* add Rm,Rn */
        tcg_gen_add_i32(REG(B11_8), REG(B11_8), REG(B7_4));
        return;
    case 0x300e:		/* addc Rm,Rn */
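        /* Rn = Rn + Rm + T, done as two widening adds: the first
           computes t1 = Rm + T and keeps its carry in T, the second
           adds t1 to Rn and accumulates both carries, so the final
           carry-out ends up in T. */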
        {
            TCGv t0, t1;
            t0 = tcg_const_tl(0);
            t1 = tcg_temp_new();
            tcg_gen_add2_i32(t1, cpu_sr_t, cpu_sr_t, t0, REG(B7_4), t0);
            tcg_gen_add2_i32(REG(B11_8), cpu_sr_t,
                             REG(B11_8), t0, t1, cpu_sr_t);
            tcg_temp_free(t0);
            tcg_temp_free(t1);
        }
        return;
    case 0x300f:		/* addv Rm,Rn */
        {
            TCGv t0, t1, t2;
            t0 = tcg_temp_new();
            tcg_gen_add_i32(t0, REG(B7_4), REG(B11_8));
            t1 = tcg_temp_new();
            tcg_gen_xor_i32(t1, t0, REG(B11_8));
            t2 = tcg_temp_new();
            tcg_gen_xor_i32(t2, REG(B7_4), REG(B11_8));
            tcg_gen_andc_i32(cpu_sr_t, t1, t2);
            tcg_temp_free(t2);
            tcg_gen_shri_i32(cpu_sr_t, cpu_sr_t, 31);
            tcg_temp_free(t1);
            tcg_gen_mov_i32(REG(B11_8), t0);    /* result goes to Rn */
            tcg_temp_free(t0);
        }
        return;
    case 0x2009:		/* and Rm,Rn */
        tcg_gen_and_i32(REG(B11_8), REG(B11_8), REG(B7_4));
        return;
    case 0x3000:		/* cmp/eq Rm,Rn */
        tcg_gen_setcond_i32(TCG_COND_EQ, cpu_sr_t, REG(B11_8), REG(B7_4));
        return;
    case 0x3003:		/* cmp/ge Rm,Rn */
        tcg_gen_setcond_i32(TCG_COND_GE, cpu_sr_t, REG(B11_8), REG(B7_4));
        return;
    case 0x3007:		/* cmp/gt Rm,Rn */
        tcg_gen_setcond_i32(TCG_COND_GT, cpu_sr_t, REG(B11_8), REG(B7_4));
        return;
    case 0x3006:		/* cmp/hi Rm,Rn */
        tcg_gen_setcond_i32(TCG_COND_GTU, cpu_sr_t, REG(B11_8), REG(B7_4));
        return;
    case 0x3002:		/* cmp/hs Rm,Rn */
        tcg_gen_setcond_i32(TCG_COND_GEU, cpu_sr_t, REG(B11_8), REG(B7_4));
        return;
    case 0x200c:		/* cmp/str Rm,Rn */
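        /* T is set if any byte of Rm equals the corresponding byte
           of Rn.  cmp2 = Rm ^ Rn has a zero byte exactly at matching
           positions, and (cmp2 - 0x01010101) & ~cmp2 & 0x80808080 is
           the usual "contains a zero byte" bit trick: e.g. for
           cmp2 = 0x11003344 it leaves 0x00800000, flagging byte 2. */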
        {
            TCGv cmp1 = tcg_temp_new();
            TCGv cmp2 = tcg_temp_new();
            tcg_gen_xor_i32(cmp2, REG(B7_4), REG(B11_8));
            tcg_gen_subi_i32(cmp1, cmp2, 0x01010101);
            tcg_gen_andc_i32(cmp1, cmp1, cmp2);
            tcg_gen_andi_i32(cmp1, cmp1, 0x80808080);
            tcg_gen_setcondi_i32(TCG_COND_NE, cpu_sr_t, cmp1, 0);
            tcg_temp_free(cmp2);
            tcg_temp_free(cmp1);
        }
        return;
    case 0x2007:		/* div0s Rm,Rn */
        tcg_gen_shri_i32(cpu_sr_q, REG(B11_8), 31);         /* SR_Q */
        tcg_gen_shri_i32(cpu_sr_m, REG(B7_4), 31);          /* SR_M */
        tcg_gen_xor_i32(cpu_sr_t, cpu_sr_q, cpu_sr_m);      /* SR_T */
        return;
    case 0x3004:		/* div1 Rm,Rn */
        {
            TCGv t0 = tcg_temp_new();
            TCGv t1 = tcg_temp_new();
            TCGv t2 = tcg_temp_new();
            TCGv zero = tcg_const_i32(0);

            /* shift left arg1, saving the bit being pushed out and inserting
               T on the right */
            tcg_gen_shri_i32(t0, REG(B11_8), 31);
            tcg_gen_shli_i32(REG(B11_8), REG(B11_8), 1);
            tcg_gen_or_i32(REG(B11_8), REG(B11_8), cpu_sr_t);

            /* Add or subtract arg0 from arg1 depending if Q == M. To avoid
               using 64-bit temps, we compute arg0's high part from q ^ m, so
               that it is 0x00000000 when adding the value or 0xffffffff when
               subtracting it. */
            tcg_gen_xor_i32(t1, cpu_sr_q, cpu_sr_m);
            tcg_gen_subi_i32(t1, t1, 1);
            tcg_gen_neg_i32(t2, REG(B7_4));
            tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, zero, REG(B7_4), t2);
            tcg_gen_add2_i32(REG(B11_8), t1, REG(B11_8), zero, t2, t1);

            /* compute T and Q depending on carry */
            tcg_gen_andi_i32(t1, t1, 1);
            tcg_gen_xor_i32(t1, t1, t0);
            tcg_gen_xori_i32(cpu_sr_t, t1, 1);
            tcg_gen_xor_i32(cpu_sr_q, cpu_sr_m, t1);

            tcg_temp_free(zero);
            tcg_temp_free(t2);
            tcg_temp_free(t1);
            tcg_temp_free(t0);
        }
        return;
    case 0x300d:		/* dmuls.l Rm,Rn */
        tcg_gen_muls2_i32(cpu_macl, cpu_mach, REG(B7_4), REG(B11_8));
        return;
    case 0x3005:		/* dmulu.l Rm,Rn */
        tcg_gen_mulu2_i32(cpu_macl, cpu_mach, REG(B7_4), REG(B11_8));
        return;
    case 0x600e:		/* exts.b Rm,Rn */
        tcg_gen_ext8s_i32(REG(B11_8), REG(B7_4));
        return;
    case 0x600f:		/* exts.w Rm,Rn */
        tcg_gen_ext16s_i32(REG(B11_8), REG(B7_4));
        return;
    case 0x600c:		/* extu.b Rm,Rn */
        tcg_gen_ext8u_i32(REG(B11_8), REG(B7_4));
        return;
    case 0x600d:		/* extu.w Rm,Rn */
        tcg_gen_ext16u_i32(REG(B11_8), REG(B7_4));
        return;
    case 0x000f:		/* mac.l @Rm+,@Rn+ */
        {
            TCGv arg0, arg1;
            arg0 = tcg_temp_new();
            tcg_gen_qemu_ld_i32(arg0, REG(B7_4), ctx->memidx, MO_TESL);
            arg1 = tcg_temp_new();
            tcg_gen_qemu_ld_i32(arg1, REG(B11_8), ctx->memidx, MO_TESL);
            gen_helper_macl(cpu_env, arg0, arg1);
            tcg_temp_free(arg1);
            tcg_temp_free(arg0);
            tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 4);
            tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
        }
        return;
    case 0x400f:		/* mac.w @Rm+,@Rn+ */
        {
            TCGv arg0, arg1;
            arg0 = tcg_temp_new();
            /* mac.w operands are 16-bit words */
            tcg_gen_qemu_ld_i32(arg0, REG(B7_4), ctx->memidx, MO_TESW);
            arg1 = tcg_temp_new();
            tcg_gen_qemu_ld_i32(arg1, REG(B11_8), ctx->memidx, MO_TESW);
            gen_helper_macw(cpu_env, arg0, arg1);
            tcg_temp_free(arg1);
            tcg_temp_free(arg0);
            tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 2);
            tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 2);
        }
        return;
    case 0x0007:		/* mul.l Rm,Rn */
        tcg_gen_mul_i32(cpu_macl, REG(B7_4), REG(B11_8));
        return;
    case 0x200f:		/* muls.w Rm,Rn */
        {
            TCGv arg0, arg1;
            arg0 = tcg_temp_new();
            tcg_gen_ext16s_i32(arg0, REG(B7_4));
            arg1 = tcg_temp_new();
            tcg_gen_ext16s_i32(arg1, REG(B11_8));
            tcg_gen_mul_i32(cpu_macl, arg0, arg1);
            tcg_temp_free(arg1);
            tcg_temp_free(arg0);
        }
        return;
    case 0x200e:		/* mulu.w Rm,Rn */
        {
            TCGv arg0, arg1;
            arg0 = tcg_temp_new();
            tcg_gen_ext16u_i32(arg0, REG(B7_4));
            arg1 = tcg_temp_new();
            tcg_gen_ext16u_i32(arg1, REG(B11_8));
            tcg_gen_mul_i32(cpu_macl, arg0, arg1);
            tcg_temp_free(arg1);
            tcg_temp_free(arg0);
        }
        return;
    case 0x600b:		/* neg Rm,Rn */
        tcg_gen_neg_i32(REG(B11_8), REG(B7_4));
        return;
    case 0x600a:		/* negc Rm,Rn */
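        /* negc computes Rn = 0 - Rm - T: first form Rm + T with its
           carry via add2, then subtract that from zero with sub2 so
           the borrow lands in T (masked back to a single bit below). */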
        {
            TCGv t0 = tcg_const_i32(0);
            tcg_gen_add2_i32(REG(B11_8), cpu_sr_t,
                             REG(B7_4), t0, cpu_sr_t, t0);
            tcg_gen_sub2_i32(REG(B11_8), cpu_sr_t,
                             t0, t0, REG(B11_8), cpu_sr_t);
            tcg_gen_andi_i32(cpu_sr_t, cpu_sr_t, 1);
            tcg_temp_free(t0);
        }
        return;
    case 0x6007:		/* not Rm,Rn */
        tcg_gen_not_i32(REG(B11_8), REG(B7_4));
        return;
    case 0x200b:		/* or Rm,Rn */
        tcg_gen_or_i32(REG(B11_8), REG(B11_8), REG(B7_4));
        return;
    case 0x400c:		/* shad Rm,Rn */
        {
            TCGv t0 = tcg_temp_new();
            TCGv t1 = tcg_temp_new();
            TCGv t2 = tcg_temp_new();

            tcg_gen_andi_i32(t0, REG(B7_4), 0x1f);

            /* positive case: shift to the left */
            tcg_gen_shl_i32(t1, REG(B11_8), t0);

            /* negative case: shift to the right in two steps to
               correctly handle the -32 case */
            tcg_gen_xori_i32(t0, t0, 0x1f);
            tcg_gen_sar_i32(t2, REG(B11_8), t0);
            tcg_gen_sari_i32(t2, t2, 1);

            /* select between the two cases */
            tcg_gen_movi_i32(t0, 0);
            tcg_gen_movcond_i32(TCG_COND_GE, REG(B11_8), REG(B7_4), t0, t1, t2);

            tcg_temp_free(t0);
            tcg_temp_free(t1);
            tcg_temp_free(t2);
        }
        return;
    case 0x400d:		/* shld Rm,Rn */
        {
            TCGv t0 = tcg_temp_new();
            TCGv t1 = tcg_temp_new();
            TCGv t2 = tcg_temp_new();

            tcg_gen_andi_i32(t0, REG(B7_4), 0x1f);

            /* positive case: shift to the left */
            tcg_gen_shl_i32(t1, REG(B11_8), t0);

            /* negative case: shift to the right in two steps to
               correctly handle the -32 case */
            tcg_gen_xori_i32(t0, t0, 0x1f);
            tcg_gen_shr_i32(t2, REG(B11_8), t0);
            tcg_gen_shri_i32(t2, t2, 1);

            /* select between the two cases */
            tcg_gen_movi_i32(t0, 0);
            tcg_gen_movcond_i32(TCG_COND_GE, REG(B11_8), REG(B7_4), t0, t1, t2);

            tcg_temp_free(t0);
            tcg_temp_free(t1);
            tcg_temp_free(t2);
        }
        return;
    case 0x3008:		/* sub Rm,Rn */
        tcg_gen_sub_i32(REG(B11_8), REG(B11_8), REG(B7_4));
        return;
    case 0x300a:		/* subc Rm,Rn */
        {
            TCGv t0, t1;
            t0 = tcg_const_tl(0);
            t1 = tcg_temp_new();
            tcg_gen_add2_i32(t1, cpu_sr_t, cpu_sr_t, t0, REG(B7_4), t0);
            tcg_gen_sub2_i32(REG(B11_8), cpu_sr_t,
                             REG(B11_8), t0, t1, cpu_sr_t);
            tcg_gen_andi_i32(cpu_sr_t, cpu_sr_t, 1);
            tcg_temp_free(t0);
            tcg_temp_free(t1);
        }
        return;
    case 0x300b:		/* subv Rm,Rn */
        {
            TCGv t0, t1, t2;
            t0 = tcg_temp_new();
            tcg_gen_sub_i32(t0, REG(B11_8), REG(B7_4));
            t1 = tcg_temp_new();
            tcg_gen_xor_i32(t1, t0, REG(B7_4));
            t2 = tcg_temp_new();
            tcg_gen_xor_i32(t2, REG(B11_8), REG(B7_4));
            tcg_gen_and_i32(t1, t1, t2);
            tcg_temp_free(t2);
            tcg_gen_shri_i32(cpu_sr_t, t1, 31);
            tcg_temp_free(t1);
            tcg_gen_mov_i32(REG(B11_8), t0);
            tcg_temp_free(t0);
        }
        return;
    case 0x2008:		/* tst Rm,Rn */
        {
            TCGv val = tcg_temp_new();
            tcg_gen_and_i32(val, REG(B7_4), REG(B11_8));
            tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, val, 0);
            tcg_temp_free(val);
        }
        return;
    case 0x200a:		/* xor Rm,Rn */
        tcg_gen_xor_i32(REG(B11_8), REG(B11_8), REG(B7_4));
        return;
    case 0xf00c: /* fmov {F,D,X}Rm,{F,D,X}Rn - FPSCR: Nothing */
        CHECK_FPU_ENABLED
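        /* With FPSCR.SZ set this is a 64-bit move of a DR/XD register
           pair: XHACK() resolves the pair encoding to the flat bank
           index and the two 32-bit halves are moved separately. */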
        if (ctx->tbflags & FPSCR_SZ) {
            int xsrc = XHACK(B7_4);
            int xdst = XHACK(B11_8);
            tcg_gen_mov_i32(FREG(xdst), FREG(xsrc));
            tcg_gen_mov_i32(FREG(xdst + 1), FREG(xsrc + 1));
        } else {
            tcg_gen_mov_i32(FREG(B11_8), FREG(B7_4));
        }
        return;
    case 0xf00a: /* fmov {F,D,X}Rm,@Rn - FPSCR: Nothing */
        CHECK_FPU_ENABLED
        if (ctx->tbflags & FPSCR_SZ) {
            TCGv_i64 fp = tcg_temp_new_i64();
            gen_load_fpr64(ctx, fp, XHACK(B7_4));
            tcg_gen_qemu_st_i64(fp, REG(B11_8), ctx->memidx, MO_TEQ);
            tcg_temp_free_i64(fp);
        } else {
            tcg_gen_qemu_st_i32(FREG(B7_4), REG(B11_8), ctx->memidx, MO_TEUL);
        }
        return;
    case 0xf008: /* fmov @Rm,{F,D,X}Rn - FPSCR: Nothing */
        CHECK_FPU_ENABLED
        if (ctx->tbflags & FPSCR_SZ) {
            TCGv_i64 fp = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx, MO_TEQ);
            gen_store_fpr64(ctx, fp, XHACK(B11_8));
            tcg_temp_free_i64(fp);
        } else {
            tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx, MO_TEUL);
        }
        return;
    case 0xf009: /* fmov @Rm+,{F,D,X}Rn - FPSCR: Nothing */
        CHECK_FPU_ENABLED
        if (ctx->tbflags & FPSCR_SZ) {
            TCGv_i64 fp = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx, MO_TEQ);
            gen_store_fpr64(ctx, fp, XHACK(B11_8));
            tcg_temp_free_i64(fp);
            tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 8);
        } else {
            tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx, MO_TEUL);
            tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 4);
        }
        return;
    case 0xf00b: /* fmov {F,D,X}Rm,@-Rn - FPSCR: Nothing */
        CHECK_FPU_ENABLED
        {
            TCGv addr = tcg_temp_new_i32();
            if (ctx->tbflags & FPSCR_SZ) {
                TCGv_i64 fp = tcg_temp_new_i64();
                gen_load_fpr64(ctx, fp, XHACK(B7_4));
                tcg_gen_subi_i32(addr, REG(B11_8), 8);
                tcg_gen_qemu_st_i64(fp, addr, ctx->memidx, MO_TEQ);
                tcg_temp_free_i64(fp);
            } else {
                tcg_gen_subi_i32(addr, REG(B11_8), 4);
                tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, MO_TEUL);
            }
            tcg_gen_mov_i32(REG(B11_8), addr);
            tcg_temp_free(addr);
        }
        return;
    case 0xf006: /* fmov @(R0,Rm),{F,D,X}Rn - FPSCR: Nothing */
        CHECK_FPU_ENABLED
        {
            TCGv addr = tcg_temp_new_i32();
            tcg_gen_add_i32(addr, REG(B7_4), REG(0));
            if (ctx->tbflags & FPSCR_SZ) {
                TCGv_i64 fp = tcg_temp_new_i64();
                tcg_gen_qemu_ld_i64(fp, addr, ctx->memidx, MO_TEQ);
                gen_store_fpr64(ctx, fp, XHACK(B11_8));
                tcg_temp_free_i64(fp);
            } else {
                tcg_gen_qemu_ld_i32(FREG(B11_8), addr, ctx->memidx, MO_TEUL);
            }
            tcg_temp_free(addr);
        }
        return;
    case 0xf007: /* fmov {F,D,X}Rm,@(R0,Rn) - FPSCR: Nothing */
        CHECK_FPU_ENABLED
        {
            TCGv addr = tcg_temp_new();
            tcg_gen_add_i32(addr, REG(B11_8), REG(0));
            if (ctx->tbflags & FPSCR_SZ) {
                TCGv_i64 fp = tcg_temp_new_i64();
                gen_load_fpr64(ctx, fp, XHACK(B7_4));
                tcg_gen_qemu_st_i64(fp, addr, ctx->memidx, MO_TEQ);
                tcg_temp_free_i64(fp);
            } else {
                tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, MO_TEUL);
            }
            tcg_temp_free(addr);
        }
        return;
    case 0xf000: /* fadd Rm,Rn - FPSCR: R[PR,Enable.O/U/I]/W[Cause,Flag] */
    case 0xf001: /* fsub Rm,Rn - FPSCR: R[PR,Enable.O/U/I]/W[Cause,Flag] */
    case 0xf002: /* fmul Rm,Rn - FPSCR: R[PR,Enable.O/U/I]/W[Cause,Flag] */
    case 0xf003: /* fdiv Rm,Rn - FPSCR: R[PR,Enable.O/U/I]/W[Cause,Flag] */
    case 0xf004: /* fcmp/eq Rm,Rn - FPSCR: R[PR,Enable.V]/W[Cause,Flag] */
    case 0xf005: /* fcmp/gt Rm,Rn - FPSCR: R[PR,Enable.V]/W[Cause,Flag] */
        {
            CHECK_FPU_ENABLED
            if (ctx->tbflags & FPSCR_PR) {
                TCGv_i64 fp0, fp1;

                if (ctx->opcode & 0x0110) {
                    goto do_illegal;
                }
                fp0 = tcg_temp_new_i64();
                fp1 = tcg_temp_new_i64();
                gen_load_fpr64(ctx, fp0, B11_8);
                gen_load_fpr64(ctx, fp1, B7_4);
                switch (ctx->opcode & 0xf00f) {
                case 0xf000:		/* fadd Rm,Rn */
                    gen_helper_fadd_DT(fp0, cpu_env, fp0, fp1);
                    break;
                case 0xf001:		/* fsub Rm,Rn */
                    gen_helper_fsub_DT(fp0, cpu_env, fp0, fp1);
                    break;
                case 0xf002:		/* fmul Rm,Rn */
                    gen_helper_fmul_DT(fp0, cpu_env, fp0, fp1);
                    break;
                case 0xf003:		/* fdiv Rm,Rn */
                    gen_helper_fdiv_DT(fp0, cpu_env, fp0, fp1);
                    break;
                case 0xf004:		/* fcmp/eq Rm,Rn */
                    gen_helper_fcmp_eq_DT(cpu_sr_t, cpu_env, fp0, fp1);
                    return;
                case 0xf005:		/* fcmp/gt Rm,Rn */
                    gen_helper_fcmp_gt_DT(cpu_sr_t, cpu_env, fp0, fp1);
                    return;
                }
                gen_store_fpr64(ctx, fp0, B11_8);
                tcg_temp_free_i64(fp0);
                tcg_temp_free_i64(fp1);
            } else {
                switch (ctx->opcode & 0xf00f) {
                case 0xf000:		/* fadd Rm,Rn */
                    gen_helper_fadd_FT(FREG(B11_8), cpu_env,
                                       FREG(B11_8), FREG(B7_4));
                    break;
                case 0xf001:		/* fsub Rm,Rn */
                    gen_helper_fsub_FT(FREG(B11_8), cpu_env,
                                       FREG(B11_8), FREG(B7_4));
                    break;
                case 0xf002:		/* fmul Rm,Rn */
                    gen_helper_fmul_FT(FREG(B11_8), cpu_env,
                                       FREG(B11_8), FREG(B7_4));
                    break;
                case 0xf003:		/* fdiv Rm,Rn */
                    gen_helper_fdiv_FT(FREG(B11_8), cpu_env,
                                       FREG(B11_8), FREG(B7_4));
                    break;
                case 0xf004:		/* fcmp/eq Rm,Rn */
                    gen_helper_fcmp_eq_FT(cpu_sr_t, cpu_env,
                                          FREG(B11_8), FREG(B7_4));
                    return;
                case 0xf005:		/* fcmp/gt Rm,Rn */
                    gen_helper_fcmp_gt_FT(cpu_sr_t, cpu_env,
                                          FREG(B11_8), FREG(B7_4));
                    return;
                }
            }
        }
        return;
    case 0xf00e: /* fmac FR0,FRm,FRn */
        CHECK_FPU_ENABLED
        CHECK_FPSCR_PR_0
        gen_helper_fmac_FT(FREG(B11_8), cpu_env,
                           FREG(0), FREG(B7_4), FREG(B11_8));
        return;
    }

    switch (ctx->opcode & 0xff00) {
    case 0xc900:		/* and #imm,R0 */
        tcg_gen_andi_i32(REG(0), REG(0), B7_0);
        return;
    case 0xcd00:		/* and.b #imm,@(R0,GBR) */
        {
            TCGv addr, val;
            addr = tcg_temp_new();
            tcg_gen_add_i32(addr, REG(0), cpu_gbr);
            val = tcg_temp_new();
            tcg_gen_qemu_ld_i32(val, addr, ctx->memidx, MO_UB);
            tcg_gen_andi_i32(val, val, B7_0);
            tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_UB);
            tcg_temp_free(val);
            tcg_temp_free(addr);
        }
        return;
    case 0x8b00:		/* bf label */
        CHECK_NOT_DELAY_SLOT
        gen_conditional_jump(ctx, ctx->base.pc_next + 4 + B7_0s * 2, false);
        return;
    case 0x8f00:		/* bf/s label */
        CHECK_NOT_DELAY_SLOT
        tcg_gen_xori_i32(cpu_delayed_cond, cpu_sr_t, 1);
        ctx->delayed_pc = ctx->base.pc_next + 4 + B7_0s * 2;
        ctx->envflags |= DELAY_SLOT_CONDITIONAL;
        return;
    case 0x8900:		/* bt label */
        CHECK_NOT_DELAY_SLOT
        gen_conditional_jump(ctx, ctx->base.pc_next + 4 + B7_0s * 2, true);
        return;
    case 0x8d00:		/* bt/s label */
        CHECK_NOT_DELAY_SLOT
        tcg_gen_mov_i32(cpu_delayed_cond, cpu_sr_t);
        ctx->delayed_pc = ctx->base.pc_next + 4 + B7_0s * 2;
        ctx->envflags |= DELAY_SLOT_CONDITIONAL;
        return;
    case 0x8800:		/* cmp/eq #imm,R0 */
        tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, REG(0), B7_0s);
        return;
    case 0xc400:		/* mov.b @(disp,GBR),R0 */
        {
            TCGv addr = tcg_temp_new();
            tcg_gen_addi_i32(addr, cpu_gbr, B7_0);
            tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_SB);
            tcg_temp_free(addr);
        }
        return;
    case 0xc500:		/* mov.w @(disp,GBR),R0 */
        {
            TCGv addr = tcg_temp_new();
            tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 2);
            tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESW);
            tcg_temp_free(addr);
        }
        return;
    case 0xc600:		/* mov.l @(disp,GBR),R0 */
        {
            TCGv addr = tcg_temp_new();
            tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 4);
            tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESL);
            tcg_temp_free(addr);
        }
        return;
    case 0xc000:		/* mov.b R0,@(disp,GBR) */
        {
            TCGv addr = tcg_temp_new();
            tcg_gen_addi_i32(addr, cpu_gbr, B7_0);
            tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_UB);
            tcg_temp_free(addr);
        }
        return;
    case 0xc100:		/* mov.w R0,@(disp,GBR) */
        {
            TCGv addr = tcg_temp_new();
            tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 2);
            tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUW);
            tcg_temp_free(addr);
        }
        return;
    case 0xc200:		/* mov.l R0,@(disp,GBR) */
        {
            TCGv addr = tcg_temp_new();
            tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 4);
            tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUL);
            tcg_temp_free(addr);
        }
        return;
    case 0x8000:		/* mov.b R0,@(disp,Rn) */
        {
            TCGv addr = tcg_temp_new();
            tcg_gen_addi_i32(addr, REG(B7_4), B3_0);
            tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_UB);
            tcg_temp_free(addr);
        }
        return;
    case 0x8100:		/* mov.w R0,@(disp,Rn) */
        {
            TCGv addr = tcg_temp_new();
            tcg_gen_addi_i32(addr, REG(B7_4), B3_0 * 2);
            tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUW);
            tcg_temp_free(addr);
        }
        return;
    case 0x8400:		/* mov.b @(disp,Rn),R0 */
        {
            TCGv addr = tcg_temp_new();
            tcg_gen_addi_i32(addr, REG(B7_4), B3_0);
            tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_SB);
            tcg_temp_free(addr);
        }
        return;
    case 0x8500:		/* mov.w @(disp,Rn),R0 */
        {
            TCGv addr = tcg_temp_new();
            tcg_gen_addi_i32(addr, REG(B7_4), B3_0 * 2);
            tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESW);
            tcg_temp_free(addr);
        }
        return;
    case 0xc700:		/* mova @(disp,PC),R0 */
        tcg_gen_movi_i32(REG(0), ((ctx->base.pc_next & 0xfffffffc) +
                                  4 + B7_0 * 4) & ~3);
        return;
    case 0xcb00:		/* or #imm,R0 */
        tcg_gen_ori_i32(REG(0), REG(0), B7_0);
        return;
    case 0xcf00:		/* or.b #imm,@(R0,GBR) */
        {
            TCGv addr, val;
            addr = tcg_temp_new();
            tcg_gen_add_i32(addr, REG(0), cpu_gbr);
            val = tcg_temp_new();
            tcg_gen_qemu_ld_i32(val, addr, ctx->memidx, MO_UB);
            tcg_gen_ori_i32(val, val, B7_0);
            tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_UB);
            tcg_temp_free(val);
            tcg_temp_free(addr);
        }
        return;
    case 0xc300:		/* trapa #imm */
        {
            TCGv imm;
            CHECK_NOT_DELAY_SLOT
            gen_save_cpu_state(ctx, true);
            imm = tcg_const_i32(B7_0);
            gen_helper_trapa(cpu_env, imm);
            tcg_temp_free(imm);
            ctx->base.is_jmp = DISAS_NORETURN;
        }
        return;
    case 0xc800:		/* tst #imm,R0 */
        {
            TCGv val = tcg_temp_new();
            tcg_gen_andi_i32(val, REG(0), B7_0);
            tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, val, 0);
            tcg_temp_free(val);
        }
        return;
    case 0xcc00:		/* tst.b #imm,@(R0,GBR) */
        {
            TCGv val = tcg_temp_new();
            tcg_gen_add_i32(val, REG(0), cpu_gbr);
            tcg_gen_qemu_ld_i32(val, val, ctx->memidx, MO_UB);
            tcg_gen_andi_i32(val, val, B7_0);
            tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, val, 0);
            tcg_temp_free(val);
        }
        return;
    case 0xca00:		/* xor #imm,R0 */
        tcg_gen_xori_i32(REG(0), REG(0), B7_0);
        return;
    case 0xce00:		/* xor.b #imm,@(R0,GBR) */
        {
            TCGv addr, val;
            addr = tcg_temp_new();
            tcg_gen_add_i32(addr, REG(0), cpu_gbr);
            val = tcg_temp_new();
            tcg_gen_qemu_ld_i32(val, addr, ctx->memidx, MO_UB);
            tcg_gen_xori_i32(val, val, B7_0);
            tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_UB);
            tcg_temp_free(val);
            tcg_temp_free(addr);
        }
        return;
    }

    switch (ctx->opcode & 0xf08f) {
    case 0x408e:		/* ldc Rm,Rn_BANK */
        CHECK_PRIVILEGED
        tcg_gen_mov_i32(ALTREG(B6_4), REG(B11_8));
        return;
    case 0x4087:		/* ldc.l @Rm+,Rn_BANK */
        CHECK_PRIVILEGED
        tcg_gen_qemu_ld_i32(ALTREG(B6_4), REG(B11_8), ctx->memidx, MO_TESL);
        tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
        return;
    case 0x0082:		/* stc Rm_BANK,Rn */
        CHECK_PRIVILEGED
        tcg_gen_mov_i32(REG(B11_8), ALTREG(B6_4));
        return;
    case 0x4083:		/* stc.l Rm_BANK,@-Rn */
        CHECK_PRIVILEGED
        {
            TCGv addr = tcg_temp_new();
            tcg_gen_subi_i32(addr, REG(B11_8), 4);
            tcg_gen_qemu_st_i32(ALTREG(B6_4), addr, ctx->memidx, MO_TEUL);
            tcg_gen_mov_i32(REG(B11_8), addr);
            tcg_temp_free(addr);
        }
        return;
    }

    switch (ctx->opcode & 0xf0ff) {
    case 0x0023:		/* braf Rn */
        CHECK_NOT_DELAY_SLOT
        tcg_gen_addi_i32(cpu_delayed_pc, REG(B11_8), ctx->base.pc_next + 4);
        ctx->envflags |= DELAY_SLOT;
        ctx->delayed_pc = (uint32_t) - 1;
        return;
    case 0x0003:		/* bsrf Rn */
        CHECK_NOT_DELAY_SLOT
        tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
        tcg_gen_add_i32(cpu_delayed_pc, REG(B11_8), cpu_pr);
        ctx->envflags |= DELAY_SLOT;
        ctx->delayed_pc = (uint32_t) - 1;
        return;
    case 0x4015:		/* cmp/pl Rn */
        tcg_gen_setcondi_i32(TCG_COND_GT, cpu_sr_t, REG(B11_8), 0);
        return;
    case 0x4011:		/* cmp/pz Rn */
        tcg_gen_setcondi_i32(TCG_COND_GE, cpu_sr_t, REG(B11_8), 0);
        return;
    case 0x4010:		/* dt Rn */
        tcg_gen_subi_i32(REG(B11_8), REG(B11_8), 1);
        tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, REG(B11_8), 0);
        return;
    case 0x402b:		/* jmp @Rn */
        CHECK_NOT_DELAY_SLOT
        tcg_gen_mov_i32(cpu_delayed_pc, REG(B11_8));
        ctx->envflags |= DELAY_SLOT;
        ctx->delayed_pc = (uint32_t) - 1;
        return;
    case 0x400b:		/* jsr @Rn */
        CHECK_NOT_DELAY_SLOT
        tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
        tcg_gen_mov_i32(cpu_delayed_pc, REG(B11_8));
        ctx->envflags |= DELAY_SLOT;
        ctx->delayed_pc = (uint32_t) - 1;
        return;
    case 0x400e:		/* ldc Rm,SR */
        CHECK_PRIVILEGED
        {
            TCGv val = tcg_temp_new();
            tcg_gen_andi_i32(val, REG(B11_8), 0x700083f3);
            gen_write_sr(val);
            tcg_temp_free(val);
            ctx->base.is_jmp = DISAS_STOP;
        }
        return;
    case 0x4007:		/* ldc.l @Rm+,SR */
        CHECK_PRIVILEGED
        {
            TCGv val = tcg_temp_new();
            tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx, MO_TESL);
            tcg_gen_andi_i32(val, val, 0x700083f3);
            gen_write_sr(val);
            tcg_temp_free(val);
            tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
            ctx->base.is_jmp = DISAS_STOP;
        }
        return;
    case 0x0002:		/* stc SR,Rn */
        CHECK_PRIVILEGED
        gen_read_sr(REG(B11_8));
        return;
    case 0x4003:		/* stc SR,@-Rn */
        CHECK_PRIVILEGED
        {
            TCGv addr = tcg_temp_new();
            TCGv val = tcg_temp_new();
            tcg_gen_subi_i32(addr, REG(B11_8), 4);
            gen_read_sr(val);
            tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL);
            tcg_gen_mov_i32(REG(B11_8), addr);
            tcg_temp_free(val);
            tcg_temp_free(addr);
        }
        return;
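/* The LD/ST/LDST macros below expand to the case handlers for the
   lds/ldc and sts/stc families targeting a control or system
   register: a plain register move plus the post-increment load and
   pre-decrement store forms, each preceded by the given privilege
   or FPU check. */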
1445 #define LD(reg,ldnum,ldpnum,prechk)		\
1446   case ldnum:							\
1447     prechk    							\
1448     tcg_gen_mov_i32 (cpu_##reg, REG(B11_8));			\
1449     return;							\
1450   case ldpnum:							\
1451     prechk    							\
1452     tcg_gen_qemu_ld_i32(cpu_##reg, REG(B11_8), ctx->memidx, MO_TESL); \
1453     tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);		\
1454     return;
1455 #define ST(reg,stnum,stpnum,prechk)		\
1456   case stnum:							\
1457     prechk    							\
1458     tcg_gen_mov_i32 (REG(B11_8), cpu_##reg);			\
1459     return;							\
1460   case stpnum:							\
1461     prechk    							\
1462     {								\
1463 	TCGv addr = tcg_temp_new();				\
1464 	tcg_gen_subi_i32(addr, REG(B11_8), 4);			\
1465         tcg_gen_qemu_st_i32(cpu_##reg, addr, ctx->memidx, MO_TEUL); \
1466 	tcg_gen_mov_i32(REG(B11_8), addr);			\
1467 	tcg_temp_free(addr);					\
1468     }								\
1469     return;
1470 #define LDST(reg,ldnum,ldpnum,stnum,stpnum,prechk)		\
1471 	LD(reg,ldnum,ldpnum,prechk)				\
1472 	ST(reg,stnum,stpnum,prechk)
1473 	LDST(gbr,  0x401e, 0x4017, 0x0012, 0x4013, {})
1474 	LDST(vbr,  0x402e, 0x4027, 0x0022, 0x4023, CHECK_PRIVILEGED)
1475 	LDST(ssr,  0x403e, 0x4037, 0x0032, 0x4033, CHECK_PRIVILEGED)
1476 	LDST(spc,  0x404e, 0x4047, 0x0042, 0x4043, CHECK_PRIVILEGED)
1477 	ST(sgr,  0x003a, 0x4032, CHECK_PRIVILEGED)
1478         LD(sgr,  0x403a, 0x4036, CHECK_PRIVILEGED CHECK_SH4A)
1479 	LDST(dbr,  0x40fa, 0x40f6, 0x00fa, 0x40f2, CHECK_PRIVILEGED)
1480 	LDST(mach, 0x400a, 0x4006, 0x000a, 0x4002, {})
1481 	LDST(macl, 0x401a, 0x4016, 0x001a, 0x4012, {})
1482 	LDST(pr,   0x402a, 0x4026, 0x002a, 0x4022, {})
1483 	LDST(fpul, 0x405a, 0x4056, 0x005a, 0x4052, {CHECK_FPU_ENABLED})
1484     case 0x406a:		/* lds Rm,FPSCR */
1485 	CHECK_FPU_ENABLED
1486         gen_helper_ld_fpscr(cpu_env, REG(B11_8));
1487         ctx->base.is_jmp = DISAS_STOP;
1488 	return;
1489     case 0x4066:		/* lds.l @Rm+,FPSCR */
1490 	CHECK_FPU_ENABLED
1491 	{
1492 	    TCGv addr = tcg_temp_new();
1493             tcg_gen_qemu_ld_i32(addr, REG(B11_8), ctx->memidx, MO_TESL);
1494 	    tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
1495             gen_helper_ld_fpscr(cpu_env, addr);
1496 	    tcg_temp_free(addr);
1497             ctx->base.is_jmp = DISAS_STOP;
1498 	}
1499 	return;
1500     case 0x006a:		/* sts FPSCR,Rn */
1501 	CHECK_FPU_ENABLED
1502 	tcg_gen_andi_i32(REG(B11_8), cpu_fpscr, 0x003fffff);
1503 	return;
1504     case 0x4062:		/* sts FPSCR,@-Rn */
1505 	CHECK_FPU_ENABLED
1506 	{
1507 	    TCGv addr, val;
1508 	    val = tcg_temp_new();
1509 	    tcg_gen_andi_i32(val, cpu_fpscr, 0x003fffff);
1510 	    addr = tcg_temp_new();
1511 	    tcg_gen_subi_i32(addr, REG(B11_8), 4);
1512             tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL);
1513 	    tcg_gen_mov_i32(REG(B11_8), addr);
1514 	    tcg_temp_free(addr);
1515 	    tcg_temp_free(val);
1516 	}
1517 	return;
1518     case 0x00c3:		/* movca.l R0,@Rm */
1519         {
1520             TCGv val = tcg_temp_new();
1521             tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx, MO_TEUL);
1522             gen_helper_movcal(cpu_env, REG(B11_8), val);
1523             tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
1524             tcg_temp_free(val);
1525         }
1526         ctx->has_movcal = 1;
1527 	return;
1528     case 0x40a9:                /* movua.l @Rm,R0 */
1529         CHECK_SH4A
1530         /* Load non-boundary-aligned data */
1531         tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx,
1532                             MO_TEUL | MO_UNALN);
1533         return;
1534     case 0x40e9:                /* movua.l @Rm+,R0 */
1535         CHECK_SH4A
1536         /* Load non-boundary-aligned data */
1537         tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx,
1538                             MO_TEUL | MO_UNALN);
1539         tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
1540         return;
1541     case 0x0029:		/* movt Rn */
1542         tcg_gen_mov_i32(REG(B11_8), cpu_sr_t);
1543 	return;
1544     case 0x0073:
1545         /* MOVCO.L
1546          *     LDST -> T
1547          *     If (T == 1) R0 -> (Rn)
1548          *     0 -> LDST
1549          *
1550          * The above description doesn't work in a parallel context.
1551          * Since we currently support no SMP boards, this implies user-mode.
1552          * But we can still support the official mechanism while user-mode
1553          * is single-threaded.  */
1554         CHECK_SH4A
1555         {
1556             TCGLabel *fail = gen_new_label();
1557             TCGLabel *done = gen_new_label();
1558 
1559             if ((tb_cflags(ctx->base.tb) & CF_PARALLEL)) {
1560                 TCGv tmp;
1561 
1562                 tcg_gen_brcond_i32(TCG_COND_NE, REG(B11_8),
1563                                    cpu_lock_addr, fail);
1564                 tmp = tcg_temp_new();
1565                 tcg_gen_atomic_cmpxchg_i32(tmp, REG(B11_8), cpu_lock_value,
1566                                            REG(0), ctx->memidx, MO_TEUL);
1567                 tcg_gen_setcond_i32(TCG_COND_EQ, cpu_sr_t, tmp, cpu_lock_value);
1568                 tcg_temp_free(tmp);
1569             } else {
1570                 tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_lock_addr, -1, fail);
1571                 tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
1572                 tcg_gen_movi_i32(cpu_sr_t, 1);
1573             }
1574             tcg_gen_br(done);
1575 
1576             gen_set_label(fail);
1577             tcg_gen_movi_i32(cpu_sr_t, 0);
1578 
1579             gen_set_label(done);
1580             tcg_gen_movi_i32(cpu_lock_addr, -1);
1581         }
1582         return;
1583     case 0x0063:
1584         /* MOVLI.L @Rm,R0
1585          *     1 -> LDST
1586          *     (Rm) -> R0
1587          *     When an interrupt/exception
1588          *     occurs, 0 -> LDST
1589          *
1590          * In a parallel context, we must also save the loaded value
1591          * for use with the cmpxchg that we'll use with movco.l.  */
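        /* Illustrative guest usage is an LL/SC retry loop, e.g. an
         * atomic increment:
         *    1: movli.l @r4,r0
         *       add     #1,r0
         *       movco.l r0,@r4     ! T = 1 on success
         *       bf      1b
         */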
1592         CHECK_SH4A
1593         if ((tb_cflags(ctx->base.tb) & CF_PARALLEL)) {
1594             TCGv tmp = tcg_temp_new();
1595             tcg_gen_mov_i32(tmp, REG(B11_8));
1596             tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TESL);
1597             tcg_gen_mov_i32(cpu_lock_value, REG(0));
1598             tcg_gen_mov_i32(cpu_lock_addr, tmp);
1599             tcg_temp_free(tmp);
1600         } else {
1601             tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TESL);
1602             tcg_gen_movi_i32(cpu_lock_addr, 0);
1603         }
1604         return;
1605     case 0x0093:		/* ocbi @Rn */
1606 	{
1607             gen_helper_ocbi(cpu_env, REG(B11_8));
1608 	}
1609 	return;
1610     case 0x00a3:		/* ocbp @Rn */
1611     case 0x00b3:		/* ocbwb @Rn */
1612         /* These instructions are supposed to do nothing in case of
1613            a cache miss. Given that we only partially emulate caches,
1614            it is safe to simply ignore them. */
1615 	return;
1616     case 0x0083:		/* pref @Rn */
1617 	return;
1618     case 0x00d3:		/* prefi @Rn */
1619         CHECK_SH4A
1620         return;
1621     case 0x00e3:		/* icbi @Rn */
1622         CHECK_SH4A
1623         return;
1624     case 0x00ab:		/* synco */
1625         CHECK_SH4A
1626         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
1627         return;
1628     case 0x4024:		/* rotcl Rn */
1629 	{
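            /* Rotate left through T: the new T is the old bit 31 and
               the new bit 0 is the old T.  */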
1630 	    TCGv tmp = tcg_temp_new();
1631             tcg_gen_mov_i32(tmp, cpu_sr_t);
1632             tcg_gen_shri_i32(cpu_sr_t, REG(B11_8), 31);
1633 	    tcg_gen_shli_i32(REG(B11_8), REG(B11_8), 1);
1634             tcg_gen_or_i32(REG(B11_8), REG(B11_8), tmp);
1635 	    tcg_temp_free(tmp);
1636 	}
1637 	return;
1638     case 0x4025:		/* rotcr Rn */
1639 	{
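            /* Rotate right through T: the new T is the old bit 0 and
               the new bit 31 is the old T.  */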
1640 	    TCGv tmp = tcg_temp_new();
1641             tcg_gen_shli_i32(tmp, cpu_sr_t, 31);
1642             tcg_gen_andi_i32(cpu_sr_t, REG(B11_8), 1);
1643 	    tcg_gen_shri_i32(REG(B11_8), REG(B11_8), 1);
1644             tcg_gen_or_i32(REG(B11_8), REG(B11_8), tmp);
1645 	    tcg_temp_free(tmp);
1646 	}
1647 	return;
1648     case 0x4004:		/* rotl Rn */
1649 	tcg_gen_rotli_i32(REG(B11_8), REG(B11_8), 1);
1650         tcg_gen_andi_i32(cpu_sr_t, REG(B11_8), 1);
1651 	return;
1652     case 0x4005:		/* rotr Rn */
1653         tcg_gen_andi_i32(cpu_sr_t, REG(B11_8), 1);
1654 	tcg_gen_rotri_i32(REG(B11_8), REG(B11_8), 1);
1655 	return;
1656     case 0x4000:		/* shll Rn */
1657     case 0x4020:		/* shal Rn */
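        /* Logical and arithmetic shift left are the same operation:
           T receives the old bit 31.  */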
1658         tcg_gen_shri_i32(cpu_sr_t, REG(B11_8), 31);
1659 	tcg_gen_shli_i32(REG(B11_8), REG(B11_8), 1);
1660 	return;
1661     case 0x4021:		/* shar Rn */
1662         tcg_gen_andi_i32(cpu_sr_t, REG(B11_8), 1);
1663 	tcg_gen_sari_i32(REG(B11_8), REG(B11_8), 1);
1664 	return;
1665     case 0x4001:		/* shlr Rn */
1666         tcg_gen_andi_i32(cpu_sr_t, REG(B11_8), 1);
1667 	tcg_gen_shri_i32(REG(B11_8), REG(B11_8), 1);
1668 	return;
1669     case 0x4008:		/* shll2 Rn */
1670 	tcg_gen_shli_i32(REG(B11_8), REG(B11_8), 2);
1671 	return;
1672     case 0x4018:		/* shll8 Rn */
1673 	tcg_gen_shli_i32(REG(B11_8), REG(B11_8), 8);
1674 	return;
1675     case 0x4028:		/* shll16 Rn */
1676 	tcg_gen_shli_i32(REG(B11_8), REG(B11_8), 16);
1677 	return;
1678     case 0x4009:		/* shlr2 Rn */
1679 	tcg_gen_shri_i32(REG(B11_8), REG(B11_8), 2);
1680 	return;
1681     case 0x4019:		/* shlr8 Rn */
1682 	tcg_gen_shri_i32(REG(B11_8), REG(B11_8), 8);
1683 	return;
1684     case 0x4029:		/* shlr16 Rn */
1685 	tcg_gen_shri_i32(REG(B11_8), REG(B11_8), 16);
1686 	return;
1687     case 0x401b:		/* tas.b @Rn */
1688         {
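            /* Atomically OR 0x80 into the byte at @Rn; T is set iff
               the old value was zero.  */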
1689             TCGv val = tcg_const_i32(0x80);
1690             tcg_gen_atomic_fetch_or_i32(val, REG(B11_8), val,
1691                                         ctx->memidx, MO_UB);
1692             tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, val, 0);
1693             tcg_temp_free(val);
1694         }
1695         return;
1696     case 0xf00d: /* fsts FPUL,FRn - FPSCR: Nothing */
1697 	CHECK_FPU_ENABLED
1698         tcg_gen_mov_i32(FREG(B11_8), cpu_fpul);
1699 	return;
1700     case 0xf01d: /* flds FRm,FPUL - FPSCR: Nothing */
1701 	CHECK_FPU_ENABLED
1702         tcg_gen_mov_i32(cpu_fpul, FREG(B11_8));
1703 	return;
1704     case 0xf02d: /* float FPUL,FRn/DRn - FPSCR: R[PR,Enable.I]/W[Cause,Flag] */
1705 	CHECK_FPU_ENABLED
1706         if (ctx->tbflags & FPSCR_PR) {
1707 	    TCGv_i64 fp;
1708             if (ctx->opcode & 0x0100) {
1709                 goto do_illegal;
1710             }
1711 	    fp = tcg_temp_new_i64();
1712             gen_helper_float_DT(fp, cpu_env, cpu_fpul);
1713             gen_store_fpr64(ctx, fp, B11_8);
1714 	    tcg_temp_free_i64(fp);
1715 	}
1716 	else {
1717             gen_helper_float_FT(FREG(B11_8), cpu_env, cpu_fpul);
1718 	}
1719 	return;
1720     case 0xf03d: /* ftrc FRm/DRm,FPUL - FPSCR: R[PR,Enable.V]/W[Cause,Flag] */
1721 	CHECK_FPU_ENABLED
1722         if (ctx->tbflags & FPSCR_PR) {
1723 	    TCGv_i64 fp;
1724             if (ctx->opcode & 0x0100) {
1725                 goto do_illegal;
1726             }
1727 	    fp = tcg_temp_new_i64();
1728             gen_load_fpr64(ctx, fp, B11_8);
1729             gen_helper_ftrc_DT(cpu_fpul, cpu_env, fp);
1730 	    tcg_temp_free_i64(fp);
1731 	}
1732 	else {
1733             gen_helper_ftrc_FT(cpu_fpul, cpu_env, FREG(B11_8));
1734 	}
1735 	return;
1736     case 0xf04d: /* fneg FRn/DRn - FPSCR: Nothing */
1737 	CHECK_FPU_ENABLED
1738         tcg_gen_xori_i32(FREG(B11_8), FREG(B11_8), 0x80000000);
1739 	return;
1740     case 0xf05d: /* fabs FRn/DRn - FPSCR: Nothing */
1741 	CHECK_FPU_ENABLED
1742         tcg_gen_andi_i32(FREG(B11_8), FREG(B11_8), 0x7fffffff);
1743 	return;
1744     case 0xf06d: /* fsqrt FRn */
1745 	CHECK_FPU_ENABLED
1746         if (ctx->tbflags & FPSCR_PR) {
1747             if (ctx->opcode & 0x0100) {
1748                 goto do_illegal;
1749             }
1750 	    TCGv_i64 fp = tcg_temp_new_i64();
1751             gen_load_fpr64(ctx, fp, B11_8);
1752             gen_helper_fsqrt_DT(fp, cpu_env, fp);
1753             gen_store_fpr64(ctx, fp, B11_8);
1754 	    tcg_temp_free_i64(fp);
1755 	} else {
1756             gen_helper_fsqrt_FT(FREG(B11_8), cpu_env, FREG(B11_8));
1757 	}
1758 	return;
1759     case 0xf07d: /* fsrra FRn */
1760 	CHECK_FPU_ENABLED
1761         CHECK_FPSCR_PR_0
1762         gen_helper_fsrra_FT(FREG(B11_8), cpu_env, FREG(B11_8));
1763 	break;
1764     case 0xf08d: /* fldi0 FRn - FPSCR: R[PR] */
1765 	CHECK_FPU_ENABLED
1766         CHECK_FPSCR_PR_0
1767         tcg_gen_movi_i32(FREG(B11_8), 0);
1768         return;
1769     case 0xf09d: /* fldi1 FRn - FPSCR: R[PR] */
1770 	CHECK_FPU_ENABLED
1771         CHECK_FPSCR_PR_0
1772         tcg_gen_movi_i32(FREG(B11_8), 0x3f800000);
1773         return;
1774     case 0xf0ad: /* fcnvsd FPUL,DRn */
1775 	CHECK_FPU_ENABLED
1776 	{
1777 	    TCGv_i64 fp = tcg_temp_new_i64();
1778             gen_helper_fcnvsd_FT_DT(fp, cpu_env, cpu_fpul);
1779             gen_store_fpr64(ctx, fp, B11_8);
1780 	    tcg_temp_free_i64(fp);
1781 	}
1782 	return;
1783     case 0xf0bd: /* fcnvds DRn,FPUL */
1784 	CHECK_FPU_ENABLED
1785 	{
1786 	    TCGv_i64 fp = tcg_temp_new_i64();
1787             gen_load_fpr64(ctx, fp, B11_8);
1788             gen_helper_fcnvds_DT_FT(cpu_fpul, cpu_env, fp);
1789 	    tcg_temp_free_i64(fp);
1790 	}
1791 	return;
1792     case 0xf0ed: /* fipr FVm,FVn */
1793         CHECK_FPU_ENABLED
1794         CHECK_FPSCR_PR_1
1795         {
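            /* Inner product of the four-element vectors FVm and FVn; per
               the ISA the result is written to the last element of FVn.  */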
1796             TCGv m = tcg_const_i32((ctx->opcode >> 8) & 3);
1797             TCGv n = tcg_const_i32((ctx->opcode >> 10) & 3);
1798             gen_helper_fipr(cpu_env, m, n);
1799             tcg_temp_free(m);
1800             tcg_temp_free(n);
1801             return;
1802         }
1803         break;
1804     case 0xf0fd: /* ftrv XMTRX,FVn */
1805         CHECK_FPU_ENABLED
1806         CHECK_FPSCR_PR_1
1807         {
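            /* Transform the four-element vector FVn by the 4x4 matrix
               XMTRX (held in the other FP register bank).  */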
1808             if ((ctx->opcode & 0x0300) != 0x0100) {
1809                 goto do_illegal;
1810             }
1811             TCGv n = tcg_const_i32((ctx->opcode >> 10) & 3);
1812             gen_helper_ftrv(cpu_env, n);
1813             tcg_temp_free(n);
1814             return;
1815         }
1816         break;
1817     }
1818 #if 0
1819     fprintf(stderr, "unknown instruction 0x%04x at pc 0x%08x\n",
1820             ctx->opcode, ctx->base.pc_next);
1821     fflush(stderr);
1822 #endif
1823  do_illegal:
1824     if (ctx->envflags & DELAY_SLOT_MASK) {
1825  do_illegal_slot:
1826         gen_save_cpu_state(ctx, true);
1827         gen_helper_raise_slot_illegal_instruction(cpu_env);
1828     } else {
1829         gen_save_cpu_state(ctx, true);
1830         gen_helper_raise_illegal_instruction(cpu_env);
1831     }
1832     ctx->base.is_jmp = DISAS_NORETURN;
1833     return;
1834 
1835  do_fpu_disabled:
1836     gen_save_cpu_state(ctx, true);
1837     if (ctx->envflags & DELAY_SLOT_MASK) {
1838         gen_helper_raise_slot_fpu_disable(cpu_env);
1839     } else {
1840         gen_helper_raise_fpu_disable(cpu_env);
1841     }
1842     ctx->base.is_jmp = DISAS_NORETURN;
1843     return;
1844 }
1845 
1846 static void decode_opc(DisasContext * ctx)
1847 {
1848     uint32_t old_flags = ctx->envflags;
1849 
1850     _decode_opc(ctx);
1851 
1852     if (old_flags & DELAY_SLOT_MASK) {
1853         /* go out of the delay slot */
1854         ctx->envflags &= ~DELAY_SLOT_MASK;
1855 
1856         /* When in an exclusive region, we must continue to the end
1857            for conditional branches.  */
1858         if (ctx->tbflags & GUSA_EXCLUSIVE
1859             && old_flags & DELAY_SLOT_CONDITIONAL) {
1860             gen_delayed_conditional_jump(ctx);
1861             return;
1862         }
1863         /* Otherwise this is probably an invalid gUSA region.
1864            Drop the GUSA bits so the next TB doesn't see them.  */
1865         ctx->envflags &= ~GUSA_MASK;
1866 
1867         tcg_gen_movi_i32(cpu_flags, ctx->envflags);
1868         if (old_flags & DELAY_SLOT_CONDITIONAL) {
1869 	    gen_delayed_conditional_jump(ctx);
1870         } else {
1871             gen_jump(ctx);
1872 	}
1873     }
1874 }
1875 
1876 #ifdef CONFIG_USER_ONLY
1877 /* For uniprocessors, SH4 uses optimistic restartable atomic sequences.
1878    Upon an interrupt, a real kernel would simply notice magic values in
1879    the registers and reset the PC to the start of the sequence.
1880 
1881    For QEMU, we cannot do this in quite the same way.  Instead, we notice
1882    the normal start of such a sequence (mov #-x,r15).  While we can handle
1883    any sequence via cpu_exec_step_atomic, we can recognize the "normal"
1884    sequences and transform them into atomic operations as seen by the host.
1885 */
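/* For example, an atomic increment compiles to a gUSA region roughly
   like this (illustrative; actual registers and operations vary):

        mova   1f,r0        ! r0 = address of the region end
        mov    r15,r1       ! save the stack pointer
        mov    #(0f-1f),r15 ! negative region size in r15 marks the region
     0: mov.l  @r4,r2       ! load
        add    #1,r2        ! operation
        mov.l  r2,@r4       ! store
     1: mov    r1,r15       ! restore sp, leaving the region

   decode_gusa() matches the load/(optional move)/operation/store body
   and replaces it with a single host atomic operation.  */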
1886 static void decode_gusa(DisasContext *ctx, CPUSH4State *env)
1887 {
1888     uint16_t insns[5];
1889     int ld_adr, ld_dst, ld_mop;
1890     int op_dst, op_src, op_opc;
1891     int mv_src, mt_dst, st_src, st_mop;
1892     TCGv op_arg;
1893     uint32_t pc = ctx->base.pc_next;
1894     uint32_t pc_end = ctx->base.tb->cs_base;
1895     int max_insns = (pc_end - pc) / 2;
1896     int i;
1897 
1898     /* The state machine below will consume only a few insns.
1899        If there are more than that in a region, fail now.  */
1900     if (max_insns > ARRAY_SIZE(insns)) {
1901         goto fail;
1902     }
1903 
1904     /* Read all of the insns for the region.  */
1905     for (i = 0; i < max_insns; ++i) {
1906         insns[i] = translator_lduw(env, &ctx->base, pc + i * 2);
1907     }
1908 
1909     ld_adr = ld_dst = ld_mop = -1;
1910     mv_src = -1;
1911     op_dst = op_src = op_opc = -1;
1912     mt_dst = -1;
1913     st_src = st_mop = -1;
1914     op_arg = NULL;
1915     i = 0;
1916 
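/* Consume the next buffered insn, or abandon the match if the region
   is exhausted.  */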
1917 #define NEXT_INSN \
1918     do { if (i >= max_insns) goto fail; ctx->opcode = insns[i++]; } while (0)
1919 
1920     /*
1921      * Expect a load to begin the region.
1922      */
1923     NEXT_INSN;
1924     switch (ctx->opcode & 0xf00f) {
1925     case 0x6000: /* mov.b @Rm,Rn */
1926         ld_mop = MO_SB;
1927         break;
1928     case 0x6001: /* mov.w @Rm,Rn */
1929         ld_mop = MO_TESW;
1930         break;
1931     case 0x6002: /* mov.l @Rm,Rn */
1932         ld_mop = MO_TESL;
1933         break;
1934     default:
1935         goto fail;
1936     }
1937     ld_adr = B7_4;
1938     ld_dst = B11_8;
1939     if (ld_adr == ld_dst) {
1940         goto fail;
1941     }
1942     /* Unless we see a mov, any two-operand operation must use ld_dst.  */
1943     op_dst = ld_dst;
1944 
1945     /*
1946      * Expect an optional register move.
1947      */
1948     NEXT_INSN;
1949     switch (ctx->opcode & 0xf00f) {
1950     case 0x6003: /* mov Rm,Rn */
1951         /*
1952          * Here we want to recognize ld_dst being saved for later consumption,
1953          * or for another input register being copied so that ld_dst need not
1954          * be clobbered during the operation.
1955          */
1956         op_dst = B11_8;
1957         mv_src = B7_4;
1958         if (op_dst == ld_dst) {
1959             /* Overwriting the load output.  */
1960             goto fail;
1961         }
1962         if (mv_src != ld_dst) {
1963             /* Copying a new input; constrain op_src to match the load.  */
1964             op_src = ld_dst;
1965         }
1966         break;
1967 
1968     default:
1969         /* Put back and re-examine as operation.  */
1970         --i;
1971     }
1972 
1973     /*
1974      * Expect the operation.
1975      */
1976     NEXT_INSN;
1977     switch (ctx->opcode & 0xf00f) {
1978     case 0x300c: /* add Rm,Rn */
1979         op_opc = INDEX_op_add_i32;
1980         goto do_reg_op;
1981     case 0x2009: /* and Rm,Rn */
1982         op_opc = INDEX_op_and_i32;
1983         goto do_reg_op;
1984     case 0x200a: /* xor Rm,Rn */
1985         op_opc = INDEX_op_xor_i32;
1986         goto do_reg_op;
1987     case 0x200b: /* or Rm,Rn */
1988         op_opc = INDEX_op_or_i32;
1989     do_reg_op:
1990         /* The operation register should be as expected, and the
1991            other input cannot depend on the load.  */
1992         if (op_dst != B11_8) {
1993             goto fail;
1994         }
1995         if (op_src < 0) {
1996             /* Unconstrained input.  */
1997             op_src = B7_4;
1998         } else if (op_src == B7_4) {
1999             /* Constrained input matched load.  All operations are
2000                commutative; "swap" them by "moving" the load output
2001                to the (implicit) first argument and the move source
2002                to the (explicit) second argument.  */
2003             op_src = mv_src;
2004         } else {
2005             goto fail;
2006         }
2007         op_arg = REG(op_src);
2008         break;
2009 
2010     case 0x6007: /* not Rm,Rn */
2011         if (ld_dst != B7_4 || mv_src >= 0) {
2012             goto fail;
2013         }
2014         op_dst = B11_8;
2015         op_opc = INDEX_op_xor_i32;
2016         op_arg = tcg_const_i32(-1);
2017         break;
2018 
2019     case 0x7000 ... 0x700f: /* add #imm,Rn */
2020         if (op_dst != B11_8 || mv_src >= 0) {
2021             goto fail;
2022         }
2023         op_opc = INDEX_op_add_i32;
2024         op_arg = tcg_const_i32(B7_0s);
2025         break;
2026 
2027     case 0x3000: /* cmp/eq Rm,Rn */
2028         /* Looking for the middle of a compare-and-swap sequence,
2029            beginning with the compare.  Operands can be in either order,
2030            but with only one overlapping the load.  */
2031         if ((ld_dst == B11_8) + (ld_dst == B7_4) != 1 || mv_src >= 0) {
2032             goto fail;
2033         }
2034         op_opc = INDEX_op_setcond_i32;  /* placeholder */
2035         op_src = (ld_dst == B11_8 ? B7_4 : B11_8);
2036         op_arg = REG(op_src);
2037 
2038         NEXT_INSN;
2039         switch (ctx->opcode & 0xff00) {
2040         case 0x8b00: /* bf label */
2041         case 0x8f00: /* bf/s label */
2042             if (pc + (i + 1 + B7_0s) * 2 != pc_end) {
2043                 goto fail;
2044             }
2045             if ((ctx->opcode & 0xff00) == 0x8b00) { /* bf label */
2046                 break;
2047             }
2048             /* We're looking to unconditionally modify Rn with the
2049                result of the comparison, within the delay slot of
2050                the branch.  This is used by older gcc.  */
2051             NEXT_INSN;
2052             if ((ctx->opcode & 0xf0ff) == 0x0029) { /* movt Rn */
2053                 mt_dst = B11_8;
2054             } else {
2055                 goto fail;
2056             }
2057             break;
2058 
2059         default:
2060             goto fail;
2061         }
2062         break;
2063 
2064     case 0x2008: /* tst Rm,Rn */
2065         /* Looking for a compare-and-swap against zero.  */
2066         if (ld_dst != B11_8 || ld_dst != B7_4 || mv_src >= 0) {
2067             goto fail;
2068         }
2069         op_opc = INDEX_op_setcond_i32;
2070         op_arg = tcg_const_i32(0);
2071 
2072         NEXT_INSN;
2073         if ((ctx->opcode & 0xff00) != 0x8900 /* bt label */
2074             || pc + (i + 1 + B7_0s) * 2 != pc_end) {
2075             goto fail;
2076         }
2077         break;
2078 
2079     default:
2080         /* Put back and re-examine as store.  */
2081         --i;
2082     }
2083 
2084     /*
2085      * Expect the store.
2086      */
2087     /* The store must be the last insn.  */
2088     if (i != max_insns - 1) {
2089         goto fail;
2090     }
2091     NEXT_INSN;
2092     switch (ctx->opcode & 0xf00f) {
2093     case 0x2000: /* mov.b Rm,@Rn */
2094         st_mop = MO_UB;
2095         break;
2096     case 0x2001: /* mov.w Rm,@Rn */
2097         st_mop = MO_UW;
2098         break;
2099     case 0x2002: /* mov.l Rm,@Rn */
2100         st_mop = MO_UL;
2101         break;
2102     default:
2103         goto fail;
2104     }
2105     /* The store must match the load.  */
2106     if (ld_adr != B11_8 || st_mop != (ld_mop & MO_SIZE)) {
2107         goto fail;
2108     }
2109     st_src = B7_4;
2110 
2111 #undef NEXT_INSN
2112 
2113     /*
2114      * Emit the operation.
2115      */
2116     switch (op_opc) {
2117     case -1:
2118         /* No operation found.  Look for exchange pattern.  */
2119         if (st_src == ld_dst || mv_src >= 0) {
2120             goto fail;
2121         }
2122         tcg_gen_atomic_xchg_i32(REG(ld_dst), REG(ld_adr), REG(st_src),
2123                                 ctx->memidx, ld_mop);
2124         break;
2125 
2126     case INDEX_op_add_i32:
2127         if (op_dst != st_src) {
2128             goto fail;
2129         }
2130         if (op_dst == ld_dst && st_mop == MO_UL) {
2131             tcg_gen_atomic_add_fetch_i32(REG(ld_dst), REG(ld_adr),
2132                                          op_arg, ctx->memidx, ld_mop);
2133         } else {
2134             tcg_gen_atomic_fetch_add_i32(REG(ld_dst), REG(ld_adr),
2135                                          op_arg, ctx->memidx, ld_mop);
2136             if (op_dst != ld_dst) {
2137                 /* Note that mop sizes < 4 cannot use add_fetch
2138                    because it won't carry into the higher bits.  */
2139                 tcg_gen_add_i32(REG(op_dst), REG(ld_dst), op_arg);
2140             }
2141         }
2142         break;
2143 
2144     case INDEX_op_and_i32:
2145         if (op_dst != st_src) {
2146             goto fail;
2147         }
2148         if (op_dst == ld_dst) {
2149             tcg_gen_atomic_and_fetch_i32(REG(ld_dst), REG(ld_adr),
2150                                          op_arg, ctx->memidx, ld_mop);
2151         } else {
2152             tcg_gen_atomic_fetch_and_i32(REG(ld_dst), REG(ld_adr),
2153                                          op_arg, ctx->memidx, ld_mop);
2154             tcg_gen_and_i32(REG(op_dst), REG(ld_dst), op_arg);
2155         }
2156         break;
2157 
2158     case INDEX_op_or_i32:
2159         if (op_dst != st_src) {
2160             goto fail;
2161         }
2162         if (op_dst == ld_dst) {
2163             tcg_gen_atomic_or_fetch_i32(REG(ld_dst), REG(ld_adr),
2164                                         op_arg, ctx->memidx, ld_mop);
2165         } else {
2166             tcg_gen_atomic_fetch_or_i32(REG(ld_dst), REG(ld_adr),
2167                                         op_arg, ctx->memidx, ld_mop);
2168             tcg_gen_or_i32(REG(op_dst), REG(ld_dst), op_arg);
2169         }
2170         break;
2171 
2172     case INDEX_op_xor_i32:
2173         if (op_dst != st_src) {
2174             goto fail;
2175         }
2176         if (op_dst == ld_dst) {
2177             tcg_gen_atomic_xor_fetch_i32(REG(ld_dst), REG(ld_adr),
2178                                          op_arg, ctx->memidx, ld_mop);
2179         } else {
2180             tcg_gen_atomic_fetch_xor_i32(REG(ld_dst), REG(ld_adr),
2181                                          op_arg, ctx->memidx, ld_mop);
2182             tcg_gen_xor_i32(REG(op_dst), REG(ld_dst), op_arg);
2183         }
2184         break;
2185 
2186     case INDEX_op_setcond_i32:
2187         if (st_src == ld_dst) {
2188             goto fail;
2189         }
2190         tcg_gen_atomic_cmpxchg_i32(REG(ld_dst), REG(ld_adr), op_arg,
2191                                    REG(st_src), ctx->memidx, ld_mop);
2192         tcg_gen_setcond_i32(TCG_COND_EQ, cpu_sr_t, REG(ld_dst), op_arg);
2193         if (mt_dst >= 0) {
2194             tcg_gen_mov_i32(REG(mt_dst), cpu_sr_t);
2195         }
2196         break;
2197 
2198     default:
2199         g_assert_not_reached();
2200     }
2201 
2202     /* If op_src is not a valid register, then op_arg was a constant.  */
2203     if (op_src < 0 && op_arg) {
2204         tcg_temp_free_i32(op_arg);
2205     }
2206 
2207     /* The entire region has been translated.  */
2208     ctx->envflags &= ~GUSA_MASK;
2209     ctx->base.pc_next = pc_end;
2210     ctx->base.num_insns += max_insns - 1;
2211     return;
2212 
2213  fail:
2214     qemu_log_mask(LOG_UNIMP, "Unrecognized gUSA sequence %08x-%08x\n",
2215                   pc, pc_end);
2216 
2217     /* Restart with the EXCLUSIVE bit set, within a TB run via
2218        cpu_exec_step_atomic holding the exclusive lock.  */
2219     ctx->envflags |= GUSA_EXCLUSIVE;
2220     gen_save_cpu_state(ctx, false);
2221     gen_helper_exclusive(cpu_env);
2222     ctx->base.is_jmp = DISAS_NORETURN;
2223 
2224     /* We're not executing an instruction, but we must report one for the
2225        purposes of accounting within the TB.  We might as well report the
2226        entire region consumed via ctx->base.pc_next so that it's immediately
2227        available in the disassembly dump.  */
2228     ctx->base.pc_next = pc_end;
2229     ctx->base.num_insns += max_insns - 1;
2230 }
2231 #endif
2232 
2233 static void sh4_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
2234 {
2235     DisasContext *ctx = container_of(dcbase, DisasContext, base);
2236     CPUSH4State *env = cs->env_ptr;
2237     uint32_t tbflags;
2238     int bound;
2239 
2240     ctx->tbflags = tbflags = ctx->base.tb->flags;
2241     ctx->envflags = tbflags & TB_FLAG_ENVFLAGS_MASK;
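    /* MMU index: privileged mode (SR.MD set) uses index 0, user mode
       uses index 1.  */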
2242     ctx->memidx = (tbflags & (1u << SR_MD)) == 0 ? 1 : 0;
2243     /* We don't know if the delayed pc came from a dynamic or static branch,
2244        so assume it is a dynamic branch.  */
2245     ctx->delayed_pc = -1; /* use delayed pc from env pointer */
2246     ctx->features = env->features;
2247     ctx->has_movcal = (tbflags & TB_FLAG_PENDING_MOVCA);
2248     ctx->gbank = ((tbflags & (1 << SR_MD)) &&
2249                   (tbflags & (1 << SR_RB))) * 0x10;
2250     ctx->fbank = tbflags & FPSCR_FR ? 0x10 : 0;
2251 
2252     if (tbflags & GUSA_MASK) {
2253         uint32_t pc = ctx->base.pc_next;
2254         uint32_t pc_end = ctx->base.tb->cs_base;
2255         int backup = sextract32(ctx->tbflags, GUSA_SHIFT, 8);
2256         int max_insns = (pc_end - pc) / 2;
2257 
2258         if (pc != pc_end + backup || max_insns < 2) {
2259             /* This is a malformed gUSA region.  Don't do anything special,
2260                since the interpreter is likely to get confused.  */
2261             ctx->envflags &= ~GUSA_MASK;
2262         } else if (tbflags & GUSA_EXCLUSIVE) {
2263             /* Regardless of single-stepping or the end of the page,
2264                we must complete execution of the gUSA region while
2265                holding the exclusive lock.  */
2266             ctx->base.max_insns = max_insns;
2267             return;
2268         }
2269     }
2270 
2271     /* Since the ISA is fixed-width, we can bound by the number
2272        of instructions remaining on the page.  */
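    /* Note that -(pc | TARGET_PAGE_MASK) is the number of bytes to the
       end of the page, and each insn is 2 bytes.  */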
2273     bound = -(ctx->base.pc_next | TARGET_PAGE_MASK) / 2;
2274     ctx->base.max_insns = MIN(ctx->base.max_insns, bound);
2275 }
2276 
2277 static void sh4_tr_tb_start(DisasContextBase *dcbase, CPUState *cs)
2278 {
2279 }
2280 
2281 static void sh4_tr_insn_start(DisasContextBase *dcbase, CPUState *cs)
2282 {
2283     DisasContext *ctx = container_of(dcbase, DisasContext, base);
2284 
2285     tcg_gen_insn_start(ctx->base.pc_next, ctx->envflags);
2286 }
2287 
2288 static void sh4_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
2289 {
2290     CPUSH4State *env = cs->env_ptr;
2291     DisasContext *ctx = container_of(dcbase, DisasContext, base);
2292 
2293 #ifdef CONFIG_USER_ONLY
2294     if (unlikely(ctx->envflags & GUSA_MASK)
2295         && !(ctx->envflags & GUSA_EXCLUSIVE)) {
2296         /* We're in a gUSA region, and we have not already fallen
2297            back on using an exclusive region.  Attempt to parse the
2298            region into a single supported atomic operation.  Failure
2299            is handled within the parser by raising an exception to
2300            retry using an exclusive region.  */
2301         decode_gusa(ctx, env);
2302         return;
2303     }
2304 #endif
2305 
2306     ctx->opcode = translator_lduw(env, &ctx->base, ctx->base.pc_next);
2307     decode_opc(ctx);
2308     ctx->base.pc_next += 2;
2309 }
2310 
2311 static void sh4_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
2312 {
2313     DisasContext *ctx = container_of(dcbase, DisasContext, base);
2314 
2315     if (ctx->tbflags & GUSA_EXCLUSIVE) {
2316         /* Ending the region of exclusivity.  Clear the bits.  */
2317         ctx->envflags &= ~GUSA_MASK;
2318     }
2319 
2320     switch (ctx->base.is_jmp) {
2321     case DISAS_STOP:
2322         gen_save_cpu_state(ctx, true);
2323         tcg_gen_exit_tb(NULL, 0);
2324         break;
2325     case DISAS_NEXT:
2326     case DISAS_TOO_MANY:
2327         gen_save_cpu_state(ctx, false);
2328         gen_goto_tb(ctx, 0, ctx->base.pc_next);
2329         break;
2330     case DISAS_NORETURN:
2331         break;
2332     default:
2333         g_assert_not_reached();
2334     }
2335 }
2336 
2337 static void sh4_tr_disas_log(const DisasContextBase *dcbase, CPUState *cs)
2338 {
2339     qemu_log("IN: %s\n", lookup_symbol(dcbase->pc_first));
2340     log_target_disas(cs, dcbase->pc_first, dcbase->tb->size);
2341 }
2342 
2343 static const TranslatorOps sh4_tr_ops = {
2344     .init_disas_context = sh4_tr_init_disas_context,
2345     .tb_start           = sh4_tr_tb_start,
2346     .insn_start         = sh4_tr_insn_start,
2347     .translate_insn     = sh4_tr_translate_insn,
2348     .tb_stop            = sh4_tr_tb_stop,
2349     .disas_log          = sh4_tr_disas_log,
2350 };
2351 
2352 void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
2353 {
2354     DisasContext ctx;
2355 
2356     translator_loop(&sh4_tr_ops, &ctx.base, cs, tb, max_insns);
2357 }
2358 
2359 void restore_state_to_opc(CPUSH4State *env, TranslationBlock *tb,
2360                           target_ulong *data)
2361 {
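    /* data[] holds the values recorded by tcg_gen_insn_start:
       the PC and the environment flags.  */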
2362     env->pc = data[0];
2363     env->flags = data[1];
2364     /* Theoretically delayed_pc should also be restored. In practice the
2365        branch instruction is re-executed after the exception, so the delayed
2366        branch target will be recomputed. */
2367 }
2368