xref: /openbmc/qemu/linux-user/i386/cpu_loop.c (revision fc8c745d)
1 /*
2  *  qemu user cpu loop
3  *
4  *  Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu-common.h"
22 #include "qemu.h"
23 #include "cpu_loop-common.h"
24 
25 /***********************************************************/
26 /* CPUX86 core interface */
27 
28 uint64_t cpu_get_tsc(CPUX86State *env)
29 {
30     return cpu_get_host_ticks();
31 }
32 
/*
 * Pack a descriptor-table entry into the two 32-bit words at ptr.
 *
 * ptr:   destination, written as two target-endian 32-bit words
 * addr:  segment base address
 * limit: segment limit (bits 19..0 are used)
 * flags: descriptor attribute bits, OR-ed into the high word as given
 */
static void write_dt(void *ptr, unsigned long addr, unsigned long limit,
              int flags)
{
    uint32_t *desc = ptr;
    unsigned int word_lo, word_hi;

    /* Low word: base 15..0 in the upper half, limit 15..0 in the lower. */
    word_lo = (addr << 16) | (limit & 0xffff);

    /* High word: base 23..16, base 31..24, limit 19..16, then attributes. */
    word_hi = (addr >> 16) & 0xff;
    word_hi |= addr & 0xff000000;
    word_hi |= limit & 0x000f0000;
    word_hi |= flags;

    desc[0] = tswap32(word_lo);
    desc[1] = tswap32(word_hi);
}
45 
/*
 * Host pointer to the guest interrupt descriptor table; assigned in
 * target_cpu_copy_regs() and indexed by set_idt().
 */
static uint64_t *idt_table;
47 #ifdef TARGET_X86_64
/*
 * Write a 16-byte gate descriptor at ptr as four target-endian 32-bit words.
 *
 * ptr:  destination for the descriptor
 * type: gate type, placed at bit 8 of the second word
 * dpl:  descriptor privilege level, placed at bit 13 of the second word
 * addr: 64-bit handler offset, split across words 0, 1 and 2
 * sel:  segment selector, placed in the upper half of the first word
 */
static void set_gate64(void *ptr, unsigned int type, unsigned int dpl,
                       uint64_t addr, unsigned int sel)
{
    uint32_t *gate = ptr;
    uint32_t offset_lo = (uint32_t)addr;
    uint32_t offset_hi = (uint32_t)(addr >> 32);
    uint32_t word0, word1;

    word0 = (offset_lo & 0xffff) | (sel << 16);
    /* 0x8000 is always set in the second word. */
    word1 = (offset_lo & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);

    gate[0] = tswap32(word0);
    gate[1] = tswap32(word1);
    gate[2] = tswap32(offset_hi);
    gate[3] = 0;
}
60 /* only dpl matters as we do only user space emulation */
61 static void set_idt(int n, unsigned int dpl)
62 {
63     set_gate64(idt_table + n * 2, 0, dpl, 0, 0);
64 }
65 #else
/*
 * Write an 8-byte gate descriptor at ptr as two target-endian 32-bit words.
 *
 * ptr:  destination for the descriptor
 * type: gate type, placed at bit 8 of the second word
 * dpl:  descriptor privilege level, placed at bit 13 of the second word
 * addr: 32-bit handler offset, split across both words
 * sel:  segment selector, placed in the upper half of the first word
 */
static void set_gate(void *ptr, unsigned int type, unsigned int dpl,
                     uint32_t addr, unsigned int sel)
{
    uint32_t *gate = ptr;
    uint32_t word0, word1;

    word0 = (addr & 0xffff) | (sel << 16);
    /* 0x8000 is always set in the second word. */
    word1 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);

    gate[0] = tswap32(word0);
    gate[1] = tswap32(word1);
}
76 
77 /* only dpl matters as we do only user space emulation */
78 static void set_idt(int n, unsigned int dpl)
79 {
80     set_gate(idt_table + n, 0, dpl, 0, 0);
81 }
82 #endif
83 
84 static void gen_signal(CPUX86State *env, int sig, int code, abi_ptr addr)
85 {
86     target_siginfo_t info = {
87         .si_signo = sig,
88         .si_code = code,
89         ._sifields._sigfault._addr = addr
90     };
91 
92     queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
93 }
94 
95 #ifdef TARGET_X86_64
96 static bool write_ok_or_segv(CPUX86State *env, abi_ptr addr, size_t len)
97 {
98     /*
99      * For all the vsyscalls, NULL means "don't write anything" not
100      * "write it at address 0".
101      */
102     if (addr == 0 || access_ok(env_cpu(env), VERIFY_WRITE, addr, len)) {
103         return true;
104     }
105 
106     env->error_code = PG_ERROR_W_MASK | PG_ERROR_U_MASK;
107     gen_signal(env, TARGET_SIGSEGV, TARGET_SEGV_MAPERR, addr);
108     return false;
109 }
110 
111 /*
112  * Since v3.1, the kernel traps and emulates the vsyscall page.
113  * Entry points other than the official generate SIGSEGV.
114  */
115 static void emulate_vsyscall(CPUX86State *env)
116 {
117     int syscall;
118     abi_ulong ret;
119     uint64_t caller;
120 
121     /*
122      * Validate the entry point.  We have already validated the page
123      * during translation to get here; now verify the offset.
124      */
125     switch (env->eip & ~TARGET_PAGE_MASK) {
126     case 0x000:
127         syscall = TARGET_NR_gettimeofday;
128         break;
129     case 0x400:
130         syscall = TARGET_NR_time;
131         break;
132     case 0x800:
133         syscall = TARGET_NR_getcpu;
134         break;
135     default:
136         goto sigsegv;
137     }
138 
139     /*
140      * Validate the return address.
141      * Note that the kernel treats this the same as an invalid entry point.
142      */
143     if (get_user_u64(caller, env->regs[R_ESP])) {
144         goto sigsegv;
145     }
146 
147     /*
148      * Validate the the pointer arguments.
149      */
150     switch (syscall) {
151     case TARGET_NR_gettimeofday:
152         if (!write_ok_or_segv(env, env->regs[R_EDI],
153                               sizeof(struct target_timeval)) ||
154             !write_ok_or_segv(env, env->regs[R_ESI],
155                               sizeof(struct target_timezone))) {
156             return;
157         }
158         break;
159     case TARGET_NR_time:
160         if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(abi_long))) {
161             return;
162         }
163         break;
164     case TARGET_NR_getcpu:
165         if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(uint32_t)) ||
166             !write_ok_or_segv(env, env->regs[R_ESI], sizeof(uint32_t))) {
167             return;
168         }
169         break;
170     default:
171         g_assert_not_reached();
172     }
173 
174     /*
175      * Perform the syscall.  None of the vsyscalls should need restarting.
176      */
177     ret = do_syscall(env, syscall, env->regs[R_EDI], env->regs[R_ESI],
178                      env->regs[R_EDX], env->regs[10], env->regs[8],
179                      env->regs[9], 0, 0);
180     g_assert(ret != -TARGET_ERESTARTSYS);
181     g_assert(ret != -TARGET_QEMU_ESIGRETURN);
182     if (ret == -TARGET_EFAULT) {
183         goto sigsegv;
184     }
185     env->regs[R_EAX] = ret;
186 
187     /* Emulate a ret instruction to leave the vsyscall page.  */
188     env->eip = caller;
189     env->regs[R_ESP] += 8;
190     return;
191 
192  sigsegv:
193     /* Like force_sig(SIGSEGV).  */
194     gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0);
195 }
196 #endif
197 
198 void cpu_loop(CPUX86State *env)
199 {
200     CPUState *cs = env_cpu(env);
201     int trapnr;
202     abi_ulong pc;
203     abi_ulong ret;
204 
205     for(;;) {
206         cpu_exec_start(cs);
207         trapnr = cpu_exec(cs);
208         cpu_exec_end(cs);
209         process_queued_cpu_work(cs);
210 
211         switch(trapnr) {
212         case 0x80:
213             /* linux syscall from int $0x80 */
214             ret = do_syscall(env,
215                              env->regs[R_EAX],
216                              env->regs[R_EBX],
217                              env->regs[R_ECX],
218                              env->regs[R_EDX],
219                              env->regs[R_ESI],
220                              env->regs[R_EDI],
221                              env->regs[R_EBP],
222                              0, 0);
223             if (ret == -TARGET_ERESTARTSYS) {
224                 env->eip -= 2;
225             } else if (ret != -TARGET_QEMU_ESIGRETURN) {
226                 env->regs[R_EAX] = ret;
227             }
228             break;
229 #ifndef TARGET_ABI32
230         case EXCP_SYSCALL:
231             /* linux syscall from syscall instruction */
232             ret = do_syscall(env,
233                              env->regs[R_EAX],
234                              env->regs[R_EDI],
235                              env->regs[R_ESI],
236                              env->regs[R_EDX],
237                              env->regs[10],
238                              env->regs[8],
239                              env->regs[9],
240                              0, 0);
241             if (ret == -TARGET_ERESTARTSYS) {
242                 env->eip -= 2;
243             } else if (ret != -TARGET_QEMU_ESIGRETURN) {
244                 env->regs[R_EAX] = ret;
245             }
246             break;
247 #endif
248 #ifdef TARGET_X86_64
249         case EXCP_VSYSCALL:
250             emulate_vsyscall(env);
251             break;
252 #endif
253         case EXCP0B_NOSEG:
254         case EXCP0C_STACK:
255             gen_signal(env, TARGET_SIGBUS, TARGET_SI_KERNEL, 0);
256             break;
257         case EXCP0D_GPF:
258             /* XXX: potential problem if ABI32 */
259 #ifndef TARGET_X86_64
260             if (env->eflags & VM_MASK) {
261                 handle_vm86_fault(env);
262                 break;
263             }
264 #endif
265             gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0);
266             break;
267         case EXCP0E_PAGE:
268             gen_signal(env, TARGET_SIGSEGV,
269                        (env->error_code & 1 ?
270                         TARGET_SEGV_ACCERR : TARGET_SEGV_MAPERR),
271                        env->cr[2]);
272             break;
273         case EXCP00_DIVZ:
274 #ifndef TARGET_X86_64
275             if (env->eflags & VM_MASK) {
276                 handle_vm86_trap(env, trapnr);
277                 break;
278             }
279 #endif
280             gen_signal(env, TARGET_SIGFPE, TARGET_FPE_INTDIV, env->eip);
281             break;
282         case EXCP01_DB:
283         case EXCP03_INT3:
284 #ifndef TARGET_X86_64
285             if (env->eflags & VM_MASK) {
286                 handle_vm86_trap(env, trapnr);
287                 break;
288             }
289 #endif
290             if (trapnr == EXCP01_DB) {
291                 gen_signal(env, TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip);
292             } else {
293                 gen_signal(env, TARGET_SIGTRAP, TARGET_SI_KERNEL, 0);
294             }
295             break;
296         case EXCP04_INTO:
297         case EXCP05_BOUND:
298 #ifndef TARGET_X86_64
299             if (env->eflags & VM_MASK) {
300                 handle_vm86_trap(env, trapnr);
301                 break;
302             }
303 #endif
304             gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0);
305             break;
306         case EXCP06_ILLOP:
307             gen_signal(env, TARGET_SIGILL, TARGET_ILL_ILLOPN, env->eip);
308             break;
309         case EXCP_INTERRUPT:
310             /* just indicate that signals should be handled asap */
311             break;
312         case EXCP_DEBUG:
313             gen_signal(env, TARGET_SIGTRAP, TARGET_TRAP_BRKPT, 0);
314             break;
315         case EXCP_ATOMIC:
316             cpu_exec_step_atomic(cs);
317             break;
318         default:
319             pc = env->segs[R_CS].base + env->eip;
320             EXCP_DUMP(env, "qemu: 0x%08lx: unhandled CPU exception 0x%x - aborting\n",
321                       (long)pc, trapnr);
322             abort();
323         }
324         process_pending_signals(env);
325     }
326 }
327 
328 void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs)
329 {
330     env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
331     env->hflags |= HF_PE_MASK | HF_CPL_MASK;
332     if (env->features[FEAT_1_EDX] & CPUID_SSE) {
333         env->cr[4] |= CR4_OSFXSR_MASK;
334         env->hflags |= HF_OSFXSR_MASK;
335     }
336 #ifndef TARGET_ABI32
337     /* enable 64 bit mode if possible */
338     if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) {
339         fprintf(stderr, "The selected x86 CPU does not support 64 bit mode\n");
340         exit(EXIT_FAILURE);
341     }
342     env->cr[4] |= CR4_PAE_MASK;
343     env->efer |= MSR_EFER_LMA | MSR_EFER_LME;
344     env->hflags |= HF_LMA_MASK;
345 #endif
346 
347     /* flags setup : we activate the IRQs by default as in user mode */
348     env->eflags |= IF_MASK;
349 
350     /* linux register setup */
351 #ifndef TARGET_ABI32
352     env->regs[R_EAX] = regs->rax;
353     env->regs[R_EBX] = regs->rbx;
354     env->regs[R_ECX] = regs->rcx;
355     env->regs[R_EDX] = regs->rdx;
356     env->regs[R_ESI] = regs->rsi;
357     env->regs[R_EDI] = regs->rdi;
358     env->regs[R_EBP] = regs->rbp;
359     env->regs[R_ESP] = regs->rsp;
360     env->eip = regs->rip;
361 #else
362     env->regs[R_EAX] = regs->eax;
363     env->regs[R_EBX] = regs->ebx;
364     env->regs[R_ECX] = regs->ecx;
365     env->regs[R_EDX] = regs->edx;
366     env->regs[R_ESI] = regs->esi;
367     env->regs[R_EDI] = regs->edi;
368     env->regs[R_EBP] = regs->ebp;
369     env->regs[R_ESP] = regs->esp;
370     env->eip = regs->eip;
371 #endif
372 
373     /* linux interrupt setup */
374 #ifndef TARGET_ABI32
375     env->idt.limit = 511;
376 #else
377     env->idt.limit = 255;
378 #endif
379     env->idt.base = target_mmap(0, sizeof(uint64_t) * (env->idt.limit + 1),
380                                 PROT_READ|PROT_WRITE,
381                                 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
382     idt_table = g2h_untagged(env->idt.base);
383     set_idt(0, 0);
384     set_idt(1, 0);
385     set_idt(2, 0);
386     set_idt(3, 3);
387     set_idt(4, 3);
388     set_idt(5, 0);
389     set_idt(6, 0);
390     set_idt(7, 0);
391     set_idt(8, 0);
392     set_idt(9, 0);
393     set_idt(10, 0);
394     set_idt(11, 0);
395     set_idt(12, 0);
396     set_idt(13, 0);
397     set_idt(14, 0);
398     set_idt(15, 0);
399     set_idt(16, 0);
400     set_idt(17, 0);
401     set_idt(18, 0);
402     set_idt(19, 0);
403     set_idt(0x80, 3);
404 
405     /* linux segment setup */
406     {
407         uint64_t *gdt_table;
408         env->gdt.base = target_mmap(0, sizeof(uint64_t) * TARGET_GDT_ENTRIES,
409                                     PROT_READ|PROT_WRITE,
410                                     MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
411         env->gdt.limit = sizeof(uint64_t) * TARGET_GDT_ENTRIES - 1;
412         gdt_table = g2h_untagged(env->gdt.base);
413 #ifdef TARGET_ABI32
414         write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff,
415                  DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
416                  (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
417 #else
418         /* 64 bit code segment */
419         write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff,
420                  DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
421                  DESC_L_MASK |
422                  (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
423 #endif
424         write_dt(&gdt_table[__USER_DS >> 3], 0, 0xfffff,
425                  DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
426                  (3 << DESC_DPL_SHIFT) | (0x2 << DESC_TYPE_SHIFT));
427     }
428     cpu_x86_load_seg(env, R_CS, __USER_CS);
429     cpu_x86_load_seg(env, R_SS, __USER_DS);
430 #ifdef TARGET_ABI32
431     cpu_x86_load_seg(env, R_DS, __USER_DS);
432     cpu_x86_load_seg(env, R_ES, __USER_DS);
433     cpu_x86_load_seg(env, R_FS, __USER_DS);
434     cpu_x86_load_seg(env, R_GS, __USER_DS);
435     /* This hack makes Wine work... */
436     env->segs[R_FS].selector = 0;
437 #else
438     cpu_x86_load_seg(env, R_DS, 0);
439     cpu_x86_load_seg(env, R_ES, 0);
440     cpu_x86_load_seg(env, R_FS, 0);
441     cpu_x86_load_seg(env, R_GS, 0);
442 #endif
443 }
444