xref: /openbmc/qemu/linux-user/i386/cpu_loop.c (revision f75e4f2234e7339c16c1dba048bf131a2a948f84)
1 /*
2  *  qemu user cpu loop
3  *
4  *  Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu.h"
22 #include "qemu/timer.h"
23 #include "user-internals.h"
24 #include "cpu_loop-common.h"
25 #include "signal-common.h"
26 #include "user-mmap.h"
27 
28 /***********************************************************/
29 /* CPUX86 core interface */
30 
31 uint64_t cpu_get_tsc(CPUX86State *env)
32 {
33     return cpu_get_host_ticks();
34 }
35 
/*
 * Store a two-word (2 x 32-bit) descriptor at @ptr.
 *
 * Word 0: bits 0..15 of @addr in the upper half, bits 0..15 of @limit in
 *         the lower half.
 * Word 1: bits 16..23 of @addr in bits 0..7, bits 24..31 of @addr kept in
 *         place, bits 16..19 of @limit kept in place, OR-ed with @flags.
 *
 * Both words go through tswap32() before being stored.
 */
static void write_dt(void *ptr, unsigned long addr, unsigned long limit,
                     int flags)
{
    uint32_t *desc = ptr;
    unsigned int lo, hi;

    lo = (addr << 16) | (limit & 0xffff);

    hi = (addr >> 16) & 0xff;
    hi |= addr & 0xff000000;
    hi |= limit & 0x000f0000;
    hi |= flags;

    desc[0] = tswap32(lo);
    desc[1] = tswap32(hi);
}
48 
/*
 * Pointer through which the IDT entries are written.  It is set in
 * target_cpu_copy_regs() to g2h_untagged(env->idt.base) and indexed by
 * set_idt().
 */
static uint64_t *idt_table;
50 #ifdef TARGET_X86_64
/*
 * Write a four-word (16-byte) gate descriptor at @ptr.
 *
 * Word 0: bits 0..15 of @addr, with @sel in the upper half.
 * Word 1: bits 16..31 of @addr, bit 15 set, @dpl at bit 13, @type at bit 8.
 * Word 2: bits 32..63 of @addr.
 * Word 3: zero.
 *
 * Words 0..2 go through tswap32() before being stored.
 */
static void set_gate64(void *ptr, unsigned int type, unsigned int dpl,
                       uint64_t addr, unsigned int sel)
{
    uint32_t *gate = ptr;
    uint32_t word0 = (addr & 0xffff) | (sel << 16);
    uint32_t word1 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);

    gate[0] = tswap32(word0);
    gate[1] = tswap32(word1);
    gate[2] = tswap32(addr >> 32);
    gate[3] = 0;
}
63 /* only dpl matters as we do only user space emulation */
64 static void set_idt(int n, unsigned int dpl)
65 {
66     set_gate64(idt_table + n * 2, 0, dpl, 0, 0);
67 }
68 #else
/*
 * Write a two-word (8-byte) gate descriptor at @ptr.
 *
 * Word 0: bits 0..15 of @addr, with @sel in the upper half.
 * Word 1: bits 16..31 of @addr, bit 15 set, @dpl at bit 13, @type at bit 8.
 *
 * Both words go through tswap32() before being stored.
 */
static void set_gate(void *ptr, unsigned int type, unsigned int dpl,
                     uint32_t addr, unsigned int sel)
{
    uint32_t *gate = ptr;
    uint32_t word0 = (addr & 0xffff) | (sel << 16);
    uint32_t word1 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);

    gate[0] = tswap32(word0);
    gate[1] = tswap32(word1);
}
79 
80 /* only dpl matters as we do only user space emulation */
81 static void set_idt(int n, unsigned int dpl)
82 {
83     set_gate(idt_table + n, 0, dpl, 0, 0);
84 }
85 #endif
86 
87 #ifdef TARGET_X86_64
88 static bool write_ok_or_segv(CPUX86State *env, abi_ptr addr, size_t len)
89 {
90     /*
91      * For all the vsyscalls, NULL means "don't write anything" not
92      * "write it at address 0".
93      */
94     if (addr == 0 || access_ok(env_cpu(env), VERIFY_WRITE, addr, len)) {
95         return true;
96     }
97 
98     env->error_code = PG_ERROR_W_MASK | PG_ERROR_U_MASK;
99     force_sig_fault(TARGET_SIGSEGV, TARGET_SEGV_MAPERR, addr);
100     return false;
101 }
102 
103 /*
104  * Since v3.1, the kernel traps and emulates the vsyscall page.
105  * Entry points other than the official generate SIGSEGV.
106  */
107 static void emulate_vsyscall(CPUX86State *env)
108 {
109     int syscall;
110     abi_ulong ret;
111     uint64_t caller;
112 
113     /*
114      * Validate the entry point.  We have already validated the page
115      * during translation to get here; now verify the offset.
116      */
117     switch (env->eip & ~TARGET_PAGE_MASK) {
118     case 0x000:
119         syscall = TARGET_NR_gettimeofday;
120         break;
121     case 0x400:
122         syscall = TARGET_NR_time;
123         break;
124     case 0x800:
125         syscall = TARGET_NR_getcpu;
126         break;
127     default:
128         goto sigsegv;
129     }
130 
131     /*
132      * Validate the return address.
133      * Note that the kernel treats this the same as an invalid entry point.
134      */
135     if (get_user_u64(caller, env->regs[R_ESP])) {
136         goto sigsegv;
137     }
138 
139     /*
140      * Validate the pointer arguments.
141      */
142     switch (syscall) {
143     case TARGET_NR_gettimeofday:
144         if (!write_ok_or_segv(env, env->regs[R_EDI],
145                               sizeof(struct target_timeval)) ||
146             !write_ok_or_segv(env, env->regs[R_ESI],
147                               sizeof(struct target_timezone))) {
148             return;
149         }
150         break;
151     case TARGET_NR_time:
152         if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(abi_long))) {
153             return;
154         }
155         break;
156     case TARGET_NR_getcpu:
157         if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(uint32_t)) ||
158             !write_ok_or_segv(env, env->regs[R_ESI], sizeof(uint32_t))) {
159             return;
160         }
161         break;
162     default:
163         g_assert_not_reached();
164     }
165 
166     /*
167      * Perform the syscall.  None of the vsyscalls should need restarting.
168      */
169     ret = do_syscall(env, syscall, env->regs[R_EDI], env->regs[R_ESI],
170                      env->regs[R_EDX], env->regs[10], env->regs[8],
171                      env->regs[9], 0, 0);
172     g_assert(ret != -QEMU_ERESTARTSYS);
173     g_assert(ret != -QEMU_ESIGRETURN);
174     if (ret == -TARGET_EFAULT) {
175         goto sigsegv;
176     }
177     env->regs[R_EAX] = ret;
178 
179     /* Emulate a ret instruction to leave the vsyscall page.  */
180     env->eip = caller;
181     env->regs[R_ESP] += 8;
182     return;
183 
184  sigsegv:
185     force_sig(TARGET_SIGSEGV);
186 }
187 #endif
188 
189 static bool maybe_handle_vm86_trap(CPUX86State *env, int trapnr)
190 {
191 #ifndef TARGET_X86_64
192     if (env->eflags & VM_MASK) {
193         handle_vm86_trap(env, trapnr);
194         return true;
195     }
196 #endif
197     return false;
198 }
199 
200 void cpu_loop(CPUX86State *env)
201 {
202     CPUState *cs = env_cpu(env);
203     int trapnr;
204     abi_ulong ret;
205 
206     for(;;) {
207         cpu_exec_start(cs);
208         trapnr = cpu_exec(cs);
209         cpu_exec_end(cs);
210         process_queued_cpu_work(cs);
211 
212         switch(trapnr) {
213         case 0x80:
214             /* linux syscall from int $0x80 */
215             ret = do_syscall(env,
216                              env->regs[R_EAX],
217                              env->regs[R_EBX],
218                              env->regs[R_ECX],
219                              env->regs[R_EDX],
220                              env->regs[R_ESI],
221                              env->regs[R_EDI],
222                              env->regs[R_EBP],
223                              0, 0);
224             if (ret == -QEMU_ERESTARTSYS) {
225                 env->eip -= 2;
226             } else if (ret != -QEMU_ESIGRETURN) {
227                 env->regs[R_EAX] = ret;
228             }
229             break;
230 #ifndef TARGET_ABI32
231         case EXCP_SYSCALL:
232             /* linux syscall from syscall instruction */
233             ret = do_syscall(env,
234                              env->regs[R_EAX],
235                              env->regs[R_EDI],
236                              env->regs[R_ESI],
237                              env->regs[R_EDX],
238                              env->regs[10],
239                              env->regs[8],
240                              env->regs[9],
241                              0, 0);
242             if (ret == -QEMU_ERESTARTSYS) {
243                 env->eip -= 2;
244             } else if (ret != -QEMU_ESIGRETURN) {
245                 env->regs[R_EAX] = ret;
246             }
247             break;
248 #endif
249 #ifdef TARGET_X86_64
250         case EXCP_VSYSCALL:
251             emulate_vsyscall(env);
252             break;
253 #endif
254         case EXCP0B_NOSEG:
255         case EXCP0C_STACK:
256             force_sig(TARGET_SIGBUS);
257             break;
258         case EXCP0D_GPF:
259             /* XXX: potential problem if ABI32 */
260             if (maybe_handle_vm86_trap(env, trapnr)) {
261                 break;
262             }
263             force_sig(TARGET_SIGSEGV);
264             break;
265         case EXCP0E_PAGE:
266             force_sig_fault(TARGET_SIGSEGV,
267                             (env->error_code & PG_ERROR_P_MASK ?
268                              TARGET_SEGV_ACCERR : TARGET_SEGV_MAPERR),
269                             env->cr[2]);
270             break;
271         case EXCP00_DIVZ:
272             if (maybe_handle_vm86_trap(env, trapnr)) {
273                 break;
274             }
275             force_sig_fault(TARGET_SIGFPE, TARGET_FPE_INTDIV, env->eip);
276             break;
277         case EXCP01_DB:
278             if (maybe_handle_vm86_trap(env, trapnr)) {
279                 break;
280             }
281             force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip);
282             break;
283         case EXCP03_INT3:
284             if (maybe_handle_vm86_trap(env, trapnr)) {
285                 break;
286             }
287             force_sig(TARGET_SIGTRAP);
288             break;
289         case EXCP04_INTO:
290         case EXCP05_BOUND:
291             if (maybe_handle_vm86_trap(env, trapnr)) {
292                 break;
293             }
294             force_sig(TARGET_SIGSEGV);
295             break;
296         case EXCP06_ILLOP:
297             force_sig_fault(TARGET_SIGILL, TARGET_ILL_ILLOPN, env->eip);
298             break;
299         case EXCP_INTERRUPT:
300             /* just indicate that signals should be handled asap */
301             break;
302         case EXCP_DEBUG:
303             force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip);
304             break;
305         case EXCP_ATOMIC:
306             cpu_exec_step_atomic(cs);
307             break;
308         default:
309             EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n",
310                       trapnr);
311             abort();
312         }
313         process_pending_signals(env);
314     }
315 }
316 
317 static void target_cpu_free(void *obj)
318 {
319     CPUArchState *env = ((CPUState *)obj)->env_ptr;
320     target_munmap(env->gdt.base, sizeof(uint64_t) * TARGET_GDT_ENTRIES);
321     g_free(obj);
322 }
323 
324 void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs)
325 {
326     CPUState *cpu = env_cpu(env);
327     OBJECT(cpu)->free = target_cpu_free;
328     env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
329     env->hflags |= HF_PE_MASK | HF_CPL_MASK;
330     if (env->features[FEAT_1_EDX] & CPUID_SSE) {
331         env->cr[4] |= CR4_OSFXSR_MASK;
332         env->hflags |= HF_OSFXSR_MASK;
333     }
334 #ifndef TARGET_ABI32
335     /* enable 64 bit mode if possible */
336     if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) {
337         fprintf(stderr, "The selected x86 CPU does not support 64 bit mode\n");
338         exit(EXIT_FAILURE);
339     }
340     env->cr[4] |= CR4_PAE_MASK;
341     env->efer |= MSR_EFER_LMA | MSR_EFER_LME;
342     env->hflags |= HF_LMA_MASK;
343 #endif
344 
345     /* flags setup : we activate the IRQs by default as in user mode */
346     env->eflags |= IF_MASK;
347 
348     /* linux register setup */
349 #ifndef TARGET_ABI32
350     env->regs[R_EAX] = regs->rax;
351     env->regs[R_EBX] = regs->rbx;
352     env->regs[R_ECX] = regs->rcx;
353     env->regs[R_EDX] = regs->rdx;
354     env->regs[R_ESI] = regs->rsi;
355     env->regs[R_EDI] = regs->rdi;
356     env->regs[R_EBP] = regs->rbp;
357     env->regs[R_ESP] = regs->rsp;
358     env->eip = regs->rip;
359 #else
360     env->regs[R_EAX] = regs->eax;
361     env->regs[R_EBX] = regs->ebx;
362     env->regs[R_ECX] = regs->ecx;
363     env->regs[R_EDX] = regs->edx;
364     env->regs[R_ESI] = regs->esi;
365     env->regs[R_EDI] = regs->edi;
366     env->regs[R_EBP] = regs->ebp;
367     env->regs[R_ESP] = regs->esp;
368     env->eip = regs->eip;
369 #endif
370 
371     /* linux interrupt setup */
372 #ifndef TARGET_ABI32
373     env->idt.limit = 511;
374 #else
375     env->idt.limit = 255;
376 #endif
377     env->idt.base = target_mmap(0, sizeof(uint64_t) * (env->idt.limit + 1),
378                                 PROT_READ|PROT_WRITE,
379                                 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
380     idt_table = g2h_untagged(env->idt.base);
381     set_idt(0, 0);
382     set_idt(1, 0);
383     set_idt(2, 0);
384     set_idt(3, 3);
385     set_idt(4, 3);
386     set_idt(5, 0);
387     set_idt(6, 0);
388     set_idt(7, 0);
389     set_idt(8, 0);
390     set_idt(9, 0);
391     set_idt(10, 0);
392     set_idt(11, 0);
393     set_idt(12, 0);
394     set_idt(13, 0);
395     set_idt(14, 0);
396     set_idt(15, 0);
397     set_idt(16, 0);
398     set_idt(17, 0);
399     set_idt(18, 0);
400     set_idt(19, 0);
401     set_idt(0x80, 3);
402 
403     /* linux segment setup */
404     {
405         uint64_t *gdt_table;
406         env->gdt.base = target_mmap(0, sizeof(uint64_t) * TARGET_GDT_ENTRIES,
407                                     PROT_READ|PROT_WRITE,
408                                     MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
409         env->gdt.limit = sizeof(uint64_t) * TARGET_GDT_ENTRIES - 1;
410         gdt_table = g2h_untagged(env->gdt.base);
411 #ifdef TARGET_ABI32
412         write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff,
413                  DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
414                  (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
415 #else
416         /* 64 bit code segment */
417         write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff,
418                  DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
419                  DESC_L_MASK |
420                  (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
421 #endif
422         write_dt(&gdt_table[__USER_DS >> 3], 0, 0xfffff,
423                  DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
424                  (3 << DESC_DPL_SHIFT) | (0x2 << DESC_TYPE_SHIFT));
425     }
426     cpu_x86_load_seg(env, R_CS, __USER_CS);
427     cpu_x86_load_seg(env, R_SS, __USER_DS);
428 #ifdef TARGET_ABI32
429     cpu_x86_load_seg(env, R_DS, __USER_DS);
430     cpu_x86_load_seg(env, R_ES, __USER_DS);
431     cpu_x86_load_seg(env, R_FS, __USER_DS);
432     cpu_x86_load_seg(env, R_GS, __USER_DS);
433     /* This hack makes Wine work... */
434     env->segs[R_FS].selector = 0;
435 #else
436     cpu_x86_load_seg(env, R_DS, 0);
437     cpu_x86_load_seg(env, R_ES, 0);
438     cpu_x86_load_seg(env, R_FS, 0);
439     cpu_x86_load_seg(env, R_GS, 0);
440 #endif
441 }
442