xref: /openbmc/qemu/linux-user/i386/cpu_loop.c (revision 2113aed687cb0b84ad512c440c1edf6eea8fcde2)
1 /*
2  *  qemu user cpu loop
3  *
4  *  Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu-common.h"
22 #include "qemu.h"
23 #include "cpu_loop-common.h"
24 #include "signal-common.h"
25 
26 /***********************************************************/
27 /* CPUX86 core interface */
28 
29 uint64_t cpu_get_tsc(CPUX86State *env)
30 {
31     return cpu_get_host_ticks();
32 }
33 
/*
 * Encode a two-word descriptor at @ptr.
 *
 * Low word:  bits 15..0 of @limit in bits 15..0, and the low 16 bits of
 *            @addr in bits 31..16.
 * High word: bits 23..16 of @addr in bits 7..0, bits 31..24 of @addr in
 *            bits 31..24, bits 19..16 of @limit in bits 19..16, OR-ed
 *            with @flags.
 *
 * Both words are passed through tswap32() before being stored.
 */
static void write_dt(void *ptr, unsigned long addr, unsigned long limit,
                     int flags)
{
    uint32_t *desc = ptr;
    unsigned int lo, hi;

    lo = (limit & 0xffff) | (addr << 16);

    hi = (addr & 0xff000000) | ((addr >> 16) & 0xff) | (limit & 0x000f0000);
    hi |= flags;

    desc[0] = tswap32(lo);
    desc[1] = tswap32(hi);
}
46 
/*
 * Host pointer to the guest IDT.  Assigned in target_cpu_copy_regs()
 * and indexed by set_idt().
 */
static uint64_t *idt_table;
48 #ifdef TARGET_X86_64
/*
 * Encode a four-word (16-byte) gate descriptor at @ptr.
 *
 * Word 0: low 16 bits of @addr, with @sel in bits 31..16.
 * Word 1: bits 31..16 of @addr, bit 15 set (0x8000), @dpl starting at
 *         bit 13 and @type starting at bit 8.
 * Word 2: bits 63..32 of @addr.
 * Word 3: zero.
 *
 * Words 0-2 are passed through tswap32() before being stored.
 */
static void set_gate64(void *ptr, unsigned int type, unsigned int dpl,
                       uint64_t addr, unsigned int sel)
{
    uint32_t *gate = ptr;
    uint32_t lo, hi;

    lo = (sel << 16) | (addr & 0xffff);
    hi = (type << 8) | (dpl << 13) | 0x8000 | (addr & 0xffff0000);

    gate[0] = tswap32(lo);
    gate[1] = tswap32(hi);
    gate[2] = tswap32(addr >> 32);
    gate[3] = 0;
}
61 /* only dpl matters as we do only user space emulation */
62 static void set_idt(int n, unsigned int dpl)
63 {
64     set_gate64(idt_table + n * 2, 0, dpl, 0, 0);
65 }
66 #else
/*
 * Encode a two-word (8-byte) gate descriptor at @ptr.
 *
 * Word 0: low 16 bits of @addr, with @sel in bits 31..16.
 * Word 1: bits 31..16 of @addr, bit 15 set (0x8000), @dpl starting at
 *         bit 13 and @type starting at bit 8.
 *
 * Both words are passed through tswap32() before being stored.
 */
static void set_gate(void *ptr, unsigned int type, unsigned int dpl,
                     uint32_t addr, unsigned int sel)
{
    uint32_t *gate = ptr;
    uint32_t lo, hi;

    lo = (sel << 16) | (addr & 0xffff);
    hi = (type << 8) | (dpl << 13) | 0x8000 | (addr & 0xffff0000);

    gate[0] = tswap32(lo);
    gate[1] = tswap32(hi);
}
77 
78 /* only dpl matters as we do only user space emulation */
79 static void set_idt(int n, unsigned int dpl)
80 {
81     set_gate(idt_table + n, 0, dpl, 0, 0);
82 }
83 #endif
84 
85 static void gen_signal(CPUX86State *env, int sig, int code, abi_ptr addr)
86 {
87     target_siginfo_t info = {
88         .si_signo = sig,
89         .si_code = code,
90         ._sifields._sigfault._addr = addr
91     };
92 
93     queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
94 }
95 
96 #ifdef TARGET_X86_64
97 static bool write_ok_or_segv(CPUX86State *env, abi_ptr addr, size_t len)
98 {
99     /*
100      * For all the vsyscalls, NULL means "don't write anything" not
101      * "write it at address 0".
102      */
103     if (addr == 0 || access_ok(env_cpu(env), VERIFY_WRITE, addr, len)) {
104         return true;
105     }
106 
107     env->error_code = PG_ERROR_W_MASK | PG_ERROR_U_MASK;
108     gen_signal(env, TARGET_SIGSEGV, TARGET_SEGV_MAPERR, addr);
109     return false;
110 }
111 
112 /*
113  * Since v3.1, the kernel traps and emulates the vsyscall page.
114  * Entry points other than the official generate SIGSEGV.
115  */
116 static void emulate_vsyscall(CPUX86State *env)
117 {
118     int syscall;
119     abi_ulong ret;
120     uint64_t caller;
121 
122     /*
123      * Validate the entry point.  We have already validated the page
124      * during translation to get here; now verify the offset.
125      */
126     switch (env->eip & ~TARGET_PAGE_MASK) {
127     case 0x000:
128         syscall = TARGET_NR_gettimeofday;
129         break;
130     case 0x400:
131         syscall = TARGET_NR_time;
132         break;
133     case 0x800:
134         syscall = TARGET_NR_getcpu;
135         break;
136     default:
137         goto sigsegv;
138     }
139 
140     /*
141      * Validate the return address.
142      * Note that the kernel treats this the same as an invalid entry point.
143      */
144     if (get_user_u64(caller, env->regs[R_ESP])) {
145         goto sigsegv;
146     }
147 
148     /*
149      * Validate the the pointer arguments.
150      */
151     switch (syscall) {
152     case TARGET_NR_gettimeofday:
153         if (!write_ok_or_segv(env, env->regs[R_EDI],
154                               sizeof(struct target_timeval)) ||
155             !write_ok_or_segv(env, env->regs[R_ESI],
156                               sizeof(struct target_timezone))) {
157             return;
158         }
159         break;
160     case TARGET_NR_time:
161         if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(abi_long))) {
162             return;
163         }
164         break;
165     case TARGET_NR_getcpu:
166         if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(uint32_t)) ||
167             !write_ok_or_segv(env, env->regs[R_ESI], sizeof(uint32_t))) {
168             return;
169         }
170         break;
171     default:
172         g_assert_not_reached();
173     }
174 
175     /*
176      * Perform the syscall.  None of the vsyscalls should need restarting.
177      */
178     ret = do_syscall(env, syscall, env->regs[R_EDI], env->regs[R_ESI],
179                      env->regs[R_EDX], env->regs[10], env->regs[8],
180                      env->regs[9], 0, 0);
181     g_assert(ret != -TARGET_ERESTARTSYS);
182     g_assert(ret != -TARGET_QEMU_ESIGRETURN);
183     if (ret == -TARGET_EFAULT) {
184         goto sigsegv;
185     }
186     env->regs[R_EAX] = ret;
187 
188     /* Emulate a ret instruction to leave the vsyscall page.  */
189     env->eip = caller;
190     env->regs[R_ESP] += 8;
191     return;
192 
193  sigsegv:
194     /* Like force_sig(SIGSEGV).  */
195     gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0);
196 }
197 #endif
198 
199 void cpu_loop(CPUX86State *env)
200 {
201     CPUState *cs = env_cpu(env);
202     int trapnr;
203     abi_ulong pc;
204     abi_ulong ret;
205 
206     for(;;) {
207         cpu_exec_start(cs);
208         trapnr = cpu_exec(cs);
209         cpu_exec_end(cs);
210         process_queued_cpu_work(cs);
211 
212         switch(trapnr) {
213         case 0x80:
214             /* linux syscall from int $0x80 */
215             ret = do_syscall(env,
216                              env->regs[R_EAX],
217                              env->regs[R_EBX],
218                              env->regs[R_ECX],
219                              env->regs[R_EDX],
220                              env->regs[R_ESI],
221                              env->regs[R_EDI],
222                              env->regs[R_EBP],
223                              0, 0);
224             if (ret == -TARGET_ERESTARTSYS) {
225                 env->eip -= 2;
226             } else if (ret != -TARGET_QEMU_ESIGRETURN) {
227                 env->regs[R_EAX] = ret;
228             }
229             break;
230 #ifndef TARGET_ABI32
231         case EXCP_SYSCALL:
232             /* linux syscall from syscall instruction */
233             ret = do_syscall(env,
234                              env->regs[R_EAX],
235                              env->regs[R_EDI],
236                              env->regs[R_ESI],
237                              env->regs[R_EDX],
238                              env->regs[10],
239                              env->regs[8],
240                              env->regs[9],
241                              0, 0);
242             if (ret == -TARGET_ERESTARTSYS) {
243                 env->eip -= 2;
244             } else if (ret != -TARGET_QEMU_ESIGRETURN) {
245                 env->regs[R_EAX] = ret;
246             }
247             break;
248 #endif
249 #ifdef TARGET_X86_64
250         case EXCP_VSYSCALL:
251             emulate_vsyscall(env);
252             break;
253 #endif
254         case EXCP0B_NOSEG:
255         case EXCP0C_STACK:
256             gen_signal(env, TARGET_SIGBUS, TARGET_SI_KERNEL, 0);
257             break;
258         case EXCP0D_GPF:
259             /* XXX: potential problem if ABI32 */
260 #ifndef TARGET_X86_64
261             if (env->eflags & VM_MASK) {
262                 handle_vm86_fault(env);
263                 break;
264             }
265 #endif
266             gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0);
267             break;
268         case EXCP0E_PAGE:
269             gen_signal(env, TARGET_SIGSEGV,
270                        (env->error_code & 1 ?
271                         TARGET_SEGV_ACCERR : TARGET_SEGV_MAPERR),
272                        env->cr[2]);
273             break;
274         case EXCP00_DIVZ:
275 #ifndef TARGET_X86_64
276             if (env->eflags & VM_MASK) {
277                 handle_vm86_trap(env, trapnr);
278                 break;
279             }
280 #endif
281             gen_signal(env, TARGET_SIGFPE, TARGET_FPE_INTDIV, env->eip);
282             break;
283         case EXCP01_DB:
284         case EXCP03_INT3:
285 #ifndef TARGET_X86_64
286             if (env->eflags & VM_MASK) {
287                 handle_vm86_trap(env, trapnr);
288                 break;
289             }
290 #endif
291             if (trapnr == EXCP01_DB) {
292                 gen_signal(env, TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip);
293             } else {
294                 gen_signal(env, TARGET_SIGTRAP, TARGET_SI_KERNEL, 0);
295             }
296             break;
297         case EXCP04_INTO:
298         case EXCP05_BOUND:
299 #ifndef TARGET_X86_64
300             if (env->eflags & VM_MASK) {
301                 handle_vm86_trap(env, trapnr);
302                 break;
303             }
304 #endif
305             gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0);
306             break;
307         case EXCP06_ILLOP:
308             gen_signal(env, TARGET_SIGILL, TARGET_ILL_ILLOPN, env->eip);
309             break;
310         case EXCP_INTERRUPT:
311             /* just indicate that signals should be handled asap */
312             break;
313         case EXCP_DEBUG:
314             gen_signal(env, TARGET_SIGTRAP, TARGET_TRAP_BRKPT, 0);
315             break;
316         case EXCP_ATOMIC:
317             cpu_exec_step_atomic(cs);
318             break;
319         default:
320             pc = env->segs[R_CS].base + env->eip;
321             EXCP_DUMP(env, "qemu: 0x%08lx: unhandled CPU exception 0x%x - aborting\n",
322                       (long)pc, trapnr);
323             abort();
324         }
325         process_pending_signals(env);
326     }
327 }
328 
329 void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs)
330 {
331     env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
332     env->hflags |= HF_PE_MASK | HF_CPL_MASK;
333     if (env->features[FEAT_1_EDX] & CPUID_SSE) {
334         env->cr[4] |= CR4_OSFXSR_MASK;
335         env->hflags |= HF_OSFXSR_MASK;
336     }
337 #ifndef TARGET_ABI32
338     /* enable 64 bit mode if possible */
339     if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) {
340         fprintf(stderr, "The selected x86 CPU does not support 64 bit mode\n");
341         exit(EXIT_FAILURE);
342     }
343     env->cr[4] |= CR4_PAE_MASK;
344     env->efer |= MSR_EFER_LMA | MSR_EFER_LME;
345     env->hflags |= HF_LMA_MASK;
346 #endif
347 
348     /* flags setup : we activate the IRQs by default as in user mode */
349     env->eflags |= IF_MASK;
350 
351     /* linux register setup */
352 #ifndef TARGET_ABI32
353     env->regs[R_EAX] = regs->rax;
354     env->regs[R_EBX] = regs->rbx;
355     env->regs[R_ECX] = regs->rcx;
356     env->regs[R_EDX] = regs->rdx;
357     env->regs[R_ESI] = regs->rsi;
358     env->regs[R_EDI] = regs->rdi;
359     env->regs[R_EBP] = regs->rbp;
360     env->regs[R_ESP] = regs->rsp;
361     env->eip = regs->rip;
362 #else
363     env->regs[R_EAX] = regs->eax;
364     env->regs[R_EBX] = regs->ebx;
365     env->regs[R_ECX] = regs->ecx;
366     env->regs[R_EDX] = regs->edx;
367     env->regs[R_ESI] = regs->esi;
368     env->regs[R_EDI] = regs->edi;
369     env->regs[R_EBP] = regs->ebp;
370     env->regs[R_ESP] = regs->esp;
371     env->eip = regs->eip;
372 #endif
373 
374     /* linux interrupt setup */
375 #ifndef TARGET_ABI32
376     env->idt.limit = 511;
377 #else
378     env->idt.limit = 255;
379 #endif
380     env->idt.base = target_mmap(0, sizeof(uint64_t) * (env->idt.limit + 1),
381                                 PROT_READ|PROT_WRITE,
382                                 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
383     idt_table = g2h_untagged(env->idt.base);
384     set_idt(0, 0);
385     set_idt(1, 0);
386     set_idt(2, 0);
387     set_idt(3, 3);
388     set_idt(4, 3);
389     set_idt(5, 0);
390     set_idt(6, 0);
391     set_idt(7, 0);
392     set_idt(8, 0);
393     set_idt(9, 0);
394     set_idt(10, 0);
395     set_idt(11, 0);
396     set_idt(12, 0);
397     set_idt(13, 0);
398     set_idt(14, 0);
399     set_idt(15, 0);
400     set_idt(16, 0);
401     set_idt(17, 0);
402     set_idt(18, 0);
403     set_idt(19, 0);
404     set_idt(0x80, 3);
405 
406     /* linux segment setup */
407     {
408         uint64_t *gdt_table;
409         env->gdt.base = target_mmap(0, sizeof(uint64_t) * TARGET_GDT_ENTRIES,
410                                     PROT_READ|PROT_WRITE,
411                                     MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
412         env->gdt.limit = sizeof(uint64_t) * TARGET_GDT_ENTRIES - 1;
413         gdt_table = g2h_untagged(env->gdt.base);
414 #ifdef TARGET_ABI32
415         write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff,
416                  DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
417                  (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
418 #else
419         /* 64 bit code segment */
420         write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff,
421                  DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
422                  DESC_L_MASK |
423                  (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
424 #endif
425         write_dt(&gdt_table[__USER_DS >> 3], 0, 0xfffff,
426                  DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
427                  (3 << DESC_DPL_SHIFT) | (0x2 << DESC_TYPE_SHIFT));
428     }
429     cpu_x86_load_seg(env, R_CS, __USER_CS);
430     cpu_x86_load_seg(env, R_SS, __USER_DS);
431 #ifdef TARGET_ABI32
432     cpu_x86_load_seg(env, R_DS, __USER_DS);
433     cpu_x86_load_seg(env, R_ES, __USER_DS);
434     cpu_x86_load_seg(env, R_FS, __USER_DS);
435     cpu_x86_load_seg(env, R_GS, __USER_DS);
436     /* This hack makes Wine work... */
437     env->segs[R_FS].selector = 0;
438 #else
439     cpu_x86_load_seg(env, R_DS, 0);
440     cpu_x86_load_seg(env, R_ES, 0);
441     cpu_x86_load_seg(env, R_FS, 0);
442     cpu_x86_load_seg(env, R_GS, 0);
443 #endif
444 }
445