xref: /openbmc/qemu/linux-user/i386/cpu_loop.c (revision 243975c0553a61646e7c24beaa12f4451536ea6b)
1 /*
2  *  qemu user cpu loop
3  *
4  *  Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qemu.h"
22 #include "qemu/timer.h"
23 #include "user-internals.h"
24 #include "cpu_loop-common.h"
25 #include "signal-common.h"
26 #include "user-mmap.h"
27 
28 /***********************************************************/
29 /* CPUX86 core interface */
30 
31 uint64_t cpu_get_tsc(CPUX86State *env)
32 {
33     return cpu_get_host_ticks();
34 }
35 
/*
 * Encode an 8-byte segment descriptor at @ptr.
 *
 * The first 32-bit word holds base bits 15..0 (upper half) and limit
 * bits 15..0 (lower half).  The second word holds base bits 23..16,
 * base bits 31..24 and limit bits 19..16, OR-ed with the caller's
 * attribute bits in @flags.  Both words go through tswap32() before
 * being stored.
 */
static void write_dt(void *ptr, unsigned long addr, unsigned long limit,
                     int flags)
{
    uint32_t *desc = ptr;
    unsigned int low_word = (addr << 16) | (limit & 0xffff);
    unsigned int high_word = ((addr >> 16) & 0xff) |
                             (addr & 0xff000000) |
                             (limit & 0x000f0000);

    high_word |= flags;

    desc[0] = tswap32(low_word);
    desc[1] = tswap32(high_word);
}
48 
/*
 * Host-side pointer to the guest interrupt descriptor table.
 * Assigned in target_cpu_copy_regs() from env->idt.base and indexed by
 * set_idt() when gates are written.
 */
static uint64_t *idt_table;
50 
/*
 * Write a 16-byte gate descriptor at @ptr.
 *
 * Word 0: handler address bits 15..0 and the selector in the upper half.
 * Word 1: handler address bits 31..16, bit 15 set (0x8000), @dpl at
 *         bit 13 and @type at bit 8.
 * Word 2: handler address bits 63..32.
 * Word 3: zero.
 */
static void set_gate64(void *ptr, unsigned int type, unsigned int dpl,
                       uint64_t addr, unsigned int sel)
{
    uint32_t *gate = ptr;
    uint32_t word0 = (addr & 0xffff) | (sel << 16);
    uint32_t word1 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);

    gate[0] = tswap32(word0);
    gate[1] = tswap32(word1);
    gate[2] = tswap32(addr >> 32);
    gate[3] = 0;
}
63 
64 #ifdef TARGET_X86_64
65 /* only dpl matters as we do only user space emulation */
66 static void set_idt(int n, unsigned int dpl, bool is64)
67 {
68     set_gate64(idt_table + n * 2, 0, dpl, 0, 0);
69 }
70 #else
/*
 * Write an 8-byte gate descriptor at @ptr.
 *
 * Word 0: handler address bits 15..0 and the selector in the upper half.
 * Word 1: handler address bits 31..16, bit 15 set (0x8000), @dpl at
 *         bit 13 and @type at bit 8.
 */
static void set_gate(void *ptr, unsigned int type, unsigned int dpl,
                     uint32_t addr, unsigned int sel)
{
    uint32_t *gate = ptr;
    uint32_t word0 = (addr & 0xffff) | (sel << 16);
    uint32_t word1 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);

    gate[0] = tswap32(word0);
    gate[1] = tswap32(word1);
}
81 
82 /* only dpl matters as we do only user space emulation */
83 static void set_idt(int n, unsigned int dpl, bool is64)
84 {
85     if (is64) {
86         set_gate64(idt_table + n * 2, 0, dpl, 0, 0);
87     } else {
88         set_gate(idt_table + n, 0, dpl, 0, 0);
89     }
90 }
91 #endif
92 
93 #ifdef TARGET_X86_64
94 static bool write_ok_or_segv(CPUX86State *env, abi_ptr addr, size_t len)
95 {
96     /*
97      * For all the vsyscalls, NULL means "don't write anything" not
98      * "write it at address 0".
99      */
100     if (addr == 0 || access_ok(env_cpu(env), VERIFY_WRITE, addr, len)) {
101         return true;
102     }
103 
104     env->error_code = PG_ERROR_W_MASK | PG_ERROR_U_MASK;
105     force_sig_fault(TARGET_SIGSEGV, TARGET_SEGV_MAPERR, addr);
106     return false;
107 }
108 
109 /*
110  * Since v3.1, the kernel traps and emulates the vsyscall page.
111  * Entry points other than the official generate SIGSEGV.
112  */
113 static void emulate_vsyscall(CPUX86State *env)
114 {
115     int syscall;
116     abi_ulong ret;
117     uint64_t caller;
118 
119     /*
120      * Validate the entry point.  We have already validated the page
121      * during translation to get here; now verify the offset.
122      */
123     switch (env->eip & ~TARGET_PAGE_MASK) {
124     case 0x000:
125         syscall = TARGET_NR_gettimeofday;
126         break;
127     case 0x400:
128         syscall = TARGET_NR_time;
129         break;
130     case 0x800:
131         syscall = TARGET_NR_getcpu;
132         break;
133     default:
134         goto sigsegv;
135     }
136 
137     /*
138      * Validate the return address.
139      * Note that the kernel treats this the same as an invalid entry point.
140      */
141     if (get_user_u64(caller, env->regs[R_ESP])) {
142         goto sigsegv;
143     }
144 
145     /*
146      * Validate the pointer arguments.
147      */
148     switch (syscall) {
149     case TARGET_NR_gettimeofday:
150         if (!write_ok_or_segv(env, env->regs[R_EDI],
151                               sizeof(struct target_timeval)) ||
152             !write_ok_or_segv(env, env->regs[R_ESI],
153                               sizeof(struct target_timezone))) {
154             return;
155         }
156         break;
157     case TARGET_NR_time:
158         if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(abi_long))) {
159             return;
160         }
161         break;
162     case TARGET_NR_getcpu:
163         if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(uint32_t)) ||
164             !write_ok_or_segv(env, env->regs[R_ESI], sizeof(uint32_t))) {
165             return;
166         }
167         break;
168     default:
169         g_assert_not_reached();
170     }
171 
172     /*
173      * Perform the syscall.  None of the vsyscalls should need restarting.
174      */
175     ret = do_syscall(env, syscall, env->regs[R_EDI], env->regs[R_ESI],
176                      env->regs[R_EDX], env->regs[10], env->regs[8],
177                      env->regs[9], 0, 0);
178     g_assert(ret != -QEMU_ERESTARTSYS);
179     g_assert(ret != -QEMU_ESIGRETURN);
180     if (ret == -TARGET_EFAULT) {
181         goto sigsegv;
182     }
183     env->regs[R_EAX] = ret;
184 
185     /* Emulate a ret instruction to leave the vsyscall page.  */
186     env->eip = caller;
187     env->regs[R_ESP] += 8;
188     return;
189 
190  sigsegv:
191     force_sig(TARGET_SIGSEGV);
192 }
193 #endif
194 
195 static bool maybe_handle_vm86_trap(CPUX86State *env, int trapnr)
196 {
197 #ifndef TARGET_X86_64
198     if (env->eflags & VM_MASK) {
199         handle_vm86_trap(env, trapnr);
200         return true;
201     }
202 #endif
203     return false;
204 }
205 
206 void cpu_loop(CPUX86State *env)
207 {
208     CPUState *cs = env_cpu(env);
209     int trapnr;
210     abi_ulong ret;
211 
212     for(;;) {
213         cpu_exec_start(cs);
214         trapnr = cpu_exec(cs);
215         cpu_exec_end(cs);
216         process_queued_cpu_work(cs);
217 
218         switch(trapnr) {
219         case 0x80:
220 #ifndef TARGET_X86_64
221         case EXCP_SYSCALL:
222 #endif
223             /* linux syscall from int $0x80 */
224             ret = do_syscall(env,
225                              env->regs[R_EAX],
226                              env->regs[R_EBX],
227                              env->regs[R_ECX],
228                              env->regs[R_EDX],
229                              env->regs[R_ESI],
230                              env->regs[R_EDI],
231                              env->regs[R_EBP],
232                              0, 0);
233             if (ret == -QEMU_ERESTARTSYS) {
234                 env->eip -= 2;
235             } else if (ret != -QEMU_ESIGRETURN) {
236                 env->regs[R_EAX] = ret;
237             }
238             break;
239 #ifdef TARGET_X86_64
240         case EXCP_SYSCALL:
241             /* linux syscall from syscall instruction.  */
242             ret = do_syscall(env,
243                              env->regs[R_EAX],
244                              env->regs[R_EDI],
245                              env->regs[R_ESI],
246                              env->regs[R_EDX],
247                              env->regs[10],
248                              env->regs[8],
249                              env->regs[9],
250                              0, 0);
251             if (ret == -QEMU_ERESTARTSYS) {
252                 env->eip -= 2;
253             } else if (ret != -QEMU_ESIGRETURN) {
254                 env->regs[R_EAX] = ret;
255             }
256             break;
257         case EXCP_VSYSCALL:
258             emulate_vsyscall(env);
259             break;
260 #endif
261         case EXCP0B_NOSEG:
262         case EXCP0C_STACK:
263             force_sig(TARGET_SIGBUS);
264             break;
265         case EXCP0D_GPF:
266             /* XXX: potential problem if ABI32 */
267             if (maybe_handle_vm86_trap(env, trapnr)) {
268                 break;
269             }
270             force_sig(TARGET_SIGSEGV);
271             break;
272         case EXCP0E_PAGE:
273             force_sig_fault(TARGET_SIGSEGV,
274                             (env->error_code & PG_ERROR_P_MASK ?
275                              TARGET_SEGV_ACCERR : TARGET_SEGV_MAPERR),
276                             env->cr[2]);
277             break;
278         case EXCP00_DIVZ:
279             if (maybe_handle_vm86_trap(env, trapnr)) {
280                 break;
281             }
282             force_sig_fault(TARGET_SIGFPE, TARGET_FPE_INTDIV, env->eip);
283             break;
284         case EXCP01_DB:
285             if (maybe_handle_vm86_trap(env, trapnr)) {
286                 break;
287             }
288             force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip);
289             break;
290         case EXCP03_INT3:
291             if (maybe_handle_vm86_trap(env, trapnr)) {
292                 break;
293             }
294             force_sig(TARGET_SIGTRAP);
295             break;
296         case EXCP04_INTO:
297         case EXCP05_BOUND:
298             if (maybe_handle_vm86_trap(env, trapnr)) {
299                 break;
300             }
301             force_sig(TARGET_SIGSEGV);
302             break;
303         case EXCP06_ILLOP:
304             force_sig_fault(TARGET_SIGILL, TARGET_ILL_ILLOPN, env->eip);
305             break;
306         case EXCP_INTERRUPT:
307             /* just indicate that signals should be handled asap */
308             break;
309         case EXCP_DEBUG:
310             force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip);
311             break;
312         case EXCP_ATOMIC:
313             cpu_exec_step_atomic(cs);
314             break;
315         default:
316             EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n",
317                       trapnr);
318             abort();
319         }
320         process_pending_signals(env);
321     }
322 }
323 
324 static void target_cpu_free(void *obj)
325 {
326     CPUArchState *env = ((CPUState *)obj)->env_ptr;
327     target_munmap(env->gdt.base, sizeof(uint64_t) * TARGET_GDT_ENTRIES);
328     g_free(obj);
329 }
330 
331 void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs)
332 {
333     CPUState *cpu = env_cpu(env);
334     bool is64 = (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) != 0;
335     int i;
336 
337     OBJECT(cpu)->free = target_cpu_free;
338     env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
339     env->hflags |= HF_PE_MASK | HF_CPL_MASK;
340     if (env->features[FEAT_1_EDX] & CPUID_SSE) {
341         env->cr[4] |= CR4_OSFXSR_MASK;
342         env->hflags |= HF_OSFXSR_MASK;
343     }
344 
345     /* enable 64 bit mode if possible */
346     if (is64) {
347         env->cr[4] |= CR4_PAE_MASK;
348         env->efer |= MSR_EFER_LMA | MSR_EFER_LME;
349         env->hflags |= HF_LMA_MASK;
350     }
351 #ifndef TARGET_ABI32
352     else {
353         fprintf(stderr, "The selected x86 CPU does not support 64 bit mode\n");
354         exit(EXIT_FAILURE);
355     }
356 #endif
357 
358     /* flags setup : we activate the IRQs by default as in user mode */
359     env->eflags |= IF_MASK;
360 
361     /* linux register setup */
362 #ifndef TARGET_ABI32
363     env->regs[R_EAX] = regs->rax;
364     env->regs[R_EBX] = regs->rbx;
365     env->regs[R_ECX] = regs->rcx;
366     env->regs[R_EDX] = regs->rdx;
367     env->regs[R_ESI] = regs->rsi;
368     env->regs[R_EDI] = regs->rdi;
369     env->regs[R_EBP] = regs->rbp;
370     env->regs[R_ESP] = regs->rsp;
371     env->eip = regs->rip;
372 #else
373     env->regs[R_EAX] = regs->eax;
374     env->regs[R_EBX] = regs->ebx;
375     env->regs[R_ECX] = regs->ecx;
376     env->regs[R_EDX] = regs->edx;
377     env->regs[R_ESI] = regs->esi;
378     env->regs[R_EDI] = regs->edi;
379     env->regs[R_EBP] = regs->ebp;
380     env->regs[R_ESP] = regs->esp;
381     env->eip = regs->eip;
382 #endif
383 
384     /* linux interrupt setup */
385 #ifndef TARGET_ABI32
386     env->idt.limit = 511;
387 #else
388     env->idt.limit = 255;
389 #endif
390     env->idt.base = target_mmap(0, sizeof(uint64_t) * (env->idt.limit + 1),
391                                 PROT_READ|PROT_WRITE,
392                                 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
393     idt_table = g2h_untagged(env->idt.base);
394     for (i = 0; i < 20; i++) {
395         set_idt(i, 0, is64);
396     }
397     set_idt(3, 3, is64);
398     set_idt(4, 3, is64);
399     set_idt(0x80, 3, is64);
400 
401     /* linux segment setup */
402     {
403         uint64_t *gdt_table;
404         env->gdt.base = target_mmap(0, sizeof(uint64_t) * TARGET_GDT_ENTRIES,
405                                     PROT_READ|PROT_WRITE,
406                                     MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
407         env->gdt.limit = sizeof(uint64_t) * TARGET_GDT_ENTRIES - 1;
408         gdt_table = g2h_untagged(env->gdt.base);
409 #ifdef TARGET_ABI32
410         write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff,
411                  DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
412                  (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
413 #else
414         /* 64 bit code segment */
415         write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff,
416                  DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
417                  DESC_L_MASK |
418                  (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
419 #endif
420         write_dt(&gdt_table[__USER_DS >> 3], 0, 0xfffff,
421                  DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
422                  (3 << DESC_DPL_SHIFT) | (0x2 << DESC_TYPE_SHIFT));
423     }
424     cpu_x86_load_seg(env, R_CS, __USER_CS);
425     cpu_x86_load_seg(env, R_SS, __USER_DS);
426 #ifdef TARGET_ABI32
427     cpu_x86_load_seg(env, R_DS, __USER_DS);
428     cpu_x86_load_seg(env, R_ES, __USER_DS);
429     cpu_x86_load_seg(env, R_FS, __USER_DS);
430     cpu_x86_load_seg(env, R_GS, __USER_DS);
431     /* This hack makes Wine work... */
432     env->segs[R_FS].selector = 0;
433 #else
434     cpu_x86_load_seg(env, R_DS, 0);
435     cpu_x86_load_seg(env, R_ES, 0);
436     cpu_x86_load_seg(env, R_FS, 0);
437     cpu_x86_load_seg(env, R_GS, 0);
438 #endif
439 }
440