xref: /openbmc/qemu/linux-user/i386/cpu_loop.c (revision ef929281f1ddb1ce74f5fe39377a88e6cc8237aa)
1  /*
2   *  qemu user cpu loop
3   *
4   *  Copyright (c) 2003-2008 Fabrice Bellard
5   *
6   *  This program is free software; you can redistribute it and/or modify
7   *  it under the terms of the GNU General Public License as published by
8   *  the Free Software Foundation; either version 2 of the License, or
9   *  (at your option) any later version.
10   *
11   *  This program is distributed in the hope that it will be useful,
12   *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13   *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   *  GNU General Public License for more details.
15   *
16   *  You should have received a copy of the GNU General Public License
17   *  along with this program; if not, see <http://www.gnu.org/licenses/>.
18   */
19  
20  #include "qemu/osdep.h"
21  #include "qemu.h"
22  #include "qemu/timer.h"
23  #include "user-internals.h"
24  #include "cpu_loop-common.h"
25  #include "signal-common.h"
26  #include "user-mmap.h"
27  
28  /***********************************************************/
29  /* CPUX86 core interface */
30  
31  uint64_t cpu_get_tsc(CPUX86State *env)
32  {
33      return cpu_get_host_ticks();
34  }
35  
/*
 * Build a two-word (8 byte) descriptor table entry at @ptr.
 *
 * @ptr:   destination; two consecutive 32-bit words are written
 * @addr:  base address, split over bits 16..31 of word 0 and
 *         bits 0..7 and 24..31 of word 1
 * @limit: limit, split over bits 0..15 of word 0 and bits 16..19 of word 1
 * @flags: extra bits OR-ed verbatim into word 1
 *
 * Both words are stored through tswap32().
 */
static void write_dt(void *ptr, unsigned long addr, unsigned long limit,
                     int flags)
{
    uint32_t *desc = ptr;
    unsigned int lo, hi;

    /* Word 0: low 16 bits of the base on top, low 16 bits of the limit below. */
    lo = (limit & 0xffff) | (addr << 16);

    /* Word 1: remaining base and limit bits, then the caller's flags. */
    hi = (addr & 0xff000000) | ((addr >> 16) & 0xff) | (limit & 0x000f0000);
    hi |= flags;

    desc[0] = tswap32(lo);
    desc[1] = tswap32(hi);
}
48  
/*
 * Storage for the interrupt descriptor table, viewed as 64-bit slots.
 * Assigned in target_cpu_copy_regs() and indexed by set_idt().
 */
static uint64_t *idt_table;
50  
/*
 * Write a four-word (16 byte) gate descriptor at @ptr.
 *
 * @ptr:  destination; four consecutive 32-bit words are written
 * @type: gate type, placed at bit 8 of word 1
 * @dpl:  descriptor privilege level, placed at bit 13 of word 1
 * @addr: 64-bit handler address, spread over words 0, 1 and 2
 * @sel:  selector, placed in the upper half of word 0
 *
 * Bit 15 (0x8000) of word 1 is always set and word 3 is always zero.
 */
static void set_gate64(void *ptr, unsigned int type, unsigned int dpl,
                       uint64_t addr, unsigned int sel)
{
    uint32_t *gate = ptr;
    uint32_t word0 = (sel << 16) | (addr & 0xffff);
    uint32_t word1 = (type << 8) | (dpl << 13) | 0x8000 | (addr & 0xffff0000);

    gate[0] = tswap32(word0);
    gate[1] = tswap32(word1);
    gate[2] = tswap32(addr >> 32);
    gate[3] = 0;
}
63  
64  #ifdef TARGET_X86_64
65  /* only dpl matters as we do only user space emulation */
66  static void set_idt(int n, unsigned int dpl, bool is64)
67  {
68      set_gate64(idt_table + n * 2, 0, dpl, 0, 0);
69  }
70  #else
/*
 * Write a two-word (8 byte) gate descriptor at @ptr.
 *
 * @ptr:  destination; two consecutive 32-bit words are written
 * @type: gate type, placed at bit 8 of word 1
 * @dpl:  descriptor privilege level, placed at bit 13 of word 1
 * @addr: 32-bit handler address, split between the two words
 * @sel:  selector, placed in the upper half of word 0
 *
 * Bit 15 (0x8000) of word 1 is always set.
 */
static void set_gate(void *ptr, unsigned int type, unsigned int dpl,
                     uint32_t addr, unsigned int sel)
{
    uint32_t *gate = ptr;
    uint32_t word0 = (sel << 16) | (addr & 0xffff);
    uint32_t word1 = (type << 8) | (dpl << 13) | 0x8000 | (addr & 0xffff0000);

    gate[0] = tswap32(word0);
    gate[1] = tswap32(word1);
}
81  
82  /* only dpl matters as we do only user space emulation */
83  static void set_idt(int n, unsigned int dpl, bool is64)
84  {
85      if (is64) {
86          set_gate64(idt_table + n * 2, 0, dpl, 0, 0);
87      } else {
88          set_gate(idt_table + n, 0, dpl, 0, 0);
89      }
90  }
91  #endif
92  
93  #ifdef TARGET_X86_64
94  static bool write_ok_or_segv(CPUX86State *env, abi_ptr addr, size_t len)
95  {
96      /*
97       * For all the vsyscalls, NULL means "don't write anything" not
98       * "write it at address 0".
99       */
100      if (addr == 0 || access_ok(env_cpu(env), VERIFY_WRITE, addr, len)) {
101          return true;
102      }
103  
104      env->error_code = PG_ERROR_W_MASK | PG_ERROR_U_MASK;
105      force_sig_fault(TARGET_SIGSEGV, TARGET_SEGV_MAPERR, addr);
106      return false;
107  }
108  
109  /*
110   * Since v3.1, the kernel traps and emulates the vsyscall page.
111   * Entry points other than the official generate SIGSEGV.
112   */
113  static void emulate_vsyscall(CPUX86State *env)
114  {
115      int syscall;
116      abi_ulong ret;
117      uint64_t caller;
118  
119      /*
120       * Validate the entry point.  We have already validated the page
121       * during translation to get here; now verify the offset.
122       */
123      switch (env->eip & ~TARGET_PAGE_MASK) {
124      case 0x000:
125          syscall = TARGET_NR_gettimeofday;
126          break;
127      case 0x400:
128          syscall = TARGET_NR_time;
129          break;
130      case 0x800:
131          syscall = TARGET_NR_getcpu;
132          break;
133      default:
134          goto sigsegv;
135      }
136  
137      /*
138       * Validate the return address.
139       * Note that the kernel treats this the same as an invalid entry point.
140       */
141      if (get_user_u64(caller, env->regs[R_ESP])) {
142          goto sigsegv;
143      }
144  
145      /*
146       * Validate the pointer arguments.
147       */
148      switch (syscall) {
149      case TARGET_NR_gettimeofday:
150          if (!write_ok_or_segv(env, env->regs[R_EDI],
151                                sizeof(struct target_timeval)) ||
152              !write_ok_or_segv(env, env->regs[R_ESI],
153                                sizeof(struct target_timezone))) {
154              return;
155          }
156          break;
157      case TARGET_NR_time:
158          if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(abi_long))) {
159              return;
160          }
161          break;
162      case TARGET_NR_getcpu:
163          if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(uint32_t)) ||
164              !write_ok_or_segv(env, env->regs[R_ESI], sizeof(uint32_t))) {
165              return;
166          }
167          break;
168      default:
169          g_assert_not_reached();
170      }
171  
172      /*
173       * Perform the syscall.  None of the vsyscalls should need restarting.
174       */
175      ret = do_syscall(env, syscall, env->regs[R_EDI], env->regs[R_ESI],
176                       env->regs[R_EDX], env->regs[10], env->regs[8],
177                       env->regs[9], 0, 0);
178      g_assert(ret != -QEMU_ERESTARTSYS);
179      g_assert(ret != -QEMU_ESIGRETURN);
180      if (ret == -TARGET_EFAULT) {
181          goto sigsegv;
182      }
183      env->regs[R_EAX] = ret;
184  
185      /* Emulate a ret instruction to leave the vsyscall page.  */
186      env->eip = caller;
187      env->regs[R_ESP] += 8;
188      return;
189  
190   sigsegv:
191      force_sig(TARGET_SIGSEGV);
192  }
193  #endif
194  
195  static bool maybe_handle_vm86_trap(CPUX86State *env, int trapnr)
196  {
197  #ifndef TARGET_X86_64
198      if (env->eflags & VM_MASK) {
199          handle_vm86_trap(env, trapnr);
200          return true;
201      }
202  #endif
203      return false;
204  }
205  
206  void cpu_loop(CPUX86State *env)
207  {
208      CPUState *cs = env_cpu(env);
209      int trapnr;
210      abi_ulong ret;
211  
212      for(;;) {
213          cpu_exec_start(cs);
214          trapnr = cpu_exec(cs);
215          cpu_exec_end(cs);
216          process_queued_cpu_work(cs);
217  
218          switch(trapnr) {
219          case 0x80:
220  #ifndef TARGET_X86_64
221          case EXCP_SYSCALL:
222  #endif
223              /* linux syscall from int $0x80 */
224              ret = do_syscall(env,
225                               env->regs[R_EAX],
226                               env->regs[R_EBX],
227                               env->regs[R_ECX],
228                               env->regs[R_EDX],
229                               env->regs[R_ESI],
230                               env->regs[R_EDI],
231                               env->regs[R_EBP],
232                               0, 0);
233              if (ret == -QEMU_ERESTARTSYS) {
234                  env->eip -= 2;
235              } else if (ret != -QEMU_ESIGRETURN) {
236                  env->regs[R_EAX] = ret;
237              }
238              break;
239  #ifdef TARGET_X86_64
240          case EXCP_SYSCALL:
241              /* linux syscall from syscall instruction.  */
242              ret = do_syscall(env,
243                               env->regs[R_EAX],
244                               env->regs[R_EDI],
245                               env->regs[R_ESI],
246                               env->regs[R_EDX],
247                               env->regs[10],
248                               env->regs[8],
249                               env->regs[9],
250                               0, 0);
251              if (ret == -QEMU_ERESTARTSYS) {
252                  env->eip -= 2;
253              } else if (ret != -QEMU_ESIGRETURN) {
254                  env->regs[R_EAX] = ret;
255              }
256              break;
257          case EXCP_VSYSCALL:
258              emulate_vsyscall(env);
259              break;
260  #endif
261          case EXCP0B_NOSEG:
262          case EXCP0C_STACK:
263              force_sig(TARGET_SIGBUS);
264              break;
265          case EXCP0D_GPF:
266              /* XXX: potential problem if ABI32 */
267              if (maybe_handle_vm86_trap(env, trapnr)) {
268                  break;
269              }
270              force_sig(TARGET_SIGSEGV);
271              break;
272          case EXCP0E_PAGE:
273              force_sig_fault(TARGET_SIGSEGV,
274                              (env->error_code & PG_ERROR_P_MASK ?
275                               TARGET_SEGV_ACCERR : TARGET_SEGV_MAPERR),
276                              env->cr[2]);
277              break;
278          case EXCP00_DIVZ:
279              if (maybe_handle_vm86_trap(env, trapnr)) {
280                  break;
281              }
282              force_sig_fault(TARGET_SIGFPE, TARGET_FPE_INTDIV, env->eip);
283              break;
284          case EXCP01_DB:
285              if (maybe_handle_vm86_trap(env, trapnr)) {
286                  break;
287              }
288              force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip);
289              break;
290          case EXCP03_INT3:
291              if (maybe_handle_vm86_trap(env, trapnr)) {
292                  break;
293              }
294              force_sig(TARGET_SIGTRAP);
295              break;
296          case EXCP04_INTO:
297          case EXCP05_BOUND:
298              if (maybe_handle_vm86_trap(env, trapnr)) {
299                  break;
300              }
301              force_sig(TARGET_SIGSEGV);
302              break;
303          case EXCP06_ILLOP:
304              force_sig_fault(TARGET_SIGILL, TARGET_ILL_ILLOPN, env->eip);
305              break;
306          case EXCP_INTERRUPT:
307              /* just indicate that signals should be handled asap */
308              break;
309          case EXCP_DEBUG:
310              force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip);
311              break;
312          case EXCP_ATOMIC:
313              cpu_exec_step_atomic(cs);
314              break;
315          default:
316              EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n",
317                        trapnr);
318              abort();
319          }
320          process_pending_signals(env);
321      }
322  }
323  
324  static void target_cpu_free(void *obj)
325  {
326      target_munmap(cpu_env(obj)->gdt.base,
327                    sizeof(uint64_t) * TARGET_GDT_ENTRIES);
328      g_free(obj);
329  }
330  
331  void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs)
332  {
333      CPUState *cpu = env_cpu(env);
334      bool is64 = (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) != 0;
335      int i;
336  
337      OBJECT(cpu)->free = target_cpu_free;
338      env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
339      env->hflags |= HF_PE_MASK | HF_CPL_MASK;
340      if (env->features[FEAT_1_EDX] & CPUID_SSE) {
341          env->cr[4] |= CR4_OSFXSR_MASK;
342          env->hflags |= HF_OSFXSR_MASK;
343      }
344  
345      /* enable 64 bit mode if possible */
346      if (is64) {
347          env->cr[4] |= CR4_PAE_MASK;
348          env->efer |= MSR_EFER_LMA | MSR_EFER_LME;
349          env->hflags |= HF_LMA_MASK;
350      }
351  #ifndef TARGET_ABI32
352      else {
353          fprintf(stderr, "The selected x86 CPU does not support 64 bit mode\n");
354          exit(EXIT_FAILURE);
355      }
356  #endif
357  
358      /* flags setup : we activate the IRQs by default as in user mode */
359      env->eflags |= IF_MASK;
360  
361      /* linux register setup */
362  #ifndef TARGET_ABI32
363      env->regs[R_EAX] = regs->rax;
364      env->regs[R_EBX] = regs->rbx;
365      env->regs[R_ECX] = regs->rcx;
366      env->regs[R_EDX] = regs->rdx;
367      env->regs[R_ESI] = regs->rsi;
368      env->regs[R_EDI] = regs->rdi;
369      env->regs[R_EBP] = regs->rbp;
370      env->regs[R_ESP] = regs->rsp;
371      env->eip = regs->rip;
372  #else
373      env->regs[R_EAX] = regs->eax;
374      env->regs[R_EBX] = regs->ebx;
375      env->regs[R_ECX] = regs->ecx;
376      env->regs[R_EDX] = regs->edx;
377      env->regs[R_ESI] = regs->esi;
378      env->regs[R_EDI] = regs->edi;
379      env->regs[R_EBP] = regs->ebp;
380      env->regs[R_ESP] = regs->esp;
381      env->eip = regs->eip;
382  #endif
383  
384      /* linux interrupt setup */
385  #ifndef TARGET_ABI32
386      env->idt.limit = 511;
387  #else
388      env->idt.limit = 255;
389  #endif
390      env->idt.base = target_mmap(0, sizeof(uint64_t) * (env->idt.limit + 1),
391                                  PROT_READ|PROT_WRITE,
392                                  MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
393      idt_table = g2h_untagged(env->idt.base);
394      for (i = 0; i < 20; i++) {
395          set_idt(i, 0, is64);
396      }
397      set_idt(3, 3, is64);
398      set_idt(4, 3, is64);
399      set_idt(0x80, 3, is64);
400  
401      /* linux segment setup */
402      {
403          uint64_t *gdt_table;
404          env->gdt.base = target_mmap(0, sizeof(uint64_t) * TARGET_GDT_ENTRIES,
405                                      PROT_READ|PROT_WRITE,
406                                      MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
407          env->gdt.limit = sizeof(uint64_t) * TARGET_GDT_ENTRIES - 1;
408          gdt_table = g2h_untagged(env->gdt.base);
409  #ifdef TARGET_ABI32
410          write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff,
411                   DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
412                   (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
413  #else
414          /* 64 bit code segment */
415          write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff,
416                   DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
417                   DESC_L_MASK |
418                   (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
419  #endif
420          write_dt(&gdt_table[__USER_DS >> 3], 0, 0xfffff,
421                   DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
422                   (3 << DESC_DPL_SHIFT) | (0x2 << DESC_TYPE_SHIFT));
423      }
424      cpu_x86_load_seg(env, R_CS, __USER_CS);
425      cpu_x86_load_seg(env, R_SS, __USER_DS);
426  #ifdef TARGET_ABI32
427      cpu_x86_load_seg(env, R_DS, __USER_DS);
428      cpu_x86_load_seg(env, R_ES, __USER_DS);
429      cpu_x86_load_seg(env, R_FS, __USER_DS);
430      cpu_x86_load_seg(env, R_GS, __USER_DS);
431      /* This hack makes Wine work... */
432      env->segs[R_FS].selector = 0;
433  #else
434      cpu_x86_load_seg(env, R_DS, 0);
435      cpu_x86_load_seg(env, R_ES, 0);
436      cpu_x86_load_seg(env, R_FS, 0);
437      cpu_x86_load_seg(env, R_GS, 0);
438  #endif
439  }
440