/*
 * qemu user cpu loop
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu.h"
#include "qemu/timer.h"
#include "user-internals.h"
#include "cpu_loop-common.h"
#include "signal-common.h"
#include "user-mmap.h"

/***********************************************************/
/* CPUX86 core interface */

uint64_t cpu_get_tsc(CPUX86State *env)
{
    return cpu_get_host_ticks();
}
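/*
 * write_dt() below builds an 8-byte protected-mode segment descriptor
 * in place: the low 32 bits hold limit[15:0] and base[15:0], the high
 * 32 bits hold base[23:16], the access/flag bits, limit[19:16] and
 * base[31:24].  Both halves are stored with tswap32() so the table
 * ends up in guest byte order.
 */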
static void write_dt(void *ptr, unsigned long addr, unsigned long limit,
                     int flags)
{
    unsigned int e1, e2;
    uint32_t *p;
    e1 = (addr << 16) | (limit & 0xffff);
    e2 = ((addr >> 16) & 0xff) | (addr & 0xff000000) | (limit & 0x000f0000);
    e2 |= flags;
    p = ptr;
    p[0] = tswap32(e1);
    p[1] = tswap32(e2);
}

static uint64_t *idt_table;
#ifdef TARGET_X86_64
static void set_gate64(void *ptr, unsigned int type, unsigned int dpl,
                       uint64_t addr, unsigned int sel)
{
    uint32_t *p, e1, e2;
    e1 = (addr & 0xffff) | (sel << 16);
    e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);
    p = ptr;
    p[0] = tswap32(e1);
    p[1] = tswap32(e2);
    p[2] = tswap32(addr >> 32);
    p[3] = 0;
}
/* only dpl matters as we do only user space emulation */
static void set_idt(int n, unsigned int dpl)
{
    set_gate64(idt_table + n * 2, 0, dpl, 0, 0);
}
#else
static void set_gate(void *ptr, unsigned int type, unsigned int dpl,
                     uint32_t addr, unsigned int sel)
{
    uint32_t *p, e1, e2;
    e1 = (addr & 0xffff) | (sel << 16);
    e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);
    p = ptr;
    p[0] = tswap32(e1);
    p[1] = tswap32(e2);
}

/* only dpl matters as we do only user space emulation */
static void set_idt(int n, unsigned int dpl)
{
    set_gate(idt_table + n, 0, dpl, 0, 0);
}
#endif

#ifdef TARGET_X86_64
static bool write_ok_or_segv(CPUX86State *env, abi_ptr addr, size_t len)
{
    /*
     * For all the vsyscalls, NULL means "don't write anything" not
     * "write it at address 0".
     */
    if (addr == 0 || access_ok(env_cpu(env), VERIFY_WRITE, addr, len)) {
        return true;
    }

    env->error_code = PG_ERROR_W_MASK | PG_ERROR_U_MASK;
    force_sig_fault(TARGET_SIGSEGV, TARGET_SEGV_MAPERR, addr);
    return false;
}

/*
 * Since v3.1, the kernel traps and emulates the vsyscall page.
 * Entry points other than the official ones generate SIGSEGV.
 */
static void emulate_vsyscall(CPUX86State *env)
{
    int syscall;
    abi_ulong ret;
    uint64_t caller;

    /*
     * Validate the entry point.  We have already validated the page
     * during translation to get here; now verify the offset.
     */
    switch (env->eip & ~TARGET_PAGE_MASK) {
    case 0x000:
        syscall = TARGET_NR_gettimeofday;
        break;
    case 0x400:
        syscall = TARGET_NR_time;
        break;
    case 0x800:
        syscall = TARGET_NR_getcpu;
        break;
    default:
        goto sigsegv;
    }

    /*
     * Validate the return address.
     * Note that the kernel treats this the same as an invalid entry point.
     */
    if (get_user_u64(caller, env->regs[R_ESP])) {
        goto sigsegv;
    }

    /*
     * Validate the pointer arguments.
     */
    switch (syscall) {
    case TARGET_NR_gettimeofday:
        if (!write_ok_or_segv(env, env->regs[R_EDI],
                              sizeof(struct target_timeval)) ||
            !write_ok_or_segv(env, env->regs[R_ESI],
                              sizeof(struct target_timezone))) {
            return;
        }
        break;
    case TARGET_NR_time:
        if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(abi_long))) {
            return;
        }
        break;
    case TARGET_NR_getcpu:
        if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(uint32_t)) ||
            !write_ok_or_segv(env, env->regs[R_ESI], sizeof(uint32_t))) {
            return;
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Perform the syscall.  None of the vsyscalls should need restarting.
     */
    ret = do_syscall(env, syscall, env->regs[R_EDI], env->regs[R_ESI],
                     env->regs[R_EDX], env->regs[10], env->regs[8],
                     env->regs[9], 0, 0);
    g_assert(ret != -QEMU_ERESTARTSYS);
    g_assert(ret != -QEMU_ESIGRETURN);
    if (ret == -TARGET_EFAULT) {
        goto sigsegv;
    }
    env->regs[R_EAX] = ret;

    /* Emulate a ret instruction to leave the vsyscall page.  */
    env->eip = caller;
    env->regs[R_ESP] += 8;
    return;

 sigsegv:
    force_sig(TARGET_SIGSEGV);
}
#endif

static bool maybe_handle_vm86_trap(CPUX86State *env, int trapnr)
{
#ifndef TARGET_X86_64
    if (env->eflags & VM_MASK) {
        handle_vm86_trap(env, trapnr);
        return true;
    }
#endif
    return false;
}

void cpu_loop(CPUX86State *env)
{
    CPUState *cs = env_cpu(env);
    int trapnr;
    abi_ulong pc;
    abi_ulong ret;

    for (;;) {
        cpu_exec_start(cs);
        trapnr = cpu_exec(cs);
        cpu_exec_end(cs);
        process_queued_cpu_work(cs);

        switch (trapnr) {
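        /*
         * Both "int $0x80" (0xcd 0x80) and "syscall" (0x0f 0x05) are
         * two-byte instructions, so when do_syscall() reports
         * -QEMU_ERESTARTSYS we step eip back by 2: the trapping
         * instruction is re-executed once the pending signal has been
         * handled, mirroring the kernel's syscall restart behaviour.
         * -QEMU_ESIGRETURN means sigreturn has already restored the
         * register state, so EAX must not be overwritten.
         */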
        case 0x80:
            /* linux syscall from int $0x80 */
            ret = do_syscall(env,
                             env->regs[R_EAX],
                             env->regs[R_EBX],
                             env->regs[R_ECX],
                             env->regs[R_EDX],
                             env->regs[R_ESI],
                             env->regs[R_EDI],
                             env->regs[R_EBP],
                             0, 0);
            if (ret == -QEMU_ERESTARTSYS) {
                env->eip -= 2;
            } else if (ret != -QEMU_ESIGRETURN) {
                env->regs[R_EAX] = ret;
            }
            break;
#ifndef TARGET_ABI32
        case EXCP_SYSCALL:
            /* linux syscall from syscall instruction */
            ret = do_syscall(env,
                             env->regs[R_EAX],
                             env->regs[R_EDI],
                             env->regs[R_ESI],
                             env->regs[R_EDX],
                             env->regs[10],
                             env->regs[8],
                             env->regs[9],
                             0, 0);
            if (ret == -QEMU_ERESTARTSYS) {
                env->eip -= 2;
            } else if (ret != -QEMU_ESIGRETURN) {
                env->regs[R_EAX] = ret;
            }
            break;
#endif
#ifdef TARGET_X86_64
        case EXCP_VSYSCALL:
            emulate_vsyscall(env);
            break;
#endif
        case EXCP0B_NOSEG:
        case EXCP0C_STACK:
            force_sig(TARGET_SIGBUS);
            break;
        case EXCP0D_GPF:
            /* XXX: potential problem if ABI32 */
            if (maybe_handle_vm86_trap(env, trapnr)) {
                break;
            }
            force_sig(TARGET_SIGSEGV);
            break;
        case EXCP0E_PAGE:
            force_sig_fault(TARGET_SIGSEGV,
                            (env->error_code & PG_ERROR_P_MASK ?
                             TARGET_SEGV_ACCERR : TARGET_SEGV_MAPERR),
                            env->cr[2]);
            break;
        case EXCP00_DIVZ:
            if (maybe_handle_vm86_trap(env, trapnr)) {
                break;
            }
            force_sig_fault(TARGET_SIGFPE, TARGET_FPE_INTDIV, env->eip);
            break;
        case EXCP01_DB:
            if (maybe_handle_vm86_trap(env, trapnr)) {
                break;
            }
            force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip);
            break;
        case EXCP03_INT3:
            if (maybe_handle_vm86_trap(env, trapnr)) {
                break;
            }
            force_sig(TARGET_SIGTRAP);
            break;
        case EXCP04_INTO:
        case EXCP05_BOUND:
            if (maybe_handle_vm86_trap(env, trapnr)) {
                break;
            }
            force_sig(TARGET_SIGSEGV);
            break;
        case EXCP06_ILLOP:
            force_sig_fault(TARGET_SIGILL, TARGET_ILL_ILLOPN, env->eip);
            break;
        case EXCP_INTERRUPT:
            /* just indicate that signals should be handled asap */
            break;
        case EXCP_DEBUG:
            force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip);
            break;
        case EXCP_ATOMIC:
            cpu_exec_step_atomic(cs);
            break;
        default:
            pc = env->segs[R_CS].base + env->eip;
            EXCP_DUMP(env, "qemu: 0x%08lx: unhandled CPU exception 0x%x - aborting\n",
                      (long)pc, trapnr);
            abort();
        }
        process_pending_signals(env);
    }
}

void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs)
{
    env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
    env->hflags |= HF_PE_MASK | HF_CPL_MASK;
    if (env->features[FEAT_1_EDX] & CPUID_SSE) {
        env->cr[4] |= CR4_OSFXSR_MASK;
        env->hflags |= HF_OSFXSR_MASK;
    }
#ifndef TARGET_ABI32
    /* enable 64 bit mode if possible */
    if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) {
        fprintf(stderr, "The selected x86 CPU does not support 64 bit mode\n");
        exit(EXIT_FAILURE);
    }
    env->cr[4] |= CR4_PAE_MASK;
    env->efer |= MSR_EFER_LMA | MSR_EFER_LME;
    env->hflags |= HF_LMA_MASK;
#endif

    /* flags setup: we activate the IRQs by default as in user mode */
    env->eflags |= IF_MASK;

    /* linux register setup */
#ifndef TARGET_ABI32
    env->regs[R_EAX] = regs->rax;
    env->regs[R_EBX] = regs->rbx;
    env->regs[R_ECX] = regs->rcx;
    env->regs[R_EDX] = regs->rdx;
    env->regs[R_ESI] = regs->rsi;
    env->regs[R_EDI] = regs->rdi;
    env->regs[R_EBP] = regs->rbp;
    env->regs[R_ESP] = regs->rsp;
    env->eip = regs->rip;
#else
    env->regs[R_EAX] = regs->eax;
    env->regs[R_EBX] = regs->ebx;
    env->regs[R_ECX] = regs->ecx;
    env->regs[R_EDX] = regs->edx;
    env->regs[R_ESI] = regs->esi;
    env->regs[R_EDI] = regs->edi;
    env->regs[R_EBP] = regs->ebp;
    env->regs[R_ESP] = regs->esp;
    env->eip = regs->eip;
#endif

    /* linux interrupt setup */
#ifndef TARGET_ABI32
    env->idt.limit = 511;
#else
    env->idt.limit = 255;
#endif
    env->idt.base = target_mmap(0, sizeof(uint64_t) * (env->idt.limit + 1),
                                PROT_READ|PROT_WRITE,
                                MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
    idt_table = g2h_untagged(env->idt.base);
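    /*
     * A gate's DPL must be >= CPL for a software "int n" to go
     * through, so the DPL 0 entries below fault with #GP from user
     * code, while the DPL 3 entries stay reachable from ring 3:
     * int3 (vector 3), into (vector 4) and the int $0x80 syscall
     * vector.
     */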
    set_idt(0, 0);
    set_idt(1, 0);
    set_idt(2, 0);
    set_idt(3, 3);
    set_idt(4, 3);
    set_idt(5, 0);
    set_idt(6, 0);
    set_idt(7, 0);
    set_idt(8, 0);
    set_idt(9, 0);
    set_idt(10, 0);
    set_idt(11, 0);
    set_idt(12, 0);
    set_idt(13, 0);
    set_idt(14, 0);
    set_idt(15, 0);
    set_idt(16, 0);
    set_idt(17, 0);
    set_idt(18, 0);
    set_idt(19, 0);
    set_idt(0x80, 3);

    /* linux segment setup */
    {
        uint64_t *gdt_table;
        env->gdt.base = target_mmap(0, sizeof(uint64_t) * TARGET_GDT_ENTRIES,
                                    PROT_READ|PROT_WRITE,
                                    MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
        env->gdt.limit = sizeof(uint64_t) * TARGET_GDT_ENTRIES - 1;
        gdt_table = g2h_untagged(env->gdt.base);
#ifdef TARGET_ABI32
        write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff,
                 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
                 (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
#else
        /* 64 bit code segment */
        write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff,
                 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
                 DESC_L_MASK |
                 (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
#endif
        write_dt(&gdt_table[__USER_DS >> 3], 0, 0xfffff,
                 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
                 (3 << DESC_DPL_SHIFT) | (0x2 << DESC_TYPE_SHIFT));
    }
    cpu_x86_load_seg(env, R_CS, __USER_CS);
    cpu_x86_load_seg(env, R_SS, __USER_DS);
#ifdef TARGET_ABI32
    cpu_x86_load_seg(env, R_DS, __USER_DS);
    cpu_x86_load_seg(env, R_ES, __USER_DS);
    cpu_x86_load_seg(env, R_FS, __USER_DS);
    cpu_x86_load_seg(env, R_GS, __USER_DS);
    /* This hack makes Wine work... */
    env->segs[R_FS].selector = 0;
#else
    cpu_x86_load_seg(env, R_DS, 0);
    cpu_x86_load_seg(env, R_ES, 0);
    cpu_x86_load_seg(env, R_FS, 0);
    cpu_x86_load_seg(env, R_GS, 0);
#endif
}
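/*
 * For context, a sketch of the expected call sequence (this lives in
 * the linux-user startup code, not in this file): after loading the
 * guest binary, the CPU state is seeded with target_cpu_copy_regs()
 * and execution never returns from cpu_loop():
 *
 *     target_cpu_copy_regs(env, regs);
 *     cpu_loop(env);
 */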