1 /* 2 * qemu user cpu loop 3 * 4 * Copyright (c) 2003-2008 Fabrice Bellard 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; either version 2 of the License, or 9 * (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "qemu.h" 22 #include "qemu/timer.h" 23 #include "user-internals.h" 24 #include "cpu_loop-common.h" 25 #include "signal-common.h" 26 #include "user-mmap.h" 27 28 /***********************************************************/ 29 /* CPUX86 core interface */ 30 31 uint64_t cpu_get_tsc(CPUX86State *env) 32 { 33 return cpu_get_host_ticks(); 34 } 35 36 static void write_dt(void *ptr, unsigned long addr, unsigned long limit, 37 int flags) 38 { 39 unsigned int e1, e2; 40 uint32_t *p; 41 e1 = (addr << 16) | (limit & 0xffff); 42 e2 = ((addr >> 16) & 0xff) | (addr & 0xff000000) | (limit & 0x000f0000); 43 e2 |= flags; 44 p = ptr; 45 p[0] = tswap32(e1); 46 p[1] = tswap32(e2); 47 } 48 49 static uint64_t *idt_table; 50 #ifdef TARGET_X86_64 51 static void set_gate64(void *ptr, unsigned int type, unsigned int dpl, 52 uint64_t addr, unsigned int sel) 53 { 54 uint32_t *p, e1, e2; 55 e1 = (addr & 0xffff) | (sel << 16); 56 e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8); 57 p = ptr; 58 p[0] = tswap32(e1); 59 p[1] = tswap32(e2); 60 p[2] = tswap32(addr >> 32); 61 p[3] = 0; 62 } 63 /* only dpl matters as we do only user space emulation */ 64 static void set_idt(int n, unsigned int dpl) 65 { 66 set_gate64(idt_table + n * 2, 0, dpl, 0, 0); 67 } 68 #else 69 static void set_gate(void *ptr, unsigned int type, unsigned int dpl, 70 uint32_t addr, unsigned int sel) 71 { 72 uint32_t *p, e1, e2; 73 e1 = (addr & 0xffff) | (sel << 16); 74 e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8); 75 p = ptr; 76 p[0] = tswap32(e1); 77 p[1] = tswap32(e2); 78 } 79 80 /* only dpl matters as we do only user space emulation */ 81 static void set_idt(int n, unsigned int dpl) 82 { 83 set_gate(idt_table + n, 0, dpl, 0, 0); 84 } 85 #endif 86 87 #ifdef TARGET_X86_64 88 static bool write_ok_or_segv(CPUX86State *env, abi_ptr addr, size_t len) 89 { 90 /* 91 * For all the vsyscalls, NULL means "don't write anything" not 92 * "write it at address 0". 93 */ 94 if (addr == 0 || access_ok(env_cpu(env), VERIFY_WRITE, addr, len)) { 95 return true; 96 } 97 98 env->error_code = PG_ERROR_W_MASK | PG_ERROR_U_MASK; 99 force_sig_fault(TARGET_SIGSEGV, TARGET_SEGV_MAPERR, addr); 100 return false; 101 } 102 103 /* 104 * Since v3.1, the kernel traps and emulates the vsyscall page. 105 * Entry points other than the official generate SIGSEGV. 106 */ 107 static void emulate_vsyscall(CPUX86State *env) 108 { 109 int syscall; 110 abi_ulong ret; 111 uint64_t caller; 112 113 /* 114 * Validate the entry point. We have already validated the page 115 * during translation to get here; now verify the offset. 116 */ 117 switch (env->eip & ~TARGET_PAGE_MASK) { 118 case 0x000: 119 syscall = TARGET_NR_gettimeofday; 120 break; 121 case 0x400: 122 syscall = TARGET_NR_time; 123 break; 124 case 0x800: 125 syscall = TARGET_NR_getcpu; 126 break; 127 default: 128 goto sigsegv; 129 } 130 131 /* 132 * Validate the return address. 133 * Note that the kernel treats this the same as an invalid entry point. 134 */ 135 if (get_user_u64(caller, env->regs[R_ESP])) { 136 goto sigsegv; 137 } 138 139 /* 140 * Validate the pointer arguments. 141 */ 142 switch (syscall) { 143 case TARGET_NR_gettimeofday: 144 if (!write_ok_or_segv(env, env->regs[R_EDI], 145 sizeof(struct target_timeval)) || 146 !write_ok_or_segv(env, env->regs[R_ESI], 147 sizeof(struct target_timezone))) { 148 return; 149 } 150 break; 151 case TARGET_NR_time: 152 if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(abi_long))) { 153 return; 154 } 155 break; 156 case TARGET_NR_getcpu: 157 if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(uint32_t)) || 158 !write_ok_or_segv(env, env->regs[R_ESI], sizeof(uint32_t))) { 159 return; 160 } 161 break; 162 default: 163 g_assert_not_reached(); 164 } 165 166 /* 167 * Perform the syscall. None of the vsyscalls should need restarting. 168 */ 169 ret = do_syscall(env, syscall, env->regs[R_EDI], env->regs[R_ESI], 170 env->regs[R_EDX], env->regs[10], env->regs[8], 171 env->regs[9], 0, 0); 172 g_assert(ret != -QEMU_ERESTARTSYS); 173 g_assert(ret != -QEMU_ESIGRETURN); 174 if (ret == -TARGET_EFAULT) { 175 goto sigsegv; 176 } 177 env->regs[R_EAX] = ret; 178 179 /* Emulate a ret instruction to leave the vsyscall page. */ 180 env->eip = caller; 181 env->regs[R_ESP] += 8; 182 return; 183 184 sigsegv: 185 force_sig(TARGET_SIGSEGV); 186 } 187 #endif 188 189 static bool maybe_handle_vm86_trap(CPUX86State *env, int trapnr) 190 { 191 #ifndef TARGET_X86_64 192 if (env->eflags & VM_MASK) { 193 handle_vm86_trap(env, trapnr); 194 return true; 195 } 196 #endif 197 return false; 198 } 199 200 void cpu_loop(CPUX86State *env) 201 { 202 CPUState *cs = env_cpu(env); 203 int trapnr; 204 abi_ulong ret; 205 206 for(;;) { 207 cpu_exec_start(cs); 208 trapnr = cpu_exec(cs); 209 cpu_exec_end(cs); 210 process_queued_cpu_work(cs); 211 212 switch(trapnr) { 213 case 0x80: 214 #ifndef TARGET_X86_64 215 case EXCP_SYSCALL: 216 #endif 217 /* linux syscall from int $0x80 */ 218 ret = do_syscall(env, 219 env->regs[R_EAX], 220 env->regs[R_EBX], 221 env->regs[R_ECX], 222 env->regs[R_EDX], 223 env->regs[R_ESI], 224 env->regs[R_EDI], 225 env->regs[R_EBP], 226 0, 0); 227 if (ret == -QEMU_ERESTARTSYS) { 228 env->eip -= 2; 229 } else if (ret != -QEMU_ESIGRETURN) { 230 env->regs[R_EAX] = ret; 231 } 232 break; 233 #ifdef TARGET_X86_64 234 case EXCP_SYSCALL: 235 /* linux syscall from syscall instruction. */ 236 ret = do_syscall(env, 237 env->regs[R_EAX], 238 env->regs[R_EDI], 239 env->regs[R_ESI], 240 env->regs[R_EDX], 241 env->regs[10], 242 env->regs[8], 243 env->regs[9], 244 0, 0); 245 if (ret == -QEMU_ERESTARTSYS) { 246 env->eip -= 2; 247 } else if (ret != -QEMU_ESIGRETURN) { 248 env->regs[R_EAX] = ret; 249 } 250 break; 251 case EXCP_VSYSCALL: 252 emulate_vsyscall(env); 253 break; 254 #endif 255 case EXCP0B_NOSEG: 256 case EXCP0C_STACK: 257 force_sig(TARGET_SIGBUS); 258 break; 259 case EXCP0D_GPF: 260 /* XXX: potential problem if ABI32 */ 261 if (maybe_handle_vm86_trap(env, trapnr)) { 262 break; 263 } 264 force_sig(TARGET_SIGSEGV); 265 break; 266 case EXCP0E_PAGE: 267 force_sig_fault(TARGET_SIGSEGV, 268 (env->error_code & PG_ERROR_P_MASK ? 269 TARGET_SEGV_ACCERR : TARGET_SEGV_MAPERR), 270 env->cr[2]); 271 break; 272 case EXCP00_DIVZ: 273 if (maybe_handle_vm86_trap(env, trapnr)) { 274 break; 275 } 276 force_sig_fault(TARGET_SIGFPE, TARGET_FPE_INTDIV, env->eip); 277 break; 278 case EXCP01_DB: 279 if (maybe_handle_vm86_trap(env, trapnr)) { 280 break; 281 } 282 force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip); 283 break; 284 case EXCP03_INT3: 285 if (maybe_handle_vm86_trap(env, trapnr)) { 286 break; 287 } 288 force_sig(TARGET_SIGTRAP); 289 break; 290 case EXCP04_INTO: 291 case EXCP05_BOUND: 292 if (maybe_handle_vm86_trap(env, trapnr)) { 293 break; 294 } 295 force_sig(TARGET_SIGSEGV); 296 break; 297 case EXCP06_ILLOP: 298 force_sig_fault(TARGET_SIGILL, TARGET_ILL_ILLOPN, env->eip); 299 break; 300 case EXCP_INTERRUPT: 301 /* just indicate that signals should be handled asap */ 302 break; 303 case EXCP_DEBUG: 304 force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip); 305 break; 306 case EXCP_ATOMIC: 307 cpu_exec_step_atomic(cs); 308 break; 309 default: 310 EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n", 311 trapnr); 312 abort(); 313 } 314 process_pending_signals(env); 315 } 316 } 317 318 static void target_cpu_free(void *obj) 319 { 320 CPUArchState *env = ((CPUState *)obj)->env_ptr; 321 target_munmap(env->gdt.base, sizeof(uint64_t) * TARGET_GDT_ENTRIES); 322 g_free(obj); 323 } 324 325 void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs) 326 { 327 CPUState *cpu = env_cpu(env); 328 OBJECT(cpu)->free = target_cpu_free; 329 env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK; 330 env->hflags |= HF_PE_MASK | HF_CPL_MASK; 331 if (env->features[FEAT_1_EDX] & CPUID_SSE) { 332 env->cr[4] |= CR4_OSFXSR_MASK; 333 env->hflags |= HF_OSFXSR_MASK; 334 } 335 #ifndef TARGET_ABI32 336 /* enable 64 bit mode if possible */ 337 if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) { 338 fprintf(stderr, "The selected x86 CPU does not support 64 bit mode\n"); 339 exit(EXIT_FAILURE); 340 } 341 env->cr[4] |= CR4_PAE_MASK; 342 env->efer |= MSR_EFER_LMA | MSR_EFER_LME; 343 env->hflags |= HF_LMA_MASK; 344 #endif 345 346 /* flags setup : we activate the IRQs by default as in user mode */ 347 env->eflags |= IF_MASK; 348 349 /* linux register setup */ 350 #ifndef TARGET_ABI32 351 env->regs[R_EAX] = regs->rax; 352 env->regs[R_EBX] = regs->rbx; 353 env->regs[R_ECX] = regs->rcx; 354 env->regs[R_EDX] = regs->rdx; 355 env->regs[R_ESI] = regs->rsi; 356 env->regs[R_EDI] = regs->rdi; 357 env->regs[R_EBP] = regs->rbp; 358 env->regs[R_ESP] = regs->rsp; 359 env->eip = regs->rip; 360 #else 361 env->regs[R_EAX] = regs->eax; 362 env->regs[R_EBX] = regs->ebx; 363 env->regs[R_ECX] = regs->ecx; 364 env->regs[R_EDX] = regs->edx; 365 env->regs[R_ESI] = regs->esi; 366 env->regs[R_EDI] = regs->edi; 367 env->regs[R_EBP] = regs->ebp; 368 env->regs[R_ESP] = regs->esp; 369 env->eip = regs->eip; 370 #endif 371 372 /* linux interrupt setup */ 373 #ifndef TARGET_ABI32 374 env->idt.limit = 511; 375 #else 376 env->idt.limit = 255; 377 #endif 378 env->idt.base = target_mmap(0, sizeof(uint64_t) * (env->idt.limit + 1), 379 PROT_READ|PROT_WRITE, 380 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); 381 idt_table = g2h_untagged(env->idt.base); 382 set_idt(0, 0); 383 set_idt(1, 0); 384 set_idt(2, 0); 385 set_idt(3, 3); 386 set_idt(4, 3); 387 set_idt(5, 0); 388 set_idt(6, 0); 389 set_idt(7, 0); 390 set_idt(8, 0); 391 set_idt(9, 0); 392 set_idt(10, 0); 393 set_idt(11, 0); 394 set_idt(12, 0); 395 set_idt(13, 0); 396 set_idt(14, 0); 397 set_idt(15, 0); 398 set_idt(16, 0); 399 set_idt(17, 0); 400 set_idt(18, 0); 401 set_idt(19, 0); 402 set_idt(0x80, 3); 403 404 /* linux segment setup */ 405 { 406 uint64_t *gdt_table; 407 env->gdt.base = target_mmap(0, sizeof(uint64_t) * TARGET_GDT_ENTRIES, 408 PROT_READ|PROT_WRITE, 409 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); 410 env->gdt.limit = sizeof(uint64_t) * TARGET_GDT_ENTRIES - 1; 411 gdt_table = g2h_untagged(env->gdt.base); 412 #ifdef TARGET_ABI32 413 write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff, 414 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | 415 (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT)); 416 #else 417 /* 64 bit code segment */ 418 write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff, 419 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | 420 DESC_L_MASK | 421 (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT)); 422 #endif 423 write_dt(&gdt_table[__USER_DS >> 3], 0, 0xfffff, 424 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | 425 (3 << DESC_DPL_SHIFT) | (0x2 << DESC_TYPE_SHIFT)); 426 } 427 cpu_x86_load_seg(env, R_CS, __USER_CS); 428 cpu_x86_load_seg(env, R_SS, __USER_DS); 429 #ifdef TARGET_ABI32 430 cpu_x86_load_seg(env, R_DS, __USER_DS); 431 cpu_x86_load_seg(env, R_ES, __USER_DS); 432 cpu_x86_load_seg(env, R_FS, __USER_DS); 433 cpu_x86_load_seg(env, R_GS, __USER_DS); 434 /* This hack makes Wine work... */ 435 env->segs[R_FS].selector = 0; 436 #else 437 cpu_x86_load_seg(env, R_DS, 0); 438 cpu_x86_load_seg(env, R_ES, 0); 439 cpu_x86_load_seg(env, R_FS, 0); 440 cpu_x86_load_seg(env, R_GS, 0); 441 #endif 442 } 443