1 /* 2 * qemu user cpu loop 3 * 4 * Copyright (c) 2003-2008 Fabrice Bellard 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; either version 2 of the License, or 9 * (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "qemu.h" 22 #include "qemu/timer.h" 23 #include "user-internals.h" 24 #include "cpu_loop-common.h" 25 #include "signal-common.h" 26 #include "user-mmap.h" 27 28 /***********************************************************/ 29 /* CPUX86 core interface */ 30 31 uint64_t cpu_get_tsc(CPUX86State *env) 32 { 33 return cpu_get_host_ticks(); 34 } 35 36 static void write_dt(void *ptr, unsigned long addr, unsigned long limit, 37 int flags) 38 { 39 unsigned int e1, e2; 40 uint32_t *p; 41 e1 = (addr << 16) | (limit & 0xffff); 42 e2 = ((addr >> 16) & 0xff) | (addr & 0xff000000) | (limit & 0x000f0000); 43 e2 |= flags; 44 p = ptr; 45 p[0] = tswap32(e1); 46 p[1] = tswap32(e2); 47 } 48 49 static uint64_t *idt_table; 50 51 static void set_gate64(void *ptr, unsigned int type, unsigned int dpl, 52 uint64_t addr, unsigned int sel) 53 { 54 uint32_t *p, e1, e2; 55 e1 = (addr & 0xffff) | (sel << 16); 56 e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8); 57 p = ptr; 58 p[0] = tswap32(e1); 59 p[1] = tswap32(e2); 60 p[2] = tswap32(addr >> 32); 61 p[3] = 0; 62 } 63 64 #ifdef TARGET_X86_64 65 /* only dpl matters as we do only user space emulation */ 66 static void set_idt(int n, unsigned int dpl, bool is64) 67 { 68 set_gate64(idt_table + n * 2, 0, dpl, 0, 0); 69 } 70 #else 71 static void set_gate(void *ptr, unsigned int type, unsigned int dpl, 72 uint32_t addr, unsigned int sel) 73 { 74 uint32_t *p, e1, e2; 75 e1 = (addr & 0xffff) | (sel << 16); 76 e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8); 77 p = ptr; 78 p[0] = tswap32(e1); 79 p[1] = tswap32(e2); 80 } 81 82 /* only dpl matters as we do only user space emulation */ 83 static void set_idt(int n, unsigned int dpl, bool is64) 84 { 85 if (is64) { 86 set_gate64(idt_table + n * 2, 0, dpl, 0, 0); 87 } else { 88 set_gate(idt_table + n, 0, dpl, 0, 0); 89 } 90 } 91 #endif 92 93 #ifdef TARGET_X86_64 94 static bool write_ok_or_segv(CPUX86State *env, abi_ptr addr, size_t len) 95 { 96 /* 97 * For all the vsyscalls, NULL means "don't write anything" not 98 * "write it at address 0". 99 */ 100 if (addr == 0 || access_ok(env_cpu(env), VERIFY_WRITE, addr, len)) { 101 return true; 102 } 103 104 env->error_code = PG_ERROR_W_MASK | PG_ERROR_U_MASK; 105 force_sig_fault(TARGET_SIGSEGV, TARGET_SEGV_MAPERR, addr); 106 return false; 107 } 108 109 /* 110 * Since v3.1, the kernel traps and emulates the vsyscall page. 111 * Entry points other than the official generate SIGSEGV. 112 */ 113 static void emulate_vsyscall(CPUX86State *env) 114 { 115 int syscall; 116 abi_ulong ret; 117 uint64_t caller; 118 119 /* 120 * Validate the entry point. We have already validated the page 121 * during translation to get here; now verify the offset. 122 */ 123 switch (env->eip & ~TARGET_PAGE_MASK) { 124 case 0x000: 125 syscall = TARGET_NR_gettimeofday; 126 break; 127 case 0x400: 128 syscall = TARGET_NR_time; 129 break; 130 case 0x800: 131 syscall = TARGET_NR_getcpu; 132 break; 133 default: 134 goto sigsegv; 135 } 136 137 /* 138 * Validate the return address. 139 * Note that the kernel treats this the same as an invalid entry point. 140 */ 141 if (get_user_u64(caller, env->regs[R_ESP])) { 142 goto sigsegv; 143 } 144 145 /* 146 * Validate the pointer arguments. 147 */ 148 switch (syscall) { 149 case TARGET_NR_gettimeofday: 150 if (!write_ok_or_segv(env, env->regs[R_EDI], 151 sizeof(struct target_timeval)) || 152 !write_ok_or_segv(env, env->regs[R_ESI], 153 sizeof(struct target_timezone))) { 154 return; 155 } 156 break; 157 case TARGET_NR_time: 158 if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(abi_long))) { 159 return; 160 } 161 break; 162 case TARGET_NR_getcpu: 163 if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(uint32_t)) || 164 !write_ok_or_segv(env, env->regs[R_ESI], sizeof(uint32_t))) { 165 return; 166 } 167 break; 168 default: 169 g_assert_not_reached(); 170 } 171 172 /* 173 * Perform the syscall. None of the vsyscalls should need restarting. 174 */ 175 ret = do_syscall(env, syscall, env->regs[R_EDI], env->regs[R_ESI], 176 env->regs[R_EDX], env->regs[10], env->regs[8], 177 env->regs[9], 0, 0); 178 g_assert(ret != -QEMU_ERESTARTSYS); 179 g_assert(ret != -QEMU_ESIGRETURN); 180 if (ret == -TARGET_EFAULT) { 181 goto sigsegv; 182 } 183 env->regs[R_EAX] = ret; 184 185 /* Emulate a ret instruction to leave the vsyscall page. */ 186 env->eip = caller; 187 env->regs[R_ESP] += 8; 188 return; 189 190 sigsegv: 191 force_sig(TARGET_SIGSEGV); 192 } 193 #endif 194 195 static bool maybe_handle_vm86_trap(CPUX86State *env, int trapnr) 196 { 197 #ifndef TARGET_X86_64 198 if (env->eflags & VM_MASK) { 199 handle_vm86_trap(env, trapnr); 200 return true; 201 } 202 #endif 203 return false; 204 } 205 206 void cpu_loop(CPUX86State *env) 207 { 208 CPUState *cs = env_cpu(env); 209 int trapnr; 210 abi_ulong ret; 211 212 for(;;) { 213 cpu_exec_start(cs); 214 trapnr = cpu_exec(cs); 215 cpu_exec_end(cs); 216 process_queued_cpu_work(cs); 217 218 switch(trapnr) { 219 case 0x80: 220 #ifndef TARGET_X86_64 221 case EXCP_SYSCALL: 222 #endif 223 /* linux syscall from int $0x80 */ 224 ret = do_syscall(env, 225 env->regs[R_EAX], 226 env->regs[R_EBX], 227 env->regs[R_ECX], 228 env->regs[R_EDX], 229 env->regs[R_ESI], 230 env->regs[R_EDI], 231 env->regs[R_EBP], 232 0, 0); 233 if (ret == -QEMU_ERESTARTSYS) { 234 env->eip -= 2; 235 } else if (ret != -QEMU_ESIGRETURN) { 236 env->regs[R_EAX] = ret; 237 } 238 break; 239 #ifdef TARGET_X86_64 240 case EXCP_SYSCALL: 241 /* linux syscall from syscall instruction. */ 242 ret = do_syscall(env, 243 env->regs[R_EAX], 244 env->regs[R_EDI], 245 env->regs[R_ESI], 246 env->regs[R_EDX], 247 env->regs[10], 248 env->regs[8], 249 env->regs[9], 250 0, 0); 251 if (ret == -QEMU_ERESTARTSYS) { 252 env->eip -= 2; 253 } else if (ret != -QEMU_ESIGRETURN) { 254 env->regs[R_EAX] = ret; 255 } 256 break; 257 case EXCP_VSYSCALL: 258 emulate_vsyscall(env); 259 break; 260 #endif 261 case EXCP0B_NOSEG: 262 case EXCP0C_STACK: 263 force_sig(TARGET_SIGBUS); 264 break; 265 case EXCP0D_GPF: 266 /* XXX: potential problem if ABI32 */ 267 if (maybe_handle_vm86_trap(env, trapnr)) { 268 break; 269 } 270 force_sig(TARGET_SIGSEGV); 271 break; 272 case EXCP0E_PAGE: 273 force_sig_fault(TARGET_SIGSEGV, 274 (env->error_code & PG_ERROR_P_MASK ? 275 TARGET_SEGV_ACCERR : TARGET_SEGV_MAPERR), 276 env->cr[2]); 277 break; 278 case EXCP00_DIVZ: 279 if (maybe_handle_vm86_trap(env, trapnr)) { 280 break; 281 } 282 force_sig_fault(TARGET_SIGFPE, TARGET_FPE_INTDIV, env->eip); 283 break; 284 case EXCP01_DB: 285 if (maybe_handle_vm86_trap(env, trapnr)) { 286 break; 287 } 288 force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip); 289 break; 290 case EXCP03_INT3: 291 if (maybe_handle_vm86_trap(env, trapnr)) { 292 break; 293 } 294 force_sig(TARGET_SIGTRAP); 295 break; 296 case EXCP04_INTO: 297 case EXCP05_BOUND: 298 if (maybe_handle_vm86_trap(env, trapnr)) { 299 break; 300 } 301 force_sig(TARGET_SIGSEGV); 302 break; 303 case EXCP06_ILLOP: 304 force_sig_fault(TARGET_SIGILL, TARGET_ILL_ILLOPN, env->eip); 305 break; 306 case EXCP_INTERRUPT: 307 /* just indicate that signals should be handled asap */ 308 break; 309 case EXCP_DEBUG: 310 force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip); 311 break; 312 case EXCP_ATOMIC: 313 cpu_exec_step_atomic(cs); 314 break; 315 default: 316 EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n", 317 trapnr); 318 abort(); 319 } 320 process_pending_signals(env); 321 } 322 } 323 324 static void target_cpu_free(void *obj) 325 { 326 CPUArchState *env = ((CPUState *)obj)->env_ptr; 327 target_munmap(env->gdt.base, sizeof(uint64_t) * TARGET_GDT_ENTRIES); 328 g_free(obj); 329 } 330 331 void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs) 332 { 333 CPUState *cpu = env_cpu(env); 334 bool is64 = (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) != 0; 335 int i; 336 337 OBJECT(cpu)->free = target_cpu_free; 338 env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK; 339 env->hflags |= HF_PE_MASK | HF_CPL_MASK; 340 if (env->features[FEAT_1_EDX] & CPUID_SSE) { 341 env->cr[4] |= CR4_OSFXSR_MASK; 342 env->hflags |= HF_OSFXSR_MASK; 343 } 344 345 /* enable 64 bit mode if possible */ 346 if (is64) { 347 env->cr[4] |= CR4_PAE_MASK; 348 env->efer |= MSR_EFER_LMA | MSR_EFER_LME; 349 env->hflags |= HF_LMA_MASK; 350 } 351 #ifndef TARGET_ABI32 352 else { 353 fprintf(stderr, "The selected x86 CPU does not support 64 bit mode\n"); 354 exit(EXIT_FAILURE); 355 } 356 #endif 357 358 /* flags setup : we activate the IRQs by default as in user mode */ 359 env->eflags |= IF_MASK; 360 361 /* linux register setup */ 362 #ifndef TARGET_ABI32 363 env->regs[R_EAX] = regs->rax; 364 env->regs[R_EBX] = regs->rbx; 365 env->regs[R_ECX] = regs->rcx; 366 env->regs[R_EDX] = regs->rdx; 367 env->regs[R_ESI] = regs->rsi; 368 env->regs[R_EDI] = regs->rdi; 369 env->regs[R_EBP] = regs->rbp; 370 env->regs[R_ESP] = regs->rsp; 371 env->eip = regs->rip; 372 #else 373 env->regs[R_EAX] = regs->eax; 374 env->regs[R_EBX] = regs->ebx; 375 env->regs[R_ECX] = regs->ecx; 376 env->regs[R_EDX] = regs->edx; 377 env->regs[R_ESI] = regs->esi; 378 env->regs[R_EDI] = regs->edi; 379 env->regs[R_EBP] = regs->ebp; 380 env->regs[R_ESP] = regs->esp; 381 env->eip = regs->eip; 382 #endif 383 384 /* linux interrupt setup */ 385 #ifndef TARGET_ABI32 386 env->idt.limit = 511; 387 #else 388 env->idt.limit = 255; 389 #endif 390 env->idt.base = target_mmap(0, sizeof(uint64_t) * (env->idt.limit + 1), 391 PROT_READ|PROT_WRITE, 392 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); 393 idt_table = g2h_untagged(env->idt.base); 394 for (i = 0; i < 20; i++) { 395 set_idt(i, 0, is64); 396 } 397 set_idt(3, 3, is64); 398 set_idt(4, 3, is64); 399 set_idt(0x80, 3, is64); 400 401 /* linux segment setup */ 402 { 403 uint64_t *gdt_table; 404 env->gdt.base = target_mmap(0, sizeof(uint64_t) * TARGET_GDT_ENTRIES, 405 PROT_READ|PROT_WRITE, 406 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); 407 env->gdt.limit = sizeof(uint64_t) * TARGET_GDT_ENTRIES - 1; 408 gdt_table = g2h_untagged(env->gdt.base); 409 #ifdef TARGET_ABI32 410 write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff, 411 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | 412 (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT)); 413 #else 414 /* 64 bit code segment */ 415 write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff, 416 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | 417 DESC_L_MASK | 418 (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT)); 419 #endif 420 write_dt(&gdt_table[__USER_DS >> 3], 0, 0xfffff, 421 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | 422 (3 << DESC_DPL_SHIFT) | (0x2 << DESC_TYPE_SHIFT)); 423 } 424 cpu_x86_load_seg(env, R_CS, __USER_CS); 425 cpu_x86_load_seg(env, R_SS, __USER_DS); 426 #ifdef TARGET_ABI32 427 cpu_x86_load_seg(env, R_DS, __USER_DS); 428 cpu_x86_load_seg(env, R_ES, __USER_DS); 429 cpu_x86_load_seg(env, R_FS, __USER_DS); 430 cpu_x86_load_seg(env, R_GS, __USER_DS); 431 /* This hack makes Wine work... */ 432 env->segs[R_FS].selector = 0; 433 #else 434 cpu_x86_load_seg(env, R_DS, 0); 435 cpu_x86_load_seg(env, R_ES, 0); 436 cpu_x86_load_seg(env, R_FS, 0); 437 cpu_x86_load_seg(env, R_GS, 0); 438 #endif 439 } 440