/*
 * qemu user cpu loop
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu.h"
#include "cpu_loop-common.h"

/***********************************************************/
/* CPUX86 core interface */

uint64_t cpu_get_tsc(CPUX86State *env)
{
    return cpu_get_host_ticks();
}

/*
 * Fill an 8-byte segment descriptor at 'ptr' with the given base
 * address, limit and access flags, stored in target byte order.
 */
static void write_dt(void *ptr, unsigned long addr, unsigned long limit,
                     int flags)
{
    unsigned int e1, e2;
    uint32_t *p;
    e1 = (addr << 16) | (limit & 0xffff);
    e2 = ((addr >> 16) & 0xff) | (addr & 0xff000000) | (limit & 0x000f0000);
    e2 |= flags;
    p = ptr;
    p[0] = tswap32(e1);
    p[1] = tswap32(e2);
}

static uint64_t *idt_table;
#ifdef TARGET_X86_64
/* Fill a 16-byte long mode gate descriptor. */
static void set_gate64(void *ptr, unsigned int type, unsigned int dpl,
                       uint64_t addr, unsigned int sel)
{
    uint32_t *p, e1, e2;
    e1 = (addr & 0xffff) | (sel << 16);
    e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);
    p = ptr;
    p[0] = tswap32(e1);
    p[1] = tswap32(e2);
    p[2] = tswap32(addr >> 32);
    p[3] = 0;
}
/* only dpl matters as we do only user space emulation */
static void set_idt(int n, unsigned int dpl)
{
    set_gate64(idt_table + n * 2, 0, dpl, 0, 0);
}
#else
/* Fill an 8-byte gate descriptor. */
static void set_gate(void *ptr, unsigned int type, unsigned int dpl,
                     uint32_t addr, unsigned int sel)
{
    uint32_t *p, e1, e2;
    e1 = (addr & 0xffff) | (sel << 16);
    e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);
    p = ptr;
    p[0] = tswap32(e1);
    p[1] = tswap32(e2);
}

/* only dpl matters as we do only user space emulation */
static void set_idt(int n, unsigned int dpl)
{
    set_gate(idt_table + n, 0, dpl, 0, 0);
}
#endif

/*
 * Queue a synchronous fault signal for the guest with the given
 * si_code and fault address.
 */
static void gen_signal(CPUX86State *env, int sig, int code, abi_ptr addr)
{
    target_siginfo_t info = {
        .si_signo = sig,
        .si_code = code,
        ._sifields._sigfault._addr = addr
    };

    queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info);
}

#ifdef TARGET_X86_64
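/*
 * Check that the guest is allowed to write 'len' bytes at 'addr'.
 * If not, record a write/user-mode page fault error code, queue
 * SIGSEGV for the guest, and return false.
 */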
static bool write_ok_or_segv(CPUX86State *env, abi_ptr addr, size_t len)
{
    /*
     * For all the vsyscalls, NULL means "don't write anything" not
     * "write it at address 0".
     */
    if (addr == 0 || access_ok(env_cpu(env), VERIFY_WRITE, addr, len)) {
        return true;
    }

    env->error_code = PG_ERROR_W_MASK | PG_ERROR_U_MASK;
    gen_signal(env, TARGET_SIGSEGV, TARGET_SEGV_MAPERR, addr);
    return false;
}

/*
 * Since v3.1, the kernel traps and emulates the vsyscall page.
 * Entry points other than the official ones generate SIGSEGV.
 */
static void emulate_vsyscall(CPUX86State *env)
{
    int syscall;
    abi_ulong ret;
    uint64_t caller;

    /*
     * Validate the entry point.  We have already validated the page
     * during translation to get here; now verify the offset.
     */
    switch (env->eip & ~TARGET_PAGE_MASK) {
    case 0x000:
        syscall = TARGET_NR_gettimeofday;
        break;
    case 0x400:
        syscall = TARGET_NR_time;
        break;
    case 0x800:
        syscall = TARGET_NR_getcpu;
        break;
    default:
        goto sigsegv;
    }

    /*
     * Validate the return address.
     * Note that the kernel treats this the same as an invalid entry point.
     */
    if (get_user_u64(caller, env->regs[R_ESP])) {
        goto sigsegv;
    }

    /*
     * Validate the pointer arguments.
     */
    switch (syscall) {
    case TARGET_NR_gettimeofday:
        if (!write_ok_or_segv(env, env->regs[R_EDI],
                              sizeof(struct target_timeval)) ||
            !write_ok_or_segv(env, env->regs[R_ESI],
                              sizeof(struct target_timezone))) {
            return;
        }
        break;
    case TARGET_NR_time:
        if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(abi_long))) {
            return;
        }
        break;
    case TARGET_NR_getcpu:
        if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(uint32_t)) ||
            !write_ok_or_segv(env, env->regs[R_ESI], sizeof(uint32_t))) {
            return;
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Perform the syscall.  None of the vsyscalls should need restarting.
     */
    ret = do_syscall(env, syscall, env->regs[R_EDI], env->regs[R_ESI],
                     env->regs[R_EDX], env->regs[10], env->regs[8],
                     env->regs[9], 0, 0);
    g_assert(ret != -TARGET_ERESTARTSYS);
    g_assert(ret != -TARGET_QEMU_ESIGRETURN);
    if (ret == -TARGET_EFAULT) {
        goto sigsegv;
    }
    env->regs[R_EAX] = ret;

    /* Emulate a ret instruction to leave the vsyscall page. */
    env->eip = caller;
    env->regs[R_ESP] += 8;
    return;

 sigsegv:
    /* Like force_sig(SIGSEGV). */
    gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0);
}
#endif
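/*
 * Main user-mode execution loop: run translated guest code until an
 * exception is raised, then dispatch on the trap number, servicing
 * Linux syscalls, turning CPU faults into guest signals, and handling
 * QEMU-internal exceptions, before delivering any pending signals.
 */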
void cpu_loop(CPUX86State *env)
{
    CPUState *cs = env_cpu(env);
    int trapnr;
    abi_ulong pc;
    abi_ulong ret;

    for (;;) {
        cpu_exec_start(cs);
        trapnr = cpu_exec(cs);
        cpu_exec_end(cs);
        process_queued_cpu_work(cs);

        switch (trapnr) {
        case 0x80:
            /* linux syscall from int $0x80 */
            ret = do_syscall(env,
                             env->regs[R_EAX],
                             env->regs[R_EBX],
                             env->regs[R_ECX],
                             env->regs[R_EDX],
                             env->regs[R_ESI],
                             env->regs[R_EDI],
                             env->regs[R_EBP],
                             0, 0);
            if (ret == -TARGET_ERESTARTSYS) {
                /* Restart the syscall: back up over the 2-byte int $0x80. */
                env->eip -= 2;
            } else if (ret != -TARGET_QEMU_ESIGRETURN) {
                env->regs[R_EAX] = ret;
            }
            break;
#ifndef TARGET_ABI32
        case EXCP_SYSCALL:
            /* linux syscall from syscall instruction */
            ret = do_syscall(env,
                             env->regs[R_EAX],
                             env->regs[R_EDI],
                             env->regs[R_ESI],
                             env->regs[R_EDX],
                             env->regs[10],
                             env->regs[8],
                             env->regs[9],
                             0, 0);
            if (ret == -TARGET_ERESTARTSYS) {
                /* Restart the syscall: back up over the 2-byte syscall insn. */
                env->eip -= 2;
            } else if (ret != -TARGET_QEMU_ESIGRETURN) {
                env->regs[R_EAX] = ret;
            }
            break;
#endif
#ifdef TARGET_X86_64
        case EXCP_VSYSCALL:
            emulate_vsyscall(env);
            break;
#endif
        case EXCP0B_NOSEG:
        case EXCP0C_STACK:
            gen_signal(env, TARGET_SIGBUS, TARGET_SI_KERNEL, 0);
            break;
        case EXCP0D_GPF:
            /* XXX: potential problem if ABI32 */
#ifndef TARGET_X86_64
            if (env->eflags & VM_MASK) {
                handle_vm86_fault(env);
                break;
            }
#endif
            gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0);
            break;
        case EXCP0E_PAGE:
            gen_signal(env, TARGET_SIGSEGV,
                       (env->error_code & 1 ?
                        TARGET_SEGV_ACCERR : TARGET_SEGV_MAPERR),
                       env->cr[2]);
            break;
        case EXCP00_DIVZ:
#ifndef TARGET_X86_64
            if (env->eflags & VM_MASK) {
                handle_vm86_trap(env, trapnr);
                break;
            }
#endif
            gen_signal(env, TARGET_SIGFPE, TARGET_FPE_INTDIV, env->eip);
            break;
        case EXCP01_DB:
        case EXCP03_INT3:
#ifndef TARGET_X86_64
            if (env->eflags & VM_MASK) {
                handle_vm86_trap(env, trapnr);
                break;
            }
#endif
            if (trapnr == EXCP01_DB) {
                gen_signal(env, TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip);
            } else {
                gen_signal(env, TARGET_SIGTRAP, TARGET_SI_KERNEL, 0);
            }
            break;
        case EXCP04_INTO:
        case EXCP05_BOUND:
#ifndef TARGET_X86_64
            if (env->eflags & VM_MASK) {
                handle_vm86_trap(env, trapnr);
                break;
            }
#endif
            gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0);
            break;
        case EXCP06_ILLOP:
            gen_signal(env, TARGET_SIGILL, TARGET_ILL_ILLOPN, env->eip);
            break;
        case EXCP_INTERRUPT:
            /* just indicate that signals should be handled asap */
            break;
        case EXCP_DEBUG:
            gen_signal(env, TARGET_SIGTRAP, TARGET_TRAP_BRKPT, 0);
            break;
        case EXCP_ATOMIC:
            cpu_exec_step_atomic(cs);
            break;
        default:
            pc = env->segs[R_CS].base + env->eip;
            EXCP_DUMP(env, "qemu: 0x%08lx: unhandled CPU exception 0x%x - aborting\n",
                      (long)pc, trapnr);
            abort();
        }
        process_pending_signals(env);
    }
}
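/*
 * Set up the initial CPU state for a new guest process: protected mode
 * (plus long mode on 64-bit targets), the register file taken from the
 * loader-provided pt_regs, and minimal flat IDT and GDT tables.
 */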
void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs)
{
    env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
    env->hflags |= HF_PE_MASK | HF_CPL_MASK;
    if (env->features[FEAT_1_EDX] & CPUID_SSE) {
        env->cr[4] |= CR4_OSFXSR_MASK;
        env->hflags |= HF_OSFXSR_MASK;
    }
#ifndef TARGET_ABI32
    /* enable 64 bit mode if possible */
    if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) {
        fprintf(stderr, "The selected x86 CPU does not support 64 bit mode\n");
        exit(EXIT_FAILURE);
    }
    env->cr[4] |= CR4_PAE_MASK;
    env->efer |= MSR_EFER_LMA | MSR_EFER_LME;
    env->hflags |= HF_LMA_MASK;
#endif

    /* flags setup: we activate the IRQs by default as in user mode */
    env->eflags |= IF_MASK;

    /* linux register setup */
#ifndef TARGET_ABI32
    env->regs[R_EAX] = regs->rax;
    env->regs[R_EBX] = regs->rbx;
    env->regs[R_ECX] = regs->rcx;
    env->regs[R_EDX] = regs->rdx;
    env->regs[R_ESI] = regs->rsi;
    env->regs[R_EDI] = regs->rdi;
    env->regs[R_EBP] = regs->rbp;
    env->regs[R_ESP] = regs->rsp;
    env->eip = regs->rip;
#else
    env->regs[R_EAX] = regs->eax;
    env->regs[R_EBX] = regs->ebx;
    env->regs[R_ECX] = regs->ecx;
    env->regs[R_EDX] = regs->edx;
    env->regs[R_ESI] = regs->esi;
    env->regs[R_EDI] = regs->edi;
    env->regs[R_EBP] = regs->ebp;
    env->regs[R_ESP] = regs->esp;
    env->eip = regs->eip;
#endif

    /* linux interrupt setup */
#ifndef TARGET_ABI32
    env->idt.limit = 511;
#else
    env->idt.limit = 255;
#endif
    env->idt.base = target_mmap(0, sizeof(uint64_t) * (env->idt.limit + 1),
                                PROT_READ|PROT_WRITE,
                                MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
    idt_table = g2h_untagged(env->idt.base);
    set_idt(0, 0);
    set_idt(1, 0);
    set_idt(2, 0);
    set_idt(3, 3);
    set_idt(4, 3);
    set_idt(5, 0);
    set_idt(6, 0);
    set_idt(7, 0);
    set_idt(8, 0);
    set_idt(9, 0);
    set_idt(10, 0);
    set_idt(11, 0);
    set_idt(12, 0);
    set_idt(13, 0);
    set_idt(14, 0);
    set_idt(15, 0);
    set_idt(16, 0);
    set_idt(17, 0);
    set_idt(18, 0);
    set_idt(19, 0);
    set_idt(0x80, 3);

    /* linux segment setup */
    {
        uint64_t *gdt_table;
        env->gdt.base = target_mmap(0, sizeof(uint64_t) * TARGET_GDT_ENTRIES,
                                    PROT_READ|PROT_WRITE,
                                    MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
        env->gdt.limit = sizeof(uint64_t) * TARGET_GDT_ENTRIES - 1;
        gdt_table = g2h_untagged(env->gdt.base);
#ifdef TARGET_ABI32
        write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff,
                 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
                 (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
#else
        /* 64 bit code segment */
        write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff,
                 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
                 DESC_L_MASK |
                 (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
#endif
        write_dt(&gdt_table[__USER_DS >> 3], 0, 0xfffff,
                 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
                 (3 << DESC_DPL_SHIFT) | (0x2 << DESC_TYPE_SHIFT));
    }
    cpu_x86_load_seg(env, R_CS, __USER_CS);
    cpu_x86_load_seg(env, R_SS, __USER_DS);
#ifdef TARGET_ABI32
    cpu_x86_load_seg(env, R_DS, __USER_DS);
    cpu_x86_load_seg(env, R_ES, __USER_DS);
    cpu_x86_load_seg(env, R_FS, __USER_DS);
    cpu_x86_load_seg(env, R_GS, __USER_DS);
    /* This hack makes Wine work... */
    env->segs[R_FS].selector = 0;
#else
    cpu_x86_load_seg(env, R_DS, 0);
    cpu_x86_load_seg(env, R_ES, 0);
    cpu_x86_load_seg(env, R_FS, 0);
    cpu_x86_load_seg(env, R_GS, 0);
#endif
}