1 /* 2 * qemu user cpu loop 3 * 4 * Copyright (c) 2003-2008 Fabrice Bellard 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; either version 2 of the License, or 9 * (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 #include "qemu/osdep.h" 21 #include "qemu-common.h" 22 #include "qemu.h" 23 #include "cpu_loop-common.h" 24 #include "signal-common.h" 25 26 /***********************************************************/ 27 /* CPUX86 core interface */ 28 29 uint64_t cpu_get_tsc(CPUX86State *env) 30 { 31 return cpu_get_host_ticks(); 32 } 33 34 static void write_dt(void *ptr, unsigned long addr, unsigned long limit, 35 int flags) 36 { 37 unsigned int e1, e2; 38 uint32_t *p; 39 e1 = (addr << 16) | (limit & 0xffff); 40 e2 = ((addr >> 16) & 0xff) | (addr & 0xff000000) | (limit & 0x000f0000); 41 e2 |= flags; 42 p = ptr; 43 p[0] = tswap32(e1); 44 p[1] = tswap32(e2); 45 } 46 47 static uint64_t *idt_table; 48 #ifdef TARGET_X86_64 49 static void set_gate64(void *ptr, unsigned int type, unsigned int dpl, 50 uint64_t addr, unsigned int sel) 51 { 52 uint32_t *p, e1, e2; 53 e1 = (addr & 0xffff) | (sel << 16); 54 e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8); 55 p = ptr; 56 p[0] = tswap32(e1); 57 p[1] = tswap32(e2); 58 p[2] = tswap32(addr >> 32); 59 p[3] = 0; 60 } 61 /* only dpl matters as we do only user space emulation */ 62 static void set_idt(int n, unsigned int dpl) 63 { 64 set_gate64(idt_table + n * 2, 0, dpl, 0, 0); 65 } 66 #else 67 static void set_gate(void *ptr, unsigned int type, unsigned int dpl, 68 uint32_t addr, unsigned int sel) 69 { 70 uint32_t *p, e1, e2; 71 e1 = (addr & 0xffff) | (sel << 16); 72 e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8); 73 p = ptr; 74 p[0] = tswap32(e1); 75 p[1] = tswap32(e2); 76 } 77 78 /* only dpl matters as we do only user space emulation */ 79 static void set_idt(int n, unsigned int dpl) 80 { 81 set_gate(idt_table + n, 0, dpl, 0, 0); 82 } 83 #endif 84 85 static void gen_signal(CPUX86State *env, int sig, int code, abi_ptr addr) 86 { 87 target_siginfo_t info = { 88 .si_signo = sig, 89 .si_code = code, 90 ._sifields._sigfault._addr = addr 91 }; 92 93 queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); 94 } 95 96 #ifdef TARGET_X86_64 97 static bool write_ok_or_segv(CPUX86State *env, abi_ptr addr, size_t len) 98 { 99 /* 100 * For all the vsyscalls, NULL means "don't write anything" not 101 * "write it at address 0". 102 */ 103 if (addr == 0 || access_ok(env_cpu(env), VERIFY_WRITE, addr, len)) { 104 return true; 105 } 106 107 env->error_code = PG_ERROR_W_MASK | PG_ERROR_U_MASK; 108 gen_signal(env, TARGET_SIGSEGV, TARGET_SEGV_MAPERR, addr); 109 return false; 110 } 111 112 /* 113 * Since v3.1, the kernel traps and emulates the vsyscall page. 114 * Entry points other than the official generate SIGSEGV. 115 */ 116 static void emulate_vsyscall(CPUX86State *env) 117 { 118 int syscall; 119 abi_ulong ret; 120 uint64_t caller; 121 122 /* 123 * Validate the entry point. We have already validated the page 124 * during translation to get here; now verify the offset. 125 */ 126 switch (env->eip & ~TARGET_PAGE_MASK) { 127 case 0x000: 128 syscall = TARGET_NR_gettimeofday; 129 break; 130 case 0x400: 131 syscall = TARGET_NR_time; 132 break; 133 case 0x800: 134 syscall = TARGET_NR_getcpu; 135 break; 136 default: 137 goto sigsegv; 138 } 139 140 /* 141 * Validate the return address. 142 * Note that the kernel treats this the same as an invalid entry point. 143 */ 144 if (get_user_u64(caller, env->regs[R_ESP])) { 145 goto sigsegv; 146 } 147 148 /* 149 * Validate the the pointer arguments. 150 */ 151 switch (syscall) { 152 case TARGET_NR_gettimeofday: 153 if (!write_ok_or_segv(env, env->regs[R_EDI], 154 sizeof(struct target_timeval)) || 155 !write_ok_or_segv(env, env->regs[R_ESI], 156 sizeof(struct target_timezone))) { 157 return; 158 } 159 break; 160 case TARGET_NR_time: 161 if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(abi_long))) { 162 return; 163 } 164 break; 165 case TARGET_NR_getcpu: 166 if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(uint32_t)) || 167 !write_ok_or_segv(env, env->regs[R_ESI], sizeof(uint32_t))) { 168 return; 169 } 170 break; 171 default: 172 g_assert_not_reached(); 173 } 174 175 /* 176 * Perform the syscall. None of the vsyscalls should need restarting. 177 */ 178 ret = do_syscall(env, syscall, env->regs[R_EDI], env->regs[R_ESI], 179 env->regs[R_EDX], env->regs[10], env->regs[8], 180 env->regs[9], 0, 0); 181 g_assert(ret != -TARGET_ERESTARTSYS); 182 g_assert(ret != -TARGET_QEMU_ESIGRETURN); 183 if (ret == -TARGET_EFAULT) { 184 goto sigsegv; 185 } 186 env->regs[R_EAX] = ret; 187 188 /* Emulate a ret instruction to leave the vsyscall page. */ 189 env->eip = caller; 190 env->regs[R_ESP] += 8; 191 return; 192 193 sigsegv: 194 /* Like force_sig(SIGSEGV). */ 195 gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0); 196 } 197 #endif 198 199 void cpu_loop(CPUX86State *env) 200 { 201 CPUState *cs = env_cpu(env); 202 int trapnr; 203 abi_ulong pc; 204 abi_ulong ret; 205 206 for(;;) { 207 cpu_exec_start(cs); 208 trapnr = cpu_exec(cs); 209 cpu_exec_end(cs); 210 process_queued_cpu_work(cs); 211 212 switch(trapnr) { 213 case 0x80: 214 /* linux syscall from int $0x80 */ 215 ret = do_syscall(env, 216 env->regs[R_EAX], 217 env->regs[R_EBX], 218 env->regs[R_ECX], 219 env->regs[R_EDX], 220 env->regs[R_ESI], 221 env->regs[R_EDI], 222 env->regs[R_EBP], 223 0, 0); 224 if (ret == -TARGET_ERESTARTSYS) { 225 env->eip -= 2; 226 } else if (ret != -TARGET_QEMU_ESIGRETURN) { 227 env->regs[R_EAX] = ret; 228 } 229 break; 230 #ifndef TARGET_ABI32 231 case EXCP_SYSCALL: 232 /* linux syscall from syscall instruction */ 233 ret = do_syscall(env, 234 env->regs[R_EAX], 235 env->regs[R_EDI], 236 env->regs[R_ESI], 237 env->regs[R_EDX], 238 env->regs[10], 239 env->regs[8], 240 env->regs[9], 241 0, 0); 242 if (ret == -TARGET_ERESTARTSYS) { 243 env->eip -= 2; 244 } else if (ret != -TARGET_QEMU_ESIGRETURN) { 245 env->regs[R_EAX] = ret; 246 } 247 break; 248 #endif 249 #ifdef TARGET_X86_64 250 case EXCP_VSYSCALL: 251 emulate_vsyscall(env); 252 break; 253 #endif 254 case EXCP0B_NOSEG: 255 case EXCP0C_STACK: 256 gen_signal(env, TARGET_SIGBUS, TARGET_SI_KERNEL, 0); 257 break; 258 case EXCP0D_GPF: 259 /* XXX: potential problem if ABI32 */ 260 #ifndef TARGET_X86_64 261 if (env->eflags & VM_MASK) { 262 handle_vm86_fault(env); 263 break; 264 } 265 #endif 266 gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0); 267 break; 268 case EXCP0E_PAGE: 269 gen_signal(env, TARGET_SIGSEGV, 270 (env->error_code & 1 ? 271 TARGET_SEGV_ACCERR : TARGET_SEGV_MAPERR), 272 env->cr[2]); 273 break; 274 case EXCP00_DIVZ: 275 #ifndef TARGET_X86_64 276 if (env->eflags & VM_MASK) { 277 handle_vm86_trap(env, trapnr); 278 break; 279 } 280 #endif 281 gen_signal(env, TARGET_SIGFPE, TARGET_FPE_INTDIV, env->eip); 282 break; 283 case EXCP01_DB: 284 case EXCP03_INT3: 285 #ifndef TARGET_X86_64 286 if (env->eflags & VM_MASK) { 287 handle_vm86_trap(env, trapnr); 288 break; 289 } 290 #endif 291 if (trapnr == EXCP01_DB) { 292 gen_signal(env, TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip); 293 } else { 294 gen_signal(env, TARGET_SIGTRAP, TARGET_SI_KERNEL, 0); 295 } 296 break; 297 case EXCP04_INTO: 298 case EXCP05_BOUND: 299 #ifndef TARGET_X86_64 300 if (env->eflags & VM_MASK) { 301 handle_vm86_trap(env, trapnr); 302 break; 303 } 304 #endif 305 gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0); 306 break; 307 case EXCP06_ILLOP: 308 gen_signal(env, TARGET_SIGILL, TARGET_ILL_ILLOPN, env->eip); 309 break; 310 case EXCP_INTERRUPT: 311 /* just indicate that signals should be handled asap */ 312 break; 313 case EXCP_DEBUG: 314 gen_signal(env, TARGET_SIGTRAP, TARGET_TRAP_BRKPT, 0); 315 break; 316 case EXCP_ATOMIC: 317 cpu_exec_step_atomic(cs); 318 break; 319 default: 320 pc = env->segs[R_CS].base + env->eip; 321 EXCP_DUMP(env, "qemu: 0x%08lx: unhandled CPU exception 0x%x - aborting\n", 322 (long)pc, trapnr); 323 abort(); 324 } 325 process_pending_signals(env); 326 } 327 } 328 329 void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs) 330 { 331 env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK; 332 env->hflags |= HF_PE_MASK | HF_CPL_MASK; 333 if (env->features[FEAT_1_EDX] & CPUID_SSE) { 334 env->cr[4] |= CR4_OSFXSR_MASK; 335 env->hflags |= HF_OSFXSR_MASK; 336 } 337 #ifndef TARGET_ABI32 338 /* enable 64 bit mode if possible */ 339 if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) { 340 fprintf(stderr, "The selected x86 CPU does not support 64 bit mode\n"); 341 exit(EXIT_FAILURE); 342 } 343 env->cr[4] |= CR4_PAE_MASK; 344 env->efer |= MSR_EFER_LMA | MSR_EFER_LME; 345 env->hflags |= HF_LMA_MASK; 346 #endif 347 348 /* flags setup : we activate the IRQs by default as in user mode */ 349 env->eflags |= IF_MASK; 350 351 /* linux register setup */ 352 #ifndef TARGET_ABI32 353 env->regs[R_EAX] = regs->rax; 354 env->regs[R_EBX] = regs->rbx; 355 env->regs[R_ECX] = regs->rcx; 356 env->regs[R_EDX] = regs->rdx; 357 env->regs[R_ESI] = regs->rsi; 358 env->regs[R_EDI] = regs->rdi; 359 env->regs[R_EBP] = regs->rbp; 360 env->regs[R_ESP] = regs->rsp; 361 env->eip = regs->rip; 362 #else 363 env->regs[R_EAX] = regs->eax; 364 env->regs[R_EBX] = regs->ebx; 365 env->regs[R_ECX] = regs->ecx; 366 env->regs[R_EDX] = regs->edx; 367 env->regs[R_ESI] = regs->esi; 368 env->regs[R_EDI] = regs->edi; 369 env->regs[R_EBP] = regs->ebp; 370 env->regs[R_ESP] = regs->esp; 371 env->eip = regs->eip; 372 #endif 373 374 /* linux interrupt setup */ 375 #ifndef TARGET_ABI32 376 env->idt.limit = 511; 377 #else 378 env->idt.limit = 255; 379 #endif 380 env->idt.base = target_mmap(0, sizeof(uint64_t) * (env->idt.limit + 1), 381 PROT_READ|PROT_WRITE, 382 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); 383 idt_table = g2h_untagged(env->idt.base); 384 set_idt(0, 0); 385 set_idt(1, 0); 386 set_idt(2, 0); 387 set_idt(3, 3); 388 set_idt(4, 3); 389 set_idt(5, 0); 390 set_idt(6, 0); 391 set_idt(7, 0); 392 set_idt(8, 0); 393 set_idt(9, 0); 394 set_idt(10, 0); 395 set_idt(11, 0); 396 set_idt(12, 0); 397 set_idt(13, 0); 398 set_idt(14, 0); 399 set_idt(15, 0); 400 set_idt(16, 0); 401 set_idt(17, 0); 402 set_idt(18, 0); 403 set_idt(19, 0); 404 set_idt(0x80, 3); 405 406 /* linux segment setup */ 407 { 408 uint64_t *gdt_table; 409 env->gdt.base = target_mmap(0, sizeof(uint64_t) * TARGET_GDT_ENTRIES, 410 PROT_READ|PROT_WRITE, 411 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); 412 env->gdt.limit = sizeof(uint64_t) * TARGET_GDT_ENTRIES - 1; 413 gdt_table = g2h_untagged(env->gdt.base); 414 #ifdef TARGET_ABI32 415 write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff, 416 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | 417 (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT)); 418 #else 419 /* 64 bit code segment */ 420 write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff, 421 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | 422 DESC_L_MASK | 423 (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT)); 424 #endif 425 write_dt(&gdt_table[__USER_DS >> 3], 0, 0xfffff, 426 DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | 427 (3 << DESC_DPL_SHIFT) | (0x2 << DESC_TYPE_SHIFT)); 428 } 429 cpu_x86_load_seg(env, R_CS, __USER_CS); 430 cpu_x86_load_seg(env, R_SS, __USER_DS); 431 #ifdef TARGET_ABI32 432 cpu_x86_load_seg(env, R_DS, __USER_DS); 433 cpu_x86_load_seg(env, R_ES, __USER_DS); 434 cpu_x86_load_seg(env, R_FS, __USER_DS); 435 cpu_x86_load_seg(env, R_GS, __USER_DS); 436 /* This hack makes Wine work... */ 437 env->segs[R_FS].selector = 0; 438 #else 439 cpu_x86_load_seg(env, R_DS, 0); 440 cpu_x86_load_seg(env, R_ES, 0); 441 cpu_x86_load_seg(env, R_FS, 0); 442 cpu_x86_load_seg(env, R_GS, 0); 443 #endif 444 } 445