/*
 *  Copyright (C) 1994 Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *  General FPU state handling cleanups
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 */
#include <asm/fpu/internal.h>
#include <asm/fpu/regset.h>
#include <asm/fpu/signal.h>
#include <asm/fpu/types.h>
#include <asm/traps.h>

#include <linux/hardirq.h>

#define CREATE_TRACE_POINTS
#include <asm/trace/fpu.h>

/*
 * Represents the initial FPU state. It's mostly (but not completely) zeroes,
 * depending on the FPU hardware format:
 */
union fpregs_state init_fpstate __read_mostly;

/*
 * Track whether the kernel is using the FPU state
 * currently.
 *
 * This flag is used:
 *
 *   - by IRQ context code to potentially use the FPU
 *     if it's unused.
 *
 *   - to debug kernel_fpu_begin()/end() correctness
 */
static DEFINE_PER_CPU(bool, in_kernel_fpu);

/*
 * Track which context is using the FPU on the CPU:
 */
DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);

static void kernel_fpu_disable(void)
{
	WARN_ON_FPU(this_cpu_read(in_kernel_fpu));
	this_cpu_write(in_kernel_fpu, true);
}

static void kernel_fpu_enable(void)
{
	WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
	this_cpu_write(in_kernel_fpu, false);
}

static bool kernel_fpu_disabled(void)
{
	return this_cpu_read(in_kernel_fpu);
}

/*
 * Were we in an interrupt that interrupted kernel mode?
 *
 * In the non-eager (lazy) FPU case, we can do a kernel_fpu_begin/end()
 * pair *ONLY* if that pair does nothing at all: the thread must not
 * have fpu (so that we don't try to save the FPU state), and TS must
 * be set (so that the clts/stts pair does nothing that is visible in
 * the interrupted kernel thread).
 *
 * Except for the eagerfpu case when we return true; in the likely case
 * the thread has FPU but we are not going to set/clear TS.
 */
static bool interrupted_kernel_fpu_idle(void)
{
	if (kernel_fpu_disabled())
		return false;

	if (use_eager_fpu())
		return true;

	return !current->thread.fpu.fpregs_active && (read_cr0() & X86_CR0_TS);
}

/*
 * Were we in user mode (or vm86 mode) when we were
 * interrupted?
 *
 * Doing kernel_fpu_begin/end() is ok if we are running
 * in an interrupt context from user mode - we'll just
 * save the FPU state as required.
 */
static bool interrupted_user_mode(void)
{
	struct pt_regs *regs = get_irq_regs();

	return regs && user_mode(regs);
}

/*
 * Can we use the FPU in kernel mode with the
 * whole "kernel_fpu_begin/end()" sequence?
 *
 * It's always ok in process context (ie "not interrupt")
 * but it is sometimes ok even from an irq.
 */
bool irq_fpu_usable(void)
{
	return !in_interrupt() ||
		interrupted_user_mode() ||
		interrupted_kernel_fpu_idle();
}
EXPORT_SYMBOL(irq_fpu_usable);

void __kernel_fpu_begin(void)
{
	struct fpu *fpu = &current->thread.fpu;

	WARN_ON_FPU(!irq_fpu_usable());

	kernel_fpu_disable();

	if (fpu->fpregs_active) {
		/*
		 * Ignore return value -- we don't care if reg state
		 * is clobbered.
		 */
		copy_fpregs_to_fpstate(fpu);
	} else {
		this_cpu_write(fpu_fpregs_owner_ctx, NULL);
		__fpregs_activate_hw();
	}
}
EXPORT_SYMBOL(__kernel_fpu_begin);

void __kernel_fpu_end(void)
{
	struct fpu *fpu = &current->thread.fpu;

	if (fpu->fpregs_active)
		copy_kernel_to_fpregs(&fpu->state);
	else
		__fpregs_deactivate_hw();

	kernel_fpu_enable();
}
EXPORT_SYMBOL(__kernel_fpu_end);

void kernel_fpu_begin(void)
{
	preempt_disable();
	__kernel_fpu_begin();
}
EXPORT_SYMBOL_GPL(kernel_fpu_begin);

void kernel_fpu_end(void)
{
	__kernel_fpu_end();
	preempt_enable();
}
EXPORT_SYMBOL_GPL(kernel_fpu_end);
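
/*
 * A minimal usage sketch (illustrative only): code that wants to use
 * SIMD/FPU instructions in kernel mode brackets them with
 * kernel_fpu_begin()/kernel_fpu_end(), and from IRQ context first checks
 * irq_fpu_usable(). The helpers my_xor_blocks_simd() and
 * my_xor_blocks_scalar() are hypothetical placeholders for the FPU-using
 * and integer-only fallback paths.
 *
 *	static void my_xor_blocks(void *dst, const void *src, size_t len)
 *	{
 *		if (irq_fpu_usable()) {
 *			kernel_fpu_begin();
 *			my_xor_blocks_simd(dst, src, len);	// may use XMM/YMM regs
 *			kernel_fpu_end();
 *		} else {
 *			my_xor_blocks_scalar(dst, src, len);	// no FPU registers touched
 *		}
 *	}
 *
 * kernel_fpu_begin() disables preemption and, if the current task's fpregs
 * are live, saves them into its fpstate; kernel_fpu_end() restores them
 * (or deactivates the hardware in the lazy case) and re-enables preemption.
 */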

/*
 * CR0::TS save/restore functions:
 */
int irq_ts_save(void)
{
	/*
	 * If in process context and not atomic, we can take a spurious DNA fault.
	 * Otherwise, doing clts() in process context requires disabling preemption
	 * or some heavy lifting like kernel_fpu_begin()
	 */
	if (!in_atomic())
		return 0;

	if (read_cr0() & X86_CR0_TS) {
		clts();
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(irq_ts_save);

void irq_ts_restore(int TS_state)
{
	if (TS_state)
		stts();
}
EXPORT_SYMBOL_GPL(irq_ts_restore);
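
/*
 * A minimal usage sketch for the CR0::TS helpers above (illustrative only):
 * a driver that executes FPU-adjacent instructions from atomic context
 * saves and restores the TS bit around the access. my_hw_op() is a
 * hypothetical placeholder for the real instruction sequence.
 *
 *	int ts_state;
 *
 *	ts_state = irq_ts_save();	// clears CR0.TS only if in_atomic() and TS was set
 *	my_hw_op();			// sequence that must not raise a #NM (DNA) fault
 *	irq_ts_restore(ts_state);	// sets CR0.TS again only if irq_ts_save() cleared it
 */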

/*
 * Save the FPU state (mark it for reload if necessary):
 *
 * This only ever gets called for the current task.
 */
void fpu__save(struct fpu *fpu)
{
	WARN_ON_FPU(fpu != &current->thread.fpu);

	preempt_disable();
	trace_x86_fpu_before_save(fpu);
	if (fpu->fpregs_active) {
		if (!copy_fpregs_to_fpstate(fpu)) {
			if (use_eager_fpu())
				copy_kernel_to_fpregs(&fpu->state);
			else
				fpregs_deactivate(fpu);
		}
	}
	trace_x86_fpu_after_save(fpu);
	preempt_enable();
}
EXPORT_SYMBOL_GPL(fpu__save);

/*
 * Legacy x87 fpstate state init:
 */
static inline void fpstate_init_fstate(struct fregs_state *fp)
{
	fp->cwd = 0xffff037fu;
	fp->swd = 0xffff0000u;
	fp->twd = 0xffffffffu;
	fp->fos = 0xffff0000u;
}

void fpstate_init(union fpregs_state *state)
{
	if (!static_cpu_has(X86_FEATURE_FPU)) {
		fpstate_init_soft(&state->soft);
		return;
	}

	memset(state, 0, fpu_kernel_xstate_size);

	/*
	 * XRSTORS requires that this bit is set in xcomp_bv, or
	 * it will #GP. Make sure it is replaced after the memset().
	 */
	if (static_cpu_has(X86_FEATURE_XSAVES))
		state->xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT;

	if (static_cpu_has(X86_FEATURE_FXSR))
		fpstate_init_fxstate(&state->fxsave);
	else
		fpstate_init_fstate(&state->fsave);
}
EXPORT_SYMBOL_GPL(fpstate_init);

int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
{
	dst_fpu->counter = 0;
	dst_fpu->fpregs_active = 0;
	dst_fpu->last_cpu = -1;

	if (!src_fpu->fpstate_active || !static_cpu_has(X86_FEATURE_FPU))
		return 0;

	WARN_ON_FPU(src_fpu != &current->thread.fpu);

	/*
	 * Don't let 'init optimized' areas of the XSAVE area
	 * leak into the child task:
	 */
	if (use_eager_fpu())
		memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);

	/*
	 * Save current FPU registers directly into the child
	 * FPU context, without any memory-to-memory copying.
	 * In lazy mode, if the FPU context isn't loaded into
	 * fpregs, CR0.TS will be set and do_device_not_available
	 * will load the FPU context.
	 *
	 * We have to do all this with preemption disabled,
	 * mostly because of the FNSAVE case, because in that
	 * case we must not allow preemption in the window
	 * between the FNSAVE and us marking the context lazy.
	 *
	 * It shouldn't be an issue as even FNSAVE is plenty
	 * fast in terms of critical section length.
	 */
	preempt_disable();
	if (!copy_fpregs_to_fpstate(dst_fpu)) {
		memcpy(&src_fpu->state, &dst_fpu->state,
		       fpu_kernel_xstate_size);

		if (use_eager_fpu())
			copy_kernel_to_fpregs(&src_fpu->state);
		else
			fpregs_deactivate(src_fpu);
	}
	preempt_enable();

	trace_x86_fpu_copy_src(src_fpu);
	trace_x86_fpu_copy_dst(dst_fpu);

	return 0;
}

/*
 * Activate the current task's in-memory FPU context,
 * if it has not been used before:
 */
void fpu__activate_curr(struct fpu *fpu)
{
	WARN_ON_FPU(fpu != &current->thread.fpu);

	if (!fpu->fpstate_active) {
		fpstate_init(&fpu->state);
		trace_x86_fpu_init_state(fpu);

		trace_x86_fpu_activate_state(fpu);
		/* Safe to do for the current task: */
		fpu->fpstate_active = 1;
	}
}
EXPORT_SYMBOL_GPL(fpu__activate_curr);

/*
 * This function must be called before we read a task's fpstate.
 *
 * If the task has not used the FPU before then initialize its
 * fpstate.
 *
 * If the task has used the FPU before then save it.
 */
void fpu__activate_fpstate_read(struct fpu *fpu)
{
	/*
	 * If fpregs are active (in the current CPU), then
	 * copy them to the fpstate:
	 */
	if (fpu->fpregs_active) {
		fpu__save(fpu);
	} else {
		if (!fpu->fpstate_active) {
			fpstate_init(&fpu->state);
			trace_x86_fpu_init_state(fpu);

			trace_x86_fpu_activate_state(fpu);
			/* Safe to do for current and for stopped child tasks: */
			fpu->fpstate_active = 1;
		}
	}
}

/*
 * This function must be called before we write a task's fpstate.
 *
 * If the task has used the FPU before then unlazy it.
 * If the task has not used the FPU before then initialize its fpstate.
 *
 * After this function call, after registers in the fpstate are
 * modified and the child task has woken up, the child task will
 * restore the modified FPU state from the modified context. If we
 * didn't clear its lazy status here then the lazy in-registers
 * state pending on its former CPU could be restored, corrupting
 * the modifications.
 */
void fpu__activate_fpstate_write(struct fpu *fpu)
{
	/*
	 * Only stopped child tasks can be used to modify the FPU
	 * state in the fpstate buffer:
	 */
	WARN_ON_FPU(fpu == &current->thread.fpu);

	if (fpu->fpstate_active) {
		/* Invalidate any lazy state: */
		fpu->last_cpu = -1;
	} else {
		fpstate_init(&fpu->state);
		trace_x86_fpu_init_state(fpu);

		trace_x86_fpu_activate_state(fpu);
		/* Safe to do for stopped child tasks: */
		fpu->fpstate_active = 1;
	}
}
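
/*
 * A minimal usage sketch for the two helpers above (illustrative only; the
 * real callers live in the regset/ptrace code, not in this file): a
 * debugger-style reader or writer of a stopped child's FPU state calls the
 * matching activation helper first. example_read_fpstate() and
 * example_write_fpstate() are hypothetical wrappers.
 *
 *	// Reading: make sure the in-memory fpstate is up to date first.
 *	static void example_read_fpstate(struct task_struct *child, void *buf)
 *	{
 *		struct fpu *fpu = &child->thread.fpu;
 *
 *		fpu__activate_fpstate_read(fpu);
 *		memcpy(buf, &fpu->state, fpu_kernel_xstate_size);
 *	}
 *
 *	// Writing: invalidate any lazily cached register state first, so
 *	// the child reloads the modified fpstate when it resumes.
 *	static void example_write_fpstate(struct task_struct *child, const void *buf)
 *	{
 *		struct fpu *fpu = &child->thread.fpu;
 *
 *		fpu__activate_fpstate_write(fpu);
 *		memcpy(&fpu->state, buf, fpu_kernel_xstate_size);
 *	}
 */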

/*
 * This function must be called before we write the current
 * task's fpstate.
 *
 * This call gets the current FPU register state and moves
 * it in to the 'fpstate'. Preemption is disabled so that
 * no writes to the 'fpstate' can occur from context
 * switches.
 *
 * Must be followed by a fpu__current_fpstate_write_end().
 */
void fpu__current_fpstate_write_begin(void)
{
	struct fpu *fpu = &current->thread.fpu;

	/*
	 * Ensure that the context-switching code does not write
	 * over the fpstate while we are doing our update.
	 */
	preempt_disable();

	/*
	 * Move the fpregs in to the fpu's 'fpstate'.
	 */
	fpu__activate_fpstate_read(fpu);

	/*
	 * The caller is about to write to 'fpu'. Ensure that no
	 * CPU thinks that its fpregs match the fpstate. This
	 * ensures we will not be lazy and skip a XRSTOR in the
	 * future.
	 */
	fpu->last_cpu = -1;
}

/*
 * This function must be paired with fpu__current_fpstate_write_begin()
 *
 * This will ensure that the modified fpstate gets placed back in
 * the fpregs if necessary.
 *
 * Note: This function may be called whether or not an _actual_
 * write to the fpstate occurred.
 */
void fpu__current_fpstate_write_end(void)
{
	struct fpu *fpu = &current->thread.fpu;

	/*
	 * 'fpu' now has an updated copy of the state, but the
	 * registers may still be out of date. Update them with
	 * an XRSTOR if they are active.
	 */
	if (fpregs_active())
		copy_kernel_to_fpregs(&fpu->state);

	/*
	 * Our update is done and the fpregs/fpstate are in sync
	 * if necessary. Context switches can happen again.
	 */
	preempt_enable();
}
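
/*
 * A minimal usage sketch for the begin/end pair above (illustrative only):
 * code that edits the current task's in-memory fpstate brackets the update
 * with the two calls, so context switches cannot clobber the edit and the
 * live registers are refreshed afterwards. 'some_feature_mask' is a
 * hypothetical placeholder; a real caller would modify its own portion of
 * the fpstate in this window.
 *
 *	fpu__current_fpstate_write_begin();
 *	current->thread.fpu.state.xsave.header.xfeatures |= some_feature_mask;
 *	fpu__current_fpstate_write_end();
 */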

/*
 * 'fpu__restore()' is called to copy FPU registers from
 * the FPU fpstate to the live hw registers and to activate
 * access to the hardware registers, so that FPU instructions
 * can be used afterwards.
 *
 * Must be called with kernel preemption disabled (for example
 * with local interrupts disabled, as it is in the case of
 * do_device_not_available()).
 */
void fpu__restore(struct fpu *fpu)
{
	fpu__activate_curr(fpu);

	/* Avoid __kernel_fpu_begin() right after fpregs_activate() */
	kernel_fpu_disable();
	trace_x86_fpu_before_restore(fpu);
	fpregs_activate(fpu);
	copy_kernel_to_fpregs(&fpu->state);
	fpu->counter++;
	trace_x86_fpu_after_restore(fpu);
	kernel_fpu_enable();
}
EXPORT_SYMBOL_GPL(fpu__restore);

/*
 * Drops current FPU state: deactivates the fpregs and
 * the fpstate. NOTE: it still leaves previous contents
 * in the fpregs in the eager-FPU case.
 *
 * This function can be used in cases where we know that
 * a state-restore is coming: either an explicit one,
 * or a reschedule.
 */
void fpu__drop(struct fpu *fpu)
{
	preempt_disable();
	fpu->counter = 0;

	if (fpu->fpregs_active) {
		/* Ignore delayed exceptions from user space */
		asm volatile("1: fwait\n"
			     "2:\n"
			     _ASM_EXTABLE(1b, 2b));
		fpregs_deactivate(fpu);
	}

	fpu->fpstate_active = 0;

	trace_x86_fpu_dropped(fpu);

	preempt_enable();
}

/*
 * Clear FPU registers by setting them up from
 * the init fpstate:
 */
static inline void copy_init_fpstate_to_fpregs(void)
{
	if (use_xsave())
		copy_kernel_to_xregs(&init_fpstate.xsave, -1);
	else if (static_cpu_has(X86_FEATURE_FXSR))
		copy_kernel_to_fxregs(&init_fpstate.fxsave);
	else
		copy_kernel_to_fregs(&init_fpstate.fsave);
}

/*
 * Clear the FPU state back to init state.
 *
 * Called by sys_execve(), by the signal handler code and by various
 * error paths.
 */
void fpu__clear(struct fpu *fpu)
{
	WARN_ON_FPU(fpu != &current->thread.fpu); /* Almost certainly an anomaly */

	if (!use_eager_fpu() || !static_cpu_has(X86_FEATURE_FPU)) {
		/* FPU state will be reallocated lazily at the first use. */
		fpu__drop(fpu);
	} else {
		if (!fpu->fpstate_active) {
			fpu__activate_curr(fpu);
			user_fpu_begin();
		}
		copy_init_fpstate_to_fpregs();
	}
}

/*
 * x87 math exception handling:
 */

int fpu__exception_code(struct fpu *fpu, int trap_nr)
{
	int err;

	if (trap_nr == X86_TRAP_MF) {
		unsigned short cwd, swd;
		/*
		 * (~cwd & swd) will mask out exceptions that are not set to unmasked
		 * status. 0x3f is the exception bits in these regs, 0x200 is the
		 * C1 reg you need in case of a stack fault, 0x040 is the stack
		 * fault bit. We should only be taking one exception at a time,
		 * so if this combination doesn't produce any single exception,
		 * then we have a bad program that isn't synchronizing its FPU usage
		 * and it will suffer the consequences since we won't be able to
		 * fully reproduce the context of the exception.
		 */
		if (boot_cpu_has(X86_FEATURE_FXSR)) {
			cwd = fpu->state.fxsave.cwd;
			swd = fpu->state.fxsave.swd;
		} else {
			cwd = (unsigned short)fpu->state.fsave.cwd;
			swd = (unsigned short)fpu->state.fsave.swd;
		}

		err = swd & ~cwd;
	} else {
		/*
		 * The SIMD FPU exceptions are handled a little differently, as there
		 * is only a single status/control register (the MXCSR). Thus, to
		 * determine which unmasked exception was caught we must mask the
		 * exception mask bits at 0x1f80, and then use these to mask the
		 * exception bits at 0x3f.
		 */
		unsigned short mxcsr = MXCSR_DEFAULT;

		if (boot_cpu_has(X86_FEATURE_XMM))
			mxcsr = fpu->state.fxsave.mxcsr;

		err = ~(mxcsr >> 7) & mxcsr;
	}

	if (err & 0x001) {	/* Invalid op */
		/*
		 * swd & 0x240 == 0x040: Stack Underflow
		 * swd & 0x240 == 0x240: Stack Overflow
		 * User must clear the SF bit (0x40) if set
		 */
		return FPE_FLTINV;
	} else if (err & 0x004) { /* Divide by Zero */
		return FPE_FLTDIV;
	} else if (err & 0x008) { /* Overflow */
		return FPE_FLTOVF;
	} else if (err & 0x012) { /* Denormal, Underflow */
		return FPE_FLTUND;
	} else if (err & 0x020) { /* Precision */
		return FPE_FLTRES;
	}

	/*
	 * If we're using IRQ 13, or supposedly even some trap
	 * X86_TRAP_MF implementations, it's possible
	 * we get a spurious trap, which is not an error.
	 */
	return 0;
}
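
/*
 * A minimal usage sketch for fpu__exception_code() (illustrative only; the
 * real caller is the x86 trap code for the #MF/#XF exceptions): the handler
 * converts the saved control/status words into a SIGFPE si_code and treats
 * a zero return as a spurious trap. example_math_error() and the signal
 * delivery step are simplified placeholders.
 *
 *	static void example_math_error(struct pt_regs *regs, int trapnr)
 *	{
 *		struct fpu *fpu = &current->thread.fpu;
 *		int si_code;
 *
 *		fpu__save(fpu);			// sync live fpregs into fpu->state
 *		si_code = fpu__exception_code(fpu, trapnr);
 *		if (!si_code)
 *			return;			// spurious trap: nothing to signal
 *
 *		// deliver SIGFPE with 'si_code' to the task here, e.g. by
 *		// filling in a siginfo and calling force_sig_info()
 *	}
 */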