1 /* 2 * S390 version 3 * Copyright IBM Corp. 1999 4 * Author(s): Hartmut Penner (hp@de.ibm.com) 5 * Ulrich Weigand (uweigand@de.ibm.com) 6 * 7 * Derived from "arch/i386/mm/fault.c" 8 * Copyright (C) 1995 Linus Torvalds 9 */ 10 11 #include <linux/kernel_stat.h> 12 #include <linux/perf_event.h> 13 #include <linux/signal.h> 14 #include <linux/sched.h> 15 #include <linux/kernel.h> 16 #include <linux/errno.h> 17 #include <linux/string.h> 18 #include <linux/types.h> 19 #include <linux/ptrace.h> 20 #include <linux/mman.h> 21 #include <linux/mm.h> 22 #include <linux/compat.h> 23 #include <linux/smp.h> 24 #include <linux/kdebug.h> 25 #include <linux/init.h> 26 #include <linux/console.h> 27 #include <linux/module.h> 28 #include <linux/hardirq.h> 29 #include <linux/kprobes.h> 30 #include <linux/uaccess.h> 31 #include <linux/hugetlb.h> 32 #include <asm/asm-offsets.h> 33 #include <asm/pgtable.h> 34 #include <asm/irq.h> 35 #include <asm/mmu_context.h> 36 #include <asm/facility.h> 37 #include "../kernel/entry.h" 38 39 #define __FAIL_ADDR_MASK -4096L 40 #define __SUBCODE_MASK 0x0600 41 #define __PF_RES_FIELD 0x8000000000000000ULL 42 43 #define VM_FAULT_BADCONTEXT 0x010000 44 #define VM_FAULT_BADMAP 0x020000 45 #define VM_FAULT_BADACCESS 0x040000 46 #define VM_FAULT_SIGNAL 0x080000 47 #define VM_FAULT_PFAULT 0x100000 48 49 static unsigned long store_indication __read_mostly; 50 51 static int __init fault_init(void) 52 { 53 if (test_facility(75)) 54 store_indication = 0xc00; 55 return 0; 56 } 57 early_initcall(fault_init); 58 59 static inline int notify_page_fault(struct pt_regs *regs) 60 { 61 int ret = 0; 62 63 /* kprobe_running() needs smp_processor_id() */ 64 if (kprobes_built_in() && !user_mode(regs)) { 65 preempt_disable(); 66 if (kprobe_running() && kprobe_fault_handler(regs, 14)) 67 ret = 1; 68 preempt_enable(); 69 } 70 return ret; 71 } 72 73 74 /* 75 * Unlock any spinlocks which will prevent us from getting the 76 * message out. 77 */ 78 void bust_spinlocks(int yes) 79 { 80 if (yes) { 81 oops_in_progress = 1; 82 } else { 83 int loglevel_save = console_loglevel; 84 console_unblank(); 85 oops_in_progress = 0; 86 /* 87 * OK, the message is on the console. Now we call printk() 88 * without oops_in_progress set so that printk will give klogd 89 * a poke. Hold onto your hats... 90 */ 91 console_loglevel = 15; 92 printk(" "); 93 console_loglevel = loglevel_save; 94 } 95 } 96 97 /* 98 * Returns the address space associated with the fault. 99 * Returns 0 for kernel space and 1 for user space. 100 */ 101 static inline int user_space_fault(struct pt_regs *regs) 102 { 103 unsigned long trans_exc_code; 104 105 /* 106 * The lowest two bits of the translation exception 107 * identification indicate which paging table was used. 108 */ 109 trans_exc_code = regs->int_parm_long & 3; 110 if (trans_exc_code == 3) /* home space -> kernel */ 111 return 0; 112 if (user_mode(regs)) 113 return 1; 114 if (trans_exc_code == 2) /* secondary space -> set_fs */ 115 return current->thread.mm_segment.ar4; 116 if (current->flags & PF_VCPU) 117 return 1; 118 return 0; 119 } 120 121 static int bad_address(void *p) 122 { 123 unsigned long dummy; 124 125 return probe_kernel_address((unsigned long *)p, dummy); 126 } 127 128 static void dump_pagetable(unsigned long asce, unsigned long address) 129 { 130 unsigned long *table = __va(asce & PAGE_MASK); 131 132 pr_alert("AS:%016lx ", asce); 133 switch (asce & _ASCE_TYPE_MASK) { 134 case _ASCE_TYPE_REGION1: 135 table = table + ((address >> 53) & 0x7ff); 136 if (bad_address(table)) 137 goto bad; 138 pr_cont("R1:%016lx ", *table); 139 if (*table & _REGION_ENTRY_INVALID) 140 goto out; 141 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 142 /* fallthrough */ 143 case _ASCE_TYPE_REGION2: 144 table = table + ((address >> 42) & 0x7ff); 145 if (bad_address(table)) 146 goto bad; 147 pr_cont("R2:%016lx ", *table); 148 if (*table & _REGION_ENTRY_INVALID) 149 goto out; 150 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 151 /* fallthrough */ 152 case _ASCE_TYPE_REGION3: 153 table = table + ((address >> 31) & 0x7ff); 154 if (bad_address(table)) 155 goto bad; 156 pr_cont("R3:%016lx ", *table); 157 if (*table & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE)) 158 goto out; 159 table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); 160 /* fallthrough */ 161 case _ASCE_TYPE_SEGMENT: 162 table = table + ((address >> 20) & 0x7ff); 163 if (bad_address(table)) 164 goto bad; 165 pr_cont("S:%016lx ", *table); 166 if (*table & (_SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_LARGE)) 167 goto out; 168 table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN); 169 } 170 table = table + ((address >> 12) & 0xff); 171 if (bad_address(table)) 172 goto bad; 173 pr_cont("P:%016lx ", *table); 174 out: 175 pr_cont("\n"); 176 return; 177 bad: 178 pr_cont("BAD\n"); 179 } 180 181 static void dump_fault_info(struct pt_regs *regs) 182 { 183 unsigned long asce; 184 185 pr_alert("Fault in "); 186 switch (regs->int_parm_long & 3) { 187 case 3: 188 pr_cont("home space "); 189 break; 190 case 2: 191 pr_cont("secondary space "); 192 break; 193 case 1: 194 pr_cont("access register "); 195 break; 196 case 0: 197 pr_cont("primary space "); 198 break; 199 } 200 pr_cont("mode while using "); 201 if (!user_space_fault(regs)) { 202 asce = S390_lowcore.kernel_asce; 203 pr_cont("kernel "); 204 } 205 #ifdef CONFIG_PGSTE 206 else if ((current->flags & PF_VCPU) && S390_lowcore.gmap) { 207 struct gmap *gmap = (struct gmap *)S390_lowcore.gmap; 208 asce = gmap->asce; 209 pr_cont("gmap "); 210 } 211 #endif 212 else { 213 asce = S390_lowcore.user_asce; 214 pr_cont("user "); 215 } 216 pr_cont("ASCE.\n"); 217 dump_pagetable(asce, regs->int_parm_long & __FAIL_ADDR_MASK); 218 } 219 220 static inline void report_user_fault(struct pt_regs *regs, long signr) 221 { 222 if ((task_pid_nr(current) > 1) && !show_unhandled_signals) 223 return; 224 if (!unhandled_signal(current, signr)) 225 return; 226 if (!printk_ratelimit()) 227 return; 228 printk(KERN_ALERT "User process fault: interruption code %04x ilc:%d ", 229 regs->int_code & 0xffff, regs->int_code >> 17); 230 print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN); 231 printk(KERN_CONT "\n"); 232 printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n", 233 regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long); 234 dump_fault_info(regs); 235 show_regs(regs); 236 } 237 238 /* 239 * Send SIGSEGV to task. This is an external routine 240 * to keep the stack usage of do_page_fault small. 241 */ 242 static noinline void do_sigsegv(struct pt_regs *regs, int si_code) 243 { 244 struct siginfo si; 245 246 report_user_fault(regs, SIGSEGV); 247 si.si_signo = SIGSEGV; 248 si.si_code = si_code; 249 si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK); 250 force_sig_info(SIGSEGV, &si, current); 251 } 252 253 static noinline void do_no_context(struct pt_regs *regs) 254 { 255 const struct exception_table_entry *fixup; 256 unsigned long address; 257 258 /* Are we prepared to handle this kernel fault? */ 259 fixup = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN); 260 if (fixup) { 261 regs->psw.addr = extable_fixup(fixup) | PSW_ADDR_AMODE; 262 return; 263 } 264 265 /* 266 * Oops. The kernel tried to access some bad page. We'll have to 267 * terminate things with extreme prejudice. 268 */ 269 address = regs->int_parm_long & __FAIL_ADDR_MASK; 270 if (!user_space_fault(regs)) 271 printk(KERN_ALERT "Unable to handle kernel pointer dereference" 272 " in virtual kernel address space\n"); 273 else 274 printk(KERN_ALERT "Unable to handle kernel paging request" 275 " in virtual user address space\n"); 276 printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n", 277 regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long); 278 dump_fault_info(regs); 279 die(regs, "Oops"); 280 do_exit(SIGKILL); 281 } 282 283 static noinline void do_low_address(struct pt_regs *regs) 284 { 285 /* Low-address protection hit in kernel mode means 286 NULL pointer write access in kernel mode. */ 287 if (regs->psw.mask & PSW_MASK_PSTATE) { 288 /* Low-address protection hit in user mode 'cannot happen'. */ 289 die (regs, "Low-address protection"); 290 do_exit(SIGKILL); 291 } 292 293 do_no_context(regs); 294 } 295 296 static noinline void do_sigbus(struct pt_regs *regs) 297 { 298 struct task_struct *tsk = current; 299 struct siginfo si; 300 301 /* 302 * Send a sigbus, regardless of whether we were in kernel 303 * or user mode. 304 */ 305 si.si_signo = SIGBUS; 306 si.si_errno = 0; 307 si.si_code = BUS_ADRERR; 308 si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK); 309 force_sig_info(SIGBUS, &si, tsk); 310 } 311 312 static noinline void do_fault_error(struct pt_regs *regs, int fault) 313 { 314 int si_code; 315 316 switch (fault) { 317 case VM_FAULT_BADACCESS: 318 case VM_FAULT_BADMAP: 319 /* Bad memory access. Check if it is kernel or user space. */ 320 if (user_mode(regs)) { 321 /* User mode accesses just cause a SIGSEGV */ 322 si_code = (fault == VM_FAULT_BADMAP) ? 323 SEGV_MAPERR : SEGV_ACCERR; 324 do_sigsegv(regs, si_code); 325 return; 326 } 327 case VM_FAULT_BADCONTEXT: 328 case VM_FAULT_PFAULT: 329 do_no_context(regs); 330 break; 331 case VM_FAULT_SIGNAL: 332 if (!user_mode(regs)) 333 do_no_context(regs); 334 break; 335 default: /* fault & VM_FAULT_ERROR */ 336 if (fault & VM_FAULT_OOM) { 337 if (!user_mode(regs)) 338 do_no_context(regs); 339 else 340 pagefault_out_of_memory(); 341 } else if (fault & VM_FAULT_SIGSEGV) { 342 /* Kernel mode? Handle exceptions or die */ 343 if (!user_mode(regs)) 344 do_no_context(regs); 345 else 346 do_sigsegv(regs, SEGV_MAPERR); 347 } else if (fault & VM_FAULT_SIGBUS) { 348 /* Kernel mode? Handle exceptions or die */ 349 if (!user_mode(regs)) 350 do_no_context(regs); 351 else 352 do_sigbus(regs); 353 } else 354 BUG(); 355 break; 356 } 357 } 358 359 /* 360 * This routine handles page faults. It determines the address, 361 * and the problem, and then passes it off to one of the appropriate 362 * routines. 363 * 364 * interruption code (int_code): 365 * 04 Protection -> Write-Protection (suprression) 366 * 10 Segment translation -> Not present (nullification) 367 * 11 Page translation -> Not present (nullification) 368 * 3b Region third trans. -> Not present (nullification) 369 */ 370 static inline int do_exception(struct pt_regs *regs, int access) 371 { 372 #ifdef CONFIG_PGSTE 373 struct gmap *gmap; 374 #endif 375 struct task_struct *tsk; 376 struct mm_struct *mm; 377 struct vm_area_struct *vma; 378 unsigned long trans_exc_code; 379 unsigned long address; 380 unsigned int flags; 381 int fault; 382 383 tsk = current; 384 /* 385 * The instruction that caused the program check has 386 * been nullified. Don't signal single step via SIGTRAP. 387 */ 388 clear_pt_regs_flag(regs, PIF_PER_TRAP); 389 390 if (notify_page_fault(regs)) 391 return 0; 392 393 mm = tsk->mm; 394 trans_exc_code = regs->int_parm_long; 395 396 /* 397 * Verify that the fault happened in user space, that 398 * we are not in an interrupt and that there is a 399 * user context. 400 */ 401 fault = VM_FAULT_BADCONTEXT; 402 if (unlikely(!user_space_fault(regs) || faulthandler_disabled() || !mm)) 403 goto out; 404 405 address = trans_exc_code & __FAIL_ADDR_MASK; 406 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); 407 flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; 408 if (user_mode(regs)) 409 flags |= FAULT_FLAG_USER; 410 if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400) 411 flags |= FAULT_FLAG_WRITE; 412 down_read(&mm->mmap_sem); 413 414 #ifdef CONFIG_PGSTE 415 gmap = (current->flags & PF_VCPU) ? 416 (struct gmap *) S390_lowcore.gmap : NULL; 417 if (gmap) { 418 current->thread.gmap_addr = address; 419 address = __gmap_translate(gmap, address); 420 if (address == -EFAULT) { 421 fault = VM_FAULT_BADMAP; 422 goto out_up; 423 } 424 if (gmap->pfault_enabled) 425 flags |= FAULT_FLAG_RETRY_NOWAIT; 426 } 427 #endif 428 429 retry: 430 fault = VM_FAULT_BADMAP; 431 vma = find_vma(mm, address); 432 if (!vma) 433 goto out_up; 434 435 if (unlikely(vma->vm_start > address)) { 436 if (!(vma->vm_flags & VM_GROWSDOWN)) 437 goto out_up; 438 if (expand_stack(vma, address)) 439 goto out_up; 440 } 441 442 /* 443 * Ok, we have a good vm_area for this memory access, so 444 * we can handle it.. 445 */ 446 fault = VM_FAULT_BADACCESS; 447 if (unlikely(!(vma->vm_flags & access))) 448 goto out_up; 449 450 if (is_vm_hugetlb_page(vma)) 451 address &= HPAGE_MASK; 452 /* 453 * If for any reason at all we couldn't handle the fault, 454 * make sure we exit gracefully rather than endlessly redo 455 * the fault. 456 */ 457 fault = handle_mm_fault(mm, vma, address, flags); 458 /* No reason to continue if interrupted by SIGKILL. */ 459 if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) { 460 fault = VM_FAULT_SIGNAL; 461 goto out; 462 } 463 if (unlikely(fault & VM_FAULT_ERROR)) 464 goto out_up; 465 466 /* 467 * Major/minor page fault accounting is only done on the 468 * initial attempt. If we go through a retry, it is extremely 469 * likely that the page will be found in page cache at that point. 470 */ 471 if (flags & FAULT_FLAG_ALLOW_RETRY) { 472 if (fault & VM_FAULT_MAJOR) { 473 tsk->maj_flt++; 474 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 475 regs, address); 476 } else { 477 tsk->min_flt++; 478 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 479 regs, address); 480 } 481 if (fault & VM_FAULT_RETRY) { 482 #ifdef CONFIG_PGSTE 483 if (gmap && (flags & FAULT_FLAG_RETRY_NOWAIT)) { 484 /* FAULT_FLAG_RETRY_NOWAIT has been set, 485 * mmap_sem has not been released */ 486 current->thread.gmap_pfault = 1; 487 fault = VM_FAULT_PFAULT; 488 goto out_up; 489 } 490 #endif 491 /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk 492 * of starvation. */ 493 flags &= ~(FAULT_FLAG_ALLOW_RETRY | 494 FAULT_FLAG_RETRY_NOWAIT); 495 flags |= FAULT_FLAG_TRIED; 496 down_read(&mm->mmap_sem); 497 goto retry; 498 } 499 } 500 #ifdef CONFIG_PGSTE 501 if (gmap) { 502 address = __gmap_link(gmap, current->thread.gmap_addr, 503 address); 504 if (address == -EFAULT) { 505 fault = VM_FAULT_BADMAP; 506 goto out_up; 507 } 508 if (address == -ENOMEM) { 509 fault = VM_FAULT_OOM; 510 goto out_up; 511 } 512 } 513 #endif 514 fault = 0; 515 out_up: 516 up_read(&mm->mmap_sem); 517 out: 518 return fault; 519 } 520 521 void do_protection_exception(struct pt_regs *regs) 522 { 523 unsigned long trans_exc_code; 524 int fault; 525 526 trans_exc_code = regs->int_parm_long; 527 /* 528 * Protection exceptions are suppressing, decrement psw address. 529 * The exception to this rule are aborted transactions, for these 530 * the PSW already points to the correct location. 531 */ 532 if (!(regs->int_code & 0x200)) 533 regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16); 534 /* 535 * Check for low-address protection. This needs to be treated 536 * as a special case because the translation exception code 537 * field is not guaranteed to contain valid data in this case. 538 */ 539 if (unlikely(!(trans_exc_code & 4))) { 540 do_low_address(regs); 541 return; 542 } 543 fault = do_exception(regs, VM_WRITE); 544 if (unlikely(fault)) 545 do_fault_error(regs, fault); 546 } 547 NOKPROBE_SYMBOL(do_protection_exception); 548 549 void do_dat_exception(struct pt_regs *regs) 550 { 551 int access, fault; 552 553 access = VM_READ | VM_EXEC | VM_WRITE; 554 fault = do_exception(regs, access); 555 if (unlikely(fault)) 556 do_fault_error(regs, fault); 557 } 558 NOKPROBE_SYMBOL(do_dat_exception); 559 560 #ifdef CONFIG_PFAULT 561 /* 562 * 'pfault' pseudo page faults routines. 563 */ 564 static int pfault_disable; 565 566 static int __init nopfault(char *str) 567 { 568 pfault_disable = 1; 569 return 1; 570 } 571 572 __setup("nopfault", nopfault); 573 574 struct pfault_refbk { 575 u16 refdiagc; 576 u16 reffcode; 577 u16 refdwlen; 578 u16 refversn; 579 u64 refgaddr; 580 u64 refselmk; 581 u64 refcmpmk; 582 u64 reserved; 583 } __attribute__ ((packed, aligned(8))); 584 585 int pfault_init(void) 586 { 587 struct pfault_refbk refbk = { 588 .refdiagc = 0x258, 589 .reffcode = 0, 590 .refdwlen = 5, 591 .refversn = 2, 592 .refgaddr = __LC_CURRENT_PID, 593 .refselmk = 1ULL << 48, 594 .refcmpmk = 1ULL << 48, 595 .reserved = __PF_RES_FIELD }; 596 int rc; 597 598 if (pfault_disable) 599 return -1; 600 asm volatile( 601 " diag %1,%0,0x258\n" 602 "0: j 2f\n" 603 "1: la %0,8\n" 604 "2:\n" 605 EX_TABLE(0b,1b) 606 : "=d" (rc) : "a" (&refbk), "m" (refbk) : "cc"); 607 return rc; 608 } 609 610 void pfault_fini(void) 611 { 612 struct pfault_refbk refbk = { 613 .refdiagc = 0x258, 614 .reffcode = 1, 615 .refdwlen = 5, 616 .refversn = 2, 617 }; 618 619 if (pfault_disable) 620 return; 621 asm volatile( 622 " diag %0,0,0x258\n" 623 "0:\n" 624 EX_TABLE(0b,0b) 625 : : "a" (&refbk), "m" (refbk) : "cc"); 626 } 627 628 static DEFINE_SPINLOCK(pfault_lock); 629 static LIST_HEAD(pfault_list); 630 631 static void pfault_interrupt(struct ext_code ext_code, 632 unsigned int param32, unsigned long param64) 633 { 634 struct task_struct *tsk; 635 __u16 subcode; 636 pid_t pid; 637 638 /* 639 * Get the external interruption subcode & pfault 640 * initial/completion signal bit. VM stores this 641 * in the 'cpu address' field associated with the 642 * external interrupt. 643 */ 644 subcode = ext_code.subcode; 645 if ((subcode & 0xff00) != __SUBCODE_MASK) 646 return; 647 inc_irq_stat(IRQEXT_PFL); 648 /* Get the token (= pid of the affected task). */ 649 pid = param64; 650 rcu_read_lock(); 651 tsk = find_task_by_pid_ns(pid, &init_pid_ns); 652 if (tsk) 653 get_task_struct(tsk); 654 rcu_read_unlock(); 655 if (!tsk) 656 return; 657 spin_lock(&pfault_lock); 658 if (subcode & 0x0080) { 659 /* signal bit is set -> a page has been swapped in by VM */ 660 if (tsk->thread.pfault_wait == 1) { 661 /* Initial interrupt was faster than the completion 662 * interrupt. pfault_wait is valid. Set pfault_wait 663 * back to zero and wake up the process. This can 664 * safely be done because the task is still sleeping 665 * and can't produce new pfaults. */ 666 tsk->thread.pfault_wait = 0; 667 list_del(&tsk->thread.list); 668 wake_up_process(tsk); 669 put_task_struct(tsk); 670 } else { 671 /* Completion interrupt was faster than initial 672 * interrupt. Set pfault_wait to -1 so the initial 673 * interrupt doesn't put the task to sleep. 674 * If the task is not running, ignore the completion 675 * interrupt since it must be a leftover of a PFAULT 676 * CANCEL operation which didn't remove all pending 677 * completion interrupts. */ 678 if (tsk->state == TASK_RUNNING) 679 tsk->thread.pfault_wait = -1; 680 } 681 } else { 682 /* signal bit not set -> a real page is missing. */ 683 if (WARN_ON_ONCE(tsk != current)) 684 goto out; 685 if (tsk->thread.pfault_wait == 1) { 686 /* Already on the list with a reference: put to sleep */ 687 __set_task_state(tsk, TASK_UNINTERRUPTIBLE); 688 set_tsk_need_resched(tsk); 689 } else if (tsk->thread.pfault_wait == -1) { 690 /* Completion interrupt was faster than the initial 691 * interrupt (pfault_wait == -1). Set pfault_wait 692 * back to zero and exit. */ 693 tsk->thread.pfault_wait = 0; 694 } else { 695 /* Initial interrupt arrived before completion 696 * interrupt. Let the task sleep. 697 * An extra task reference is needed since a different 698 * cpu may set the task state to TASK_RUNNING again 699 * before the scheduler is reached. */ 700 get_task_struct(tsk); 701 tsk->thread.pfault_wait = 1; 702 list_add(&tsk->thread.list, &pfault_list); 703 __set_task_state(tsk, TASK_UNINTERRUPTIBLE); 704 set_tsk_need_resched(tsk); 705 } 706 } 707 out: 708 spin_unlock(&pfault_lock); 709 put_task_struct(tsk); 710 } 711 712 static int pfault_cpu_notify(struct notifier_block *self, unsigned long action, 713 void *hcpu) 714 { 715 struct thread_struct *thread, *next; 716 struct task_struct *tsk; 717 718 switch (action & ~CPU_TASKS_FROZEN) { 719 case CPU_DEAD: 720 spin_lock_irq(&pfault_lock); 721 list_for_each_entry_safe(thread, next, &pfault_list, list) { 722 thread->pfault_wait = 0; 723 list_del(&thread->list); 724 tsk = container_of(thread, struct task_struct, thread); 725 wake_up_process(tsk); 726 put_task_struct(tsk); 727 } 728 spin_unlock_irq(&pfault_lock); 729 break; 730 default: 731 break; 732 } 733 return NOTIFY_OK; 734 } 735 736 static int __init pfault_irq_init(void) 737 { 738 int rc; 739 740 rc = register_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt); 741 if (rc) 742 goto out_extint; 743 rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP; 744 if (rc) 745 goto out_pfault; 746 irq_subclass_register(IRQ_SUBCLASS_SERVICE_SIGNAL); 747 hotcpu_notifier(pfault_cpu_notify, 0); 748 return 0; 749 750 out_pfault: 751 unregister_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt); 752 out_extint: 753 pfault_disable = 1; 754 return rc; 755 } 756 early_initcall(pfault_irq_init); 757 758 #endif /* CONFIG_PFAULT */ 759