fault.c (95a0c7c2d6cfde3fb5fdb713428ed0df4d6bdd58) fault.c (f5caf621ee357279e759c0911daf6d55c7d36f03)
1/*
2 * Copyright (C) 1995 Linus Torvalds
3 * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs.
4 * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar
5 */
6#include <linux/sched.h> /* test_thread_flag(), ... */
7#include <linux/sched/task_stack.h> /* task_stack_*(), ... */
8#include <linux/kdebug.h> /* oops_begin/end, ... */

--- 382 unchanged lines hidden (view full) ---

391 pgd_t *base = __va(read_cr3_pa());
392 pgd_t *pgd = &base[pgd_index(address)];
393 p4d_t *p4d;
394 pud_t *pud;
395 pmd_t *pmd;
396 pte_t *pte;
397
398#ifdef CONFIG_X86_PAE
1/*
2 * Copyright (C) 1995 Linus Torvalds
3 * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs.
4 * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar
5 */
6#include <linux/sched.h> /* test_thread_flag(), ... */
7#include <linux/sched/task_stack.h> /* task_stack_*(), ... */
8#include <linux/kdebug.h> /* oops_begin/end, ... */

--- 382 unchanged lines hidden (view full) ---

391 pgd_t *base = __va(read_cr3_pa());
392 pgd_t *pgd = &base[pgd_index(address)];
393 p4d_t *p4d;
394 pud_t *pud;
395 pmd_t *pmd;
396 pte_t *pte;
397
398#ifdef CONFIG_X86_PAE
399 printk("*pdpt = %016Lx ", pgd_val(*pgd));
399 pr_info("*pdpt = %016Lx ", pgd_val(*pgd));
400 if (!low_pfn(pgd_val(*pgd) >> PAGE_SHIFT) || !pgd_present(*pgd))
401 goto out;
400 if (!low_pfn(pgd_val(*pgd) >> PAGE_SHIFT) || !pgd_present(*pgd))
401 goto out;
402#define pr_pde pr_cont
403#else
404#define pr_pde pr_info
402#endif
403 p4d = p4d_offset(pgd, address);
404 pud = pud_offset(p4d, address);
405 pmd = pmd_offset(pud, address);
405#endif
406 p4d = p4d_offset(pgd, address);
407 pud = pud_offset(p4d, address);
408 pmd = pmd_offset(pud, address);
406 printk(KERN_CONT "*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)pmd_val(*pmd));
409 pr_pde("*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)pmd_val(*pmd));
410#undef pr_pde
407
408 /*
409 * We must not directly access the pte in the highpte
410 * case if the page table is located in highmem.
411 * And let's rather not kmap-atomic the pte, just in case
412 * it's allocated already:
413 */
414 if (!low_pfn(pmd_pfn(*pmd)) || !pmd_present(*pmd) || pmd_large(*pmd))
415 goto out;
416
417 pte = pte_offset_kernel(pmd, address);
411
412 /*
413 * We must not directly access the pte in the highpte
414 * case if the page table is located in highmem.
415 * And let's rather not kmap-atomic the pte, just in case
416 * it's allocated already:
417 */
418 if (!low_pfn(pmd_pfn(*pmd)) || !pmd_present(*pmd) || pmd_large(*pmd))
419 goto out;
420
421 pte = pte_offset_kernel(pmd, address);
418 printk("*pte = %0*Lx ", sizeof(*pte) * 2, (u64)pte_val(*pte));
422 pr_cont("*pte = %0*Lx ", sizeof(*pte) * 2, (u64)pte_val(*pte));
419out:
423out:
420 printk("\n");
424 pr_cont("\n");
421}
422
423#else /* CONFIG_X86_64: */
424
/*
 * CONFIG_X86_64 variant of vmalloc_sync_all(): hands the whole vmalloc
 * range to sync_global_pgds(). The start is VMALLOC_START masked with
 * PGDIR_MASK (presumably aligning it down to a PGD boundary -- confirm
 * against the PGDIR_MASK definition); the end is VMALLOC_END.
 */
425void vmalloc_sync_all(void)
426{
427 sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
428}

--- 131 unchanged lines hidden (view full) ---

560 p4d_t *p4d;
561 pud_t *pud;
562 pmd_t *pmd;
563 pte_t *pte;
564
565 if (bad_address(pgd))
566 goto bad;
567
425}
426
427#else /* CONFIG_X86_64: */
428
/*
 * CONFIG_X86_64 variant of vmalloc_sync_all(): hands the whole vmalloc
 * range to sync_global_pgds(). The start is VMALLOC_START masked with
 * PGDIR_MASK (presumably aligning it down to a PGD boundary -- confirm
 * against the PGDIR_MASK definition); the end is VMALLOC_END.
 */
429void vmalloc_sync_all(void)
430{
431 sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
432}

--- 131 unchanged lines hidden (view full) ---

564 p4d_t *p4d;
565 pud_t *pud;
566 pmd_t *pmd;
567 pte_t *pte;
568
569 if (bad_address(pgd))
570 goto bad;
571
568 printk("PGD %lx ", pgd_val(*pgd));
572 pr_info("PGD %lx ", pgd_val(*pgd));
569
570 if (!pgd_present(*pgd))
571 goto out;
572
573 p4d = p4d_offset(pgd, address);
574 if (bad_address(p4d))
575 goto bad;
576
573
574 if (!pgd_present(*pgd))
575 goto out;
576
577 p4d = p4d_offset(pgd, address);
578 if (bad_address(p4d))
579 goto bad;
580
577 printk("P4D %lx ", p4d_val(*p4d));
581 pr_cont("P4D %lx ", p4d_val(*p4d));
578 if (!p4d_present(*p4d) || p4d_large(*p4d))
579 goto out;
580
581 pud = pud_offset(p4d, address);
582 if (bad_address(pud))
583 goto bad;
584
582 if (!p4d_present(*p4d) || p4d_large(*p4d))
583 goto out;
584
585 pud = pud_offset(p4d, address);
586 if (bad_address(pud))
587 goto bad;
588
585 printk("PUD %lx ", pud_val(*pud));
589 pr_cont("PUD %lx ", pud_val(*pud));
586 if (!pud_present(*pud) || pud_large(*pud))
587 goto out;
588
589 pmd = pmd_offset(pud, address);
590 if (bad_address(pmd))
591 goto bad;
592
590 if (!pud_present(*pud) || pud_large(*pud))
591 goto out;
592
593 pmd = pmd_offset(pud, address);
594 if (bad_address(pmd))
595 goto bad;
596
593 printk("PMD %lx ", pmd_val(*pmd));
597 pr_cont("PMD %lx ", pmd_val(*pmd));
594 if (!pmd_present(*pmd) || pmd_large(*pmd))
595 goto out;
596
597 pte = pte_offset_kernel(pmd, address);
598 if (bad_address(pte))
599 goto bad;
600
598 if (!pmd_present(*pmd) || pmd_large(*pmd))
599 goto out;
600
601 pte = pte_offset_kernel(pmd, address);
602 if (bad_address(pte))
603 goto bad;
604
601 printk("PTE %lx", pte_val(*pte));
605 pr_cont("PTE %lx", pte_val(*pte));
602out:
606out:
603 printk("\n");
607 pr_cont("\n");
604 return;
605bad:
608 return;
609bad:
606 printk("BAD\n");
610 pr_info("BAD\n");
607}
608
609#endif /* CONFIG_X86_64 */
610
611/*
612 * Workaround for K8 erratum #93 & buggy BIOS.
613 *
614 * BIOS SMM functions are required to use a specific workaround

--- 182 unchanged lines hidden (view full) ---

797 /*
798 * Stack overflow? During boot, we can fault near the initial
799 * stack in the direct map, but that's not an overflow -- check
800 * that we're in vmalloc space to avoid this.
801 */
802 if (is_vmalloc_addr((void *)address) &&
803 (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
804 address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
611}
612
613#endif /* CONFIG_X86_64 */
614
615/*
616 * Workaround for K8 erratum #93 & buggy BIOS.
617 *
618 * BIOS SMM functions are required to use a specific workaround

--- 182 unchanged lines hidden (view full) ---

801 /*
802 * Stack overflow? During boot, we can fault near the initial
803 * stack in the direct map, but that's not an overflow -- check
804 * that we're in vmalloc space to avoid this.
805 */
806 if (is_vmalloc_addr((void *)address) &&
807 (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
808 address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
805 register void *__sp asm("rsp");
806 unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *);
807 /*
808 * We're likely to be running with very little stack space
809 * left. It's plausible that we'd hit this condition but
810 * double-fault even before we get this far, in which case
811 * we're fine: the double-fault handler will deal with it.
812 *
813 * We don't want to make it all the way into the oops code
814 * and then double-fault, though, because we're likely to
815 * break the console driver and lose most of the stack dump.
816 */
817 asm volatile ("movq %[stack], %%rsp\n\t"
818 "call handle_stack_overflow\n\t"
819 "1: jmp 1b"
809 unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *);
810 /*
811 * We're likely to be running with very little stack space
812 * left. It's plausible that we'd hit this condition but
813 * double-fault even before we get this far, in which case
814 * we're fine: the double-fault handler will deal with it.
815 *
816 * We don't want to make it all the way into the oops code
817 * and then double-fault, though, because we're likely to
818 * break the console driver and lose most of the stack dump.
819 */
820 asm volatile ("movq %[stack], %%rsp\n\t"
821 "call handle_stack_overflow\n\t"
822 "1: jmp 1b"
820 : "+r" (__sp)
823 : ASM_CALL_CONSTRAINT
821 : "D" ("kernel stack overflow (page fault)"),
822 "S" (regs), "d" (address),
823 [stack] "rm" (stack));
824 unreachable();
825 }
826#endif
827
828 /*

--- 420 unchanged lines hidden (view full) ---

1249
1250 return true;
1251}
1252
1253/*
1254 * This routine handles page faults. It determines the address,
1255 * and the problem, and then passes it off to one of the appropriate
1256 * routines.
824 : "D" ("kernel stack overflow (page fault)"),
825 "S" (regs), "d" (address),
826 [stack] "rm" (stack));
827 unreachable();
828 }
829#endif
830
831 /*

--- 420 unchanged lines hidden (view full) ---

1252
1253 return true;
1254}
1255
1256/*
1257 * This routine handles page faults. It determines the address,
1258 * and the problem, and then passes it off to one of the appropriate
1259 * routines.
1257 *
1258 * This function must have noinline because both callers
1259 * {,trace_}do_page_fault() have notrace on. Having this as an actual function
1260 * guarantees there's a function trace entry.
1261 */
1262static noinline void
1263__do_page_fault(struct pt_regs *regs, unsigned long error_code,
1264 unsigned long address)
1265{
1266 struct vm_area_struct *vma;
1267 struct task_struct *tsk;
1268 struct mm_struct *mm;

--- 216 unchanged lines hidden (view full) ---

1485 tsk->min_flt++;
1486 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
1487 }
1488
1489 check_v8086_mode(regs, address, tsk);
1490}
1491NOKPROBE_SYMBOL(__do_page_fault);
1492
1260 */
1261static noinline void
1262__do_page_fault(struct pt_regs *regs, unsigned long error_code,
1263 unsigned long address)
1264{
1265 struct vm_area_struct *vma;
1266 struct task_struct *tsk;
1267 struct mm_struct *mm;

--- 216 unchanged lines hidden (view full) ---

1484 tsk->min_flt++;
1485 perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
1486 }
1487
1488 check_v8086_mode(regs, address, tsk);
1489}
1490NOKPROBE_SYMBOL(__do_page_fault);
1491
/*
 * Page-fault entry point. Reads the faulting address from CR2 before doing
 * anything else, then runs __do_page_fault() bracketed by
 * exception_enter()/exception_exit().
 *
 * @regs:       register state passed through to __do_page_fault()
 * @error_code: fault error code passed through to __do_page_fault()
 */
1493dotraplinkage void notrace
1494do_page_fault(struct pt_regs *regs, unsigned long error_code)
1495{
1496 unsigned long address = read_cr2(); /* Get the faulting address */
1497 enum ctx_state prev_state;
1498
1499 /*
1500 * We must have this function excluded from kprobes, tagged notrace, and call
1501 * read_cr2() before calling anything else, to avoid calling any kind
1502 * of tracing machinery before we've observed the CR2 value.
1503 *
1504 * exception_{enter,exit}() contain all sorts of tracepoints.
1505 */
1506
1507 prev_state = exception_enter();
1508 __do_page_fault(regs, error_code, address);
1509 exception_exit(prev_state);
1510}
1511NOKPROBE_SYMBOL(do_page_fault);
1512
1513#ifdef CONFIG_TRACING
/*
 * Fire the page-fault tracepoint that matches the faulting context:
 * trace_page_fault_user() when user_mode(regs) is true, otherwise
 * trace_page_fault_kernel(). All three arguments are forwarded unchanged.
 */
1514static nokprobe_inline void
1515trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
1516 unsigned long error_code)
1517{
1518 if (user_mode(regs))
1519 trace_page_fault_user(address, regs, error_code);
1520 else
1521 trace_page_fault_kernel(address, regs, error_code);
1522}
1523
/*
 * Fire the page-fault tracepoint that matches the faulting context:
 * trace_page_fault_user() when user_mode(regs) is true, otherwise
 * trace_page_fault_kernel(). All three arguments are forwarded unchanged.
 */
1492static nokprobe_inline void
1493trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
1494 unsigned long error_code)
1495{
1496 if (user_mode(regs))
1497 trace_page_fault_user(address, regs, error_code);
1498 else
1499 trace_page_fault_kernel(address, regs, error_code);
1500}
1501
1502/*
1503 * We must have this function blacklisted from kprobes, tagged with notrace
1504 * and call read_cr2() before calling anything else, to avoid calling any
1505 * kind of tracing machinery before we've observed the CR2 value.
1506 *
1507 * exception_{enter,exit}() contain all sorts of tracepoints.
1508 */
1524dotraplinkage void notrace
1509dotraplinkage void notrace
1525trace_do_page_fault(struct pt_regs *regs, unsigned long error_code)
1510do_page_fault(struct pt_regs *regs, unsigned long error_code)
1526{
1511{
1527 /*
1528 * The exception_enter and tracepoint processing could
1529 * trigger further page faults (user space callchain
1530 * reading) and destroy the original cr2 value, so read
1531 * the faulting address now.
1532 */
1533 unsigned long address = read_cr2();
1512 unsigned long address = read_cr2(); /* Get the faulting address */
1534 enum ctx_state prev_state;
1535
1536 prev_state = exception_enter();
1513 enum ctx_state prev_state;
1514
1515 prev_state = exception_enter();
1537 trace_page_fault_entries(address, regs, error_code);
1516 if (trace_pagefault_enabled())
1517 trace_page_fault_entries(address, regs, error_code);
1518
1538 __do_page_fault(regs, error_code, address);
1539 exception_exit(prev_state);
1540}
1519 __do_page_fault(regs, error_code, address);
1520 exception_exit(prev_state);
1521}
1541NOKPROBE_SYMBOL(trace_do_page_fault);
1542#endif /* CONFIG_TRACING */
1522NOKPROBE_SYMBOL(do_page_fault);