/*
 * Page fault handler for SH with an MMU.
 *
 *  Copyright (C) 1999  Niibe Yutaka
 *  Copyright (C) 2003 - 2012  Paul Mundt
 *
 *  Based on linux/arch/i386/mm/fault.c:
 *   Copyright (C) 1995  Linus Torvalds
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 */
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/perf_event.h>
#include <linux/kdebug.h>
#include <asm/io_trapped.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
#include <asm/traps.h>

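/*
 * Give any registered kprobes first crack at a kernel-mode fault.
 * Returns 1 if a kprobe fault handler dealt with the fault, 0
 * otherwise. Preemption is disabled across the check because
 * kprobe_running() relies on per-CPU state.
 */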
static inline int notify_page_fault(struct pt_regs *regs, int trap)
{
	int ret = 0;

	if (kprobes_built_in() && !user_mode(regs)) {
		preempt_disable();
		if (kprobe_running() && kprobe_fault_handler(regs, trap))
			ret = 1;
		preempt_enable();
	}

	return ret;
}

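/*
 * Fill out a siginfo describing the fault at 'address' and force
 * delivery of the signal to the given task.
 */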
static void
force_sig_info_fault(int si_signo, int si_code, unsigned long address,
		     struct task_struct *tsk)
{
	siginfo_t info;

	info.si_signo	= si_signo;
	info.si_errno	= 0;
	info.si_code	= si_code;
	info.si_addr	= (void __user *)address;

	force_sig_info(si_signo, &info, tsk);
}

/*
 * Dump out the page tables associated with 'addr' in mm 'mm'.
 */
static void show_pte(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;

	if (mm)
		pgd = mm->pgd;
	else
		pgd = get_TTB();

	printk(KERN_ALERT "pgd = %p\n", pgd);
	pgd += pgd_index(addr);
	printk(KERN_ALERT "[%08lx] *pgd=%0*Lx", addr,
	       (u32)(sizeof(*pgd) * 2), (u64)pgd_val(*pgd));

	do {
		pud_t *pud;
		pmd_t *pmd;
		pte_t *pte;

		if (pgd_none(*pgd))
			break;

		if (pgd_bad(*pgd)) {
			printk("(bad)");
			break;
		}

		pud = pud_offset(pgd, addr);
		if (PTRS_PER_PUD != 1)
			printk(", *pud=%0*Lx", (u32)(sizeof(*pud) * 2),
			       (u64)pud_val(*pud));

		if (pud_none(*pud))
			break;

		if (pud_bad(*pud)) {
			printk("(bad)");
			break;
		}

		pmd = pmd_offset(pud, addr);
		if (PTRS_PER_PMD != 1)
			printk(", *pmd=%0*Lx", (u32)(sizeof(*pmd) * 2),
			       (u64)pmd_val(*pmd));

		if (pmd_none(*pmd))
			break;

		if (pmd_bad(*pmd)) {
			printk("(bad)");
			break;
		}

		/* We must not map this if we have highmem enabled */
		if (PageHighMem(pfn_to_page(pmd_val(*pmd) >> PAGE_SHIFT)))
			break;

		pte = pte_offset_kernel(pmd, addr);
		printk(", *pte=%0*Lx", (u32)(sizeof(*pte) * 2),
		       (u64)pte_val(*pte));
	} while (0);

	printk("\n");
}

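/*
 * Copy the kernel-space entries covering 'address' from the reference
 * page table (init_mm.pgd) into 'pgd'. Returns the synchronised pmd on
 * success, or NULL if the reference entry is missing or the tables
 * were already in sync (meaning the fault has some other cause).
 */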
static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
{
	unsigned index = pgd_index(address);
	pgd_t *pgd_k;
	pud_t *pud, *pud_k;
	pmd_t *pmd, *pmd_k;

	pgd += index;
	pgd_k = init_mm.pgd + index;

	if (!pgd_present(*pgd_k))
		return NULL;

	pud = pud_offset(pgd, address);
	pud_k = pud_offset(pgd_k, address);
	if (!pud_present(*pud_k))
		return NULL;

	if (!pud_present(*pud))
		set_pud(pud, *pud_k);

	pmd = pmd_offset(pud, address);
	pmd_k = pmd_offset(pud_k, address);
	if (!pmd_present(*pmd_k))
		return NULL;

	if (!pmd_present(*pmd))
		set_pmd(pmd, *pmd_k);
	else {
		/*
		 * The page tables are fully synchronised so there must
		 * be another reason for the fault. Return NULL here to
		 * signal that we have not taken care of the fault.
		 */
		BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
		return NULL;
	}

	return pmd_k;
}

/*
 * Handle a fault on the vmalloc or module mapping area
 */
static noinline int vmalloc_fault(unsigned long address)
{
	pgd_t *pgd_k;
	pmd_t *pmd_k;
	pte_t *pte_k;

	/* Make sure we are in vmalloc/module area: */
	if (!is_vmalloc_addr((void *)address))
		return -1;

	/*
	 * Synchronize this task's top level page-table
	 * with the 'reference' page table.
	 *
	 * Do _not_ use "current" here. We might be inside
	 * an interrupt in the middle of a task switch.
	 */
	pgd_k = get_TTB();
	pmd_k = vmalloc_sync_one(pgd_k, address);
	if (!pmd_k)
		return -1;

	pte_k = pte_offset_kernel(pmd_k, address);
	if (!pte_present(*pte_k))
		return -1;

	return 0;
}

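/*
 * Print the diagnostics for a fatal kernel-mode fault: what kind of
 * access failed, the faulting address, the PC, and a walk of the page
 * tables for the address.
 */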
static void
show_fault_oops(struct pt_regs *regs, unsigned long address)
{
	if (!oops_may_print())
		return;

	printk(KERN_ALERT "BUG: unable to handle kernel ");
	if (address < PAGE_SIZE)
		printk(KERN_CONT "NULL pointer dereference");
	else
		printk(KERN_CONT "paging request");

	printk(KERN_CONT " at %08lx\n", address);
	printk(KERN_ALERT "PC:");
	printk_address(regs->pc, 1);

	show_pte(NULL, address);
}

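/*
 * A kernel-mode fault with no user context to blame: give the
 * exception fixup tables and trapped I/O emulation a chance to
 * recover, and oops if neither can.
 */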
static noinline void
no_context(struct pt_regs *regs, unsigned long error_code,
	   unsigned long address)
{
	/* Are we prepared to handle this kernel fault? */
	if (fixup_exception(regs))
		return;

	if (handle_trapped_io(regs, address))
		return;

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	bust_spinlocks(1);

	show_fault_oops(regs, address);

	die("Oops", regs, error_code);
	bust_spinlocks(0);
	do_exit(SIGKILL);
}

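/*
 * Signal SIGSEGV for a user-mode access to an unmapped or protected
 * address; kernel-mode accesses fall through to no_context(). The
 * '_nosemaphore' variant assumes mmap_sem is not held.
 */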
static void
__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
		       unsigned long address, int si_code)
{
	struct task_struct *tsk = current;

	/* User mode accesses just cause a SIGSEGV */
	if (user_mode(regs)) {
		/*
		 * It's possible to have interrupts off here:
		 */
		local_irq_enable();

		force_sig_info_fault(SIGSEGV, si_code, address, tsk);

		return;
	}

	no_context(regs, error_code, address);
}

static noinline void
bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
		     unsigned long address)
{
	__bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR);
}

static void
__bad_area(struct pt_regs *regs, unsigned long error_code,
	   unsigned long address, int si_code)
{
	struct mm_struct *mm = current->mm;

	/*
	 * Something tried to access memory that isn't in our memory map.
	 * Fix it, but check if it's kernel or user first.
	 */
	up_read(&mm->mmap_sem);

	__bad_area_nosemaphore(regs, error_code, address, si_code);
}

static noinline void
bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address)
{
	__bad_area(regs, error_code, address, SEGV_MAPERR);
}

static noinline void
bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
		      unsigned long address)
{
	__bad_area(regs, error_code, address, SEGV_ACCERR);
}

static void out_of_memory(void)
{
	/*
	 * We ran out of memory: call the OOM killer and return to
	 * userspace (which will retry the fault, or kill us if we
	 * got oom-killed):
	 */
	up_read(&current->mm->mmap_sem);

	pagefault_out_of_memory();
}

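/*
 * The mapping was valid but the backing page could not be made
 * available: drop mmap_sem and raise SIGBUS, giving kernel-mode
 * faults to no_context() first.
 */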
static void
do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
{
	struct task_struct *tsk = current;
	struct mm_struct *mm = tsk->mm;

	up_read(&mm->mmap_sem);

	/* Kernel mode? Handle exceptions or die: */
	if (!user_mode(regs))
		no_context(regs, error_code, address);

	force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
}

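/*
 * Handle the error returns from handle_mm_fault(): pending fatal
 * signals, OOM and SIGBUS conditions. Returns 1 if the fault has been
 * fully dealt with, 0 if the caller should continue on the normal
 * path.
 */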
static noinline int
mm_fault_error(struct pt_regs *regs, unsigned long error_code,
	       unsigned long address, unsigned int fault)
{
	/*
	 * The page fault was interrupted by SIGKILL; there is no
	 * reason to continue handling it.
	 */
	if (fatal_signal_pending(current)) {
		if (!(fault & VM_FAULT_RETRY))
			up_read(&current->mm->mmap_sem);
		if (!user_mode(regs))
			no_context(regs, error_code, address);
		return 1;
	}

	if (!(fault & VM_FAULT_ERROR))
		return 0;

	if (fault & VM_FAULT_OOM) {
		/* Kernel mode? Handle exceptions or die: */
		if (!user_mode(regs)) {
			up_read(&current->mm->mmap_sem);
			no_context(regs, error_code, address);
			return 1;
		}

		out_of_memory();
	} else {
		if (fault & VM_FAULT_SIGBUS)
			do_sigbus(regs, error_code, address);
		else
			BUG();
	}

	return 1;
}

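/*
 * Check the access implied by 'error_code' against the protections on
 * the vma. Returns 1 on a protection violation, 0 if the access is
 * permitted.
 */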
static inline int access_error(int error_code, struct vm_area_struct *vma)
{
	if (error_code & FAULT_CODE_WRITE) {
		/* Write faults, whether the page is present or not: */
		if (unlikely(!(vma->vm_flags & VM_WRITE)))
			return 1;
		return 0;
	}

	/* ITLB miss on NX page */
	if (unlikely((error_code & FAULT_CODE_ITLB) &&
		     !(vma->vm_flags & VM_EXEC)))
		return 1;

	/* read, not present: */
	if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
		return 1;

	return 0;
}

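/* Anything at or above TASK_SIZE lies outside the user address range. */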
static int fault_in_kernel_space(unsigned long address)
{
	return address >= TASK_SIZE;
}

/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 */
asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
					unsigned long error_code,
					unsigned long address)
{
	unsigned long vec;
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	int fault;
	int write = error_code & FAULT_CODE_WRITE;
	unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
			      (write ? FAULT_FLAG_WRITE : 0));

	tsk = current;
	mm = tsk->mm;
	vec = lookup_exception_vector();

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (unlikely(fault_in_kernel_space(address))) {
		if (vmalloc_fault(address) >= 0)
			return;
		if (notify_page_fault(regs, vec))
			return;

		bad_area_nosemaphore(regs, error_code, address);
		return;
	}

	if (unlikely(notify_page_fault(regs, vec)))
		return;

	/* Only enable interrupts if they were on before the fault */
	if ((regs->sr & SR_IMASK) != SR_IMASK)
		local_irq_enable();

	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);

	/*
	 * If we're in an interrupt, have no user context or are running
	 * in an atomic region then we must not take the fault:
	 */
	if (unlikely(in_atomic() || !mm)) {
		bad_area_nosemaphore(regs, error_code, address);
		return;
	}

retry:
	down_read(&mm->mmap_sem);

	vma = find_vma(mm, address);
	if (unlikely(!vma)) {
		bad_area(regs, error_code, address);
		return;
	}
	if (likely(vma->vm_start <= address))
		goto good_area;
	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
		bad_area(regs, error_code, address);
		return;
	}
	if (unlikely(expand_stack(vma, address))) {
		bad_area(regs, error_code, address);
		return;
	}

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it.
	 */
good_area:
	if (unlikely(access_error(error_code, vma))) {
		bad_area_access_error(regs, error_code, address);
		return;
	}

	set_thread_fault_code(error_code);

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(mm, vma, address, flags);

	if (unlikely(fault & (VM_FAULT_RETRY | VM_FAULT_ERROR)))
		if (mm_fault_error(regs, error_code, address, fault))
			return;

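	/*
	 * Major/minor fault accounting below is only done on the first
	 * attempt: once handle_mm_fault() asks for a retry we clear
	 * FAULT_FLAG_ALLOW_RETRY, so the second pass cannot loop
	 * forever.
	 */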
	if (flags & FAULT_FLAG_ALLOW_RETRY) {
		if (fault & VM_FAULT_MAJOR) {
			tsk->maj_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
				      regs, address);
		} else {
			tsk->min_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
				      regs, address);
		}
		if (fault & VM_FAULT_RETRY) {
			flags &= ~FAULT_FLAG_ALLOW_RETRY;

			/*
			 * No need to up_read(&mm->mmap_sem) as we would
			 * have already released it in __lock_page_or_retry
			 * in mm/filemap.c.
			 */
			goto retry;
		}
	}

	up_read(&mm->mmap_sem);
}