xref: /openbmc/linux/arch/sparc/mm/fault_64.c (revision a266ef69b890f099069cf51bb40572611c435a54)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * arch/sparc64/mm/fault.c: Page fault handlers for the 64-bit Sparc.
4  *
5  * Copyright (C) 1996, 2008 David S. Miller (davem@davemloft.net)
6  * Copyright (C) 1997, 1999 Jakub Jelinek (jj@ultra.linux.cz)
7  */
8 
9 #include <asm/head.h>
10 
11 #include <linux/string.h>
12 #include <linux/types.h>
13 #include <linux/sched.h>
14 #include <linux/sched/debug.h>
15 #include <linux/ptrace.h>
16 #include <linux/mman.h>
17 #include <linux/signal.h>
18 #include <linux/mm.h>
19 #include <linux/extable.h>
20 #include <linux/init.h>
21 #include <linux/perf_event.h>
22 #include <linux/interrupt.h>
23 #include <linux/kprobes.h>
24 #include <linux/kdebug.h>
25 #include <linux/percpu.h>
26 #include <linux/context_tracking.h>
27 #include <linux/uaccess.h>
28 
29 #include <asm/page.h>
30 #include <asm/openprom.h>
31 #include <asm/oplib.h>
32 #include <asm/asi.h>
33 #include <asm/lsu.h>
34 #include <asm/sections.h>
35 #include <asm/mmu_context.h>
36 #include <asm/setup.h>
37 
/*
 * When non-zero, do_fault_siginfo() calls show_signal_msg() before forcing
 * the signal, so faults whose signal the task does not handle get logged.
 * Non-static, so presumably also written from outside this file
 * (TODO confirm where).
 */
38 int show_unhandled_signals = 1;
39 
40 static void __kprobes unhandled_fault(unsigned long address,
41 				      struct task_struct *tsk,
42 				      struct pt_regs *regs)
43 {
44 	if ((unsigned long) address < PAGE_SIZE) {
45 		printk(KERN_ALERT "Unable to handle kernel NULL "
46 		       "pointer dereference\n");
47 	} else {
48 		printk(KERN_ALERT "Unable to handle kernel paging request "
49 		       "at virtual address %016lx\n", (unsigned long)address);
50 	}
51 	printk(KERN_ALERT "tsk->{mm,active_mm}->context = %016lx\n",
52 	       (tsk->mm ?
53 		CTX_HWBITS(tsk->mm->context) :
54 		CTX_HWBITS(tsk->active_mm->context)));
55 	printk(KERN_ALERT "tsk->{mm,active_mm}->pgd = %016lx\n",
56 	       (tsk->mm ? (unsigned long) tsk->mm->pgd :
57 		          (unsigned long) tsk->active_mm->pgd));
58 	die_if_kernel("Oops", regs);
59 }
60 
61 static void __kprobes bad_kernel_pc(struct pt_regs *regs, unsigned long vaddr)
62 {
63 	printk(KERN_CRIT "OOPS: Bogus kernel PC [%016lx] in fault handler\n",
64 	       regs->tpc);
65 	printk(KERN_CRIT "OOPS: RPC [%016lx]\n", regs->u_regs[15]);
66 	printk("OOPS: RPC <%pS>\n", (void *) regs->u_regs[15]);
67 	printk(KERN_CRIT "OOPS: Fault was to vaddr[%lx]\n", vaddr);
68 	dump_stack();
69 	unhandled_fault(regs->tpc, current, regs);
70 }
71 
72 /*
73  * We now make sure that mmap_lock is held in all paths that call
74  * this. Additionally, to prevent kswapd from ripping ptes from
75  * under us, raise interrupts around the time that we look at the
76  * pte, kswapd will have to wait to get his smp ipi response from
77  * us. vmtruncate likewise. This saves us having to get pte lock.
78  */
79 static unsigned int get_user_insn(unsigned long tpc)
80 {
81 	pgd_t *pgdp = pgd_offset(current->mm, tpc);
82 	p4d_t *p4dp;
83 	pud_t *pudp;
84 	pmd_t *pmdp;
85 	pte_t *ptep, pte;
86 	unsigned long pa;
87 	u32 insn = 0;
88 
89 	if (pgd_none(*pgdp) || unlikely(pgd_bad(*pgdp)))
90 		goto out;
91 	p4dp = p4d_offset(pgdp, tpc);
92 	if (p4d_none(*p4dp) || unlikely(p4d_bad(*p4dp)))
93 		goto out;
94 	pudp = pud_offset(p4dp, tpc);
95 	if (pud_none(*pudp) || unlikely(pud_bad(*pudp)))
96 		goto out;
97 
98 	/* This disables preemption for us as well. */
99 	local_irq_disable();
100 
101 	pmdp = pmd_offset(pudp, tpc);
102 	if (pmd_none(*pmdp) || unlikely(pmd_bad(*pmdp)))
103 		goto out_irq_enable;
104 
105 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
106 	if (is_hugetlb_pmd(*pmdp)) {
107 		pa  = pmd_pfn(*pmdp) << PAGE_SHIFT;
108 		pa += tpc & ~HPAGE_MASK;
109 
110 		/* Use phys bypass so we don't pollute dtlb/dcache. */
111 		__asm__ __volatile__("lduwa [%1] %2, %0"
112 				     : "=r" (insn)
113 				     : "r" (pa), "i" (ASI_PHYS_USE_EC));
114 	} else
115 #endif
116 	{
117 		ptep = pte_offset_map(pmdp, tpc);
118 		pte = *ptep;
119 		if (pte_present(pte)) {
120 			pa  = (pte_pfn(pte) << PAGE_SHIFT);
121 			pa += (tpc & ~PAGE_MASK);
122 
123 			/* Use phys bypass so we don't pollute dtlb/dcache. */
124 			__asm__ __volatile__("lduwa [%1] %2, %0"
125 					     : "=r" (insn)
126 					     : "r" (pa), "i" (ASI_PHYS_USE_EC));
127 		}
128 		pte_unmap(ptep);
129 	}
130 out_irq_enable:
131 	local_irq_enable();
132 out:
133 	return insn;
134 }
135 
136 static inline void
137 show_signal_msg(struct pt_regs *regs, int sig, int code,
138 		unsigned long address, struct task_struct *tsk)
139 {
140 	if (!unhandled_signal(tsk, sig))
141 		return;
142 
143 	if (!printk_ratelimit())
144 		return;
145 
146 	printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
147 	       task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
148 	       tsk->comm, task_pid_nr(tsk), address,
149 	       (void *)regs->tpc, (void *)regs->u_regs[UREG_I7],
150 	       (void *)regs->u_regs[UREG_FP], code);
151 
152 	print_vma_addr(KERN_CONT " in ", regs->tpc);
153 
154 	printk(KERN_CONT "\n");
155 }
156 
157 static void do_fault_siginfo(int code, int sig, struct pt_regs *regs,
158 			     unsigned long fault_addr, unsigned int insn,
159 			     int fault_code)
160 {
161 	unsigned long addr;
162 
163 	if (fault_code & FAULT_CODE_ITLB) {
164 		addr = regs->tpc;
165 	} else {
166 		/* If we were able to probe the faulting instruction, use it
167 		 * to compute a precise fault address.  Otherwise use the fault
168 		 * time provided address which may only have page granularity.
169 		 */
170 		if (insn)
171 			addr = compute_effective_address(regs, insn, 0);
172 		else
173 			addr = fault_addr;
174 	}
175 
176 	if (unlikely(show_unhandled_signals))
177 		show_signal_msg(regs, sig, code, addr, current);
178 
179 	force_sig_fault(sig, code, (void __user *) addr);
180 }
181 
182 static unsigned int get_fault_insn(struct pt_regs *regs, unsigned int insn)
183 {
184 	if (!insn) {
185 		if (!regs->tpc || (regs->tpc & 0x3))
186 			return 0;
187 		if (regs->tstate & TSTATE_PRIV) {
188 			insn = *(unsigned int *) regs->tpc;
189 		} else {
190 			insn = get_user_insn(regs->tpc);
191 		}
192 	}
193 	return insn;
194 }
195 
196 static void __kprobes do_kernel_fault(struct pt_regs *regs, int si_code,
197 				      int fault_code, unsigned int insn,
198 				      unsigned long address)
199 {
200 	unsigned char asi = ASI_P;
201 
202 	if ((!insn) && (regs->tstate & TSTATE_PRIV))
203 		goto cannot_handle;
204 
205 	/* If user insn could be read (thus insn is zero), that
206 	 * is fine.  We will just gun down the process with a signal
207 	 * in that case.
208 	 */
209 
210 	if (!(fault_code & (FAULT_CODE_WRITE|FAULT_CODE_ITLB)) &&
211 	    (insn & 0xc0800000) == 0xc0800000) {
212 		if (insn & 0x2000)
213 			asi = (regs->tstate >> 24);
214 		else
215 			asi = (insn >> 5);
216 		if ((asi & 0xf2) == 0x82) {
217 			if (insn & 0x1000000) {
218 				handle_ldf_stq(insn, regs);
219 			} else {
220 				/* This was a non-faulting load. Just clear the
221 				 * destination register(s) and continue with the next
222 				 * instruction. -jj
223 				 */
224 				handle_ld_nf(insn, regs);
225 			}
226 			return;
227 		}
228 	}
229 
230 	/* Is this in ex_table? */
231 	if (regs->tstate & TSTATE_PRIV) {
232 		const struct exception_table_entry *entry;
233 
234 		entry = search_exception_tables(regs->tpc);
235 		if (entry) {
236 			regs->tpc = entry->fixup;
237 			regs->tnpc = regs->tpc + 4;
238 			return;
239 		}
240 	} else {
241 		/* The si_code was set to make clear whether
242 		 * this was a SEGV_MAPERR or SEGV_ACCERR fault.
243 		 */
244 		do_fault_siginfo(si_code, SIGSEGV, regs, address, insn, fault_code);
245 		return;
246 	}
247 
248 cannot_handle:
249 	unhandled_fault (address, current, regs);
250 }
251 
252 static void noinline __kprobes bogus_32bit_fault_tpc(struct pt_regs *regs)
253 {
254 	static int times;
255 
256 	if (times++ < 10)
257 		printk(KERN_ERR "FAULT[%s:%d]: 32-bit process reports "
258 		       "64-bit TPC [%lx]\n",
259 		       current->comm, current->pid,
260 		       regs->tpc);
261 	show_regs(regs);
262 }
263 
264 asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
265 {
266 	enum ctx_state prev_state = exception_enter();
267 	struct mm_struct *mm = current->mm;
268 	struct vm_area_struct *vma;
269 	unsigned int insn = 0;
270 	int si_code, fault_code;
271 	vm_fault_t fault;
272 	unsigned long address, mm_rss;
273 	unsigned int flags = FAULT_FLAG_DEFAULT;
274 
275 	fault_code = get_thread_fault_code();
276 
277 	if (kprobe_page_fault(regs, 0))
278 		goto exit_exception;
279 
280 	si_code = SEGV_MAPERR;
281 	address = current_thread_info()->fault_address;
282 
283 	if ((fault_code & FAULT_CODE_ITLB) &&
284 	    (fault_code & FAULT_CODE_DTLB))
285 		BUG();
286 
287 	if (test_thread_flag(TIF_32BIT)) {
288 		if (!(regs->tstate & TSTATE_PRIV)) {
289 			if (unlikely((regs->tpc >> 32) != 0)) {
290 				bogus_32bit_fault_tpc(regs);
291 				goto intr_or_no_mm;
292 			}
293 		}
294 		if (unlikely((address >> 32) != 0))
295 			goto intr_or_no_mm;
296 	}
297 
298 	if (regs->tstate & TSTATE_PRIV) {
299 		unsigned long tpc = regs->tpc;
300 
301 		/* Sanity check the PC. */
302 		if ((tpc >= KERNBASE && tpc < (unsigned long) __init_end) ||
303 		    (tpc >= MODULES_VADDR && tpc < MODULES_END)) {
304 			/* Valid, no problems... */
305 		} else {
306 			bad_kernel_pc(regs, address);
307 			goto exit_exception;
308 		}
309 	} else
310 		flags |= FAULT_FLAG_USER;
311 
312 	/*
313 	 * If we're in an interrupt or have no user
314 	 * context, we must not take the fault..
315 	 */
316 	if (faulthandler_disabled() || !mm)
317 		goto intr_or_no_mm;
318 
319 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
320 
321 	if (!mmap_read_trylock(mm)) {
322 		if ((regs->tstate & TSTATE_PRIV) &&
323 		    !search_exception_tables(regs->tpc)) {
324 			insn = get_fault_insn(regs, insn);
325 			goto handle_kernel_fault;
326 		}
327 
328 retry:
329 		mmap_read_lock(mm);
330 	}
331 
332 	if (fault_code & FAULT_CODE_BAD_RA)
333 		goto do_sigbus;
334 
335 	vma = find_vma(mm, address);
336 	if (!vma)
337 		goto bad_area;
338 
339 	/* Pure DTLB misses do not tell us whether the fault causing
340 	 * load/store/atomic was a write or not, it only says that there
341 	 * was no match.  So in such a case we (carefully) read the
342 	 * instruction to try and figure this out.  It's an optimization
343 	 * so it's ok if we can't do this.
344 	 *
345 	 * Special hack, window spill/fill knows the exact fault type.
346 	 */
347 	if (((fault_code &
348 	      (FAULT_CODE_DTLB | FAULT_CODE_WRITE | FAULT_CODE_WINFIXUP)) == FAULT_CODE_DTLB) &&
349 	    (vma->vm_flags & VM_WRITE) != 0) {
350 		insn = get_fault_insn(regs, 0);
351 		if (!insn)
352 			goto continue_fault;
353 		/* All loads, stores and atomics have bits 30 and 31 both set
354 		 * in the instruction.  Bit 21 is set in all stores, but we
355 		 * have to avoid prefetches which also have bit 21 set.
356 		 */
357 		if ((insn & 0xc0200000) == 0xc0200000 &&
358 		    (insn & 0x01780000) != 0x01680000) {
359 			/* Don't bother updating thread struct value,
360 			 * because update_mmu_cache only cares which tlb
361 			 * the access came from.
362 			 */
363 			fault_code |= FAULT_CODE_WRITE;
364 		}
365 	}
366 continue_fault:
367 
368 	if (vma->vm_start <= address)
369 		goto good_area;
370 	if (!(vma->vm_flags & VM_GROWSDOWN))
371 		goto bad_area;
372 	if (!(fault_code & FAULT_CODE_WRITE)) {
373 		/* Non-faulting loads shouldn't expand stack. */
374 		insn = get_fault_insn(regs, insn);
375 		if ((insn & 0xc0800000) == 0xc0800000) {
376 			unsigned char asi;
377 
378 			if (insn & 0x2000)
379 				asi = (regs->tstate >> 24);
380 			else
381 				asi = (insn >> 5);
382 			if ((asi & 0xf2) == 0x82)
383 				goto bad_area;
384 		}
385 	}
386 	if (expand_stack(vma, address))
387 		goto bad_area;
388 	/*
389 	 * Ok, we have a good vm_area for this memory access, so
390 	 * we can handle it..
391 	 */
392 good_area:
393 	si_code = SEGV_ACCERR;
394 
395 	/* If we took a ITLB miss on a non-executable page, catch
396 	 * that here.
397 	 */
398 	if ((fault_code & FAULT_CODE_ITLB) && !(vma->vm_flags & VM_EXEC)) {
399 		WARN(address != regs->tpc,
400 		     "address (%lx) != regs->tpc (%lx)\n", address, regs->tpc);
401 		WARN_ON(regs->tstate & TSTATE_PRIV);
402 		goto bad_area;
403 	}
404 
405 	if (fault_code & FAULT_CODE_WRITE) {
406 		if (!(vma->vm_flags & VM_WRITE))
407 			goto bad_area;
408 
409 		/* Spitfire has an icache which does not snoop
410 		 * processor stores.  Later processors do...
411 		 */
412 		if (tlb_type == spitfire &&
413 		    (vma->vm_flags & VM_EXEC) != 0 &&
414 		    vma->vm_file != NULL)
415 			set_thread_fault_code(fault_code |
416 					      FAULT_CODE_BLKCOMMIT);
417 
418 		flags |= FAULT_FLAG_WRITE;
419 	} else {
420 		/* Allow reads even for write-only mappings */
421 		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
422 			goto bad_area;
423 	}
424 
425 	fault = handle_mm_fault(vma, address, flags, regs);
426 
427 	if (fault_signal_pending(fault, regs))
428 		goto exit_exception;
429 
430 	/* The fault is fully completed (including releasing mmap lock) */
431 	if (fault & VM_FAULT_COMPLETED)
432 		goto lock_released;
433 
434 	if (unlikely(fault & VM_FAULT_ERROR)) {
435 		if (fault & VM_FAULT_OOM)
436 			goto out_of_memory;
437 		else if (fault & VM_FAULT_SIGSEGV)
438 			goto bad_area;
439 		else if (fault & VM_FAULT_SIGBUS)
440 			goto do_sigbus;
441 		BUG();
442 	}
443 
444 	if (fault & VM_FAULT_RETRY) {
445 		flags |= FAULT_FLAG_TRIED;
446 
447 		/* No need to mmap_read_unlock(mm) as we would
448 		 * have already released it in __lock_page_or_retry
449 		 * in mm/filemap.c.
450 		 */
451 
452 		goto retry;
453 	}
454 	mmap_read_unlock(mm);
455 
456 lock_released:
457 	mm_rss = get_mm_rss(mm);
458 #if defined(CONFIG_TRANSPARENT_HUGEPAGE)
459 	mm_rss -= (mm->context.thp_pte_count * (HPAGE_SIZE / PAGE_SIZE));
460 #endif
461 	if (unlikely(mm_rss >
462 		     mm->context.tsb_block[MM_TSB_BASE].tsb_rss_limit))
463 		tsb_grow(mm, MM_TSB_BASE, mm_rss);
464 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
465 	mm_rss = mm->context.hugetlb_pte_count + mm->context.thp_pte_count;
466 	mm_rss *= REAL_HPAGE_PER_HPAGE;
467 	if (unlikely(mm_rss >
468 		     mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) {
469 		if (mm->context.tsb_block[MM_TSB_HUGE].tsb)
470 			tsb_grow(mm, MM_TSB_HUGE, mm_rss);
471 		else
472 			hugetlb_setup(regs);
473 
474 	}
475 #endif
476 exit_exception:
477 	exception_exit(prev_state);
478 	return;
479 
480 	/*
481 	 * Something tried to access memory that isn't in our memory map..
482 	 * Fix it, but check if it's kernel or user first..
483 	 */
484 bad_area:
485 	insn = get_fault_insn(regs, insn);
486 	mmap_read_unlock(mm);
487 
488 handle_kernel_fault:
489 	do_kernel_fault(regs, si_code, fault_code, insn, address);
490 	goto exit_exception;
491 
492 /*
493  * We ran out of memory, or some other thing happened to us that made
494  * us unable to handle the page fault gracefully.
495  */
496 out_of_memory:
497 	insn = get_fault_insn(regs, insn);
498 	mmap_read_unlock(mm);
499 	if (!(regs->tstate & TSTATE_PRIV)) {
500 		pagefault_out_of_memory();
501 		goto exit_exception;
502 	}
503 	goto handle_kernel_fault;
504 
505 intr_or_no_mm:
506 	insn = get_fault_insn(regs, 0);
507 	goto handle_kernel_fault;
508 
509 do_sigbus:
510 	insn = get_fault_insn(regs, insn);
511 	mmap_read_unlock(mm);
512 
513 	/*
514 	 * Send a sigbus, regardless of whether we were in kernel
515 	 * or user mode.
516 	 */
517 	do_fault_siginfo(BUS_ADRERR, SIGBUS, regs, address, insn, fault_code);
518 
519 	/* Kernel mode? Handle exceptions or die */
520 	if (regs->tstate & TSTATE_PRIV)
521 		goto handle_kernel_fault;
522 }
523