xref: /openbmc/linux/kernel/events/uprobes.c (revision b6bec26c)
1 /*
2  * User-space Probes (UProbes)
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17  *
18  * Copyright (C) IBM Corporation, 2008-2012
19  * Authors:
20  *	Srikar Dronamraju
21  *	Jim Keniston
22  * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
23  */
24 
25 #include <linux/kernel.h>
26 #include <linux/highmem.h>
27 #include <linux/pagemap.h>	/* read_mapping_page */
28 #include <linux/slab.h>
29 #include <linux/sched.h>
30 #include <linux/rmap.h>		/* anon_vma_prepare */
31 #include <linux/mmu_notifier.h>	/* set_pte_at_notify */
32 #include <linux/swap.h>		/* try_to_free_swap */
33 #include <linux/ptrace.h>	/* user_enable_single_step */
34 #include <linux/kdebug.h>	/* notifier mechanism */
35 #include "../../mm/internal.h"	/* munlock_vma_page */
36 #include <linux/percpu-rwsem.h>
37 
38 #include <linux/uprobes.h>
39 
40 #define UINSNS_PER_PAGE			(PAGE_SIZE/UPROBE_XOL_SLOT_BYTES)
41 #define MAX_UPROBE_XOL_SLOTS		UINSNS_PER_PAGE
42 
43 static struct rb_root uprobes_tree = RB_ROOT;
44 
45 static DEFINE_SPINLOCK(uprobes_treelock);	/* serialize rbtree access */
46 
47 #define UPROBES_HASH_SZ	13
48 
49 /*
50  * We need separate register/unregister and mmap/munmap lock hashes because
51  * of mmap_sem nesting.
52  *
53  * uprobe_register() needs to install probes on (potentially) all processes
54  * and thus needs to acquire multiple mmap_sems (consecutively, not
55  * concurrently), whereas uprobe_mmap() is called while holding mmap_sem
56  * for the particular process doing the mmap.
57  *
58  * uprobe_register()->register_for_each_vma() needs to drop/acquire mmap_sem
59  * because of lock order against i_mmap_mutex. This means there's a hole in
60  * the register vma iteration where a mmap() can happen.
61  *
62  * Thus uprobe_register() can race with uprobe_mmap() and we can try to
63  * install a probe where one is already installed.
64  */
65 
66 /* serialize (un)register */
67 static struct mutex uprobes_mutex[UPROBES_HASH_SZ];
68 
69 #define uprobes_hash(v)		(&uprobes_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])
70 
71 /* serialize uprobe->pending_list */
72 static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
73 #define uprobes_mmap_hash(v)	(&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])
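/*
 * Illustrative sketch (not part of the original source): both lock hashes
 * are keyed by the probed inode, mirroring what the register and mmap
 * paths in this file do:
 *
 *	mutex_lock(uprobes_hash(inode));	// uprobe_register()/_unregister()
 *	...
 *	mutex_lock(uprobes_mmap_hash(inode));	// uprobe_mmap()
 */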
74 
75 static struct percpu_rw_semaphore dup_mmap_sem;
76 
77 /*
78  * uprobe_events allows us to skip uprobe_mmap() if there are no uprobe
79  * events active at this time.  Probably a fine-grained per-inode count
80  * would be better?
81  */
82 static atomic_t uprobe_events = ATOMIC_INIT(0);
83 
84 /* Have a copy of the original instruction */
85 #define UPROBE_COPY_INSN	0
86 /* Don't run handlers while the first register / last unregister is in progress */
87 #define UPROBE_RUN_HANDLER	1
88 /* Can skip singlestep */
89 #define UPROBE_SKIP_SSTEP	2
90 
91 struct uprobe {
92 	struct rb_node		rb_node;	/* node in the rb tree */
93 	atomic_t		ref;
94 	struct rw_semaphore	consumer_rwsem;
95 	struct mutex		copy_mutex;	/* TODO: kill me and UPROBE_COPY_INSN */
96 	struct list_head	pending_list;
97 	struct uprobe_consumer	*consumers;
98 	struct inode		*inode;		/* Also hold a ref to inode */
99 	loff_t			offset;
100 	unsigned long		flags;
101 	struct arch_uprobe	arch;
102 };
103 
104 /*
105  * valid_vma: Verify if the specified vma is an executable vma.
106  * Relax the restrictions while unregistering: vm_flags might have
107  * changed after the breakpoint was inserted.
108  *	- is_register: indicates if we are in register context.
109  *	- Return true for a file-backed vma with VM_MAYEXEC set and
110  *	  VM_HUGETLB/VM_SHARED (plus VM_WRITE when registering) clear.
111  */
112 static bool valid_vma(struct vm_area_struct *vma, bool is_register)
113 {
114 	vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_SHARED;
115 
116 	if (is_register)
117 		flags |= VM_WRITE;
118 
119 	return vma->vm_file && (vma->vm_flags & flags) == VM_MAYEXEC;
120 }
121 
122 static unsigned long offset_to_vaddr(struct vm_area_struct *vma, loff_t offset)
123 {
124 	return vma->vm_start + offset - ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
125 }
126 
127 static loff_t vaddr_to_offset(struct vm_area_struct *vma, unsigned long vaddr)
128 {
129 	return ((loff_t)vma->vm_pgoff << PAGE_SHIFT) + (vaddr - vma->vm_start);
130 }
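/*
 * Worked example (illustrative numbers, assuming 4K pages): for a vma with
 * vm_start == 0x400000 and vm_pgoff == 0x10 (i.e. the mapping starts at file
 * offset 0x10000), a probe at file offset 0x10234 maps to
 * vaddr == 0x400000 + 0x10234 - 0x10000 == 0x400234; vaddr_to_offset() is
 * the exact inverse.
 */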
131 
132 /**
133  * __replace_page - replace page in vma by new page.
134  * based on replace_page in mm/ksm.c
135  *
136  * @vma:      vma that holds the pte pointing to page
137  * @addr:     address the old @page is mapped at
138  * @page:     the COWed page we are replacing by kpage
139  * @kpage:    the modified page we replace page by
140  *
141  * Returns 0 on success, -EAGAIN if the pte could not be located.
142  */
143 static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
144 				struct page *page, struct page *kpage)
145 {
146 	struct mm_struct *mm = vma->vm_mm;
147 	spinlock_t *ptl;
148 	pte_t *ptep;
149 	int err;
150 	/* For mmu_notifiers */
151 	const unsigned long mmun_start = addr;
152 	const unsigned long mmun_end   = addr + PAGE_SIZE;
153 
154 	/* For try_to_free_swap() and munlock_vma_page() below */
155 	lock_page(page);
156 
157 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
158 	err = -EAGAIN;
159 	ptep = page_check_address(page, mm, addr, &ptl, 0);
160 	if (!ptep)
161 		goto unlock;
162 
163 	get_page(kpage);
164 	page_add_new_anon_rmap(kpage, vma, addr);
165 
166 	if (!PageAnon(page)) {
167 		dec_mm_counter(mm, MM_FILEPAGES);
168 		inc_mm_counter(mm, MM_ANONPAGES);
169 	}
170 
171 	flush_cache_page(vma, addr, pte_pfn(*ptep));
172 	ptep_clear_flush(vma, addr, ptep);
173 	set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
174 
175 	page_remove_rmap(page);
176 	if (!page_mapped(page))
177 		try_to_free_swap(page);
178 	pte_unmap_unlock(ptep, ptl);
179 
180 	if (vma->vm_flags & VM_LOCKED)
181 		munlock_vma_page(page);
182 	put_page(page);
183 
184 	err = 0;
185  unlock:
186 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
187 	unlock_page(page);
188 	return err;
189 }
190 
191 /**
192  * is_swbp_insn - check if instruction is breakpoint instruction.
193  * @insn: instruction to be checked.
194  * Default implementation of is_swbp_insn
195  * Returns true if @insn is a breakpoint instruction.
196  */
197 bool __weak is_swbp_insn(uprobe_opcode_t *insn)
198 {
199 	return *insn == UPROBE_SWBP_INSN;
200 }
201 
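/*
 * Copy the UPROBE_SWBP_INSN_SIZE bytes currently mapped at @vaddr out of
 * @page into @opcode.
 */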
202 static void copy_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *opcode)
203 {
204 	void *kaddr = kmap_atomic(page);
205 	memcpy(opcode, kaddr + (vaddr & ~PAGE_MASK), UPROBE_SWBP_INSN_SIZE);
206 	kunmap_atomic(kaddr);
207 }
208 
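/*
 * Check whether writing @new_opcode at @vaddr still makes sense.
 * Returns 0 if the page already holds the desired state (breakpoint already
 * installed on register, or already restored on unregister), so the caller
 * can skip the write; returns 1 if write_opcode() should proceed.
 */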
209 static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *new_opcode)
210 {
211 	uprobe_opcode_t old_opcode;
212 	bool is_swbp;
213 
214 	copy_opcode(page, vaddr, &old_opcode);
215 	is_swbp = is_swbp_insn(&old_opcode);
216 
217 	if (is_swbp_insn(new_opcode)) {
218 		if (is_swbp)		/* register: already installed? */
219 			return 0;
220 	} else {
221 		if (!is_swbp)		/* unregister: was it changed by us? */
222 			return 0;
223 	}
224 
225 	return 1;
226 }
227 
228 /*
229  * NOTE:
230  * Expect the breakpoint instruction to be the smallest instruction for
231  * the architecture. If an arch has variable-length instructions and the
232  * breakpoint instruction is not the smallest instruction supported by
233  * that architecture, then is_swbp_at_addr() and write_opcode() need to be
234  * modified accordingly. This is never a problem for archs that have
235  * fixed-length instructions.
236  */
237 
238 /*
239  * write_opcode - write the opcode at a given virtual address.
240  * @mm: the probed process address space.
241  * @vaddr: the virtual address to store the opcode.
242  * @opcode: opcode to be written at @vaddr.
243  *
244  * Called with mm->mmap_sem held (for read and with a reference to
245  * mm).
246  *
247  * For mm @mm, write the opcode at @vaddr.
248  * Return 0 (success) or a negative errno.
249  */
250 static int write_opcode(struct mm_struct *mm, unsigned long vaddr,
251 			uprobe_opcode_t opcode)
252 {
253 	struct page *old_page, *new_page;
254 	void *vaddr_old, *vaddr_new;
255 	struct vm_area_struct *vma;
256 	int ret;
257 
258 retry:
259 	/* Read the page with vaddr into memory */
260 	ret = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &old_page, &vma);
261 	if (ret <= 0)
262 		return ret;
263 
264 	ret = verify_opcode(old_page, vaddr, &opcode);
265 	if (ret <= 0)
266 		goto put_old;
267 
268 	ret = -ENOMEM;
269 	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr);
270 	if (!new_page)
271 		goto put_old;
272 
273 	__SetPageUptodate(new_page);
274 
275 	/* copy the page now that we've got it stable */
276 	vaddr_old = kmap_atomic(old_page);
277 	vaddr_new = kmap_atomic(new_page);
278 
279 	memcpy(vaddr_new, vaddr_old, PAGE_SIZE);
280 	memcpy(vaddr_new + (vaddr & ~PAGE_MASK), &opcode, UPROBE_SWBP_INSN_SIZE);
281 
282 	kunmap_atomic(vaddr_new);
283 	kunmap_atomic(vaddr_old);
284 
285 	ret = anon_vma_prepare(vma);
286 	if (ret)
287 		goto put_new;
288 
289 	ret = __replace_page(vma, vaddr, old_page, new_page);
290 
291 put_new:
292 	page_cache_release(new_page);
293 put_old:
294 	put_page(old_page);
295 
296 	if (unlikely(ret == -EAGAIN))
297 		goto retry;
298 	return ret;
299 }
300 
301 /**
302  * set_swbp - store breakpoint at a given address.
303  * @auprobe: arch specific probepoint information.
304  * @mm: the probed process address space.
305  * @vaddr: the virtual address to insert the opcode.
306  *
307  * For mm @mm, store the breakpoint instruction at @vaddr.
308  * Return 0 (success) or a negative errno.
309  */
310 int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
311 {
312 	return write_opcode(mm, vaddr, UPROBE_SWBP_INSN);
313 }
314 
315 /**
316  * set_orig_insn - Restore the original instruction.
317  * @mm: the probed process address space.
318  * @auprobe: arch specific probepoint information.
319  * @vaddr: the virtual address to insert the opcode.
320  *
321  * For mm @mm, restore the original instruction at @vaddr.
322  * Return 0 (success) or a negative errno.
323  */
324 int __weak
325 set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
326 {
327 	return write_opcode(mm, vaddr, *(uprobe_opcode_t *)auprobe->insn);
328 }
329 
330 static int match_uprobe(struct uprobe *l, struct uprobe *r)
331 {
332 	if (l->inode < r->inode)
333 		return -1;
334 
335 	if (l->inode > r->inode)
336 		return 1;
337 
338 	if (l->offset < r->offset)
339 		return -1;
340 
341 	if (l->offset > r->offset)
342 		return 1;
343 
344 	return 0;
345 }
346 
347 static struct uprobe *__find_uprobe(struct inode *inode, loff_t offset)
348 {
349 	struct uprobe u = { .inode = inode, .offset = offset };
350 	struct rb_node *n = uprobes_tree.rb_node;
351 	struct uprobe *uprobe;
352 	int match;
353 
354 	while (n) {
355 		uprobe = rb_entry(n, struct uprobe, rb_node);
356 		match = match_uprobe(&u, uprobe);
357 		if (!match) {
358 			atomic_inc(&uprobe->ref);
359 			return uprobe;
360 		}
361 
362 		if (match < 0)
363 			n = n->rb_left;
364 		else
365 			n = n->rb_right;
366 	}
367 	return NULL;
368 }
369 
370 /*
371  * Find a uprobe corresponding to a given inode:offset
372  * Acquires uprobes_treelock
373  */
374 static struct uprobe *find_uprobe(struct inode *inode, loff_t offset)
375 {
376 	struct uprobe *uprobe;
377 
378 	spin_lock(&uprobes_treelock);
379 	uprobe = __find_uprobe(inode, offset);
380 	spin_unlock(&uprobes_treelock);
381 
382 	return uprobe;
383 }
384 
385 static struct uprobe *__insert_uprobe(struct uprobe *uprobe)
386 {
387 	struct rb_node **p = &uprobes_tree.rb_node;
388 	struct rb_node *parent = NULL;
389 	struct uprobe *u;
390 	int match;
391 
392 	while (*p) {
393 		parent = *p;
394 		u = rb_entry(parent, struct uprobe, rb_node);
395 		match = match_uprobe(uprobe, u);
396 		if (!match) {
397 			atomic_inc(&u->ref);
398 			return u;
399 		}
400 
401 		if (match < 0)
402 			p = &parent->rb_left;
403 		else
404 			p = &parent->rb_right;
405 
406 	}
407 
408 	u = NULL;
409 	rb_link_node(&uprobe->rb_node, parent, p);
410 	rb_insert_color(&uprobe->rb_node, &uprobes_tree);
411 	/* get access + creation ref */
412 	atomic_set(&uprobe->ref, 2);
413 
414 	return u;
415 }
416 
417 /*
418  * Acquire uprobes_treelock.
419  * Matching uprobe already exists in rbtree;
420  *	increment (access refcount) and return the matching uprobe.
421  *
422  * No matching uprobe; insert the uprobe in rb_tree;
423  *	get a double refcount (access + creation) and return NULL.
424  */
425 static struct uprobe *insert_uprobe(struct uprobe *uprobe)
426 {
427 	struct uprobe *u;
428 
429 	spin_lock(&uprobes_treelock);
430 	u = __insert_uprobe(uprobe);
431 	spin_unlock(&uprobes_treelock);
432 
433 	/* For now assume that the instruction need not be single-stepped */
434 	__set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
435 
436 	return u;
437 }
438 
439 static void put_uprobe(struct uprobe *uprobe)
440 {
441 	if (atomic_dec_and_test(&uprobe->ref))
442 		kfree(uprobe);
443 }
444 
445 static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
446 {
447 	struct uprobe *uprobe, *cur_uprobe;
448 
449 	uprobe = kzalloc(sizeof(struct uprobe), GFP_KERNEL);
450 	if (!uprobe)
451 		return NULL;
452 
453 	uprobe->inode = igrab(inode);
454 	uprobe->offset = offset;
455 	init_rwsem(&uprobe->consumer_rwsem);
456 	mutex_init(&uprobe->copy_mutex);
457 
458 	/* add to uprobes_tree, sorted on inode:offset */
459 	cur_uprobe = insert_uprobe(uprobe);
460 
461 	/* a uprobe exists for this inode:offset combination */
462 	if (cur_uprobe) {
463 		kfree(uprobe);
464 		uprobe = cur_uprobe;
465 		iput(inode);
466 	} else {
467 		atomic_inc(&uprobe_events);
468 	}
469 
470 	return uprobe;
471 }
472 
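/*
 * Run every consumer handler registered for @uprobe, honouring each
 * consumer's optional ->filter(). Does nothing until UPROBE_RUN_HANDLER
 * has been set by uprobe_register().
 */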
473 static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
474 {
475 	struct uprobe_consumer *uc;
476 
477 	if (!test_bit(UPROBE_RUN_HANDLER, &uprobe->flags))
478 		return;
479 
480 	down_read(&uprobe->consumer_rwsem);
481 	for (uc = uprobe->consumers; uc; uc = uc->next) {
482 		if (!uc->filter || uc->filter(uc, current))
483 			uc->handler(uc, regs);
484 	}
485 	up_read(&uprobe->consumer_rwsem);
486 }
487 
488 /* Add @uc to @uprobe's consumer list; return the previous head of the list */
489 static struct uprobe_consumer *
490 consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
491 {
492 	down_write(&uprobe->consumer_rwsem);
493 	uc->next = uprobe->consumers;
494 	uprobe->consumers = uc;
495 	up_write(&uprobe->consumer_rwsem);
496 
497 	return uc->next;
498 }
499 
500 /*
501  * For uprobe @uprobe, delete the consumer @uc.
502  * Return true if @uc was deleted successfully,
503  * false otherwise.
504  */
505 static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc)
506 {
507 	struct uprobe_consumer **con;
508 	bool ret = false;
509 
510 	down_write(&uprobe->consumer_rwsem);
511 	for (con = &uprobe->consumers; *con; con = &(*con)->next) {
512 		if (*con == uc) {
513 			*con = uc->next;
514 			ret = true;
515 			break;
516 		}
517 	}
518 	up_write(&uprobe->consumer_rwsem);
519 
520 	return ret;
521 }
522 
523 static int
524 __copy_insn(struct address_space *mapping, struct file *filp, char *insn,
525 			unsigned long nbytes, loff_t offset)
526 {
527 	struct page *page;
528 	void *vaddr;
529 	unsigned long off;
530 	pgoff_t idx;
531 
532 	if (!filp)
533 		return -EINVAL;
534 
535 	if (!mapping->a_ops->readpage)
536 		return -EIO;
537 
538 	idx = offset >> PAGE_CACHE_SHIFT;
539 	off = offset & ~PAGE_MASK;
540 
541 	/*
542 	 * Ensure that the page that has the original instruction is
543 	 * populated and in page-cache.
544 	 */
545 	page = read_mapping_page(mapping, idx, filp);
546 	if (IS_ERR(page))
547 		return PTR_ERR(page);
548 
549 	vaddr = kmap_atomic(page);
550 	memcpy(insn, vaddr + off, nbytes);
551 	kunmap_atomic(vaddr);
552 	page_cache_release(page);
553 
554 	return 0;
555 }
556 
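/*
 * Copy the original instruction at uprobe->offset from the backing file
 * into uprobe->arch.insn, handling instructions that lie at the end of the
 * binary or straddle a page boundary.
 */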
557 static int copy_insn(struct uprobe *uprobe, struct file *filp)
558 {
559 	struct address_space *mapping;
560 	unsigned long nbytes;
561 	int bytes;
562 
563 	nbytes = PAGE_SIZE - (uprobe->offset & ~PAGE_MASK);
564 	mapping = uprobe->inode->i_mapping;
565 
566 	/* Instruction at end of binary; copy only available bytes */
567 	if (uprobe->offset + MAX_UINSN_BYTES > uprobe->inode->i_size)
568 		bytes = uprobe->inode->i_size - uprobe->offset;
569 	else
570 		bytes = MAX_UINSN_BYTES;
571 
572 	/* Instruction at the page-boundary; copy bytes in second page */
573 	if (nbytes < bytes) {
574 		int err = __copy_insn(mapping, filp, uprobe->arch.insn + nbytes,
575 				bytes - nbytes, uprobe->offset + nbytes);
576 		if (err)
577 			return err;
578 		bytes = nbytes;
579 	}
580 	return __copy_insn(mapping, filp, uprobe->arch.insn, bytes, uprobe->offset);
581 }
582 
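/*
 * Copy and analyze the original instruction once per uprobe; serialized by
 * ->copy_mutex and recorded via the UPROBE_COPY_INSN bit so that subsequent
 * calls return early.
 */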
583 static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
584 				struct mm_struct *mm, unsigned long vaddr)
585 {
586 	int ret = 0;
587 
588 	if (test_bit(UPROBE_COPY_INSN, &uprobe->flags))
589 		return ret;
590 
591 	mutex_lock(&uprobe->copy_mutex);
592 	if (test_bit(UPROBE_COPY_INSN, &uprobe->flags))
593 		goto out;
594 
595 	ret = copy_insn(uprobe, file);
596 	if (ret)
597 		goto out;
598 
599 	ret = -ENOTSUPP;
600 	if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn))
601 		goto out;
602 
603 	ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr);
604 	if (ret)
605 		goto out;
606 
607 	/* write_opcode() assumes we don't cross page boundary */
608 	BUG_ON((uprobe->offset & ~PAGE_MASK) +
609 			UPROBE_SWBP_INSN_SIZE > PAGE_SIZE);
610 
611 	smp_wmb(); /* pairs with rmb() in find_active_uprobe() */
612 	set_bit(UPROBE_COPY_INSN, &uprobe->flags);
613 
614  out:
615 	mutex_unlock(&uprobe->copy_mutex);
616 
617 	return ret;
618 }
619 
620 static int
621 install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
622 			struct vm_area_struct *vma, unsigned long vaddr)
623 {
624 	bool first_uprobe;
625 	int ret;
626 
627 	/*
628 	 * If the probe is being deleted, the unregistering thread may already
629 	 * have finished its vma rmap walk. Adding a probe now can be fatal since
630 	 * nobody will be able to clean it up. Also, we could be called from the
631 	 * fork or mremap path, where the probe might already have been inserted.
632 	 * Hence behave as if the probe already existed.
633 	 */
634 	if (!uprobe->consumers)
635 		return 0;
636 
637 	ret = prepare_uprobe(uprobe, vma->vm_file, mm, vaddr);
638 	if (ret)
639 		return ret;
640 
641 	/*
642 	 * set MMF_HAS_UPROBES in advance for uprobe_pre_sstep_notifier(),
643 	 * the task can hit this breakpoint right after __replace_page().
644 	 */
645 	first_uprobe = !test_bit(MMF_HAS_UPROBES, &mm->flags);
646 	if (first_uprobe)
647 		set_bit(MMF_HAS_UPROBES, &mm->flags);
648 
649 	ret = set_swbp(&uprobe->arch, mm, vaddr);
650 	if (!ret)
651 		clear_bit(MMF_RECALC_UPROBES, &mm->flags);
652 	else if (first_uprobe)
653 		clear_bit(MMF_HAS_UPROBES, &mm->flags);
654 
655 	return ret;
656 }
657 
658 static int
659 remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr)
660 {
661 	/* can happen if uprobe_register() fails */
662 	if (!test_bit(MMF_HAS_UPROBES, &mm->flags))
663 		return 0;
664 
665 	set_bit(MMF_RECALC_UPROBES, &mm->flags);
666 	return set_orig_insn(&uprobe->arch, mm, vaddr);
667 }
668 
669 /*
670  * There could be threads that have already hit the breakpoint. They
671  * will recheck the current insn and restart if find_uprobe() fails.
672  * See find_active_uprobe().
673  */
674 static void delete_uprobe(struct uprobe *uprobe)
675 {
676 	spin_lock(&uprobes_treelock);
677 	rb_erase(&uprobe->rb_node, &uprobes_tree);
678 	spin_unlock(&uprobes_treelock);
679 	iput(uprobe->inode);
680 	put_uprobe(uprobe);
681 	atomic_dec(&uprobe_events);
682 }
683 
684 struct map_info {
685 	struct map_info *next;
686 	struct mm_struct *mm;
687 	unsigned long vaddr;
688 };
689 
690 static inline struct map_info *free_map_info(struct map_info *info)
691 {
692 	struct map_info *next = info->next;
693 	kfree(info);
694 	return next;
695 }
696 
697 static struct map_info *
698 build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
699 {
700 	unsigned long pgoff = offset >> PAGE_SHIFT;
701 	struct vm_area_struct *vma;
702 	struct map_info *curr = NULL;
703 	struct map_info *prev = NULL;
704 	struct map_info *info;
705 	int more = 0;
706 
707  again:
708 	mutex_lock(&mapping->i_mmap_mutex);
709 	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
710 		if (!valid_vma(vma, is_register))
711 			continue;
712 
713 		if (!prev && !more) {
714 			/*
715 			 * Needs GFP_NOWAIT to avoid i_mmap_mutex recursion through
716 			 * reclaim. This is optimistic, no harm done if it fails.
717 			 */
718 			prev = kmalloc(sizeof(struct map_info),
719 					GFP_NOWAIT | __GFP_NOMEMALLOC | __GFP_NOWARN);
720 			if (prev)
721 				prev->next = NULL;
722 		}
723 		if (!prev) {
724 			more++;
725 			continue;
726 		}
727 
728 		if (!atomic_inc_not_zero(&vma->vm_mm->mm_users))
729 			continue;
730 
731 		info = prev;
732 		prev = prev->next;
733 		info->next = curr;
734 		curr = info;
735 
736 		info->mm = vma->vm_mm;
737 		info->vaddr = offset_to_vaddr(vma, offset);
738 	}
739 	mutex_unlock(&mapping->i_mmap_mutex);
740 
741 	if (!more)
742 		goto out;
743 
744 	prev = curr;
745 	while (curr) {
746 		mmput(curr->mm);
747 		curr = curr->next;
748 	}
749 
750 	do {
751 		info = kmalloc(sizeof(struct map_info), GFP_KERNEL);
752 		if (!info) {
753 			curr = ERR_PTR(-ENOMEM);
754 			goto out;
755 		}
756 		info->next = prev;
757 		prev = info;
758 	} while (--more);
759 
760 	goto again;
761  out:
762 	while (prev)
763 		prev = free_map_info(prev);
764 	return curr;
765 }
766 
767 static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
768 {
769 	struct map_info *info;
770 	int err = 0;
771 
772 	percpu_down_write(&dup_mmap_sem);
773 	info = build_map_info(uprobe->inode->i_mapping,
774 					uprobe->offset, is_register);
775 	if (IS_ERR(info)) {
776 		err = PTR_ERR(info);
777 		goto out;
778 	}
779 
780 	while (info) {
781 		struct mm_struct *mm = info->mm;
782 		struct vm_area_struct *vma;
783 
784 		if (err && is_register)
785 			goto free;
786 
787 		down_write(&mm->mmap_sem);
788 		vma = find_vma(mm, info->vaddr);
789 		if (!vma || !valid_vma(vma, is_register) ||
790 		    vma->vm_file->f_mapping->host != uprobe->inode)
791 			goto unlock;
792 
793 		if (vma->vm_start > info->vaddr ||
794 		    vaddr_to_offset(vma, info->vaddr) != uprobe->offset)
795 			goto unlock;
796 
797 		if (is_register)
798 			err = install_breakpoint(uprobe, mm, vma, info->vaddr);
799 		else
800 			err |= remove_breakpoint(uprobe, mm, info->vaddr);
801 
802  unlock:
803 		up_write(&mm->mmap_sem);
804  free:
805 		mmput(mm);
806 		info = free_map_info(info);
807 	}
808  out:
809 	percpu_up_write(&dup_mmap_sem);
810 	return err;
811 }
812 
813 static int __uprobe_register(struct uprobe *uprobe)
814 {
815 	return register_for_each_vma(uprobe, true);
816 }
817 
818 static void __uprobe_unregister(struct uprobe *uprobe)
819 {
820 	if (!register_for_each_vma(uprobe, false))
821 		delete_uprobe(uprobe);
822 
823 	/* TODO: can't unregister? schedule a worker thread */
824 }
825 
826 /*
827  * uprobe_register - register a probe
828  * @inode: the file in which the probe has to be placed.
829  * @offset: offset from the start of the file.
830  * @uc: information on how to handle the probe.
831  *
832  * Apart from the access refcount, uprobe_register() takes a creation
833  * refcount (through alloc_uprobe) if and only if this @uprobe is getting
834  * inserted into the rbtree (i.e. the first consumer for an @inode:@offset
835  * tuple).  The creation refcount stops uprobe_unregister from freeing the
836  * @uprobe even before the register operation is complete. The creation
837  * refcount is released when the last @uc for the @uprobe
838  * unregisters.
839  *
840  * Return an errno if it cannot successfully install probes,
841  * else return 0 (success).
842  */
843 int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
844 {
845 	struct uprobe *uprobe;
846 	int ret;
847 
848 	if (!inode || !uc || uc->next)
849 		return -EINVAL;
850 
851 	if (offset > i_size_read(inode))
852 		return -EINVAL;
853 
854 	ret = 0;
855 	mutex_lock(uprobes_hash(inode));
856 	uprobe = alloc_uprobe(inode, offset);
857 
858 	if (!uprobe) {
859 		ret = -ENOMEM;
860 	} else if (!consumer_add(uprobe, uc)) {
861 		ret = __uprobe_register(uprobe);
862 		if (ret) {
863 			uprobe->consumers = NULL;
864 			__uprobe_unregister(uprobe);
865 		} else {
866 			set_bit(UPROBE_RUN_HANDLER, &uprobe->flags);
867 		}
868 	}
869 
870 	mutex_unlock(uprobes_hash(inode));
871 	if (uprobe)
872 		put_uprobe(uprobe);
873 
874 	return ret;
875 }
876 
877 /*
878  * uprobe_unregister - unregister an already registered probe.
879  * @inode: the file in which the probe has to be removed.
880  * @offset: offset from the start of the file.
881  * @uc: identify which probe if multiple probes are colocated.
882  */
883 void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
884 {
885 	struct uprobe *uprobe;
886 
887 	if (!inode || !uc)
888 		return;
889 
890 	uprobe = find_uprobe(inode, offset);
891 	if (!uprobe)
892 		return;
893 
894 	mutex_lock(uprobes_hash(inode));
895 
896 	if (consumer_del(uprobe, uc)) {
897 		if (!uprobe->consumers) {
898 			__uprobe_unregister(uprobe);
899 			clear_bit(UPROBE_RUN_HANDLER, &uprobe->flags);
900 		}
901 	}
902 
903 	mutex_unlock(uprobes_hash(inode));
904 	if (uprobe)
905 		put_uprobe(uprobe);
906 }
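/*
 * Usage sketch (illustrative only, not part of this file): a caller embeds
 * a struct uprobe_consumer, registers it against an inode:offset pair and
 * later unregisters with the same consumer. The handler signature below is
 * assumed from the struct uprobe_consumer definition in <linux/uprobes.h>
 * of this kernel version.
 *
 *	static int my_handler(struct uprobe_consumer *uc, struct pt_regs *regs)
 *	{
 *		// runs in task context each time the probed insn is hit
 *		return 0;
 *	}
 *
 *	static struct uprobe_consumer my_uc = { .handler = my_handler };
 *
 *	err = uprobe_register(inode, offset, &my_uc);
 *	...
 *	uprobe_unregister(inode, offset, &my_uc);
 */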
907 
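/*
 * Find any rbtree node for @inode whose offset lies in [min, max].
 * Caller must hold uprobes_treelock; the result serves as a starting point
 * for walking neighbouring nodes in either direction.
 */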
908 static struct rb_node *
909 find_node_in_range(struct inode *inode, loff_t min, loff_t max)
910 {
911 	struct rb_node *n = uprobes_tree.rb_node;
912 
913 	while (n) {
914 		struct uprobe *u = rb_entry(n, struct uprobe, rb_node);
915 
916 		if (inode < u->inode) {
917 			n = n->rb_left;
918 		} else if (inode > u->inode) {
919 			n = n->rb_right;
920 		} else {
921 			if (max < u->offset)
922 				n = n->rb_left;
923 			else if (min > u->offset)
924 				n = n->rb_right;
925 			else
926 				break;
927 		}
928 	}
929 
930 	return n;
931 }
932 
933 /*
934  * For a given range in vma, build a list of probes that need to be inserted.
935  */
936 static void build_probe_list(struct inode *inode,
937 				struct vm_area_struct *vma,
938 				unsigned long start, unsigned long end,
939 				struct list_head *head)
940 {
941 	loff_t min, max;
942 	struct rb_node *n, *t;
943 	struct uprobe *u;
944 
945 	INIT_LIST_HEAD(head);
946 	min = vaddr_to_offset(vma, start);
947 	max = min + (end - start) - 1;
948 
949 	spin_lock(&uprobes_treelock);
950 	n = find_node_in_range(inode, min, max);
951 	if (n) {
952 		for (t = n; t; t = rb_prev(t)) {
953 			u = rb_entry(t, struct uprobe, rb_node);
954 			if (u->inode != inode || u->offset < min)
955 				break;
956 			list_add(&u->pending_list, head);
957 			atomic_inc(&u->ref);
958 		}
959 		for (t = n; (t = rb_next(t)); ) {
960 			u = rb_entry(t, struct uprobe, rb_node);
961 			if (u->inode != inode || u->offset > max)
962 				break;
963 			list_add(&u->pending_list, head);
964 			atomic_inc(&u->ref);
965 		}
966 	}
967 	spin_unlock(&uprobes_treelock);
968 }
969 
970 /*
971  * Called from mmap_region/vma_adjust with mm->mmap_sem acquired.
972  *
973  * Currently we ignore all errors and always return 0, the callers
974  * can't handle the failure anyway.
975  */
976 int uprobe_mmap(struct vm_area_struct *vma)
977 {
978 	struct list_head tmp_list;
979 	struct uprobe *uprobe, *u;
980 	struct inode *inode;
981 
982 	if (!atomic_read(&uprobe_events) || !valid_vma(vma, true))
983 		return 0;
984 
985 	inode = vma->vm_file->f_mapping->host;
986 	if (!inode)
987 		return 0;
988 
989 	mutex_lock(uprobes_mmap_hash(inode));
990 	build_probe_list(inode, vma, vma->vm_start, vma->vm_end, &tmp_list);
991 
992 	list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
993 		if (!fatal_signal_pending(current)) {
994 			unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset);
995 			install_breakpoint(uprobe, vma->vm_mm, vma, vaddr);
996 		}
997 		put_uprobe(uprobe);
998 	}
999 	mutex_unlock(uprobes_mmap_hash(inode));
1000 
1001 	return 0;
1002 }
1003 
1004 static bool
1005 vma_has_uprobes(struct vm_area_struct *vma, unsigned long start, unsigned long end)
1006 {
1007 	loff_t min, max;
1008 	struct inode *inode;
1009 	struct rb_node *n;
1010 
1011 	inode = vma->vm_file->f_mapping->host;
1012 
1013 	min = vaddr_to_offset(vma, start);
1014 	max = min + (end - start) - 1;
1015 
1016 	spin_lock(&uprobes_treelock);
1017 	n = find_node_in_range(inode, min, max);
1018 	spin_unlock(&uprobes_treelock);
1019 
1020 	return !!n;
1021 }
1022 
1023 /*
1024  * Called in context of a munmap of a vma.
1025  */
1026 void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end)
1027 {
1028 	if (!atomic_read(&uprobe_events) || !valid_vma(vma, false))
1029 		return;
1030 
1031 	if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */
1032 		return;
1033 
1034 	if (!test_bit(MMF_HAS_UPROBES, &vma->vm_mm->flags) ||
1035 	     test_bit(MMF_RECALC_UPROBES, &vma->vm_mm->flags))
1036 		return;
1037 
1038 	if (vma_has_uprobes(vma, start, end))
1039 		set_bit(MMF_RECALC_UPROBES, &vma->vm_mm->flags);
1040 }
1041 
1042 /* Slot allocation for XOL */
1043 static int xol_add_vma(struct xol_area *area)
1044 {
1045 	struct mm_struct *mm;
1046 	int ret;
1047 
1048 	area->page = alloc_page(GFP_HIGHUSER);
1049 	if (!area->page)
1050 		return -ENOMEM;
1051 
1052 	ret = -EALREADY;
1053 	mm = current->mm;
1054 
1055 	down_write(&mm->mmap_sem);
1056 	if (mm->uprobes_state.xol_area)
1057 		goto fail;
1058 
1059 	ret = -ENOMEM;
1060 
1061 	/* Try to map as high as possible, this is only a hint. */
1062 	/* Try to map as high as possible; this is only a hint. */
1063 	if (area->vaddr & ~PAGE_MASK) {
1064 		ret = area->vaddr;
1065 		goto fail;
1066 	}
1067 
1068 	ret = install_special_mapping(mm, area->vaddr, PAGE_SIZE,
1069 				VM_EXEC|VM_MAYEXEC|VM_DONTCOPY|VM_IO, &area->page);
1070 	if (ret)
1071 		goto fail;
1072 
1073 	smp_wmb();	/* pairs with get_xol_area() */
1074 	mm->uprobes_state.xol_area = area;
1075 	ret = 0;
1076 
1077 fail:
1078 	up_write(&mm->mmap_sem);
1079 	if (ret)
1080 		__free_page(area->page);
1081 
1082 	return ret;
1083 }
1084 
1085 static struct xol_area *get_xol_area(struct mm_struct *mm)
1086 {
1087 	struct xol_area *area;
1088 
1089 	area = mm->uprobes_state.xol_area;
1090 	smp_read_barrier_depends();	/* pairs with wmb in xol_add_vma() */
1091 
1092 	return area;
1093 }
1094 
1095 /*
1096  * xol_alloc_area - Allocate process's xol_area.
1097  * This area will be used for storing instructions for execution out of
1098  * line.
1099  *
1100  * Returns the allocated area or NULL.
1101  */
1102 static struct xol_area *xol_alloc_area(void)
1103 {
1104 	struct xol_area *area;
1105 
1106 	area = kzalloc(sizeof(*area), GFP_KERNEL);
1107 	if (unlikely(!area))
1108 		return NULL;
1109 
1110 	area->bitmap = kzalloc(BITS_TO_LONGS(UINSNS_PER_PAGE) * sizeof(long), GFP_KERNEL);
1111 
1112 	if (!area->bitmap)
1113 		goto fail;
1114 
1115 	init_waitqueue_head(&area->wq);
1116 	if (!xol_add_vma(area))
1117 		return area;
1118 
1119 fail:
1120 	kfree(area->bitmap);
1121 	kfree(area);
1122 
1123 	return get_xol_area(current->mm);
1124 }
1125 
1126 /*
1127  * uprobe_clear_state - Free the area allocated for slots.
1128  */
1129 void uprobe_clear_state(struct mm_struct *mm)
1130 {
1131 	struct xol_area *area = mm->uprobes_state.xol_area;
1132 
1133 	if (!area)
1134 		return;
1135 
1136 	put_page(area->page);
1137 	kfree(area->bitmap);
1138 	kfree(area);
1139 }
1140 
1141 void uprobe_start_dup_mmap(void)
1142 {
1143 	percpu_down_read(&dup_mmap_sem);
1144 }
1145 
1146 void uprobe_end_dup_mmap(void)
1147 {
1148 	percpu_up_read(&dup_mmap_sem);
1149 }
1150 
1151 void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm)
1152 {
1153 	newmm->uprobes_state.xol_area = NULL;
1154 
1155 	if (test_bit(MMF_HAS_UPROBES, &oldmm->flags)) {
1156 		set_bit(MMF_HAS_UPROBES, &newmm->flags);
1157 		/* unconditionally, dup_mmap() skips VM_DONTCOPY vmas */
1158 		set_bit(MMF_RECALC_UPROBES, &newmm->flags);
1159 	}
1160 }
1161 
1162 /*
1163  * Search for a free slot; may sleep until one becomes available.
1164  */
1165 static unsigned long xol_take_insn_slot(struct xol_area *area)
1166 {
1167 	unsigned long slot_addr;
1168 	int slot_nr;
1169 
1170 	do {
1171 		slot_nr = find_first_zero_bit(area->bitmap, UINSNS_PER_PAGE);
1172 		if (slot_nr < UINSNS_PER_PAGE) {
1173 			if (!test_and_set_bit(slot_nr, area->bitmap))
1174 				break;
1175 
1176 			slot_nr = UINSNS_PER_PAGE;
1177 			continue;
1178 		}
1179 		wait_event(area->wq, (atomic_read(&area->slot_count) < UINSNS_PER_PAGE));
1180 	} while (slot_nr >= UINSNS_PER_PAGE);
1181 
1182 	slot_addr = area->vaddr + (slot_nr * UPROBE_XOL_SLOT_BYTES);
1183 	atomic_inc(&area->slot_count);
1184 
1185 	return slot_addr;
1186 }
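/*
 * Example (illustrative numbers, assuming 4K pages and 128-byte slots, so
 * UINSNS_PER_PAGE == 32): claiming slot_nr 3 in an area mapped at
 * 0x7fff00000000 yields the slot address 0x7fff00000000 + 3 * 128 ==
 * 0x7fff00000180.
 */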
1187 
1188 /*
1189  * xol_get_insn_slot - allocate an instruction slot (creating the per-mm
1190  * xol_area if necessary) and copy the probed instruction into it.
1191  * Returns the allocated slot address or 0.
1192  */
1193 static unsigned long xol_get_insn_slot(struct uprobe *uprobe, unsigned long slot_addr)
1194 {
1195 	struct xol_area *area;
1196 	unsigned long offset;
1197 	void *vaddr;
1198 
1199 	area = get_xol_area(current->mm);
1200 	if (!area) {
1201 		area = xol_alloc_area();
1202 		if (!area)
1203 			return 0;
1204 	}
1205 	current->utask->xol_vaddr = xol_take_insn_slot(area);
1206 
1207 	/*
1208 	 * Initialize the slot only if xol_vaddr points to a valid
1209 	 * instruction slot.
1210 	 */
1211 	if (unlikely(!current->utask->xol_vaddr))
1212 		return 0;
1213 
1214 	current->utask->vaddr = slot_addr;
1215 	offset = current->utask->xol_vaddr & ~PAGE_MASK;
1216 	vaddr = kmap_atomic(area->page);
1217 	memcpy(vaddr + offset, uprobe->arch.insn, MAX_UINSN_BYTES);
1218 	kunmap_atomic(vaddr);
1219 	/*
1220 	 * We probably need flush_icache_user_range() but it needs vma.
1221 	 * This should work on supported architectures too.
1222 	 */
1223 	flush_dcache_page(area->page);
1224 
1225 	return current->utask->xol_vaddr;
1226 }
1227 
1228 /*
1229  * xol_free_insn_slot - If a slot was earlier allocated by
1230  * @xol_get_insn_slot(), make the slot available for
1231  * subsequent requests.
1232  */
1233 static void xol_free_insn_slot(struct task_struct *tsk)
1234 {
1235 	struct xol_area *area;
1236 	unsigned long vma_end;
1237 	unsigned long slot_addr;
1238 
1239 	if (!tsk->mm || !tsk->mm->uprobes_state.xol_area || !tsk->utask)
1240 		return;
1241 
1242 	slot_addr = tsk->utask->xol_vaddr;
1243 
1244 	if (unlikely(!slot_addr || IS_ERR_VALUE(slot_addr)))
1245 		return;
1246 
1247 	area = tsk->mm->uprobes_state.xol_area;
1248 	vma_end = area->vaddr + PAGE_SIZE;
1249 	if (area->vaddr <= slot_addr && slot_addr < vma_end) {
1250 		unsigned long offset;
1251 		int slot_nr;
1252 
1253 		offset = slot_addr - area->vaddr;
1254 		slot_nr = offset / UPROBE_XOL_SLOT_BYTES;
1255 		if (slot_nr >= UINSNS_PER_PAGE)
1256 			return;
1257 
1258 		clear_bit(slot_nr, area->bitmap);
1259 		atomic_dec(&area->slot_count);
1260 		if (waitqueue_active(&area->wq))
1261 			wake_up(&area->wq);
1262 
1263 		tsk->utask->xol_vaddr = 0;
1264 	}
1265 }
1266 
1267 /**
1268  * uprobe_get_swbp_addr - compute address of swbp given post-swbp regs
1269  * @regs: Reflects the saved state of the task after it has hit a breakpoint
1270  * instruction.
1271  * Return the address of the breakpoint instruction.
1272  */
1273 unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs)
1274 {
1275 	return instruction_pointer(regs) - UPROBE_SWBP_INSN_SIZE;
1276 }
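/*
 * For example, on x86 UPROBE_SWBP_INSN is the one-byte int3 (0xcc), so the
 * default implementation above returns the trapping ip minus one.
 * Architectures where the reported ip differs override this weak symbol.
 */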
1277 
1278 /*
1279  * Called with no locks held.
1280  * Called in the context of an exiting or an exec-ing thread.
1281  */
1282 void uprobe_free_utask(struct task_struct *t)
1283 {
1284 	struct uprobe_task *utask = t->utask;
1285 
1286 	if (!utask)
1287 		return;
1288 
1289 	if (utask->active_uprobe)
1290 		put_uprobe(utask->active_uprobe);
1291 
1292 	xol_free_insn_slot(t);
1293 	kfree(utask);
1294 	t->utask = NULL;
1295 }
1296 
1297 /*
1298  * Called in the context of a new clone/fork from copy_process().
1299  */
1300 void uprobe_copy_process(struct task_struct *t)
1301 {
1302 	t->utask = NULL;
1303 }
1304 
1305 /*
1306  * Allocate a uprobe_task object for the task.
1307  * Called when the thread hits a breakpoint for the first time.
1308  *
1309  * Returns:
1310  * - pointer to new uprobe_task on success
1311  * - NULL otherwise
1312  */
1313 static struct uprobe_task *add_utask(void)
1314 {
1315 	struct uprobe_task *utask;
1316 
1317 	utask = kzalloc(sizeof *utask, GFP_KERNEL);
1318 	if (unlikely(!utask))
1319 		return NULL;
1320 
1321 	current->utask = utask;
1322 	return utask;
1323 }
1324 
1325 /* Prepare to single-step probed instruction out of line. */
1326 static int
1327 pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long vaddr)
1328 {
1329 	if (xol_get_insn_slot(uprobe, vaddr) && !arch_uprobe_pre_xol(&uprobe->arch, regs))
1330 		return 0;
1331 
1332 	return -EFAULT;
1333 }
1334 
1335 /*
1336  * If we are singlestepping, then ensure this thread is not interrupted by
1337  * non-fatal signals until the singlestep completes.  When the xol insn itself
1338  * triggers the signal, restart the original insn even if the task is
1339  * already SIGKILL'ed (since the coredump should report the correct ip).  This
1340  * is even more important if the task has a handler for SIGSEGV/etc: the
1341  * _same_ instruction should be repeated again after return from the signal
1342  * handler, and SSTEP can never finish in this case.
1343  */
1344 bool uprobe_deny_signal(void)
1345 {
1346 	struct task_struct *t = current;
1347 	struct uprobe_task *utask = t->utask;
1348 
1349 	if (likely(!utask || !utask->active_uprobe))
1350 		return false;
1351 
1352 	WARN_ON_ONCE(utask->state != UTASK_SSTEP);
1353 
1354 	if (signal_pending(t)) {
1355 		spin_lock_irq(&t->sighand->siglock);
1356 		clear_tsk_thread_flag(t, TIF_SIGPENDING);
1357 		spin_unlock_irq(&t->sighand->siglock);
1358 
1359 		if (__fatal_signal_pending(t) || arch_uprobe_xol_was_trapped(t)) {
1360 			utask->state = UTASK_SSTEP_TRAPPED;
1361 			set_tsk_thread_flag(t, TIF_UPROBE);
1362 			set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
1363 		}
1364 	}
1365 
1366 	return true;
1367 }
1368 
1369 /*
1370  * Avoid singlestepping the original instruction if the original instruction
1371  * is a NOP or can be emulated.
1372  */
1373 static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs)
1374 {
1375 	if (test_bit(UPROBE_SKIP_SSTEP, &uprobe->flags)) {
1376 		if (arch_uprobe_skip_sstep(&uprobe->arch, regs))
1377 			return true;
1378 		clear_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
1379 	}
1380 	return false;
1381 }
1382 
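/*
 * Re-evaluate MMF_HAS_UPROBES for @mm: clear it only if no remaining
 * executable vma still covers a registered probe. Called lazily, guarded by
 * MMF_RECALC_UPROBES, from find_active_uprobe().
 */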
1383 static void mmf_recalc_uprobes(struct mm_struct *mm)
1384 {
1385 	struct vm_area_struct *vma;
1386 
1387 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
1388 		if (!valid_vma(vma, false))
1389 			continue;
1390 		/*
1391 		 * This is not strictly accurate, we can race with
1392 		 * uprobe_unregister() and see the already removed
1393 		 * uprobe if delete_uprobe() was not yet called.
1394 		 */
1395 		if (vma_has_uprobes(vma, vma->vm_start, vma->vm_end))
1396 			return;
1397 	}
1398 
1399 	clear_bit(MMF_HAS_UPROBES, &mm->flags);
1400 }
1401 
1402 static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr)
1403 {
1404 	struct page *page;
1405 	uprobe_opcode_t opcode;
1406 	int result;
1407 
1408 	pagefault_disable();
1409 	result = __copy_from_user_inatomic(&opcode, (void __user*)vaddr,
1410 							sizeof(opcode));
1411 	pagefault_enable();
1412 
1413 	if (likely(result == 0))
1414 		goto out;
1415 
1416 	result = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &page, NULL);
1417 	if (result < 0)
1418 		return result;
1419 
1420 	copy_opcode(page, vaddr, &opcode);
1421 	put_page(page);
1422  out:
1423 	return is_swbp_insn(&opcode);
1424 }
1425 
1426 static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
1427 {
1428 	struct mm_struct *mm = current->mm;
1429 	struct uprobe *uprobe = NULL;
1430 	struct vm_area_struct *vma;
1431 
1432 	down_read(&mm->mmap_sem);
1433 	vma = find_vma(mm, bp_vaddr);
1434 	if (vma && vma->vm_start <= bp_vaddr) {
1435 		if (valid_vma(vma, false)) {
1436 			struct inode *inode = vma->vm_file->f_mapping->host;
1437 			loff_t offset = vaddr_to_offset(vma, bp_vaddr);
1438 
1439 			uprobe = find_uprobe(inode, offset);
1440 		}
1441 
1442 		if (!uprobe)
1443 			*is_swbp = is_swbp_at_addr(mm, bp_vaddr);
1444 	} else {
1445 		*is_swbp = -EFAULT;
1446 	}
1447 
1448 	if (!uprobe && test_and_clear_bit(MMF_RECALC_UPROBES, &mm->flags))
1449 		mmf_recalc_uprobes(mm);
1450 	up_read(&mm->mmap_sem);
1451 
1452 	return uprobe;
1453 }
1454 
1455 /*
1456  * Run handler and ask thread to singlestep.
1457  * Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
1458  */
1459 static void handle_swbp(struct pt_regs *regs)
1460 {
1461 	struct uprobe_task *utask;
1462 	struct uprobe *uprobe;
1463 	unsigned long bp_vaddr;
1464 	int uninitialized_var(is_swbp);
1465 
1466 	bp_vaddr = uprobe_get_swbp_addr(regs);
1467 	uprobe = find_active_uprobe(bp_vaddr, &is_swbp);
1468 
1469 	if (!uprobe) {
1470 		if (is_swbp > 0) {
1471 			/* No matching uprobe; signal SIGTRAP. */
1472 			send_sig(SIGTRAP, current, 0);
1473 		} else {
1474 			/*
1475 			 * Either we raced with uprobe_unregister() or we can't
1476 			 * access this memory. The latter is only possible if
1477 			 * another thread plays with our ->mm. In both cases
1478 			 * we can simply restart. If this vma was unmapped we
1479 			 * can pretend this insn was not executed yet and get
1480 			 * the (correct) SIGSEGV after restart.
1481 			 */
1482 			instruction_pointer_set(regs, bp_vaddr);
1483 		}
1484 		return;
1485 	}
1486 	/*
1487 	 * TODO: move copy_insn/etc into _register and remove this hack.
1488 	 * After we hit the bp, _unregister + _register can install the
1489 	 * new and not-yet-analyzed uprobe at the same address, restart.
1490 	 */
1491 	smp_rmb(); /* pairs with wmb() in install_breakpoint() */
1492 	if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags)))
1493 		goto restart;
1494 
1495 	utask = current->utask;
1496 	if (!utask) {
1497 		utask = add_utask();
1498 		/* Cannot allocate; re-execute the instruction. */
1499 		if (!utask)
1500 			goto restart;
1501 	}
1502 
1503 	handler_chain(uprobe, regs);
1504 	if (can_skip_sstep(uprobe, regs))
1505 		goto out;
1506 
1507 	if (!pre_ssout(uprobe, regs, bp_vaddr)) {
1508 		utask->active_uprobe = uprobe;
1509 		utask->state = UTASK_SSTEP;
1510 		return;
1511 	}
1512 
1513 restart:
1514 	/*
1515 	 * cannot singlestep; cannot skip instruction;
1516 	 * re-execute the instruction.
1517 	 */
1518 	instruction_pointer_set(regs, bp_vaddr);
1519 out:
1520 	put_uprobe(uprobe);
1521 }
1522 
1523 /*
1524  * Perform required fix-ups and disable singlestep.
1525  * Allow pending signals to take effect.
1526  */
1527 static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs)
1528 {
1529 	struct uprobe *uprobe;
1530 
1531 	uprobe = utask->active_uprobe;
1532 	if (utask->state == UTASK_SSTEP_ACK)
1533 		arch_uprobe_post_xol(&uprobe->arch, regs);
1534 	else if (utask->state == UTASK_SSTEP_TRAPPED)
1535 		arch_uprobe_abort_xol(&uprobe->arch, regs);
1536 	else
1537 		WARN_ON_ONCE(1);
1538 
1539 	put_uprobe(uprobe);
1540 	utask->active_uprobe = NULL;
1541 	utask->state = UTASK_RUNNING;
1542 	xol_free_insn_slot(current);
1543 
1544 	spin_lock_irq(&current->sighand->siglock);
1545 	recalc_sigpending(); /* see uprobe_deny_signal() */
1546 	spin_unlock_irq(&current->sighand->siglock);
1547 }
1548 
1549 /*
1550  * On breakpoint hit, breakpoint notifier sets the TIF_UPROBE flag and
1551  * allows the thread to return from interrupt. After that handle_swbp()
1552  * sets utask->active_uprobe.
1553  *
1554  * On singlestep exception, singlestep notifier sets the TIF_UPROBE flag
1555  * and allows the thread to return from interrupt.
1556  *
1557  * While returning to userspace, thread notices the TIF_UPROBE flag and calls
1558  * uprobe_notify_resume().
1559  */
1560 void uprobe_notify_resume(struct pt_regs *regs)
1561 {
1562 	struct uprobe_task *utask;
1563 
1564 	clear_thread_flag(TIF_UPROBE);
1565 
1566 	utask = current->utask;
1567 	if (utask && utask->active_uprobe)
1568 		handle_singlestep(utask, regs);
1569 	else
1570 		handle_swbp(regs);
1571 }
1572 
1573 /*
1574  * uprobe_pre_sstep_notifier gets called from interrupt context as part of
1575  * the notifier mechanism. Set the TIF_UPROBE flag and indicate a breakpoint hit.
1576  */
1577 int uprobe_pre_sstep_notifier(struct pt_regs *regs)
1578 {
1579 	if (!current->mm || !test_bit(MMF_HAS_UPROBES, &current->mm->flags))
1580 		return 0;
1581 
1582 	set_thread_flag(TIF_UPROBE);
1583 	return 1;
1584 }
1585 
1586 /*
1587  * uprobe_post_sstep_notifier gets called in interrupt context as part of the
1588  * notifier mechanism. Set the TIF_UPROBE flag and indicate completion of singlestep.
1589  */
1590 int uprobe_post_sstep_notifier(struct pt_regs *regs)
1591 {
1592 	struct uprobe_task *utask = current->utask;
1593 
1594 	if (!current->mm || !utask || !utask->active_uprobe)
1595 		/* task is currently not uprobed */
1596 		return 0;
1597 
1598 	utask->state = UTASK_SSTEP_ACK;
1599 	set_thread_flag(TIF_UPROBE);
1600 	return 1;
1601 }
1602 
1603 static struct notifier_block uprobe_exception_nb = {
1604 	.notifier_call		= arch_uprobe_exception_notify,
1605 	.priority		= INT_MAX-1,	/* notified after kprobes, kgdb */
1606 };
1607 
1608 static int __init init_uprobes(void)
1609 {
1610 	int i;
1611 
1612 	for (i = 0; i < UPROBES_HASH_SZ; i++) {
1613 		mutex_init(&uprobes_mutex[i]);
1614 		mutex_init(&uprobes_mmap_mutex[i]);
1615 	}
1616 
1617 	if (percpu_init_rwsem(&dup_mmap_sem))
1618 		return -ENOMEM;
1619 
1620 	return register_die_notifier(&uprobe_exception_nb);
1621 }
1622 module_init(init_uprobes);
1623 
1624 static void __exit exit_uprobes(void)
1625 {
1626 }
1627 module_exit(exit_uprobes);
1628