// SPDX-License-Identifier: GPL-2.0
/*
 * Dynamic function tracing support.
 *
 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
 *
 * Thanks go to Ingo Molnar, for suggesting the idea.
 * Mathieu Desnoyers, for suggesting postponing the modifications.
 * Arjan van de Ven, for keeping me straight, and explaining to me
 * the dangers of modifying code on the fly.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/spinlock.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/ftrace.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/module.h>

#include <trace/syscall.h>

#include <asm/set_memory.h>
#include <asm/kprobes.h>
#include <asm/ftrace.h>
#include <asm/nops.h>
#include <asm/text-patching.h>

#ifdef CONFIG_DYNAMIC_FTRACE

int ftrace_arch_code_modify_prepare(void)
{
	set_kernel_text_rw();
	set_all_modules_text_rw();
	return 0;
}

int ftrace_arch_code_modify_post_process(void)
{
	set_all_modules_text_ro();
	set_kernel_text_ro();
	return 0;
}

union ftrace_code_union {
	char code[MCOUNT_INSN_SIZE];
	struct {
		unsigned char op;
		int offset;
	} __attribute__((packed));
};
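
/*
 * Editor's note (illustrative): a near call or jmp on x86 is one opcode
 * byte (0xe8 or 0xe9) followed by a signed 32-bit displacement relative
 * to the address of the *next* instruction. For example, a call at
 * address 0x1000 targeting 0x2000 encodes as:
 *
 *	e8 fb 0f 00 00		call 0x2000	(0x2000 - (0x1000 + 5) = 0xffb)
 *
 * which is exactly the layout ftrace_code_union describes and
 * ftrace_text_replace() below assembles.
 */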

static int ftrace_calc_offset(long ip, long addr)
{
	return (int)(addr - ip);
}

static unsigned char *
ftrace_text_replace(unsigned char op, unsigned long ip, unsigned long addr)
{
	static union ftrace_code_union calc;

	calc.op		= op;
	calc.offset	= ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);

	return calc.code;
}

static unsigned char *
ftrace_call_replace(unsigned long ip, unsigned long addr)
{
	return ftrace_text_replace(0xe8, ip, addr);
}

static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
	return addr >= start && addr < end;
}

static unsigned long text_ip_addr(unsigned long ip)
{
	/*
	 * On x86_64, kernel text mappings are mapped read-only, so we use
	 * the kernel identity mapping instead of the kernel text mapping
	 * to modify the kernel text.
	 *
	 * For 32-bit kernels, these mappings are the same and we can use
	 * the kernel identity mapping to modify code.
	 */
	if (within(ip, (unsigned long)_text, (unsigned long)_etext))
		ip = (unsigned long)__va(__pa_symbol(ip));

	return ip;
}
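
/*
 * Editor's note (illustrative addresses, assuming a default x86_64
 * layout): a text address such as 0xffffffff81000000 (_text) lives in
 * the read-only kernel text mapping, while the same physical page is
 * also reachable through the kernel identity mapping, e.g. at
 * 0xffff888001000000. __va(__pa_symbol(ip)) performs exactly that
 * translation, giving a writable alias of the same bytes.
 */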

static const unsigned char *ftrace_nop_replace(void)
{
	return ideal_nops[NOP_ATOMIC5];
}

static int
ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
		   unsigned const char *new_code)
{
	unsigned char replaced[MCOUNT_INSN_SIZE];

	ftrace_expected = old_code;

	/*
	 * Note:
	 * We are paranoid about modifying text, as if a bug were to happen,
	 * it could cause us to read or write somewhere that could cause
	 * harm. Carefully read and modify the code with probe_kernel_*(),
	 * and make sure what we read is what we expected it to be before
	 * modifying it.
	 */

	/* read the text we want to modify */
	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	/* Make sure it is what we expect it to be */
	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
		return -EINVAL;

	ip = text_ip_addr(ip);

	/* replace the text with the new text */
	if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
		return -EPERM;

	sync_core();

	return 0;
}

int ftrace_make_nop(struct module *mod,
		    struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned const char *new, *old;
	unsigned long ip = rec->ip;

	old = ftrace_call_replace(ip, addr);
	new = ftrace_nop_replace();

	/*
	 * On boot up, and when modules are loaded, the MCOUNT_ADDR
	 * is converted to a nop, and will never become MCOUNT_ADDR
	 * again. This code is either running before SMP (on boot up)
	 * or before the code will ever be executed (module load).
	 * We do not want to use the breakpoint version in this case,
	 * just modify the code directly.
	 */
	if (addr == MCOUNT_ADDR)
		return ftrace_modify_code_direct(rec->ip, old, new);

	ftrace_expected = NULL;

	/* Normal cases use add_brk_on_nop */
	WARN_ONCE(1, "invalid use of ftrace_make_nop");
	return -EINVAL;
}

int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned const char *new, *old;
	unsigned long ip = rec->ip;

	old = ftrace_nop_replace();
	new = ftrace_call_replace(ip, addr);

	/* Should only be called when a module is loaded */
	return ftrace_modify_code_direct(rec->ip, old, new);
}

/*
 * The modifying_ftrace_code is used to tell the breakpoint
 * handler to call ftrace_int3_handler(). If it fails to
 * call this handler for a breakpoint added by ftrace, then
 * the kernel may crash.
 *
 * As atomic writes on x86 do not need a barrier, we do not
 * need to add smp_mb()s for this to work. Nor can a CPU read
 * modifying_ftrace_code before it executes the breakpoint;
 * it would be quite remarkable if it could do that. Here's
 * the flow that is required:
 *
 *   CPU-0                          CPU-1
 *
 * atomic_inc(mfc);
 * write int3s
 *				<trap-int3> // implicit (r)mb
 *				if (atomic_read(mfc))
 *					call ftrace_int3_handler()
 *
 * Then when we are finished:
 *
 * atomic_dec(mfc);
 *
 * If we hit a breakpoint that was not set by ftrace, it does not
 * matter if ftrace_int3_handler() is called or not. It will
 * simply be ignored. But it is crucial that an ftrace nop/caller
 * breakpoint is handled. No other user should ever place a
 * breakpoint on an ftrace nop/caller location. It must only
 * be done by this code.
 */
atomic_t modifying_ftrace_code __read_mostly;

static int
ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
		   unsigned const char *new_code);

/*
 * Should never be called:
 *  As it is only called by __ftrace_replace_code() which is called by
 *  ftrace_replace_code() that x86 overrides, and by ftrace_update_code()
 *  which is called to turn mcount into nops or nops into function calls
 *  but not to convert a function from not using regs to one that uses
 *  regs, which ftrace_modify_call() is for.
 */
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
				 unsigned long addr)
{
	WARN_ON(1);
	ftrace_expected = NULL;
	return -EINVAL;
}

static unsigned long ftrace_update_func;
static unsigned long ftrace_update_func_call;

static int update_ftrace_func(unsigned long ip, void *new)
{
	unsigned char old[MCOUNT_INSN_SIZE];
	int ret;

	memcpy(old, (void *)ip, MCOUNT_INSN_SIZE);

	ftrace_update_func = ip;
	/* Make sure the breakpoints see the ftrace_update_func update */
	smp_wmb();

	/* See comment above by declaration of modifying_ftrace_code */
	atomic_inc(&modifying_ftrace_code);

	ret = ftrace_modify_code(ip, old, new);

	atomic_dec(&modifying_ftrace_code);

	return ret;
}

int ftrace_update_ftrace_func(ftrace_func_t func)
{
	unsigned long ip = (unsigned long)(&ftrace_call);
	unsigned char *new;
	int ret;

	ftrace_update_func_call = (unsigned long)func;

	new = ftrace_call_replace(ip, (unsigned long)func);
	ret = update_ftrace_func(ip, new);

	/* Also update the regs callback function */
	if (!ret) {
		ip = (unsigned long)(&ftrace_regs_call);
		new = ftrace_call_replace(ip, (unsigned long)func);
		ret = update_ftrace_func(ip, new);
	}

	return ret;
}

static nokprobe_inline int is_ftrace_caller(unsigned long ip)
{
	if (ip == ftrace_update_func)
		return 1;

	return 0;
}

/*
 * A breakpoint was added to the code address we are about to
 * modify, and this is the handler that will just skip over it.
 * We are either changing a nop into a trace call, or a trace
 * call to a nop. While the change is taking place, we treat
 * it just like it was a nop.
 */
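/*
 * Editor's note: while the int3 is live, the x86_64 path below emulates
 * the eventual call (to ftrace_regs_caller for a traced location, or to
 * the saved ftrace_update_func_call for an ftrace caller site) so no
 * event is silently dropped mid-transition; the 32-bit path simply
 * emulates a jump over the 5-byte slot, treating it as a nop.
 */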
int ftrace_int3_handler(struct pt_regs *regs)
{
	unsigned long ip;

	if (WARN_ON_ONCE(!regs))
		return 0;

	ip = regs->ip - INT3_INSN_SIZE;

#ifdef CONFIG_X86_64
	if (ftrace_location(ip)) {
		int3_emulate_call(regs, (unsigned long)ftrace_regs_caller);
		return 1;
	} else if (is_ftrace_caller(ip)) {
		if (!ftrace_update_func_call) {
			int3_emulate_jmp(regs, ip + CALL_INSN_SIZE);
			return 1;
		}
		int3_emulate_call(regs, ftrace_update_func_call);
		return 1;
	}
#else
	if (ftrace_location(ip) || is_ftrace_caller(ip)) {
		int3_emulate_jmp(regs, ip + CALL_INSN_SIZE);
		return 1;
	}
#endif

	return 0;
}
NOKPROBE_SYMBOL(ftrace_int3_handler);

static int ftrace_write(unsigned long ip, const char *val, int size)
{
	ip = text_ip_addr(ip);

	if (probe_kernel_write((void *)ip, val, size))
		return -EPERM;

	return 0;
}

static int add_break(unsigned long ip, const char *old)
{
	unsigned char replaced[MCOUNT_INSN_SIZE];
	unsigned char brk = BREAKPOINT_INSTRUCTION;

	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	ftrace_expected = old;

	/* Make sure it is what we expect it to be */
	if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0)
		return -EINVAL;

	return ftrace_write(ip, &brk, 1);
}

static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned const char *old;
	unsigned long ip = rec->ip;

	old = ftrace_call_replace(ip, addr);

	return add_break(rec->ip, old);
}

static int add_brk_on_nop(struct dyn_ftrace *rec)
{
	unsigned const char *old;

	old = ftrace_nop_replace();

	return add_break(rec->ip, old);
}

static int add_breakpoints(struct dyn_ftrace *rec, int enable)
{
	unsigned long ftrace_addr;
	int ret;

	ftrace_addr = ftrace_get_addr_curr(rec);

	ret = ftrace_test_record(rec, enable);

	switch (ret) {
	case FTRACE_UPDATE_IGNORE:
		return 0;

	case FTRACE_UPDATE_MAKE_CALL:
		/* converting nop to call */
		return add_brk_on_nop(rec);

	case FTRACE_UPDATE_MODIFY_CALL:
	case FTRACE_UPDATE_MAKE_NOP:
		/* converting a call to a nop */
		return add_brk_on_call(rec, ftrace_addr);
	}
	return 0;
}

/*
 * On error, we need to remove breakpoints. This needs to
 * be done carefully. If the address does not currently have a
 * breakpoint, we know we are done. Otherwise, we look at the
 * remaining 4 bytes of the instruction. If they match a nop
 * we replace the breakpoint with the nop. Otherwise we replace
 * it with the call instruction.
 */
static int remove_breakpoint(struct dyn_ftrace *rec)
{
	unsigned char ins[MCOUNT_INSN_SIZE];
	unsigned char brk = BREAKPOINT_INSTRUCTION;
	const unsigned char *nop;
	unsigned long ftrace_addr;
	unsigned long ip = rec->ip;

	/* If we fail the read, just give up */
	if (probe_kernel_read(ins, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	/* If this does not have a breakpoint, we are done */
	if (ins[0] != brk)
		return 0;

	nop = ftrace_nop_replace();

	/*
	 * If the last 4 bytes of the instruction do not match
	 * a nop, then we assume that this is a call to ftrace_addr.
	 */
	if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) {
		/*
		 * For extra paranoia, we check if the breakpoint is on
		 * a call that would actually jump to the ftrace_addr.
		 * If not, don't touch the breakpoint; we may just create
		 * a disaster.
		 */
		ftrace_addr = ftrace_get_addr_new(rec);
		nop = ftrace_call_replace(ip, ftrace_addr);

		if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0)
			goto update;

		/* Check both ftrace_addr and ftrace_old_addr */
		ftrace_addr = ftrace_get_addr_curr(rec);
		nop = ftrace_call_replace(ip, ftrace_addr);

		ftrace_expected = nop;

		if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
			return -EINVAL;
	}

 update:
	return ftrace_write(ip, nop, 1);
}

static int add_update_code(unsigned long ip, unsigned const char *new)
{
	/* skip breakpoint */
	ip++;
	new++;
	return ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1);
}

static int add_update_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned long ip = rec->ip;
	unsigned const char *new;

	new = ftrace_call_replace(ip, addr);
	return add_update_code(ip, new);
}

static int add_update_nop(struct dyn_ftrace *rec)
{
	unsigned long ip = rec->ip;
	unsigned const char *new;

	new = ftrace_nop_replace();
	return add_update_code(ip, new);
}

static int add_update(struct dyn_ftrace *rec, int enable)
{
	unsigned long ftrace_addr;
	int ret;

	ret = ftrace_test_record(rec, enable);

	ftrace_addr = ftrace_get_addr_new(rec);

	switch (ret) {
	case FTRACE_UPDATE_IGNORE:
		return 0;

	case FTRACE_UPDATE_MODIFY_CALL:
	case FTRACE_UPDATE_MAKE_CALL:
		/* converting nop to call */
		return add_update_call(rec, ftrace_addr);

	case FTRACE_UPDATE_MAKE_NOP:
		/* converting a call to a nop */
		return add_update_nop(rec);
	}

	return 0;
}

static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr)
{
	unsigned long ip = rec->ip;
	unsigned const char *new;

	new = ftrace_call_replace(ip, addr);

	return ftrace_write(ip, new, 1);
}

static int finish_update_nop(struct dyn_ftrace *rec)
{
	unsigned long ip = rec->ip;
	unsigned const char *new;

	new = ftrace_nop_replace();

	return ftrace_write(ip, new, 1);
}

static int finish_update(struct dyn_ftrace *rec, int enable)
{
	unsigned long ftrace_addr;
	int ret;

	ret = ftrace_update_record(rec, enable);

	ftrace_addr = ftrace_get_addr_new(rec);

	switch (ret) {
	case FTRACE_UPDATE_IGNORE:
		return 0;

	case FTRACE_UPDATE_MODIFY_CALL:
	case FTRACE_UPDATE_MAKE_CALL:
		/* converting nop to call */
		return finish_update_call(rec, ftrace_addr);

	case FTRACE_UPDATE_MAKE_NOP:
		/* converting a call to a nop */
		return finish_update_nop(rec);
	}

	return 0;
}

static void do_sync_core(void *data)
{
	sync_core();
}

static void run_sync(void)
{
	int enable_irqs;

	/* No need to sync if there's only one CPU */
	if (num_online_cpus() == 1)
		return;

	enable_irqs = irqs_disabled();

	/* We may be called with interrupts disabled (on bootup). */
	if (enable_irqs)
		local_irq_enable();
	on_each_cpu(do_sync_core, NULL, 1);
	if (enable_irqs)
		local_irq_disable();
}

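/*
 * Editor's note (illustrative bytes): ftrace_replace_code() below flips
 * every site in three passes, with run_sync() forcing a sync_core() on
 * all CPUs between passes so no CPU can see a half-written instruction.
 * Turning a nop into "call <target>" (example rel32 of 0x11223344):
 *
 *	0f 1f 44 00 00		original NOP_ATOMIC5
 *	cc 1f 44 00 00		pass 1, add_breakpoints(): int3 traps callers
 *	cc 44 33 22 11		pass 2, add_update():      new tail written
 *	e8 44 33 22 11		pass 3, finish_update():   first byte restored
 *
 * Any CPU that hits the int3 in the meantime is sorted out by
 * ftrace_int3_handler() above.
 */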
void ftrace_replace_code(int enable)
{
	struct ftrace_rec_iter *iter;
	struct dyn_ftrace *rec;
	const char *report = "adding breakpoints";
	int count = 0;
	int ret;

	for_ftrace_rec_iter(iter) {
		rec = ftrace_rec_iter_record(iter);

		ret = add_breakpoints(rec, enable);
		if (ret)
			goto remove_breakpoints;
		count++;
	}

	run_sync();

	report = "updating code";
	count = 0;

	for_ftrace_rec_iter(iter) {
		rec = ftrace_rec_iter_record(iter);

		ret = add_update(rec, enable);
		if (ret)
			goto remove_breakpoints;
		count++;
	}

	run_sync();

	report = "removing breakpoints";
	count = 0;

	for_ftrace_rec_iter(iter) {
		rec = ftrace_rec_iter_record(iter);

		ret = finish_update(rec, enable);
		if (ret)
			goto remove_breakpoints;
		count++;
	}

	run_sync();

	return;

 remove_breakpoints:
	pr_warn("Failed on %s (%d):\n", report, count);
	ftrace_bug(ret, rec);
	for_ftrace_rec_iter(iter) {
		rec = ftrace_rec_iter_record(iter);
		/*
		 * Breakpoints are only handled while this function is in
		 * progress; the system could not work with them left in
		 * place.
		 */
		if (remove_breakpoint(rec))
			BUG();
	}
	run_sync();
}
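
/*
 * Editor's note: ftrace_modify_code() below is the single-address
 * variant of the same int3-guarded sequence, used through
 * update_ftrace_func() for the fixed ftrace_call/ftrace_regs_call
 * (and ftrace_graph_call) patch sites rather than for every traced
 * function.
 */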

static int
ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
		   unsigned const char *new_code)
{
	int ret;

	ret = add_break(ip, old_code);
	if (ret)
		goto out;

	run_sync();

	ret = add_update_code(ip, new_code);
	if (ret)
		goto fail_update;

	run_sync();

	ret = ftrace_write(ip, new_code, 1);
	/*
	 * The breakpoint is handled only while this function is in progress.
	 * The system could not work if we could not remove it.
	 */
	BUG_ON(ret);
 out:
	run_sync();
	return ret;

 fail_update:
	/* Here too, the system could not work with the breakpoint left in place */
	if (ftrace_write(ip, old_code, 1))
		BUG();
	goto out;
}

void arch_ftrace_update_code(int command)
{
	/* See comment above by declaration of modifying_ftrace_code */
	atomic_inc(&modifying_ftrace_code);

	ftrace_modify_all_code(command);

	atomic_dec(&modifying_ftrace_code);
}

int __init ftrace_dyn_arch_init(void)
{
	return 0;
}

/* Currently only x86_64 supports dynamic trampolines */
#ifdef CONFIG_X86_64

#ifdef CONFIG_MODULES
#include <linux/moduleloader.h>
/* Module allocation simplifies allocating memory for code */
static inline void *alloc_tramp(unsigned long size)
{
	return module_alloc(size);
}
static inline void tramp_free(void *tramp)
{
	module_memfree(tramp);
}
#else
/* Trampolines can only be created if modules are supported */
static inline void *alloc_tramp(unsigned long size)
{
	return NULL;
}
static inline void tramp_free(void *tramp) { }
#endif

/* Defined as markers to the end of the ftrace default trampolines */
extern void ftrace_regs_caller_end(void);
extern void ftrace_epilogue(void);
extern void ftrace_caller_op_ptr(void);
extern void ftrace_regs_caller_op_ptr(void);

/* movq function_trace_op(%rip), %rdx */
/* 0x48 0x8b 0x15 <offset-to-function_trace_op (4 bytes)> */
#define OP_REF_SIZE	7
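
/*
 * Editor's note (illustrative): the RIP-relative mov above decodes as
 *
 *	48 8b 15 xx xx xx xx	movq xx(%rip), %rdx
 *
 * where the 32-bit displacement, like a call's, is relative to the end
 * of the 7-byte instruction. create_trampoline() below rewrites only
 * this displacement so the copied code loads the ftrace_ops pointer
 * stored at the tail of the trampoline instead of function_trace_op.
 */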

/*
 * The ftrace_ops is passed to the function callback. Since the
 * trampoline only services a single ftrace_ops, we can pass in
 * that ops directly.
 *
 * The ftrace_op_code_union is used to create a pointer to the
 * ftrace_ops that will be passed to the callback function.
 */
union ftrace_op_code_union {
	char code[OP_REF_SIZE];
	struct {
		char op[3];
		int offset;
	} __attribute__((packed));
};

#define RET_SIZE		1

static unsigned long
create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
{
	unsigned long start_offset;
	unsigned long end_offset;
	unsigned long op_offset;
	unsigned long offset;
	unsigned long npages;
	unsigned long size;
	unsigned long retq;
	unsigned long *ptr;
	void *trampoline;
	void *ip;
	/* 48 8b 15 <offset> is movq <offset>(%rip), %rdx */
	unsigned const char op_ref[] = { 0x48, 0x8b, 0x15 };
	union ftrace_op_code_union op_ptr;
	int ret;

	if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
		start_offset = (unsigned long)ftrace_regs_caller;
		end_offset = (unsigned long)ftrace_regs_caller_end;
		op_offset = (unsigned long)ftrace_regs_caller_op_ptr;
	} else {
		start_offset = (unsigned long)ftrace_caller;
		end_offset = (unsigned long)ftrace_epilogue;
		op_offset = (unsigned long)ftrace_caller_op_ptr;
	}

	size = end_offset - start_offset;

	/*
	 * Allocate enough memory to store the ftrace_caller code, the
	 * trailing ret(q), as well as the address of the ftrace_ops this
	 * trampoline is used for.
	 */
	trampoline = alloc_tramp(size + RET_SIZE + sizeof(void *));
	if (!trampoline)
		return 0;

	*tramp_size = size + RET_SIZE + sizeof(void *);
	npages = DIV_ROUND_UP(*tramp_size, PAGE_SIZE);

	/* Copy ftrace_caller onto the trampoline memory */
	ret = probe_kernel_read(trampoline, (void *)start_offset, size);
	if (WARN_ON(ret < 0))
		goto fail;

	ip = trampoline + size;

	/* The trampoline ends with ret(q) */
	retq = (unsigned long)ftrace_stub;
	ret = probe_kernel_read(ip, (void *)retq, RET_SIZE);
	if (WARN_ON(ret < 0))
		goto fail;

	/*
	 * The address of the ftrace_ops that is used for this trampoline
	 * is stored at the end of the trampoline. This will be used to
	 * load the third parameter for the callback. Basically, that
	 * location at the end of the trampoline takes the place of
	 * the global function_trace_op variable.
	 */

	ptr = (unsigned long *)(trampoline + size + RET_SIZE);
	*ptr = (unsigned long)ops;

	op_offset -= start_offset;
	memcpy(&op_ptr, trampoline + op_offset, OP_REF_SIZE);

	/* Are we pointing to the reference? */
	if (WARN_ON(memcmp(op_ptr.op, op_ref, 3) != 0))
		goto fail;

	/* Load the contents of ptr into the callback parameter */
	offset = (unsigned long)ptr;
	offset -= (unsigned long)trampoline + op_offset + OP_REF_SIZE;

	op_ptr.offset = offset;

	/* put in the new offset to the ftrace_ops */
	memcpy(trampoline + op_offset, &op_ptr, OP_REF_SIZE);

	/* The ALLOC_TRAMP flag lets us know we created it */
	ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP;

	set_vm_flush_reset_perms(trampoline);

	/*
	 * Module allocation needs to be completed by making the page
	 * executable. The page is still writable, which is a security hazard,
	 * but anyhow ftrace breaks W^X completely.
	 */
	set_memory_x((unsigned long)trampoline, npages);
	return (unsigned long)trampoline;
fail:
	tramp_free(trampoline);
	return 0;
}
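
/*
 * Editor's note (illustrative): the finished trampoline is laid out as
 *
 *	+--------------------------+ <- ops->trampoline
 *	| copy of ftrace_caller or |    size bytes; its op_ptr movq is
 *	| ftrace_regs_caller       |    repointed at the slot below
 *	+--------------------------+
 *	| ret                      |    RET_SIZE byte, copied from ftrace_stub
 *	+--------------------------+
 *	| struct ftrace_ops *      |    read %rip-relatively in place of
 *	+--------------------------+    function_trace_op
 */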

static unsigned long calc_trampoline_call_offset(bool save_regs)
{
	unsigned long start_offset;
	unsigned long call_offset;

	if (save_regs) {
		start_offset = (unsigned long)ftrace_regs_caller;
		call_offset = (unsigned long)ftrace_regs_call;
	} else {
		start_offset = (unsigned long)ftrace_caller;
		call_offset = (unsigned long)ftrace_call;
	}

	return call_offset - start_offset;
}

void arch_ftrace_update_trampoline(struct ftrace_ops *ops)
{
	ftrace_func_t func;
	unsigned char *new;
	unsigned long offset;
	unsigned long ip;
	unsigned int size;
	int ret, npages;

	if (ops->trampoline) {
		/*
		 * The ftrace_ops caller may set up its own trampoline.
		 * In such a case, this code must not modify it.
		 */
		if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
			return;
		npages = PAGE_ALIGN(ops->trampoline_size) >> PAGE_SHIFT;
		set_memory_rw(ops->trampoline, npages);
	} else {
		ops->trampoline = create_trampoline(ops, &size);
		if (!ops->trampoline)
			return;
		ops->trampoline_size = size;
		npages = PAGE_ALIGN(size) >> PAGE_SHIFT;
	}

	offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS);
	ip = ops->trampoline + offset;

	func = ftrace_ops_get_func(ops);

	ftrace_update_func_call = (unsigned long)func;

	/* Do a safe modify in case the trampoline is executing */
	new = ftrace_call_replace(ip, (unsigned long)func);
	ret = update_ftrace_func(ip, new);
	set_memory_ro(ops->trampoline, npages);

	/* The update should never fail */
	WARN_ON(ret);
}

/* Return the address of the function the trampoline calls */
static void *addr_from_call(void *ptr)
{
	union ftrace_code_union calc;
	int ret;

	ret = probe_kernel_read(&calc, ptr, MCOUNT_INSN_SIZE);
	if (WARN_ON_ONCE(ret < 0))
		return NULL;

	/* Make sure this is a call */
	if (WARN_ON_ONCE(calc.op != 0xe8)) {
		pr_warn("Expected e8, got %x\n", calc.op);
		return NULL;
	}

	return ptr + MCOUNT_INSN_SIZE + calc.offset;
}

void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
			   unsigned long frame_pointer);

/*
 * If the ops->trampoline was not allocated, then it probably
 * has a static trampoline func, or is the ftrace caller itself.
 */
static void *static_tramp_func(struct ftrace_ops *ops, struct dyn_ftrace *rec)
{
	unsigned long offset;
	bool save_regs = rec->flags & FTRACE_FL_REGS_EN;
	void *ptr;

	if (ops && ops->trampoline) {
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
		/*
		 * The function graph tracer is the only user we know of
		 * that sets a static trampoline.
		 */
		if (ops->trampoline == FTRACE_GRAPH_ADDR)
			return (void *)prepare_ftrace_return;
#endif
		return NULL;
	}

	offset = calc_trampoline_call_offset(save_regs);

	if (save_regs)
		ptr = (void *)FTRACE_REGS_ADDR + offset;
	else
		ptr = (void *)FTRACE_ADDR + offset;

	return addr_from_call(ptr);
}

void *arch_ftrace_trampoline_func(struct ftrace_ops *ops, struct dyn_ftrace *rec)
{
	unsigned long offset;

	/* If we didn't allocate this trampoline, consider it static */
	if (!ops || !(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
		return static_tramp_func(ops, rec);

	offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS);
	return addr_from_call((void *)ops->trampoline + offset);
}

void arch_ftrace_trampoline_free(struct ftrace_ops *ops)
{
	if (!ops || !(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
		return;

	tramp_free((void *)ops->trampoline);
	ops->trampoline = 0;
}

#endif /* CONFIG_X86_64 */
#endif /* CONFIG_DYNAMIC_FTRACE */

#ifdef CONFIG_FUNCTION_GRAPH_TRACER

#ifdef CONFIG_DYNAMIC_FTRACE
extern void ftrace_graph_call(void);

static unsigned char *ftrace_jmp_replace(unsigned long ip, unsigned long addr)
{
	return ftrace_text_replace(0xe9, ip, addr);
}

static int ftrace_mod_jmp(unsigned long ip, void *func)
{
	unsigned char *new;

	ftrace_update_func_call = 0UL;
	new = ftrace_jmp_replace(ip, (unsigned long)func);

	return update_ftrace_func(ip, new);
}
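
/*
 * Editor's note: ftrace_update_func_call is cleared above because the
 * patched instruction is a jmp, not a call; if a CPU hits the transient
 * int3 at this site, ftrace_int3_handler() sees the zero and simply
 * emulates a jump past the instruction instead of emulating a call.
 */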

int ftrace_enable_ftrace_graph_caller(void)
{
	unsigned long ip = (unsigned long)(&ftrace_graph_call);

	return ftrace_mod_jmp(ip, &ftrace_graph_caller);
}

int ftrace_disable_ftrace_graph_caller(void)
{
	unsigned long ip = (unsigned long)(&ftrace_graph_call);

	return ftrace_mod_jmp(ip, &ftrace_stub);
}

#endif /* CONFIG_DYNAMIC_FTRACE */

/*
 * Hook the return address and push it onto the stack of return
 * addresses in the current thread info.
 */
void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
			   unsigned long frame_pointer)
{
	unsigned long old;
	int faulted;
	unsigned long return_hooker = (unsigned long)
				&return_to_handler;

	/*
	 * When resuming from suspend-to-ram, this function can be indirectly
	 * called from early CPU startup code while the CPU is in real mode,
	 * which would fail miserably.  Make sure the stack pointer is a
	 * virtual address.
	 *
	 * This check isn't as accurate as virt_addr_valid(), but it should be
	 * good enough for this purpose, and it's fast.
	 */
	if (unlikely((long)__builtin_frame_address(0) >= 0))
		return;

	if (unlikely(ftrace_graph_is_dead()))
		return;

	if (unlikely(atomic_read(&current->tracing_graph_pause)))
		return;

	/*
	 * Protect against a fault, even if it shouldn't
	 * happen. This tool is too intrusive to
	 * ignore such a protection.
	 */
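	/*
	 * Editor's note: the asm below reads the saved return address
	 * (*parent into 'old') and overwrites it with return_hooker; if
	 * either access faults, the .fixup entries set 'faulted' to 1
	 * instead of oopsing.
	 */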
	asm volatile(
		"1: " _ASM_MOV " (%[parent]), %[old]\n"
		"2: " _ASM_MOV " %[return_hooker], (%[parent])\n"
		"   movl $0, %[faulted]\n"
		"3:\n"

		".section .fixup, \"ax\"\n"
		"4: movl $1, %[faulted]\n"
		"   jmp 3b\n"
		".previous\n"

		_ASM_EXTABLE(1b, 4b)
		_ASM_EXTABLE(2b, 4b)

		: [old] "=&r" (old), [faulted] "=r" (faulted)
		: [parent] "r" (parent), [return_hooker] "r" (return_hooker)
		: "memory"
	);

	if (unlikely(faulted)) {
		ftrace_graph_stop();
		WARN_ON(1);
		return;
	}

	if (function_graph_enter(old, self_addr, frame_pointer, parent))
		*parent = old;
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */