xref: /openbmc/linux/arch/x86/kernel/ftrace.c (revision b8d312aa)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Dynamic function tracing support.
4  *
5  * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
6  *
7  * Thanks goes to Ingo Molnar, for suggesting the idea.
8  * Mathieu Desnoyers, for suggesting postponing the modifications.
9  * Arjan van de Ven, for keeping me straight, and explaining to me
10  * the dangers of modifying code on the run.
11  */
12 
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14 
15 #include <linux/spinlock.h>
16 #include <linux/hardirq.h>
17 #include <linux/uaccess.h>
18 #include <linux/ftrace.h>
19 #include <linux/percpu.h>
20 #include <linux/sched.h>
21 #include <linux/slab.h>
22 #include <linux/init.h>
23 #include <linux/list.h>
24 #include <linux/module.h>
25 #include <linux/memory.h>
26 
27 #include <trace/syscall.h>
28 
29 #include <asm/set_memory.h>
30 #include <asm/kprobes.h>
31 #include <asm/ftrace.h>
32 #include <asm/nops.h>
33 #include <asm/text-patching.h>
34 
35 #ifdef CONFIG_DYNAMIC_FTRACE
36 
37 int ftrace_arch_code_modify_prepare(void)
38     __acquires(&text_mutex)
39 {
40 	/*
41 	 * Need to grab text_mutex to prevent a race from module loading
42 	 * and live kernel patching from changing the text permissions while
43 	 * ftrace has it set to "read/write".
44 	 */
45 	mutex_lock(&text_mutex);
46 	set_kernel_text_rw();
47 	set_all_modules_text_rw();
48 	return 0;
49 }
50 
51 int ftrace_arch_code_modify_post_process(void)
52     __releases(&text_mutex)
53 {
54 	set_all_modules_text_ro();
55 	set_kernel_text_ro();
56 	mutex_unlock(&text_mutex);
57 	return 0;
58 }
59 
60 union ftrace_code_union {
61 	char code[MCOUNT_INSN_SIZE];
62 	struct {
63 		unsigned char op;
64 		int offset;
65 	} __attribute__((packed));
66 };
67 
/*
 * Distance from @ip to @addr, truncated to an int.
 * No check is made that the distance actually fits in an int.
 */
static int ftrace_calc_offset(long ip, long addr)
{
	long delta = addr - ip;

	return (int)delta;
}
72 
73 static unsigned char *
74 ftrace_text_replace(unsigned char op, unsigned long ip, unsigned long addr)
75 {
76 	static union ftrace_code_union calc;
77 
78 	calc.op		= op;
79 	calc.offset	= ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);
80 
81 	return calc.code;
82 }
83 
/*
 * Build the bytes of a call (opcode 0xe8) placed at @ip that targets @addr.
 * The result is the shared buffer of ftrace_text_replace().
 */
static unsigned char *
ftrace_call_replace(unsigned long ip, unsigned long addr)
{
	const unsigned char call_opcode = 0xe8;

	return ftrace_text_replace(call_opcode, ip, addr);
}
89 
/* Return 1 if @addr lies in the half-open range [@start, @end), else 0. */
static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
	if (addr < start)
		return 0;

	return addr < end;
}
95 
96 static unsigned long text_ip_addr(unsigned long ip)
97 {
98 	/*
99 	 * On x86_64, kernel text mappings are mapped read-only, so we use
100 	 * the kernel identity mapping instead of the kernel text mapping
101 	 * to modify the kernel text.
102 	 *
103 	 * For 32bit kernels, these mappings are same and we can use
104 	 * kernel identity mapping to modify code.
105 	 */
106 	if (within(ip, (unsigned long)_text, (unsigned long)_etext))
107 		ip = (unsigned long)__va(__pa_symbol(ip));
108 
109 	return ip;
110 }
111 
112 static const unsigned char *ftrace_nop_replace(void)
113 {
114 	return ideal_nops[NOP_ATOMIC5];
115 }
116 
117 static int
118 ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
119 		   unsigned const char *new_code)
120 {
121 	unsigned char replaced[MCOUNT_INSN_SIZE];
122 
123 	ftrace_expected = old_code;
124 
125 	/*
126 	 * Note:
127 	 * We are paranoid about modifying text, as if a bug was to happen, it
128 	 * could cause us to read or write to someplace that could cause harm.
129 	 * Carefully read and modify the code with probe_kernel_*(), and make
130 	 * sure what we read is what we expected it to be before modifying it.
131 	 */
132 
133 	/* read the text we want to modify */
134 	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
135 		return -EFAULT;
136 
137 	/* Make sure it is what we expect it to be */
138 	if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
139 		return -EINVAL;
140 
141 	ip = text_ip_addr(ip);
142 
143 	/* replace the text with the new text */
144 	if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE))
145 		return -EPERM;
146 
147 	sync_core();
148 
149 	return 0;
150 }
151 
152 int ftrace_make_nop(struct module *mod,
153 		    struct dyn_ftrace *rec, unsigned long addr)
154 {
155 	unsigned const char *new, *old;
156 	unsigned long ip = rec->ip;
157 
158 	old = ftrace_call_replace(ip, addr);
159 	new = ftrace_nop_replace();
160 
161 	/*
162 	 * On boot up, and when modules are loaded, the MCOUNT_ADDR
163 	 * is converted to a nop, and will never become MCOUNT_ADDR
164 	 * again. This code is either running before SMP (on boot up)
165 	 * or before the code will ever be executed (module load).
166 	 * We do not want to use the breakpoint version in this case,
167 	 * just modify the code directly.
168 	 */
169 	if (addr == MCOUNT_ADDR)
170 		return ftrace_modify_code_direct(rec->ip, old, new);
171 
172 	ftrace_expected = NULL;
173 
174 	/* Normal cases use add_brk_on_nop */
175 	WARN_ONCE(1, "invalid use of ftrace_make_nop");
176 	return -EINVAL;
177 }
178 
179 int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
180 {
181 	unsigned const char *new, *old;
182 	unsigned long ip = rec->ip;
183 
184 	old = ftrace_nop_replace();
185 	new = ftrace_call_replace(ip, addr);
186 
187 	/* Should only be called when module is loaded */
188 	return ftrace_modify_code_direct(rec->ip, old, new);
189 }
190 
191 /*
192  * The modifying_ftrace_code is used to tell the breakpoint
193  * handler to call ftrace_int3_handler(). If it fails to
194  * call this handler for a breakpoint added by ftrace, then
195  * the kernel may crash.
196  *
197  * As atomic_writes on x86 do not need a barrier, we do not
198  * need to add smp_mb()s for this to work. It is also considered
199  * that we can not read the modifying_ftrace_code before
200  * executing the breakpoint. That would be quite remarkable if
201  * it could do that. Here's the flow that is required:
202  *
203  *   CPU-0                          CPU-1
204  *
205  * atomic_inc(mfc);
206  * write int3s
207  *				<trap-int3> // implicit (r)mb
208  *				if (atomic_read(mfc))
209  *					call ftrace_int3_handler()
210  *
211  * Then when we are finished:
212  *
213  * atomic_dec(mfc);
214  *
215  * If we hit a breakpoint that was not set by ftrace, it does not
216  * matter if ftrace_int3_handler() is called or not. It will
217  * simply be ignored. But it is crucial that a ftrace nop/caller
218  * breakpoint is handled. No other user should ever place a
219  * breakpoint on an ftrace nop/caller location. It must only
220  * be done by this code.
221  */
222 atomic_t modifying_ftrace_code __read_mostly;
223 
224 static int
225 ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
226 		   unsigned const char *new_code);
227 
228 /*
229  * Should never be called:
230  *  As it is only called by __ftrace_replace_code() which is called by
231  *  ftrace_replace_code() that x86 overrides, and by ftrace_update_code()
232  *  which is called to turn mcount into nops or nops into function calls
233  *  but not to convert a function from not using regs to one that uses
234  *  regs, which ftrace_modify_call() is for.
235  */
236 int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
237 				 unsigned long addr)
238 {
239 	WARN_ON(1);
240 	ftrace_expected = NULL;
241 	return -EINVAL;
242 }
243 
244 static unsigned long ftrace_update_func;
245 static unsigned long ftrace_update_func_call;
246 
247 static int update_ftrace_func(unsigned long ip, void *new)
248 {
249 	unsigned char old[MCOUNT_INSN_SIZE];
250 	int ret;
251 
252 	memcpy(old, (void *)ip, MCOUNT_INSN_SIZE);
253 
254 	ftrace_update_func = ip;
255 	/* Make sure the breakpoints see the ftrace_update_func update */
256 	smp_wmb();
257 
258 	/* See comment above by declaration of modifying_ftrace_code */
259 	atomic_inc(&modifying_ftrace_code);
260 
261 	ret = ftrace_modify_code(ip, old, new);
262 
263 	atomic_dec(&modifying_ftrace_code);
264 
265 	return ret;
266 }
267 
268 int ftrace_update_ftrace_func(ftrace_func_t func)
269 {
270 	unsigned long ip = (unsigned long)(&ftrace_call);
271 	unsigned char *new;
272 	int ret;
273 
274 	ftrace_update_func_call = (unsigned long)func;
275 
276 	new = ftrace_call_replace(ip, (unsigned long)func);
277 	ret = update_ftrace_func(ip, new);
278 
279 	/* Also update the regs callback function */
280 	if (!ret) {
281 		ip = (unsigned long)(&ftrace_regs_call);
282 		new = ftrace_call_replace(ip, (unsigned long)func);
283 		ret = update_ftrace_func(ip, new);
284 	}
285 
286 	return ret;
287 }
288 
289 static nokprobe_inline int is_ftrace_caller(unsigned long ip)
290 {
291 	if (ip == ftrace_update_func)
292 		return 1;
293 
294 	return 0;
295 }
296 
297 /*
298  * A breakpoint was added to the code address we are about to
299  * modify, and this is the handle that will just skip over it.
300  * We are either changing a nop into a trace call, or a trace
301  * call to a nop. While the change is taking place, we treat
302  * it just like it was a nop.
303  */
304 int ftrace_int3_handler(struct pt_regs *regs)
305 {
306 	unsigned long ip;
307 
308 	if (WARN_ON_ONCE(!regs))
309 		return 0;
310 
311 	ip = regs->ip - INT3_INSN_SIZE;
312 
313 	if (ftrace_location(ip)) {
314 		int3_emulate_call(regs, (unsigned long)ftrace_regs_caller);
315 		return 1;
316 	} else if (is_ftrace_caller(ip)) {
317 		if (!ftrace_update_func_call) {
318 			int3_emulate_jmp(regs, ip + CALL_INSN_SIZE);
319 			return 1;
320 		}
321 		int3_emulate_call(regs, ftrace_update_func_call);
322 		return 1;
323 	}
324 
325 	return 0;
326 }
327 NOKPROBE_SYMBOL(ftrace_int3_handler);
328 
329 static int ftrace_write(unsigned long ip, const char *val, int size)
330 {
331 	ip = text_ip_addr(ip);
332 
333 	if (probe_kernel_write((void *)ip, val, size))
334 		return -EPERM;
335 
336 	return 0;
337 }
338 
339 static int add_break(unsigned long ip, const char *old)
340 {
341 	unsigned char replaced[MCOUNT_INSN_SIZE];
342 	unsigned char brk = BREAKPOINT_INSTRUCTION;
343 
344 	if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE))
345 		return -EFAULT;
346 
347 	ftrace_expected = old;
348 
349 	/* Make sure it is what we expect it to be */
350 	if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0)
351 		return -EINVAL;
352 
353 	return ftrace_write(ip, &brk, 1);
354 }
355 
356 static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr)
357 {
358 	unsigned const char *old;
359 	unsigned long ip = rec->ip;
360 
361 	old = ftrace_call_replace(ip, addr);
362 
363 	return add_break(rec->ip, old);
364 }
365 
366 
367 static int add_brk_on_nop(struct dyn_ftrace *rec)
368 {
369 	unsigned const char *old;
370 
371 	old = ftrace_nop_replace();
372 
373 	return add_break(rec->ip, old);
374 }
375 
376 static int add_breakpoints(struct dyn_ftrace *rec, bool enable)
377 {
378 	unsigned long ftrace_addr;
379 	int ret;
380 
381 	ftrace_addr = ftrace_get_addr_curr(rec);
382 
383 	ret = ftrace_test_record(rec, enable);
384 
385 	switch (ret) {
386 	case FTRACE_UPDATE_IGNORE:
387 		return 0;
388 
389 	case FTRACE_UPDATE_MAKE_CALL:
390 		/* converting nop to call */
391 		return add_brk_on_nop(rec);
392 
393 	case FTRACE_UPDATE_MODIFY_CALL:
394 	case FTRACE_UPDATE_MAKE_NOP:
395 		/* converting a call to a nop */
396 		return add_brk_on_call(rec, ftrace_addr);
397 	}
398 	return 0;
399 }
400 
401 /*
402  * On error, we need to remove breakpoints. This needs to
403  * be done caefully. If the address does not currently have a
404  * breakpoint, we know we are done. Otherwise, we look at the
405  * remaining 4 bytes of the instruction. If it matches a nop
406  * we replace the breakpoint with the nop. Otherwise we replace
407  * it with the call instruction.
408  */
409 static int remove_breakpoint(struct dyn_ftrace *rec)
410 {
411 	unsigned char ins[MCOUNT_INSN_SIZE];
412 	unsigned char brk = BREAKPOINT_INSTRUCTION;
413 	const unsigned char *nop;
414 	unsigned long ftrace_addr;
415 	unsigned long ip = rec->ip;
416 
417 	/* If we fail the read, just give up */
418 	if (probe_kernel_read(ins, (void *)ip, MCOUNT_INSN_SIZE))
419 		return -EFAULT;
420 
421 	/* If this does not have a breakpoint, we are done */
422 	if (ins[0] != brk)
423 		return 0;
424 
425 	nop = ftrace_nop_replace();
426 
427 	/*
428 	 * If the last 4 bytes of the instruction do not match
429 	 * a nop, then we assume that this is a call to ftrace_addr.
430 	 */
431 	if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) {
432 		/*
433 		 * For extra paranoidism, we check if the breakpoint is on
434 		 * a call that would actually jump to the ftrace_addr.
435 		 * If not, don't touch the breakpoint, we make just create
436 		 * a disaster.
437 		 */
438 		ftrace_addr = ftrace_get_addr_new(rec);
439 		nop = ftrace_call_replace(ip, ftrace_addr);
440 
441 		if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0)
442 			goto update;
443 
444 		/* Check both ftrace_addr and ftrace_old_addr */
445 		ftrace_addr = ftrace_get_addr_curr(rec);
446 		nop = ftrace_call_replace(ip, ftrace_addr);
447 
448 		ftrace_expected = nop;
449 
450 		if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
451 			return -EINVAL;
452 	}
453 
454  update:
455 	return ftrace_write(ip, nop, 1);
456 }
457 
458 static int add_update_code(unsigned long ip, unsigned const char *new)
459 {
460 	/* skip breakpoint */
461 	ip++;
462 	new++;
463 	return ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1);
464 }
465 
466 static int add_update_call(struct dyn_ftrace *rec, unsigned long addr)
467 {
468 	unsigned long ip = rec->ip;
469 	unsigned const char *new;
470 
471 	new = ftrace_call_replace(ip, addr);
472 	return add_update_code(ip, new);
473 }
474 
475 static int add_update_nop(struct dyn_ftrace *rec)
476 {
477 	unsigned long ip = rec->ip;
478 	unsigned const char *new;
479 
480 	new = ftrace_nop_replace();
481 	return add_update_code(ip, new);
482 }
483 
484 static int add_update(struct dyn_ftrace *rec, bool enable)
485 {
486 	unsigned long ftrace_addr;
487 	int ret;
488 
489 	ret = ftrace_test_record(rec, enable);
490 
491 	ftrace_addr  = ftrace_get_addr_new(rec);
492 
493 	switch (ret) {
494 	case FTRACE_UPDATE_IGNORE:
495 		return 0;
496 
497 	case FTRACE_UPDATE_MODIFY_CALL:
498 	case FTRACE_UPDATE_MAKE_CALL:
499 		/* converting nop to call */
500 		return add_update_call(rec, ftrace_addr);
501 
502 	case FTRACE_UPDATE_MAKE_NOP:
503 		/* converting a call to a nop */
504 		return add_update_nop(rec);
505 	}
506 
507 	return 0;
508 }
509 
510 static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr)
511 {
512 	unsigned long ip = rec->ip;
513 	unsigned const char *new;
514 
515 	new = ftrace_call_replace(ip, addr);
516 
517 	return ftrace_write(ip, new, 1);
518 }
519 
520 static int finish_update_nop(struct dyn_ftrace *rec)
521 {
522 	unsigned long ip = rec->ip;
523 	unsigned const char *new;
524 
525 	new = ftrace_nop_replace();
526 
527 	return ftrace_write(ip, new, 1);
528 }
529 
530 static int finish_update(struct dyn_ftrace *rec, bool enable)
531 {
532 	unsigned long ftrace_addr;
533 	int ret;
534 
535 	ret = ftrace_update_record(rec, enable);
536 
537 	ftrace_addr = ftrace_get_addr_new(rec);
538 
539 	switch (ret) {
540 	case FTRACE_UPDATE_IGNORE:
541 		return 0;
542 
543 	case FTRACE_UPDATE_MODIFY_CALL:
544 	case FTRACE_UPDATE_MAKE_CALL:
545 		/* converting nop to call */
546 		return finish_update_call(rec, ftrace_addr);
547 
548 	case FTRACE_UPDATE_MAKE_NOP:
549 		/* converting a call to a nop */
550 		return finish_update_nop(rec);
551 	}
552 
553 	return 0;
554 }
555 
/* on_each_cpu() callback used by run_sync(); @data is not used. */
static void do_sync_core(void *data)
{
	sync_core();
}
560 
561 static void run_sync(void)
562 {
563 	int enable_irqs;
564 
565 	/* No need to sync if there's only one CPU */
566 	if (num_online_cpus() == 1)
567 		return;
568 
569 	enable_irqs = irqs_disabled();
570 
571 	/* We may be called with interrupts disabled (on bootup). */
572 	if (enable_irqs)
573 		local_irq_enable();
574 	on_each_cpu(do_sync_core, NULL, 1);
575 	if (enable_irqs)
576 		local_irq_disable();
577 }
578 
579 void ftrace_replace_code(int enable)
580 {
581 	struct ftrace_rec_iter *iter;
582 	struct dyn_ftrace *rec;
583 	const char *report = "adding breakpoints";
584 	int count = 0;
585 	int ret;
586 
587 	for_ftrace_rec_iter(iter) {
588 		rec = ftrace_rec_iter_record(iter);
589 
590 		ret = add_breakpoints(rec, enable);
591 		if (ret)
592 			goto remove_breakpoints;
593 		count++;
594 	}
595 
596 	run_sync();
597 
598 	report = "updating code";
599 	count = 0;
600 
601 	for_ftrace_rec_iter(iter) {
602 		rec = ftrace_rec_iter_record(iter);
603 
604 		ret = add_update(rec, enable);
605 		if (ret)
606 			goto remove_breakpoints;
607 		count++;
608 	}
609 
610 	run_sync();
611 
612 	report = "removing breakpoints";
613 	count = 0;
614 
615 	for_ftrace_rec_iter(iter) {
616 		rec = ftrace_rec_iter_record(iter);
617 
618 		ret = finish_update(rec, enable);
619 		if (ret)
620 			goto remove_breakpoints;
621 		count++;
622 	}
623 
624 	run_sync();
625 
626 	return;
627 
628  remove_breakpoints:
629 	pr_warn("Failed on %s (%d):\n", report, count);
630 	ftrace_bug(ret, rec);
631 	for_ftrace_rec_iter(iter) {
632 		rec = ftrace_rec_iter_record(iter);
633 		/*
634 		 * Breakpoints are handled only when this function is in
635 		 * progress. The system could not work with them.
636 		 */
637 		if (remove_breakpoint(rec))
638 			BUG();
639 	}
640 	run_sync();
641 }
642 
/*
 * Replace the instruction at @ip, expected to be @old_code, with @new_code
 * using the breakpoint sequence:
 *
 *   1. put a breakpoint on the first byte          (then sync all CPUs)
 *   2. write the remaining bytes of @new_code      (then sync all CPUs)
 *   3. replace the breakpoint with the first byte of @new_code
 *
 * A final sync is done on every path, including the error paths.
 *
 * Returns 0 on success or the error of step 1 or 2.  If step 2 fails, the
 * first byte of @old_code is written back.  Not being able to remove the
 * breakpoint again (step 3, or the write back) is fatal.
 */
static int
ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
		   unsigned const char *new_code)
{
	int ret;

	ret = add_break(ip, old_code);
	if (ret)
		goto out;

	run_sync();

	ret = add_update_code(ip, new_code);
	if (ret) {
		/* Also here the system could not work with the breakpoint */
		if (ftrace_write(ip, old_code, 1))
			BUG();
		goto out;
	}

	run_sync();

	/*
	 * The breakpoint is handled only when this function is in progress.
	 * The system could not work if we could not remove it.
	 */
	ret = ftrace_write(ip, new_code, 1);
	BUG_ON(ret);

 out:
	run_sync();
	return ret;
}
677 
678 void arch_ftrace_update_code(int command)
679 {
680 	/* See comment above by declaration of modifying_ftrace_code */
681 	atomic_inc(&modifying_ftrace_code);
682 
683 	ftrace_modify_all_code(command);
684 
685 	atomic_dec(&modifying_ftrace_code);
686 }
687 
688 int __init ftrace_dyn_arch_init(void)
689 {
690 	return 0;
691 }
692 
693 /* Currently only x86_64 supports dynamic trampolines */
694 #ifdef CONFIG_X86_64
695 
696 #ifdef CONFIG_MODULES
697 #include <linux/moduleloader.h>
/*
 * Allocate @size bytes for a trampoline with module_alloc(), which
 * simplifies allocating memory for code.  Returns what module_alloc()
 * returns.
 */
static inline void *alloc_tramp(unsigned long size)
{
	void *mem = module_alloc(size);

	return mem;
}
/* Release trampoline memory obtained from alloc_tramp(). */
static inline void tramp_free(void *tramp)
{
	module_memfree(tramp);
}
707 #else
708 /* Trampolines can only be created if modules are supported */
709 static inline void *alloc_tramp(unsigned long size)
710 {
711 	return NULL;
712 }
/* Nothing can have been allocated by the stub alloc_tramp(); do nothing. */
static inline void tramp_free(void *tramp)
{
}
714 #endif
715 
716 /* Defined as markers to the end of the ftrace default trampolines */
717 extern void ftrace_regs_caller_end(void);
718 extern void ftrace_epilogue(void);
719 extern void ftrace_caller_op_ptr(void);
720 extern void ftrace_regs_caller_op_ptr(void);
721 
722 /* movq function_trace_op(%rip), %rdx */
723 /* 0x48 0x8b 0x15 <offset-to-ftrace_trace_op (4 bytes)> */
724 #define OP_REF_SIZE	7
725 
726 /*
727  * The ftrace_ops is passed to the function callback. Since the
728  * trampoline only services a single ftrace_ops, we can pass in
729  * that ops directly.
730  *
731  * The ftrace_op_code_union is used to create a pointer to the
732  * ftrace_ops that will be passed to the callback function.
733  */
734 union ftrace_op_code_union {
735 	char code[OP_REF_SIZE];
736 	struct {
737 		char op[3];
738 		int offset;
739 	} __attribute__((packed));
740 };
741 
742 #define RET_SIZE		1
743 
744 static unsigned long
745 create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
746 {
747 	unsigned long start_offset;
748 	unsigned long end_offset;
749 	unsigned long op_offset;
750 	unsigned long offset;
751 	unsigned long npages;
752 	unsigned long size;
753 	unsigned long retq;
754 	unsigned long *ptr;
755 	void *trampoline;
756 	void *ip;
757 	/* 48 8b 15 <offset> is movq <offset>(%rip), %rdx */
758 	unsigned const char op_ref[] = { 0x48, 0x8b, 0x15 };
759 	union ftrace_op_code_union op_ptr;
760 	int ret;
761 
762 	if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
763 		start_offset = (unsigned long)ftrace_regs_caller;
764 		end_offset = (unsigned long)ftrace_regs_caller_end;
765 		op_offset = (unsigned long)ftrace_regs_caller_op_ptr;
766 	} else {
767 		start_offset = (unsigned long)ftrace_caller;
768 		end_offset = (unsigned long)ftrace_epilogue;
769 		op_offset = (unsigned long)ftrace_caller_op_ptr;
770 	}
771 
772 	size = end_offset - start_offset;
773 
774 	/*
775 	 * Allocate enough size to store the ftrace_caller code,
776 	 * the iret , as well as the address of the ftrace_ops this
777 	 * trampoline is used for.
778 	 */
779 	trampoline = alloc_tramp(size + RET_SIZE + sizeof(void *));
780 	if (!trampoline)
781 		return 0;
782 
783 	*tramp_size = size + RET_SIZE + sizeof(void *);
784 	npages = DIV_ROUND_UP(*tramp_size, PAGE_SIZE);
785 
786 	/* Copy ftrace_caller onto the trampoline memory */
787 	ret = probe_kernel_read(trampoline, (void *)start_offset, size);
788 	if (WARN_ON(ret < 0))
789 		goto fail;
790 
791 	ip = trampoline + size;
792 
793 	/* The trampoline ends with ret(q) */
794 	retq = (unsigned long)ftrace_stub;
795 	ret = probe_kernel_read(ip, (void *)retq, RET_SIZE);
796 	if (WARN_ON(ret < 0))
797 		goto fail;
798 
799 	/*
800 	 * The address of the ftrace_ops that is used for this trampoline
801 	 * is stored at the end of the trampoline. This will be used to
802 	 * load the third parameter for the callback. Basically, that
803 	 * location at the end of the trampoline takes the place of
804 	 * the global function_trace_op variable.
805 	 */
806 
807 	ptr = (unsigned long *)(trampoline + size + RET_SIZE);
808 	*ptr = (unsigned long)ops;
809 
810 	op_offset -= start_offset;
811 	memcpy(&op_ptr, trampoline + op_offset, OP_REF_SIZE);
812 
813 	/* Are we pointing to the reference? */
814 	if (WARN_ON(memcmp(op_ptr.op, op_ref, 3) != 0))
815 		goto fail;
816 
817 	/* Load the contents of ptr into the callback parameter */
818 	offset = (unsigned long)ptr;
819 	offset -= (unsigned long)trampoline + op_offset + OP_REF_SIZE;
820 
821 	op_ptr.offset = offset;
822 
823 	/* put in the new offset to the ftrace_ops */
824 	memcpy(trampoline + op_offset, &op_ptr, OP_REF_SIZE);
825 
826 	/* ALLOC_TRAMP flags lets us know we created it */
827 	ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP;
828 
829 	set_vm_flush_reset_perms(trampoline);
830 
831 	/*
832 	 * Module allocation needs to be completed by making the page
833 	 * executable. The page is still writable, which is a security hazard,
834 	 * but anyhow ftrace breaks W^X completely.
835 	 */
836 	set_memory_x((unsigned long)trampoline, npages);
837 	return (unsigned long)trampoline;
838 fail:
839 	tramp_free(trampoline);
840 	return 0;
841 }
842 
843 static unsigned long calc_trampoline_call_offset(bool save_regs)
844 {
845 	unsigned long start_offset;
846 	unsigned long call_offset;
847 
848 	if (save_regs) {
849 		start_offset = (unsigned long)ftrace_regs_caller;
850 		call_offset = (unsigned long)ftrace_regs_call;
851 	} else {
852 		start_offset = (unsigned long)ftrace_caller;
853 		call_offset = (unsigned long)ftrace_call;
854 	}
855 
856 	return call_offset - start_offset;
857 }
858 
859 void arch_ftrace_update_trampoline(struct ftrace_ops *ops)
860 {
861 	ftrace_func_t func;
862 	unsigned char *new;
863 	unsigned long offset;
864 	unsigned long ip;
865 	unsigned int size;
866 	int ret, npages;
867 
868 	if (ops->trampoline) {
869 		/*
870 		 * The ftrace_ops caller may set up its own trampoline.
871 		 * In such a case, this code must not modify it.
872 		 */
873 		if (!(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
874 			return;
875 		npages = PAGE_ALIGN(ops->trampoline_size) >> PAGE_SHIFT;
876 		set_memory_rw(ops->trampoline, npages);
877 	} else {
878 		ops->trampoline = create_trampoline(ops, &size);
879 		if (!ops->trampoline)
880 			return;
881 		ops->trampoline_size = size;
882 		npages = PAGE_ALIGN(size) >> PAGE_SHIFT;
883 	}
884 
885 	offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS);
886 	ip = ops->trampoline + offset;
887 
888 	func = ftrace_ops_get_func(ops);
889 
890 	ftrace_update_func_call = (unsigned long)func;
891 
892 	/* Do a safe modify in case the trampoline is executing */
893 	new = ftrace_call_replace(ip, (unsigned long)func);
894 	ret = update_ftrace_func(ip, new);
895 	set_memory_ro(ops->trampoline, npages);
896 
897 	/* The update should never fail */
898 	WARN_ON(ret);
899 }
900 
901 /* Return the address of the function the trampoline calls */
902 static void *addr_from_call(void *ptr)
903 {
904 	union ftrace_code_union calc;
905 	int ret;
906 
907 	ret = probe_kernel_read(&calc, ptr, MCOUNT_INSN_SIZE);
908 	if (WARN_ON_ONCE(ret < 0))
909 		return NULL;
910 
911 	/* Make sure this is a call */
912 	if (WARN_ON_ONCE(calc.op != 0xe8)) {
913 		pr_warn("Expected e8, got %x\n", calc.op);
914 		return NULL;
915 	}
916 
917 	return ptr + MCOUNT_INSN_SIZE + calc.offset;
918 }
919 
920 void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
921 			   unsigned long frame_pointer);
922 
923 /*
924  * If the ops->trampoline was not allocated, then it probably
925  * has a static trampoline func, or is the ftrace caller itself.
926  */
927 static void *static_tramp_func(struct ftrace_ops *ops, struct dyn_ftrace *rec)
928 {
929 	unsigned long offset;
930 	bool save_regs = rec->flags & FTRACE_FL_REGS_EN;
931 	void *ptr;
932 
933 	if (ops && ops->trampoline) {
934 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
935 		/*
936 		 * We only know about function graph tracer setting as static
937 		 * trampoline.
938 		 */
939 		if (ops->trampoline == FTRACE_GRAPH_ADDR)
940 			return (void *)prepare_ftrace_return;
941 #endif
942 		return NULL;
943 	}
944 
945 	offset = calc_trampoline_call_offset(save_regs);
946 
947 	if (save_regs)
948 		ptr = (void *)FTRACE_REGS_ADDR + offset;
949 	else
950 		ptr = (void *)FTRACE_ADDR + offset;
951 
952 	return addr_from_call(ptr);
953 }
954 
955 void *arch_ftrace_trampoline_func(struct ftrace_ops *ops, struct dyn_ftrace *rec)
956 {
957 	unsigned long offset;
958 
959 	/* If we didn't allocate this trampoline, consider it static */
960 	if (!ops || !(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
961 		return static_tramp_func(ops, rec);
962 
963 	offset = calc_trampoline_call_offset(ops->flags & FTRACE_OPS_FL_SAVE_REGS);
964 	return addr_from_call((void *)ops->trampoline + offset);
965 }
966 
967 void arch_ftrace_trampoline_free(struct ftrace_ops *ops)
968 {
969 	if (!ops || !(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
970 		return;
971 
972 	tramp_free((void *)ops->trampoline);
973 	ops->trampoline = 0;
974 }
975 
976 #endif /* CONFIG_X86_64 */
977 #endif /* CONFIG_DYNAMIC_FTRACE */
978 
979 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
980 
981 #ifdef CONFIG_DYNAMIC_FTRACE
982 extern void ftrace_graph_call(void);
983 
/*
 * Build the bytes of an instruction with opcode 0xe9 placed at @ip that
 * targets @addr.  The result is the shared buffer of ftrace_text_replace().
 */
static unsigned char *ftrace_jmp_replace(unsigned long ip, unsigned long addr)
{
	const unsigned char jmp_opcode = 0xe9;

	return ftrace_text_replace(jmp_opcode, ip, addr);
}
988 
989 static int ftrace_mod_jmp(unsigned long ip, void *func)
990 {
991 	unsigned char *new;
992 
993 	ftrace_update_func_call = 0UL;
994 	new = ftrace_jmp_replace(ip, (unsigned long)func);
995 
996 	return update_ftrace_func(ip, new);
997 }
998 
999 int ftrace_enable_ftrace_graph_caller(void)
1000 {
1001 	unsigned long ip = (unsigned long)(&ftrace_graph_call);
1002 
1003 	return ftrace_mod_jmp(ip, &ftrace_graph_caller);
1004 }
1005 
1006 int ftrace_disable_ftrace_graph_caller(void)
1007 {
1008 	unsigned long ip = (unsigned long)(&ftrace_graph_call);
1009 
1010 	return ftrace_mod_jmp(ip, &ftrace_stub);
1011 }
1012 
#endif /* CONFIG_DYNAMIC_FTRACE */
1014 
1015 /*
1016  * Hook the return address and push it in the stack of return addrs
1017  * in current thread info.
1018  */
1019 void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
1020 			   unsigned long frame_pointer)
1021 {
1022 	unsigned long old;
1023 	int faulted;
1024 	unsigned long return_hooker = (unsigned long)
1025 				&return_to_handler;
1026 
1027 	/*
1028 	 * When resuming from suspend-to-ram, this function can be indirectly
1029 	 * called from early CPU startup code while the CPU is in real mode,
1030 	 * which would fail miserably.  Make sure the stack pointer is a
1031 	 * virtual address.
1032 	 *
1033 	 * This check isn't as accurate as virt_addr_valid(), but it should be
1034 	 * good enough for this purpose, and it's fast.
1035 	 */
1036 	if (unlikely((long)__builtin_frame_address(0) >= 0))
1037 		return;
1038 
1039 	if (unlikely(ftrace_graph_is_dead()))
1040 		return;
1041 
1042 	if (unlikely(atomic_read(&current->tracing_graph_pause)))
1043 		return;
1044 
1045 	/*
1046 	 * Protect against fault, even if it shouldn't
1047 	 * happen. This tool is too much intrusive to
1048 	 * ignore such a protection.
1049 	 */
1050 	asm volatile(
1051 		"1: " _ASM_MOV " (%[parent]), %[old]\n"
1052 		"2: " _ASM_MOV " %[return_hooker], (%[parent])\n"
1053 		"   movl $0, %[faulted]\n"
1054 		"3:\n"
1055 
1056 		".section .fixup, \"ax\"\n"
1057 		"4: movl $1, %[faulted]\n"
1058 		"   jmp 3b\n"
1059 		".previous\n"
1060 
1061 		_ASM_EXTABLE(1b, 4b)
1062 		_ASM_EXTABLE(2b, 4b)
1063 
1064 		: [old] "=&r" (old), [faulted] "=r" (faulted)
1065 		: [parent] "r" (parent), [return_hooker] "r" (return_hooker)
1066 		: "memory"
1067 	);
1068 
1069 	if (unlikely(faulted)) {
1070 		ftrace_graph_stop();
1071 		WARN_ON(1);
1072 		return;
1073 	}
1074 
1075 	if (function_graph_enter(old, self_addr, frame_pointer, parent))
1076 		*parent = old;
1077 }
1078 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
1079