xref: /openbmc/linux/arch/x86/kernel/alternative.c (revision 5bd8e16d)
1 #define pr_fmt(fmt) "SMP alternatives: " fmt
2 
3 #include <linux/module.h>
4 #include <linux/sched.h>
5 #include <linux/mutex.h>
6 #include <linux/list.h>
7 #include <linux/stringify.h>
8 #include <linux/kprobes.h>
9 #include <linux/mm.h>
10 #include <linux/vmalloc.h>
11 #include <linux/memory.h>
12 #include <linux/stop_machine.h>
13 #include <linux/slab.h>
14 #include <linux/kdebug.h>
15 #include <asm/alternative.h>
16 #include <asm/sections.h>
17 #include <asm/pgtable.h>
18 #include <asm/mce.h>
19 #include <asm/nmi.h>
20 #include <asm/cacheflush.h>
21 #include <asm/tlbflush.h>
22 #include <asm/io.h>
23 #include <asm/fixmap.h>
24 
25 #define MAX_PATCH_LEN (255-1)
26 
27 static int __initdata_or_module debug_alternative;
28 
29 static int __init debug_alt(char *str)
30 {
31 	debug_alternative = 1;
32 	return 1;
33 }
34 __setup("debug-alternative", debug_alt);
35 
36 static int noreplace_smp;
37 
38 static int __init setup_noreplace_smp(char *str)
39 {
40 	noreplace_smp = 1;
41 	return 1;
42 }
43 __setup("noreplace-smp", setup_noreplace_smp);
44 
45 #ifdef CONFIG_PARAVIRT
46 static int __initdata_or_module noreplace_paravirt = 0;
47 
48 static int __init setup_noreplace_paravirt(char *str)
49 {
50 	noreplace_paravirt = 1;
51 	return 1;
52 }
53 __setup("noreplace-paravirt", setup_noreplace_paravirt);
54 #endif
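
/*
 * Editorial note (not part of the original file): the three __setup()
 * handlers above are wired to kernel command-line parameters, so for
 * example booting with
 *
 *	... debug-alternative noreplace-smp noreplace-paravirt ...
 *
 * sets debug_alternative, noreplace_smp and noreplace_paravirt before any
 * of the patching below runs ("noreplace-paravirt" only exists when
 * CONFIG_PARAVIRT is enabled).
 */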
55 
56 #define DPRINTK(fmt, ...)				\
57 do {							\
58 	if (debug_alternative)				\
59 		printk(KERN_DEBUG fmt, ##__VA_ARGS__);	\
60 } while (0)
61 
62 /*
63  * Each GENERIC_NOPX is X bytes long and is defined as an array of bytes
64  * that make up that nop. To get from one nop to the next, we add to the
65  * base of the array an offset equal to the sum of the sizes of all the
66  * nops that precede the one we want.
67  *
68  * Note: The GENERIC_NOP5_ATOMIC is at the end, as it breaks the
69  * nice symmetry of sizes of the previous nops.
70  */
71 #if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64)
72 static const unsigned char intelnops[] =
73 {
74 	GENERIC_NOP1,
75 	GENERIC_NOP2,
76 	GENERIC_NOP3,
77 	GENERIC_NOP4,
78 	GENERIC_NOP5,
79 	GENERIC_NOP6,
80 	GENERIC_NOP7,
81 	GENERIC_NOP8,
82 	GENERIC_NOP5_ATOMIC
83 };
84 static const unsigned char * const intel_nops[ASM_NOP_MAX+2] =
85 {
86 	NULL,
87 	intelnops,
88 	intelnops + 1,
89 	intelnops + 1 + 2,
90 	intelnops + 1 + 2 + 3,
91 	intelnops + 1 + 2 + 3 + 4,
92 	intelnops + 1 + 2 + 3 + 4 + 5,
93 	intelnops + 1 + 2 + 3 + 4 + 5 + 6,
94 	intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
95 	intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
96 };
97 #endif
98 
99 #ifdef K8_NOP1
100 static const unsigned char k8nops[] =
101 {
102 	K8_NOP1,
103 	K8_NOP2,
104 	K8_NOP3,
105 	K8_NOP4,
106 	K8_NOP5,
107 	K8_NOP6,
108 	K8_NOP7,
109 	K8_NOP8,
110 	K8_NOP5_ATOMIC
111 };
112 static const unsigned char * const k8_nops[ASM_NOP_MAX+2] =
113 {
114 	NULL,
115 	k8nops,
116 	k8nops + 1,
117 	k8nops + 1 + 2,
118 	k8nops + 1 + 2 + 3,
119 	k8nops + 1 + 2 + 3 + 4,
120 	k8nops + 1 + 2 + 3 + 4 + 5,
121 	k8nops + 1 + 2 + 3 + 4 + 5 + 6,
122 	k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
123 	k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
124 };
125 #endif
126 
127 #if defined(K7_NOP1) && !defined(CONFIG_X86_64)
128 static const unsigned char k7nops[] =
129 {
130 	K7_NOP1,
131 	K7_NOP2,
132 	K7_NOP3,
133 	K7_NOP4,
134 	K7_NOP5,
135 	K7_NOP6,
136 	K7_NOP7,
137 	K7_NOP8,
138 	K7_NOP5_ATOMIC
139 };
140 static const unsigned char * const k7_nops[ASM_NOP_MAX+2] =
141 {
142 	NULL,
143 	k7nops,
144 	k7nops + 1,
145 	k7nops + 1 + 2,
146 	k7nops + 1 + 2 + 3,
147 	k7nops + 1 + 2 + 3 + 4,
148 	k7nops + 1 + 2 + 3 + 4 + 5,
149 	k7nops + 1 + 2 + 3 + 4 + 5 + 6,
150 	k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
151 	k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
152 };
153 #endif
154 
155 #ifdef P6_NOP1
156 static const unsigned char p6nops[] =
157 {
158 	P6_NOP1,
159 	P6_NOP2,
160 	P6_NOP3,
161 	P6_NOP4,
162 	P6_NOP5,
163 	P6_NOP6,
164 	P6_NOP7,
165 	P6_NOP8,
166 	P6_NOP5_ATOMIC
167 };
168 static const unsigned char * const p6_nops[ASM_NOP_MAX+2] =
169 {
170 	NULL,
171 	p6nops,
172 	p6nops + 1,
173 	p6nops + 1 + 2,
174 	p6nops + 1 + 2 + 3,
175 	p6nops + 1 + 2 + 3 + 4,
176 	p6nops + 1 + 2 + 3 + 4 + 5,
177 	p6nops + 1 + 2 + 3 + 4 + 5 + 6,
178 	p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
179 	p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8,
180 };
181 #endif
182 
183 /* Initialize these to a safe default */
184 #ifdef CONFIG_X86_64
185 const unsigned char * const *ideal_nops = p6_nops;
186 #else
187 const unsigned char * const *ideal_nops = intel_nops;
188 #endif
189 
190 void __init arch_init_ideal_nops(void)
191 {
192 	switch (boot_cpu_data.x86_vendor) {
193 	case X86_VENDOR_INTEL:
194 		/*
195 		 * Due to a decoder implementation quirk, some
196 		 * specific Intel CPUs actually perform better with
197 		 * the "k8_nops" than with the SDM-recommended NOPs.
198 		 */
199 		if (boot_cpu_data.x86 == 6 &&
200 		    boot_cpu_data.x86_model >= 0x0f &&
201 		    boot_cpu_data.x86_model != 0x1c &&
202 		    boot_cpu_data.x86_model != 0x26 &&
203 		    boot_cpu_data.x86_model != 0x27 &&
204 		    boot_cpu_data.x86_model < 0x30) {
205 			ideal_nops = k8_nops;
206 		} else if (boot_cpu_has(X86_FEATURE_NOPL)) {
207 			   ideal_nops = p6_nops;
208 		} else {
209 #ifdef CONFIG_X86_64
210 			ideal_nops = k8_nops;
211 #else
212 			ideal_nops = intel_nops;
213 #endif
214 		}
215 		break;
216 	default:
217 #ifdef CONFIG_X86_64
218 		ideal_nops = k8_nops;
219 #else
220 		if (boot_cpu_has(X86_FEATURE_K8))
221 			ideal_nops = k8_nops;
222 		else if (boot_cpu_has(X86_FEATURE_K7))
223 			ideal_nops = k7_nops;
224 		else
225 			ideal_nops = intel_nops;
226 #endif
227 	}
228 }
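
/*
 * Illustrative sketch (editorial): the nop families above differ only in
 * the byte patterns used for a given length.  From <asm/nops.h>, a 5-byte
 * nop is, for example,
 *
 *	P6_NOP5:	0f 1f 44 00 00	(a single "nopl" instruction)
 *	K8_NOP5:	66 66 90 66 90	(operand-size prefixed short nops)
 *
 * arch_init_ideal_nops() only chooses which table decodes fastest on the
 * boot CPU; the byte values themselves come straight from <asm/nops.h>.
 */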
229 
230 /* Use this to add nops to a buffer, then text_poke the whole buffer. */
231 static void __init_or_module add_nops(void *insns, unsigned int len)
232 {
233 	while (len > 0) {
234 		unsigned int noplen = len;
235 		if (noplen > ASM_NOP_MAX)
236 			noplen = ASM_NOP_MAX;
237 		memcpy(insns, ideal_nops[noplen], noplen);
238 		insns += noplen;
239 		len -= noplen;
240 	}
241 }
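
/*
 * Illustrative sketch (editorial): with ASM_NOP_MAX == 8, a 13-byte hole
 * handed to add_nops() is filled greedily with one 8-byte ideal nop
 * followed by one 5-byte nop:
 *
 *	u8 buf[13];
 *	add_nops(buf, sizeof(buf));
 *		buf[0..7]  == ideal_nops[8][0..7]
 *		buf[8..12] == ideal_nops[5][0..4]
 */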
242 
243 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
244 extern s32 __smp_locks[], __smp_locks_end[];
245 void *text_poke_early(void *addr, const void *opcode, size_t len);
246 
247 /* Replace instructions with better alternatives for this CPU type.
248    This runs before SMP is initialized to avoid SMP problems with
249    self-modifying code. This implies that asymmetric systems where
250    APs have fewer capabilities than the boot processor are not handled.
251    Tough. Make sure you disable such features by hand. */
252 
253 void __init_or_module apply_alternatives(struct alt_instr *start,
254 					 struct alt_instr *end)
255 {
256 	struct alt_instr *a;
257 	u8 *instr, *replacement;
258 	u8 insnbuf[MAX_PATCH_LEN];
259 
260 	DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
261 	/*
262 	 * The scan order should be from start to end. A later scanned
263 	 * alternative code can overwrite a previous scanned alternative code.
264 	 * Some kernel functions (e.g. memcpy, memset, etc) use this order to
265 	 * patch code.
266 	 *
267 	 * So be careful if you want to change the scan order to any other
268 	 * order.
269 	 */
270 	for (a = start; a < end; a++) {
271 		instr = (u8 *)&a->instr_offset + a->instr_offset;
272 		replacement = (u8 *)&a->repl_offset + a->repl_offset;
273 		BUG_ON(a->replacementlen > a->instrlen);
274 		BUG_ON(a->instrlen > sizeof(insnbuf));
275 		BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
276 		if (!boot_cpu_has(a->cpuid))
277 			continue;
278 
279 		memcpy(insnbuf, replacement, a->replacementlen);
280 
281 		/* 0xe8 is a relative call; fix up the offset. */
282 		if (*insnbuf == 0xe8 && a->replacementlen == 5)
283 		    *(s32 *)(insnbuf + 1) += replacement - instr;
284 
285 		add_nops(insnbuf + a->replacementlen,
286 			 a->instrlen - a->replacementlen);
287 
288 		text_poke_early(instr, insnbuf, a->instrlen);
289 	}
290 }
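
/*
 * Illustrative sketch (editorial): a typical producer of these entries is
 * the alternative() macro from <asm/alternative.h>, e.g. (hypothetical
 * feature and function names)
 *
 *	alternative("call old_impl", "call new_impl", X86_FEATURE_FOO);
 *
 * which emits the old instruction into .text, the replacement into
 * .altinstr_replacement, and a struct alt_instr record in .altinstructions
 * carrying the two relative offsets, the feature bit and both lengths.
 * The loop above then copies the replacement over the original site when
 * the CPU has the feature, adjusts a leading 0xe8 call displacement for
 * its new home, and pads the remainder with nops.
 */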
291 
292 #ifdef CONFIG_SMP
293 
294 static void alternatives_smp_lock(const s32 *start, const s32 *end,
295 				  u8 *text, u8 *text_end)
296 {
297 	const s32 *poff;
298 
299 	mutex_lock(&text_mutex);
300 	for (poff = start; poff < end; poff++) {
301 		u8 *ptr = (u8 *)poff + *poff;
302 
303 		if (!*poff || ptr < text || ptr >= text_end)
304 			continue;
305 		/* turn DS segment override prefix into lock prefix */
306 		if (*ptr == 0x3e)
307 			text_poke(ptr, ((unsigned char []){0xf0}), 1);
308 	}
309 	mutex_unlock(&text_mutex);
310 }
311 
312 static void alternatives_smp_unlock(const s32 *start, const s32 *end,
313 				    u8 *text, u8 *text_end)
314 {
315 	const s32 *poff;
316 
317 	mutex_lock(&text_mutex);
318 	for (poff = start; poff < end; poff++) {
319 		u8 *ptr = (u8 *)poff + *poff;
320 
321 		if (!*poff || ptr < text || ptr >= text_end)
322 			continue;
323 		/* turn lock prefix into DS segment override prefix */
324 		if (*ptr == 0xf0)
325 			text_poke(ptr, ((unsigned char []){0x3E}), 1);
326 	}
327 	mutex_unlock(&text_mutex);
328 }
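
/*
 * Illustrative sketch (editorial): the LOCK_PREFIX macro records each
 * lock-prefixed instruction in .smp_locks, so a locked op in .text such
 * as (byte values illustrative)
 *
 *	f0 0f b1 ...		lock cmpxchg ...
 *
 * can be rewritten in place by the two helpers above to
 *
 *	3e 0f b1 ...		ds cmpxchg ...
 *
 * on a UP system (the DS override is a no-op here) and back to f0 when a
 * second CPU comes online.  Only the single prefix byte ever changes.
 */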
329 
330 struct smp_alt_module {
331 	/* module owning these entries (NULL for the core kernel) */
332 	struct module	*mod;
333 	char		*name;
334 
335 	/* ptrs to lock prefixes */
336 	const s32	*locks;
337 	const s32	*locks_end;
338 
339 	/* .text segment, needed to avoid patching init code ;) */
340 	u8		*text;
341 	u8		*text_end;
342 
343 	struct list_head next;
344 };
345 static LIST_HEAD(smp_alt_modules);
346 static DEFINE_MUTEX(smp_alt);
347 static bool uniproc_patched = false;	/* protected by smp_alt */
348 
349 void __init_or_module alternatives_smp_module_add(struct module *mod,
350 						  char *name,
351 						  void *locks, void *locks_end,
352 						  void *text,  void *text_end)
353 {
354 	struct smp_alt_module *smp;
355 
356 	mutex_lock(&smp_alt);
357 	if (!uniproc_patched)
358 		goto unlock;
359 
360 	if (num_possible_cpus() == 1)
361 		/* Don't bother remembering, we'll never have to undo it. */
362 		goto smp_unlock;
363 
364 	smp = kzalloc(sizeof(*smp), GFP_KERNEL);
365 	if (NULL == smp)
366 		/* we'll run the (safe but slow) SMP code then ... */
367 		goto unlock;
368 
369 	smp->mod	= mod;
370 	smp->name	= name;
371 	smp->locks	= locks;
372 	smp->locks_end	= locks_end;
373 	smp->text	= text;
374 	smp->text_end	= text_end;
375 	DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
376 		__func__, smp->locks, smp->locks_end,
377 		smp->text, smp->text_end, smp->name);
378 
379 	list_add_tail(&smp->next, &smp_alt_modules);
380 smp_unlock:
381 	alternatives_smp_unlock(locks, locks_end, text, text_end);
382 unlock:
383 	mutex_unlock(&smp_alt);
384 }
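
/*
 * Usage note (editorial): the core kernel registers itself from
 * alternative_instructions() below as
 *
 *	alternatives_smp_module_add(NULL, "core kernel",
 *				    __smp_locks, __smp_locks_end,
 *				    _text, _etext);
 *
 * and module load is expected to do the same for each module's .smp_locks
 * section, so alternatives_enable_smp() can later re-lock every
 * registered range.
 */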
385 
386 void __init_or_module alternatives_smp_module_del(struct module *mod)
387 {
388 	struct smp_alt_module *item;
389 
390 	mutex_lock(&smp_alt);
391 	list_for_each_entry(item, &smp_alt_modules, next) {
392 		if (mod != item->mod)
393 			continue;
394 		list_del(&item->next);
395 		kfree(item);
396 		break;
397 	}
398 	mutex_unlock(&smp_alt);
399 }
400 
401 void alternatives_enable_smp(void)
402 {
403 	struct smp_alt_module *mod;
404 
405 #ifdef CONFIG_LOCKDEP
406 	/*
407 	 * An older binutils section handling bug prevented
408 	 * alternatives replacement from working reliably.
409 	 *
410 	 * If this still occurs then you should see a hang
411 	 * or crash shortly after this line:
412 	 */
413 	pr_info("lockdep: fixing up alternatives\n");
414 #endif
415 
416 	/* Why bother if there are no other CPUs? */
417 	BUG_ON(num_possible_cpus() == 1);
418 
419 	mutex_lock(&smp_alt);
420 
421 	if (uniproc_patched) {
422 		pr_info("switching to SMP code\n");
423 		BUG_ON(num_online_cpus() != 1);
424 		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
425 		clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
426 		list_for_each_entry(mod, &smp_alt_modules, next)
427 			alternatives_smp_lock(mod->locks, mod->locks_end,
428 					      mod->text, mod->text_end);
429 		uniproc_patched = false;
430 	}
431 	mutex_unlock(&smp_alt);
432 }
433 
434 /* Return 1 if the address range is reserved for smp-alternatives */
435 int alternatives_text_reserved(void *start, void *end)
436 {
437 	struct smp_alt_module *mod;
438 	const s32 *poff;
439 	u8 *text_start = start;
440 	u8 *text_end = end;
441 
442 	list_for_each_entry(mod, &smp_alt_modules, next) {
443 		if (mod->text > text_end || mod->text_end < text_start)
444 			continue;
445 		for (poff = mod->locks; poff < mod->locks_end; poff++) {
446 			const u8 *ptr = (const u8 *)poff + *poff;
447 
448 			if (text_start <= ptr && text_end > ptr)
449 				return 1;
450 		}
451 	}
452 
453 	return 0;
454 }
455 #endif
456 
457 #ifdef CONFIG_PARAVIRT
458 void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
459 				     struct paravirt_patch_site *end)
460 {
461 	struct paravirt_patch_site *p;
462 	char insnbuf[MAX_PATCH_LEN];
463 
464 	if (noreplace_paravirt)
465 		return;
466 
467 	for (p = start; p < end; p++) {
468 		unsigned int used;
469 
470 		BUG_ON(p->len > MAX_PATCH_LEN);
471 		/* prep the buffer with the original instructions */
472 		memcpy(insnbuf, p->instr, p->len);
473 		used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf,
474 					 (unsigned long)p->instr, p->len);
475 
476 		BUG_ON(used > p->len);
477 
478 		/* Pad the rest with nops */
479 		add_nops(insnbuf + used, p->len - used);
480 		text_poke_early(p->instr, insnbuf, p->len);
481 	}
482 }
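
/*
 * Illustrative note (editorial): the patch sites walked above are emitted
 * by the paravirt call macros in <asm/paravirt.h>; each records the site
 * address, its length, the clobber mask and which pv op it invokes.  On
 * bare metal pv_init_ops.patch typically points at native_patch(), which
 * may replace the indirect call with the native sequence (for instance a
 * plain "cli" for the irq-disable op); anything left over is nop-padded
 * right here.
 */
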
483 extern struct paravirt_patch_site __start_parainstructions[],
484 	__stop_parainstructions[];
485 #endif	/* CONFIG_PARAVIRT */
486 
487 void __init alternative_instructions(void)
488 {
489 	/* The patching is not fully atomic, so try to avoid local interruptions
490 	   that might execute the to-be-patched code.
491 	   Other CPUs are not running. */
492 	stop_nmi();
493 
494 	/*
495 	 * Don't stop machine check exceptions while patching.
496 	 * MCEs only happen when something got corrupted and in this
497 	 * case we must do something about the corruption.
498 	 * Ignoring it is worse than an unlikely patching race.
499 	 * Also machine checks tend to be broadcast and if one CPU
500 	 * goes into machine check the others follow quickly, so we don't
501 	 * expect a machine check to cause undue problems during code
502 	 * patching.
503 	 */
504 
505 	apply_alternatives(__alt_instructions, __alt_instructions_end);
506 
507 #ifdef CONFIG_SMP
508 	/* Patch to UP if other CPUs are not imminent. */
509 	if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) {
510 		uniproc_patched = true;
511 		alternatives_smp_module_add(NULL, "core kernel",
512 					    __smp_locks, __smp_locks_end,
513 					    _text, _etext);
514 	}
515 
516 	if (!uniproc_patched || num_possible_cpus() == 1)
517 		free_init_pages("SMP alternatives",
518 				(unsigned long)__smp_locks,
519 				(unsigned long)__smp_locks_end);
520 #endif
521 
522 	apply_paravirt(__parainstructions, __parainstructions_end);
523 
524 	restart_nmi();
525 }
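
/*
 * Illustrative note (editorial): the "patch to UP" path above also
 * triggers when an SMP kernel is booted with maxcpus=1 or nosmp
 * (setup_max_cpus <= 1).  If another CPU is brought online later,
 * alternatives_enable_smp() is expected to restore the lock prefixes
 * using the module list built by alternatives_smp_module_add().
 */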
526 
527 /**
528  * text_poke_early - Update instructions on a live kernel at boot time
529  * @addr: address to modify
530  * @opcode: source of the copy
531  * @len: length to copy
532  *
533  * When you use this code to patch more than one byte of an instruction
534  * you need to make sure that other CPUs cannot execute this code in parallel.
535  * Also no thread must be currently preempted in the middle of these
536  * instructions. And on the local CPU you need to be protected against NMI or MCE
537  * handlers seeing an inconsistent instruction while you patch.
538  */
539 void *__init_or_module text_poke_early(void *addr, const void *opcode,
540 					      size_t len)
541 {
542 	unsigned long flags;
543 	local_irq_save(flags);
544 	memcpy(addr, opcode, len);
545 	sync_core();
546 	local_irq_restore(flags);
547 	/* Could also do a CLFLUSH here to speed up CPU recovery; but
548 	   that causes hangs on some VIA CPUs. */
549 	return addr;
550 }
551 
552 /**
553  * text_poke - Update instructions on a live kernel
554  * @addr: address to modify
555  * @opcode: source of the copy
556  * @len: length to copy
557  *
558  * Only atomic text poke/set should be allowed when not doing early patching.
559  * It means the size must be writable atomically and the address must be aligned
560  * in a way that permits an atomic write. It also makes sure we fit on a single
561  * page.
562  *
563  * Note: Must be called under text_mutex.
564  */
565 void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
566 {
567 	unsigned long flags;
568 	char *vaddr;
569 	struct page *pages[2];
570 	int i;
571 
572 	if (!core_kernel_text((unsigned long)addr)) {
573 		pages[0] = vmalloc_to_page(addr);
574 		pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
575 	} else {
576 		pages[0] = virt_to_page(addr);
577 		WARN_ON(!PageReserved(pages[0]));
578 		pages[1] = virt_to_page(addr + PAGE_SIZE);
579 	}
580 	BUG_ON(!pages[0]);
581 	local_irq_save(flags);
582 	set_fixmap(FIX_TEXT_POKE0, page_to_phys(pages[0]));
583 	if (pages[1])
584 		set_fixmap(FIX_TEXT_POKE1, page_to_phys(pages[1]));
585 	vaddr = (char *)fix_to_virt(FIX_TEXT_POKE0);
586 	memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
587 	clear_fixmap(FIX_TEXT_POKE0);
588 	if (pages[1])
589 		clear_fixmap(FIX_TEXT_POKE1);
590 	local_flush_tlb();
591 	sync_core();
592 	/* Could also do a CLFLUSH here to speed up CPU recovery; but
593 	   that causes hangs on some VIA CPUs. */
594 	for (i = 0; i < len; i++)
595 		BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]);
596 	local_irq_restore(flags);
597 	return addr;
598 }
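
/*
 * Usage note (editorial): because text_poke() writes through a temporary
 * FIX_TEXT_POKE* mapping, it works even when the kernel text itself is
 * mapped read-only.  A minimal caller, as used by the smp-lock fixup
 * above, is
 *
 *	mutex_lock(&text_mutex);
 *	text_poke(ptr, ((unsigned char []){0xf0}), 1);
 *	mutex_unlock(&text_mutex);
 *
 * i.e. a single-byte, page-local write done under text_mutex.
 */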
599 
600 static void do_sync_core(void *info)
601 {
602 	sync_core();
603 }
604 
605 static bool bp_patching_in_progress;
606 static void *bp_int3_handler, *bp_int3_addr;
607 
608 int poke_int3_handler(struct pt_regs *regs)
609 {
610 	/* bp_patching_in_progress: pairs with the smp_wmb() in text_poke_bp() */
611 	smp_rmb();
612 
613 	if (likely(!bp_patching_in_progress))
614 		return 0;
615 
616 	if (user_mode_vm(regs) || regs->ip != (unsigned long)bp_int3_addr)
617 		return 0;
618 
619 	/* set up the specified breakpoint handler */
620 	regs->ip = (unsigned long) bp_int3_handler;
621 
622 	return 1;
623 
624 }
625 
626 /**
627  * text_poke_bp() -- update instructions on a live SMP kernel
628  * @addr:	address to patch
629  * @opcode:	opcode of new instruction
630  * @len:	length to copy
631  * @handler:	address to jump to when the temporary breakpoint is hit
632  *
633  * Modify a multi-byte instruction by using an int3 breakpoint on SMP.
634  * We completely avoid stop_machine() here, and achieve the
635  * synchronization using an int3 breakpoint.
636  *
637  * The way it is done:
638  *	- add an int3 trap to the address that will be patched
639  *	- sync cores
640  *	- update all but the first byte of the patched range
641  *	- sync cores
642  *	- replace the first byte (int3) with the first byte of
643  *	  the replacing opcode
644  *	- sync cores
645  *
646  * Note: must be called under text_mutex.
647  */
648 void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
649 {
650 	unsigned char int3 = 0xcc;
651 
652 	bp_int3_handler = handler;
653 	bp_int3_addr = (u8 *)addr + sizeof(int3);
654 	bp_patching_in_progress = true;
655 	/*
656 	 * Corresponding read barrier in int3 notifier for
657 	 * making sure the in_progress flag is correctly ordered wrt.
658 	 * patching
659 	 */
660 	smp_wmb();
661 
662 	text_poke(addr, &int3, sizeof(int3));
663 
664 	on_each_cpu(do_sync_core, NULL, 1);
665 
666 	if (len - sizeof(int3) > 0) {
667 		/* patch all but the first byte */
668 		text_poke((char *)addr + sizeof(int3),
669 			  (const char *) opcode + sizeof(int3),
670 			  len - sizeof(int3));
671 		/*
672 		 * According to Intel, this core syncing is very likely
673 		 * not necessary and we'd be safe even without it. But
674 		 * better safe than sorry (plus there's not only Intel).
675 		 */
676 		on_each_cpu(do_sync_core, NULL, 1);
677 	}
678 
679 	/* patch the first byte */
680 	text_poke(addr, opcode, sizeof(int3));
681 
682 	on_each_cpu(do_sync_core, NULL, 1);
683 
684 	bp_patching_in_progress = false;
685 	smp_wmb();
686 
687 	return addr;
688 }
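
/*
 * Illustrative sketch (editorial): patching a 5-byte site with
 * text_poke_bp() goes through these intermediate states ("oo"/"nn" stand
 * for old/new bytes, values hypothetical):
 *
 *	oo oo oo oo oo		original instruction
 *	cc oo oo oo oo		int3 armed, poke_int3_handler() redirects
 *	cc nn nn nn nn		tail rewritten, first byte still traps
 *	nn nn nn nn nn		final instruction live
 *
 * with a sync_core() IPI between every step.  Callers such as the jump
 * label code are expected to pass a handler address for the trap to
 * continue at, so a CPU that hits the int3 window behaves as if it had
 * executed either the old or the new instruction.
 */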
689 
690