xref: /openbmc/linux/arch/x86/kernel/alternative.c (revision f15cbe6f1a4b4d9df59142fc8e4abb973302cf44)
1 #include <linux/module.h>
2 #include <linux/sched.h>
3 #include <linux/mutex.h>
4 #include <linux/list.h>
5 #include <linux/kprobes.h>
6 #include <linux/mm.h>
7 #include <linux/vmalloc.h>
8 #include <asm/alternative.h>
9 #include <asm/sections.h>
10 #include <asm/pgtable.h>
11 #include <asm/mce.h>
12 #include <asm/nmi.h>
13 #include <asm/vsyscall.h>
14 #include <asm/cacheflush.h>
15 #include <asm/io.h>
16 
17 #define MAX_PATCH_LEN (255-1)
18 
#ifdef CONFIG_HOTPLUG_CPU
/*
 * Non-zero selects the patch-once-at-boot mode: the SMP alternatives are
 * applied a single time instead of being tracked for later switching.
 */
static int smp_alt_once;

/* Handler for the "smp-alt-boot" kernel parameter; the value is ignored. */
static int __init bootonly(char *arg)
{
	smp_alt_once = 1;
	return 1;
}
__setup("smp-alt-boot", bootonly);
#else
/* Without CPU hotplug the flag is a compile-time constant. */
#define smp_alt_once 1
#endif
31 
32 static int debug_alternative;
33 
34 static int __init debug_alt(char *str)
35 {
36 	debug_alternative = 1;
37 	return 1;
38 }
39 __setup("debug-alternative", debug_alt);
40 
41 static int noreplace_smp;
42 
43 static int __init setup_noreplace_smp(char *str)
44 {
45 	noreplace_smp = 1;
46 	return 1;
47 }
48 __setup("noreplace-smp", setup_noreplace_smp);
49 
#ifdef CONFIG_PARAVIRT
/* Non-zero makes apply_paravirt() return without patching anything. */
static int noreplace_paravirt;

/* Handler for the "noreplace-paravirt" kernel parameter; the value is ignored. */
static int __init setup_noreplace_paravirt(char *arg)
{
	noreplace_paravirt = 1;
	return 1;
}
__setup("noreplace-paravirt", setup_noreplace_paravirt);
#endif
60 
/*
 * Debug printk, emitted only when debug_alternative is set.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement: a bare "if" would capture an "else" that follows the
 * call site.  "##args" drops the trailing comma when no variadic
 * arguments are supplied, so a plain format string works too.
 */
#define DPRINTK(fmt, args...)					\
do {								\
	if (debug_alternative)					\
		printk(KERN_DEBUG fmt, ##args);			\
} while (0)
63 
#ifdef GENERIC_NOP1
/*
 * The nop encodings exist only as inline-assembly strings in the include
 * files, so they cannot easily be turned into C strings.  Emit them into
 * .rodata with a top-level asm statement and reach them via "intelnops".
 */
asm("\t.section .rodata, \"a\"\nintelnops: "
	GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
	GENERIC_NOP7 GENERIC_NOP8
    "\t.previous");
extern const unsigned char intelnops[];

/*
 * intel_nops[n] is the start of the n-byte nop.  The nops are emitted back
 * to back in increasing size, so entry n sits 1 + 2 + ... + (n - 1) bytes
 * into the blob (assuming each GENERIC_NOPn emits exactly n bytes).
 */
static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = {
	[0] = NULL,
	[1] = intelnops,
	[2] = intelnops + 1,
	[3] = intelnops + 3,
	[4] = intelnops + 6,
	[5] = intelnops + 10,
	[6] = intelnops + 15,
	[7] = intelnops + 21,
	[8] = intelnops + 28,
};
#endif
85 
#ifdef K8_NOP1
/* K8 nop encodings, emitted into .rodata under the "k8nops" label. */
asm("\t.section .rodata, \"a\"\nk8nops: "
	K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
	K8_NOP7 K8_NOP8
    "\t.previous");
extern const unsigned char k8nops[];

/*
 * k8_nops[n] is the start of the n-byte nop; entry n sits
 * 1 + 2 + ... + (n - 1) bytes into the blob (assuming each K8_NOPn emits
 * exactly n bytes).
 */
static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = {
	[0] = NULL,
	[1] = k8nops,
	[2] = k8nops + 1,
	[3] = k8nops + 3,
	[4] = k8nops + 6,
	[5] = k8nops + 10,
	[6] = k8nops + 15,
	[7] = k8nops + 21,
	[8] = k8nops + 28,
};
#endif
104 
#ifdef K7_NOP1
/* K7 nop encodings, emitted into .rodata under the "k7nops" label. */
asm("\t.section .rodata, \"a\"\nk7nops: "
	K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
	K7_NOP7 K7_NOP8
    "\t.previous");
extern const unsigned char k7nops[];

/*
 * k7_nops[n] is the start of the n-byte nop; entry n sits
 * 1 + 2 + ... + (n - 1) bytes into the blob (assuming each K7_NOPn emits
 * exactly n bytes).
 */
static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = {
	[0] = NULL,
	[1] = k7nops,
	[2] = k7nops + 1,
	[3] = k7nops + 3,
	[4] = k7nops + 6,
	[5] = k7nops + 10,
	[6] = k7nops + 15,
	[7] = k7nops + 21,
	[8] = k7nops + 28,
};
#endif
123 
#ifdef P6_NOP1
/* P6 nop encodings, emitted into .rodata under the "p6nops" label. */
asm("\t.section .rodata, \"a\"\np6nops: "
	P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6
	P6_NOP7 P6_NOP8
    "\t.previous");
extern const unsigned char p6nops[];

/*
 * p6_nops[n] is the start of the n-byte nop; entry n sits
 * 1 + 2 + ... + (n - 1) bytes into the blob (assuming each P6_NOPn emits
 * exactly n bytes).
 */
static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = {
	[0] = NULL,
	[1] = p6nops,
	[2] = p6nops + 1,
	[3] = p6nops + 3,
	[4] = p6nops + 6,
	[5] = p6nops + 10,
	[6] = p6nops + 15,
	[7] = p6nops + 21,
	[8] = p6nops + 28,
};
#endif
142 
143 #ifdef CONFIG_X86_64
144 
145 extern char __vsyscall_0;
146 const unsigned char *const *find_nop_table(void)
147 {
148 	return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
149 	       boot_cpu_data.x86 < 6 ? k8_nops : p6_nops;
150 }
151 
152 #else /* CONFIG_X86_64 */
153 
154 static const struct nop {
155 	int cpuid;
156 	const unsigned char *const *noptable;
157 } noptypes[] = {
158 	{ X86_FEATURE_K8, k8_nops },
159 	{ X86_FEATURE_K7, k7_nops },
160 	{ X86_FEATURE_P4, p6_nops },
161 	{ X86_FEATURE_P3, p6_nops },
162 	{ -1, NULL }
163 };
164 
165 const unsigned char *const *find_nop_table(void)
166 {
167 	const unsigned char *const *noptable = intel_nops;
168 	int i;
169 
170 	for (i = 0; noptypes[i].cpuid >= 0; i++) {
171 		if (boot_cpu_has(noptypes[i].cpuid)) {
172 			noptable = noptypes[i].noptable;
173 			break;
174 		}
175 	}
176 	return noptable;
177 }
178 
179 #endif /* CONFIG_X86_64 */
180 
181 /* Use this to add nops to a buffer, then text_poke the whole buffer. */
182 void add_nops(void *insns, unsigned int len)
183 {
184 	const unsigned char *const *noptable = find_nop_table();
185 
186 	while (len > 0) {
187 		unsigned int noplen = len;
188 		if (noplen > ASM_NOP_MAX)
189 			noplen = ASM_NOP_MAX;
190 		memcpy(insns, noptable[noplen], noplen);
191 		insns += noplen;
192 		len -= noplen;
193 	}
194 }
195 EXPORT_SYMBOL_GPL(add_nops);
196 
197 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
198 extern u8 *__smp_locks[], *__smp_locks_end[];
199 
200 /* Replace instructions with better alternatives for this CPU type.
201    This runs before SMP is initialized to avoid SMP problems with
202    self modifying code. This implies that assymetric systems where
203    APs have less capabilities than the boot processor are not handled.
204    Tough. Make sure you disable such features by hand. */
205 
206 void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
207 {
208 	struct alt_instr *a;
209 	char insnbuf[MAX_PATCH_LEN];
210 
211 	DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
212 	for (a = start; a < end; a++) {
213 		u8 *instr = a->instr;
214 		BUG_ON(a->replacementlen > a->instrlen);
215 		BUG_ON(a->instrlen > sizeof(insnbuf));
216 		if (!boot_cpu_has(a->cpuid))
217 			continue;
218 #ifdef CONFIG_X86_64
219 		/* vsyscall code is not mapped yet. resolve it manually. */
220 		if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) {
221 			instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
222 			DPRINTK("%s: vsyscall fixup: %p => %p\n",
223 				__func__, a->instr, instr);
224 		}
225 #endif
226 		memcpy(insnbuf, a->replacement, a->replacementlen);
227 		add_nops(insnbuf + a->replacementlen,
228 			 a->instrlen - a->replacementlen);
229 		text_poke_early(instr, insnbuf, a->instrlen);
230 	}
231 }
232 
233 #ifdef CONFIG_SMP
234 
235 static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
236 {
237 	u8 **ptr;
238 
239 	for (ptr = start; ptr < end; ptr++) {
240 		if (*ptr < text)
241 			continue;
242 		if (*ptr > text_end)
243 			continue;
244 		text_poke(*ptr, ((unsigned char []){0xf0}), 1); /* add lock prefix */
245 	};
246 }
247 
248 static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
249 {
250 	u8 **ptr;
251 	char insn[1];
252 
253 	if (noreplace_smp)
254 		return;
255 
256 	add_nops(insn, 1);
257 	for (ptr = start; ptr < end; ptr++) {
258 		if (*ptr < text)
259 			continue;
260 		if (*ptr > text_end)
261 			continue;
262 		text_poke(*ptr, insn, 1);
263 	};
264 }
265 
266 struct smp_alt_module {
267 	/* what is this ??? */
268 	struct module	*mod;
269 	char		*name;
270 
271 	/* ptrs to lock prefixes */
272 	u8		**locks;
273 	u8		**locks_end;
274 
275 	/* .text segment, needed to avoid patching init code ;) */
276 	u8		*text;
277 	u8		*text_end;
278 
279 	struct list_head next;
280 };
281 static LIST_HEAD(smp_alt_modules);
282 static DEFINE_MUTEX(smp_alt);
283 static int smp_mode = 1;	/* protected by smp_alt */
284 
285 void alternatives_smp_module_add(struct module *mod, char *name,
286 				 void *locks, void *locks_end,
287 				 void *text,  void *text_end)
288 {
289 	struct smp_alt_module *smp;
290 
291 	if (noreplace_smp)
292 		return;
293 
294 	if (smp_alt_once) {
295 		if (boot_cpu_has(X86_FEATURE_UP))
296 			alternatives_smp_unlock(locks, locks_end,
297 						text, text_end);
298 		return;
299 	}
300 
301 	smp = kzalloc(sizeof(*smp), GFP_KERNEL);
302 	if (NULL == smp)
303 		return; /* we'll run the (safe but slow) SMP code then ... */
304 
305 	smp->mod	= mod;
306 	smp->name	= name;
307 	smp->locks	= locks;
308 	smp->locks_end	= locks_end;
309 	smp->text	= text;
310 	smp->text_end	= text_end;
311 	DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
312 		__func__, smp->locks, smp->locks_end,
313 		smp->text, smp->text_end, smp->name);
314 
315 	mutex_lock(&smp_alt);
316 	list_add_tail(&smp->next, &smp_alt_modules);
317 	if (boot_cpu_has(X86_FEATURE_UP))
318 		alternatives_smp_unlock(smp->locks, smp->locks_end,
319 					smp->text, smp->text_end);
320 	mutex_unlock(&smp_alt);
321 }
322 
323 void alternatives_smp_module_del(struct module *mod)
324 {
325 	struct smp_alt_module *item;
326 
327 	if (smp_alt_once || noreplace_smp)
328 		return;
329 
330 	mutex_lock(&smp_alt);
331 	list_for_each_entry(item, &smp_alt_modules, next) {
332 		if (mod != item->mod)
333 			continue;
334 		list_del(&item->next);
335 		mutex_unlock(&smp_alt);
336 		DPRINTK("%s: %s\n", __func__, item->name);
337 		kfree(item);
338 		return;
339 	}
340 	mutex_unlock(&smp_alt);
341 }
342 
343 void alternatives_smp_switch(int smp)
344 {
345 	struct smp_alt_module *mod;
346 
347 #ifdef CONFIG_LOCKDEP
348 	/*
349 	 * Older binutils section handling bug prevented
350 	 * alternatives-replacement from working reliably.
351 	 *
352 	 * If this still occurs then you should see a hang
353 	 * or crash shortly after this line:
354 	 */
355 	printk("lockdep: fixing up alternatives.\n");
356 #endif
357 
358 	if (noreplace_smp || smp_alt_once)
359 		return;
360 	BUG_ON(!smp && (num_online_cpus() > 1));
361 
362 	mutex_lock(&smp_alt);
363 
364 	/*
365 	 * Avoid unnecessary switches because it forces JIT based VMs to
366 	 * throw away all cached translations, which can be quite costly.
367 	 */
368 	if (smp == smp_mode) {
369 		/* nothing */
370 	} else if (smp) {
371 		printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
372 		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
373 		clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
374 		list_for_each_entry(mod, &smp_alt_modules, next)
375 			alternatives_smp_lock(mod->locks, mod->locks_end,
376 					      mod->text, mod->text_end);
377 	} else {
378 		printk(KERN_INFO "SMP alternatives: switching to UP code\n");
379 		set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
380 		set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
381 		list_for_each_entry(mod, &smp_alt_modules, next)
382 			alternatives_smp_unlock(mod->locks, mod->locks_end,
383 						mod->text, mod->text_end);
384 	}
385 	smp_mode = smp;
386 	mutex_unlock(&smp_alt);
387 }
388 
389 #endif
390 
391 #ifdef CONFIG_PARAVIRT
392 void apply_paravirt(struct paravirt_patch_site *start,
393 		    struct paravirt_patch_site *end)
394 {
395 	struct paravirt_patch_site *p;
396 	char insnbuf[MAX_PATCH_LEN];
397 
398 	if (noreplace_paravirt)
399 		return;
400 
401 	for (p = start; p < end; p++) {
402 		unsigned int used;
403 
404 		BUG_ON(p->len > MAX_PATCH_LEN);
405 		/* prep the buffer with the original instructions */
406 		memcpy(insnbuf, p->instr, p->len);
407 		used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf,
408 					 (unsigned long)p->instr, p->len);
409 
410 		BUG_ON(used > p->len);
411 
412 		/* Pad the rest with nops */
413 		add_nops(insnbuf + used, p->len - used);
414 		text_poke_early(p->instr, insnbuf, p->len);
415 	}
416 }
417 extern struct paravirt_patch_site __start_parainstructions[],
418 	__stop_parainstructions[];
419 #endif	/* CONFIG_PARAVIRT */
420 
421 void __init alternative_instructions(void)
422 {
423 	/* The patching is not fully atomic, so try to avoid local interruptions
424 	   that might execute the to be patched code.
425 	   Other CPUs are not running. */
426 	stop_nmi();
427 #ifdef CONFIG_X86_MCE
428 	stop_mce();
429 #endif
430 
431 	apply_alternatives(__alt_instructions, __alt_instructions_end);
432 
433 	/* switch to patch-once-at-boottime-only mode and free the
434 	 * tables in case we know the number of CPUs will never ever
435 	 * change */
436 #ifdef CONFIG_HOTPLUG_CPU
437 	if (num_possible_cpus() < 2)
438 		smp_alt_once = 1;
439 #endif
440 
441 #ifdef CONFIG_SMP
442 	if (smp_alt_once) {
443 		if (1 == num_possible_cpus()) {
444 			printk(KERN_INFO "SMP alternatives: switching to UP code\n");
445 			set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
446 			set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
447 
448 			alternatives_smp_unlock(__smp_locks, __smp_locks_end,
449 						_text, _etext);
450 		}
451 	} else {
452 		alternatives_smp_module_add(NULL, "core kernel",
453 					    __smp_locks, __smp_locks_end,
454 					    _text, _etext);
455 
456 		/* Only switch to UP mode if we don't immediately boot others */
457 		if (num_possible_cpus() == 1 || setup_max_cpus <= 1)
458 			alternatives_smp_switch(0);
459 	}
460 #endif
461  	apply_paravirt(__parainstructions, __parainstructions_end);
462 
463 	if (smp_alt_once)
464 		free_init_pages("SMP alternatives",
465 				(unsigned long)__smp_locks,
466 				(unsigned long)__smp_locks_end);
467 
468 	restart_nmi();
469 #ifdef CONFIG_X86_MCE
470 	restart_mce();
471 #endif
472 }
473 
/**
 * text_poke_early - Update instructions on a live kernel at boot time
 * @addr: address to modify
 * @opcode: source of the copy
 * @len: length to copy
 *
 * Copies @len bytes from @opcode directly to @addr with local interrupts
 * disabled, then serializes the CPU.  Returns @addr.
 *
 * When more than one byte of an instruction is patched this way, the
 * caller has to guarantee that no other CPU can execute the code in
 * parallel and that no thread is currently preempted in the middle of
 * these instructions.  On the local CPU the caller must additionally be
 * protected against NMI or MCE handlers seeing an inconsistent instruction
 * while the patch is in progress.
 */
void *text_poke_early(void *addr, const void *opcode, size_t len)
{
	unsigned long irq_state;

	local_irq_save(irq_state);
	memcpy(addr, opcode, len);
	local_irq_restore(irq_state);

	sync_core();
	/*
	 * Could also do a CLFLUSH here to speed up CPU recovery; but
	 * that causes hangs on some VIA CPUs.
	 */
	return addr;
}
497 
498 /**
499  * text_poke - Update instructions on a live kernel
500  * @addr: address to modify
501  * @opcode: source of the copy
502  * @len: length to copy
503  *
504  * Only atomic text poke/set should be allowed when not doing early patching.
505  * It means the size must be writable atomically and the address must be aligned
506  * in a way that permits an atomic write. It also makes sure we fit on a single
507  * page.
508  */
509 void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
510 {
511 	unsigned long flags;
512 	char *vaddr;
513 	int nr_pages = 2;
514 	struct page *pages[2];
515 	int i;
516 
517 	if (!core_kernel_text((unsigned long)addr)) {
518 		pages[0] = vmalloc_to_page(addr);
519 		pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
520 	} else {
521 		pages[0] = virt_to_page(addr);
522 		WARN_ON(!PageReserved(pages[0]));
523 		pages[1] = virt_to_page(addr + PAGE_SIZE);
524 	}
525 	BUG_ON(!pages[0]);
526 	if (!pages[1])
527 		nr_pages = 1;
528 	vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
529 	BUG_ON(!vaddr);
530 	local_irq_save(flags);
531 	memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
532 	local_irq_restore(flags);
533 	vunmap(vaddr);
534 	sync_core();
535 	/* Could also do a CLFLUSH here to speed up CPU recovery; but
536 	   that causes hangs on some VIA CPUs. */
537 	for (i = 0; i < len; i++)
538 		BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]);
539 	return addr;
540 }
541