xref: /openbmc/linux/arch/x86/kernel/alternative.c (revision 384740dc)
1 #include <linux/module.h>
2 #include <linux/sched.h>
3 #include <linux/mutex.h>
4 #include <linux/list.h>
5 #include <linux/kprobes.h>
6 #include <linux/mm.h>
7 #include <linux/vmalloc.h>
8 #include <asm/alternative.h>
9 #include <asm/sections.h>
10 #include <asm/pgtable.h>
11 #include <asm/mce.h>
12 #include <asm/nmi.h>
13 #include <asm/vsyscall.h>
14 #include <asm/cacheflush.h>
15 #include <asm/io.h>
16 
/* Largest patch site, in bytes; sizes the on-stack buffers used while patching. */
#define MAX_PATCH_LEN (255 - 1)
18 
#ifdef CONFIG_HOTPLUG_CPU
/*
 * Non-zero once SMP alternatives are to be patched a single time at boot
 * and never switched again.
 */
static int smp_alt_once;

/* Handler for the "smp-alt-boot" command-line option. */
static int __init bootonly(char *arg)
{
	smp_alt_once = 1;
	return 1;
}
__setup("smp-alt-boot", bootonly);
#else
/* Without CPU hotplug the flag is a compile-time constant. */
#define smp_alt_once 1
#endif
31 
32 static int debug_alternative;
33 
34 static int __init debug_alt(char *str)
35 {
36 	debug_alternative = 1;
37 	return 1;
38 }
39 __setup("debug-alternative", debug_alt);
40 
41 static int noreplace_smp;
42 
43 static int __init setup_noreplace_smp(char *str)
44 {
45 	noreplace_smp = 1;
46 	return 1;
47 }
48 __setup("noreplace-smp", setup_noreplace_smp);
49 
#ifdef CONFIG_PARAVIRT
/* Non-zero makes apply_paravirt() return without patching anything. */
static int noreplace_paravirt;

/* Handler for the "noreplace-paravirt" command-line option. */
static int __init setup_noreplace_paravirt(char *arg)
{
	noreplace_paravirt = 1;
	return 1;
}
__setup("noreplace-paravirt", setup_noreplace_paravirt);
#endif
60 
/*
 * Debug printk, emitted only when debug_alternative is set.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement:
 * a bare "if (...) printk(...)" expansion would capture the "else" of an
 * enclosing if/else.  "##args" lets the macro be used with a format string
 * that takes no further arguments.
 */
#define DPRINTK(fmt, args...)					\
	do {							\
		if (debug_alternative)				\
			printk(KERN_DEBUG fmt, ##args);		\
	} while (0)
63 
#ifdef GENERIC_NOP1
/*
 * The NOP encodings are only available as inline-assembly string fragments
 * in the include files, so the byte table is emitted with an asm statement
 * rather than a C initializer.
 */
asm("\t.section .rodata, \"a\"\nintelnops: "
	GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4
	GENERIC_NOP5 GENERIC_NOP6 GENERIC_NOP7 GENERIC_NOP8
    "\t.previous");
extern const unsigned char intelnops[];

/*
 * Entry n is the offset just past the first n-1 sequences, i.e. where the
 * n-th sequence starts (presumably GENERIC_NOPk is k bytes long).  Entry 0
 * is unused.
 */
static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = {
	[0] = NULL,
	[1] = intelnops,
	[2] = intelnops + 1,
	[3] = intelnops + 1 + 2,
	[4] = intelnops + 1 + 2 + 3,
	[5] = intelnops + 1 + 2 + 3 + 4,
	[6] = intelnops + 1 + 2 + 3 + 4 + 5,
	[7] = intelnops + 1 + 2 + 3 + 4 + 5 + 6,
	[8] = intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
#endif
85 
#ifdef K8_NOP1
/* K8 NOP sequences, emitted back to back into .rodata. */
asm("\t.section .rodata, \"a\"\nk8nops: "
	K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4
	K8_NOP5 K8_NOP6 K8_NOP7 K8_NOP8
    "\t.previous");
extern const unsigned char k8nops[];

/*
 * Entry n is the offset where the n-th sequence starts (presumably K8_NOPk
 * is k bytes long).  Entry 0 is unused.
 */
static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = {
	[0] = NULL,
	[1] = k8nops,
	[2] = k8nops + 1,
	[3] = k8nops + 1 + 2,
	[4] = k8nops + 1 + 2 + 3,
	[5] = k8nops + 1 + 2 + 3 + 4,
	[6] = k8nops + 1 + 2 + 3 + 4 + 5,
	[7] = k8nops + 1 + 2 + 3 + 4 + 5 + 6,
	[8] = k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
#endif
104 
#ifdef K7_NOP1
/* K7 NOP sequences, emitted back to back into .rodata. */
asm("\t.section .rodata, \"a\"\nk7nops: "
	K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4
	K7_NOP5 K7_NOP6 K7_NOP7 K7_NOP8
    "\t.previous");
extern const unsigned char k7nops[];

/*
 * Entry n is the offset where the n-th sequence starts (presumably K7_NOPk
 * is k bytes long).  Entry 0 is unused.
 */
static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = {
	[0] = NULL,
	[1] = k7nops,
	[2] = k7nops + 1,
	[3] = k7nops + 1 + 2,
	[4] = k7nops + 1 + 2 + 3,
	[5] = k7nops + 1 + 2 + 3 + 4,
	[6] = k7nops + 1 + 2 + 3 + 4 + 5,
	[7] = k7nops + 1 + 2 + 3 + 4 + 5 + 6,
	[8] = k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
#endif
123 
#ifdef P6_NOP1
/* P6 NOP sequences, emitted back to back into .rodata. */
asm("\t.section .rodata, \"a\"\np6nops: "
	P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4
	P6_NOP5 P6_NOP6 P6_NOP7 P6_NOP8
    "\t.previous");
extern const unsigned char p6nops[];

/*
 * Entry n is the offset where the n-th sequence starts (presumably P6_NOPk
 * is k bytes long).  Entry 0 is unused.
 */
static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = {
	[0] = NULL,
	[1] = p6nops,
	[2] = p6nops + 1,
	[3] = p6nops + 1 + 2,
	[4] = p6nops + 1 + 2 + 3,
	[5] = p6nops + 1 + 2 + 3 + 4,
	[6] = p6nops + 1 + 2 + 3 + 4 + 5,
	[7] = p6nops + 1 + 2 + 3 + 4 + 5 + 6,
	[8] = p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
#endif
142 
143 #ifdef CONFIG_X86_64
144 
145 extern char __vsyscall_0;
146 const unsigned char *const *find_nop_table(void)
147 {
148 	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
149 	    boot_cpu_has(X86_FEATURE_NOPL))
150 		return p6_nops;
151 	else
152 		return k8_nops;
153 }
154 
155 #else /* CONFIG_X86_64 */
156 
157 const unsigned char *const *find_nop_table(void)
158 {
159 	if (boot_cpu_has(X86_FEATURE_K8))
160 		return k8_nops;
161 	else if (boot_cpu_has(X86_FEATURE_K7))
162 		return k7_nops;
163 	else if (boot_cpu_has(X86_FEATURE_NOPL))
164 		return p6_nops;
165 	else
166 		return intel_nops;
167 }
168 
169 #endif /* CONFIG_X86_64 */
170 
171 /* Use this to add nops to a buffer, then text_poke the whole buffer. */
172 void add_nops(void *insns, unsigned int len)
173 {
174 	const unsigned char *const *noptable = find_nop_table();
175 
176 	while (len > 0) {
177 		unsigned int noplen = len;
178 		if (noplen > ASM_NOP_MAX)
179 			noplen = ASM_NOP_MAX;
180 		memcpy(insns, noptable[noplen], noplen);
181 		insns += noplen;
182 		len -= noplen;
183 	}
184 }
185 EXPORT_SYMBOL_GPL(add_nops);
186 
187 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
188 extern u8 *__smp_locks[], *__smp_locks_end[];
189 
190 /* Replace instructions with better alternatives for this CPU type.
191    This runs before SMP is initialized to avoid SMP problems with
192    self modifying code. This implies that assymetric systems where
193    APs have less capabilities than the boot processor are not handled.
194    Tough. Make sure you disable such features by hand. */
195 
196 void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
197 {
198 	struct alt_instr *a;
199 	char insnbuf[MAX_PATCH_LEN];
200 
201 	DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
202 	for (a = start; a < end; a++) {
203 		u8 *instr = a->instr;
204 		BUG_ON(a->replacementlen > a->instrlen);
205 		BUG_ON(a->instrlen > sizeof(insnbuf));
206 		if (!boot_cpu_has(a->cpuid))
207 			continue;
208 #ifdef CONFIG_X86_64
209 		/* vsyscall code is not mapped yet. resolve it manually. */
210 		if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) {
211 			instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
212 			DPRINTK("%s: vsyscall fixup: %p => %p\n",
213 				__func__, a->instr, instr);
214 		}
215 #endif
216 		memcpy(insnbuf, a->replacement, a->replacementlen);
217 		add_nops(insnbuf + a->replacementlen,
218 			 a->instrlen - a->replacementlen);
219 		text_poke_early(instr, insnbuf, a->instrlen);
220 	}
221 }
222 
223 #ifdef CONFIG_SMP
224 
225 static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
226 {
227 	u8 **ptr;
228 
229 	for (ptr = start; ptr < end; ptr++) {
230 		if (*ptr < text)
231 			continue;
232 		if (*ptr > text_end)
233 			continue;
234 		/* turn DS segment override prefix into lock prefix */
235 		text_poke(*ptr, ((unsigned char []){0xf0}), 1);
236 	};
237 }
238 
239 static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
240 {
241 	u8 **ptr;
242 
243 	if (noreplace_smp)
244 		return;
245 
246 	for (ptr = start; ptr < end; ptr++) {
247 		if (*ptr < text)
248 			continue;
249 		if (*ptr > text_end)
250 			continue;
251 		/* turn lock prefix into DS segment override prefix */
252 		text_poke(*ptr, ((unsigned char []){0x3E}), 1);
253 	};
254 }
255 
256 struct smp_alt_module {
257 	/* what is this ??? */
258 	struct module	*mod;
259 	char		*name;
260 
261 	/* ptrs to lock prefixes */
262 	u8		**locks;
263 	u8		**locks_end;
264 
265 	/* .text segment, needed to avoid patching init code ;) */
266 	u8		*text;
267 	u8		*text_end;
268 
269 	struct list_head next;
270 };
271 static LIST_HEAD(smp_alt_modules);
272 static DEFINE_MUTEX(smp_alt);
273 static int smp_mode = 1;	/* protected by smp_alt */
274 
275 void alternatives_smp_module_add(struct module *mod, char *name,
276 				 void *locks, void *locks_end,
277 				 void *text,  void *text_end)
278 {
279 	struct smp_alt_module *smp;
280 
281 	if (noreplace_smp)
282 		return;
283 
284 	if (smp_alt_once) {
285 		if (boot_cpu_has(X86_FEATURE_UP))
286 			alternatives_smp_unlock(locks, locks_end,
287 						text, text_end);
288 		return;
289 	}
290 
291 	smp = kzalloc(sizeof(*smp), GFP_KERNEL);
292 	if (NULL == smp)
293 		return; /* we'll run the (safe but slow) SMP code then ... */
294 
295 	smp->mod	= mod;
296 	smp->name	= name;
297 	smp->locks	= locks;
298 	smp->locks_end	= locks_end;
299 	smp->text	= text;
300 	smp->text_end	= text_end;
301 	DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
302 		__func__, smp->locks, smp->locks_end,
303 		smp->text, smp->text_end, smp->name);
304 
305 	mutex_lock(&smp_alt);
306 	list_add_tail(&smp->next, &smp_alt_modules);
307 	if (boot_cpu_has(X86_FEATURE_UP))
308 		alternatives_smp_unlock(smp->locks, smp->locks_end,
309 					smp->text, smp->text_end);
310 	mutex_unlock(&smp_alt);
311 }
312 
313 void alternatives_smp_module_del(struct module *mod)
314 {
315 	struct smp_alt_module *item;
316 
317 	if (smp_alt_once || noreplace_smp)
318 		return;
319 
320 	mutex_lock(&smp_alt);
321 	list_for_each_entry(item, &smp_alt_modules, next) {
322 		if (mod != item->mod)
323 			continue;
324 		list_del(&item->next);
325 		mutex_unlock(&smp_alt);
326 		DPRINTK("%s: %s\n", __func__, item->name);
327 		kfree(item);
328 		return;
329 	}
330 	mutex_unlock(&smp_alt);
331 }
332 
333 void alternatives_smp_switch(int smp)
334 {
335 	struct smp_alt_module *mod;
336 
337 #ifdef CONFIG_LOCKDEP
338 	/*
339 	 * Older binutils section handling bug prevented
340 	 * alternatives-replacement from working reliably.
341 	 *
342 	 * If this still occurs then you should see a hang
343 	 * or crash shortly after this line:
344 	 */
345 	printk("lockdep: fixing up alternatives.\n");
346 #endif
347 
348 	if (noreplace_smp || smp_alt_once)
349 		return;
350 	BUG_ON(!smp && (num_online_cpus() > 1));
351 
352 	mutex_lock(&smp_alt);
353 
354 	/*
355 	 * Avoid unnecessary switches because it forces JIT based VMs to
356 	 * throw away all cached translations, which can be quite costly.
357 	 */
358 	if (smp == smp_mode) {
359 		/* nothing */
360 	} else if (smp) {
361 		printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
362 		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
363 		clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
364 		list_for_each_entry(mod, &smp_alt_modules, next)
365 			alternatives_smp_lock(mod->locks, mod->locks_end,
366 					      mod->text, mod->text_end);
367 	} else {
368 		printk(KERN_INFO "SMP alternatives: switching to UP code\n");
369 		set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
370 		set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
371 		list_for_each_entry(mod, &smp_alt_modules, next)
372 			alternatives_smp_unlock(mod->locks, mod->locks_end,
373 						mod->text, mod->text_end);
374 	}
375 	smp_mode = smp;
376 	mutex_unlock(&smp_alt);
377 }
378 
379 #endif
380 
381 #ifdef CONFIG_PARAVIRT
382 void apply_paravirt(struct paravirt_patch_site *start,
383 		    struct paravirt_patch_site *end)
384 {
385 	struct paravirt_patch_site *p;
386 	char insnbuf[MAX_PATCH_LEN];
387 
388 	if (noreplace_paravirt)
389 		return;
390 
391 	for (p = start; p < end; p++) {
392 		unsigned int used;
393 
394 		BUG_ON(p->len > MAX_PATCH_LEN);
395 		/* prep the buffer with the original instructions */
396 		memcpy(insnbuf, p->instr, p->len);
397 		used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf,
398 					 (unsigned long)p->instr, p->len);
399 
400 		BUG_ON(used > p->len);
401 
402 		/* Pad the rest with nops */
403 		add_nops(insnbuf + used, p->len - used);
404 		text_poke_early(p->instr, insnbuf, p->len);
405 	}
406 }
407 extern struct paravirt_patch_site __start_parainstructions[],
408 	__stop_parainstructions[];
409 #endif	/* CONFIG_PARAVIRT */
410 
411 void __init alternative_instructions(void)
412 {
413 	/* The patching is not fully atomic, so try to avoid local interruptions
414 	   that might execute the to be patched code.
415 	   Other CPUs are not running. */
416 	stop_nmi();
417 #ifdef CONFIG_X86_MCE
418 	stop_mce();
419 #endif
420 
421 	apply_alternatives(__alt_instructions, __alt_instructions_end);
422 
423 	/* switch to patch-once-at-boottime-only mode and free the
424 	 * tables in case we know the number of CPUs will never ever
425 	 * change */
426 #ifdef CONFIG_HOTPLUG_CPU
427 	if (num_possible_cpus() < 2)
428 		smp_alt_once = 1;
429 #endif
430 
431 #ifdef CONFIG_SMP
432 	if (smp_alt_once) {
433 		if (1 == num_possible_cpus()) {
434 			printk(KERN_INFO "SMP alternatives: switching to UP code\n");
435 			set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
436 			set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
437 
438 			alternatives_smp_unlock(__smp_locks, __smp_locks_end,
439 						_text, _etext);
440 		}
441 	} else {
442 		alternatives_smp_module_add(NULL, "core kernel",
443 					    __smp_locks, __smp_locks_end,
444 					    _text, _etext);
445 
446 		/* Only switch to UP mode if we don't immediately boot others */
447 		if (num_possible_cpus() == 1 || setup_max_cpus <= 1)
448 			alternatives_smp_switch(0);
449 	}
450 #endif
451  	apply_paravirt(__parainstructions, __parainstructions_end);
452 
453 	if (smp_alt_once)
454 		free_init_pages("SMP alternatives",
455 				(unsigned long)__smp_locks,
456 				(unsigned long)__smp_locks_end);
457 
458 	restart_nmi();
459 #ifdef CONFIG_X86_MCE
460 	restart_mce();
461 #endif
462 }
463 
464 /**
465  * text_poke_early - Update instructions on a live kernel at boot time
466  * @addr: address to modify
467  * @opcode: source of the copy
468  * @len: length to copy
469  *
470  * When you use this code to patch more than one byte of an instruction
471  * you need to make sure that other CPUs cannot execute this code in parallel.
472  * Also no thread must be currently preempted in the middle of these
473  * instructions. And on the local CPU you need to be protected again NMI or MCE
474  * handlers seeing an inconsistent instruction while you patch.
475  */
476 void *text_poke_early(void *addr, const void *opcode, size_t len)
477 {
478 	unsigned long flags;
479 	local_irq_save(flags);
480 	memcpy(addr, opcode, len);
481 	local_irq_restore(flags);
482 	sync_core();
483 	/* Could also do a CLFLUSH here to speed up CPU recovery; but
484 	   that causes hangs on some VIA CPUs. */
485 	return addr;
486 }
487 
488 /**
489  * text_poke - Update instructions on a live kernel
490  * @addr: address to modify
491  * @opcode: source of the copy
492  * @len: length to copy
493  *
494  * Only atomic text poke/set should be allowed when not doing early patching.
495  * It means the size must be writable atomically and the address must be aligned
496  * in a way that permits an atomic write. It also makes sure we fit on a single
497  * page.
498  */
499 void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
500 {
501 	unsigned long flags;
502 	char *vaddr;
503 	int nr_pages = 2;
504 	struct page *pages[2];
505 	int i;
506 
507 	if (!core_kernel_text((unsigned long)addr)) {
508 		pages[0] = vmalloc_to_page(addr);
509 		pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
510 	} else {
511 		pages[0] = virt_to_page(addr);
512 		WARN_ON(!PageReserved(pages[0]));
513 		pages[1] = virt_to_page(addr + PAGE_SIZE);
514 	}
515 	BUG_ON(!pages[0]);
516 	if (!pages[1])
517 		nr_pages = 1;
518 	vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
519 	BUG_ON(!vaddr);
520 	local_irq_save(flags);
521 	memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
522 	local_irq_restore(flags);
523 	vunmap(vaddr);
524 	sync_core();
525 	/* Could also do a CLFLUSH here to speed up CPU recovery; but
526 	   that causes hangs on some VIA CPUs. */
527 	for (i = 0; i < len; i++)
528 		BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]);
529 	return addr;
530 }
531