xref: /openbmc/linux/arch/x86/kernel/alternative.c (revision b627b4ed)
1 #include <linux/module.h>
2 #include <linux/sched.h>
3 #include <linux/mutex.h>
4 #include <linux/list.h>
5 #include <linux/kprobes.h>
6 #include <linux/mm.h>
7 #include <linux/vmalloc.h>
8 #include <linux/memory.h>
9 #include <asm/alternative.h>
10 #include <asm/sections.h>
11 #include <asm/pgtable.h>
12 #include <asm/mce.h>
13 #include <asm/nmi.h>
14 #include <asm/vsyscall.h>
15 #include <asm/cacheflush.h>
16 #include <asm/tlbflush.h>
17 #include <asm/io.h>
18 #include <asm/fixmap.h>
19 
20 #define MAX_PATCH_LEN (255-1)
21 
22 #ifdef CONFIG_HOTPLUG_CPU
23 static int smp_alt_once;
24 
25 static int __init bootonly(char *str)
26 {
27 	smp_alt_once = 1;
28 	return 1;
29 }
30 __setup("smp-alt-boot", bootonly);
31 #else
32 #define smp_alt_once 1
33 #endif
34 
35 static int debug_alternative;
36 
37 static int __init debug_alt(char *str)
38 {
39 	debug_alternative = 1;
40 	return 1;
41 }
42 __setup("debug-alternative", debug_alt);
43 
44 static int noreplace_smp;
45 
46 static int __init setup_noreplace_smp(char *str)
47 {
48 	noreplace_smp = 1;
49 	return 1;
50 }
51 __setup("noreplace-smp", setup_noreplace_smp);
52 
53 #ifdef CONFIG_PARAVIRT
54 static int noreplace_paravirt = 0;
55 
56 static int __init setup_noreplace_paravirt(char *str)
57 {
58 	noreplace_paravirt = 1;
59 	return 1;
60 }
61 __setup("noreplace-paravirt", setup_noreplace_paravirt);
62 #endif
63 
64 #define DPRINTK(fmt, args...) if (debug_alternative) \
65 	printk(KERN_DEBUG fmt, args)
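/*
 * Note on the macro above: the bare `if' means a call site such as
 * "if (x) DPRINTK(...); else f();" would bind the `else' to the hidden
 * `if (debug_alternative)'.  A minimal sketch of the usual do/while(0)
 * wrapper that avoids this (the DPRINTK_SAFE name is made up here):
 *
 *	#define DPRINTK_SAFE(fmt, args...)			\
 *		do {						\
 *			if (debug_alternative)			\
 *				printk(KERN_DEBUG fmt, ##args);	\
 *		} while (0)
 */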
66 
67 #ifdef GENERIC_NOP1
68 /* Use inline assembly to define this because the nops are defined
69    as inline assembly strings in the include files and we cannot
70    easily get them into C string constants. */
71 asm("\t.section .rodata, \"a\"\nintelnops: "
72 	GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
73 	GENERIC_NOP7 GENERIC_NOP8
74     "\t.previous");
75 extern const unsigned char intelnops[];
76 static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = {
77 	NULL,
78 	intelnops,
79 	intelnops + 1,
80 	intelnops + 1 + 2,
81 	intelnops + 1 + 2 + 3,
82 	intelnops + 1 + 2 + 3 + 4,
83 	intelnops + 1 + 2 + 3 + 4 + 5,
84 	intelnops + 1 + 2 + 3 + 4 + 5 + 6,
85 	intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
86 };
87 #endif
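/*
 * Layout note: each asm block in this file emits its 1..8 byte NOP
 * sequences back to back in .rodata, so the n-byte sequence starts at
 * offset 1 + 2 + ... + (n - 1), which is exactly what the cumulative sums
 * above encode (e.g. intel_nops[3] == intelnops + 1 + 2).  A small
 * stand-alone sketch of the same construction, with a hypothetical helper
 * name and no claim that the kernel builds its tables this way:
 */
#if 0	/* illustrative sketch, not built */
static void build_nop_index(const unsigned char *blob,
			    const unsigned char *index[], unsigned int max)
{
	unsigned int off = 0, n;

	index[0] = NULL;		/* there is no 0-byte NOP */
	for (n = 1; n <= max; n++) {
		index[n] = blob + off;	/* n-byte sequence starts here */
		off += n;		/* skip it to reach the next one */
	}
}
#endif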
88 
89 #ifdef K8_NOP1
90 asm("\t.section .rodata, \"a\"\nk8nops: "
91 	K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
92 	K8_NOP7 K8_NOP8
93     "\t.previous");
94 extern const unsigned char k8nops[];
95 static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = {
96 	NULL,
97 	k8nops,
98 	k8nops + 1,
99 	k8nops + 1 + 2,
100 	k8nops + 1 + 2 + 3,
101 	k8nops + 1 + 2 + 3 + 4,
102 	k8nops + 1 + 2 + 3 + 4 + 5,
103 	k8nops + 1 + 2 + 3 + 4 + 5 + 6,
104 	k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
105 };
106 #endif
107 
108 #ifdef K7_NOP1
109 asm("\t.section .rodata, \"a\"\nk7nops: "
110 	K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
111 	K7_NOP7 K7_NOP8
112     "\t.previous");
113 extern const unsigned char k7nops[];
114 static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = {
115 	NULL,
116 	k7nops,
117 	k7nops + 1,
118 	k7nops + 1 + 2,
119 	k7nops + 1 + 2 + 3,
120 	k7nops + 1 + 2 + 3 + 4,
121 	k7nops + 1 + 2 + 3 + 4 + 5,
122 	k7nops + 1 + 2 + 3 + 4 + 5 + 6,
123 	k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
124 };
125 #endif
126 
127 #ifdef P6_NOP1
128 asm("\t.section .rodata, \"a\"\np6nops: "
129 	P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6
130 	P6_NOP7 P6_NOP8
131     "\t.previous");
132 extern const unsigned char p6nops[];
133 static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = {
134 	NULL,
135 	p6nops,
136 	p6nops + 1,
137 	p6nops + 1 + 2,
138 	p6nops + 1 + 2 + 3,
139 	p6nops + 1 + 2 + 3 + 4,
140 	p6nops + 1 + 2 + 3 + 4 + 5,
141 	p6nops + 1 + 2 + 3 + 4 + 5 + 6,
142 	p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
143 };
144 #endif
145 
146 #ifdef CONFIG_X86_64
147 
148 extern char __vsyscall_0;
149 const unsigned char *const *find_nop_table(void)
150 {
151 	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
152 	    boot_cpu_has(X86_FEATURE_NOPL))
153 		return p6_nops;
154 	else
155 		return k8_nops;
156 }
157 
158 #else /* CONFIG_X86_64 */
159 
160 const unsigned char *const *find_nop_table(void)
161 {
162 	if (boot_cpu_has(X86_FEATURE_K8))
163 		return k8_nops;
164 	else if (boot_cpu_has(X86_FEATURE_K7))
165 		return k7_nops;
166 	else if (boot_cpu_has(X86_FEATURE_NOPL))
167 		return p6_nops;
168 	else
169 		return intel_nops;
170 }
171 
172 #endif /* CONFIG_X86_64 */
173 
174 /* Use this to add nops to a buffer, then text_poke the whole buffer. */
175 void add_nops(void *insns, unsigned int len)
176 {
177 	const unsigned char *const *noptable = find_nop_table();
178 
179 	while (len > 0) {
180 		unsigned int noplen = len;
181 		if (noplen > ASM_NOP_MAX)
182 			noplen = ASM_NOP_MAX;
183 		memcpy(insns, noptable[noplen], noplen);
184 		insns += noplen;
185 		len -= noplen;
186 	}
187 }
188 EXPORT_SYMBOL_GPL(add_nops);
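/*
 * Typical use of add_nops(): pad the tail of a patch buffer after a shorter
 * replacement, exactly as apply_alternatives() does below.  A minimal
 * sketch with made-up lengths and bytes:
 */
#if 0	/* illustrative sketch, not built */
static void pad_example(void)
{
	char buf[5];
	unsigned int replacementlen = 2;	/* hypothetical 2-byte replacement */

	memcpy(buf, "\x89\xc0", replacementlen);	/* hypothetical bytes */
	add_nops(buf + replacementlen, sizeof(buf) - replacementlen);
	/* buf now holds the 2 replacement bytes followed by a 3-byte NOP */
}
#endif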
189 
190 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
191 extern u8 *__smp_locks[], *__smp_locks_end[];
192 
193 /* Replace instructions with better alternatives for this CPU type.
194    This runs before SMP is initialized to avoid SMP problems with
195    self-modifying code. This implies that asymmetric systems where
196    APs have fewer capabilities than the boot processor are not handled.
197    Tough. Make sure you disable such features by hand. */
198 
199 void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
200 {
201 	struct alt_instr *a;
202 	char insnbuf[MAX_PATCH_LEN];
203 
204 	DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
205 	for (a = start; a < end; a++) {
206 		u8 *instr = a->instr;
207 		BUG_ON(a->replacementlen > a->instrlen);
208 		BUG_ON(a->instrlen > sizeof(insnbuf));
209 		if (!boot_cpu_has(a->cpuid))
210 			continue;
211 #ifdef CONFIG_X86_64
212 		/* vsyscall code is not mapped yet. resolve it manually. */
213 		if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) {
214 			instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
215 			DPRINTK("%s: vsyscall fixup: %p => %p\n",
216 				__func__, a->instr, instr);
217 		}
218 #endif
219 		memcpy(insnbuf, a->replacement, a->replacementlen);
220 		add_nops(insnbuf + a->replacementlen,
221 			 a->instrlen - a->replacementlen);
222 		text_poke_early(instr, insnbuf, a->instrlen);
223 	}
224 }
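/*
 * For reference, the only alt_instr fields the loop above relies on are
 * instr, replacement, cpuid, instrlen and replacementlen.  A hand-written
 * entry would look roughly like the sketch below (real entries are emitted
 * by the alternative*() macros in <asm/alternative.h>; the two arrays here
 * are placeholders):
 */
#if 0	/* illustrative sketch, not built */
static u8 example_site[5];		/* hypothetical 5-byte patch site  */
static u8 example_replacement[3];	/* hypothetical 3-byte replacement */

static struct alt_instr example_entry = {
	.instr		= example_site,
	.replacement	= example_replacement,
	.cpuid		= X86_FEATURE_NOPL,	/* patched in only if the CPU has NOPL */
	.instrlen	= 5,			/* never smaller than replacementlen   */
	.replacementlen	= 3,			/* the 2-byte tail gets NOP padding     */
};
#endif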
225 
226 #ifdef CONFIG_SMP
227 
228 static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
229 {
230 	u8 **ptr;
231 
232 	mutex_lock(&text_mutex);
233 	for (ptr = start; ptr < end; ptr++) {
234 		if (*ptr < text)
235 			continue;
236 		if (*ptr > text_end)
237 			continue;
238 		/* turn DS segment override prefix into lock prefix */
239 		text_poke(*ptr, ((unsigned char []){0xf0}), 1);
240 	}
241 	mutex_unlock(&text_mutex);
242 }
243 
244 static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
245 {
246 	u8 **ptr;
247 
248 	if (noreplace_smp)
249 		return;
250 
251 	mutex_lock(&text_mutex);
252 	for (ptr = start; ptr < end; ptr++) {
253 		if (*ptr < text)
254 			continue;
255 		if (*ptr > text_end)
256 			continue;
257 		/* turn lock prefix into DS segment override prefix */
258 		text_poke(*ptr, ((unsigned char []){0x3E}), 1);
259 	}
260 	mutex_unlock(&text_mutex);
261 }
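/*
 * Byte-level view of the two pokes above: 0xf0 is the LOCK prefix and 0x3e
 * is the DS segment override, which is an architectural no-op on these
 * instructions, so switching a site between its SMP and UP form is a single
 * one-byte write.  For a hypothetical "lock incl (%ecx)" site:
 *
 *	SMP form:  f0 ff 01	lock incl (%ecx)
 *	UP  form:  3e ff 01	ds   incl (%ecx)	(prefix ignored)
 *
 * A trivial stand-alone sketch of that flip (helper name is made up):
 */
#if 0	/* illustrative sketch, not built */
static void toggle_lock_prefix(unsigned char *site, bool smp)
{
	/* site points at the prefix byte recorded in .smp_locks */
	site[0] = smp ? 0xf0 : 0x3e;
}
#endif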
262 
263 struct smp_alt_module {
264 	/* module that owns these smp_locks entries (NULL for the core kernel) */
265 	struct module	*mod;
266 	char		*name;
267 
268 	/* ptrs to lock prefixes */
269 	u8		**locks;
270 	u8		**locks_end;
271 
272 	/* .text segment, needed to avoid patching init code ;) */
273 	u8		*text;
274 	u8		*text_end;
275 
276 	struct list_head next;
277 };
278 static LIST_HEAD(smp_alt_modules);
279 static DEFINE_MUTEX(smp_alt);
280 static int smp_mode = 1;	/* protected by smp_alt */
281 
282 void alternatives_smp_module_add(struct module *mod, char *name,
283 				 void *locks, void *locks_end,
284 				 void *text,  void *text_end)
285 {
286 	struct smp_alt_module *smp;
287 
288 	if (noreplace_smp)
289 		return;
290 
291 	if (smp_alt_once) {
292 		if (boot_cpu_has(X86_FEATURE_UP))
293 			alternatives_smp_unlock(locks, locks_end,
294 						text, text_end);
295 		return;
296 	}
297 
298 	smp = kzalloc(sizeof(*smp), GFP_KERNEL);
299 	if (NULL == smp)
300 		return; /* we'll run the (safe but slow) SMP code then ... */
301 
302 	smp->mod	= mod;
303 	smp->name	= name;
304 	smp->locks	= locks;
305 	smp->locks_end	= locks_end;
306 	smp->text	= text;
307 	smp->text_end	= text_end;
308 	DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
309 		__func__, smp->locks, smp->locks_end,
310 		smp->text, smp->text_end, smp->name);
311 
312 	mutex_lock(&smp_alt);
313 	list_add_tail(&smp->next, &smp_alt_modules);
314 	if (boot_cpu_has(X86_FEATURE_UP))
315 		alternatives_smp_unlock(smp->locks, smp->locks_end,
316 					smp->text, smp->text_end);
317 	mutex_unlock(&smp_alt);
318 }
319 
320 void alternatives_smp_module_del(struct module *mod)
321 {
322 	struct smp_alt_module *item;
323 
324 	if (smp_alt_once || noreplace_smp)
325 		return;
326 
327 	mutex_lock(&smp_alt);
328 	list_for_each_entry(item, &smp_alt_modules, next) {
329 		if (mod != item->mod)
330 			continue;
331 		list_del(&item->next);
332 		mutex_unlock(&smp_alt);
333 		DPRINTK("%s: %s\n", __func__, item->name);
334 		kfree(item);
335 		return;
336 	}
337 	mutex_unlock(&smp_alt);
338 }
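/*
 * Usage of the add/del pair: the core kernel registers its own text from
 * alternative_instructions() below (mod == NULL, name "core kernel").  Any
 * other text region would be bracketed the same way; a sketch with
 * placeholder symbol names:
 */
#if 0	/* illustrative sketch, not built */
	alternatives_smp_module_add(mod, mod->name,
				    my_smp_locks, my_smp_locks_end,
				    my_text, my_text_end);
	/* ... and when the region disappears again ... */
	alternatives_smp_module_del(mod);
#endif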
339 
340 void alternatives_smp_switch(int smp)
341 {
342 	struct smp_alt_module *mod;
343 
344 #ifdef CONFIG_LOCKDEP
345 	/*
346 	 * An older binutils section-handling bug prevented
347 	 * alternatives-replacement from working reliably.
348 	 *
349 	 * If this still occurs then you should see a hang
350 	 * or crash shortly after this line:
351 	 */
352 	printk(KERN_INFO "lockdep: fixing up alternatives.\n");
353 #endif
354 
355 	if (noreplace_smp || smp_alt_once)
356 		return;
357 	BUG_ON(!smp && (num_online_cpus() > 1));
358 
359 	mutex_lock(&smp_alt);
360 
361 	/*
362 	 * Avoid unnecessary switches because switching forces JIT-based VMs to
363 	 * throw away all cached translations, which can be quite costly.
364 	 */
365 	if (smp == smp_mode) {
366 		/* nothing */
367 	} else if (smp) {
368 		printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
369 		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
370 		clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
371 		list_for_each_entry(mod, &smp_alt_modules, next)
372 			alternatives_smp_lock(mod->locks, mod->locks_end,
373 					      mod->text, mod->text_end);
374 	} else {
375 		printk(KERN_INFO "SMP alternatives: switching to UP code\n");
376 		set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
377 		set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
378 		list_for_each_entry(mod, &smp_alt_modules, next)
379 			alternatives_smp_unlock(mod->locks, mod->locks_end,
380 						mod->text, mod->text_end);
381 	}
382 	smp_mode = smp;
383 	mutex_unlock(&smp_alt);
384 }
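/*
 * The intended callers of alternatives_smp_switch() are the CPU bring-up
 * and tear-down paths, which (as far as this file is concerned) flip the
 * mode when the number of usable CPUs crosses one.  A sketch of that usage
 * pattern; the real call sites live in the hotplug code, not here:
 */
#if 0	/* illustrative sketch, not built */
	/* a second CPU is about to come online */
	alternatives_smp_switch(1);

	/* the last secondary CPU went away */
	if (num_online_cpus() == 1)
		alternatives_smp_switch(0);
#endif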
385 
386 #endif
387 
388 #ifdef CONFIG_PARAVIRT
389 void apply_paravirt(struct paravirt_patch_site *start,
390 		    struct paravirt_patch_site *end)
391 {
392 	struct paravirt_patch_site *p;
393 	char insnbuf[MAX_PATCH_LEN];
394 
395 	if (noreplace_paravirt)
396 		return;
397 
398 	for (p = start; p < end; p++) {
399 		unsigned int used;
400 
401 		BUG_ON(p->len > MAX_PATCH_LEN);
402 		/* prep the buffer with the original instructions */
403 		memcpy(insnbuf, p->instr, p->len);
404 		used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf,
405 					 (unsigned long)p->instr, p->len);
406 
407 		BUG_ON(used > p->len);
408 
409 		/* Pad the rest with nops */
410 		add_nops(insnbuf + used, p->len - used);
411 		text_poke_early(p->instr, insnbuf, p->len);
412 	}
413 }
414 extern struct paravirt_patch_site __start_parainstructions[],
415 	__stop_parainstructions[];
416 #endif	/* CONFIG_PARAVIRT */
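/*
 * Contract implied by apply_paravirt() above: the patch callback receives a
 * buffer already holding the original instructions, may rewrite up to len
 * bytes of it, and returns how many bytes it used; the remainder is NOP
 * padded before the site is rewritten.  A hypothetical callback honouring
 * that contract (EXAMPLE_NOP_TYPE and the function are inventions for
 * illustration, not part of the paravirt interface):
 */
#if 0	/* illustrative sketch, not built */
#define EXAMPLE_NOP_TYPE	0x7f

static unsigned example_pv_patch(u8 type, u16 clobbers, void *insnbuf,
				 unsigned long addr, unsigned len)
{
	if (type == EXAMPLE_NOP_TYPE)
		return 0;	/* use nothing: the whole site becomes NOPs */

	return len;		/* keep the original bytes that were copied in */
}
#endif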
417 
418 void __init alternative_instructions(void)
419 {
420 	/* The patching is not fully atomic, so try to avoid local interruptions
421 	   that might execute the code being patched.
422 	   Other CPUs are not running. */
423 	stop_nmi();
424 
425 	/*
426 	 * Don't stop machine check exceptions while patching.
427 	 * MCEs only happen when something got corrupted and in this
428 	 * case we must do something about the corruption.
429 	 * Ignoring it is worse than an unlikely patching race.
430 	 * Also, machine checks tend to be broadcast and if one CPU
431 	 * goes into machine check the others follow quickly, so we don't
432 	 * expect a machine check to cause undue problems during code
433 	 * patching.
434 	 */
435 
436 	apply_alternatives(__alt_instructions, __alt_instructions_end);
437 
438 	/* Switch to patch-once-at-boot-time-only mode and free the
439 	 * tables if we know the number of CPUs will never ever
440 	 * change. */
441 #ifdef CONFIG_HOTPLUG_CPU
442 	if (num_possible_cpus() < 2)
443 		smp_alt_once = 1;
444 #endif
445 
446 #ifdef CONFIG_SMP
447 	if (smp_alt_once) {
448 		if (1 == num_possible_cpus()) {
449 			printk(KERN_INFO "SMP alternatives: switching to UP code\n");
450 			set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
451 			set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
452 
453 			alternatives_smp_unlock(__smp_locks, __smp_locks_end,
454 						_text, _etext);
455 		}
456 	} else {
457 		alternatives_smp_module_add(NULL, "core kernel",
458 					    __smp_locks, __smp_locks_end,
459 					    _text, _etext);
460 
461 		/* Only switch to UP mode if we don't immediately boot others */
462 		if (num_present_cpus() == 1 || setup_max_cpus <= 1)
463 			alternatives_smp_switch(0);
464 	}
465 #endif
466  	apply_paravirt(__parainstructions, __parainstructions_end);
467 
468 	if (smp_alt_once)
469 		free_init_pages("SMP alternatives",
470 				(unsigned long)__smp_locks,
471 				(unsigned long)__smp_locks_end);
472 
473 	restart_nmi();
474 }
475 
476 /**
477  * text_poke_early - Update instructions on a live kernel at boot time
478  * @addr: address to modify
479  * @opcode: source of the copy
480  * @len: length to copy
481  *
482  * When you use this code to patch more than one byte of an instruction
483  * you need to make sure that other CPUs cannot execute this code in parallel.
484  * Also, no thread may be preempted in the middle of these
485  * instructions. And on the local CPU you need to be protected against NMI or MCE
486  * handlers seeing an inconsistent instruction while you patch.
487  */
488 void *text_poke_early(void *addr, const void *opcode, size_t len)
489 {
490 	unsigned long flags;
491 	local_irq_save(flags);
492 	memcpy(addr, opcode, len);
493 	local_irq_restore(flags);
494 	sync_core();
495 	/* Could also do a CLFLUSH here to speed up CPU recovery; but
496 	   that causes hangs on some VIA CPUs. */
497 	return addr;
498 }
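/*
 * Example boot-time use, mirroring apply_alternatives() above: assemble the
 * final bytes in a local buffer, then write them in one call.  The site
 * pointer and the 5-byte length are hypothetical:
 */
#if 0	/* illustrative sketch, not built */
static void __init nop_out_site(u8 *site)
{
	char buf[5];

	add_nops(buf, sizeof(buf));
	text_poke_early(site, buf, sizeof(buf));
}
#endif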
499 
500 /**
501  * text_poke - Update instructions on a live kernel
502  * @addr: address to modify
503  * @opcode: source of the copy
504  * @len: length to copy
505  *
506  * Only atomic text poke/set should be allowed when not doing early patching.
507  * It means the size must be writable atomically and the address must be aligned
508  * in a way that permits an atomic write. It also makes sure we fit on a single
509  * page.
510  *
511  * Note: Must be called under text_mutex.
512  */
513 void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
514 {
515 	unsigned long flags;
516 	char *vaddr;
517 	struct page *pages[2];
518 	int i;
519 
520 	if (!core_kernel_text((unsigned long)addr)) {
521 		pages[0] = vmalloc_to_page(addr);
522 		pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
523 	} else {
524 		pages[0] = virt_to_page(addr);
525 		WARN_ON(!PageReserved(pages[0]));
526 		pages[1] = virt_to_page(addr + PAGE_SIZE);
527 	}
528 	BUG_ON(!pages[0]);
529 	local_irq_save(flags);
530 	set_fixmap(FIX_TEXT_POKE0, page_to_phys(pages[0]));
531 	if (pages[1])
532 		set_fixmap(FIX_TEXT_POKE1, page_to_phys(pages[1]));
533 	vaddr = (char *)fix_to_virt(FIX_TEXT_POKE0);
534 	memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
535 	clear_fixmap(FIX_TEXT_POKE0);
536 	if (pages[1])
537 		clear_fixmap(FIX_TEXT_POKE1);
538 	local_flush_tlb();
539 	sync_core();
540 	/* Could also do a CLFLUSH here to speed up CPU recovery; but
541 	   that causes hangs on some VIA CPUs. */
542 	for (i = 0; i < len; i++)
543 		BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]);
544 	local_irq_restore(flags);
545 	return addr;
546 }
547
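/*
 * Example use of text_poke(), mirroring alternatives_smp_lock() above: a
 * single-byte write performed under text_mutex.  The address argument is a
 * placeholder for an entry recorded in .smp_locks:
 */
#if 0	/* illustrative sketch, not built */
static void install_lock_prefix(u8 *addr)
{
	mutex_lock(&text_mutex);
	text_poke(addr, ((unsigned char []){0xf0}), 1);
	mutex_unlock(&text_mutex);
}
#endif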