/* xref: /openbmc/linux/arch/x86/kernel/alternative.c (revision 643d1f7f) */
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/kprobes.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <asm/alternative.h>
#include <asm/sections.h>
#include <asm/pgtable.h>
#include <asm/mce.h>
#include <asm/nmi.h>
#include <asm/vsyscall.h>

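/*
 * Size of the on-stack buffer used to assemble a patched instruction
 * sequence (replacement bytes plus NOP padding) before text_poke()ing
 * it over the original code.
 */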
#define MAX_PATCH_LEN (255-1)

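/*
 * With CPU hotplug enabled, SMP lock prefixes are normally kept patchable
 * so the kernel can switch between UP and SMP variants as CPUs come and
 * go.  "smp-alt-boot" forces a single patching pass at boot instead.
 */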
#ifdef CONFIG_HOTPLUG_CPU
static int smp_alt_once;

static int __init bootonly(char *str)
{
	smp_alt_once = 1;
	return 1;
}
__setup("smp-alt-boot", bootonly);
#else
#define smp_alt_once 1
#endif

static int debug_alternative;

static int __init debug_alt(char *str)
{
	debug_alternative = 1;
	return 1;
}
__setup("debug-alternative", debug_alt);

static int noreplace_smp;

static int __init setup_noreplace_smp(char *str)
{
	noreplace_smp = 1;
	return 1;
}
__setup("noreplace-smp", setup_noreplace_smp);

#ifdef CONFIG_PARAVIRT
static int noreplace_paravirt = 0;

static int __init setup_noreplace_paravirt(char *str)
{
	noreplace_paravirt = 1;
	return 1;
}
__setup("noreplace-paravirt", setup_noreplace_paravirt);
#endif

#define DPRINTK(fmt, args...)					\
	do {							\
		if (debug_alternative)				\
			printk(KERN_DEBUG fmt, args);		\
	} while (0)

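/*
 * Per-vendor NOP tables.  Entry N points at an optimal NOP sequence that
 * is exactly N bytes long; add_nops() uses these to pad patched code up
 * to the length of the instruction it replaces.
 */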
#ifdef GENERIC_NOP1
/* Use inline assembly to define this because the nops are defined
   as inline assembly strings in the include files and we cannot
   get them easily into strings. */
asm("\t.section .rodata, \"a\"\nintelnops: "
	GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
	GENERIC_NOP7 GENERIC_NOP8);
extern const unsigned char intelnops[];
static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = {
	NULL,
	intelnops,
	intelnops + 1,
	intelnops + 1 + 2,
	intelnops + 1 + 2 + 3,
	intelnops + 1 + 2 + 3 + 4,
	intelnops + 1 + 2 + 3 + 4 + 5,
	intelnops + 1 + 2 + 3 + 4 + 5 + 6,
	intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
#endif

#ifdef K8_NOP1
asm("\t.section .rodata, \"a\"\nk8nops: "
	K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
	K8_NOP7 K8_NOP8);
extern const unsigned char k8nops[];
static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = {
	NULL,
	k8nops,
	k8nops + 1,
	k8nops + 1 + 2,
	k8nops + 1 + 2 + 3,
	k8nops + 1 + 2 + 3 + 4,
	k8nops + 1 + 2 + 3 + 4 + 5,
	k8nops + 1 + 2 + 3 + 4 + 5 + 6,
	k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
#endif

#ifdef K7_NOP1
asm("\t.section .rodata, \"a\"\nk7nops: "
	K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
	K7_NOP7 K7_NOP8);
extern const unsigned char k7nops[];
static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = {
	NULL,
	k7nops,
	k7nops + 1,
	k7nops + 1 + 2,
	k7nops + 1 + 2 + 3,
	k7nops + 1 + 2 + 3 + 4,
	k7nops + 1 + 2 + 3 + 4 + 5,
	k7nops + 1 + 2 + 3 + 4 + 5 + 6,
	k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
#endif

#ifdef P6_NOP1
asm("\t.section .rodata, \"a\"\np6nops: "
	P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6
	P6_NOP7 P6_NOP8);
extern const unsigned char p6nops[];
static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = {
	NULL,
	p6nops,
	p6nops + 1,
	p6nops + 1 + 2,
	p6nops + 1 + 2 + 3,
	p6nops + 1 + 2 + 3 + 4,
	p6nops + 1 + 2 + 3 + 4 + 5,
	p6nops + 1 + 2 + 3 + 4 + 5 + 6,
	p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
#endif

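/*
 * Pick the NOP table that suits the boot CPU: on 64-bit, P6-style NOPs
 * for Intel family 6 and later, K8 NOPs otherwise; on 32-bit, the first
 * matching CPU feature in noptypes[] wins, falling back to generic NOPs.
 */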
#ifdef CONFIG_X86_64

extern char __vsyscall_0;
static inline const unsigned char *const *find_nop_table(void)
{
	return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
	       boot_cpu_data.x86 < 6 ? k8_nops : p6_nops;
}

#else /* CONFIG_X86_64 */

static const struct nop {
	int cpuid;
	const unsigned char *const *noptable;
} noptypes[] = {
	{ X86_FEATURE_K8, k8_nops },
	{ X86_FEATURE_K7, k7_nops },
	{ X86_FEATURE_P4, p6_nops },
	{ X86_FEATURE_P3, p6_nops },
	{ -1, NULL }
};

static const unsigned char *const *find_nop_table(void)
{
	const unsigned char *const *noptable = intel_nops;
	int i;

	for (i = 0; noptypes[i].cpuid >= 0; i++) {
		if (boot_cpu_has(noptypes[i].cpuid)) {
			noptable = noptypes[i].noptable;
			break;
		}
	}
	return noptable;
}

#endif /* CONFIG_X86_64 */

/* Use this to add nops to a buffer, then text_poke the whole buffer. */
static void add_nops(void *insns, unsigned int len)
{
	const unsigned char *const *noptable = find_nop_table();

	while (len > 0) {
		unsigned int noplen = len;
		if (noplen > ASM_NOP_MAX)
			noplen = ASM_NOP_MAX;
		memcpy(insns, noptable[noplen], noplen);
		insns += noplen;
		len -= noplen;
	}
}

extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern u8 *__smp_locks[], *__smp_locks_end[];

/* Replace instructions with better alternatives for this CPU type.
   This runs before SMP is initialized to avoid SMP problems with
   self-modifying code. This implies that asymmetric systems where
   APs have fewer capabilities than the boot processor are not handled.
   Tough. Make sure you disable such features by hand. */

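/*
 * For reference, a sketch of where these entries come from: the
 * alternative() macro family in <asm/alternative.h> emits the original
 * instruction into .text and a struct alt_instr record (original address,
 * replacement address, CPU feature bit, lengths) into .altinstructions,
 * used roughly like:
 *
 *	alternative(oldinstr, newinstr, feature);
 *
 * apply_alternatives() walks those records and, when the feature bit is
 * set on the boot CPU, copies the replacement (NOP-padded) over the
 * original instruction.
 */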
void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{
	struct alt_instr *a;
	char insnbuf[MAX_PATCH_LEN];

	DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end);
	for (a = start; a < end; a++) {
		u8 *instr = a->instr;
		BUG_ON(a->replacementlen > a->instrlen);
		BUG_ON(a->instrlen > sizeof(insnbuf));
		if (!boot_cpu_has(a->cpuid))
			continue;
#ifdef CONFIG_X86_64
		/* vsyscall code is not mapped yet. resolve it manually. */
		if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) {
			instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
			DPRINTK("%s: vsyscall fixup: %p => %p\n",
				__FUNCTION__, a->instr, instr);
		}
#endif
		memcpy(insnbuf, a->replacement, a->replacementlen);
		add_nops(insnbuf + a->replacementlen,
			 a->instrlen - a->replacementlen);
		text_poke(instr, insnbuf, a->instrlen);
	}
}

#ifdef CONFIG_SMP

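/*
 * On SMP, locked instructions need their LOCK prefix; on UP it is pure
 * overhead.  The .smp_locks section records the address of every such
 * prefix byte so the helpers below can flip it between 0xf0 (LOCK) and
 * a one-byte NOP, skipping addresses outside the given text range.
 */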
static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
{
	u8 **ptr;

	for (ptr = start; ptr < end; ptr++) {
		if (*ptr < text)
			continue;
		if (*ptr > text_end)
			continue;
		text_poke(*ptr, ((unsigned char []){0xf0}), 1); /* add lock prefix */
	}
}

static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
{
	u8 **ptr;
	char insn[1];

	if (noreplace_smp)
		return;

	add_nops(insn, 1);
	for (ptr = start; ptr < end; ptr++) {
		if (*ptr < text)
			continue;
		if (*ptr > text_end)
			continue;
		text_poke(*ptr, insn, 1);
	}
}

struct smp_alt_module {
	/* owning module, NULL for the core kernel */
	struct module	*mod;
	char		*name;

	/* ptrs to lock prefixes */
	u8		**locks;
	u8		**locks_end;

	/* .text segment, needed to avoid patching init code ;) */
	u8		*text;
	u8		*text_end;

	struct list_head next;
};
static LIST_HEAD(smp_alt_modules);
static DEFINE_SPINLOCK(smp_alt);
static int smp_mode = 1;	/* protected by smp_alt */

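/*
 * Register a module's lock-prefix list.  If we only ever patch once at
 * boot (smp_alt_once), a UP kernel just gets its prefixes NOPed out here;
 * otherwise the range is remembered on smp_alt_modules so that
 * alternatives_smp_switch() can repatch it later.
 */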
void alternatives_smp_module_add(struct module *mod, char *name,
				 void *locks, void *locks_end,
				 void *text,  void *text_end)
{
	struct smp_alt_module *smp;
	unsigned long flags;

	if (noreplace_smp)
		return;

	if (smp_alt_once) {
		if (boot_cpu_has(X86_FEATURE_UP))
			alternatives_smp_unlock(locks, locks_end,
						text, text_end);
		return;
	}

	smp = kzalloc(sizeof(*smp), GFP_KERNEL);
	if (NULL == smp)
		return; /* we'll run the (safe but slow) SMP code then ... */

	smp->mod	= mod;
	smp->name	= name;
	smp->locks	= locks;
	smp->locks_end	= locks_end;
	smp->text	= text;
	smp->text_end	= text_end;
	DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
		__FUNCTION__, smp->locks, smp->locks_end,
		smp->text, smp->text_end, smp->name);

	spin_lock_irqsave(&smp_alt, flags);
	list_add_tail(&smp->next, &smp_alt_modules);
	if (boot_cpu_has(X86_FEATURE_UP))
		alternatives_smp_unlock(smp->locks, smp->locks_end,
					smp->text, smp->text_end);
	spin_unlock_irqrestore(&smp_alt, flags);
}

void alternatives_smp_module_del(struct module *mod)
{
	struct smp_alt_module *item;
	unsigned long flags;

	if (smp_alt_once || noreplace_smp)
		return;

	spin_lock_irqsave(&smp_alt, flags);
	list_for_each_entry(item, &smp_alt_modules, next) {
		if (mod != item->mod)
			continue;
		list_del(&item->next);
		spin_unlock_irqrestore(&smp_alt, flags);
		DPRINTK("%s: %s\n", __FUNCTION__, item->name);
		kfree(item);
		return;
	}
	spin_unlock_irqrestore(&smp_alt, flags);
}

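/*
 * Switch every registered site to its SMP form (smp != 0) or UP form
 * (smp == 0).  Switching to UP is only legal while a single CPU is
 * online; this is typically called from the CPU bringup/teardown paths.
 */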
void alternatives_smp_switch(int smp)
{
	struct smp_alt_module *mod;
	unsigned long flags;

#ifdef CONFIG_LOCKDEP
	/*
	 * Older binutils section handling bug prevented
	 * alternatives-replacement from working reliably.
	 *
	 * If this still occurs then you should see a hang
	 * or crash shortly after this line:
	 */
	printk("lockdep: fixing up alternatives.\n");
#endif

	if (noreplace_smp || smp_alt_once)
		return;
	BUG_ON(!smp && (num_online_cpus() > 1));

	spin_lock_irqsave(&smp_alt, flags);

	/*
	 * Avoid unnecessary switches because it forces JIT based VMs to
	 * throw away all cached translations, which can be quite costly.
	 */
	if (smp == smp_mode) {
		/* nothing */
	} else if (smp) {
		printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
		clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
		list_for_each_entry(mod, &smp_alt_modules, next)
			alternatives_smp_lock(mod->locks, mod->locks_end,
					      mod->text, mod->text_end);
	} else {
		printk(KERN_INFO "SMP alternatives: switching to UP code\n");
		set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
		set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
		list_for_each_entry(mod, &smp_alt_modules, next)
			alternatives_smp_unlock(mod->locks, mod->locks_end,
						mod->text, mod->text_end);
	}
	smp_mode = smp;
	spin_unlock_irqrestore(&smp_alt, flags);
}

#endif

#ifdef CONFIG_PARAVIRT
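/*
 * Patch paravirt call sites in place: each site is handed to
 * pv_init_ops.patch(), which may replace the indirect call with inline
 * code (or leave it alone); whatever space is left over is NOP-padded.
 */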
void apply_paravirt(struct paravirt_patch_site *start,
		    struct paravirt_patch_site *end)
{
	struct paravirt_patch_site *p;
	char insnbuf[MAX_PATCH_LEN];

	if (noreplace_paravirt)
		return;

	for (p = start; p < end; p++) {
		unsigned int used;

		BUG_ON(p->len > MAX_PATCH_LEN);
		/* prep the buffer with the original instructions */
		memcpy(insnbuf, p->instr, p->len);
		used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf,
					 (unsigned long)p->instr, p->len);

		BUG_ON(used > p->len);

		/* Pad the rest with nops */
		add_nops(insnbuf + used, p->len - used);
		text_poke(p->instr, insnbuf, p->len);
	}
}
extern struct paravirt_patch_site __start_parainstructions[],
	__stop_parainstructions[];
#endif	/* CONFIG_PARAVIRT */

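/*
 * Boot-time entry point: apply all alternative-instruction, SMP-lock and
 * paravirt patches while NMIs and MCEs are masked and local interrupts
 * are disabled, before any other CPU is running.
 */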
void __init alternative_instructions(void)
{
	unsigned long flags;

	/* The patching is not fully atomic, so try to avoid local
	   interrupts that might execute the code being patched.
	   Other CPUs are not running. */
	stop_nmi();
#ifdef CONFIG_X86_MCE
	stop_mce();
#endif

	local_irq_save(flags);
	apply_alternatives(__alt_instructions, __alt_instructions_end);

	/* switch to patch-once-at-boot-time-only mode and free the
	 * tables if we know the number of CPUs will never change */
#ifdef CONFIG_HOTPLUG_CPU
	if (num_possible_cpus() < 2)
		smp_alt_once = 1;
#endif

#ifdef CONFIG_SMP
	if (smp_alt_once) {
		if (1 == num_possible_cpus()) {
			printk(KERN_INFO "SMP alternatives: switching to UP code\n");
			set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
			set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);

			alternatives_smp_unlock(__smp_locks, __smp_locks_end,
						_text, _etext);
		}
	} else {
		alternatives_smp_module_add(NULL, "core kernel",
					    __smp_locks, __smp_locks_end,
					    _text, _etext);

		/* Only switch to UP mode if we don't immediately boot others */
		if (num_possible_cpus() == 1 || setup_max_cpus <= 1)
			alternatives_smp_switch(0);
	}
#endif
	apply_paravirt(__parainstructions, __parainstructions_end);
	local_irq_restore(flags);

	if (smp_alt_once)
		free_init_pages("SMP alternatives",
				(unsigned long)__smp_locks,
				(unsigned long)__smp_locks_end);

	restart_nmi();
#ifdef CONFIG_X86_MCE
	restart_mce();
#endif
}

/*
 * Warning:
 * When you use this code to patch more than one byte of an instruction
 * you need to make sure that other CPUs cannot execute this code in parallel.
 * Also, no thread may be preempted in the middle of these instructions.
 * And on the local CPU you need to be protected against NMI or MCE handlers
 * seeing an inconsistent instruction while you patch.
 */
void __kprobes text_poke(void *addr, unsigned char *opcode, int len)
{
	memcpy(addr, opcode, len);
	sync_core();
	/* Could also do a CLFLUSH here to speed up CPU recovery; but
	   that causes hangs on some VIA CPUs. */
}