xref: /openbmc/linux/arch/x86/kernel/alternative.c (revision a1e58bbd)
1 #include <linux/module.h>
2 #include <linux/sched.h>
3 #include <linux/spinlock.h>
4 #include <linux/list.h>
5 #include <linux/kprobes.h>
6 #include <linux/mm.h>
7 #include <linux/vmalloc.h>
8 #include <asm/alternative.h>
9 #include <asm/sections.h>
10 #include <asm/pgtable.h>
11 #include <asm/mce.h>
12 #include <asm/nmi.h>
13 #include <asm/vsyscall.h>
14 
15 #define MAX_PATCH_LEN (255-1)
16 
17 #ifdef CONFIG_HOTPLUG_CPU
18 static int smp_alt_once;
19 
20 static int __init bootonly(char *str)
21 {
22 	smp_alt_once = 1;
23 	return 1;
24 }
25 __setup("smp-alt-boot", bootonly);
26 #else
27 #define smp_alt_once 1
28 #endif
29 
30 static int debug_alternative;
31 
32 static int __init debug_alt(char *str)
33 {
34 	debug_alternative = 1;
35 	return 1;
36 }
37 __setup("debug-alternative", debug_alt);
38 
39 static int noreplace_smp;
40 
41 static int __init setup_noreplace_smp(char *str)
42 {
43 	noreplace_smp = 1;
44 	return 1;
45 }
46 __setup("noreplace-smp", setup_noreplace_smp);
47 
48 #ifdef CONFIG_PARAVIRT
49 static int noreplace_paravirt = 0;
50 
51 static int __init setup_noreplace_paravirt(char *str)
52 {
53 	noreplace_paravirt = 1;
54 	return 1;
55 }
56 __setup("noreplace-paravirt", setup_noreplace_paravirt);
57 #endif
58 
59 #define DPRINTK(fmt, args...) if (debug_alternative) \
60 	printk(KERN_DEBUG fmt, args)
61 
62 #ifdef GENERIC_NOP1
63 /* Use inline assembly to define this because the nops are defined
64    as inline assembly strings in the include files and we cannot
65    get them easily into strings. */
66 asm("\t.section .rodata, \"a\"\nintelnops: "
67 	GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
68 	GENERIC_NOP7 GENERIC_NOP8
69     "\t.previous");
70 extern const unsigned char intelnops[];
71 static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = {
72 	NULL,
73 	intelnops,
74 	intelnops + 1,
75 	intelnops + 1 + 2,
76 	intelnops + 1 + 2 + 3,
77 	intelnops + 1 + 2 + 3 + 4,
78 	intelnops + 1 + 2 + 3 + 4 + 5,
79 	intelnops + 1 + 2 + 3 + 4 + 5 + 6,
80 	intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
81 };
82 #endif
83 
84 #ifdef K8_NOP1
85 asm("\t.section .rodata, \"a\"\nk8nops: "
86 	K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
87 	K8_NOP7 K8_NOP8
88     "\t.previous");
89 extern const unsigned char k8nops[];
90 static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = {
91 	NULL,
92 	k8nops,
93 	k8nops + 1,
94 	k8nops + 1 + 2,
95 	k8nops + 1 + 2 + 3,
96 	k8nops + 1 + 2 + 3 + 4,
97 	k8nops + 1 + 2 + 3 + 4 + 5,
98 	k8nops + 1 + 2 + 3 + 4 + 5 + 6,
99 	k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
100 };
101 #endif
102 
103 #ifdef K7_NOP1
104 asm("\t.section .rodata, \"a\"\nk7nops: "
105 	K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
106 	K7_NOP7 K7_NOP8
107     "\t.previous");
108 extern const unsigned char k7nops[];
109 static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = {
110 	NULL,
111 	k7nops,
112 	k7nops + 1,
113 	k7nops + 1 + 2,
114 	k7nops + 1 + 2 + 3,
115 	k7nops + 1 + 2 + 3 + 4,
116 	k7nops + 1 + 2 + 3 + 4 + 5,
117 	k7nops + 1 + 2 + 3 + 4 + 5 + 6,
118 	k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
119 };
120 #endif
121 
122 #ifdef P6_NOP1
123 asm("\t.section .rodata, \"a\"\np6nops: "
124 	P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6
125 	P6_NOP7 P6_NOP8
126     "\t.previous");
127 extern const unsigned char p6nops[];
128 static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = {
129 	NULL,
130 	p6nops,
131 	p6nops + 1,
132 	p6nops + 1 + 2,
133 	p6nops + 1 + 2 + 3,
134 	p6nops + 1 + 2 + 3 + 4,
135 	p6nops + 1 + 2 + 3 + 4 + 5,
136 	p6nops + 1 + 2 + 3 + 4 + 5 + 6,
137 	p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
138 };
139 #endif
140 
141 #ifdef CONFIG_X86_64
142 
143 extern char __vsyscall_0;
144 static inline const unsigned char*const * find_nop_table(void)
145 {
146 	return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
147 	       boot_cpu_data.x86 < 6 ? k8_nops : p6_nops;
148 }
149 
150 #else /* CONFIG_X86_64 */
151 
152 static const struct nop {
153 	int cpuid;
154 	const unsigned char *const *noptable;
155 } noptypes[] = {
156 	{ X86_FEATURE_K8, k8_nops },
157 	{ X86_FEATURE_K7, k7_nops },
158 	{ X86_FEATURE_P4, p6_nops },
159 	{ X86_FEATURE_P3, p6_nops },
160 	{ -1, NULL }
161 };
162 
163 static const unsigned char*const * find_nop_table(void)
164 {
165 	const unsigned char *const *noptable = intel_nops;
166 	int i;
167 
168 	for (i = 0; noptypes[i].cpuid >= 0; i++) {
169 		if (boot_cpu_has(noptypes[i].cpuid)) {
170 			noptable = noptypes[i].noptable;
171 			break;
172 		}
173 	}
174 	return noptable;
175 }
176 
177 #endif /* CONFIG_X86_64 */
178 
179 /* Use this to add nops to a buffer, then text_poke the whole buffer. */
180 static void add_nops(void *insns, unsigned int len)
181 {
182 	const unsigned char *const *noptable = find_nop_table();
183 
184 	while (len > 0) {
185 		unsigned int noplen = len;
186 		if (noplen > ASM_NOP_MAX)
187 			noplen = ASM_NOP_MAX;
188 		memcpy(insns, noptable[noplen], noplen);
189 		insns += noplen;
190 		len -= noplen;
191 	}
192 }
193 
194 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
195 extern u8 *__smp_locks[], *__smp_locks_end[];
196 
197 /* Replace instructions with better alternatives for this CPU type.
198    This runs before SMP is initialized to avoid SMP problems with
199    self modifying code. This implies that assymetric systems where
200    APs have less capabilities than the boot processor are not handled.
201    Tough. Make sure you disable such features by hand. */
202 
203 void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
204 {
205 	struct alt_instr *a;
206 	char insnbuf[MAX_PATCH_LEN];
207 
208 	DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end);
209 	for (a = start; a < end; a++) {
210 		u8 *instr = a->instr;
211 		BUG_ON(a->replacementlen > a->instrlen);
212 		BUG_ON(a->instrlen > sizeof(insnbuf));
213 		if (!boot_cpu_has(a->cpuid))
214 			continue;
215 #ifdef CONFIG_X86_64
216 		/* vsyscall code is not mapped yet. resolve it manually. */
217 		if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) {
218 			instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
219 			DPRINTK("%s: vsyscall fixup: %p => %p\n",
220 				__FUNCTION__, a->instr, instr);
221 		}
222 #endif
223 		memcpy(insnbuf, a->replacement, a->replacementlen);
224 		add_nops(insnbuf + a->replacementlen,
225 			 a->instrlen - a->replacementlen);
226 		text_poke(instr, insnbuf, a->instrlen);
227 	}
228 }
229 
230 #ifdef CONFIG_SMP
231 
232 static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
233 {
234 	u8 **ptr;
235 
236 	for (ptr = start; ptr < end; ptr++) {
237 		if (*ptr < text)
238 			continue;
239 		if (*ptr > text_end)
240 			continue;
241 		text_poke(*ptr, ((unsigned char []){0xf0}), 1); /* add lock prefix */
242 	};
243 }
244 
245 static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
246 {
247 	u8 **ptr;
248 	char insn[1];
249 
250 	if (noreplace_smp)
251 		return;
252 
253 	add_nops(insn, 1);
254 	for (ptr = start; ptr < end; ptr++) {
255 		if (*ptr < text)
256 			continue;
257 		if (*ptr > text_end)
258 			continue;
259 		text_poke(*ptr, insn, 1);
260 	};
261 }
262 
263 struct smp_alt_module {
264 	/* what is this ??? */
265 	struct module	*mod;
266 	char		*name;
267 
268 	/* ptrs to lock prefixes */
269 	u8		**locks;
270 	u8		**locks_end;
271 
272 	/* .text segment, needed to avoid patching init code ;) */
273 	u8		*text;
274 	u8		*text_end;
275 
276 	struct list_head next;
277 };
278 static LIST_HEAD(smp_alt_modules);
279 static DEFINE_SPINLOCK(smp_alt);
280 static int smp_mode = 1;	/* protected by smp_alt */
281 
282 void alternatives_smp_module_add(struct module *mod, char *name,
283 				 void *locks, void *locks_end,
284 				 void *text,  void *text_end)
285 {
286 	struct smp_alt_module *smp;
287 	unsigned long flags;
288 
289 	if (noreplace_smp)
290 		return;
291 
292 	if (smp_alt_once) {
293 		if (boot_cpu_has(X86_FEATURE_UP))
294 			alternatives_smp_unlock(locks, locks_end,
295 						text, text_end);
296 		return;
297 	}
298 
299 	smp = kzalloc(sizeof(*smp), GFP_KERNEL);
300 	if (NULL == smp)
301 		return; /* we'll run the (safe but slow) SMP code then ... */
302 
303 	smp->mod	= mod;
304 	smp->name	= name;
305 	smp->locks	= locks;
306 	smp->locks_end	= locks_end;
307 	smp->text	= text;
308 	smp->text_end	= text_end;
309 	DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
310 		__FUNCTION__, smp->locks, smp->locks_end,
311 		smp->text, smp->text_end, smp->name);
312 
313 	spin_lock_irqsave(&smp_alt, flags);
314 	list_add_tail(&smp->next, &smp_alt_modules);
315 	if (boot_cpu_has(X86_FEATURE_UP))
316 		alternatives_smp_unlock(smp->locks, smp->locks_end,
317 					smp->text, smp->text_end);
318 	spin_unlock_irqrestore(&smp_alt, flags);
319 }
320 
321 void alternatives_smp_module_del(struct module *mod)
322 {
323 	struct smp_alt_module *item;
324 	unsigned long flags;
325 
326 	if (smp_alt_once || noreplace_smp)
327 		return;
328 
329 	spin_lock_irqsave(&smp_alt, flags);
330 	list_for_each_entry(item, &smp_alt_modules, next) {
331 		if (mod != item->mod)
332 			continue;
333 		list_del(&item->next);
334 		spin_unlock_irqrestore(&smp_alt, flags);
335 		DPRINTK("%s: %s\n", __FUNCTION__, item->name);
336 		kfree(item);
337 		return;
338 	}
339 	spin_unlock_irqrestore(&smp_alt, flags);
340 }
341 
342 void alternatives_smp_switch(int smp)
343 {
344 	struct smp_alt_module *mod;
345 	unsigned long flags;
346 
347 #ifdef CONFIG_LOCKDEP
348 	/*
349 	 * Older binutils section handling bug prevented
350 	 * alternatives-replacement from working reliably.
351 	 *
352 	 * If this still occurs then you should see a hang
353 	 * or crash shortly after this line:
354 	 */
355 	printk("lockdep: fixing up alternatives.\n");
356 #endif
357 
358 	if (noreplace_smp || smp_alt_once)
359 		return;
360 	BUG_ON(!smp && (num_online_cpus() > 1));
361 
362 	spin_lock_irqsave(&smp_alt, flags);
363 
364 	/*
365 	 * Avoid unnecessary switches because it forces JIT based VMs to
366 	 * throw away all cached translations, which can be quite costly.
367 	 */
368 	if (smp == smp_mode) {
369 		/* nothing */
370 	} else if (smp) {
371 		printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
372 		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
373 		clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
374 		list_for_each_entry(mod, &smp_alt_modules, next)
375 			alternatives_smp_lock(mod->locks, mod->locks_end,
376 					      mod->text, mod->text_end);
377 	} else {
378 		printk(KERN_INFO "SMP alternatives: switching to UP code\n");
379 		set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
380 		set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
381 		list_for_each_entry(mod, &smp_alt_modules, next)
382 			alternatives_smp_unlock(mod->locks, mod->locks_end,
383 						mod->text, mod->text_end);
384 	}
385 	smp_mode = smp;
386 	spin_unlock_irqrestore(&smp_alt, flags);
387 }
388 
389 #endif
390 
391 #ifdef CONFIG_PARAVIRT
392 void apply_paravirt(struct paravirt_patch_site *start,
393 		    struct paravirt_patch_site *end)
394 {
395 	struct paravirt_patch_site *p;
396 	char insnbuf[MAX_PATCH_LEN];
397 
398 	if (noreplace_paravirt)
399 		return;
400 
401 	for (p = start; p < end; p++) {
402 		unsigned int used;
403 
404 		BUG_ON(p->len > MAX_PATCH_LEN);
405 		/* prep the buffer with the original instructions */
406 		memcpy(insnbuf, p->instr, p->len);
407 		used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf,
408 					 (unsigned long)p->instr, p->len);
409 
410 		BUG_ON(used > p->len);
411 
412 		/* Pad the rest with nops */
413 		add_nops(insnbuf + used, p->len - used);
414 		text_poke(p->instr, insnbuf, p->len);
415 	}
416 }
417 extern struct paravirt_patch_site __start_parainstructions[],
418 	__stop_parainstructions[];
419 #endif	/* CONFIG_PARAVIRT */
420 
421 void __init alternative_instructions(void)
422 {
423 	unsigned long flags;
424 
425 	/* The patching is not fully atomic, so try to avoid local interruptions
426 	   that might execute the to be patched code.
427 	   Other CPUs are not running. */
428 	stop_nmi();
429 #ifdef CONFIG_X86_MCE
430 	stop_mce();
431 #endif
432 
433 	local_irq_save(flags);
434 	apply_alternatives(__alt_instructions, __alt_instructions_end);
435 
436 	/* switch to patch-once-at-boottime-only mode and free the
437 	 * tables in case we know the number of CPUs will never ever
438 	 * change */
439 #ifdef CONFIG_HOTPLUG_CPU
440 	if (num_possible_cpus() < 2)
441 		smp_alt_once = 1;
442 #endif
443 
444 #ifdef CONFIG_SMP
445 	if (smp_alt_once) {
446 		if (1 == num_possible_cpus()) {
447 			printk(KERN_INFO "SMP alternatives: switching to UP code\n");
448 			set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
449 			set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
450 
451 			alternatives_smp_unlock(__smp_locks, __smp_locks_end,
452 						_text, _etext);
453 		}
454 	} else {
455 		alternatives_smp_module_add(NULL, "core kernel",
456 					    __smp_locks, __smp_locks_end,
457 					    _text, _etext);
458 
459 		/* Only switch to UP mode if we don't immediately boot others */
460 		if (num_possible_cpus() == 1 || setup_max_cpus <= 1)
461 			alternatives_smp_switch(0);
462 	}
463 #endif
464  	apply_paravirt(__parainstructions, __parainstructions_end);
465 	local_irq_restore(flags);
466 
467 	if (smp_alt_once)
468 		free_init_pages("SMP alternatives",
469 				(unsigned long)__smp_locks,
470 				(unsigned long)__smp_locks_end);
471 
472 	restart_nmi();
473 #ifdef CONFIG_X86_MCE
474 	restart_mce();
475 #endif
476 }
477 
478 /*
479  * Warning:
480  * When you use this code to patch more than one byte of an instruction
481  * you need to make sure that other CPUs cannot execute this code in parallel.
482  * Also no thread must be currently preempted in the middle of these instructions.
483  * And on the local CPU you need to be protected again NMI or MCE handlers
484  * seeing an inconsistent instruction while you patch.
485  */
486 void __kprobes text_poke(void *addr, unsigned char *opcode, int len)
487 {
488 	memcpy(addr, opcode, len);
489 	sync_core();
490 	/* Could also do a CLFLUSH here to speed up CPU recovery; but
491 	   that causes hangs on some VIA CPUs. */
492 }
493