xref: /openbmc/linux/arch/x86/kernel/alternative.c (revision 96de0e252cedffad61b3cb5e05662c591898e69a)
1 #include <linux/module.h>
2 #include <linux/sched.h>
3 #include <linux/spinlock.h>
4 #include <linux/list.h>
5 #include <linux/kprobes.h>
6 #include <linux/mm.h>
7 #include <linux/vmalloc.h>
8 #include <asm/alternative.h>
9 #include <asm/sections.h>
10 #include <asm/pgtable.h>
11 #include <asm/mce.h>
12 #include <asm/nmi.h>
13 #include <asm/vsyscall.h>
14 
15 #define MAX_PATCH_LEN (255-1)
16 
17 #ifdef CONFIG_HOTPLUG_CPU
18 static int smp_alt_once;
19 
20 static int __init bootonly(char *str)
21 {
22 	smp_alt_once = 1;
23 	return 1;
24 }
25 __setup("smp-alt-boot", bootonly);
26 #else
27 #define smp_alt_once 1
28 #endif
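/*
 * When smp_alt_once is set, the SMP lock prefixes are patched exactly once
 * at boot and the __smp_locks table is freed afterwards, so the kernel can
 * no longer switch between UP and SMP variants as CPUs come and go.
 * Without CPU hotplug this is always the case; with hotplug it can be
 * requested via the "smp-alt-boot" parameter, and it is also set
 * automatically when only one CPU is possible.
 */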
29 
30 static int debug_alternative;
31 
32 static int __init debug_alt(char *str)
33 {
34 	debug_alternative = 1;
35 	return 1;
36 }
37 __setup("debug-alternative", debug_alt);
38 
39 static int noreplace_smp;
40 
41 static int __init setup_noreplace_smp(char *str)
42 {
43 	noreplace_smp = 1;
44 	return 1;
45 }
46 __setup("noreplace-smp", setup_noreplace_smp);
47 
48 #ifdef CONFIG_PARAVIRT
49 static int noreplace_paravirt = 0;
50 
51 static int __init setup_noreplace_paravirt(char *str)
52 {
53 	noreplace_paravirt = 1;
54 	return 1;
55 }
56 __setup("noreplace-paravirt", setup_noreplace_paravirt);
57 #endif
58 
59 #define DPRINTK(fmt, args...) if (debug_alternative) \
60 	printk(KERN_DEBUG fmt, args)
61 
62 #ifdef GENERIC_NOP1
63 /* Use inline assembly to define this because the nops are defined
64    as inline assembly strings in the include files and we cannot
65    easily get them into strings. */
66 asm("\t.section .rodata, \"a\"\nintelnops: "
67 	GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
68 	GENERIC_NOP7 GENERIC_NOP8);
69 extern const unsigned char intelnops[];
70 static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = {
71 	NULL,
72 	intelnops,
73 	intelnops + 1,
74 	intelnops + 1 + 2,
75 	intelnops + 1 + 2 + 3,
76 	intelnops + 1 + 2 + 3 + 4,
77 	intelnops + 1 + 2 + 3 + 4 + 5,
78 	intelnops + 1 + 2 + 3 + 4 + 5 + 6,
79 	intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
80 };
81 #endif
82 
83 #ifdef K8_NOP1
84 asm("\t.section .rodata, \"a\"\nk8nops: "
85 	K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
86 	K8_NOP7 K8_NOP8);
87 extern const unsigned char k8nops[];
88 static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = {
89 	NULL,
90 	k8nops,
91 	k8nops + 1,
92 	k8nops + 1 + 2,
93 	k8nops + 1 + 2 + 3,
94 	k8nops + 1 + 2 + 3 + 4,
95 	k8nops + 1 + 2 + 3 + 4 + 5,
96 	k8nops + 1 + 2 + 3 + 4 + 5 + 6,
97 	k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
98 };
99 #endif
100 
101 #ifdef K7_NOP1
102 asm("\t.section .rodata, \"a\"\nk7nops: "
103 	K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
104 	K7_NOP7 K7_NOP8);
105 extern const unsigned char k7nops[];
106 static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = {
107 	NULL,
108 	k7nops,
109 	k7nops + 1,
110 	k7nops + 1 + 2,
111 	k7nops + 1 + 2 + 3,
112 	k7nops + 1 + 2 + 3 + 4,
113 	k7nops + 1 + 2 + 3 + 4 + 5,
114 	k7nops + 1 + 2 + 3 + 4 + 5 + 6,
115 	k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
116 };
117 #endif
118 
119 #ifdef P6_NOP1
120 asm("\t.section .rodata, \"a\"\np6nops: "
121 	P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6
122 	P6_NOP7 P6_NOP8);
123 extern const unsigned char p6nops[];
124 static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = {
125 	NULL,
126 	p6nops,
127 	p6nops + 1,
128 	p6nops + 1 + 2,
129 	p6nops + 1 + 2 + 3,
130 	p6nops + 1 + 2 + 3 + 4,
131 	p6nops + 1 + 2 + 3 + 4 + 5,
132 	p6nops + 1 + 2 + 3 + 4 + 5 + 6,
133 	p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
134 };
135 #endif
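/*
 * Each table above is indexed by NOP length: xxx_nops[n] points at an
 * n-byte NOP sequence inside the concatenated .rodata string, at offset
 * 0, 1, 1+2, 1+2+3, ... for n = 1..ASM_NOP_MAX.  Illustrative use only
 * (not built): emitting a single 5-byte NOP would be
 *
 *	memcpy(buf, k8_nops[5], 5);
 */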
136 
137 #ifdef CONFIG_X86_64
138 
139 extern char __vsyscall_0;
140 static inline const unsigned char*const * find_nop_table(void)
141 {
142 	return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
143 	       boot_cpu_data.x86 < 6 ? k8_nops : p6_nops;
144 }
145 
146 #else /* CONFIG_X86_64 */
147 
148 static const struct nop {
149 	int cpuid;
150 	const unsigned char *const *noptable;
151 } noptypes[] = {
152 	{ X86_FEATURE_K8, k8_nops },
153 	{ X86_FEATURE_K7, k7_nops },
154 	{ X86_FEATURE_P4, p6_nops },
155 	{ X86_FEATURE_P3, p6_nops },
156 	{ -1, NULL }
157 };
158 
159 static const unsigned char*const * find_nop_table(void)
160 {
161 	const unsigned char *const *noptable = intel_nops;
162 	int i;
163 
164 	for (i = 0; noptypes[i].cpuid >= 0; i++) {
165 		if (boot_cpu_has(noptypes[i].cpuid)) {
166 			noptable = noptypes[i].noptable;
167 			break;
168 		}
169 	}
170 	return noptable;
171 }
172 
173 #endif /* CONFIG_X86_64 */
174 
175 /* Use this to add nops to a buffer, then text_poke the whole buffer. */
176 static void add_nops(void *insns, unsigned int len)
177 {
178 	const unsigned char *const *noptable = find_nop_table();
179 
180 	while (len > 0) {
181 		unsigned int noplen = len;
182 		if (noplen > ASM_NOP_MAX)
183 			noplen = ASM_NOP_MAX;
184 		memcpy(insns, noptable[noplen], noplen);
185 		insns += noplen;
186 		len -= noplen;
187 	}
188 }
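/*
 * Typical use, as in apply_alternatives() below: copy a replacement that
 * is shorter than the original instruction and pad the rest, e.g. for a
 * 2-byte replacement in a 7-byte slot (sketch only):
 *
 *	memcpy(insnbuf, replacement, 2);
 *	add_nops(insnbuf + 2, 7 - 2);
 *	text_poke(instr, insnbuf, 7);
 */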
189 
190 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
191 extern u8 *__smp_locks[], *__smp_locks_end[];
192 
193 /* Replace instructions with better alternatives for this CPU type.
194    This runs before SMP is initialized to avoid SMP problems with
195    self-modifying code. This implies that asymmetric systems where
196    APs have fewer capabilities than the boot processor are not handled.
197    Tough. Make sure you disable such features by hand. */
198 
199 void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
200 {
201 	struct alt_instr *a;
202 	char insnbuf[MAX_PATCH_LEN];
203 
204 	DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end);
205 	for (a = start; a < end; a++) {
206 		u8 *instr = a->instr;
207 		BUG_ON(a->replacementlen > a->instrlen);
208 		BUG_ON(a->instrlen > sizeof(insnbuf));
209 		if (!boot_cpu_has(a->cpuid))
210 			continue;
211 #ifdef CONFIG_X86_64
212 		/* vsyscall code is not mapped yet. Resolve it manually. */
213 		if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) {
214 			instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
215 			DPRINTK("%s: vsyscall fixup: %p => %p\n",
216 				__FUNCTION__, a->instr, instr);
217 		}
218 #endif
219 		memcpy(insnbuf, a->replacement, a->replacementlen);
220 		add_nops(insnbuf + a->replacementlen,
221 			 a->instrlen - a->replacementlen);
222 		text_poke(instr, insnbuf, a->instrlen);
223 	}
224 }
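/*
 * Each struct alt_instr record names the original instruction (instr,
 * instrlen), its replacement (replacement, replacementlen <= instrlen) and
 * the CPU feature bit (cpuid) that must be set for the replacement to be
 * used; shorter replacements are NOP-padded so the site keeps its length.
 * Call sites typically emit these records with the alternative*() macros
 * from <asm/alternative.h>, which collect them in the section bounded by
 * __alt_instructions[] / __alt_instructions_end[].
 */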
225 
226 #ifdef CONFIG_SMP
227 
228 static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
229 {
230 	u8 **ptr;
231 
232 	for (ptr = start; ptr < end; ptr++) {
233 		if (*ptr < text)
234 			continue;
235 		if (*ptr > text_end)
236 			continue;
237 		text_poke(*ptr, ((unsigned char []){0xf0}), 1); /* add lock prefix */
238 	};
239 }
240 
241 static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
242 {
243 	u8 **ptr;
244 	char insn[1];
245 
246 	if (noreplace_smp)
247 		return;
248 
249 	add_nops(insn, 1);
250 	for (ptr = start; ptr < end; ptr++) {
251 		if (*ptr < text)
252 			continue;
253 		if (*ptr > text_end)
254 			continue;
255 		text_poke(*ptr, insn, 1);
256 	};
257 }
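/*
 * The lock/unlock pair above rewrites the recorded prefix byte of every
 * locked instruction listed in __smp_locks (or a module's equivalent
 * table): 0xf0 is the x86 LOCK prefix, needed once more than one CPU may
 * run; on UP it is replaced with a one-byte NOP since bus locking is
 * unnecessary there.
 */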
258 
259 struct smp_alt_module {
260 	/* owning module, NULL for the core kernel */
261 	struct module	*mod;
262 	char		*name;
263 
264 	/* ptrs to lock prefixes */
265 	u8		**locks;
266 	u8		**locks_end;
267 
268 	/* .text segment, needed to avoid patching init code ;) */
269 	u8		*text;
270 	u8		*text_end;
271 
272 	struct list_head next;
273 };
274 static LIST_HEAD(smp_alt_modules);
275 static DEFINE_SPINLOCK(smp_alt);
276 
277 void alternatives_smp_module_add(struct module *mod, char *name,
278 				 void *locks, void *locks_end,
279 				 void *text,  void *text_end)
280 {
281 	struct smp_alt_module *smp;
282 	unsigned long flags;
283 
284 	if (noreplace_smp)
285 		return;
286 
287 	if (smp_alt_once) {
288 		if (boot_cpu_has(X86_FEATURE_UP))
289 			alternatives_smp_unlock(locks, locks_end,
290 						text, text_end);
291 		return;
292 	}
293 
294 	smp = kzalloc(sizeof(*smp), GFP_KERNEL);
295 	if (NULL == smp)
296 		return; /* we'll run the (safe but slow) SMP code then ... */
297 
298 	smp->mod	= mod;
299 	smp->name	= name;
300 	smp->locks	= locks;
301 	smp->locks_end	= locks_end;
302 	smp->text	= text;
303 	smp->text_end	= text_end;
304 	DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
305 		__FUNCTION__, smp->locks, smp->locks_end,
306 		smp->text, smp->text_end, smp->name);
307 
308 	spin_lock_irqsave(&smp_alt, flags);
309 	list_add_tail(&smp->next, &smp_alt_modules);
310 	if (boot_cpu_has(X86_FEATURE_UP))
311 		alternatives_smp_unlock(smp->locks, smp->locks_end,
312 					smp->text, smp->text_end);
313 	spin_unlock_irqrestore(&smp_alt, flags);
314 }
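/*
 * Rough sketch of the expected caller, the arch module loader at
 * module_finalize() time (section lookup omitted, names illustrative):
 *
 *	void *lseg = (void *)locks->sh_addr;
 *	void *tseg = (void *)text->sh_addr;
 *	alternatives_smp_module_add(me, me->name,
 *				    lseg, lseg + locks->sh_size,
 *				    tseg, tseg + text->sh_size);
 */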
315 
316 void alternatives_smp_module_del(struct module *mod)
317 {
318 	struct smp_alt_module *item;
319 	unsigned long flags;
320 
321 	if (smp_alt_once || noreplace_smp)
322 		return;
323 
324 	spin_lock_irqsave(&smp_alt, flags);
325 	list_for_each_entry(item, &smp_alt_modules, next) {
326 		if (mod != item->mod)
327 			continue;
328 		list_del(&item->next);
329 		spin_unlock_irqrestore(&smp_alt, flags);
330 		DPRINTK("%s: %s\n", __FUNCTION__, item->name);
331 		kfree(item);
332 		return;
333 	}
334 	spin_unlock_irqrestore(&smp_alt, flags);
335 }
336 
337 void alternatives_smp_switch(int smp)
338 {
339 	struct smp_alt_module *mod;
340 	unsigned long flags;
341 
342 #ifdef CONFIG_LOCKDEP
343 	/*
344 	 * A not yet fixed binutils section handling bug prevents
345 	 * alternatives-replacement from working reliably, so turn
346 	 * it off:
347 	 */
348 	printk("lockdep: not fixing up alternatives.\n");
349 	return;
350 #endif
351 
352 	if (noreplace_smp || smp_alt_once)
353 		return;
354 	BUG_ON(!smp && (num_online_cpus() > 1));
355 
356 	spin_lock_irqsave(&smp_alt, flags);
357 	if (smp) {
358 		printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
359 		clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
360 		clear_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
361 		list_for_each_entry(mod, &smp_alt_modules, next)
362 			alternatives_smp_lock(mod->locks, mod->locks_end,
363 					      mod->text, mod->text_end);
364 	} else {
365 		printk(KERN_INFO "SMP alternatives: switching to UP code\n");
366 		set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
367 		set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
368 		list_for_each_entry(mod, &smp_alt_modules, next)
369 			alternatives_smp_unlock(mod->locks, mod->locks_end,
370 						mod->text, mod->text_end);
371 	}
372 	spin_unlock_irqrestore(&smp_alt, flags);
373 }
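/*
 * alternatives_smp_switch(1) is meant to be called by the SMP boot/hotplug
 * code before a second CPU starts executing kernel text, and
 * alternatives_smp_switch(0) once only one CPU is left online again
 * (hence the BUG_ON above).
 */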
374 
375 #endif
376 
377 #ifdef CONFIG_PARAVIRT
378 void apply_paravirt(struct paravirt_patch_site *start,
379 		    struct paravirt_patch_site *end)
380 {
381 	struct paravirt_patch_site *p;
382 	char insnbuf[MAX_PATCH_LEN];
383 
384 	if (noreplace_paravirt)
385 		return;
386 
387 	for (p = start; p < end; p++) {
388 		unsigned int used;
389 
390 		BUG_ON(p->len > MAX_PATCH_LEN);
391 		/* prep the buffer with the original instructions */
392 		memcpy(insnbuf, p->instr, p->len);
393 		used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf,
394 					 (unsigned long)p->instr, p->len);
395 
396 		BUG_ON(used > p->len);
397 
398 		/* Pad the rest with nops */
399 		add_nops(insnbuf + used, p->len - used);
400 		text_poke(p->instr, insnbuf, p->len);
401 	}
402 }
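/*
 * pv_init_ops.patch() rewrites the site in insnbuf (e.g. replacing an
 * indirect paravirt call with native instructions) and returns how many
 * bytes it actually emitted; the NOP padding keeps the patched site at
 * its original length p->len.
 */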
403 extern struct paravirt_patch_site __start_parainstructions[],
404 	__stop_parainstructions[];
405 #endif	/* CONFIG_PARAVIRT */
406 
407 void __init alternative_instructions(void)
408 {
409 	unsigned long flags;
410 
411 	/* The patching is not fully atomic, so try to avoid local interruptions
412 	   that might execute the code being patched.
413 	   Other CPUs are not running. */
414 	stop_nmi();
415 #ifdef CONFIG_X86_MCE
416 	stop_mce();
417 #endif
418 
419 	local_irq_save(flags);
420 	apply_alternatives(__alt_instructions, __alt_instructions_end);
421 
422 	/* switch to patch-once-at-boottime-only mode and free the
423 	 * tables if we know the number of CPUs will never
424 	 * change */
425 #ifdef CONFIG_HOTPLUG_CPU
426 	if (num_possible_cpus() < 2)
427 		smp_alt_once = 1;
428 #endif
429 
430 #ifdef CONFIG_SMP
431 	if (smp_alt_once) {
432 		if (1 == num_possible_cpus()) {
433 			printk(KERN_INFO "SMP alternatives: switching to UP code\n");
434 			set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
435 			set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
436 			alternatives_smp_unlock(__smp_locks, __smp_locks_end,
437 						_text, _etext);
438 		}
439 	} else {
440 		alternatives_smp_module_add(NULL, "core kernel",
441 					    __smp_locks, __smp_locks_end,
442 					    _text, _etext);
443 		alternatives_smp_switch(0);
444 	}
445 #endif
446 	apply_paravirt(__parainstructions, __parainstructions_end);
447 	local_irq_restore(flags);
448 
449 	if (smp_alt_once)
450 		free_init_pages("SMP alternatives",
451 				(unsigned long)__smp_locks,
452 				(unsigned long)__smp_locks_end);
453 
454 	restart_nmi();
455 #ifdef CONFIG_X86_MCE
456 	restart_mce();
457 #endif
458 }
459 
460 /*
461  * Warning:
462  * When you use this code to patch more than one byte of an instruction
463  * you need to make sure that other CPUs cannot execute this code in parallel.
464  * Also, no thread may currently be preempted in the middle of these instructions.
465  * And on the local CPU you need to be protected against NMI or MCE handlers
466  * seeing an inconsistent instruction while you patch.
467  */
468 void __kprobes text_poke(void *addr, unsigned char *opcode, int len)
469 {
470 	memcpy(addr, opcode, len);
471 	sync_core();
472 	/* Could also do a CLFLUSH here to speed up CPU recovery; but
473 	   that causes hangs on some VIA CPUs. */
474 }
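/*
 * Minimal illustrative use, much like the SMP unlock path above
 * (sketch only):
 *
 *	unsigned char nop = 0x90;	// one-byte NOP
 *	text_poke(addr, &nop, 1);
 */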
475