// SPDX-License-Identifier: GPL-2.0-only
#define pr_fmt(fmt) "SMP alternatives: " fmt

#include <linux/module.h>
#include <linux/sched.h>
#include <linux/perf_event.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/stringify.h>
#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/memory.h>
#include <linux/stop_machine.h>
#include <linux/slab.h>
#include <linux/kdebug.h>
#include <linux/kprobes.h>
#include <linux/mmu_context.h>
#include <linux/bsearch.h>
#include <linux/sync_core.h>
#include <asm/text-patching.h>
#include <asm/alternative.h>
#include <asm/sections.h>
#include <asm/mce.h>
#include <asm/nmi.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/insn.h>
#include <asm/io.h>
#include <asm/fixmap.h>
#include <asm/paravirt.h>
#include <asm/asm-prototypes.h>

int __read_mostly alternatives_patched;

EXPORT_SYMBOL_GPL(alternatives_patched);

#define MAX_PATCH_LEN (255-1)

static int __initdata_or_module debug_alternative;

static int __init debug_alt(char *str)
{
	debug_alternative = 1;
	return 1;
}
__setup("debug-alternative", debug_alt);

static int noreplace_smp;

static int __init setup_noreplace_smp(char *str)
{
	noreplace_smp = 1;
	return 1;
}
__setup("noreplace-smp", setup_noreplace_smp);

#define DPRINTK(fmt, args...)						\
do {									\
	if (debug_alternative)						\
		printk(KERN_DEBUG pr_fmt(fmt) "\n", ##args);		\
} while (0)

#define DUMP_BYTES(buf, len, fmt, args...)				\
do {									\
	if (unlikely(debug_alternative)) {				\
		int j;							\
									\
		if (!(len))						\
			break;						\
									\
		printk(KERN_DEBUG pr_fmt(fmt), ##args);			\
		for (j = 0; j < (len) - 1; j++)				\
			printk(KERN_CONT "%02hhx ", buf[j]);		\
		printk(KERN_CONT "%02hhx\n", buf[j]);			\
	}								\
} while (0)

static const unsigned char x86nops[] =
{
	BYTES_NOP1,
	BYTES_NOP2,
	BYTES_NOP3,
	BYTES_NOP4,
	BYTES_NOP5,
	BYTES_NOP6,
	BYTES_NOP7,
	BYTES_NOP8,
};

const unsigned char * const x86_nops[ASM_NOP_MAX+1] =
{
	NULL,
	x86nops,
	x86nops + 1,
	x86nops + 1 + 2,
	x86nops + 1 + 2 + 3,
	x86nops + 1 + 2 + 3 + 4,
	x86nops + 1 + 2 + 3 + 4 + 5,
	x86nops + 1 + 2 + 3 + 4 + 5 + 6,
	x86nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};

/* Use this to add nops to a buffer, then text_poke the whole buffer. */
static void __init_or_module add_nops(void *insns, unsigned int len)
{
	while (len > 0) {
		unsigned int noplen = len;
		if (noplen > ASM_NOP_MAX)
			noplen = ASM_NOP_MAX;
		memcpy(insns, x86_nops[noplen], noplen);
		insns += noplen;
		len -= noplen;
	}
}

extern s32 __retpoline_sites[], __retpoline_sites_end[];
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern s32 __smp_locks[], __smp_locks_end[];
void text_poke_early(void *addr, const void *opcode, size_t len);
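/*
 * Illustrative sketch (assumed caller, not taken from this file): padding a
 * hypothetical 11-byte patch area "addr" at boot would be done roughly as
 *
 *	u8 buf[11];
 *
 *	add_nops(buf, sizeof(buf));
 *	text_poke_early(addr, buf, sizeof(buf));
 *
 * which emits one 8-byte NOP (0f 1f 84 00 00 00 00 00) followed by one 3-byte
 * NOP (0f 1f 00), since add_nops() greedily picks the largest NOP from the
 * x86_nops[] table that still fits.
 */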
/*
 * Are we looking at a near JMP with a 1 or 4-byte displacement?
 */
static inline bool is_jmp(const u8 opcode)
{
	return opcode == 0xeb || opcode == 0xe9;
}

static void __init_or_module
recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insn_buff)
{
	u8 *next_rip, *tgt_rip;
	s32 n_dspl, o_dspl;
	int repl_len;

	if (a->replacementlen != 5)
		return;

	o_dspl = *(s32 *)(insn_buff + 1);

	/* next_rip of the replacement JMP */
	next_rip = repl_insn + a->replacementlen;
	/* target rip of the replacement JMP */
	tgt_rip = next_rip + o_dspl;
	n_dspl = tgt_rip - orig_insn;

	DPRINTK("target RIP: %px, new_displ: 0x%x", tgt_rip, n_dspl);

	if (tgt_rip - orig_insn >= 0) {
		if (n_dspl - 2 <= 127)
			goto two_byte_jmp;
		else
			goto five_byte_jmp;
	/* negative offset */
	} else {
		if (((n_dspl - 2) & 0xff) == (n_dspl - 2))
			goto two_byte_jmp;
		else
			goto five_byte_jmp;
	}

two_byte_jmp:
	n_dspl -= 2;

	insn_buff[0] = 0xeb;
	insn_buff[1] = (s8)n_dspl;
	add_nops(insn_buff + 2, 3);

	repl_len = 2;
	goto done;

five_byte_jmp:
	n_dspl -= 5;

	insn_buff[0] = 0xe9;
	*(s32 *)&insn_buff[1] = n_dspl;

	repl_len = 5;

done:

	DPRINTK("final displ: 0x%08x, JMP 0x%lx",
		n_dspl, (unsigned long)orig_insn + n_dspl + repl_len);
}

/*
 * optimize_nops_range() - Optimize a sequence of single byte NOPs (0x90)
 *
 * @instr: instruction byte stream
 * @instrlen: length of the above
 * @off: offset within @instr where the first NOP has been detected
 *
 * Return: number of NOPs found (and replaced).
 */
static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off)
{
	unsigned long flags;
	int i = off, nnops;

	while (i < instrlen) {
		if (instr[i] != 0x90)
			break;

		i++;
	}

	nnops = i - off;

	if (nnops <= 1)
		return nnops;

	local_irq_save(flags);
	add_nops(instr + off, nnops);
	local_irq_restore(flags);

	DUMP_BYTES(instr, instrlen, "%px: [%d:%d) optimized NOPs: ", instr, off, i);

	return nnops;
}

/*
 * "noinline" to cause control flow change and thus invalidate I$ and
 * cause refetch after modification.
 */
static void __init_or_module noinline optimize_nops(u8 *instr, size_t len)
{
	struct insn insn;
	int i = 0;

	/*
	 * Jump over the non-NOP insns and optimize single-byte NOPs into bigger
	 * ones.
	 */
	for (;;) {
		if (insn_decode_kernel(&insn, &instr[i]))
			return;

		/*
		 * See if this and any potentially following NOPs can be
		 * optimized.
		 */
		if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
			i += optimize_nops_range(instr, len, i);
		else
			i += insn.length;

		if (i >= len)
			return;
	}
}
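/*
 * For illustration: if patching leaves, say, four stray single-byte NOPs
 * (90 90 90 90) behind, optimize_nops() rewrites them in place as one 4-byte
 * NOP from the x86_nops[] table above (0f 1f 40 00), which decodes more
 * cheaply than a run of 0x90 bytes.
 */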
/*
 * Replace instructions with better alternatives for this CPU type. This runs
 * before SMP is initialized to avoid SMP problems with self modifying code.
 * This implies that asymmetric systems where APs have less capabilities than
 * the boot processor are not handled. Tough. Make sure you disable such
 * features by hand.
 *
 * Marked "noinline" to cause control flow change and thus insn cache
 * to refetch changed I$ lines.
 */
void __init_or_module noinline apply_alternatives(struct alt_instr *start,
						  struct alt_instr *end)
{
	struct alt_instr *a;
	u8 *instr, *replacement;
	u8 insn_buff[MAX_PATCH_LEN];

	DPRINTK("alt table %px, -> %px", start, end);
	/*
	 * The scan order should be from start to end. A later scanned
	 * alternative code can overwrite previously scanned alternative code.
	 * Some kernel functions (e.g. memcpy, memset, etc) use this order to
	 * patch code.
	 *
	 * So be careful if you want to change the scan order to any other
	 * order.
	 */
	for (a = start; a < end; a++) {
		int insn_buff_sz = 0;
		/* Mask away "NOT" flag bit for feature to test. */
		u16 feature = a->cpuid & ~ALTINSTR_FLAG_INV;

		instr = (u8 *)&a->instr_offset + a->instr_offset;
		replacement = (u8 *)&a->repl_offset + a->repl_offset;
		BUG_ON(a->instrlen > sizeof(insn_buff));
		BUG_ON(feature >= (NCAPINTS + NBUGINTS) * 32);

		/*
		 * Patch if either:
		 * - feature is present
		 * - feature not present but ALTINSTR_FLAG_INV is set to mean,
		 *   patch if feature is *NOT* present.
		 */
		if (!boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV))
			goto next;

		DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d)",
			(a->cpuid & ALTINSTR_FLAG_INV) ? "!" : "",
			feature >> 5,
			feature & 0x1f,
			instr, instr, a->instrlen,
			replacement, a->replacementlen);

		DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr);
		DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement);

		memcpy(insn_buff, replacement, a->replacementlen);
		insn_buff_sz = a->replacementlen;

		/*
		 * 0xe8 is a relative CALL; fix the offset.
		 *
		 * Instruction length is checked before the opcode to avoid
		 * accessing uninitialized bytes for zero-length replacements.
		 */
		if (a->replacementlen == 5 && *insn_buff == 0xe8) {
			*(s32 *)(insn_buff + 1) += replacement - instr;
			DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx",
				*(s32 *)(insn_buff + 1),
				(unsigned long)instr + *(s32 *)(insn_buff + 1) + 5);
		}

		if (a->replacementlen && is_jmp(replacement[0]))
			recompute_jump(a, instr, replacement, insn_buff);

		for (; insn_buff_sz < a->instrlen; insn_buff_sz++)
			insn_buff[insn_buff_sz] = 0x90;

		DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr);

		text_poke_early(instr, insn_buff, insn_buff_sz);

next:
		optimize_nops(instr, a->instrlen);
	}
}

#if defined(CONFIG_RETPOLINE) && defined(CONFIG_STACK_VALIDATION)

/*
 * CALL/JMP *%\reg
 */
static int emit_indirect(int op, int reg, u8 *bytes)
{
	int i = 0;
	u8 modrm;

	switch (op) {
	case CALL_INSN_OPCODE:
		modrm = 0x10; /* Reg = 2; CALL r/m */
		break;

	case JMP32_INSN_OPCODE:
		modrm = 0x20; /* Reg = 4; JMP r/m */
		break;

	default:
		WARN_ON_ONCE(1);
		return -1;
	}

	if (reg >= 8) {
		bytes[i++] = 0x41; /* REX.B prefix */
		reg -= 8;
	}

	modrm |= 0xc0; /* Mod = 3 */
	modrm += reg;

	bytes[i++] = 0xff; /* opcode */
	bytes[i++] = modrm;

	return i;
}
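/*
 * A rough worked example of the encoding above (illustrative only):
 *
 *	emit_indirect(CALL_INSN_OPCODE,  0, bytes)  ->  ff d0      call *%rax
 *	emit_indirect(CALL_INSN_OPCODE, 11, bytes)  ->  41 ff d3   call *%r11
 *	emit_indirect(JMP32_INSN_OPCODE, 0, bytes)  ->  ff e0      jmp  *%rax
 *
 * i.e. opcode 0xff plus a ModRM byte with Mod=3, Reg selecting CALL(2)/JMP(4),
 * and a REX.B prefix for the extended registers.  This is what a
 * "call __x86_indirect_thunk_rax" site collapses to when retpolines are not
 * needed, with any remaining bytes of the original call filled with NOPs.
 */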
/*
 * Rewrite the compiler generated retpoline thunk calls.
 *
 * For spectre_v2=off (!X86_FEATURE_RETPOLINE), rewrite them into immediate
 * indirect instructions, avoiding the extra indirection.
 *
 * For example, convert:
 *
 *   CALL __x86_indirect_thunk_\reg
 *
 * into:
 *
 *   CALL *%\reg
 *
 * It also tries to inline spectre_v2=retpoline,amd when size permits.
 */
static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
{
	retpoline_thunk_t *target;
	int reg, ret, i = 0;
	u8 op, cc;

	target = addr + insn->length + insn->immediate.value;
	reg = target - __x86_indirect_thunk_array;

	if (WARN_ON_ONCE(reg & ~0xf))
		return -1;

	/* If anyone ever does: CALL/JMP *%rsp, we're in deep trouble. */
	BUG_ON(reg == 4);

	if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) &&
	    !cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD))
		return -1;

	op = insn->opcode.bytes[0];

	/*
	 * Convert:
	 *
	 *   Jcc.d32 __x86_indirect_thunk_\reg
	 *
	 * into:
	 *
	 *   Jncc.d8 1f
	 *   [ LFENCE ]
	 *   JMP *%\reg
	 *   [ NOP ]
	 * 1:
	 */
	/* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */
	if (op == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80) {
		cc = insn->opcode.bytes[1] & 0xf;
		cc ^= 1; /* invert condition */

		bytes[i++] = 0x70 + cc;        /* Jcc.d8 */
		bytes[i++] = insn->length - 2; /* sizeof(Jcc.d8) == 2 */

		/* Continue as if: JMP.d32 __x86_indirect_thunk_\reg */
		op = JMP32_INSN_OPCODE;
	}

	/*
	 * For RETPOLINE_AMD: prepend the indirect CALL/JMP with an LFENCE.
	 */
	if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD)) {
		bytes[i++] = 0x0f;
		bytes[i++] = 0xae;
		bytes[i++] = 0xe8; /* LFENCE */
	}

	ret = emit_indirect(op, reg, bytes + i);
	if (ret < 0)
		return ret;
	i += ret;

	for (; i < insn->length;)
		bytes[i++] = BYTES_NOP1;

	return i;
}

/*
 * Generated by 'objtool --retpoline'.
 */
void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
{
	s32 *s;

	for (s = start; s < end; s++) {
		void *addr = (void *)s + *s;
		struct insn insn;
		int len, ret;
		u8 bytes[16];
		u8 op1, op2;

		ret = insn_decode_kernel(&insn, addr);
		if (WARN_ON_ONCE(ret < 0))
			continue;

		op1 = insn.opcode.bytes[0];
		op2 = insn.opcode.bytes[1];

		switch (op1) {
		case CALL_INSN_OPCODE:
		case JMP32_INSN_OPCODE:
			break;

		case 0x0f: /* escape */
			if (op2 >= 0x80 && op2 <= 0x8f)
				break;
			fallthrough;
		default:
			WARN_ON_ONCE(1);
			continue;
		}

		DPRINTK("retpoline at: %pS (%px) len: %d to: %pS",
			addr, addr, insn.length,
			addr + insn.length + insn.immediate.value);

		len = patch_retpoline(addr, &insn, bytes);
		if (len == insn.length) {
			optimize_nops(bytes, len);
			DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr);
			DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr);
			text_poke_early(addr, bytes, len);
		}
	}
}

#else /* !CONFIG_RETPOLINE || !CONFIG_STACK_VALIDATION */

void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { }

#endif /* CONFIG_RETPOLINE && CONFIG_STACK_VALIDATION */

#ifdef CONFIG_SMP
static void alternatives_smp_lock(const s32 *start, const s32 *end,
				  u8 *text, u8 *text_end)
{
	const s32 *poff;

	for (poff = start; poff < end; poff++) {
		u8 *ptr = (u8 *)poff + *poff;

		if (!*poff || ptr < text || ptr >= text_end)
			continue;
		/* turn DS segment override prefix into lock prefix */
		if (*ptr == 0x3e)
			text_poke(ptr, ((unsigned char []){0xf0}), 1);
	}
}

static void alternatives_smp_unlock(const s32 *start, const s32 *end,
				    u8 *text, u8 *text_end)
{
	const s32 *poff;

	for (poff = start; poff < end; poff++) {
		u8 *ptr = (u8 *)poff + *poff;

		if (!*poff || ptr < text || ptr >= text_end)
			continue;
		/* turn lock prefix into DS segment override prefix */
		if (*ptr == 0xf0)
			text_poke(ptr, ((unsigned char []){0x3E}), 1);
	}
}
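/*
 * For illustration: a LOCK-prefixed RMW such as
 *
 *	f0 0f b1 17		lock cmpxchg %edx,(%rdi)
 *
 * becomes, after alternatives_smp_unlock() on a UP machine,
 *
 *	3e 0f b1 17		ds cmpxchg %edx,(%rdi)
 *
 * i.e. the LOCK prefix (0xf0) is replaced by a harmless DS segment override
 * (0x3e); alternatives_smp_lock() performs the reverse substitution when a
 * second CPU is brought up.
 */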
struct smp_alt_module {
	/* what is this ??? */
	struct module	*mod;
	char		*name;

	/* ptrs to lock prefixes */
	const s32	*locks;
	const s32	*locks_end;

	/* .text segment, needed to avoid patching init code ;) */
	u8		*text;
	u8		*text_end;

	struct list_head next;
};
static LIST_HEAD(smp_alt_modules);
static bool uniproc_patched = false;	/* protected by text_mutex */

void __init_or_module alternatives_smp_module_add(struct module *mod,
						  char *name,
						  void *locks, void *locks_end,
						  void *text, void *text_end)
{
	struct smp_alt_module *smp;

	mutex_lock(&text_mutex);
	if (!uniproc_patched)
		goto unlock;

	if (num_possible_cpus() == 1)
		/* Don't bother remembering, we'll never have to undo it. */
		goto smp_unlock;

	smp = kzalloc(sizeof(*smp), GFP_KERNEL);
	if (NULL == smp)
		/* we'll run the (safe but slow) SMP code then ... */
		goto unlock;

	smp->mod	= mod;
	smp->name	= name;
	smp->locks	= locks;
	smp->locks_end	= locks_end;
	smp->text	= text;
	smp->text_end	= text_end;
	DPRINTK("locks %p -> %p, text %p -> %p, name %s",
		smp->locks, smp->locks_end,
		smp->text, smp->text_end, smp->name);

	list_add_tail(&smp->next, &smp_alt_modules);
smp_unlock:
	alternatives_smp_unlock(locks, locks_end, text, text_end);
unlock:
	mutex_unlock(&text_mutex);
}

void __init_or_module alternatives_smp_module_del(struct module *mod)
{
	struct smp_alt_module *item;

	mutex_lock(&text_mutex);
	list_for_each_entry(item, &smp_alt_modules, next) {
		if (mod != item->mod)
			continue;
		list_del(&item->next);
		kfree(item);
		break;
	}
	mutex_unlock(&text_mutex);
}

void alternatives_enable_smp(void)
{
	struct smp_alt_module *mod;

	/* Why bother if there are no other CPUs? */
	BUG_ON(num_possible_cpus() == 1);

	mutex_lock(&text_mutex);

	if (uniproc_patched) {
		pr_info("switching to SMP code\n");
		BUG_ON(num_online_cpus() != 1);
		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
		clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
		list_for_each_entry(mod, &smp_alt_modules, next)
			alternatives_smp_lock(mod->locks, mod->locks_end,
					      mod->text, mod->text_end);
		uniproc_patched = false;
	}
	mutex_unlock(&text_mutex);
}

/*
 * Return 1 if the address range is reserved for SMP-alternatives.
 * Must hold text_mutex.
 */
int alternatives_text_reserved(void *start, void *end)
{
	struct smp_alt_module *mod;
	const s32 *poff;
	u8 *text_start = start;
	u8 *text_end = end;

	lockdep_assert_held(&text_mutex);

	list_for_each_entry(mod, &smp_alt_modules, next) {
		if (mod->text > text_end || mod->text_end < text_start)
			continue;
		for (poff = mod->locks; poff < mod->locks_end; poff++) {
			const u8 *ptr = (const u8 *)poff + *poff;

			if (text_start <= ptr && text_end > ptr)
				return 1;
		}
	}

	return 0;
}
#endif /* CONFIG_SMP */

#ifdef CONFIG_PARAVIRT
void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
				     struct paravirt_patch_site *end)
{
	struct paravirt_patch_site *p;
	char insn_buff[MAX_PATCH_LEN];

	for (p = start; p < end; p++) {
		unsigned int used;

		BUG_ON(p->len > MAX_PATCH_LEN);
		/* prep the buffer with the original instructions */
		memcpy(insn_buff, p->instr, p->len);
		used = paravirt_patch(p->type, insn_buff, (unsigned long)p->instr, p->len);

		BUG_ON(used > p->len);

		/* Pad the rest with nops */
		add_nops(insn_buff + used, p->len - used);
		text_poke_early(p->instr, insn_buff, p->len);
	}
}
extern struct paravirt_patch_site __start_parainstructions[],
	__stop_parainstructions[];
#endif	/* CONFIG_PARAVIRT */

/*
 * Self-test for the INT3 based CALL emulation code.
 *
 * This exercises int3_emulate_call() to make sure INT3 pt_regs are set up
 * properly and that there is a stack gap between the INT3 frame and the
 * previous context. Without this gap doing a virtual PUSH on the interrupted
 * stack would corrupt the INT3 IRET frame.
 *
 * See entry_{32,64}.S for more details.
 */

/*
 * We define the int3_magic() function in assembly to control the calling
 * convention such that we can 'call' it from assembly.
 */

extern void int3_magic(unsigned int *ptr); /* defined in asm */

asm (
"	.pushsection	.init.text, \"ax\", @progbits\n"
"	.type		int3_magic, @function\n"
"int3_magic:\n"
"	movl	$1, (%" _ASM_ARG1 ")\n"
	ASM_RET
"	.size		int3_magic, .-int3_magic\n"
"	.popsection\n"
);

extern __initdata unsigned long int3_selftest_ip; /* defined in asm below */

static int __init
int3_exception_notify(struct notifier_block *self, unsigned long val, void *data)
{
	struct die_args *args = data;
	struct pt_regs *regs = args->regs;

	if (!regs || user_mode(regs))
		return NOTIFY_DONE;

	if (val != DIE_INT3)
		return NOTIFY_DONE;

	if (regs->ip - INT3_INSN_SIZE != int3_selftest_ip)
		return NOTIFY_DONE;

	int3_emulate_call(regs, (unsigned long)&int3_magic);
	return NOTIFY_STOP;
}

static void __init int3_selftest(void)
{
	static __initdata struct notifier_block int3_exception_nb = {
		.notifier_call	= int3_exception_notify,
		.priority	= INT_MAX-1, /* last */
	};
	unsigned int val = 0;

	BUG_ON(register_die_notifier(&int3_exception_nb));

	/*
	 * Basically: int3_magic(&val); but really complicated :-)
	 *
	 * Stick the address of the INT3 instruction into int3_selftest_ip,
	 * then trigger the INT3, padded with NOPs to match a CALL instruction
	 * length.
	 */
	asm volatile ("1: int3; nop; nop; nop; nop\n\t"
		      ".pushsection .init.data,\"aw\"\n\t"
		      ".align " __ASM_SEL(4, 8) "\n\t"
		      ".type int3_selftest_ip, @object\n\t"
		      ".size int3_selftest_ip, " __ASM_SEL(4, 8) "\n\t"
		      "int3_selftest_ip:\n\t"
		      __ASM_SEL(.long, .quad) " 1b\n\t"
		      ".popsection\n\t"
		      : ASM_CALL_CONSTRAINT
		      : __ASM_SEL_RAW(a, D) (&val)
		      : "memory");

	BUG_ON(val != 1);

	unregister_die_notifier(&int3_exception_nb);
}

void __init alternative_instructions(void)
{
	int3_selftest();

	/*
	 * The patching is not fully atomic, so try to avoid local
	 * interruptions that might execute the code that is about to be
	 * patched. Other CPUs are not running.
	 */
	stop_nmi();

	/*
	 * Don't stop machine check exceptions while patching.
	 * MCEs only happen when something got corrupted and in this
	 * case we must do something about the corruption.
	 * Ignoring it is worse than an unlikely patching race.
	 * Also machine checks tend to be broadcast and if one CPU
	 * goes into machine check the others follow quickly, so we don't
	 * expect a machine check to cause undue problems during code
	 * patching.
	 */

	/*
	 * Paravirt patching and alternative patching can be combined to
	 * replace a function call with a short direct code sequence (e.g.
	 * by setting a constant return value instead of doing that in an
	 * external function).
	 * In order to make this work the following sequence is required:
	 * 1. set (artificial) features depending on used paravirt
	 *    functions which can later influence alternative patching
	 * 2. apply paravirt patching (generally replacing an indirect
	 *    function call with a direct one)
	 * 3. apply alternative patching (e.g. replacing a direct function
	 *    call with a custom code sequence)
	 * Doing paravirt patching after alternative patching would clobber
	 * the optimization of the custom code with a function call again.
	 */
	paravirt_set_cap();

	/*
	 * First patch paravirt functions, such that we overwrite the indirect
	 * call with the direct call.
	 */
	apply_paravirt(__parainstructions, __parainstructions_end);

	/*
	 * Rewrite the retpolines, must be done before alternatives since
	 * those can rewrite the retpoline thunks.
	 */
	apply_retpolines(__retpoline_sites, __retpoline_sites_end);

	/*
	 * Then patch alternatives, such that those paravirt calls that are in
	 * alternatives can be overwritten by their immediate fragments.
	 */
	apply_alternatives(__alt_instructions, __alt_instructions_end);

#ifdef CONFIG_SMP
	/* Patch to UP if other cpus not imminent. */
	if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) {
		uniproc_patched = true;
		alternatives_smp_module_add(NULL, "core kernel",
					    __smp_locks, __smp_locks_end,
					    _text, _etext);
	}

	if (!uniproc_patched || num_possible_cpus() == 1) {
		free_init_pages("SMP alternatives",
				(unsigned long)__smp_locks,
				(unsigned long)__smp_locks_end);
	}
#endif

	restart_nmi();
	alternatives_patched = 1;
}

/**
 * text_poke_early - Update instructions on a live kernel at boot time
 * @addr: address to modify
 * @opcode: source of the copy
 * @len: length to copy
 *
 * When you use this code to patch more than one byte of an instruction
 * you need to make sure that other CPUs cannot execute this code in parallel.
 * Also no thread must be currently preempted in the middle of these
 * instructions. And on the local CPU you need to be protected against NMI or
 * MCE handlers seeing an inconsistent instruction while you patch.
 */
void __init_or_module text_poke_early(void *addr, const void *opcode,
				      size_t len)
{
	unsigned long flags;

	if (boot_cpu_has(X86_FEATURE_NX) &&
	    is_module_text_address((unsigned long)addr)) {
		/*
		 * Modules text is marked initially as non-executable, so the
		 * code cannot be running and speculative code-fetches are
		 * prevented. Just change the code.
		 */
		memcpy(addr, opcode, len);
	} else {
		local_irq_save(flags);
		memcpy(addr, opcode, len);
		local_irq_restore(flags);
		sync_core();

		/*
		 * Could also do a CLFLUSH here to speed up CPU recovery; but
		 * that causes hangs on some VIA CPUs.
		 */
	}
}

typedef struct {
	struct mm_struct *mm;
} temp_mm_state_t;

/*
 * Using a temporary mm allows to set temporary mappings that are not accessible
 * by other CPUs. Such mappings are needed to perform sensitive memory writes
 * that override the kernel memory protections (e.g., W^X), without exposing the
 * temporary page-table mappings that are required for these write operations to
 * other CPUs. Using a temporary mm also allows to avoid TLB shootdowns when the
 * mapping is torn down.
 *
 * Context: The temporary mm needs to be used exclusively by a single core. To
 *          harden security IRQs must be disabled while the temporary mm is
 *          loaded, thereby preventing interrupt handler bugs from overriding
 *          the kernel memory protection.
 */
static inline temp_mm_state_t use_temporary_mm(struct mm_struct *mm)
{
	temp_mm_state_t temp_state;

	lockdep_assert_irqs_disabled();

	/*
	 * Make sure not to be in TLB lazy mode, as otherwise we'll end up
	 * with a stale address space WITHOUT being in lazy mode after
	 * restoring the previous mm.
	 */
	if (this_cpu_read(cpu_tlbstate_shared.is_lazy))
		leave_mm(smp_processor_id());

	temp_state.mm = this_cpu_read(cpu_tlbstate.loaded_mm);
	switch_mm_irqs_off(NULL, mm, current);

	/*
	 * If breakpoints are enabled, disable them while the temporary mm is
	 * used. Userspace might set up watchpoints on addresses that are used
	 * in the temporary mm, which would lead to wrong signals being sent or
	 * crashes.
	 *
	 * Note that breakpoints are not disabled selectively, which also causes
	 * kernel breakpoints (e.g., perf's) to be disabled. This might be
	 * undesirable, but still seems reasonable as the code that runs in the
	 * temporary mm should be short.
	 */
	if (hw_breakpoint_active())
		hw_breakpoint_disable();

	return temp_state;
}

static inline void unuse_temporary_mm(temp_mm_state_t prev_state)
{
	lockdep_assert_irqs_disabled();
	switch_mm_irqs_off(NULL, prev_state.mm, current);

	/*
	 * Restore the breakpoints if they were disabled before the temporary mm
	 * was loaded.
	 */
	if (hw_breakpoint_active())
		hw_breakpoint_restore();
}

__ro_after_init struct mm_struct *poking_mm;
__ro_after_init unsigned long poking_addr;

static void *__text_poke(void *addr, const void *opcode, size_t len)
{
	bool cross_page_boundary = offset_in_page(addr) + len > PAGE_SIZE;
	struct page *pages[2] = {NULL};
	temp_mm_state_t prev;
	unsigned long flags;
	pte_t pte, *ptep;
	spinlock_t *ptl;
	pgprot_t pgprot;

	/*
	 * While boot memory allocator is running we cannot use struct pages as
	 * they are not yet initialized. There is no way to recover.
	 */
	BUG_ON(!after_bootmem);

	if (!core_kernel_text((unsigned long)addr)) {
		pages[0] = vmalloc_to_page(addr);
		if (cross_page_boundary)
			pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
	} else {
		pages[0] = virt_to_page(addr);
		WARN_ON(!PageReserved(pages[0]));
		if (cross_page_boundary)
			pages[1] = virt_to_page(addr + PAGE_SIZE);
	}
	/*
	 * If something went wrong, crash and burn since recovery paths are not
	 * implemented.
	 */
	BUG_ON(!pages[0] || (cross_page_boundary && !pages[1]));

	/*
	 * Map the page without the global bit, as TLB flushing is done with
	 * flush_tlb_mm_range(), which is intended for non-global PTEs.
	 */
	pgprot = __pgprot(pgprot_val(PAGE_KERNEL) & ~_PAGE_GLOBAL);

	/*
	 * The lock is not really needed, but this allows to avoid open-coding.
	 */
	ptep = get_locked_pte(poking_mm, poking_addr, &ptl);

	/*
	 * This must not fail; preallocated in poking_init().
	 */
	VM_BUG_ON(!ptep);

	local_irq_save(flags);

	pte = mk_pte(pages[0], pgprot);
	set_pte_at(poking_mm, poking_addr, ptep, pte);

	if (cross_page_boundary) {
		pte = mk_pte(pages[1], pgprot);
		set_pte_at(poking_mm, poking_addr + PAGE_SIZE, ptep + 1, pte);
	}

	/*
	 * Loading the temporary mm behaves as a compiler barrier, which
	 * guarantees that the PTE will be set at the time memcpy() is done.
	 */
	prev = use_temporary_mm(poking_mm);

	kasan_disable_current();
	memcpy((u8 *)poking_addr + offset_in_page(addr), opcode, len);
	kasan_enable_current();

	/*
	 * Ensure that the PTE is only cleared after the instructions of memcpy
	 * were issued by using a compiler barrier.
	 */
	barrier();

	pte_clear(poking_mm, poking_addr, ptep);
	if (cross_page_boundary)
		pte_clear(poking_mm, poking_addr + PAGE_SIZE, ptep + 1);

	/*
	 * Loading the previous page-table hierarchy requires a serializing
	 * instruction that already allows the core to see the updated version.
	 * Xen-PV is assumed to serialize execution in a similar manner.
	 */
	unuse_temporary_mm(prev);

	/*
	 * Flushing the TLB might involve IPIs, which would require enabled
	 * IRQs, but not if the mm is not used, as it is at this point.
	 */
	flush_tlb_mm_range(poking_mm, poking_addr, poking_addr +
			   (cross_page_boundary ? 2 : 1) * PAGE_SIZE,
			   PAGE_SHIFT, false);

	/*
	 * If the text does not match what we just wrote then something is
	 * fundamentally screwy; there's nothing we can really do about that.
	 */
	BUG_ON(memcmp(addr, opcode, len));

	local_irq_restore(flags);
	pte_unmap_unlock(ptep, ptl);
	return addr;
}

/**
 * text_poke - Update instructions on a live kernel
 * @addr: address to modify
 * @opcode: source of the copy
 * @len: length to copy
 *
 * Only atomic text poke/set should be allowed when not doing early patching.
 * It means the size must be writable atomically and the address must be aligned
 * in a way that permits an atomic write. It also makes sure we fit on a single
 * page.
 *
 * Note that the caller must ensure that if the modified code is part of a
 * module, the module would not be removed during poking. This can be achieved
 * by registering a module notifier, and ordering module removal and patching
 * through a mutex.
 */
void *text_poke(void *addr, const void *opcode, size_t len)
{
	lockdep_assert_held(&text_mutex);

	return __text_poke(addr, opcode, len);
}
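/*
 * A minimal caller sketch (hypothetical, for illustration only): patching a
 * single byte at run time under the required lock looks roughly like
 *
 *	mutex_lock(&text_mutex);
 *	text_poke(addr, &byte, 1);
 *	text_poke_sync();
 *	mutex_unlock(&text_mutex);
 *
 * where "addr" and "byte" stand in for the caller's instruction address and
 * replacement byte; text_poke_sync() (below) makes every CPU serialize before
 * the new byte is relied upon.
 */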
/**
 * text_poke_kgdb - Update instructions on a live kernel by kgdb
 * @addr: address to modify
 * @opcode: source of the copy
 * @len: length to copy
 *
 * Only atomic text poke/set should be allowed when not doing early patching.
 * It means the size must be writable atomically and the address must be aligned
 * in a way that permits an atomic write. It also makes sure we fit on a single
 * page.
 *
 * Context: should only be used by kgdb, which ensures no other core is running,
 *          despite the fact it does not hold the text_mutex.
 */
void *text_poke_kgdb(void *addr, const void *opcode, size_t len)
{
	return __text_poke(addr, opcode, len);
}

static void do_sync_core(void *info)
{
	sync_core();
}

void text_poke_sync(void)
{
	on_each_cpu(do_sync_core, NULL, 1);
}

struct text_poke_loc {
	/* addr := _stext + rel_addr */
	s32 rel_addr;
	s32 disp;
	u8 len;
	u8 opcode;
	const u8 text[POKE_MAX_OPCODE_SIZE];
	/* see text_poke_bp_batch() */
	u8 old;
};

struct bp_patching_desc {
	struct text_poke_loc *vec;
	int nr_entries;
	atomic_t refs;
};

static struct bp_patching_desc *bp_desc;

static __always_inline
struct bp_patching_desc *try_get_desc(struct bp_patching_desc **descp)
{
	/* rcu_dereference */
	struct bp_patching_desc *desc = __READ_ONCE(*descp);

	if (!desc || !arch_atomic_inc_not_zero(&desc->refs))
		return NULL;

	return desc;
}

static __always_inline void put_desc(struct bp_patching_desc *desc)
{
	smp_mb__before_atomic();
	arch_atomic_dec(&desc->refs);
}

static __always_inline void *text_poke_addr(struct text_poke_loc *tp)
{
	return _stext + tp->rel_addr;
}

static __always_inline int patch_cmp(const void *key, const void *elt)
{
	struct text_poke_loc *tp = (struct text_poke_loc *) elt;

	if (key < text_poke_addr(tp))
		return -1;
	if (key > text_poke_addr(tp))
		return 1;
	return 0;
}

noinstr int poke_int3_handler(struct pt_regs *regs)
{
	struct bp_patching_desc *desc;
	struct text_poke_loc *tp;
	int ret = 0;
	void *ip;

	if (user_mode(regs))
		return 0;

	/*
	 * Having observed our INT3 instruction, we now must observe
	 * bp_desc:
	 *
	 *	bp_desc = desc			INT3
	 *	WMB				RMB
	 *	write INT3			if (desc)
	 */
	smp_rmb();

	desc = try_get_desc(&bp_desc);
	if (!desc)
		return 0;

	/*
	 * Discount the INT3. See text_poke_bp_batch().
	 */
	ip = (void *) regs->ip - INT3_INSN_SIZE;

	/*
	 * Skip the binary search if there is a single member in the vector.
	 */
	if (unlikely(desc->nr_entries > 1)) {
		tp = __inline_bsearch(ip, desc->vec, desc->nr_entries,
				      sizeof(struct text_poke_loc),
				      patch_cmp);
		if (!tp)
			goto out_put;
	} else {
		tp = desc->vec;
		if (text_poke_addr(tp) != ip)
			goto out_put;
	}

	ip += tp->len;

	switch (tp->opcode) {
	case INT3_INSN_OPCODE:
		/*
		 * Someone poked an explicit INT3, they'll want to handle it,
		 * do not consume.
		 */
		goto out_put;

	case RET_INSN_OPCODE:
		int3_emulate_ret(regs);
		break;

	case CALL_INSN_OPCODE:
		int3_emulate_call(regs, (long)ip + tp->disp);
		break;

	case JMP32_INSN_OPCODE:
	case JMP8_INSN_OPCODE:
		int3_emulate_jmp(regs, (long)ip + tp->disp);
		break;

	default:
		BUG();
	}

	ret = 1;

out_put:
	put_desc(desc);
	return ret;
}

#define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc))
static struct text_poke_loc tp_vec[TP_VEC_MAX];
static int tp_vec_nr;

/**
 * text_poke_bp_batch() -- update instructions on live kernel on SMP
 * @tp:		vector of instructions to patch
 * @nr_entries:	number of entries in the vector
 *
 * Modify multi-byte instruction by using int3 breakpoint on SMP.
 * We completely avoid stop_machine() here, and achieve the
 * synchronization using int3 breakpoint.
 *
 * The way it is done:
 *	- For each entry in the vector:
 *		- add a int3 trap to the address that will be patched
 *	- sync cores
 *	- For each entry in the vector:
 *		- update all but the first byte of the patched range
 *	- sync cores
 *	- For each entry in the vector:
 *		- replace the first byte (int3) by the first byte of
 *		  replacing opcode
 *	- sync cores
 */
static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
{
	struct bp_patching_desc desc = {
		.vec = tp,
		.nr_entries = nr_entries,
		.refs = ATOMIC_INIT(1),
	};
	unsigned char int3 = INT3_INSN_OPCODE;
	unsigned int i;
	int do_sync;

	lockdep_assert_held(&text_mutex);

	smp_store_release(&bp_desc, &desc); /* rcu_assign_pointer */

	/*
	 * Corresponding read barrier in int3 notifier for making sure the
	 * nr_entries and handler are correctly ordered wrt. patching.
	 */
	smp_wmb();

	/*
	 * First step: add a int3 trap to the address that will be patched.
	 */
	for (i = 0; i < nr_entries; i++) {
		tp[i].old = *(u8 *)text_poke_addr(&tp[i]);
		text_poke(text_poke_addr(&tp[i]), &int3, INT3_INSN_SIZE);
	}

	text_poke_sync();

	/*
	 * Second step: update all but the first byte of the patched range.
	 */
	for (do_sync = 0, i = 0; i < nr_entries; i++) {
		u8 old[POKE_MAX_OPCODE_SIZE] = { tp[i].old, };
		int len = tp[i].len;

		if (len - INT3_INSN_SIZE > 0) {
			memcpy(old + INT3_INSN_SIZE,
			       text_poke_addr(&tp[i]) + INT3_INSN_SIZE,
			       len - INT3_INSN_SIZE);
			text_poke(text_poke_addr(&tp[i]) + INT3_INSN_SIZE,
				  (const char *)tp[i].text + INT3_INSN_SIZE,
				  len - INT3_INSN_SIZE);
			do_sync++;
		}

		/*
		 * Emit a perf event to record the text poke, primarily to
		 * support Intel PT decoding which must walk the executable code
		 * to reconstruct the trace. The flow up to here is:
		 *   - write INT3 byte
		 *   - IPI-SYNC
		 *   - write instruction tail
		 * At this point the actual control flow will be through the
		 * INT3 and handler and not hit the old or new instruction.
		 * Intel PT outputs FUP/TIP packets for the INT3, so the flow
		 * can still be decoded. Subsequently:
		 *   - emit RECORD_TEXT_POKE with the new instruction
		 *   - IPI-SYNC
		 *   - write first byte
		 *   - IPI-SYNC
		 * So before the text poke event timestamp, the decoder will see
		 * either the old instruction flow or FUP/TIP of INT3. After the
		 * text poke event timestamp, the decoder will see either the
		 * new instruction flow or FUP/TIP of INT3. Thus decoders can
		 * use the timestamp as the point at which to modify the
		 * executable code.
		 * The old instruction is recorded so that the event can be
		 * processed forwards or backwards.
		 */
		perf_event_text_poke(text_poke_addr(&tp[i]), old, len,
				     tp[i].text, len);
	}

	if (do_sync) {
		/*
		 * According to Intel, this core syncing is very likely
		 * not necessary and we'd be safe even without it. But
		 * better safe than sorry (plus there's not only Intel).
		 */
		text_poke_sync();
	}

	/*
	 * Third step: replace the first byte (int3) by the first byte of
	 * replacing opcode.
	 */
	for (do_sync = 0, i = 0; i < nr_entries; i++) {
		if (tp[i].text[0] == INT3_INSN_OPCODE)
			continue;

		text_poke(text_poke_addr(&tp[i]), tp[i].text, INT3_INSN_SIZE);
		do_sync++;
	}

	if (do_sync)
		text_poke_sync();

	/*
	 * Remove the descriptor and wait for the refcount to drop to zero:
	 * like synchronize_rcu(), except done with a very primitive
	 * refcount based completion.
	 */
	WRITE_ONCE(bp_desc, NULL); /* RCU_INIT_POINTER */
	if (!atomic_dec_and_test(&desc.refs))
		atomic_cond_read_acquire(&desc.refs, !VAL);
}

static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
			       const void *opcode, size_t len, const void *emulate)
{
	struct insn insn;
	int ret, i;

	memcpy((void *)tp->text, opcode, len);
	if (!emulate)
		emulate = opcode;

	ret = insn_decode_kernel(&insn, emulate);
	BUG_ON(ret < 0);

	tp->rel_addr = addr - (void *)_stext;
	tp->len = len;
	tp->opcode = insn.opcode.bytes[0];

	switch (tp->opcode) {
	case RET_INSN_OPCODE:
	case JMP32_INSN_OPCODE:
	case JMP8_INSN_OPCODE:
		/*
		 * Control flow instructions without implied execution of the
		 * next instruction can be padded with INT3.
		 */
		for (i = insn.length; i < len; i++)
			BUG_ON(tp->text[i] != INT3_INSN_OPCODE);
		break;

	default:
		BUG_ON(len != insn.length);
	}

	switch (tp->opcode) {
	case INT3_INSN_OPCODE:
	case RET_INSN_OPCODE:
		break;

	case CALL_INSN_OPCODE:
	case JMP32_INSN_OPCODE:
	case JMP8_INSN_OPCODE:
		tp->disp = insn.immediate.value;
		break;

	default: /* assume NOP */
		switch (len) {
		case 2: /* NOP2 -- emulate as JMP8+0 */
			BUG_ON(memcmp(emulate, x86_nops[len], len));
			tp->opcode = JMP8_INSN_OPCODE;
			tp->disp = 0;
			break;

		case 5: /* NOP5 -- emulate as JMP32+0 */
			BUG_ON(memcmp(emulate, x86_nops[len], len));
			tp->opcode = JMP32_INSN_OPCODE;
			tp->disp = 0;
			break;

		default: /* unknown instruction */
			BUG();
		}
		break;
	}
}
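/*
 * For illustration: when a 5-byte NOP (0f 1f 44 00 00) is queued, the code
 * above records it as JMP32 with displacement 0 (as if it were e9 00 00 00 00),
 * and a 2-byte NOP (66 90) as JMP8+0 (eb 00).  While the INT3 byte is live,
 * poke_int3_handler() therefore simply resumes at the next instruction, which
 * is exactly what executing the NOP would have done.
 */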
/*
 * We hard rely on the tp_vec being ordered; ensure this is so by flushing
 * early if needed.
 */
static bool tp_order_fail(void *addr)
{
	struct text_poke_loc *tp;

	if (!tp_vec_nr)
		return false;

	if (!addr) /* force */
		return true;

	tp = &tp_vec[tp_vec_nr - 1];
	if ((unsigned long)text_poke_addr(tp) > (unsigned long)addr)
		return true;

	return false;
}

static void text_poke_flush(void *addr)
{
	if (tp_vec_nr == TP_VEC_MAX || tp_order_fail(addr)) {
		text_poke_bp_batch(tp_vec, tp_vec_nr);
		tp_vec_nr = 0;
	}
}

void text_poke_finish(void)
{
	text_poke_flush(NULL);
}

void __ref text_poke_queue(void *addr, const void *opcode, size_t len, const void *emulate)
{
	struct text_poke_loc *tp;

	if (unlikely(system_state == SYSTEM_BOOTING)) {
		text_poke_early(addr, opcode, len);
		return;
	}

	text_poke_flush(addr);

	tp = &tp_vec[tp_vec_nr++];
	text_poke_loc_init(tp, addr, opcode, len, emulate);
}

/**
 * text_poke_bp() -- update instructions on live kernel on SMP
 * @addr:	address to patch
 * @opcode:	opcode of new instruction
 * @len:	length to copy
 * @emulate:	instruction to be emulated
 *
 * Update a single instruction with the vector in the stack, avoiding
 * dynamically allocated memory. This function should be used when it is
 * not possible to allocate memory.
 */
void __ref text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate)
{
	struct text_poke_loc tp;

	if (unlikely(system_state == SYSTEM_BOOTING)) {
		text_poke_early(addr, opcode, len);
		return;
	}

	text_poke_loc_init(&tp, addr, opcode, len, emulate);
	text_poke_bp_batch(&tp, 1);
}
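/*
 * A minimal usage sketch (hypothetical call site, not part of this file):
 * enabling a 5-byte jump site at run time, jump-label style, would look
 * roughly like
 *
 *	struct { u8 opcode; s32 offset; } __packed code = {
 *		.opcode = JMP32_INSN_OPCODE,
 *		.offset = target - (addr + JMP32_INSN_SIZE),
 *	};
 *
 *	text_poke_bp(addr, &code, JMP32_INSN_SIZE, NULL);
 *
 * where "addr" and "target" are the caller's patch site and jump target.
 * When patching many sites, text_poke_queue() per site followed by a single
 * text_poke_finish() batches them through one INT3 round trip.
 */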