1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * x86 instruction analysis 4 * 5 * Copyright (C) IBM Corporation, 2002, 2004, 2009 6 */ 7 8 #include <linux/kernel.h> 9 #ifdef __KERNEL__ 10 #include <linux/string.h> 11 #else 12 #include <string.h> 13 #endif 14 #include <asm/inat.h> 15 #include <asm/insn.h> 16 17 #include <asm/emulate_prefix.h> 18 19 #define leXX_to_cpu(t, r) \ 20 ({ \ 21 __typeof__(t) v; \ 22 switch (sizeof(t)) { \ 23 case 4: v = le32_to_cpu(r); break; \ 24 case 2: v = le16_to_cpu(r); break; \ 25 case 1: v = r; break; \ 26 default: \ 27 BUILD_BUG(); break; \ 28 } \ 29 v; \ 30 }) 31 32 /* Verify next sizeof(t) bytes can be on the same instruction */ 33 #define validate_next(t, insn, n) \ 34 ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr) 35 36 #define __get_next(t, insn) \ 37 ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); leXX_to_cpu(t, r); }) 38 39 #define __peek_nbyte_next(t, insn, n) \ 40 ({ t r = *(t*)((insn)->next_byte + n); leXX_to_cpu(t, r); }) 41 42 #define get_next(t, insn) \ 43 ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); }) 44 45 #define peek_nbyte_next(t, insn, n) \ 46 ({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); }) 47 48 #define peek_next(t, insn) peek_nbyte_next(t, insn, 0) 49 50 /** 51 * insn_init() - initialize struct insn 52 * @insn: &struct insn to be initialized 53 * @kaddr: address (in kernel memory) of instruction (or copy thereof) 54 * @x86_64: !0 for 64-bit kernel or 64-bit app 55 */ 56 void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64) 57 { 58 /* 59 * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid 60 * even if the input buffer is long enough to hold them. 61 */ 62 if (buf_len > MAX_INSN_SIZE) 63 buf_len = MAX_INSN_SIZE; 64 65 memset(insn, 0, sizeof(*insn)); 66 insn->kaddr = kaddr; 67 insn->end_kaddr = kaddr + buf_len; 68 insn->next_byte = kaddr; 69 insn->x86_64 = x86_64 ? 1 : 0; 70 insn->opnd_bytes = 4; 71 if (x86_64) 72 insn->addr_bytes = 8; 73 else 74 insn->addr_bytes = 4; 75 } 76 77 static const insn_byte_t xen_prefix[] = { __XEN_EMULATE_PREFIX }; 78 static const insn_byte_t kvm_prefix[] = { __KVM_EMULATE_PREFIX }; 79 80 static int __insn_get_emulate_prefix(struct insn *insn, 81 const insn_byte_t *prefix, size_t len) 82 { 83 size_t i; 84 85 for (i = 0; i < len; i++) { 86 if (peek_nbyte_next(insn_byte_t, insn, i) != prefix[i]) 87 goto err_out; 88 } 89 90 insn->emulate_prefix_size = len; 91 insn->next_byte += len; 92 93 return 1; 94 95 err_out: 96 return 0; 97 } 98 99 static void insn_get_emulate_prefix(struct insn *insn) 100 { 101 if (__insn_get_emulate_prefix(insn, xen_prefix, sizeof(xen_prefix))) 102 return; 103 104 __insn_get_emulate_prefix(insn, kvm_prefix, sizeof(kvm_prefix)); 105 } 106 107 /** 108 * insn_get_prefixes - scan x86 instruction prefix bytes 109 * @insn: &struct insn containing instruction 110 * 111 * Populates the @insn->prefixes bitmap, and updates @insn->next_byte 112 * to point to the (first) opcode. No effect if @insn->prefixes.got 113 * is already set. 114 */ 115 void insn_get_prefixes(struct insn *insn) 116 { 117 struct insn_field *prefixes = &insn->prefixes; 118 insn_attr_t attr; 119 insn_byte_t b, lb; 120 int i, nb; 121 122 if (prefixes->got) 123 return; 124 125 insn_get_emulate_prefix(insn); 126 127 nb = 0; 128 lb = 0; 129 b = peek_next(insn_byte_t, insn); 130 attr = inat_get_opcode_attribute(b); 131 while (inat_is_legacy_prefix(attr)) { 132 /* Skip if same prefix */ 133 for (i = 0; i < nb; i++) 134 if (prefixes->bytes[i] == b) 135 goto found; 136 if (nb == 4) 137 /* Invalid instruction */ 138 break; 139 prefixes->bytes[nb++] = b; 140 if (inat_is_address_size_prefix(attr)) { 141 /* address size switches 2/4 or 4/8 */ 142 if (insn->x86_64) 143 insn->addr_bytes ^= 12; 144 else 145 insn->addr_bytes ^= 6; 146 } else if (inat_is_operand_size_prefix(attr)) { 147 /* oprand size switches 2/4 */ 148 insn->opnd_bytes ^= 6; 149 } 150 found: 151 prefixes->nbytes++; 152 insn->next_byte++; 153 lb = b; 154 b = peek_next(insn_byte_t, insn); 155 attr = inat_get_opcode_attribute(b); 156 } 157 /* Set the last prefix */ 158 if (lb && lb != insn->prefixes.bytes[3]) { 159 if (unlikely(insn->prefixes.bytes[3])) { 160 /* Swap the last prefix */ 161 b = insn->prefixes.bytes[3]; 162 for (i = 0; i < nb; i++) 163 if (prefixes->bytes[i] == lb) 164 insn_set_byte(prefixes, i, b); 165 } 166 insn_set_byte(&insn->prefixes, 3, lb); 167 } 168 169 /* Decode REX prefix */ 170 if (insn->x86_64) { 171 b = peek_next(insn_byte_t, insn); 172 attr = inat_get_opcode_attribute(b); 173 if (inat_is_rex_prefix(attr)) { 174 insn_field_set(&insn->rex_prefix, b, 1); 175 insn->next_byte++; 176 if (X86_REX_W(b)) 177 /* REX.W overrides opnd_size */ 178 insn->opnd_bytes = 8; 179 } 180 } 181 insn->rex_prefix.got = 1; 182 183 /* Decode VEX prefix */ 184 b = peek_next(insn_byte_t, insn); 185 attr = inat_get_opcode_attribute(b); 186 if (inat_is_vex_prefix(attr)) { 187 insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1); 188 if (!insn->x86_64) { 189 /* 190 * In 32-bits mode, if the [7:6] bits (mod bits of 191 * ModRM) on the second byte are not 11b, it is 192 * LDS or LES or BOUND. 193 */ 194 if (X86_MODRM_MOD(b2) != 3) 195 goto vex_end; 196 } 197 insn_set_byte(&insn->vex_prefix, 0, b); 198 insn_set_byte(&insn->vex_prefix, 1, b2); 199 if (inat_is_evex_prefix(attr)) { 200 b2 = peek_nbyte_next(insn_byte_t, insn, 2); 201 insn_set_byte(&insn->vex_prefix, 2, b2); 202 b2 = peek_nbyte_next(insn_byte_t, insn, 3); 203 insn_set_byte(&insn->vex_prefix, 3, b2); 204 insn->vex_prefix.nbytes = 4; 205 insn->next_byte += 4; 206 if (insn->x86_64 && X86_VEX_W(b2)) 207 /* VEX.W overrides opnd_size */ 208 insn->opnd_bytes = 8; 209 } else if (inat_is_vex3_prefix(attr)) { 210 b2 = peek_nbyte_next(insn_byte_t, insn, 2); 211 insn_set_byte(&insn->vex_prefix, 2, b2); 212 insn->vex_prefix.nbytes = 3; 213 insn->next_byte += 3; 214 if (insn->x86_64 && X86_VEX_W(b2)) 215 /* VEX.W overrides opnd_size */ 216 insn->opnd_bytes = 8; 217 } else { 218 /* 219 * For VEX2, fake VEX3-like byte#2. 220 * Makes it easier to decode vex.W, vex.vvvv, 221 * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0. 222 */ 223 insn_set_byte(&insn->vex_prefix, 2, b2 & 0x7f); 224 insn->vex_prefix.nbytes = 2; 225 insn->next_byte += 2; 226 } 227 } 228 vex_end: 229 insn->vex_prefix.got = 1; 230 231 prefixes->got = 1; 232 233 err_out: 234 return; 235 } 236 237 /** 238 * insn_get_opcode - collect opcode(s) 239 * @insn: &struct insn containing instruction 240 * 241 * Populates @insn->opcode, updates @insn->next_byte to point past the 242 * opcode byte(s), and set @insn->attr (except for groups). 243 * If necessary, first collects any preceding (prefix) bytes. 244 * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got 245 * is already 1. 246 */ 247 void insn_get_opcode(struct insn *insn) 248 { 249 struct insn_field *opcode = &insn->opcode; 250 insn_byte_t op; 251 int pfx_id; 252 if (opcode->got) 253 return; 254 if (!insn->prefixes.got) 255 insn_get_prefixes(insn); 256 257 /* Get first opcode */ 258 op = get_next(insn_byte_t, insn); 259 insn_set_byte(opcode, 0, op); 260 opcode->nbytes = 1; 261 262 /* Check if there is VEX prefix or not */ 263 if (insn_is_avx(insn)) { 264 insn_byte_t m, p; 265 m = insn_vex_m_bits(insn); 266 p = insn_vex_p_bits(insn); 267 insn->attr = inat_get_avx_attribute(op, m, p); 268 if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) || 269 (!inat_accept_vex(insn->attr) && 270 !inat_is_group(insn->attr))) 271 insn->attr = 0; /* This instruction is bad */ 272 goto end; /* VEX has only 1 byte for opcode */ 273 } 274 275 insn->attr = inat_get_opcode_attribute(op); 276 while (inat_is_escape(insn->attr)) { 277 /* Get escaped opcode */ 278 op = get_next(insn_byte_t, insn); 279 opcode->bytes[opcode->nbytes++] = op; 280 pfx_id = insn_last_prefix_id(insn); 281 insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr); 282 } 283 if (inat_must_vex(insn->attr)) 284 insn->attr = 0; /* This instruction is bad */ 285 end: 286 opcode->got = 1; 287 288 err_out: 289 return; 290 } 291 292 /** 293 * insn_get_modrm - collect ModRM byte, if any 294 * @insn: &struct insn containing instruction 295 * 296 * Populates @insn->modrm and updates @insn->next_byte to point past the 297 * ModRM byte, if any. If necessary, first collects the preceding bytes 298 * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. 299 */ 300 void insn_get_modrm(struct insn *insn) 301 { 302 struct insn_field *modrm = &insn->modrm; 303 insn_byte_t pfx_id, mod; 304 if (modrm->got) 305 return; 306 if (!insn->opcode.got) 307 insn_get_opcode(insn); 308 309 if (inat_has_modrm(insn->attr)) { 310 mod = get_next(insn_byte_t, insn); 311 insn_field_set(modrm, mod, 1); 312 if (inat_is_group(insn->attr)) { 313 pfx_id = insn_last_prefix_id(insn); 314 insn->attr = inat_get_group_attribute(mod, pfx_id, 315 insn->attr); 316 if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) 317 insn->attr = 0; /* This is bad */ 318 } 319 } 320 321 if (insn->x86_64 && inat_is_force64(insn->attr)) 322 insn->opnd_bytes = 8; 323 modrm->got = 1; 324 325 err_out: 326 return; 327 } 328 329 330 /** 331 * insn_rip_relative() - Does instruction use RIP-relative addressing mode? 332 * @insn: &struct insn containing instruction 333 * 334 * If necessary, first collects the instruction up to and including the 335 * ModRM byte. No effect if @insn->x86_64 is 0. 336 */ 337 int insn_rip_relative(struct insn *insn) 338 { 339 struct insn_field *modrm = &insn->modrm; 340 341 if (!insn->x86_64) 342 return 0; 343 if (!modrm->got) 344 insn_get_modrm(insn); 345 /* 346 * For rip-relative instructions, the mod field (top 2 bits) 347 * is zero and the r/m field (bottom 3 bits) is 0x5. 348 */ 349 return (modrm->nbytes && (modrm->bytes[0] & 0xc7) == 0x5); 350 } 351 352 /** 353 * insn_get_sib() - Get the SIB byte of instruction 354 * @insn: &struct insn containing instruction 355 * 356 * If necessary, first collects the instruction up to and including the 357 * ModRM byte. 358 */ 359 void insn_get_sib(struct insn *insn) 360 { 361 insn_byte_t modrm; 362 363 if (insn->sib.got) 364 return; 365 if (!insn->modrm.got) 366 insn_get_modrm(insn); 367 if (insn->modrm.nbytes) { 368 modrm = insn->modrm.bytes[0]; 369 if (insn->addr_bytes != 2 && 370 X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { 371 insn_field_set(&insn->sib, 372 get_next(insn_byte_t, insn), 1); 373 } 374 } 375 insn->sib.got = 1; 376 377 err_out: 378 return; 379 } 380 381 382 /** 383 * insn_get_displacement() - Get the displacement of instruction 384 * @insn: &struct insn containing instruction 385 * 386 * If necessary, first collects the instruction up to and including the 387 * SIB byte. 388 * Displacement value is sign-expanded. 389 */ 390 void insn_get_displacement(struct insn *insn) 391 { 392 insn_byte_t mod, rm, base; 393 394 if (insn->displacement.got) 395 return; 396 if (!insn->sib.got) 397 insn_get_sib(insn); 398 if (insn->modrm.nbytes) { 399 /* 400 * Interpreting the modrm byte: 401 * mod = 00 - no displacement fields (exceptions below) 402 * mod = 01 - 1-byte displacement field 403 * mod = 10 - displacement field is 4 bytes, or 2 bytes if 404 * address size = 2 (0x67 prefix in 32-bit mode) 405 * mod = 11 - no memory operand 406 * 407 * If address size = 2... 408 * mod = 00, r/m = 110 - displacement field is 2 bytes 409 * 410 * If address size != 2... 411 * mod != 11, r/m = 100 - SIB byte exists 412 * mod = 00, SIB base = 101 - displacement field is 4 bytes 413 * mod = 00, r/m = 101 - rip-relative addressing, displacement 414 * field is 4 bytes 415 */ 416 mod = X86_MODRM_MOD(insn->modrm.value); 417 rm = X86_MODRM_RM(insn->modrm.value); 418 base = X86_SIB_BASE(insn->sib.value); 419 if (mod == 3) 420 goto out; 421 if (mod == 1) { 422 insn_field_set(&insn->displacement, 423 get_next(signed char, insn), 1); 424 } else if (insn->addr_bytes == 2) { 425 if ((mod == 0 && rm == 6) || mod == 2) { 426 insn_field_set(&insn->displacement, 427 get_next(short, insn), 2); 428 } 429 } else { 430 if ((mod == 0 && rm == 5) || mod == 2 || 431 (mod == 0 && base == 5)) { 432 insn_field_set(&insn->displacement, 433 get_next(int, insn), 4); 434 } 435 } 436 } 437 out: 438 insn->displacement.got = 1; 439 440 err_out: 441 return; 442 } 443 444 /* Decode moffset16/32/64. Return 0 if failed */ 445 static int __get_moffset(struct insn *insn) 446 { 447 switch (insn->addr_bytes) { 448 case 2: 449 insn_field_set(&insn->moffset1, get_next(short, insn), 2); 450 break; 451 case 4: 452 insn_field_set(&insn->moffset1, get_next(int, insn), 4); 453 break; 454 case 8: 455 insn_field_set(&insn->moffset1, get_next(int, insn), 4); 456 insn_field_set(&insn->moffset2, get_next(int, insn), 4); 457 break; 458 default: /* opnd_bytes must be modified manually */ 459 goto err_out; 460 } 461 insn->moffset1.got = insn->moffset2.got = 1; 462 463 return 1; 464 465 err_out: 466 return 0; 467 } 468 469 /* Decode imm v32(Iz). Return 0 if failed */ 470 static int __get_immv32(struct insn *insn) 471 { 472 switch (insn->opnd_bytes) { 473 case 2: 474 insn_field_set(&insn->immediate, get_next(short, insn), 2); 475 break; 476 case 4: 477 case 8: 478 insn_field_set(&insn->immediate, get_next(int, insn), 4); 479 break; 480 default: /* opnd_bytes must be modified manually */ 481 goto err_out; 482 } 483 484 return 1; 485 486 err_out: 487 return 0; 488 } 489 490 /* Decode imm v64(Iv/Ov), Return 0 if failed */ 491 static int __get_immv(struct insn *insn) 492 { 493 switch (insn->opnd_bytes) { 494 case 2: 495 insn_field_set(&insn->immediate1, get_next(short, insn), 2); 496 break; 497 case 4: 498 insn_field_set(&insn->immediate1, get_next(int, insn), 4); 499 insn->immediate1.nbytes = 4; 500 break; 501 case 8: 502 insn_field_set(&insn->immediate1, get_next(int, insn), 4); 503 insn_field_set(&insn->immediate2, get_next(int, insn), 4); 504 break; 505 default: /* opnd_bytes must be modified manually */ 506 goto err_out; 507 } 508 insn->immediate1.got = insn->immediate2.got = 1; 509 510 return 1; 511 err_out: 512 return 0; 513 } 514 515 /* Decode ptr16:16/32(Ap) */ 516 static int __get_immptr(struct insn *insn) 517 { 518 switch (insn->opnd_bytes) { 519 case 2: 520 insn_field_set(&insn->immediate1, get_next(short, insn), 2); 521 break; 522 case 4: 523 insn_field_set(&insn->immediate1, get_next(int, insn), 4); 524 break; 525 case 8: 526 /* ptr16:64 is not exist (no segment) */ 527 return 0; 528 default: /* opnd_bytes must be modified manually */ 529 goto err_out; 530 } 531 insn_field_set(&insn->immediate2, get_next(unsigned short, insn), 2); 532 insn->immediate1.got = insn->immediate2.got = 1; 533 534 return 1; 535 err_out: 536 return 0; 537 } 538 539 /** 540 * insn_get_immediate() - Get the immediates of instruction 541 * @insn: &struct insn containing instruction 542 * 543 * If necessary, first collects the instruction up to and including the 544 * displacement bytes. 545 * Basically, most of immediates are sign-expanded. Unsigned-value can be 546 * get by bit masking with ((1 << (nbytes * 8)) - 1) 547 */ 548 void insn_get_immediate(struct insn *insn) 549 { 550 if (insn->immediate.got) 551 return; 552 if (!insn->displacement.got) 553 insn_get_displacement(insn); 554 555 if (inat_has_moffset(insn->attr)) { 556 if (!__get_moffset(insn)) 557 goto err_out; 558 goto done; 559 } 560 561 if (!inat_has_immediate(insn->attr)) 562 /* no immediates */ 563 goto done; 564 565 switch (inat_immediate_size(insn->attr)) { 566 case INAT_IMM_BYTE: 567 insn_field_set(&insn->immediate, get_next(signed char, insn), 1); 568 break; 569 case INAT_IMM_WORD: 570 insn_field_set(&insn->immediate, get_next(short, insn), 2); 571 break; 572 case INAT_IMM_DWORD: 573 insn_field_set(&insn->immediate, get_next(int, insn), 4); 574 break; 575 case INAT_IMM_QWORD: 576 insn_field_set(&insn->immediate1, get_next(int, insn), 4); 577 insn_field_set(&insn->immediate2, get_next(int, insn), 4); 578 break; 579 case INAT_IMM_PTR: 580 if (!__get_immptr(insn)) 581 goto err_out; 582 break; 583 case INAT_IMM_VWORD32: 584 if (!__get_immv32(insn)) 585 goto err_out; 586 break; 587 case INAT_IMM_VWORD: 588 if (!__get_immv(insn)) 589 goto err_out; 590 break; 591 default: 592 /* Here, insn must have an immediate, but failed */ 593 goto err_out; 594 } 595 if (inat_has_second_immediate(insn->attr)) { 596 insn_field_set(&insn->immediate2, get_next(signed char, insn), 1); 597 } 598 done: 599 insn->immediate.got = 1; 600 601 err_out: 602 return; 603 } 604 605 /** 606 * insn_get_length() - Get the length of instruction 607 * @insn: &struct insn containing instruction 608 * 609 * If necessary, first collects the instruction up to and including the 610 * immediates bytes. 611 */ 612 void insn_get_length(struct insn *insn) 613 { 614 if (insn->length) 615 return; 616 if (!insn->immediate.got) 617 insn_get_immediate(insn); 618 insn->length = (unsigned char)((unsigned long)insn->next_byte 619 - (unsigned long)insn->kaddr); 620 } 621