1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * x86 instruction analysis 4 * 5 * Copyright (C) IBM Corporation, 2002, 2004, 2009 6 */ 7 8 #ifdef __KERNEL__ 9 #include <linux/string.h> 10 #else 11 #include <string.h> 12 #endif 13 #include <asm/inat.h> 14 #include <asm/insn.h> 15 16 #include <asm/emulate_prefix.h> 17 18 /* Verify next sizeof(t) bytes can be on the same instruction */ 19 #define validate_next(t, insn, n) \ 20 ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr) 21 22 #define __get_next(t, insn) \ 23 ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) 24 25 #define __peek_nbyte_next(t, insn, n) \ 26 ({ t r = *(t*)((insn)->next_byte + n); r; }) 27 28 #define get_next(t, insn) \ 29 ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); }) 30 31 #define peek_nbyte_next(t, insn, n) \ 32 ({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); }) 33 34 #define peek_next(t, insn) peek_nbyte_next(t, insn, 0) 35 36 /** 37 * insn_init() - initialize struct insn 38 * @insn: &struct insn to be initialized 39 * @kaddr: address (in kernel memory) of instruction (or copy thereof) 40 * @x86_64: !0 for 64-bit kernel or 64-bit app 41 */ 42 void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64) 43 { 44 /* 45 * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid 46 * even if the input buffer is long enough to hold them. 47 */ 48 if (buf_len > MAX_INSN_SIZE) 49 buf_len = MAX_INSN_SIZE; 50 51 memset(insn, 0, sizeof(*insn)); 52 insn->kaddr = kaddr; 53 insn->end_kaddr = kaddr + buf_len; 54 insn->next_byte = kaddr; 55 insn->x86_64 = x86_64 ? 1 : 0; 56 insn->opnd_bytes = 4; 57 if (x86_64) 58 insn->addr_bytes = 8; 59 else 60 insn->addr_bytes = 4; 61 } 62 63 static const insn_byte_t xen_prefix[] = { __XEN_EMULATE_PREFIX }; 64 static const insn_byte_t kvm_prefix[] = { __KVM_EMULATE_PREFIX }; 65 66 static int __insn_get_emulate_prefix(struct insn *insn, 67 const insn_byte_t *prefix, size_t len) 68 { 69 size_t i; 70 71 for (i = 0; i < len; i++) { 72 if (peek_nbyte_next(insn_byte_t, insn, i) != prefix[i]) 73 goto err_out; 74 } 75 76 insn->emulate_prefix_size = len; 77 insn->next_byte += len; 78 79 return 1; 80 81 err_out: 82 return 0; 83 } 84 85 static void insn_get_emulate_prefix(struct insn *insn) 86 { 87 if (__insn_get_emulate_prefix(insn, xen_prefix, sizeof(xen_prefix))) 88 return; 89 90 __insn_get_emulate_prefix(insn, kvm_prefix, sizeof(kvm_prefix)); 91 } 92 93 /** 94 * insn_get_prefixes - scan x86 instruction prefix bytes 95 * @insn: &struct insn containing instruction 96 * 97 * Populates the @insn->prefixes bitmap, and updates @insn->next_byte 98 * to point to the (first) opcode. No effect if @insn->prefixes.got 99 * is already set. 100 */ 101 void insn_get_prefixes(struct insn *insn) 102 { 103 struct insn_field *prefixes = &insn->prefixes; 104 insn_attr_t attr; 105 insn_byte_t b, lb; 106 int i, nb; 107 108 if (prefixes->got) 109 return; 110 111 insn_get_emulate_prefix(insn); 112 113 nb = 0; 114 lb = 0; 115 b = peek_next(insn_byte_t, insn); 116 attr = inat_get_opcode_attribute(b); 117 while (inat_is_legacy_prefix(attr)) { 118 /* Skip if same prefix */ 119 for (i = 0; i < nb; i++) 120 if (prefixes->bytes[i] == b) 121 goto found; 122 if (nb == 4) 123 /* Invalid instruction */ 124 break; 125 prefixes->bytes[nb++] = b; 126 if (inat_is_address_size_prefix(attr)) { 127 /* address size switches 2/4 or 4/8 */ 128 if (insn->x86_64) 129 insn->addr_bytes ^= 12; 130 else 131 insn->addr_bytes ^= 6; 132 } else if (inat_is_operand_size_prefix(attr)) { 133 /* oprand size switches 2/4 */ 134 insn->opnd_bytes ^= 6; 135 } 136 found: 137 prefixes->nbytes++; 138 insn->next_byte++; 139 lb = b; 140 b = peek_next(insn_byte_t, insn); 141 attr = inat_get_opcode_attribute(b); 142 } 143 /* Set the last prefix */ 144 if (lb && lb != insn->prefixes.bytes[3]) { 145 if (unlikely(insn->prefixes.bytes[3])) { 146 /* Swap the last prefix */ 147 b = insn->prefixes.bytes[3]; 148 for (i = 0; i < nb; i++) 149 if (prefixes->bytes[i] == lb) 150 prefixes->bytes[i] = b; 151 } 152 insn->prefixes.bytes[3] = lb; 153 } 154 155 /* Decode REX prefix */ 156 if (insn->x86_64) { 157 b = peek_next(insn_byte_t, insn); 158 attr = inat_get_opcode_attribute(b); 159 if (inat_is_rex_prefix(attr)) { 160 insn->rex_prefix.value = b; 161 insn->rex_prefix.nbytes = 1; 162 insn->next_byte++; 163 if (X86_REX_W(b)) 164 /* REX.W overrides opnd_size */ 165 insn->opnd_bytes = 8; 166 } 167 } 168 insn->rex_prefix.got = 1; 169 170 /* Decode VEX prefix */ 171 b = peek_next(insn_byte_t, insn); 172 attr = inat_get_opcode_attribute(b); 173 if (inat_is_vex_prefix(attr)) { 174 insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1); 175 if (!insn->x86_64) { 176 /* 177 * In 32-bits mode, if the [7:6] bits (mod bits of 178 * ModRM) on the second byte are not 11b, it is 179 * LDS or LES or BOUND. 180 */ 181 if (X86_MODRM_MOD(b2) != 3) 182 goto vex_end; 183 } 184 insn->vex_prefix.bytes[0] = b; 185 insn->vex_prefix.bytes[1] = b2; 186 if (inat_is_evex_prefix(attr)) { 187 b2 = peek_nbyte_next(insn_byte_t, insn, 2); 188 insn->vex_prefix.bytes[2] = b2; 189 b2 = peek_nbyte_next(insn_byte_t, insn, 3); 190 insn->vex_prefix.bytes[3] = b2; 191 insn->vex_prefix.nbytes = 4; 192 insn->next_byte += 4; 193 if (insn->x86_64 && X86_VEX_W(b2)) 194 /* VEX.W overrides opnd_size */ 195 insn->opnd_bytes = 8; 196 } else if (inat_is_vex3_prefix(attr)) { 197 b2 = peek_nbyte_next(insn_byte_t, insn, 2); 198 insn->vex_prefix.bytes[2] = b2; 199 insn->vex_prefix.nbytes = 3; 200 insn->next_byte += 3; 201 if (insn->x86_64 && X86_VEX_W(b2)) 202 /* VEX.W overrides opnd_size */ 203 insn->opnd_bytes = 8; 204 } else { 205 /* 206 * For VEX2, fake VEX3-like byte#2. 207 * Makes it easier to decode vex.W, vex.vvvv, 208 * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0. 209 */ 210 insn->vex_prefix.bytes[2] = b2 & 0x7f; 211 insn->vex_prefix.nbytes = 2; 212 insn->next_byte += 2; 213 } 214 } 215 vex_end: 216 insn->vex_prefix.got = 1; 217 218 prefixes->got = 1; 219 220 err_out: 221 return; 222 } 223 224 /** 225 * insn_get_opcode - collect opcode(s) 226 * @insn: &struct insn containing instruction 227 * 228 * Populates @insn->opcode, updates @insn->next_byte to point past the 229 * opcode byte(s), and set @insn->attr (except for groups). 230 * If necessary, first collects any preceding (prefix) bytes. 231 * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got 232 * is already 1. 233 */ 234 void insn_get_opcode(struct insn *insn) 235 { 236 struct insn_field *opcode = &insn->opcode; 237 insn_byte_t op; 238 int pfx_id; 239 if (opcode->got) 240 return; 241 if (!insn->prefixes.got) 242 insn_get_prefixes(insn); 243 244 /* Get first opcode */ 245 op = get_next(insn_byte_t, insn); 246 opcode->bytes[0] = op; 247 opcode->nbytes = 1; 248 249 /* Check if there is VEX prefix or not */ 250 if (insn_is_avx(insn)) { 251 insn_byte_t m, p; 252 m = insn_vex_m_bits(insn); 253 p = insn_vex_p_bits(insn); 254 insn->attr = inat_get_avx_attribute(op, m, p); 255 if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) || 256 (!inat_accept_vex(insn->attr) && 257 !inat_is_group(insn->attr))) 258 insn->attr = 0; /* This instruction is bad */ 259 goto end; /* VEX has only 1 byte for opcode */ 260 } 261 262 insn->attr = inat_get_opcode_attribute(op); 263 while (inat_is_escape(insn->attr)) { 264 /* Get escaped opcode */ 265 op = get_next(insn_byte_t, insn); 266 opcode->bytes[opcode->nbytes++] = op; 267 pfx_id = insn_last_prefix_id(insn); 268 insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr); 269 } 270 if (inat_must_vex(insn->attr)) 271 insn->attr = 0; /* This instruction is bad */ 272 end: 273 opcode->got = 1; 274 275 err_out: 276 return; 277 } 278 279 /** 280 * insn_get_modrm - collect ModRM byte, if any 281 * @insn: &struct insn containing instruction 282 * 283 * Populates @insn->modrm and updates @insn->next_byte to point past the 284 * ModRM byte, if any. If necessary, first collects the preceding bytes 285 * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. 286 */ 287 void insn_get_modrm(struct insn *insn) 288 { 289 struct insn_field *modrm = &insn->modrm; 290 insn_byte_t pfx_id, mod; 291 if (modrm->got) 292 return; 293 if (!insn->opcode.got) 294 insn_get_opcode(insn); 295 296 if (inat_has_modrm(insn->attr)) { 297 mod = get_next(insn_byte_t, insn); 298 modrm->value = mod; 299 modrm->nbytes = 1; 300 if (inat_is_group(insn->attr)) { 301 pfx_id = insn_last_prefix_id(insn); 302 insn->attr = inat_get_group_attribute(mod, pfx_id, 303 insn->attr); 304 if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) 305 insn->attr = 0; /* This is bad */ 306 } 307 } 308 309 if (insn->x86_64 && inat_is_force64(insn->attr)) 310 insn->opnd_bytes = 8; 311 modrm->got = 1; 312 313 err_out: 314 return; 315 } 316 317 318 /** 319 * insn_rip_relative() - Does instruction use RIP-relative addressing mode? 320 * @insn: &struct insn containing instruction 321 * 322 * If necessary, first collects the instruction up to and including the 323 * ModRM byte. No effect if @insn->x86_64 is 0. 324 */ 325 int insn_rip_relative(struct insn *insn) 326 { 327 struct insn_field *modrm = &insn->modrm; 328 329 if (!insn->x86_64) 330 return 0; 331 if (!modrm->got) 332 insn_get_modrm(insn); 333 /* 334 * For rip-relative instructions, the mod field (top 2 bits) 335 * is zero and the r/m field (bottom 3 bits) is 0x5. 336 */ 337 return (modrm->nbytes && (modrm->value & 0xc7) == 0x5); 338 } 339 340 /** 341 * insn_get_sib() - Get the SIB byte of instruction 342 * @insn: &struct insn containing instruction 343 * 344 * If necessary, first collects the instruction up to and including the 345 * ModRM byte. 346 */ 347 void insn_get_sib(struct insn *insn) 348 { 349 insn_byte_t modrm; 350 351 if (insn->sib.got) 352 return; 353 if (!insn->modrm.got) 354 insn_get_modrm(insn); 355 if (insn->modrm.nbytes) { 356 modrm = (insn_byte_t)insn->modrm.value; 357 if (insn->addr_bytes != 2 && 358 X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { 359 insn->sib.value = get_next(insn_byte_t, insn); 360 insn->sib.nbytes = 1; 361 } 362 } 363 insn->sib.got = 1; 364 365 err_out: 366 return; 367 } 368 369 370 /** 371 * insn_get_displacement() - Get the displacement of instruction 372 * @insn: &struct insn containing instruction 373 * 374 * If necessary, first collects the instruction up to and including the 375 * SIB byte. 376 * Displacement value is sign-expanded. 377 */ 378 void insn_get_displacement(struct insn *insn) 379 { 380 insn_byte_t mod, rm, base; 381 382 if (insn->displacement.got) 383 return; 384 if (!insn->sib.got) 385 insn_get_sib(insn); 386 if (insn->modrm.nbytes) { 387 /* 388 * Interpreting the modrm byte: 389 * mod = 00 - no displacement fields (exceptions below) 390 * mod = 01 - 1-byte displacement field 391 * mod = 10 - displacement field is 4 bytes, or 2 bytes if 392 * address size = 2 (0x67 prefix in 32-bit mode) 393 * mod = 11 - no memory operand 394 * 395 * If address size = 2... 396 * mod = 00, r/m = 110 - displacement field is 2 bytes 397 * 398 * If address size != 2... 399 * mod != 11, r/m = 100 - SIB byte exists 400 * mod = 00, SIB base = 101 - displacement field is 4 bytes 401 * mod = 00, r/m = 101 - rip-relative addressing, displacement 402 * field is 4 bytes 403 */ 404 mod = X86_MODRM_MOD(insn->modrm.value); 405 rm = X86_MODRM_RM(insn->modrm.value); 406 base = X86_SIB_BASE(insn->sib.value); 407 if (mod == 3) 408 goto out; 409 if (mod == 1) { 410 insn->displacement.value = get_next(signed char, insn); 411 insn->displacement.nbytes = 1; 412 } else if (insn->addr_bytes == 2) { 413 if ((mod == 0 && rm == 6) || mod == 2) { 414 insn->displacement.value = 415 get_next(short, insn); 416 insn->displacement.nbytes = 2; 417 } 418 } else { 419 if ((mod == 0 && rm == 5) || mod == 2 || 420 (mod == 0 && base == 5)) { 421 insn->displacement.value = get_next(int, insn); 422 insn->displacement.nbytes = 4; 423 } 424 } 425 } 426 out: 427 insn->displacement.got = 1; 428 429 err_out: 430 return; 431 } 432 433 /* Decode moffset16/32/64. Return 0 if failed */ 434 static int __get_moffset(struct insn *insn) 435 { 436 switch (insn->addr_bytes) { 437 case 2: 438 insn->moffset1.value = get_next(short, insn); 439 insn->moffset1.nbytes = 2; 440 break; 441 case 4: 442 insn->moffset1.value = get_next(int, insn); 443 insn->moffset1.nbytes = 4; 444 break; 445 case 8: 446 insn->moffset1.value = get_next(int, insn); 447 insn->moffset1.nbytes = 4; 448 insn->moffset2.value = get_next(int, insn); 449 insn->moffset2.nbytes = 4; 450 break; 451 default: /* opnd_bytes must be modified manually */ 452 goto err_out; 453 } 454 insn->moffset1.got = insn->moffset2.got = 1; 455 456 return 1; 457 458 err_out: 459 return 0; 460 } 461 462 /* Decode imm v32(Iz). Return 0 if failed */ 463 static int __get_immv32(struct insn *insn) 464 { 465 switch (insn->opnd_bytes) { 466 case 2: 467 insn->immediate.value = get_next(short, insn); 468 insn->immediate.nbytes = 2; 469 break; 470 case 4: 471 case 8: 472 insn->immediate.value = get_next(int, insn); 473 insn->immediate.nbytes = 4; 474 break; 475 default: /* opnd_bytes must be modified manually */ 476 goto err_out; 477 } 478 479 return 1; 480 481 err_out: 482 return 0; 483 } 484 485 /* Decode imm v64(Iv/Ov), Return 0 if failed */ 486 static int __get_immv(struct insn *insn) 487 { 488 switch (insn->opnd_bytes) { 489 case 2: 490 insn->immediate1.value = get_next(short, insn); 491 insn->immediate1.nbytes = 2; 492 break; 493 case 4: 494 insn->immediate1.value = get_next(int, insn); 495 insn->immediate1.nbytes = 4; 496 break; 497 case 8: 498 insn->immediate1.value = get_next(int, insn); 499 insn->immediate1.nbytes = 4; 500 insn->immediate2.value = get_next(int, insn); 501 insn->immediate2.nbytes = 4; 502 break; 503 default: /* opnd_bytes must be modified manually */ 504 goto err_out; 505 } 506 insn->immediate1.got = insn->immediate2.got = 1; 507 508 return 1; 509 err_out: 510 return 0; 511 } 512 513 /* Decode ptr16:16/32(Ap) */ 514 static int __get_immptr(struct insn *insn) 515 { 516 switch (insn->opnd_bytes) { 517 case 2: 518 insn->immediate1.value = get_next(short, insn); 519 insn->immediate1.nbytes = 2; 520 break; 521 case 4: 522 insn->immediate1.value = get_next(int, insn); 523 insn->immediate1.nbytes = 4; 524 break; 525 case 8: 526 /* ptr16:64 is not exist (no segment) */ 527 return 0; 528 default: /* opnd_bytes must be modified manually */ 529 goto err_out; 530 } 531 insn->immediate2.value = get_next(unsigned short, insn); 532 insn->immediate2.nbytes = 2; 533 insn->immediate1.got = insn->immediate2.got = 1; 534 535 return 1; 536 err_out: 537 return 0; 538 } 539 540 /** 541 * insn_get_immediate() - Get the immediates of instruction 542 * @insn: &struct insn containing instruction 543 * 544 * If necessary, first collects the instruction up to and including the 545 * displacement bytes. 546 * Basically, most of immediates are sign-expanded. Unsigned-value can be 547 * get by bit masking with ((1 << (nbytes * 8)) - 1) 548 */ 549 void insn_get_immediate(struct insn *insn) 550 { 551 if (insn->immediate.got) 552 return; 553 if (!insn->displacement.got) 554 insn_get_displacement(insn); 555 556 if (inat_has_moffset(insn->attr)) { 557 if (!__get_moffset(insn)) 558 goto err_out; 559 goto done; 560 } 561 562 if (!inat_has_immediate(insn->attr)) 563 /* no immediates */ 564 goto done; 565 566 switch (inat_immediate_size(insn->attr)) { 567 case INAT_IMM_BYTE: 568 insn->immediate.value = get_next(signed char, insn); 569 insn->immediate.nbytes = 1; 570 break; 571 case INAT_IMM_WORD: 572 insn->immediate.value = get_next(short, insn); 573 insn->immediate.nbytes = 2; 574 break; 575 case INAT_IMM_DWORD: 576 insn->immediate.value = get_next(int, insn); 577 insn->immediate.nbytes = 4; 578 break; 579 case INAT_IMM_QWORD: 580 insn->immediate1.value = get_next(int, insn); 581 insn->immediate1.nbytes = 4; 582 insn->immediate2.value = get_next(int, insn); 583 insn->immediate2.nbytes = 4; 584 break; 585 case INAT_IMM_PTR: 586 if (!__get_immptr(insn)) 587 goto err_out; 588 break; 589 case INAT_IMM_VWORD32: 590 if (!__get_immv32(insn)) 591 goto err_out; 592 break; 593 case INAT_IMM_VWORD: 594 if (!__get_immv(insn)) 595 goto err_out; 596 break; 597 default: 598 /* Here, insn must have an immediate, but failed */ 599 goto err_out; 600 } 601 if (inat_has_second_immediate(insn->attr)) { 602 insn->immediate2.value = get_next(signed char, insn); 603 insn->immediate2.nbytes = 1; 604 } 605 done: 606 insn->immediate.got = 1; 607 608 err_out: 609 return; 610 } 611 612 /** 613 * insn_get_length() - Get the length of instruction 614 * @insn: &struct insn containing instruction 615 * 616 * If necessary, first collects the instruction up to and including the 617 * immediates bytes. 618 */ 619 void insn_get_length(struct insn *insn) 620 { 621 if (insn->length) 622 return; 623 if (!insn->immediate.got) 624 insn_get_immediate(insn); 625 insn->length = (unsigned char)((unsigned long)insn->next_byte 626 - (unsigned long)insn->kaddr); 627 } 628