1 /* 2 * x86 instruction analysis 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 * 18 * Copyright (C) IBM Corporation, 2002, 2004, 2009 19 */ 20 21 #include <linux/string.h> 22 #include <asm/inat.h> 23 #include <asm/insn.h> 24 25 #define get_next(t, insn) \ 26 ({t r; r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) 27 28 #define peek_next(t, insn) \ 29 ({t r; r = *(t*)insn->next_byte; r; }) 30 31 #define peek_nbyte_next(t, insn, n) \ 32 ({t r; r = *(t*)((insn)->next_byte + n); r; }) 33 34 /** 35 * insn_init() - initialize struct insn 36 * @insn: &struct insn to be initialized 37 * @kaddr: address (in kernel memory) of instruction (or copy thereof) 38 * @x86_64: !0 for 64-bit kernel or 64-bit app 39 */ 40 void insn_init(struct insn *insn, const void *kaddr, int x86_64) 41 { 42 memset(insn, 0, sizeof(*insn)); 43 insn->kaddr = kaddr; 44 insn->next_byte = kaddr; 45 insn->x86_64 = x86_64 ? 1 : 0; 46 insn->opnd_bytes = 4; 47 if (x86_64) 48 insn->addr_bytes = 8; 49 else 50 insn->addr_bytes = 4; 51 } 52 53 /** 54 * insn_get_prefixes - scan x86 instruction prefix bytes 55 * @insn: &struct insn containing instruction 56 * 57 * Populates the @insn->prefixes bitmap, and updates @insn->next_byte 58 * to point to the (first) opcode. No effect if @insn->prefixes.got 59 * is already set. 60 */ 61 void insn_get_prefixes(struct insn *insn) 62 { 63 struct insn_field *prefixes = &insn->prefixes; 64 insn_attr_t attr; 65 insn_byte_t b, lb; 66 int i, nb; 67 68 if (prefixes->got) 69 return; 70 71 nb = 0; 72 lb = 0; 73 b = peek_next(insn_byte_t, insn); 74 attr = inat_get_opcode_attribute(b); 75 while (inat_is_legacy_prefix(attr)) { 76 /* Skip if same prefix */ 77 for (i = 0; i < nb; i++) 78 if (prefixes->bytes[i] == b) 79 goto found; 80 if (nb == 4) 81 /* Invalid instruction */ 82 break; 83 prefixes->bytes[nb++] = b; 84 if (inat_is_address_size_prefix(attr)) { 85 /* address size switches 2/4 or 4/8 */ 86 if (insn->x86_64) 87 insn->addr_bytes ^= 12; 88 else 89 insn->addr_bytes ^= 6; 90 } else if (inat_is_operand_size_prefix(attr)) { 91 /* oprand size switches 2/4 */ 92 insn->opnd_bytes ^= 6; 93 } 94 found: 95 prefixes->nbytes++; 96 insn->next_byte++; 97 lb = b; 98 b = peek_next(insn_byte_t, insn); 99 attr = inat_get_opcode_attribute(b); 100 } 101 /* Set the last prefix */ 102 if (lb && lb != insn->prefixes.bytes[3]) { 103 if (unlikely(insn->prefixes.bytes[3])) { 104 /* Swap the last prefix */ 105 b = insn->prefixes.bytes[3]; 106 for (i = 0; i < nb; i++) 107 if (prefixes->bytes[i] == lb) 108 prefixes->bytes[i] = b; 109 } 110 insn->prefixes.bytes[3] = lb; 111 } 112 113 /* Decode REX prefix */ 114 if (insn->x86_64) { 115 b = peek_next(insn_byte_t, insn); 116 attr = inat_get_opcode_attribute(b); 117 if (inat_is_rex_prefix(attr)) { 118 insn->rex_prefix.value = b; 119 insn->rex_prefix.nbytes = 1; 120 insn->next_byte++; 121 if (X86_REX_W(b)) 122 /* REX.W overrides opnd_size */ 123 insn->opnd_bytes = 8; 124 } 125 } 126 insn->rex_prefix.got = 1; 127 128 /* Decode VEX prefix */ 129 b = peek_next(insn_byte_t, insn); 130 attr = inat_get_opcode_attribute(b); 131 if (inat_is_vex_prefix(attr)) { 132 insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1); 133 if (!insn->x86_64) { 134 /* 135 * In 32-bits mode, if the [7:6] bits (mod bits of 136 * ModRM) on the second byte are not 11b, it is 137 * LDS or LES. 138 */ 139 if (X86_MODRM_MOD(b2) != 3) 140 goto vex_end; 141 } 142 insn->vex_prefix.bytes[0] = b; 143 insn->vex_prefix.bytes[1] = b2; 144 if (inat_is_vex3_prefix(attr)) { 145 b2 = peek_nbyte_next(insn_byte_t, insn, 2); 146 insn->vex_prefix.bytes[2] = b2; 147 insn->vex_prefix.nbytes = 3; 148 insn->next_byte += 3; 149 if (insn->x86_64 && X86_VEX_W(b2)) 150 /* VEX.W overrides opnd_size */ 151 insn->opnd_bytes = 8; 152 } else { 153 insn->vex_prefix.nbytes = 2; 154 insn->next_byte += 2; 155 } 156 } 157 vex_end: 158 insn->vex_prefix.got = 1; 159 160 prefixes->got = 1; 161 return; 162 } 163 164 /** 165 * insn_get_opcode - collect opcode(s) 166 * @insn: &struct insn containing instruction 167 * 168 * Populates @insn->opcode, updates @insn->next_byte to point past the 169 * opcode byte(s), and set @insn->attr (except for groups). 170 * If necessary, first collects any preceding (prefix) bytes. 171 * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got 172 * is already 1. 173 */ 174 void insn_get_opcode(struct insn *insn) 175 { 176 struct insn_field *opcode = &insn->opcode; 177 insn_byte_t op, pfx; 178 if (opcode->got) 179 return; 180 if (!insn->prefixes.got) 181 insn_get_prefixes(insn); 182 183 /* Get first opcode */ 184 op = get_next(insn_byte_t, insn); 185 opcode->bytes[0] = op; 186 opcode->nbytes = 1; 187 188 /* Check if there is VEX prefix or not */ 189 if (insn_is_avx(insn)) { 190 insn_byte_t m, p; 191 m = insn_vex_m_bits(insn); 192 p = insn_vex_p_bits(insn); 193 insn->attr = inat_get_avx_attribute(op, m, p); 194 if (!inat_accept_vex(insn->attr)) 195 insn->attr = 0; /* This instruction is bad */ 196 goto end; /* VEX has only 1 byte for opcode */ 197 } 198 199 insn->attr = inat_get_opcode_attribute(op); 200 while (inat_is_escape(insn->attr)) { 201 /* Get escaped opcode */ 202 op = get_next(insn_byte_t, insn); 203 opcode->bytes[opcode->nbytes++] = op; 204 pfx = insn_last_prefix(insn); 205 insn->attr = inat_get_escape_attribute(op, pfx, insn->attr); 206 } 207 if (inat_must_vex(insn->attr)) 208 insn->attr = 0; /* This instruction is bad */ 209 end: 210 opcode->got = 1; 211 } 212 213 /** 214 * insn_get_modrm - collect ModRM byte, if any 215 * @insn: &struct insn containing instruction 216 * 217 * Populates @insn->modrm and updates @insn->next_byte to point past the 218 * ModRM byte, if any. If necessary, first collects the preceding bytes 219 * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. 220 */ 221 void insn_get_modrm(struct insn *insn) 222 { 223 struct insn_field *modrm = &insn->modrm; 224 insn_byte_t pfx, mod; 225 if (modrm->got) 226 return; 227 if (!insn->opcode.got) 228 insn_get_opcode(insn); 229 230 if (inat_has_modrm(insn->attr)) { 231 mod = get_next(insn_byte_t, insn); 232 modrm->value = mod; 233 modrm->nbytes = 1; 234 if (inat_is_group(insn->attr)) { 235 pfx = insn_last_prefix(insn); 236 insn->attr = inat_get_group_attribute(mod, pfx, 237 insn->attr); 238 } 239 } 240 241 if (insn->x86_64 && inat_is_force64(insn->attr)) 242 insn->opnd_bytes = 8; 243 modrm->got = 1; 244 } 245 246 247 /** 248 * insn_rip_relative() - Does instruction use RIP-relative addressing mode? 249 * @insn: &struct insn containing instruction 250 * 251 * If necessary, first collects the instruction up to and including the 252 * ModRM byte. No effect if @insn->x86_64 is 0. 253 */ 254 int insn_rip_relative(struct insn *insn) 255 { 256 struct insn_field *modrm = &insn->modrm; 257 258 if (!insn->x86_64) 259 return 0; 260 if (!modrm->got) 261 insn_get_modrm(insn); 262 /* 263 * For rip-relative instructions, the mod field (top 2 bits) 264 * is zero and the r/m field (bottom 3 bits) is 0x5. 265 */ 266 return (modrm->nbytes && (modrm->value & 0xc7) == 0x5); 267 } 268 269 /** 270 * insn_get_sib() - Get the SIB byte of instruction 271 * @insn: &struct insn containing instruction 272 * 273 * If necessary, first collects the instruction up to and including the 274 * ModRM byte. 275 */ 276 void insn_get_sib(struct insn *insn) 277 { 278 insn_byte_t modrm; 279 280 if (insn->sib.got) 281 return; 282 if (!insn->modrm.got) 283 insn_get_modrm(insn); 284 if (insn->modrm.nbytes) { 285 modrm = (insn_byte_t)insn->modrm.value; 286 if (insn->addr_bytes != 2 && 287 X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { 288 insn->sib.value = get_next(insn_byte_t, insn); 289 insn->sib.nbytes = 1; 290 } 291 } 292 insn->sib.got = 1; 293 } 294 295 296 /** 297 * insn_get_displacement() - Get the displacement of instruction 298 * @insn: &struct insn containing instruction 299 * 300 * If necessary, first collects the instruction up to and including the 301 * SIB byte. 302 * Displacement value is sign-expanded. 303 */ 304 void insn_get_displacement(struct insn *insn) 305 { 306 insn_byte_t mod, rm, base; 307 308 if (insn->displacement.got) 309 return; 310 if (!insn->sib.got) 311 insn_get_sib(insn); 312 if (insn->modrm.nbytes) { 313 /* 314 * Interpreting the modrm byte: 315 * mod = 00 - no displacement fields (exceptions below) 316 * mod = 01 - 1-byte displacement field 317 * mod = 10 - displacement field is 4 bytes, or 2 bytes if 318 * address size = 2 (0x67 prefix in 32-bit mode) 319 * mod = 11 - no memory operand 320 * 321 * If address size = 2... 322 * mod = 00, r/m = 110 - displacement field is 2 bytes 323 * 324 * If address size != 2... 325 * mod != 11, r/m = 100 - SIB byte exists 326 * mod = 00, SIB base = 101 - displacement field is 4 bytes 327 * mod = 00, r/m = 101 - rip-relative addressing, displacement 328 * field is 4 bytes 329 */ 330 mod = X86_MODRM_MOD(insn->modrm.value); 331 rm = X86_MODRM_RM(insn->modrm.value); 332 base = X86_SIB_BASE(insn->sib.value); 333 if (mod == 3) 334 goto out; 335 if (mod == 1) { 336 insn->displacement.value = get_next(char, insn); 337 insn->displacement.nbytes = 1; 338 } else if (insn->addr_bytes == 2) { 339 if ((mod == 0 && rm == 6) || mod == 2) { 340 insn->displacement.value = 341 get_next(short, insn); 342 insn->displacement.nbytes = 2; 343 } 344 } else { 345 if ((mod == 0 && rm == 5) || mod == 2 || 346 (mod == 0 && base == 5)) { 347 insn->displacement.value = get_next(int, insn); 348 insn->displacement.nbytes = 4; 349 } 350 } 351 } 352 out: 353 insn->displacement.got = 1; 354 } 355 356 /* Decode moffset16/32/64 */ 357 static void __get_moffset(struct insn *insn) 358 { 359 switch (insn->addr_bytes) { 360 case 2: 361 insn->moffset1.value = get_next(short, insn); 362 insn->moffset1.nbytes = 2; 363 break; 364 case 4: 365 insn->moffset1.value = get_next(int, insn); 366 insn->moffset1.nbytes = 4; 367 break; 368 case 8: 369 insn->moffset1.value = get_next(int, insn); 370 insn->moffset1.nbytes = 4; 371 insn->moffset2.value = get_next(int, insn); 372 insn->moffset2.nbytes = 4; 373 break; 374 } 375 insn->moffset1.got = insn->moffset2.got = 1; 376 } 377 378 /* Decode imm v32(Iz) */ 379 static void __get_immv32(struct insn *insn) 380 { 381 switch (insn->opnd_bytes) { 382 case 2: 383 insn->immediate.value = get_next(short, insn); 384 insn->immediate.nbytes = 2; 385 break; 386 case 4: 387 case 8: 388 insn->immediate.value = get_next(int, insn); 389 insn->immediate.nbytes = 4; 390 break; 391 } 392 } 393 394 /* Decode imm v64(Iv/Ov) */ 395 static void __get_immv(struct insn *insn) 396 { 397 switch (insn->opnd_bytes) { 398 case 2: 399 insn->immediate1.value = get_next(short, insn); 400 insn->immediate1.nbytes = 2; 401 break; 402 case 4: 403 insn->immediate1.value = get_next(int, insn); 404 insn->immediate1.nbytes = 4; 405 break; 406 case 8: 407 insn->immediate1.value = get_next(int, insn); 408 insn->immediate1.nbytes = 4; 409 insn->immediate2.value = get_next(int, insn); 410 insn->immediate2.nbytes = 4; 411 break; 412 } 413 insn->immediate1.got = insn->immediate2.got = 1; 414 } 415 416 /* Decode ptr16:16/32(Ap) */ 417 static void __get_immptr(struct insn *insn) 418 { 419 switch (insn->opnd_bytes) { 420 case 2: 421 insn->immediate1.value = get_next(short, insn); 422 insn->immediate1.nbytes = 2; 423 break; 424 case 4: 425 insn->immediate1.value = get_next(int, insn); 426 insn->immediate1.nbytes = 4; 427 break; 428 case 8: 429 /* ptr16:64 is not exist (no segment) */ 430 return; 431 } 432 insn->immediate2.value = get_next(unsigned short, insn); 433 insn->immediate2.nbytes = 2; 434 insn->immediate1.got = insn->immediate2.got = 1; 435 } 436 437 /** 438 * insn_get_immediate() - Get the immediates of instruction 439 * @insn: &struct insn containing instruction 440 * 441 * If necessary, first collects the instruction up to and including the 442 * displacement bytes. 443 * Basically, most of immediates are sign-expanded. Unsigned-value can be 444 * get by bit masking with ((1 << (nbytes * 8)) - 1) 445 */ 446 void insn_get_immediate(struct insn *insn) 447 { 448 if (insn->immediate.got) 449 return; 450 if (!insn->displacement.got) 451 insn_get_displacement(insn); 452 453 if (inat_has_moffset(insn->attr)) { 454 __get_moffset(insn); 455 goto done; 456 } 457 458 if (!inat_has_immediate(insn->attr)) 459 /* no immediates */ 460 goto done; 461 462 switch (inat_immediate_size(insn->attr)) { 463 case INAT_IMM_BYTE: 464 insn->immediate.value = get_next(char, insn); 465 insn->immediate.nbytes = 1; 466 break; 467 case INAT_IMM_WORD: 468 insn->immediate.value = get_next(short, insn); 469 insn->immediate.nbytes = 2; 470 break; 471 case INAT_IMM_DWORD: 472 insn->immediate.value = get_next(int, insn); 473 insn->immediate.nbytes = 4; 474 break; 475 case INAT_IMM_QWORD: 476 insn->immediate1.value = get_next(int, insn); 477 insn->immediate1.nbytes = 4; 478 insn->immediate2.value = get_next(int, insn); 479 insn->immediate2.nbytes = 4; 480 break; 481 case INAT_IMM_PTR: 482 __get_immptr(insn); 483 break; 484 case INAT_IMM_VWORD32: 485 __get_immv32(insn); 486 break; 487 case INAT_IMM_VWORD: 488 __get_immv(insn); 489 break; 490 default: 491 break; 492 } 493 if (inat_has_second_immediate(insn->attr)) { 494 insn->immediate2.value = get_next(char, insn); 495 insn->immediate2.nbytes = 1; 496 } 497 done: 498 insn->immediate.got = 1; 499 } 500 501 /** 502 * insn_get_length() - Get the length of instruction 503 * @insn: &struct insn containing instruction 504 * 505 * If necessary, first collects the instruction up to and including the 506 * immediates bytes. 507 */ 508 void insn_get_length(struct insn *insn) 509 { 510 if (insn->length) 511 return; 512 if (!insn->immediate.got) 513 insn_get_immediate(insn); 514 insn->length = (unsigned char)((unsigned long)insn->next_byte 515 - (unsigned long)insn->kaddr); 516 } 517