#!/bin/awk -f
# gen-insn-attr-x86.awk: Instruction attribute table generator
# Written by Masami Hiramatsu <mhiramat@redhat.com>
#
# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c
#
# The generated C source is written to stdout; every diagnostic goes to
# /dev/stderr so that it can never end up inside the generated tables.

# Awk implementation sanity check.
# Returns "" when sprintf("%x", 0) yields "0", otherwise an error message.
function check_awk_implement() {
	if (sprintf("%x", 0) != "0")
		return "Your awk has a printf-format problem."
	return ""
}

# Clear working vars: the per-table attribute arrays and the ids/name of
# the table currently being parsed.
function clear_vars() {
	delete table
	delete lptable2
	delete lptable1
	delete lptable3
	eid = -1 # escape id
	gid = -1 # group id
	aid = -1 # AVX id
	tname = ""
}

BEGIN {
	# Implementation error checking
	awkchecked = check_awk_implement()
	if (awkchecked != "") {
		print "Error: " awkchecked > "/dev/stderr"
		print "Please try to use gawk." > "/dev/stderr"
		exit 1
	}

	# Set by semantic_error(); makes END skip the summary arrays.
	semantic_failed = 0

	# Setup generating tables
	print "/* x86 opcode map generated from x86-opcode-map.txt */"
	print "/* Do not change this code. */\n"
	ggid = 1
	geid = 1
	gaid = 0
	delete etable
	delete gtable
	delete atable

	opnd_expr = "^[A-Za-z/]"
	ext_expr = "^\\("
	sep_expr = "^\\|$"
	group_expr = "^Grp[0-9A-Za-z]+"

	imm_expr = "^[IJAOL][a-z]"
	imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
	imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
	imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
	imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)"
	imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)"
	imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)"
	imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
	imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
	imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
	imm_flag["Ob"] = "INAT_MOFFSET"
	imm_flag["Ov"] = "INAT_MOFFSET"
	imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"

	modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
	force64_expr = "\\([df]64\\)"
	rex_expr = "^REX(\\.[XRWB]+)*"
	fpu_expr = "^ESC" # TODO

	lprefix1_expr = "\\((66|!F3)\\)"
	lprefix2_expr = "\\(F3\\)"
	# '&' is an ordinary character in a regexp, so it is written unescaped;
	# "\\&" reaches the regexp engine as the undefined escape "\&", which
	# gawk warns about.
	lprefix3_expr = "\\((F2|!F3|66&F2)\\)"
	lprefix_expr = "\\((66|F2|F3)\\)"
	max_lprefix = 4

	# All opcodes starting with lower-case 'v', 'k' or with (v1) superscript
	# accept VEX prefix
	vexok_opcode_expr = "^[vk].*"
	vexok_expr = "\\(v1\\)"
	# All opcodes with (v) superscript support *only* VEX prefix
	vexonly_expr = "\\(v\\)"
	# All opcodes with (ev) superscript support *only* EVEX prefix
	evexonly_expr = "\\(ev\\)"

	prefix_expr = "\\(Prefix\\)"
	prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
	prefix_num["REPNE"] = "INAT_PFX_REPNE"
	prefix_num["REP/REPE"] = "INAT_PFX_REPE"
	prefix_num["XACQUIRE"] = "INAT_PFX_REPNE"
	prefix_num["XRELEASE"] = "INAT_PFX_REPE"
	prefix_num["LOCK"] = "INAT_PFX_LOCK"
	prefix_num["SEG=CS"] = "INAT_PFX_CS"
	prefix_num["SEG=DS"] = "INAT_PFX_DS"
	prefix_num["SEG=ES"] = "INAT_PFX_ES"
	prefix_num["SEG=FS"] = "INAT_PFX_FS"
	prefix_num["SEG=GS"] = "INAT_PFX_GS"
	prefix_num["SEG=SS"] = "INAT_PFX_SS"
	prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
	prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
	prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"
	prefix_num["EVEX"] = "INAT_PFX_EVEX"

	clear_vars()
}

# Report a fatal problem in the input (with the current record number) and
# terminate with status 1.  'exit' still runs the END rule, so a flag is
# raised first to keep END from appending the summary arrays to an output
# that is already known to be incomplete.
function semantic_error(msg) {
	print "Semantic error at " NR ": " msg > "/dev/stderr"
	semantic_failed = 1
	exit 1
}

# Debugging aid.  Written to stderr because stdout is the generated C file.
function debug(msg) {
	print "DEBUG: " msg > "/dev/stderr"
}

# Return the number of elements in arr.  (i and c are locals.)
function array_size(arr,   i,c) {
	c = 0
	for (i in arr)
		c++
	return c
}

# "Table:" opens a new table; the previous one must have been closed by
# "EndTable" (which resets tname through clear_vars()).
/^Table:/ {
	print "/* " $0 " */"
	if (tname != "")
		semantic_error("Hit Table: before EndTable:.");
}

# "Referrer:" with arguments selects an escape opcode table.  The arguments
# are concatenated into the same key under which the opcode rule registered
# the escape.
/^Referrer:/ {
	if (NF != 1) {
		# escape opcode table
		ref = ""
		for (i = 2; i <= NF; i++)
			ref = ref $i
		eid = escape[ref]
		tname = sprintf("inat_escape_table_%d", eid)
	}
}

# "AVXcode:" with an argument assigns the AVX id of the current table and
# keeps gaid one past the largest id seen so far.
/^AVXcode:/ {
	if (NF != 1) {
		# AVX/escape opcode table
		aid = $2
		if (gaid <= aid)
			gaid = aid + 1
		if (tname == "")	# AVX only opcode table
			tname = sprintf("inat_avx_table_%d", $2)
	}
	if (aid == -1 && eid == -1)	# primary opcode table
		tname = "inat_primary_table"
}

# "GrpTable:" opens a group table; the group must already have been
# referenced by an opcode entry.
/^GrpTable:/ {
	print "/* " $0 " */"
	if (!($2 in group))
		semantic_error("No group: " $2 )
	gid = group[$2]
	tname = "inat_group_table_" gid
}

# Print tbl as a C array definition with designated initializers.
#   tbl  - attribute strings indexed by formatted opcode (e.g. "0x0f")
#   name - C declarator, including the array size expression
#   fmt  - sprintf format used to build each index from 0 .. n-1
#   n    - number of indexes to scan
# Only non-empty entries are emitted.  (i and id are locals so that the
# global loop index 'i' used by the rules is left untouched.)
function print_table(tbl,name,fmt,n,   i,id)
{
	print "const insn_attr_t " name " = {"
	for (i = 0; i < n; i++) {
		id = sprintf(fmt, i)
		if (tbl[id])
			print " [" id "] = " tbl[id] ","
	}
	print "};"
}

# "EndTable" flushes the collected attributes: the base table plus one table
# per last-prefix variant, and records the emitted names for the summary
# arrays printed by END.
/^EndTable/ {
	if (gid != -1) {
		# print group tables
		if (array_size(table) != 0) {
			print_table(table, tname "[INAT_GROUP_TABLE_SIZE]",
				    "0x%x", 8)
			gtable[gid,0] = tname
		}
		if (array_size(lptable1) != 0) {
			print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]",
				    "0x%x", 8)
			gtable[gid,1] = tname "_1"
		}
		if (array_size(lptable2) != 0) {
			print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]",
				    "0x%x", 8)
			gtable[gid,2] = tname "_2"
		}
		if (array_size(lptable3) != 0) {
			print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]",
				    "0x%x", 8)
			gtable[gid,3] = tname "_3"
		}
	} else {
		# print primary/escaped tables
		if (array_size(table) != 0) {
			print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]",
				    "0x%02x", 256)
			etable[eid,0] = tname
			if (aid >= 0)
				atable[aid,0] = tname
		}
		if (array_size(lptable1) != 0) {
			print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]",
				    "0x%02x", 256)
			etable[eid,1] = tname "_1"
			if (aid >= 0)
				atable[aid,1] = tname "_1"
		}
		if (array_size(lptable2) != 0) {
			print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]",
				    "0x%02x", 256)
			etable[eid,2] = tname "_2"
			if (aid >= 0)
				atable[aid,2] = tname "_2"
		}
		if (array_size(lptable3) != 0) {
			print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]",
				    "0x%02x", 256)
			etable[eid,3] = tname "_3"
			if (aid >= 0)
				atable[aid,3] = tname "_3"
		}
	}
	print ""
	clear_vars()
}

# Join two flag expressions with " | "; an empty side is dropped.
function add_flags(old,new) {
	if (old && new)
		return old " | " new
	else if (old)
		return old
	else
		return new
}

# convert operands to flags.
#   count - number of entries in opnd
#   opnd  - operand names, indexed 1 .. count
# Returns the immediate and/or ModRM flags implied by the operands
# (empty when neither applies).  A second immediate must be "Ib".
function convert_operands(count,opnd,       i,j,imm,mod)
{
	imm = null
	mod = null
	for (j = 1; j <= count; j++) {
		i = opnd[j]
		if (match(i, imm_expr) == 1) {
			if (!imm_flag[i])
				semantic_error("Unknown imm opnd: " i)
			if (imm) {
				if (i != "Ib")
					semantic_error("Second IMM error")
				imm = add_flags(imm, "INAT_SCNDIMM")
			} else
				imm = imm_flag[i]
		} else if (match(i, modrm_expr))
			mod = "INAT_MODRM"
	}
	return add_flags(imm, mod)
}

# Opcode entry: "<hex>: opcode [operands] [(ext)] [| opcode ...]".
# ':' needs no escaping in a regexp; "\:" is an undefined escape sequence
# that gawk warns about.
/^[0-9a-f]+:/ {
	# The first input line is never treated as an opcode entry.
	if (NR == 1)
		next
	# get index
	idx = "0x" substr($1, 1, index($1,":") - 1)
	if (idx in table)
		semantic_error("Redefine " idx " in " tname)

	# check if escaped opcode
	if ("escape" == $2) {
		if ($3 != "#")
			semantic_error("No escaped name")
		ref = ""
		for (i = 4; i <= NF; i++)
			ref = ref $i
		if (ref in escape)
			semantic_error("Redefine escape (" ref ")")
		escape[ref] = geid
		geid++
		table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")"
		next
	}

	variant = null
	# converts
	i = 2
	while (i <= NF) {
		opcode = $(i++)
		delete opnds
		ext = null
		flags = null
		opnd = null
		# parse one opcode
		if (match($i, opnd_expr)) {
			opnd = $i
			count = split($(i++), opnds, ",")
			flags = convert_operands(count, opnds)
		}
		if (match($i, ext_expr))
			ext = $(i++)
		if (match($i, sep_expr))
			i++
		else if (i < NF)
			semantic_error($i " is not a separator")

		# check if group opcode
		if (match(opcode, group_expr)) {
			if (!(opcode in group)) {
				group[opcode] = ggid
				ggid++
			}
			flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")")
		}
		# check force(or default) 64bit
		if (match(ext, force64_expr))
			flags = add_flags(flags, "INAT_FORCE64")

		# check REX prefix
		if (match(opcode, rex_expr))
			flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)")

		# check coprocessor escape : TODO
		if (match(opcode, fpu_expr))
			flags = add_flags(flags, "INAT_MODRM")

		# check VEX codes
		if (match(ext, evexonly_expr))
			flags = add_flags(flags, "INAT_VEXOK | INAT_EVEXONLY")
		else if (match(ext, vexonly_expr))
			flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
		else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
			flags = add_flags(flags, "INAT_VEXOK")

		# check prefixes
		if (match(ext, prefix_expr)) {
			if (!prefix_num[opcode])
				semantic_error("Unknown prefix: " opcode)
			flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")")
		}
		if (length(flags) == 0)
			continue
		# check if last prefix
		if (match(ext, lprefix1_expr)) {
			lptable1[idx] = add_flags(lptable1[idx],flags)
			variant = "INAT_VARIANT"
		}
		if (match(ext, lprefix2_expr)) {
			lptable2[idx] = add_flags(lptable2[idx],flags)
			variant = "INAT_VARIANT"
		}
		if (match(ext, lprefix3_expr)) {
			lptable3[idx] = add_flags(lptable3[idx],flags)
			variant = "INAT_VARIANT"
		}
		if (!match(ext, lprefix_expr)){
			table[idx] = add_flags(table[idx],flags)
		}
	}
	if (variant)
		table[idx] = add_flags(table[idx],variant)
}

END {
	# 'exit' in BEGIN or in a rule still runs END: do not emit the summary
	# arrays after an implementation check failure or a semantic error.
	if (awkchecked != "" || semantic_failed)
		exit 1
	# print escape opcode map's array
	print "/* Escape opcode map array */"
	print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \
	      "[INAT_LSTPFX_MAX + 1] = {"
	for (i = 0; i < geid; i++)
		for (j = 0; j < max_lprefix; j++)
			if (etable[i,j])
				print " ["i"]["j"] = "etable[i,j]","
	print "};\n"
	# print group opcode map's array
	print "/* Group opcode map array */"
	print "const insn_attr_t * const inat_group_tables[INAT_GRP_MAX + 1]"\
	      "[INAT_LSTPFX_MAX + 1] = {"
	for (i = 0; i < ggid; i++)
		for (j = 0; j < max_lprefix; j++)
			if (gtable[i,j])
				print " ["i"]["j"] = "gtable[i,j]","
	print "};\n"
	# print AVX opcode map's array
	print "/* AVX opcode map array */"
	print "const insn_attr_t * const inat_avx_tables[X86_VEX_M_MAX + 1]"\
	      "[INAT_LSTPFX_MAX + 1] = {"
	for (i = 0; i < gaid; i++)
		for (j = 0; j < max_lprefix; j++)
			if (atable[i,j])
				print " ["i"]["j"] = "atable[i,j]","
	print "};"
}