#!/bin/awk -f
# SPDX-License-Identifier: GPL-2.0
# gen-insn-attr-x86.awk: Instruction attribute table generator
# Written by Masami Hiramatsu <mhiramat@redhat.com>
#
# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c
#
# The generated C source is written to stdout; all diagnostics go to
# /dev/stderr so they never end up inside the generated tables.

# Awk implementation sanity check.
# Returns an error message, or "" when sprintf("%x") behaves as expected.
function check_awk_implement() {
	if (sprintf("%x", 0) != "0")
		return "Your awk has a printf-format problem."
	return ""
}

# Clear working vars (per-table state); called from BEGIN and after
# every EndTable.
function clear_vars() {
	delete table
	delete lptable2
	delete lptable1
	delete lptable3
	eid = -1 # escape id
	gid = -1 # group id
	aid = -1 # AVX id
	tname = ""
}

BEGIN {
	# Implementation error checking
	awkchecked = check_awk_implement()
	if (awkchecked != "") {
		print "Error: " awkchecked > "/dev/stderr"
		print "Please try to use gawk." > "/dev/stderr"
		exit 1
	}

	# Setup generating tables
	print "/* x86 opcode map generated from x86-opcode-map.txt */"
	print "/* Do not change this code. */\n"
	ggid = 1	# next group id to allocate
	geid = 1	# next escape id to allocate
	gaid = 0	# one past the highest AVX id seen
	delete etable
	delete gtable
	delete atable

	opnd_expr = "^[A-Za-z/]"
	ext_expr = "^\\("
	sep_expr = "^\\|$"
	group_expr = "^Grp[0-9A-Za-z]+"

	imm_expr = "^[IJAOL][a-z]"
	imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
	imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
	imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
	imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)"
	imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)"
	imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)"
	imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
	imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
	imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
	imm_flag["Ob"] = "INAT_MOFFSET"
	imm_flag["Ov"] = "INAT_MOFFSET"
	imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"

	modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
	force64_expr = "\\([df]64\\)"
	rex_expr = "^REX(\\.[XRWB]+)*"
	fpu_expr = "^ESC" # TODO

	lprefix1_expr = "\\((66|!F3)\\)"
	lprefix2_expr = "\\(F3\\)"
	# '&' is an ordinary character in a regexp; escaping it only makes
	# newer gawk warn about an unknown regexp escape sequence.
	lprefix3_expr = "\\((F2|!F3|66&F2)\\)"
	lprefix_expr = "\\((66|F2|F3)\\)"
	max_lprefix = 4

	# All opcodes starting with lower-case 'v', 'k' or with (v1) superscript
	# accepts VEX prefix
	vexok_opcode_expr = "^[vk].*"
	vexok_expr = "\\(v1\\)"
	# All opcodes with (v) superscript supports *only* VEX prefix
	vexonly_expr = "\\(v\\)"
	# All opcodes with (ev) superscript supports *only* EVEX prefix
	evexonly_expr = "\\(ev\\)"

	prefix_expr = "\\(Prefix\\)"
	prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
	prefix_num["REPNE"] = "INAT_PFX_REPNE"
	prefix_num["REP/REPE"] = "INAT_PFX_REPE"
	prefix_num["XACQUIRE"] = "INAT_PFX_REPNE"
	prefix_num["XRELEASE"] = "INAT_PFX_REPE"
	prefix_num["LOCK"] = "INAT_PFX_LOCK"
	prefix_num["SEG=CS"] = "INAT_PFX_CS"
	prefix_num["SEG=DS"] = "INAT_PFX_DS"
	prefix_num["SEG=ES"] = "INAT_PFX_ES"
	prefix_num["SEG=FS"] = "INAT_PFX_FS"
	prefix_num["SEG=GS"] = "INAT_PFX_GS"
	prefix_num["SEG=SS"] = "INAT_PFX_SS"
	prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
	prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
	prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"
	prefix_num["EVEX"] = "INAT_PFX_EVEX"

	clear_vars()
}

# Report a problem in the input at the current record number and exit
# with status 1.
function semantic_error(msg) {
	print "Semantic error at " NR ": " msg > "/dev/stderr"
	exit 1
}

# Debugging aid. Writes to stderr because stdout is the generated C file.
function debug(msg) {
	print "DEBUG: " msg > "/dev/stderr"
}

# Return the number of elements in arr (i and c are locals).
function array_size(arr,   i,c) {
	c = 0
	for (i in arr)
		c++
	return c
}

/^Table:/ {
	print "/* " $0 " */"
	if (tname != "")
		semantic_error("Hit Table: before EndTable:.");
}

/^Referrer:/ {
	if (NF != 1) {
		# escape opcode table
		ref = ""
		for (i = 2; i <= NF; i++)
			ref = ref $i
		eid = escape[ref]
		tname = sprintf("inat_escape_table_%d", eid)
	}
}

/^AVXcode:/ {
	if (NF != 1) {
		# AVX/escape opcode table
		aid = $2
		if (gaid <= aid)
			gaid = aid + 1
		if (tname == "")	# AVX only opcode table
			tname = sprintf("inat_avx_table_%d", $2)
	}
	if (aid == -1 && eid == -1)	# primary opcode table
		tname = "inat_primary_table"
}

/^GrpTable:/ {
	print "/* " $0 " */"
	if (!($2 in group))
		semantic_error("No group: " $2 )
	gid = group[$2]
	tname = "inat_group_table_" gid
}

# Print a C array definition "const insn_attr_t <name> = { ... };".
#  tbl:  attribute strings keyed by the formatted index
#  name: declarator text (array name plus its size in brackets)
#  fmt:  sprintf format that turns an index into a key of tbl
#  n:    number of indices (0 .. n-1) to scan
# Only indices with a non-empty entry are emitted, as designated
# initializers. i and id are locals so the caller's variables are not
# clobbered.
function print_table(tbl,name,fmt,n,   i,id)
{
	print "const insn_attr_t " name " = {"
	for (i = 0; i < n; i++) {
		id = sprintf(fmt, i)
		if (tbl[id])
			print "	[" id "] = " tbl[id] ","
	}
	print "};"
}

/^EndTable/ {
	if (gid != -1) {
		# print group tables
		if (array_size(table) != 0) {
			print_table(table, tname "[INAT_GROUP_TABLE_SIZE]",
				    "0x%x", 8)
			gtable[gid,0] = tname
		}
		if (array_size(lptable1) != 0) {
			print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]",
				    "0x%x", 8)
			gtable[gid,1] = tname "_1"
		}
		if (array_size(lptable2) != 0) {
			print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]",
				    "0x%x", 8)
			gtable[gid,2] = tname "_2"
		}
		if (array_size(lptable3) != 0) {
			print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]",
				    "0x%x", 8)
			gtable[gid,3] = tname "_3"
		}
	} else {
		# print primary/escaped tables
		if (array_size(table) != 0) {
			print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]",
				    "0x%02x", 256)
			etable[eid,0] = tname
			if (aid >= 0)
				atable[aid,0] = tname
		}
		if (array_size(lptable1) != 0) {
			print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]",
				    "0x%02x", 256)
			etable[eid,1] = tname "_1"
			if (aid >= 0)
				atable[aid,1] = tname "_1"
		}
		if (array_size(lptable2) != 0) {
			print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]",
				    "0x%02x", 256)
			etable[eid,2] = tname "_2"
			if (aid >= 0)
				atable[aid,2] = tname "_2"
		}
		if (array_size(lptable3) != 0) {
			print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]",
				    "0x%02x", 256)
			etable[eid,3] = tname "_3"
			if (aid >= 0)
				atable[aid,3] = tname "_3"
		}
	}
	print ""
	clear_vars()
}

# Join two flag expressions with " | "; an empty side is dropped.
function add_flags(old,new) {
	if (old && new)
		return old " | " new
	else if (old)
		return old
	else
		return new
}

# convert operands to flags.
#  count: number of entries in opnd
#  opnd:  operand strings, indexed 1..count
# Returns the immediate and/or INAT_MODRM flags implied by the operands.
function convert_operands(count,opnd,       i,j,imm,mod)
{
	imm = null
	mod = null
	for (j = 1; j <= count; j++) {
		i = opnd[j]
		if (match(i, imm_expr) == 1) {
			if (!imm_flag[i])
				semantic_error("Unknown imm opnd: " i)
			if (imm) {
				# only a byte immediate may follow another immediate
				if (i != "Ib")
					semantic_error("Second IMM error")
				imm = add_flags(imm, "INAT_SCNDIMM")
			} else
				imm = imm_flag[i]
		} else if (match(i, modrm_expr))
			mod = "INAT_MODRM"
	}
	return add_flags(imm, mod)
}

# Opcode definition line: "<hex index>: <opcode> [operands] [(ext)] | ..."
# (':' needs no escaping in a regexp; "\:" makes newer gawk warn.)
/^[0-9a-f]+:/ {
	if (NR == 1)
		next
	# get index
	idx = "0x" substr($1, 1, index($1,":") - 1)
	if (idx in table)
		semantic_error("Redefine " idx " in " tname)

	# check if escaped opcode
	if ("escape" == $2) {
		if ($3 != "#")
			semantic_error("No escaped name")
		ref = ""
		for (i = 4; i <= NF; i++)
			ref = ref $i
		if (ref in escape)
			semantic_error("Redefine escape (" ref ")")
		escape[ref] = geid
		geid++
		table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")"
		next
	}

	variant = null
	# converts
	i = 2
	while (i <= NF) {
		opcode = $(i++)
		delete opnds
		ext = null
		flags = null
		opnd = null
		# parse one opcode
		if (match($i, opnd_expr)) {
			opnd = $i
			count = split($(i++), opnds, ",")
			flags = convert_operands(count, opnds)
		}
		if (match($i, ext_expr))
			ext = $(i++)
		if (match($i, sep_expr))
			i++
		else if (i < NF)
			semantic_error($i " is not a separator")

		# check if group opcode
		if (match(opcode, group_expr)) {
			if (!(opcode in group)) {
				group[opcode] = ggid
				ggid++
			}
			flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")")
		}
		# check force(or default) 64bit
		if (match(ext, force64_expr))
			flags = add_flags(flags, "INAT_FORCE64")

		# check REX prefix
		if (match(opcode, rex_expr))
			flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)")

		# check coprocessor escape : TODO
		if (match(opcode, fpu_expr))
			flags = add_flags(flags, "INAT_MODRM")

		# check VEX codes
		if (match(ext, evexonly_expr))
			flags = add_flags(flags, "INAT_VEXOK | INAT_EVEXONLY")
		else if (match(ext, vexonly_expr))
			flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
		else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
			flags = add_flags(flags, "INAT_VEXOK")

		# check prefixes
		if (match(ext, prefix_expr)) {
			if (!prefix_num[opcode])
				semantic_error("Unknown prefix: " opcode)
			flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")")
		}
		if (length(flags) == 0)
			continue
		# check if last prefix
		if (match(ext, lprefix1_expr)) {
			lptable1[idx] = add_flags(lptable1[idx],flags)
			variant = "INAT_VARIANT"
		}
		if (match(ext, lprefix2_expr)) {
			lptable2[idx] = add_flags(lptable2[idx],flags)
			variant = "INAT_VARIANT"
		}
		if (match(ext, lprefix3_expr)) {
			lptable3[idx] = add_flags(lptable3[idx],flags)
			variant = "INAT_VARIANT"
		}
		if (!match(ext, lprefix_expr)){
			table[idx] = add_flags(table[idx],flags)
		}
	}
	if (variant)
		table[idx] = add_flags(table[idx],variant)
}

END {
	if (awkchecked != "")
		exit 1
	# print escape opcode map's array
	print "/* Escape opcode map array */"
	print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \
	      "[INAT_LSTPFX_MAX + 1] = {"
	for (i = 0; i < geid; i++)
		for (j = 0; j < max_lprefix; j++)
			if (etable[i,j])
				print "	["i"]["j"] = "etable[i,j]","
	print "};\n"
	# print group opcode map's array
	print "/* Group opcode map array */"
	print "const insn_attr_t * const inat_group_tables[INAT_GRP_MAX + 1]"\
	      "[INAT_LSTPFX_MAX + 1] = {"
	for (i = 0; i < ggid; i++)
		for (j = 0; j < max_lprefix; j++)
			if (gtable[i,j])
				print "	["i"]["j"] = "gtable[i,j]","
	print "};\n"
	# print AVX opcode map's array
	print "/* AVX opcode map array */"
	print "const insn_attr_t * const inat_avx_tables[X86_VEX_M_MAX + 1]"\
	      "[INAT_LSTPFX_MAX + 1] = {"
	for (i = 0; i < gaid; i++)
		for (j = 0; j < max_lprefix; j++)
			if (atable[i,j])
				print "	["i"]["j"] = "atable[i,j]","
	print "};"
}