1#!/bin/awk -f
2# gen-insn-attr-x86.awk: Instruction attribute table generator
3# Written by Masami Hiramatsu <mhiramat@redhat.com>
4#
5# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c
6
# Sanity-check the awk implementation running this script.
# Returns "" when sprintf("%x", 0) yields "0", otherwise an error message.
function check_awk_implement() {
	if (sprintf("%x", 0) == "0")
		return ""
	return "Your awk has a printf-format problem."
}
13
# Reset the per-table working state: no current table name, all three
# ids back to -1, and the four attribute arrays emptied.
function clear_vars() {
	tname = ""
	eid = -1	# escape id
	gid = -1	# group id
	aid = -1	# AVX id
	delete table
	delete lptable1
	delete lptable2
	delete lptable3
}
25
BEGIN {
	# Refuse to go on if this awk fails the implementation check.
	awkchecked = check_awk_implement()
	if (awkchecked != "") {
		print "Error: " awkchecked "\nPlease try to use gawk." > "/dev/stderr"
		exit 1
	}

	# Banner of the generated file
	print "/* x86 opcode map generated from x86-opcode-map.txt */"
	print "/* Do not change this code. */\n"

	# Next ids to hand out, and the table-name maps printed by END
	geid = 1
	ggid = 1
	gaid = 0
	delete etable
	delete gtable
	delete atable

	# Token classifiers used while parsing an opcode line
	opnd_expr = "^[A-Za-z/]"
	ext_expr = "^\\("
	sep_expr = "^\\|$"
	group_expr = "^Grp[0-9A-Za-z]+"

	# Immediate operands and the attribute each one maps to
	imm_expr = "^[IJAOL][a-z]"
	imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
	imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
	imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
	imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
	imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)"
	imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)"
	imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)"
	imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
	imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
	imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
	imm_flag["Ob"] = "INAT_MOFFSET"
	imm_flag["Ov"] = "INAT_MOFFSET"

	# Operand / opcode / superscript patterns that add attributes
	modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
	force64_expr = "\\([df]64\\)"
	rex_expr = "^REX(\\.[XRWB]+)*"
	fpu_expr = "^ESC" # TODO

	# Last-prefix superscripts; each one selects a variant table
	lprefix1_expr = "\\((66|!F3)\\)"
	lprefix2_expr = "\\(F3\\)"
	lprefix3_expr = "\\((F2|!F3)\\)"
	lprefix_expr = "\\((66|F2|F3)\\)"
	max_lprefix = 4

	# An opcode accepts a VEX prefix when its name starts with a
	# lower-case 'v' or it carries the (v1) superscript
	vexok_opcode_expr = "^v.*"
	vexok_expr = "\\(v1\\)"
	# The (v) superscript marks opcodes that support *only* VEX
	vexonly_expr = "\\(v\\)"

	# Prefix names and their attribute constants
	prefix_expr = "\\(Prefix\\)"
	prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
	prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
	prefix_num["REPNE"] = "INAT_PFX_REPNE"
	prefix_num["REP/REPE"] = "INAT_PFX_REPE"
	prefix_num["LOCK"] = "INAT_PFX_LOCK"
	prefix_num["SEG=CS"] = "INAT_PFX_CS"
	prefix_num["SEG=DS"] = "INAT_PFX_DS"
	prefix_num["SEG=ES"] = "INAT_PFX_ES"
	prefix_num["SEG=FS"] = "INAT_PFX_FS"
	prefix_num["SEG=GS"] = "INAT_PFX_GS"
	prefix_num["SEG=SS"] = "INAT_PFX_SS"
	prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
	prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"

	clear_vars()
}
99
# Report a semantic error on stderr, tagged with the current input record
# number, then terminate with exit status 1.
function semantic_error(msg) {
	msg = "Semantic error at " NR ": " msg
	print msg > "/dev/stderr"
	exit 1
}
104
# Print a debugging message, prefixed with "DEBUG: ", on standard output.
function debug(msg) {
	msg = "DEBUG: " msg
	print msg
}
108
# Return the number of elements stored in arr.
function array_size(arr,   key,total) {
	total = 0
	for (key in arr)
		total += 1
	return total
}
115
/^Table:/ {
	# Echo the table heading into the generated file as a C comment.
	printf "/* %s */\n", $0
	# tname is reset to "" by clear_vars(), so a non-empty name here
	# means the previous table was never closed.
	if (tname != "") {
		semantic_error("Hit Table: before EndTable:.");
	}
}
121
# "Referrer: <escape name>" marks the current table as an escape opcode
# table. The name (all fields after the first, joined without spaces) must
# have been registered by an earlier "escape" opcode line; its id becomes
# the escape id and determines the table name. A bare "Referrer:" line
# (NF == 1) changes nothing.
/^Referrer:/ {
	if (NF != 1) {
		# escape opcode table
		ref = ""
		for (i = 2; i <= NF; i++)
			ref = ref $i
		# Reject unknown names instead of silently producing an empty
		# escape id (mirrors the "No group" check done for GrpTable:).
		if (!(ref in escape))
			semantic_error("No escape: " ref)
		eid = escape[ref]
		tname = sprintf("inat_escape_table_%d", eid)
	}
}
132
# "AVXcode: [n]" closes a table header. With a number the table is also an
# AVX table; with neither an AVX id nor an escape id it is the primary table.
/^AVXcode:/ {
	if (NF > 1) {
		# AVX/escape opcode table
		aid = $2
		# keep gaid one past the largest AVX id seen so far
		if (aid >= gaid)
			gaid = aid + 1
		# no name yet means no Referrer: this is an AVX-only table
		if (tname == "")
			tname = sprintf("inat_avx_table_%d", $2)
	}
	# neither AVX nor escape id: primary opcode table
	if (eid == -1 && aid == -1)
		tname = "inat_primary_table"
}
145
# "GrpTable: <GrpName>" starts a group table. The group must already have
# been given an id by an opcode line that referenced it.
/^GrpTable:/ {
	printf "/* %s */\n", $0
	if (!($2 in group)) {
		semantic_error("No group: " $2 )
	}
	gid = group[$2]
	tname = "inat_group_table_" gid
}
153
# Print one attribute table as a C array definition.
#   tbl  - attribute array, keyed by the formatted index string
#   name - C declarator, including the array size suffix
#   fmt  - sprintf format that turns a numeric index into a tbl key
#   n    - number of indexes (0 .. n-1) to scan
# Only indexes holding a non-empty attribute are emitted, as designated
# initializers. i and id are locals so the caller's global loop variable
# is not clobbered, and the "in" test avoids creating empty elements in tbl.
function print_table(tbl,name,fmt,n,    i,id)
{
	print "const insn_attr_t " name " = {"
	for (i = 0; i < n; i++) {
		id = sprintf(fmt, i)
		if ((id in tbl) && tbl[id])
			print "	[" id "] = " tbl[id] ","
	}
	print "};"
}
164
# Print one attribute array, if it has any entries, and record its C name.
#   tbl  - attribute array to print (table or one of lptable1..3)
#   slot - last-prefix slot (0..3); non-zero slots get a "_<slot>" suffix
# Uses the globals tname, gid, eid and aid. A group table (gid != -1) is
# recorded in gtable; any other table in etable, and in atable as well
# when an AVX id is set.
function emit_attr_table(tbl, slot,    cname) {
	if (array_size(tbl) == 0)
		return
	cname = (slot == 0) ? tname : tname "_" slot
	if (gid != -1) {
		# group table
		print_table(tbl, cname "[INAT_GROUP_TABLE_SIZE]", "0x%x", 8)
		gtable[gid,slot] = cname
		return
	}
	# primary/escaped table
	print_table(tbl, cname "[INAT_OPCODE_TABLE_SIZE]", "0x%02x", 256)
	etable[eid,slot] = cname
	if (aid >= 0)
		atable[aid,slot] = cname
}

# "EndTable" closes the current table: print the base table and the three
# last-prefix variants, then reset the working state for the next table.
/^EndTable/ {
	emit_attr_table(table, 0)
	emit_attr_table(lptable1, 1)
	emit_attr_table(lptable2, 2)
	emit_attr_table(lptable3, 3)
	print ""
	clear_vars()
}
222
# Combine two attribute expressions with " | ". If either one is empty the
# other is returned unchanged.
function add_flags(old,new) {
	if (!old)
		return new
	if (!new)
		return old
	return old " | " new
}
231
# Convert an operand list to attribute flags.
#   count - number of operands
#   opnd  - operand strings, indexed 1..count
# Returns the immediate flags and/or "INAT_MODRM" joined by add_flags().
# A second immediate is accepted only when it is "Ib" and is recorded as
# INAT_SCNDIMM; unknown or invalid immediates raise a semantic error.
function convert_operands(count,opnd,       arg,k,immf,modf)
{
	immf = null
	modf = null
	for (k = 1; k <= count; k++) {
		arg = opnd[k]
		if (match(arg, imm_expr) != 1) {
			# not an immediate: it may still require a ModRM byte
			if (match(arg, modrm_expr))
				modf = "INAT_MODRM"
			continue
		}
		if (!imm_flag[arg])
			semantic_error("Unknown imm opnd: " arg)
		if (!immf) {
			# first immediate operand
			immf = imm_flag[arg]
			continue
		}
		if (arg != "Ib")
			semantic_error("Second IMM error")
		immf = add_flags(immf, "INAT_SCNDIMM")
	}
	return add_flags(immf, modf)
}
253
# Opcode line: "<hex>: <opcode> [operands] [(ext)] [| <opcode> ...]".
# Builds the attribute string for index <hex> in the current table and, for
# opcodes selected by a last prefix, in the matching lptable1..3 variant.
# The ':' in the pattern is matched literally; it needs no backslash, and
# "\:" is not a defined regexp escape (recent gawk warns about it).
/^[0-9a-f]+:/ {
	# never treat the first input line as an opcode line
	if (NR == 1)
		next
	# get index
	idx = "0x" substr($1, 1, index($1,":") - 1)
	if (idx in table)
		semantic_error("Redefine " idx " in " tname)

	# check if escaped opcode
	if ("escape" == $2) {
		if ($3 != "#")
			semantic_error("No escaped name")
		# the escape name is every field after '#', joined without spaces
		ref = ""
		for (i = 4; i <= NF; i++)
			ref = ref $i
		if (ref in escape)
			semantic_error("Redefine escape (" ref ")")
		# hand out the next escape id
		escape[ref] = geid
		geid++
		table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")"
		next
	}

	variant = null
	# converts
	i = 2
	while (i <= NF) {
		opcode = $(i++)
		delete opnds
		ext = null
		flags = null
		opnd = null
		# parse one opcode
		if (match($i, opnd_expr)) {
			opnd = $i
			count = split($(i++), opnds, ",")
			flags = convert_operands(count, opnds)
		}
		if (match($i, ext_expr))
			ext = $(i++)
		# sep_expr also matches the empty field past the end of the line
		if (match($i, sep_expr))
			i++
		else if (i < NF)
			semantic_error($i " is not a separator")

		# check if group opcode
		if (match(opcode, group_expr)) {
			# first reference to a group assigns it the next group id
			if (!(opcode in group)) {
				group[opcode] = ggid
				ggid++
			}
			flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")")
		}
		# check force(or default) 64bit
		if (match(ext, force64_expr))
			flags = add_flags(flags, "INAT_FORCE64")

		# check REX prefix
		if (match(opcode, rex_expr))
			flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)")

		# check coprocessor escape : TODO
		if (match(opcode, fpu_expr))
			flags = add_flags(flags, "INAT_MODRM")

		# check VEX codes
		if (match(ext, vexonly_expr))
			flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
		else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
			flags = add_flags(flags, "INAT_VEXOK")

		# check prefixes
		if (match(ext, prefix_expr)) {
			if (!prefix_num[opcode])
				semantic_error("Unknown prefix: " opcode)
			flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")")
		}
		# an opcode that produced no attributes contributes nothing
		if (length(flags) == 0)
			continue
		# check if last prefix
		if (match(ext, lprefix1_expr)) {
			lptable1[idx] = add_flags(lptable1[idx],flags)
			variant = "INAT_VARIANT"
		}
		if (match(ext, lprefix2_expr)) {
			lptable2[idx] = add_flags(lptable2[idx],flags)
			variant = "INAT_VARIANT"
		}
		if (match(ext, lprefix3_expr)) {
			lptable3[idx] = add_flags(lptable3[idx],flags)
			variant = "INAT_VARIANT"
		}
		# no (66)/(F2)/(F3) superscript: goes into the base table
		if (!match(ext, lprefix_expr)){
			table[idx] = add_flags(table[idx],flags)
		}
	}
	# flag the base entry when any last-prefix variant was recorded
	if (variant)
		table[idx] = add_flags(table[idx],variant)
}
353
# Emit the three lookup arrays that map an escape id, group id or AVX id
# plus a last-prefix slot to the attribute table printed earlier.
# The arrays are declared "const insn_attr_t * const": constant pointers to
# constant data. The former "const insn_attr_t const *" repeated the
# qualifier on the pointee and left the pointers themselves writable.
END {
	# the awk sanity check failed in BEGIN: generate nothing
	if (awkchecked != "")
		exit 1
	# print escape opcode map's array
	print "/* Escape opcode map array */"
	print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \
	      "[INAT_LSTPFX_MAX + 1] = {"
	for (i = 0; i < geid; i++)
		for (j = 0; j < max_lprefix; j++)
			if (etable[i,j])
				print "	["i"]["j"] = "etable[i,j]","
	print "};\n"
	# print group opcode map's array
	print "/* Group opcode map array */"
	print "const insn_attr_t * const inat_group_tables[INAT_GRP_MAX + 1]"\
	      "[INAT_LSTPFX_MAX + 1] = {"
	for (i = 0; i < ggid; i++)
		for (j = 0; j < max_lprefix; j++)
			if (gtable[i,j])
				print "	["i"]["j"] = "gtable[i,j]","
	print "};\n"
	# print AVX opcode map's array
	print "/* AVX opcode map array */"
	print "const insn_attr_t * const inat_avx_tables[X86_VEX_M_MAX + 1]"\
	      "[INAT_LSTPFX_MAX + 1] = {"
	for (i = 0; i < gaid; i++)
		for (j = 0; j < max_lprefix; j++)
			if (atable[i,j])
				print "	["i"]["j"] = "atable[i,j]","
	print "};"
}
385
386