1#!/bin/awk -f
2# gen-insn-attr-x86.awk: Instruction attribute table generator
3# Written by Masami Hiramatsu <mhiramat@redhat.com>
4#
5# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c
6
# Sanity check of the awk implementation running this script.
# Returns "" when sprintf("%x", 0) yields "0", otherwise a diagnostic message.
function check_awk_implement(    hexzero) {
	hexzero = sprintf("%x", 0)
	if (hexzero == "0")
		return ""
	return "Your awk has a printf-format problem."
}
13
# Reset the per-table working state: the base attribute table, the three
# last-prefix tables, the current escape/group/AVX ids and the table name.
function clear_vars() {
	delete table
	delete lptable1
	delete lptable2
	delete lptable3
	# -1 means "not set": eid = escape id, gid = group id, aid = AVX id
	eid = gid = aid = -1
	tname = ""
}
25
# Startup: verify the awk implementation, print the generated-file banner and
# initialise the id counters, lookup tables and regular expressions used by
# the rules below.
BEGIN {
	# Implementation error checking
	awkchecked = check_awk_implement()
	if (awkchecked != "") {
		print "Error: " awkchecked > "/dev/stderr"
		print "Please try to use gawk." > "/dev/stderr"
		exit 1
	}

	# Setup generating tables
	print "/* x86 opcode map generated from x86-opcode-map.txt */"
	print "/* Do not change this code. */\n"
	ggid = 1	# next group id to hand out
	geid = 1	# next escape id to hand out
	gaid = 0	# one past the highest AVX id seen so far
	delete etable
	delete gtable
	delete atable

	opnd_expr = "^[A-Za-z/]"
	ext_expr = "^\\("
	sep_expr = "^\\|$"
	group_expr = "^Grp[0-9A-Za-z]+"

	imm_expr = "^[IJAOL][a-z]"
	imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
	imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
	imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
	imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)"
	imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)"
	imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)"
	imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
	imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
	imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
	imm_flag["Ob"] = "INAT_MOFFSET"
	imm_flag["Ov"] = "INAT_MOFFSET"
	imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"

	modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
	force64_expr = "\\([df]64\\)"
	rex_expr = "^REX(\\.[XRWB]+)*"
	fpu_expr = "^ESC" # TODO

	lprefix1_expr = "\\((66|!F3)\\)"
	lprefix2_expr = "\\(F3\\)"
	# '&' is an ordinary character in a regexp; escaping it ("\\&") is an
	# undefined escape sequence that newer gawk versions warn about.
	lprefix3_expr = "\\((F2|!F3|66&F2)\\)"
	lprefix_expr = "\\((66|F2|F3)\\)"
	max_lprefix = 4

	# All opcodes starting with lower-case 'v' or 'k', or carrying the (v1)
	# superscript, accept the VEX prefix
	vexok_opcode_expr = "^[vk].*"
	vexok_expr = "\\(v1\\)"
	# All opcodes with the (v) superscript support *only* the VEX prefix
	vexonly_expr = "\\(v\\)"
	# All opcodes with the (ev) superscript support *only* the EVEX prefix
	evexonly_expr = "\\(ev\\)"

	prefix_expr = "\\(Prefix\\)"
	prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
	prefix_num["REPNE"] = "INAT_PFX_REPNE"
	prefix_num["REP/REPE"] = "INAT_PFX_REPE"
	prefix_num["XACQUIRE"] = "INAT_PFX_REPNE"
	prefix_num["XRELEASE"] = "INAT_PFX_REPE"
	prefix_num["LOCK"] = "INAT_PFX_LOCK"
	prefix_num["SEG=CS"] = "INAT_PFX_CS"
	prefix_num["SEG=DS"] = "INAT_PFX_DS"
	prefix_num["SEG=ES"] = "INAT_PFX_ES"
	prefix_num["SEG=FS"] = "INAT_PFX_FS"
	prefix_num["SEG=GS"] = "INAT_PFX_GS"
	prefix_num["SEG=SS"] = "INAT_PFX_SS"
	prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
	prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
	prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"
	prefix_num["EVEX"] = "INAT_PFX_EVEX"

	clear_vars()
}
104
# Report a fatal inconsistency in the opcode map on stderr and stop.
#   msg - description of the problem; the current record number is prepended
# "exit" still runs the END rule, so awkchecked is set to a non-empty value:
# END tests it first and bails out instead of printing the summary arrays
# after a failed run.
function semantic_error(msg) {
	print "Semantic error at " NR ": " msg > "/dev/stderr"
	awkchecked = "semantic error"
	exit 1
}
109
# Print a debugging message, tagged with "DEBUG: ", on standard output.
function debug(msg,    line) {
	line = "DEBUG: " msg
	print line
}
113
# Return the number of elements currently stored in arr.
function array_size(arr,   key,total) {
	total = 0
	for (key in arr)
		total += 1
	return total
}
120
# "Table:" line: echo it into the output as a C comment, then make sure no
# table is still open (tname is only cleared by clear_vars()).
/^Table:/ {
	print "/* " $0 " */"
	if (length(tname) > 0)
		semantic_error("Hit Table: before EndTable:.")
}
126
# "Referrer:" line: when it names an escape, look up the escape id assigned
# by the opcode line that declared it and derive the table name from it.
# A bare "Referrer:" (no further fields) leaves eid and tname untouched.
/^Referrer:/ {
	if (NF != 1) {
		# escape opcode table: the name is all remaining fields joined
		ref = ""
		for (i = 2; i <= NF; i++)
			ref = ref $i
		# An unknown name would otherwise create an empty escape[] entry
		# and silently produce "inat_escape_table_0".
		if (!(ref in escape))
			semantic_error("No escape: " ref)
		eid = escape[ref]
		tname = sprintf("inat_escape_table_%d", eid)
	}
}
137
# "AVXcode:" line: record the AVX id (if one is given), keep gaid one past
# the highest id seen, and settle the table name when it is not set yet.
/^AVXcode:/ {
	if (NF > 1) {
		# AVX/escape opcode table
		aid = $2
		if (aid >= gaid)
			gaid = aid + 1
		# AVX only opcode table: no name was chosen by an earlier line
		if (tname == "")
			tname = sprintf("inat_avx_table_%d", $2)
	}
	# neither an escape id nor an AVX id: this is the primary opcode table
	if (eid == -1 && aid == -1)
		tname = "inat_primary_table"
}
150
# "GrpTable:" line: echo it as a C comment and select the group id that an
# earlier opcode line assigned to this group name.
/^GrpTable:/ {
	print "/* " $0 " */"
	if ($2 in group) {
		gid = group[$2]
		tname = "inat_group_table_" gid
	} else
		semantic_error("No group: " $2 )
}
158
# Print one C array definition using designated initializers.
#   tbl  - attribute table, indexed by the formatted slot string
#   name - C declarator including the "[SIZE]" suffix
#   fmt  - sprintf format that turns a slot number into a tbl[] index
#   n    - number of slots to scan (0 .. n-1)
# slot and id are locals so the caller's global variables are not clobbered.
function print_table(tbl,name,fmt,n,    slot,id)
{
	print "const insn_attr_t " name " = {"
	for (slot = 0; slot < n; slot++) {
		id = sprintf(fmt, slot)
		# Test membership first so no empty elements are created in tbl.
		if ((id in tbl) && tbl[id])
			print "	[" id "] = " tbl[id] ","
	}
	print "};"
}
169
# Emit one attribute table, if it has any entries, and remember its C name
# so the END rule can reference it.
#   tbl - table[] or one of lptable1[] .. lptable3[]
#   lp  - last-prefix slot: 0 for table[], 1..3 for the matching lptableN[]
# Group tables (gid set) have 8 slots and are registered in gtable[];
# all other tables have 256 slots and are registered in etable[] and, when
# an AVX id is set, in atable[] as well.
function emit_table(tbl,lp,    name) {
	if (array_size(tbl) == 0)
		return
	name = (lp == 0) ? tname : (tname "_" lp)
	if (gid != -1) {
		print_table(tbl, name "[INAT_GROUP_TABLE_SIZE]", "0x%x", 8)
		gtable[gid,lp] = name
		return
	}
	print_table(tbl, name "[INAT_OPCODE_TABLE_SIZE]", "0x%02x", 256)
	etable[eid,lp] = name
	if (aid >= 0)
		atable[aid,lp] = name
}

# "EndTable" line: print the base table and its last-prefix variants, then
# reset the working state for the next table.
/^EndTable/ {
	emit_table(table, 0)
	emit_table(lptable1, 1)
	emit_table(lptable2, 2)
	emit_table(lptable3, 3)
	print ""
	clear_vars()
}
227
# Combine two flag expressions with " | ".
# When either side is empty the other one is returned unchanged.
function add_flags(old,new) {
	if (!old)
		return new
	if (!new)
		return old
	return old " | " new
}
236
# Convert an operand list to attribute flags.
#   count - number of operands in opnd[]
#   opnd  - operands, indexed 1..count
# Returns the immediate flag (if any) combined with INAT_MODRM (if any
# operand matches modrm_expr). A second immediate is only accepted when it
# is "Ib" and is recorded as INAT_SCNDIMM.
function convert_operands(count,opnd,       op,n,imm_attr,modrm_attr)
{
	imm_attr = ""
	modrm_attr = ""
	for (n = 1; n <= count; n++) {
		op = opnd[n]
		if (match(op, imm_expr) != 1) {
			# not an immediate: it may still require a ModRM byte
			if (match(op, modrm_expr))
				modrm_attr = "INAT_MODRM"
			continue
		}
		if (!imm_flag[op])
			semantic_error("Unknown imm opnd: " op)
		if (!imm_attr) {
			# first immediate operand
			imm_attr = imm_flag[op]
			continue
		}
		if (op != "Ib")
			semantic_error("Second IMM error")
		imm_attr = add_flags(imm_attr, "INAT_SCNDIMM")
	}
	return add_flags(imm_attr, modrm_attr)
}
258
# Opcode line ("<hex>: ..."): parse every "|"-separated variant on the line
# and record the resulting attribute flags for this opcode index in table[]
# and, for last-prefix variants, in lptable1[] .. lptable3[].
# The ':' is matched literally; it needs no backslash ("\:" is an undefined
# regexp escape that newer gawk versions warn about).
/^[0-9a-f]+:/ {
	# never treat the very first input record as an opcode line
	if (NR == 1)
		next
	# get index
	idx = "0x" substr($1, 1, index($1,":") - 1)
	if (idx in table)
		semantic_error("Redefine " idx " in " tname)

	# check if escaped opcode
	if ("escape" == $2) {
		if ($3 != "#")
			semantic_error("No escaped name")
		# the escape name is everything after the '#', fields joined
		ref = ""
		for (i = 4; i <= NF; i++)
			ref = ref $i
		if (ref in escape)
			semantic_error("Redefine escape (" ref ")")
		escape[ref] = geid
		geid++
		table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")"
		next
	}

	variant = null
	# converts
	i = 2
	while (i <= NF) {
		opcode = $(i++)
		delete opnds
		ext = null
		flags = null
		opnd = null
		# parse one opcode
		if (match($i, opnd_expr)) {
			opnd = $i
			count = split($(i++), opnds, ",")
			flags = convert_operands(count, opnds)
		}
		if (match($i, ext_expr))
			ext = $(i++)
		# a variant must be followed by "|" unless it ends the line
		if (match($i, sep_expr))
			i++
		else if (i < NF)
			semantic_error($i " is not a separator")

		# check if group opcode
		if (match(opcode, group_expr)) {
			# first use of a group name allocates its id
			if (!(opcode in group)) {
				group[opcode] = ggid
				ggid++
			}
			flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")")
		}
		# check force(or default) 64bit
		if (match(ext, force64_expr))
			flags = add_flags(flags, "INAT_FORCE64")

		# check REX prefix
		if (match(opcode, rex_expr))
			flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)")

		# check coprocessor escape : TODO
		if (match(opcode, fpu_expr))
			flags = add_flags(flags, "INAT_MODRM")

		# check VEX codes
		if (match(ext, evexonly_expr))
			flags = add_flags(flags, "INAT_VEXOK | INAT_EVEXONLY")
		else if (match(ext, vexonly_expr))
			flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
		else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
			flags = add_flags(flags, "INAT_VEXOK")

		# check prefixes
		if (match(ext, prefix_expr)) {
			if (!prefix_num[opcode])
				semantic_error("Unknown prefix: " opcode)
			flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")")
		}
		# nothing to record for this variant
		if (length(flags) == 0)
			continue
		# check if last prefix
		if (match(ext, lprefix1_expr)) {
			lptable1[idx] = add_flags(lptable1[idx],flags)
			variant = "INAT_VARIANT"
		}
		if (match(ext, lprefix2_expr)) {
			lptable2[idx] = add_flags(lptable2[idx],flags)
			variant = "INAT_VARIANT"
		}
		if (match(ext, lprefix3_expr)) {
			lptable3[idx] = add_flags(lptable3[idx],flags)
			variant = "INAT_VARIANT"
		}
		# no plain (66)/(F2)/(F3) marker: the flags belong to the base table
		if (!match(ext, lprefix_expr)){
			table[idx] = add_flags(table[idx],flags)
		}
	}
	# mark the base entry when any last-prefix variant was recorded
	if (variant)
		table[idx] = add_flags(table[idx],variant)
}
360
# Print the designated initializers of one [id][last-prefix] pointer array.
#   refs  - table names indexed by [id, last-prefix slot]
#   limit - ids 0 .. limit-1 are scanned
# Only slots that hold a table name are printed.
function print_table_refs(refs,limit,    id,lp) {
	for (id = 0; id < limit; id++)
		for (lp = 0; lp < max_lprefix; lp++)
			if (refs[id,lp])
				print "	["id"]["lp"] = "refs[id,lp]","
}

# Final step: print the arrays that map escape, group and AVX ids to the
# tables emitted earlier.
END {
	# a failure was already reported; do not emit anything else
	if (awkchecked != "")
		exit 1

	# print escape opcode map's array
	print "/* Escape opcode map array */"
	print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \
	      "[INAT_LSTPFX_MAX + 1] = {"
	print_table_refs(etable, geid)
	print "};\n"

	# print group opcode map's array
	print "/* Group opcode map array */"
	print "const insn_attr_t * const inat_group_tables[INAT_GRP_MAX + 1]"\
	      "[INAT_LSTPFX_MAX + 1] = {"
	print_table_refs(gtable, ggid)
	print "};\n"

	# print AVX opcode map's array
	print "/* AVX opcode map array */"
	print "const insn_attr_t * const inat_avx_tables[X86_VEX_M_MAX + 1]"\
	      "[INAT_LSTPFX_MAX + 1] = {"
	print_table_refs(atable, gaid)
	print "};"
}
392
393