1#!/bin/awk -f
2# gen-insn-attr-x86.awk: Instruction attribute table generator
3# Written by Masami Hiramatsu <mhiramat@redhat.com>
4#
5# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c
6
7# Awk implementation sanity check
# Sanity-check the running awk implementation.
# Returns "" when the implementation looks usable, otherwise a
# human-readable description of the problem.
function check_awk_implement() {
	# formatting zero with "%x" must produce exactly "0"
	if (sprintf("%x", 0) == "0")
		return ""
	return "Your awk has a printf-format problem."
}
13
14# Clear working vars
# Reset all per-table state so the next table starts from scratch.
function clear_vars() {
	# no table is currently open
	tname = ""
	eid = -1 # escape id
	gid = -1 # group id
	aid = -1 # AVX id

	# attribute table without last-prefix, then one per last-prefix
	delete table
	delete lptable1
	delete lptable2
	delete lptable3
}
25
BEGIN {
	# Refuse to run on an awk that fails the implementation check
	awkchecked = check_awk_implement()
	if (awkchecked != "") {
		print "Error: " awkchecked > "/dev/stderr"
		print "Please try to use gawk." > "/dev/stderr"
		exit 1
	}

	# Banner at the top of the generated output
	print "/* x86 opcode map generated from x86-opcode-map.txt */"
	print "/* Do not change this code. */\n"

	# Name tables filled in while emitting, indexed by [id, last-prefix]
	delete etable
	delete gtable
	delete atable
	# Next group/escape id to hand out, and upper bound of the AVX ids
	ggid = 1
	geid = 1
	gaid = 0

	# Token classifiers used when parsing an opcode line
	opnd_expr = "^[A-Za-z/]"
	ext_expr = "^\\("
	sep_expr = "^\\|$"
	group_expr = "^Grp[0-9A-Za-z]+"
	rex_expr = "^REX(\\.[XRWB]+)*"
	fpu_expr = "^ESC" # TODO
	force64_expr = "\\([df]64\\)"
	prefix_expr = "\\(Prefix\\)"

	# Operands that need a ModRM byte
	modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"

	# Immediate operands and the flag each one maps to
	imm_expr = "^[IJAOL][a-z]"
	imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
	imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
	imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
	imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
	imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)"
	imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)"
	imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)"
	imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
	imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
	imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
	imm_flag["Ob"] = "INAT_MOFFSET"
	imm_flag["Ov"] = "INAT_MOFFSET"

	# Last-prefix superscripts; each selects its own variant table
	lprefix1_expr = "\\(66\\)"
	lprefix2_expr = "\\(F3\\)"
	lprefix3_expr = "\\(F2\\)"
	max_lprefix = 4

	# An opcode accepts a VEX prefix when its name starts with a
	# lower-case 'v' or when it carries the (v1) superscript.
	vexok_opcode_expr = "^v.*"
	vexok_expr = "\\(v1\\)"
	# An opcode with the (v) superscript supports *only* the VEX prefix.
	vexonly_expr = "\\(v\\)"

	# Legacy/VEX prefix names and the constant each one maps to
	prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
	prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
	prefix_num["REPNE"] = "INAT_PFX_REPNE"
	prefix_num["REP/REPE"] = "INAT_PFX_REPE"
	prefix_num["LOCK"] = "INAT_PFX_LOCK"
	prefix_num["SEG=CS"] = "INAT_PFX_CS"
	prefix_num["SEG=DS"] = "INAT_PFX_DS"
	prefix_num["SEG=ES"] = "INAT_PFX_ES"
	prefix_num["SEG=FS"] = "INAT_PFX_FS"
	prefix_num["SEG=GS"] = "INAT_PFX_GS"
	prefix_num["SEG=SS"] = "INAT_PFX_SS"
	prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
	prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"

	# Start with no table open
	clear_vars()
}
98
# Report a semantic error in the opcode map and abort with status 1.
#   msg: description of the problem; the current record number (NR) is
#        prepended so the offending input line can be located.
function semantic_error(msg) {
	print "Semantic error at " NR ": " msg > "/dev/stderr"
	# In awk, "exit" inside a rule or function still runs the END action.
	# END bails out early when awkchecked is non-empty, so mark the run as
	# failed here; otherwise END would append the index arrays to output
	# that has already been cut short by this error.
	awkchecked = msg
	exit 1
}
103
# Print a debugging message.
#   msg: text to show after the "DEBUG: " tag.
# The message goes to stderr: stdout carries the generated C source
# (see the usage line at the top of this file) and must stay clean.
function debug(msg) {
	print "DEBUG: " msg > "/dev/stderr"
}
107
# Count the elements of an array.
#   arr: array to inspect
# Returns the number of indices present in arr (0 when it is empty).
function array_size(arr,   key,total) {
	total = 0
	for (key in arr)
		total += 1
	return total
}
114
# "Table:" header: echo it into the output as a C comment.  A table name
# that is still set means the previous table was never closed.
/^Table:/ {
	print "/* " $0 " */"
	if (tname != "") {
		semantic_error("Hit Table: before EndTable:.")
	}
}
120
# "Referrer:" header: names the escape opcode that leads into this table.
# A bare "Referrer:" (no further fields) leaves the escape id untouched.
/^Referrer:/ {
	if (NF != 1) {
		# escape opcode table
		# The escape name is every remaining field glued together with
		# no separator, exactly as it is built where escapes are defined.
		ref = ""
		for (i = 2; i <= NF; i++)
			ref = ref $i
		# Reading escape[ref] for an unknown name would silently create
		# an empty entry (giving table id 0 and later a bogus
		# "Redefine escape" error), so reject it explicitly.
		if (!(ref in escape))
			semantic_error("No escape: " ref)
		eid = escape[ref]
		tname = sprintf("inat_escape_table_%d", eid)
	}
}
131
# "AVXcode:" header: optionally carries the AVX id of the table, and is
# also the point where the final table name is settled.
/^AVXcode:/ {
	if (NF > 1) {
		# AVX/escape opcode table
		aid = $2
		# keep gaid one past the largest AVX id seen so far
		if (aid >= gaid)
			gaid = aid + 1
		# no name yet means no referrer: AVX only opcode table
		if (tname == "")
			tname = sprintf("inat_avx_table_%d", $2)
	}
	# neither an escape id nor an AVX id: primary opcode table
	if (eid == -1 && aid == -1)
		tname = "inat_primary_table"
}
144
# "GrpTable:" header: echo it as a C comment and open the table of a
# group that an earlier opcode line has already registered.
/^GrpTable:/ {
	print "/* " $0 " */"
	if ($2 in group) {
		gid = group[$2]
		tname = "inat_group_table_" gid
	} else {
		semantic_error("No group: " $2 )
	}
}
152
# Emit one attribute table as a C array with designated initializers.
#   tbl:  array mapping formatted indices (e.g. "0x0f") to flag strings
#   name: declarator text placed after "const insn_attr_t "
#   fmt:  sprintf format that turns a slot number into an index of tbl
#   n:    number of slots to scan (0 .. n-1)
# Only slots holding a non-empty value are printed.
# i and id are locals: they no longer leak into the global namespace
# shared with the pattern rules.
function print_table(tbl,name,fmt,n,    i,id)
{
	print "const insn_attr_t " name " = {"
	for (i = 0; i < n; i++) {
		id = sprintf(fmt, i)
		# test membership first so that probing an unused slot does not
		# create an empty element in the caller's array
		if ((id in tbl) && tbl[id])
			print "	[" id "] = " tbl[id] ","
	}
	print "};"
}
163
# Print one group table if it has entries and record its name in gtable.
#   tbl:  attribute array to flush
#   name: C identifier for the table
#   lpfx: last-prefix slot (0 = none, 1..3 = variant tables)
function emit_group_table(tbl, name, lpfx) {
	if (array_size(tbl) == 0)
		return
	print_table(tbl, name "[INAT_GROUP_TABLE_SIZE]", "0x%x", 8)
	gtable[gid,lpfx] = name
}

# Print one primary/escape table if it has entries and record its name in
# etable, and also in atable when the table carries an AVX id.
#   tbl:  attribute array to flush
#   name: C identifier for the table
#   lpfx: last-prefix slot (0 = none, 1..3 = variant tables)
function emit_opcode_table(tbl, name, lpfx) {
	if (array_size(tbl) == 0)
		return
	print_table(tbl, name "[INAT_OPCODE_TABLE_SIZE]", "0x%02x", 256)
	etable[eid,lpfx] = name
	if (aid >= 0)
		atable[aid,lpfx] = name
}

# "EndTable": flush the table collected so far together with its
# last-prefix variants, then reset the per-table state.
/^EndTable/ {
	if (gid != -1) {
		# print group tables
		emit_group_table(table, tname, 0)
		emit_group_table(lptable1, tname "_1", 1)
		emit_group_table(lptable2, tname "_2", 2)
		emit_group_table(lptable3, tname "_3", 3)
	} else {
		# print primary/escaped tables
		emit_opcode_table(table, tname, 0)
		emit_opcode_table(lptable1, tname "_1", 1)
		emit_opcode_table(lptable2, tname "_2", 2)
		emit_opcode_table(lptable3, tname "_3", 3)
	}
	print ""
	clear_vars()
}
221
# Combine two flag expressions into one C expression.
#   old, new: flag strings; either may be empty
# Returns the two joined with " | " when both are set, otherwise
# whichever one is set (or the empty value when neither is).
function add_flags(old,new) {
	if (!old)
		return new
	if (!new)
		return old
	return old " | " new
}
230
231# convert operands to flags.
232function convert_operands(count,opnd,       i,j,imm,mod)
233{
234	imm = null
235	mod = null
236	for (j = 1; j <= count; j++) {
237		i = opnd[j]
238		if (match(i, imm_expr) == 1) {
239			if (!imm_flag[i])
240				semantic_error("Unknown imm opnd: " i)
241			if (imm) {
242				if (i != "Ib")
243					semantic_error("Second IMM error")
244				imm = add_flags(imm, "INAT_SCNDIMM")
245			} else
246				imm = imm_flag[i]
247		} else if (match(i, modrm_expr))
248			mod = "INAT_MODRM"
249	}
250	return add_flags(imm, mod)
251}
252
# Opcode line: "<hex>: <opcode> [operands] [(ext)] | <opcode> ..."
# Each '|'-separated alternative is turned into a flag string and stored
# in the table selected by its last-prefix superscript, if any.
# The pattern uses a plain ':'; "\:" is not a defined regexp escape and
# makes recent gawk print a warning.
/^[0-9a-f]+:/ {
	# the very first input line is never treated as an opcode line
	if (NR == 1)
		next
	# get index
	idx = "0x" substr($1, 1, index($1,":") - 1)
	if (idx in table)
		semantic_error("Redefine " idx " in " tname)

	# check if escaped opcode
	if ("escape" == $2) {
		if ($3 != "#")
			semantic_error("No escaped name")
		# the escape name is every field after '#', glued together
		ref = ""
		for (i = 4; i <= NF; i++)
			ref = ref $i
		if (ref in escape)
			semantic_error("Redefine escape (" ref ")")
		escape[ref] = geid
		geid++
		table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")"
		next
	}

	variant = null
	# converts
	i = 2
	while (i <= NF) {
		opcode = $(i++)
		delete opnds
		ext = null
		flags = null
		opnd = null
		# parse one opcode
		if (match($i, opnd_expr)) {
			opnd = $i
			count = split($(i++), opnds, ",")
			flags = convert_operands(count, opnds)
		}
		if (match($i, ext_expr))
			ext = $(i++)
		# what follows must be a '|' separator, unless this is the
		# end of the line
		if (match($i, sep_expr))
			i++
		else if (i < NF)
			semantic_error($i " is not a separator")

		# check if group opcode
		if (match(opcode, group_expr)) {
			# first sighting of a group hands out a new group id
			if (!(opcode in group)) {
				group[opcode] = ggid
				ggid++
			}
			flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")")
		}
		# check force(or default) 64bit
		if (match(ext, force64_expr))
			flags = add_flags(flags, "INAT_FORCE64")

		# check REX prefix
		if (match(opcode, rex_expr))
			flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)")

		# check coprocessor escape : TODO
		if (match(opcode, fpu_expr))
			flags = add_flags(flags, "INAT_MODRM")

		# check VEX codes
		if (match(ext, vexonly_expr))
			flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
		else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
			flags = add_flags(flags, "INAT_VEXOK")

		# check prefixes
		if (match(ext, prefix_expr)) {
			if (!prefix_num[opcode])
				semantic_error("Unknown prefix: " opcode)
			flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")")
		}
		# nothing to record for this alternative
		if (length(flags) == 0)
			continue
		# check if last prefix
		if (match(ext, lprefix1_expr)) {
			lptable1[idx] = add_flags(lptable1[idx],flags)
			variant = "INAT_VARIANT"
		} else if (match(ext, lprefix2_expr)) {
			lptable2[idx] = add_flags(lptable2[idx],flags)
			variant = "INAT_VARIANT"
		} else if (match(ext, lprefix3_expr)) {
			lptable3[idx] = add_flags(lptable3[idx],flags)
			variant = "INAT_VARIANT"
		} else {
			table[idx] = add_flags(table[idx],flags)
		}
	}
	# mark the base entry when any alternative went to a variant table
	if (variant)
		table[idx] = add_flags(table[idx],variant)
}
349
# Emit the three index arrays that map an escape / group / AVX id and a
# last-prefix slot to the name of the table printed earlier.
# The element type is written as "const insn_attr_t *": the former
# "const insn_attr_t const *" repeated the same qualifier, which denotes
# the same type but triggers a duplicate-specifier warning in C compilers.
END {
	# nothing to emit when the run has already been marked as failed
	if (awkchecked != "")
		exit 1
	# print escape opcode map's array
	print "/* Escape opcode map array */"
	print "const insn_attr_t *inat_escape_tables[INAT_ESC_MAX + 1]" \
	      "[INAT_LSTPFX_MAX + 1] = {"
	for (i = 0; i < geid; i++)
		for (j = 0; j < max_lprefix; j++)
			if (etable[i,j])
				print "	["i"]["j"] = "etable[i,j]","
	print "};\n"
	# print group opcode map's array
	print "/* Group opcode map array */"
	print "const insn_attr_t *inat_group_tables[INAT_GRP_MAX + 1]"\
	      "[INAT_LSTPFX_MAX + 1] = {"
	for (i = 0; i < ggid; i++)
		for (j = 0; j < max_lprefix; j++)
			if (gtable[i,j])
				print "	["i"]["j"] = "gtable[i,j]","
	print "};\n"
	# print AVX opcode map's array
	print "/* AVX opcode map array */"
	print "const insn_attr_t *inat_avx_tables[X86_VEX_M_MAX + 1]"\
	      "[INAT_LSTPFX_MAX + 1] = {"
	for (i = 0; i < gaid; i++)
		for (j = 0; j < max_lprefix; j++)
			if (atable[i,j])
				print "	["i"]["j"] = "atable[i,j]","
	print "};"
}
381
382