/* xref: /openbmc/linux/arch/x86/kvm/emulate.c (revision acc6a093) */
/******************************************************************************
 * emulate.c
 *
 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
 *
 * Copyright (c) 2005 Keir Fraser
 *
 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
 * privileged instructions:
 *
 * Copyright (C) 2006 Qumranet
 *
 *   Avi Kivity <avi@qumranet.com>
 *   Yaniv Kamay <yaniv@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
 */

#ifndef __KERNEL__
#include <stdio.h>
#include <stdint.h>
#include <public/xen.h>
#define DPRINTF(_f, _a ...) printf(_f , ## _a)
#else
#include <linux/kvm_host.h>
#include "kvm_cache_regs.h"
#define DPRINTF(x...) do {} while (0)
#endif
#include <linux/module.h>
#include <asm/kvm_emulate.h>

#include "x86.h"

/*
 * Opcode effective-address decode tables.
 * Note that we only emulate instructions that have at least one memory
 * operand (excluding implicit stack references). We assume that stack
 * references and instruction fetches will never occur in special memory
 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
 * not be handled.
 */

/* Operand sizes: 8-bit operands or specified/overridden size. */
#define ByteOp      (1<<0)	/* 8-bit operands. */
/* Destination operand type. */
#define ImplicitOps (1<<1)	/* Implicit in opcode. No generic decode. */
#define DstReg      (2<<1)	/* Register operand. */
#define DstMem      (3<<1)	/* Memory operand. */
#define DstAcc      (4<<1)      /* Destination Accumulator */
#define DstMask     (7<<1)
/* Source operand type. */
#define SrcNone     (0<<4)	/* No source operand. */
#define SrcImplicit (0<<4)	/* Source operand is implicit in the opcode. */
#define SrcReg      (1<<4)	/* Register operand. */
#define SrcMem      (2<<4)	/* Memory operand. */
#define SrcMem16    (3<<4)	/* Memory operand (16-bit). */
#define SrcMem32    (4<<4)	/* Memory operand (32-bit). */
#define SrcImm      (5<<4)	/* Immediate operand. */
#define SrcImmByte  (6<<4)	/* 8-bit sign-extended immediate operand. */
#define SrcOne      (7<<4)	/* Implied '1' */
#define SrcImmUByte (8<<4)      /* 8-bit unsigned immediate operand. */
#define SrcImmU     (9<<4)      /* Immediate operand, unsigned */
#define SrcMask     (0xf<<4)
/* Generic ModRM decode. */
#define ModRM       (1<<8)
/* Destination is only written; never read. */
#define Mov         (1<<9)
#define BitOp       (1<<10)
#define MemAbs      (1<<11)      /* Memory operand is absolute displacement */
#define String      (1<<12)     /* String instruction (rep capable) */
#define Stack       (1<<13)     /* Stack instruction (push/pop) */
#define Group       (1<<14)     /* Bits 3:5 of modrm byte extend opcode */
#define GroupDual   (1<<15)     /* Alternate decoding of mod == 3 */
#define GroupMask   0xff        /* Group number stored in bits 0:7 */
/* Misc flags */
#define Lock        (1<<26) /* lock prefix is allowed for the instruction */
#define Priv        (1<<27) /* instruction generates #GP if current CPL != 0 */
#define No64	    (1<<28)
/* Source 2 operand type */
#define Src2None    (0<<29)
#define Src2CL      (1<<29)
#define Src2ImmByte (2<<29)
#define Src2One     (3<<29)
#define Src2Imm16   (4<<29)
#define Src2Mask    (7<<29)
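/*
 * Layout of a decode descriptor word, derived from the shifts above:
 * bit 0 = ByteOp, bits 3:1 = destination type, bits 7:4 = source type,
 * bit 8 = ModRM, bits 15:9 = misc decode flags, bits 28:26 = Lock/Priv/No64,
 * bits 31:29 = second source type.  For example, the entry for opcode 0x00
 * (add r/m8, r8) is ByteOp | DstMem | SrcReg | ModRM | Lock: an 8-bit
 * operation whose destination is memory (or a register when ModRM.mod == 3),
 * whose source is the ModRM reg field, and which accepts a LOCK prefix.
 */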

enum {
	Group1_80, Group1_81, Group1_82, Group1_83,
	Group1A, Group3_Byte, Group3, Group4, Group5, Group7,
	Group8, Group9,
};
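/*
 * These group numbers index group_table[] (or group2_table[] for the
 * mod == 3 alternate decoding) below: each group occupies eight consecutive
 * entries, selected by the ModRM reg field in x86_decode_insn().  E.g.
 * opcode 0xf7 /2 ("not") decodes via group_table[Group3*8 + 2].
 */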

static u32 opcode_table[256] = {
	/* 0x00 - 0x07 */
	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
	ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
	/* 0x08 - 0x0F */
	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
	ImplicitOps | Stack | No64, 0,
	/* 0x10 - 0x17 */
	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
	ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
	/* 0x18 - 0x1F */
	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
	ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
	/* 0x20 - 0x27 */
	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
	DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0,
	/* 0x28 - 0x2F */
	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
	0, 0, 0, 0,
	/* 0x30 - 0x37 */
	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
	0, 0, 0, 0,
	/* 0x38 - 0x3F */
	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
	0, 0,
	/* 0x40 - 0x47 */
	DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
	/* 0x48 - 0x4F */
	DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
	/* 0x50 - 0x57 */
	SrcReg | Stack, SrcReg | Stack, SrcReg | Stack, SrcReg | Stack,
	SrcReg | Stack, SrcReg | Stack, SrcReg | Stack, SrcReg | Stack,
	/* 0x58 - 0x5F */
	DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
	DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
	/* 0x60 - 0x67 */
	ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
	0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */,
	0, 0, 0, 0,
	/* 0x68 - 0x6F */
	SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0,
	SrcNone  | ByteOp  | ImplicitOps, SrcNone  | ImplicitOps, /* insb, insw/insd */
	SrcNone  | ByteOp  | ImplicitOps, SrcNone  | ImplicitOps, /* outsb, outsw/outsd */
	/* 0x70 - 0x77 */
	SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
	SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
	/* 0x78 - 0x7F */
	SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
	SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
	/* 0x80 - 0x87 */
	Group | Group1_80, Group | Group1_81,
	Group | Group1_82, Group | Group1_83,
	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
	/* 0x88 - 0x8F */
	ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov,
	ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
	DstMem | SrcReg | ModRM | Mov, ModRM | DstReg,
	DstReg | SrcMem | ModRM | Mov, Group | Group1A,
	/* 0x90 - 0x97 */
	DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
	/* 0x98 - 0x9F */
	0, 0, SrcImm | Src2Imm16 | No64, 0,
	ImplicitOps | Stack, ImplicitOps | Stack, 0, 0,
	/* 0xA0 - 0xA7 */
	ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs,
	ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs,
	ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String,
	ByteOp | ImplicitOps | String, ImplicitOps | String,
	/* 0xA8 - 0xAF */
	0, 0, ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String,
	ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String,
	ByteOp | ImplicitOps | String, ImplicitOps | String,
	/* 0xB0 - 0xB7 */
	ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
	ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
	ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
	ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
	/* 0xB8 - 0xBF */
	DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
	DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
	DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
	DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
	/* 0xC0 - 0xC7 */
	ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM,
	0, ImplicitOps | Stack, 0, 0,
	ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
	/* 0xC8 - 0xCF */
	0, 0, 0, ImplicitOps | Stack,
	ImplicitOps, SrcImmByte, ImplicitOps | No64, ImplicitOps,
	/* 0xD0 - 0xD7 */
	ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
	ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
	0, 0, 0, 0,
	/* 0xD8 - 0xDF */
	0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0 - 0xE7 */
	0, 0, 0, 0,
	ByteOp | SrcImmUByte, SrcImmUByte,
	ByteOp | SrcImmUByte, SrcImmUByte,
	/* 0xE8 - 0xEF */
	SrcImm | Stack, SrcImm | ImplicitOps,
	SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps,
	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
	/* 0xF0 - 0xF7 */
	0, 0, 0, 0,
	ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3,
	/* 0xF8 - 0xFF */
	ImplicitOps, 0, ImplicitOps, ImplicitOps,
	ImplicitOps, ImplicitOps, Group | Group4, Group | Group5,
};

static u32 twobyte_table[256] = {
	/* 0x00 - 0x0F */
	0, Group | GroupDual | Group7, 0, 0,
	0, ImplicitOps, ImplicitOps | Priv, 0,
	ImplicitOps | Priv, ImplicitOps | Priv, 0, 0,
	0, ImplicitOps | ModRM, 0, 0,
	/* 0x10 - 0x1F */
	0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 - 0x2F */
	ModRM | ImplicitOps | Priv, ModRM | Priv,
	ModRM | ImplicitOps | Priv, ModRM | Priv,
	0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x30 - 0x3F */
	ImplicitOps | Priv, 0, ImplicitOps | Priv, 0,
	ImplicitOps, ImplicitOps | Priv, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x40 - 0x47 */
	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
	/* 0x48 - 0x4F */
	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
	/* 0x50 - 0x5F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x60 - 0x6F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x70 - 0x7F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x80 - 0x8F */
	SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm,
	SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm,
	/* 0x90 - 0x9F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xA0 - 0xA7 */
	ImplicitOps | Stack, ImplicitOps | Stack,
	0, DstMem | SrcReg | ModRM | BitOp,
	DstMem | SrcReg | Src2ImmByte | ModRM,
	DstMem | SrcReg | Src2CL | ModRM, 0, 0,
	/* 0xA8 - 0xAF */
	ImplicitOps | Stack, ImplicitOps | Stack,
	0, DstMem | SrcReg | ModRM | BitOp | Lock,
	DstMem | SrcReg | Src2ImmByte | ModRM,
	DstMem | SrcReg | Src2CL | ModRM,
	ModRM, 0,
	/* 0xB0 - 0xB7 */
	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
	0, DstMem | SrcReg | ModRM | BitOp | Lock,
	0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
	    DstReg | SrcMem16 | ModRM | Mov,
	/* 0xB8 - 0xBF */
	0, 0,
	Group | Group8, DstMem | SrcReg | ModRM | BitOp | Lock,
	0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
	    DstReg | SrcMem16 | ModRM | Mov,
	/* 0xC0 - 0xCF */
	0, 0, 0, DstMem | SrcReg | ModRM | Mov,
	0, 0, 0, Group | GroupDual | Group9,
	0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xD0 - 0xDF */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0 - 0xEF */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xF0 - 0xFF */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

static u32 group_table[] = {
	[Group1_80*8] =
	ByteOp | DstMem | SrcImm | ModRM | Lock,
	ByteOp | DstMem | SrcImm | ModRM | Lock,
	ByteOp | DstMem | SrcImm | ModRM | Lock,
	ByteOp | DstMem | SrcImm | ModRM | Lock,
	ByteOp | DstMem | SrcImm | ModRM | Lock,
	ByteOp | DstMem | SrcImm | ModRM | Lock,
	ByteOp | DstMem | SrcImm | ModRM | Lock,
	ByteOp | DstMem | SrcImm | ModRM,
	[Group1_81*8] =
	DstMem | SrcImm | ModRM | Lock,
	DstMem | SrcImm | ModRM | Lock,
	DstMem | SrcImm | ModRM | Lock,
	DstMem | SrcImm | ModRM | Lock,
	DstMem | SrcImm | ModRM | Lock,
	DstMem | SrcImm | ModRM | Lock,
	DstMem | SrcImm | ModRM | Lock,
	DstMem | SrcImm | ModRM,
	[Group1_82*8] =
	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
	ByteOp | DstMem | SrcImm | ModRM | No64,
	[Group1_83*8] =
	DstMem | SrcImmByte | ModRM | Lock,
	DstMem | SrcImmByte | ModRM | Lock,
	DstMem | SrcImmByte | ModRM | Lock,
	DstMem | SrcImmByte | ModRM | Lock,
	DstMem | SrcImmByte | ModRM | Lock,
	DstMem | SrcImmByte | ModRM | Lock,
	DstMem | SrcImmByte | ModRM | Lock,
	DstMem | SrcImmByte | ModRM,
	[Group1A*8] =
	DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0,
	[Group3_Byte*8] =
	ByteOp | SrcImm | DstMem | ModRM, 0,
	ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
	0, 0, 0, 0,
	[Group3*8] =
	DstMem | SrcImm | ModRM, 0,
	DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
	0, 0, 0, 0,
	[Group4*8] =
	ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
	0, 0, 0, 0, 0, 0,
	[Group5*8] =
	DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
	SrcMem | ModRM | Stack, 0,
	SrcMem | ModRM | Stack, 0, SrcMem | ModRM | Stack, 0,
	[Group7*8] =
	0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv,
	SrcNone | ModRM | DstMem | Mov, 0,
	SrcMem16 | ModRM | Mov | Priv, SrcMem | ModRM | ByteOp | Priv,
	[Group8*8] =
	0, 0, 0, 0,
	DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock,
	DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock,
	[Group9*8] =
	0, ImplicitOps | ModRM | Lock, 0, 0, 0, 0, 0, 0,
};

static u32 group2_table[] = {
	[Group7*8] =
	SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM,
	SrcNone | ModRM | DstMem | Mov, 0,
	SrcMem16 | ModRM | Mov, 0,
	[Group9*8] =
	0, 0, 0, 0, 0, 0, 0, 0,
};

/* EFLAGS bit definitions. */
#define EFLG_ID (1<<21)
#define EFLG_VIP (1<<20)
#define EFLG_VIF (1<<19)
#define EFLG_AC (1<<18)
#define EFLG_VM (1<<17)
#define EFLG_RF (1<<16)
#define EFLG_IOPL (3<<12)
#define EFLG_NT (1<<14)
#define EFLG_OF (1<<11)
#define EFLG_DF (1<<10)
#define EFLG_IF (1<<9)
#define EFLG_TF (1<<8)
#define EFLG_SF (1<<7)
#define EFLG_ZF (1<<6)
#define EFLG_AF (1<<4)
#define EFLG_PF (1<<2)
#define EFLG_CF (1<<0)

/*
 * Instruction emulation:
 * Most instructions are emulated directly via a fragment of inline assembly
 * code. This allows us to save/restore EFLAGS and thus very easily pick up
 * any modified flags.
 */

#if defined(CONFIG_X86_64)
#define _LO32 "k"		/* force 32-bit operand */
#define _STK  "%%rsp"		/* stack pointer */
#elif defined(__i386__)
#define _LO32 ""		/* force 32-bit operand */
#define _STK  "%%esp"		/* stack pointer */
#endif

/*
 * These EFLAGS bits are restored from saved value during emulation, and
 * any changes are written back to the saved value after emulation.
 */
#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)
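/* With the EFLG_* values above, EFLAGS_MASK evaluates to 0x8d5. */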

/* Before executing instruction: restore necessary bits in EFLAGS. */
#define _PRE_EFLAGS(_sav, _msk, _tmp)					\
	/* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */ \
	"movl %"_sav",%"_LO32 _tmp"; "                                  \
	"push %"_tmp"; "                                                \
	"push %"_tmp"; "                                                \
	"movl %"_msk",%"_LO32 _tmp"; "                                  \
	"andl %"_LO32 _tmp",("_STK"); "                                 \
	"pushf; "                                                       \
	"notl %"_LO32 _tmp"; "                                          \
	"andl %"_LO32 _tmp",("_STK"); "                                 \
	"andl %"_LO32 _tmp","__stringify(BITS_PER_LONG/4)"("_STK"); "	\
	"pop  %"_tmp"; "                                                \
	"orl  %"_LO32 _tmp",("_STK"); "                                 \
	"popf; "                                                        \
	"pop  %"_sav"; "

/* After executing instruction: write-back necessary bits in EFLAGS. */
#define _POST_EFLAGS(_sav, _msk, _tmp) \
	/* _sav |= EFLAGS & _msk; */		\
	"pushf; "				\
	"pop  %"_tmp"; "			\
	"andl %"_msk",%"_LO32 _tmp"; "		\
	"orl  %"_LO32 _tmp",%"_sav"; "

#ifdef CONFIG_X86_64
#define ON64(x) x
#else
#define ON64(x)
#endif

#define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix)	\
	do {								\
		__asm__ __volatile__ (					\
			_PRE_EFLAGS("0", "4", "2")			\
			_op _suffix " %"_x"3,%1; "			\
			_POST_EFLAGS("0", "4", "2")			\
			: "=m" (_eflags), "=m" ((_dst).val),		\
			  "=&r" (_tmp)					\
			: _y ((_src).val), "i" (EFLAGS_MASK));		\
	} while (0)
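/*
 * Sketch of what the macro above emits for, e.g., a 4-byte "add"
 * (suffix "l"): _PRE_EFLAGS installs the guest's saved arithmetic flags
 * (those in EFLAGS_MASK) into the host EFLAGS, "addl %src,%dst" executes,
 * and _POST_EFLAGS copies the resulting flag bits back into _eflags.
 * Host flag bits outside EFLAGS_MASK are left undisturbed.
 */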


/* Raw emulation: instruction has two explicit operands. */
#define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \
	do {								\
		unsigned long _tmp;					\
									\
		switch ((_dst).bytes) {					\
		case 2:							\
			____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w"); \
			break;						\
		case 4:							\
			____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l"); \
			break;						\
		case 8:							\
			ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q")); \
			break;						\
		}							\
	} while (0)

#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
	do {								     \
		unsigned long _tmp;					     \
		switch ((_dst).bytes) {				             \
		case 1:							     \
			____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b");  \
			break;						     \
		default:						     \
			__emulate_2op_nobyte(_op, _src, _dst, _eflags,	     \
					     _wx, _wy, _lx, _ly, _qx, _qy);  \
			break;						     \
		}							     \
	} while (0)

/* Source operand is byte-sized and may be restricted to just %cl. */
#define emulate_2op_SrcB(_op, _src, _dst, _eflags)                      \
	__emulate_2op(_op, _src, _dst, _eflags,				\
		      "b", "c", "b", "c", "b", "c", "b", "c")

/* Source operand is byte, word, long or quad sized. */
#define emulate_2op_SrcV(_op, _src, _dst, _eflags)                      \
	__emulate_2op(_op, _src, _dst, _eflags,				\
		      "b", "q", "w", "r", _LO32, "r", "", "r")

/* Source operand is word, long or quad sized. */
#define emulate_2op_SrcV_nobyte(_op, _src, _dst, _eflags)               \
	__emulate_2op_nobyte(_op, _src, _dst, _eflags,			\
			     "w", "r", _LO32, "r", "", "r")

/* Instruction has three operands; one operand is stored in the ECX register */
#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type)	\
	do {									\
		unsigned long _tmp;						\
		_type _clv  = (_cl).val;					\
		_type _srcv = (_src).val;					\
		_type _dstv = (_dst).val;					\
										\
		__asm__ __volatile__ (						\
			_PRE_EFLAGS("0", "5", "2")				\
			_op _suffix " %4,%1 \n"					\
			_POST_EFLAGS("0", "5", "2")				\
			: "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp)		\
			: "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK)		\
			);							\
										\
		(_cl).val  = (unsigned long) _clv;				\
		(_src).val = (unsigned long) _srcv;				\
		(_dst).val = (unsigned long) _dstv;				\
	} while (0)

#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags)				\
	do {									\
		switch ((_dst).bytes) {						\
		case 2:								\
			__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,		\
						"w", unsigned short);		\
			break;							\
		case 4:								\
			__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,		\
						"l", unsigned int);		\
			break;							\
		case 8:								\
			ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,	\
						"q", unsigned long));		\
			break;							\
		}								\
	} while (0)

#define __emulate_1op(_op, _dst, _eflags, _suffix)			\
	do {								\
		unsigned long _tmp;					\
									\
		__asm__ __volatile__ (					\
			_PRE_EFLAGS("0", "3", "2")			\
			_op _suffix " %1; "				\
			_POST_EFLAGS("0", "3", "2")			\
			: "=m" (_eflags), "+m" ((_dst).val),		\
			  "=&r" (_tmp)					\
			: "i" (EFLAGS_MASK));				\
	} while (0)

/* Instruction has only one explicit operand (no source operand). */
#define emulate_1op(_op, _dst, _eflags)					\
	do {								\
		switch ((_dst).bytes) {				        \
		case 1:	__emulate_1op(_op, _dst, _eflags, "b"); break;	\
		case 2:	__emulate_1op(_op, _dst, _eflags, "w"); break;	\
		case 4:	__emulate_1op(_op, _dst, _eflags, "l"); break;	\
		case 8:	ON64(__emulate_1op(_op, _dst, _eflags, "q")); break; \
		}							\
	} while (0)

/* Fetch next part of the instruction being emulated. */
#define insn_fetch(_type, _size, _eip)                                  \
({	unsigned long _x;						\
	rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size));		\
	if (rc != 0)							\
		goto done;						\
	(_eip) += (_size);						\
	(_type)_x;							\
})
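/*
 * NB: insn_fetch() relies on the surrounding function providing a local
 * 'rc', the 'ctxt' and 'ops' arguments, and a 'done' label to jump to on
 * failure, e.g.:
 *
 *	c->modrm = insn_fetch(u8, 1, c->eip);
 */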

static inline unsigned long ad_mask(struct decode_cache *c)
{
	return (1UL << (c->ad_bytes << 3)) - 1;
}

/* Access/update address held in a register, based on addressing mode. */
static inline unsigned long
address_mask(struct decode_cache *c, unsigned long reg)
{
	if (c->ad_bytes == sizeof(unsigned long))
		return reg;
	else
		return reg & ad_mask(c);
}

static inline unsigned long
register_address(struct decode_cache *c, unsigned long base, unsigned long reg)
{
	return base + address_mask(c, reg);
}

static inline void
register_address_increment(struct decode_cache *c, unsigned long *reg, int inc)
{
	if (c->ad_bytes == sizeof(unsigned long))
		*reg += inc;
	else
		*reg = (*reg & ~ad_mask(c)) | ((*reg + inc) & ad_mask(c));
}
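/*
 * For example, with ad_bytes == 2 (ad_mask == 0xffff), incrementing a
 * register holding 0xffff by 1 wraps the low 16 bits to 0 and leaves
 * bits 63:16 untouched, matching 16-bit address arithmetic.
 */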

static inline void jmp_rel(struct decode_cache *c, int rel)
{
	register_address_increment(c, &c->eip, rel);
}

static void set_seg_override(struct decode_cache *c, int seg)
{
	c->has_seg_override = true;
	c->seg_override = seg;
}

static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
{
	if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
		return 0;

	return kvm_x86_ops->get_segment_base(ctxt->vcpu, seg);
}

static unsigned long seg_override_base(struct x86_emulate_ctxt *ctxt,
				       struct decode_cache *c)
{
	if (!c->has_seg_override)
		return 0;

	return seg_base(ctxt, c->seg_override);
}

static unsigned long es_base(struct x86_emulate_ctxt *ctxt)
{
	return seg_base(ctxt, VCPU_SREG_ES);
}

static unsigned long ss_base(struct x86_emulate_ctxt *ctxt)
{
	return seg_base(ctxt, VCPU_SREG_SS);
}

static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
			      struct x86_emulate_ops *ops,
			      unsigned long linear, u8 *dest)
{
	struct fetch_cache *fc = &ctxt->decode.fetch;
	int rc;
	int size;

	if (linear < fc->start || linear >= fc->end) {
		size = min(15UL, PAGE_SIZE - offset_in_page(linear));
		rc = ops->fetch(linear, fc->data, size, ctxt->vcpu, NULL);
		if (rc)
			return rc;
		fc->start = linear;
		fc->end = linear + size;
	}
	*dest = fc->data[linear - fc->start];
	return 0;
}

static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
			 struct x86_emulate_ops *ops,
			 unsigned long eip, void *dest, unsigned size)
{
	int rc = 0;

	/* x86 instructions are limited to 15 bytes. */
	if (eip + size - ctxt->decode.eip_orig > 15)
		return X86EMUL_UNHANDLEABLE;
	eip += ctxt->cs_base;
	while (size--) {
		rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++);
		if (rc)
			return rc;
	}
	return 0;
}

/*
 * Given the 'reg' portion of a ModRM byte, and a register block, return a
 * pointer into the block that addresses the relevant register.
 * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
 */
static void *decode_register(u8 modrm_reg, unsigned long *regs,
			     int highbyte_regs)
{
	void *p;

	p = &regs[modrm_reg];
	if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
		p = (unsigned char *)&regs[modrm_reg & 3] + 1;
	return p;
}
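/*
 * E.g. decode_register(4, c->regs, 1) yields a pointer to AH (byte 1 of
 * the RAX slot, since 4 & 3 == 0), whereas decode_register(4, c->regs, 0)
 * yields a pointer to the RSP slot.
 */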

static int read_descriptor(struct x86_emulate_ctxt *ctxt,
			   struct x86_emulate_ops *ops,
			   void *ptr,
			   u16 *size, unsigned long *address, int op_bytes)
{
	int rc;

	if (op_bytes == 2)
		op_bytes = 3;
	*address = 0;
	rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2,
			   ctxt->vcpu, NULL);
	if (rc)
		return rc;
	rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes,
			   ctxt->vcpu, NULL);
	return rc;
}

static int test_cc(unsigned int condition, unsigned int flags)
{
	int rc = 0;

	switch ((condition & 15) >> 1) {
	case 0: /* o */
		rc |= (flags & EFLG_OF);
		break;
	case 1: /* b/c/nae */
		rc |= (flags & EFLG_CF);
		break;
	case 2: /* z/e */
		rc |= (flags & EFLG_ZF);
		break;
	case 3: /* be/na */
		rc |= (flags & (EFLG_CF|EFLG_ZF));
		break;
	case 4: /* s */
		rc |= (flags & EFLG_SF);
		break;
	case 5: /* p/pe */
		rc |= (flags & EFLG_PF);
		break;
	case 7: /* le/ng */
		rc |= (flags & EFLG_ZF);
		/* fall through */
	case 6: /* l/nge */
		rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF));
		break;
	}

	/* Odd condition identifiers (lsb == 1) have inverted sense. */
	return (!!rc ^ (condition & 1));
}
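/*
 * Example: the "z/e" condition code is 0x4, so (0x4 & 15) >> 1 == 2 tests
 * ZF; "nz/ne" is 0x5, and the final xor with the low bit inverts the sense.
 */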

static void decode_register_operand(struct operand *op,
				    struct decode_cache *c,
				    int inhibit_bytereg)
{
	unsigned reg = c->modrm_reg;
	int highbyte_regs = c->rex_prefix == 0;

	if (!(c->d & ModRM))
		reg = (c->b & 7) | ((c->rex_prefix & 1) << 3);
	op->type = OP_REG;
	if ((c->d & ByteOp) && !inhibit_bytereg) {
		op->ptr = decode_register(reg, c->regs, highbyte_regs);
		op->val = *(u8 *)op->ptr;
		op->bytes = 1;
	} else {
		op->ptr = decode_register(reg, c->regs, 0);
		op->bytes = c->op_bytes;
		switch (op->bytes) {
		case 2:
			op->val = *(u16 *)op->ptr;
			break;
		case 4:
			op->val = *(u32 *)op->ptr;
			break;
		case 8:
			op->val = *(u64 *) op->ptr;
			break;
		}
	}
	op->orig_val = op->val;
}

static int decode_modrm(struct x86_emulate_ctxt *ctxt,
			struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
	u8 sib;
	int index_reg = 0, base_reg = 0, scale;
	int rc = 0;

	if (c->rex_prefix) {
		c->modrm_reg = (c->rex_prefix & 4) << 1;	/* REX.R */
		index_reg = (c->rex_prefix & 2) << 2; /* REX.X */
		c->modrm_rm = base_reg = (c->rex_prefix & 1) << 3; /* REX.B */
	}

	c->modrm = insn_fetch(u8, 1, c->eip);
	c->modrm_mod |= (c->modrm & 0xc0) >> 6;
	c->modrm_reg |= (c->modrm & 0x38) >> 3;
	c->modrm_rm |= (c->modrm & 0x07);
	c->modrm_ea = 0;
	c->use_modrm_ea = 1;

	if (c->modrm_mod == 3) {
		c->modrm_ptr = decode_register(c->modrm_rm,
					       c->regs, c->d & ByteOp);
		c->modrm_val = *(unsigned long *)c->modrm_ptr;
		return rc;
	}

	if (c->ad_bytes == 2) {
		unsigned bx = c->regs[VCPU_REGS_RBX];
		unsigned bp = c->regs[VCPU_REGS_RBP];
		unsigned si = c->regs[VCPU_REGS_RSI];
		unsigned di = c->regs[VCPU_REGS_RDI];

		/* 16-bit ModR/M decode. */
		switch (c->modrm_mod) {
		case 0:
			if (c->modrm_rm == 6)
				c->modrm_ea += insn_fetch(u16, 2, c->eip);
			break;
		case 1:
			c->modrm_ea += insn_fetch(s8, 1, c->eip);
			break;
		case 2:
			c->modrm_ea += insn_fetch(u16, 2, c->eip);
			break;
		}
		switch (c->modrm_rm) {
		case 0:
			c->modrm_ea += bx + si;
			break;
		case 1:
			c->modrm_ea += bx + di;
			break;
		case 2:
			c->modrm_ea += bp + si;
			break;
		case 3:
			c->modrm_ea += bp + di;
			break;
		case 4:
			c->modrm_ea += si;
			break;
		case 5:
			c->modrm_ea += di;
			break;
		case 6:
			if (c->modrm_mod != 0)
				c->modrm_ea += bp;
			break;
		case 7:
			c->modrm_ea += bx;
			break;
		}
		if (c->modrm_rm == 2 || c->modrm_rm == 3 ||
		    (c->modrm_rm == 6 && c->modrm_mod != 0))
			if (!c->has_seg_override)
				set_seg_override(c, VCPU_SREG_SS);
		c->modrm_ea = (u16)c->modrm_ea;
	} else {
		/* 32/64-bit ModR/M decode. */
		if ((c->modrm_rm & 7) == 4) {
			sib = insn_fetch(u8, 1, c->eip);
			index_reg |= (sib >> 3) & 7;
			base_reg |= sib & 7;
			scale = sib >> 6;

			if ((base_reg & 7) == 5 && c->modrm_mod == 0)
				c->modrm_ea += insn_fetch(s32, 4, c->eip);
			else
				c->modrm_ea += c->regs[base_reg];
			if (index_reg != 4)
				c->modrm_ea += c->regs[index_reg] << scale;
		} else if ((c->modrm_rm & 7) == 5 && c->modrm_mod == 0) {
			if (ctxt->mode == X86EMUL_MODE_PROT64)
				c->rip_relative = 1;
		} else
			c->modrm_ea += c->regs[c->modrm_rm];
		switch (c->modrm_mod) {
		case 0:
			if (c->modrm_rm == 5)
				c->modrm_ea += insn_fetch(s32, 4, c->eip);
			break;
		case 1:
			c->modrm_ea += insn_fetch(s8, 1, c->eip);
			break;
		case 2:
			c->modrm_ea += insn_fetch(s32, 4, c->eip);
			break;
		}
	}
done:
	return rc;
}

static int decode_abs(struct x86_emulate_ctxt *ctxt,
		      struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
	int rc = 0;

	switch (c->ad_bytes) {
	case 2:
		c->modrm_ea = insn_fetch(u16, 2, c->eip);
		break;
	case 4:
		c->modrm_ea = insn_fetch(u32, 4, c->eip);
		break;
	case 8:
		c->modrm_ea = insn_fetch(u64, 8, c->eip);
		break;
	}
done:
	return rc;
}

int
x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
	int rc = 0;
	int mode = ctxt->mode;
	int def_op_bytes, def_ad_bytes, group;

	/* Shadow copy of register state. Committed on successful emulation. */

	memset(c, 0, sizeof(struct decode_cache));
	c->eip = c->eip_orig = kvm_rip_read(ctxt->vcpu);
	ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS);
	memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);

	switch (mode) {
	case X86EMUL_MODE_REAL:
	case X86EMUL_MODE_VM86:
	case X86EMUL_MODE_PROT16:
		def_op_bytes = def_ad_bytes = 2;
		break;
	case X86EMUL_MODE_PROT32:
		def_op_bytes = def_ad_bytes = 4;
		break;
#ifdef CONFIG_X86_64
	case X86EMUL_MODE_PROT64:
		def_op_bytes = 4;
		def_ad_bytes = 8;
		break;
#endif
	default:
		return -1;
	}

	c->op_bytes = def_op_bytes;
	c->ad_bytes = def_ad_bytes;

	/* Legacy prefixes. */
	for (;;) {
		switch (c->b = insn_fetch(u8, 1, c->eip)) {
		case 0x66:	/* operand-size override */
			/* switch between 2/4 bytes */
			c->op_bytes = def_op_bytes ^ 6;
			break;
		case 0x67:	/* address-size override */
			if (mode == X86EMUL_MODE_PROT64)
				/* switch between 4/8 bytes */
				c->ad_bytes = def_ad_bytes ^ 12;
			else
				/* switch between 2/4 bytes */
				c->ad_bytes = def_ad_bytes ^ 6;
			break;
		case 0x26:	/* ES override */
		case 0x2e:	/* CS override */
		case 0x36:	/* SS override */
		case 0x3e:	/* DS override */
			set_seg_override(c, (c->b >> 3) & 3);
			break;
		case 0x64:	/* FS override */
		case 0x65:	/* GS override */
			set_seg_override(c, c->b & 7);
			break;
		case 0x40 ... 0x4f: /* REX */
			if (mode != X86EMUL_MODE_PROT64)
				goto done_prefixes;
			c->rex_prefix = c->b;
			continue;
		case 0xf0:	/* LOCK */
			c->lock_prefix = 1;
			break;
		case 0xf2:	/* REPNE/REPNZ */
			c->rep_prefix = REPNE_PREFIX;
			break;
		case 0xf3:	/* REP/REPE/REPZ */
			c->rep_prefix = REPE_PREFIX;
			break;
		default:
			goto done_prefixes;
		}

		/* Any legacy prefix after a REX prefix nullifies its effect. */

		c->rex_prefix = 0;
	}

done_prefixes:

	/* REX prefix. */
	if (c->rex_prefix)
		if (c->rex_prefix & 8)
			c->op_bytes = 8;	/* REX.W */

	/* Opcode byte(s). */
	c->d = opcode_table[c->b];
	if (c->d == 0) {
		/* Two-byte opcode? */
		if (c->b == 0x0f) {
			c->twobyte = 1;
			c->b = insn_fetch(u8, 1, c->eip);
			c->d = twobyte_table[c->b];
		}
	}

	if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) {
		kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction");
		return -1;
	}

	if (c->d & Group) {
		group = c->d & GroupMask;
		c->modrm = insn_fetch(u8, 1, c->eip);
		--c->eip;

		group = (group << 3) + ((c->modrm >> 3) & 7);
		if ((c->d & GroupDual) && (c->modrm >> 6) == 3)
			c->d = group2_table[group];
		else
			c->d = group_table[group];
	}

	/* Unrecognised? */
	if (c->d == 0) {
		DPRINTF("Cannot emulate %02x\n", c->b);
		return -1;
	}

	if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack))
		c->op_bytes = 8;

	/* ModRM and SIB bytes. */
	if (c->d & ModRM)
		rc = decode_modrm(ctxt, ops);
	else if (c->d & MemAbs)
		rc = decode_abs(ctxt, ops);
	if (rc)
		goto done;

	if (!c->has_seg_override)
		set_seg_override(c, VCPU_SREG_DS);

	if (!(!c->twobyte && c->b == 0x8d))
		c->modrm_ea += seg_override_base(ctxt, c);

	if (c->ad_bytes != 8)
		c->modrm_ea = (u32)c->modrm_ea;
	/*
	 * Decode and fetch the source operand: register, memory
	 * or immediate.
	 */
	switch (c->d & SrcMask) {
	case SrcNone:
		break;
	case SrcReg:
		decode_register_operand(&c->src, c, 0);
		break;
	case SrcMem16:
		c->src.bytes = 2;
		goto srcmem_common;
	case SrcMem32:
		c->src.bytes = 4;
		goto srcmem_common;
	case SrcMem:
		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
		/* Don't fetch the address for invlpg: it could be unmapped. */
		if (c->twobyte && c->b == 0x01 && c->modrm_reg == 7)
			break;
	srcmem_common:
		/*
		 * For instructions with a ModR/M byte, switch to register
		 * access if Mod = 3.
		 */
		if ((c->d & ModRM) && c->modrm_mod == 3) {
			c->src.type = OP_REG;
			c->src.val = c->modrm_val;
			c->src.ptr = c->modrm_ptr;
			break;
		}
		c->src.type = OP_MEM;
		break;
	case SrcImm:
	case SrcImmU:
		c->src.type = OP_IMM;
		c->src.ptr = (unsigned long *)c->eip;
		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
		if (c->src.bytes == 8)
			c->src.bytes = 4;
		/* NB. Immediates are sign-extended as necessary. */
		switch (c->src.bytes) {
		case 1:
			c->src.val = insn_fetch(s8, 1, c->eip);
			break;
		case 2:
			c->src.val = insn_fetch(s16, 2, c->eip);
			break;
		case 4:
			c->src.val = insn_fetch(s32, 4, c->eip);
			break;
		}
		if ((c->d & SrcMask) == SrcImmU) {
			switch (c->src.bytes) {
			case 1:
				c->src.val &= 0xff;
				break;
			case 2:
				c->src.val &= 0xffff;
				break;
			case 4:
				c->src.val &= 0xffffffff;
				break;
			}
		}
		break;
	case SrcImmByte:
	case SrcImmUByte:
		c->src.type = OP_IMM;
		c->src.ptr = (unsigned long *)c->eip;
		c->src.bytes = 1;
		if ((c->d & SrcMask) == SrcImmByte)
			c->src.val = insn_fetch(s8, 1, c->eip);
		else
			c->src.val = insn_fetch(u8, 1, c->eip);
		break;
	case SrcOne:
		c->src.bytes = 1;
		c->src.val = 1;
		break;
	}

	/*
	 * Decode and fetch the second source operand: register, memory
	 * or immediate.
	 */
	switch (c->d & Src2Mask) {
	case Src2None:
		break;
	case Src2CL:
		c->src2.bytes = 1;
		c->src2.val = c->regs[VCPU_REGS_RCX] & 0xff; /* CL is the low byte of RCX */
		break;
	case Src2ImmByte:
		c->src2.type = OP_IMM;
		c->src2.ptr = (unsigned long *)c->eip;
		c->src2.bytes = 1;
		c->src2.val = insn_fetch(u8, 1, c->eip);
		break;
	case Src2Imm16:
		c->src2.type = OP_IMM;
		c->src2.ptr = (unsigned long *)c->eip;
		c->src2.bytes = 2;
		c->src2.val = insn_fetch(u16, 2, c->eip);
		break;
	case Src2One:
		c->src2.bytes = 1;
		c->src2.val = 1;
		break;
	}

	/* Decode and fetch the destination operand: register or memory. */
	switch (c->d & DstMask) {
	case ImplicitOps:
		/* Special instructions do their own operand decoding. */
		return 0;
	case DstReg:
		decode_register_operand(&c->dst, c,
			 c->twobyte && (c->b == 0xb6 || c->b == 0xb7));
		break;
	case DstMem:
		if ((c->d & ModRM) && c->modrm_mod == 3) {
			c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
			c->dst.type = OP_REG;
			c->dst.val = c->dst.orig_val = c->modrm_val;
			c->dst.ptr = c->modrm_ptr;
			break;
		}
		c->dst.type = OP_MEM;
		break;
	case DstAcc:
		c->dst.type = OP_REG;
		c->dst.bytes = c->op_bytes;
		c->dst.ptr = &c->regs[VCPU_REGS_RAX];
		switch (c->op_bytes) {
		case 1:
			c->dst.val = *(u8 *)c->dst.ptr;
			break;
		case 2:
			c->dst.val = *(u16 *)c->dst.ptr;
			break;
		case 4:
			c->dst.val = *(u32 *)c->dst.ptr;
			break;
		}
		c->dst.orig_val = c->dst.val;
		break;
	}

	if (c->rip_relative)
		c->modrm_ea += c->eip;

done:
	return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
}

static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
{
	struct decode_cache *c = &ctxt->decode;

	c->dst.type  = OP_MEM;
	c->dst.bytes = c->op_bytes;
	c->dst.val = c->src.val;
	register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes);
	c->dst.ptr = (void *) register_address(c, ss_base(ctxt),
					       c->regs[VCPU_REGS_RSP]);
}
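/*
 * Note that emulate_push() only stages the store: it decrements RSP and
 * sets up c->dst; the actual memory write is performed later by writeback().
 */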

static int emulate_pop(struct x86_emulate_ctxt *ctxt,
		       struct x86_emulate_ops *ops,
		       void *dest, int len)
{
	struct decode_cache *c = &ctxt->decode;
	int rc;

	rc = ops->read_emulated(register_address(c, ss_base(ctxt),
						 c->regs[VCPU_REGS_RSP]),
				dest, len, ctxt->vcpu);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	register_address_increment(c, &c->regs[VCPU_REGS_RSP], len);
	return rc;
}

static int emulate_popf(struct x86_emulate_ctxt *ctxt,
		       struct x86_emulate_ops *ops,
		       void *dest, int len)
{
	int rc;
	unsigned long val, change_mask;
	int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
	int cpl = kvm_x86_ops->get_cpl(ctxt->vcpu);

	rc = emulate_pop(ctxt, ops, &val, len);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF
		| EFLG_TF | EFLG_DF | EFLG_NT | EFLG_RF | EFLG_AC | EFLG_ID;

	switch (ctxt->mode) {
	case X86EMUL_MODE_PROT64:
	case X86EMUL_MODE_PROT32:
	case X86EMUL_MODE_PROT16:
		if (cpl == 0)
			change_mask |= EFLG_IOPL;
		if (cpl <= iopl)
			change_mask |= EFLG_IF;
		break;
	case X86EMUL_MODE_VM86:
		if (iopl < 3) {
			kvm_inject_gp(ctxt->vcpu, 0);
			return X86EMUL_PROPAGATE_FAULT;
		}
		change_mask |= EFLG_IF;
		break;
	default: /* real mode */
		change_mask |= (EFLG_IOPL | EFLG_IF);
		break;
	}

	*(unsigned long *)dest =
		(ctxt->eflags & ~change_mask) | (val & change_mask);

	return rc;
}

static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg)
{
	struct decode_cache *c = &ctxt->decode;
	struct kvm_segment segment;

	kvm_x86_ops->get_segment(ctxt->vcpu, &segment, seg);

	c->src.val = segment.selector;
	emulate_push(ctxt);
}

static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
			     struct x86_emulate_ops *ops, int seg)
{
	struct decode_cache *c = &ctxt->decode;
	unsigned long selector;
	int rc;

	rc = emulate_pop(ctxt, ops, &selector, c->op_bytes);
	if (rc != 0)
		return rc;

	rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, seg);
	return rc;
}

static void emulate_pusha(struct x86_emulate_ctxt *ctxt)
{
	struct decode_cache *c = &ctxt->decode;
	unsigned long old_esp = c->regs[VCPU_REGS_RSP];
	int reg = VCPU_REGS_RAX;

	while (reg <= VCPU_REGS_RDI) {
		c->src.val = (reg == VCPU_REGS_RSP) ?
			     old_esp : c->regs[reg];

		emulate_push(ctxt);
		++reg;
	}
}

static int emulate_popa(struct x86_emulate_ctxt *ctxt,
			struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
	int rc = 0;
	int reg = VCPU_REGS_RDI;

	while (reg >= VCPU_REGS_RAX) {
		if (reg == VCPU_REGS_RSP) {
			register_address_increment(c, &c->regs[VCPU_REGS_RSP],
							c->op_bytes);
			--reg;
		}

		rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes);
		if (rc != 0)
			break;
		--reg;
	}
	return rc;
}

static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
				struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
	int rc;

	rc = emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes);
	if (rc != 0)
		return rc;
	return 0;
}

static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt)
{
	struct decode_cache *c = &ctxt->decode;
	switch (c->modrm_reg) {
	case 0:	/* rol */
		emulate_2op_SrcB("rol", c->src, c->dst, ctxt->eflags);
		break;
	case 1:	/* ror */
		emulate_2op_SrcB("ror", c->src, c->dst, ctxt->eflags);
		break;
	case 2:	/* rcl */
		emulate_2op_SrcB("rcl", c->src, c->dst, ctxt->eflags);
		break;
	case 3:	/* rcr */
		emulate_2op_SrcB("rcr", c->src, c->dst, ctxt->eflags);
		break;
	case 4:	/* sal/shl */
	case 6:	/* sal/shl */
		emulate_2op_SrcB("sal", c->src, c->dst, ctxt->eflags);
		break;
	case 5:	/* shr */
		emulate_2op_SrcB("shr", c->src, c->dst, ctxt->eflags);
		break;
	case 7:	/* sar */
		emulate_2op_SrcB("sar", c->src, c->dst, ctxt->eflags);
		break;
	}
}

static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
			       struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
	int rc = 0;

	switch (c->modrm_reg) {
	case 0 ... 1:	/* test */
		emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
		break;
	case 2:	/* not */
		c->dst.val = ~c->dst.val;
		break;
	case 3:	/* neg */
		emulate_1op("neg", c->dst, ctxt->eflags);
		break;
	default:
		DPRINTF("Cannot emulate %02x\n", c->b);
		rc = X86EMUL_UNHANDLEABLE;
		break;
	}
	return rc;
}

static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
			       struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;

	switch (c->modrm_reg) {
	case 0:	/* inc */
		emulate_1op("inc", c->dst, ctxt->eflags);
		break;
	case 1:	/* dec */
		emulate_1op("dec", c->dst, ctxt->eflags);
		break;
	case 2: /* call near abs */ {
		long int old_eip;
		old_eip = c->eip;
		c->eip = c->src.val;
		c->src.val = old_eip;
		emulate_push(ctxt);
		break;
	}
	case 4: /* jmp abs */
		c->eip = c->src.val;
		break;
	case 6:	/* push */
		emulate_push(ctxt);
		break;
	}
	return 0;
}

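/*
 * Grp9 /1 is cmpxchg8b: compare EDX:EAX against the 64-bit memory operand;
 * if equal, store ECX:EBX there and set ZF, otherwise load the operand
 * into EDX:EAX and clear ZF.
 */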
static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
			       struct x86_emulate_ops *ops,
			       unsigned long memop)
{
	struct decode_cache *c = &ctxt->decode;
	u64 old, new;
	int rc;

	rc = ops->read_emulated(memop, &old, 8, ctxt->vcpu);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) ||
	    ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) {

		c->regs[VCPU_REGS_RAX] = (u32) (old >> 0);
		c->regs[VCPU_REGS_RDX] = (u32) (old >> 32);
		ctxt->eflags &= ~EFLG_ZF;

	} else {
		new = ((u64)c->regs[VCPU_REGS_RCX] << 32) |
		       (u32) c->regs[VCPU_REGS_RBX];

		rc = ops->cmpxchg_emulated(memop, &old, &new, 8, ctxt->vcpu);
		if (rc != X86EMUL_CONTINUE)
			return rc;
		ctxt->eflags |= EFLG_ZF;
	}
	return 0;
}

static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
			   struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
	int rc;
	unsigned long cs;

	rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes);
	if (rc)
		return rc;
	if (c->op_bytes == 4)
		c->eip = (u32)c->eip;
	rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
	if (rc)
		return rc;
	rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, VCPU_SREG_CS);
	return rc;
}

static inline int writeback(struct x86_emulate_ctxt *ctxt,
			    struct x86_emulate_ops *ops)
{
	int rc;
	struct decode_cache *c = &ctxt->decode;

	switch (c->dst.type) {
	case OP_REG:
		/* The 4-byte case *is* correct:
		 * in 64-bit mode we zero-extend.
		 */
		switch (c->dst.bytes) {
		case 1:
			*(u8 *)c->dst.ptr = (u8)c->dst.val;
			break;
		case 2:
			*(u16 *)c->dst.ptr = (u16)c->dst.val;
			break;
		case 4:
			*c->dst.ptr = (u32)c->dst.val;
			break;	/* 64b: zero-ext */
		case 8:
			*c->dst.ptr = c->dst.val;
			break;
		}
		break;
	case OP_MEM:
		if (c->lock_prefix)
			rc = ops->cmpxchg_emulated(
					(unsigned long)c->dst.ptr,
					&c->dst.orig_val,
					&c->dst.val,
					c->dst.bytes,
					ctxt->vcpu);
		else
			rc = ops->write_emulated(
					(unsigned long)c->dst.ptr,
					&c->dst.val,
					c->dst.bytes,
					ctxt->vcpu);
		if (rc != X86EMUL_CONTINUE)
			return rc;
		break;
	case OP_NONE:
		/* no writeback */
		break;
	default:
		break;
	}
	return 0;
}

static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask)
{
	u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu, mask);
	/*
	 * An sti; sti; sequence only disables interrupts for the first
	 * instruction. So, if the last instruction, be it emulated or
	 * not, left the system with the INT_STI flag enabled, it
	 * means that the last instruction was an sti. We should not
	 * leave the flag on in this case. The same goes for mov ss.
	 */
	if (!(int_shadow & mask))
		ctxt->interruptibility = mask;
}

static inline void
setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
	struct kvm_segment *cs, struct kvm_segment *ss)
{
	memset(cs, 0, sizeof(struct kvm_segment));
	kvm_x86_ops->get_segment(ctxt->vcpu, cs, VCPU_SREG_CS);
	memset(ss, 0, sizeof(struct kvm_segment));

	cs->l = 0;		/* will be adjusted later */
	cs->base = 0;		/* flat segment */
	cs->g = 1;		/* 4kb granularity */
	cs->limit = 0xffffffff;	/* 4GB limit */
	cs->type = 0x0b;	/* Read, Execute, Accessed */
	cs->s = 1;
	cs->dpl = 0;		/* will be adjusted later */
	cs->present = 1;
	cs->db = 1;

	ss->unusable = 0;
	ss->base = 0;		/* flat segment */
	ss->limit = 0xffffffff;	/* 4GB limit */
	ss->g = 1;		/* 4kb granularity */
	ss->s = 1;
	ss->type = 0x03;	/* Read/Write, Accessed */
	ss->db = 1;		/* 32bit stack segment */
	ss->dpl = 0;
	ss->present = 1;
}

static int
emulate_syscall(struct x86_emulate_ctxt *ctxt)
{
	struct decode_cache *c = &ctxt->decode;
	struct kvm_segment cs, ss;
	u64 msr_data;

	/* syscall is not available in real mode */
	if (ctxt->mode == X86EMUL_MODE_REAL || ctxt->mode == X86EMUL_MODE_VM86)
		return X86EMUL_UNHANDLEABLE;

	setup_syscalls_segments(ctxt, &cs, &ss);

	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
	msr_data >>= 32;
	cs.selector = (u16)(msr_data & 0xfffc);
	ss.selector = (u16)(msr_data + 8);

	if (is_long_mode(ctxt->vcpu)) {
		cs.db = 0;
		cs.l = 1;
	}
	kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
	kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);

	c->regs[VCPU_REGS_RCX] = c->eip;
	if (is_long_mode(ctxt->vcpu)) {
#ifdef CONFIG_X86_64
		c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF;

		kvm_x86_ops->get_msr(ctxt->vcpu,
			ctxt->mode == X86EMUL_MODE_PROT64 ?
			MSR_LSTAR : MSR_CSTAR, &msr_data);
		c->eip = msr_data;

		kvm_x86_ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data);
		ctxt->eflags &= ~(msr_data | EFLG_RF);
#endif
	} else {
		/* legacy mode */
		kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
		c->eip = (u32)msr_data;

		ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
	}

	return X86EMUL_CONTINUE;
}

static int
emulate_sysenter(struct x86_emulate_ctxt *ctxt)
{
	struct decode_cache *c = &ctxt->decode;
	struct kvm_segment cs, ss;
	u64 msr_data;

	/* inject #GP if in real mode */
	if (ctxt->mode == X86EMUL_MODE_REAL) {
		kvm_inject_gp(ctxt->vcpu, 0);
		return X86EMUL_UNHANDLEABLE;
	}

	/*
	 * XXX sysenter/sysexit have not been tested in 64bit mode.
	 * Therefore, we inject an #UD.
	 */
	if (ctxt->mode == X86EMUL_MODE_PROT64)
		return X86EMUL_UNHANDLEABLE;

	setup_syscalls_segments(ctxt, &cs, &ss);

	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
	switch (ctxt->mode) {
	case X86EMUL_MODE_PROT32:
		if ((msr_data & 0xfffc) == 0x0) {
			kvm_inject_gp(ctxt->vcpu, 0);
			return X86EMUL_PROPAGATE_FAULT;
		}
		break;
	case X86EMUL_MODE_PROT64:
		if (msr_data == 0x0) {
			kvm_inject_gp(ctxt->vcpu, 0);
			return X86EMUL_PROPAGATE_FAULT;
		}
		break;
	}

	ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
	cs.selector = (u16)msr_data;
	cs.selector &= ~SELECTOR_RPL_MASK;
	ss.selector = cs.selector + 8;
	ss.selector &= ~SELECTOR_RPL_MASK;
	if (ctxt->mode == X86EMUL_MODE_PROT64
		|| is_long_mode(ctxt->vcpu)) {
		cs.db = 0;
		cs.l = 1;
	}

	kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
	kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);

	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data);
	c->eip = msr_data;

	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data);
	c->regs[VCPU_REGS_RSP] = msr_data;

	return X86EMUL_CONTINUE;
}

static int
emulate_sysexit(struct x86_emulate_ctxt *ctxt)
{
	struct decode_cache *c = &ctxt->decode;
	struct kvm_segment cs, ss;
	u64 msr_data;
	int usermode;

	/* inject #GP if in real mode or Virtual 8086 mode */
	if (ctxt->mode == X86EMUL_MODE_REAL ||
	    ctxt->mode == X86EMUL_MODE_VM86) {
		kvm_inject_gp(ctxt->vcpu, 0);
		return X86EMUL_UNHANDLEABLE;
	}

	setup_syscalls_segments(ctxt, &cs, &ss);

	if ((c->rex_prefix & 0x8) != 0x0)
		usermode = X86EMUL_MODE_PROT64;
	else
		usermode = X86EMUL_MODE_PROT32;

	cs.dpl = 3;
	ss.dpl = 3;
	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
	switch (usermode) {
	case X86EMUL_MODE_PROT32:
		cs.selector = (u16)(msr_data + 16);
		if ((msr_data & 0xfffc) == 0x0) {
			kvm_inject_gp(ctxt->vcpu, 0);
			return X86EMUL_PROPAGATE_FAULT;
		}
		ss.selector = (u16)(msr_data + 24);
		break;
	case X86EMUL_MODE_PROT64:
		cs.selector = (u16)(msr_data + 32);
		if (msr_data == 0x0) {
			kvm_inject_gp(ctxt->vcpu, 0);
			return X86EMUL_PROPAGATE_FAULT;
		}
		ss.selector = cs.selector + 8;
		cs.db = 0;
		cs.l = 1;
		break;
	}
	cs.selector |= SELECTOR_RPL_MASK;
	ss.selector |= SELECTOR_RPL_MASK;

	kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
	kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);

	c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX];
	c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX];

	return X86EMUL_CONTINUE;
}

static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
{
	int iopl;
	if (ctxt->mode == X86EMUL_MODE_REAL)
		return false;
	if (ctxt->mode == X86EMUL_MODE_VM86)
		return true;
	iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
	return kvm_x86_ops->get_cpl(ctxt->vcpu) > iopl;
}
1769 
1770 static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
1771 					    struct x86_emulate_ops *ops,
1772 					    u16 port, u16 len)
1773 {
1774 	struct kvm_segment tr_seg;
1775 	int r;
1776 	u16 io_bitmap_ptr;
1777 	u8 perm, bit_idx = port & 0x7;
1778 	unsigned mask = (1 << len) - 1;
1779 
1780 	kvm_get_segment(ctxt->vcpu, &tr_seg, VCPU_SREG_TR);
1781 	if (tr_seg.unusable)
1782 		return false;
1783 	if (tr_seg.limit < 103)
1784 		return false;
1785 	r = ops->read_std(tr_seg.base + 102, &io_bitmap_ptr, 2, ctxt->vcpu,
1786 			  NULL);
1787 	if (r != X86EMUL_CONTINUE)
1788 		return false;
1789 	if (io_bitmap_ptr + port/8 > tr_seg.limit)
1790 		return false;
1791 	r = ops->read_std(tr_seg.base + io_bitmap_ptr + port/8, &perm, 1,
1792 			  ctxt->vcpu, NULL);
1793 	if (r != X86EMUL_CONTINUE)
1794 		return false;
1795 	if ((perm >> bit_idx) & mask)
1796 		return false;
1797 	return true;
1798 }
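
/*
 * Illustration (not part of the original file): the TSS I/O bitmap check
 * above, restated over a plain byte array. A clear bit permits access, and
 * every bit covered by the access must be clear. Like the code above, this
 * sketch reads a single byte and so ignores accesses that straddle a byte
 * boundary.
 */
#if 0
static bool io_bitmap_allows_sketch(const u8 *bitmap, u16 port, u16 len)
{
	u8 perm = bitmap[port / 8];	/* byte holding this port's bit */
	u8 bit_idx = port & 0x7;	/* bit position within that byte */
	unsigned mask = (1 << len) - 1;	/* one bit per byte accessed */

	return ((perm >> bit_idx) & mask) == 0;
}
#endif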
1799 
1800 static bool emulator_io_permitted(struct x86_emulate_ctxt *ctxt,
1801 				 struct x86_emulate_ops *ops,
1802 				 u16 port, u16 len)
1803 {
1804 	if (emulator_bad_iopl(ctxt))
1805 		if (!emulator_io_port_access_allowed(ctxt, ops, port, len))
1806 			return false;
1807 	return true;
1808 }
1809 
1810 int
1811 x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
1812 {
1813 	unsigned long memop = 0;
1814 	u64 msr_data;
1815 	unsigned long saved_eip = 0;
1816 	struct decode_cache *c = &ctxt->decode;
1817 	unsigned int port;
1818 	int io_dir_in;
1819 	int rc = 0;
1820 
1821 	ctxt->interruptibility = 0;
1822 
1823 	/* Shadow copy of register state. Committed on successful emulation.
1824 	 * NOTE: we can copy them from vcpu as x86_decode_insn() doesn't
1825 	 * modify them.
1826 	 */
1827 
1828 	memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
1829 	saved_eip = c->eip;
1830 
1831 	/* LOCK prefix is allowed only with some instructions */
1832 	if (c->lock_prefix && !(c->d & Lock)) {
1833 		kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
1834 		goto done;
1835 	}
1836 
1837 	/* Privileged instruction can be executed only in CPL=0 */
1838 	if ((c->d & Priv) && kvm_x86_ops->get_cpl(ctxt->vcpu)) {
1839 		kvm_inject_gp(ctxt->vcpu, 0);
1840 		goto done;
1841 	}
1842 
1843 	if (((c->d & ModRM) && (c->modrm_mod != 3)) || (c->d & MemAbs))
1844 		memop = c->modrm_ea;
1845 
1846 	if (c->rep_prefix && (c->d & String)) {
1847 		/* All REP prefixes have the same first termination condition */
1848 		if (c->regs[VCPU_REGS_RCX] == 0) {
1849 			kvm_rip_write(ctxt->vcpu, c->eip);
1850 			goto done;
1851 		}
1852 		/* The second termination condition applies only to REPE
1853 		 * and REPNE. If the repeat string operation prefix is
1854 		 * REPE/REPZ or REPNE/REPNZ, test the corresponding
1855 		 * termination condition:
1856 		 * 	- if REPE/REPZ and ZF = 0 then done
1857 		 * 	- if REPNE/REPNZ and ZF = 1 then done
1858 		 */
1859 		if ((c->b == 0xa6) || (c->b == 0xa7) ||
1860 				(c->b == 0xae) || (c->b == 0xaf)) {
1861 			if ((c->rep_prefix == REPE_PREFIX) &&
1862 				((ctxt->eflags & EFLG_ZF) == 0)) {
1863 					kvm_rip_write(ctxt->vcpu, c->eip);
1864 					goto done;
1865 			}
1866 			if ((c->rep_prefix == REPNE_PREFIX) &&
1867 				((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) {
1868 				kvm_rip_write(ctxt->vcpu, c->eip);
1869 				goto done;
1870 			}
1871 		}
1872 		c->regs[VCPU_REGS_RCX]--;
1873 		c->eip = kvm_rip_read(ctxt->vcpu);
1874 	}
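
	/*
	 * Illustration: for REPE CMPSB the checks above behave like
	 *
	 *	for (;;) {
	 *		if (ecx == 0)
	 *			break;		first condition
	 *		if (ZF == 0)
	 *			break;		REPE: ZF from the last pass
	 *		ecx--;
	 *		compare one byte, set ZF;
	 *	}
	 *
	 * with one iteration emulated per call, EIP being rewound so the
	 * instruction is re-executed until a termination condition holds.
	 */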
1875 
1876 	if (c->src.type == OP_MEM) {
1877 		c->src.ptr = (unsigned long *)memop;
1878 		c->src.val = 0;
1879 		rc = ops->read_emulated((unsigned long)c->src.ptr,
1880 					&c->src.val,
1881 					c->src.bytes,
1882 					ctxt->vcpu);
1883 		if (rc != X86EMUL_CONTINUE)
1884 			goto done;
1885 		c->src.orig_val = c->src.val;
1886 	}
1887 
1888 	if ((c->d & DstMask) == ImplicitOps)
1889 		goto special_insn;
1890 
1891 
1892 	if (c->dst.type == OP_MEM) {
1893 		c->dst.ptr = (unsigned long *)memop;
1894 		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1895 		c->dst.val = 0;
1896 		if (c->d & BitOp) {
1897 			unsigned long mask = ~(c->dst.bytes * 8 - 1);
1898 
1899 			c->dst.ptr = (void *)c->dst.ptr +
1900 						   (c->src.val & mask) / 8;
1901 		}
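		/*
		 * Worked example: for a 16-bit "bt [mem], 37", dst.bytes is
		 * 2, so mask = ~15 and (37 & ~15) / 8 = 4; the pointer
		 * advances 4 bytes and the remaining offset 37 & 15 = 5
		 * selects the bit within the addressed word.
		 */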
1902 		if (!(c->d & Mov)) {
1903 			/* optimisation - avoid slow emulated read */
1904 			rc = ops->read_emulated((unsigned long)c->dst.ptr,
1905 						&c->dst.val,
1906 						c->dst.bytes,
1907 						ctxt->vcpu);
1908 			if (rc != X86EMUL_CONTINUE)
1909 				goto done;
1910 		}
1911 	}
1912 	c->dst.orig_val = c->dst.val;
1913 
1914 special_insn:
1915 
1916 	if (c->twobyte)
1917 		goto twobyte_insn;
1918 
1919 	switch (c->b) {
1920 	case 0x00 ... 0x05:
1921 	      add:		/* add */
1922 		emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags);
1923 		break;
1924 	case 0x06:		/* push es */
1925 		emulate_push_sreg(ctxt, VCPU_SREG_ES);
1926 		break;
1927 	case 0x07:		/* pop es */
1928 		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES);
1929 		if (rc != 0)
1930 			goto done;
1931 		break;
1932 	case 0x08 ... 0x0d:
1933 	      or:		/* or */
1934 		emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags);
1935 		break;
1936 	case 0x0e:		/* push cs */
1937 		emulate_push_sreg(ctxt, VCPU_SREG_CS);
1938 		break;
1939 	case 0x10 ... 0x15:
1940 	      adc:		/* adc */
1941 		emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags);
1942 		break;
1943 	case 0x16:		/* push ss */
1944 		emulate_push_sreg(ctxt, VCPU_SREG_SS);
1945 		break;
1946 	case 0x17:		/* pop ss */
1947 		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS);
1948 		if (rc != 0)
1949 			goto done;
1950 		break;
1951 	case 0x18 ... 0x1d:
1952 	      sbb:		/* sbb */
1953 		emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags);
1954 		break;
1955 	case 0x1e:		/* push ds */
1956 		emulate_push_sreg(ctxt, VCPU_SREG_DS);
1957 		break;
1958 	case 0x1f:		/* pop ds */
1959 		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS);
1960 		if (rc != 0)
1961 			goto done;
1962 		break;
1963 	case 0x20 ... 0x25:
1964 	      and:		/* and */
1965 		emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags);
1966 		break;
1967 	case 0x28 ... 0x2d:
1968 	      sub:		/* sub */
1969 		emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags);
1970 		break;
1971 	case 0x30 ... 0x35:
1972 	      xor:		/* xor */
1973 		emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags);
1974 		break;
1975 	case 0x38 ... 0x3d:
1976 	      cmp:		/* cmp */
1977 		emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
1978 		break;
1979 	case 0x40 ... 0x47: /* inc r16/r32 */
1980 		emulate_1op("inc", c->dst, ctxt->eflags);
1981 		break;
1982 	case 0x48 ... 0x4f: /* dec r16/r32 */
1983 		emulate_1op("dec", c->dst, ctxt->eflags);
1984 		break;
1985 	case 0x50 ... 0x57:  /* push reg */
1986 		emulate_push(ctxt);
1987 		break;
1988 	case 0x58 ... 0x5f: /* pop reg */
1989 	pop_instruction:
1990 		rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes);
1991 		if (rc != 0)
1992 			goto done;
1993 		break;
1994 	case 0x60:	/* pusha */
1995 		emulate_pusha(ctxt);
1996 		break;
1997 	case 0x61:	/* popa */
1998 		rc = emulate_popa(ctxt, ops);
1999 		if (rc != 0)
2000 			goto done;
2001 		break;
2002 	case 0x63:		/* movsxd */
2003 		if (ctxt->mode != X86EMUL_MODE_PROT64)
2004 			goto cannot_emulate;
2005 		c->dst.val = (s32) c->src.val;
2006 		break;
2007 	case 0x68: /* push imm */
2008 	case 0x6a: /* push imm8 */
2009 		emulate_push(ctxt);
2010 		break;
2011 	case 0x6c:		/* insb */
2012 	case 0x6d:		/* insw/insd */
2013 		if (!emulator_io_permitted(ctxt, ops, c->regs[VCPU_REGS_RDX],
2014 					  (c->d & ByteOp) ? 1 : c->op_bytes)) {
2015 			kvm_inject_gp(ctxt->vcpu, 0);
2016 			goto done;
2017 		}
2018 		if (kvm_emulate_pio_string(ctxt->vcpu,
2019 				1,
2020 				(c->d & ByteOp) ? 1 : c->op_bytes,
2021 				c->rep_prefix ?
2022 				address_mask(c, c->regs[VCPU_REGS_RCX]) : 1,
2023 				(ctxt->eflags & EFLG_DF),
2024 				register_address(c, es_base(ctxt),
2025 						 c->regs[VCPU_REGS_RDI]),
2026 				c->rep_prefix,
2027 				c->regs[VCPU_REGS_RDX]) == 0) {
2028 			c->eip = saved_eip;
2029 			return -1;
2030 		}
2031 		return 0;
2032 	case 0x6e:		/* outsb */
2033 	case 0x6f:		/* outsw/outsd */
2034 		if (!emulator_io_permitted(ctxt, ops, c->regs[VCPU_REGS_RDX],
2035 					  (c->d & ByteOp) ? 1 : c->op_bytes)) {
2036 			kvm_inject_gp(ctxt->vcpu, 0);
2037 			goto done;
2038 		}
2039 		if (kvm_emulate_pio_string(ctxt->vcpu,
2040 				0,
2041 				(c->d & ByteOp) ? 1 : c->op_bytes,
2042 				c->rep_prefix ?
2043 				address_mask(c, c->regs[VCPU_REGS_RCX]) : 1,
2044 				(ctxt->eflags & EFLG_DF),
2045 					 register_address(c,
2046 					  seg_override_base(ctxt, c),
2047 						 c->regs[VCPU_REGS_RSI]),
2048 				c->rep_prefix,
2049 				c->regs[VCPU_REGS_RDX]) == 0) {
2050 			c->eip = saved_eip;
2051 			return -1;
2052 		}
2053 		return 0;
2054 	case 0x70 ... 0x7f: /* jcc (short) */
2055 		if (test_cc(c->b, ctxt->eflags))
2056 			jmp_rel(c, c->src.val);
2057 		break;
2058 	case 0x80 ... 0x83:	/* Grp1 */
2059 		switch (c->modrm_reg) {
2060 		case 0:
2061 			goto add;
2062 		case 1:
2063 			goto or;
2064 		case 2:
2065 			goto adc;
2066 		case 3:
2067 			goto sbb;
2068 		case 4:
2069 			goto and;
2070 		case 5:
2071 			goto sub;
2072 		case 6:
2073 			goto xor;
2074 		case 7:
2075 			goto cmp;
2076 		}
2077 		break;
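
		/*
		 * Illustration: the ModRM reg field picks the Grp1 ALU op.
		 * E.g. "83 c8 04" is opcode 0x83 with ModRM 0xc8
		 * (mod = 3, reg = 1, rm = 0), i.e. "or eax, 4" via the
		 * "goto or" above.
		 */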
2078 	case 0x84 ... 0x85:
2079 		emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
2080 		break;
2081 	case 0x86 ... 0x87:	/* xchg */
2082 	xchg:
2083 		/* Write back the register source. */
2084 		switch (c->dst.bytes) {
2085 		case 1:
2086 			*(u8 *) c->src.ptr = (u8) c->dst.val;
2087 			break;
2088 		case 2:
2089 			*(u16 *) c->src.ptr = (u16) c->dst.val;
2090 			break;
2091 		case 4:
2092 			*c->src.ptr = (u32) c->dst.val;
2093 			break;	/* 64b reg: zero-extend */
2094 		case 8:
2095 			*c->src.ptr = c->dst.val;
2096 			break;
2097 		}
2098 		/*
2099 		 * Write back the memory destination with implicit LOCK
2100 		 * prefix.
2101 		 */
2102 		c->dst.val = c->src.val;
2103 		c->lock_prefix = 1;
2104 		break;
2105 	case 0x88 ... 0x8b:	/* mov */
2106 		goto mov;
2107 	case 0x8c: { /* mov r/m, sreg */
2108 		struct kvm_segment segreg;
2109 
2110 		if (c->modrm_reg <= 5)
2111 			kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg);
2112 		else {
2113 			printk(KERN_INFO "0x8c: Invalid segreg in modrm byte 0x%02x\n",
2114 			       c->modrm);
2115 			goto cannot_emulate;
2116 		}
2117 		c->dst.val = segreg.selector;
2118 		break;
2119 	}
2120 	case 0x8d: /* lea r16/r32, m */
2121 		c->dst.val = c->modrm_ea;
2122 		break;
2123 	case 0x8e: { /* mov seg, r/m16 */
2124 		uint16_t sel;
2125 
2126 		sel = c->src.val;
2127 
2128 		if (c->modrm_reg == VCPU_SREG_CS ||
2129 		    c->modrm_reg > VCPU_SREG_GS) {
2130 			kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
2131 			goto done;
2132 		}
2133 
2134 		if (c->modrm_reg == VCPU_SREG_SS)
2135 			toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS);
2136 
2137 		rc = kvm_load_segment_descriptor(ctxt->vcpu, sel, c->modrm_reg);
2138 
2139 		c->dst.type = OP_NONE;  /* Disable writeback. */
2140 		break;
2141 	}
2142 	case 0x8f:		/* pop (sole member of Grp1a) */
2143 		rc = emulate_grp1a(ctxt, ops);
2144 		if (rc != 0)
2145 			goto done;
2146 		break;
2147 	case 0x90: /* nop / xchg r8,rax */
2148 		if (!(c->rex_prefix & 1)) { /* nop */
2149 			c->dst.type = OP_NONE;
2150 			break;
2151 		}
2152 	case 0x91 ... 0x97: /* xchg reg,rax */
2153 		c->src.type = c->dst.type = OP_REG;
2154 		c->src.bytes = c->dst.bytes = c->op_bytes;
2155 		c->src.ptr = (unsigned long *) &c->regs[VCPU_REGS_RAX];
2156 		c->src.val = *(c->src.ptr);
2157 		goto xchg;
2158 	case 0x9c: /* pushf */
2159 		c->src.val = (unsigned long) ctxt->eflags;
2160 		emulate_push(ctxt);
2161 		break;
2162 	case 0x9d: /* popf */
2163 		c->dst.type = OP_REG;
2164 		c->dst.ptr = (unsigned long *) &ctxt->eflags;
2165 		c->dst.bytes = c->op_bytes;
2166 		rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes);
2167 		if (rc != X86EMUL_CONTINUE)
2168 			goto done;
2169 		break;
2170 	case 0xa0 ... 0xa1:	/* mov */
2171 		c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
2172 		c->dst.val = c->src.val;
2173 		break;
2174 	case 0xa2 ... 0xa3:	/* mov */
2175 		c->dst.val = (unsigned long)c->regs[VCPU_REGS_RAX];
2176 		break;
2177 	case 0xa4 ... 0xa5:	/* movs */
2178 		c->dst.type = OP_MEM;
2179 		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
2180 		c->dst.ptr = (unsigned long *)register_address(c,
2181 						   es_base(ctxt),
2182 						   c->regs[VCPU_REGS_RDI]);
2183 		rc = ops->read_emulated(register_address(c,
2184 						seg_override_base(ctxt, c),
2185 						c->regs[VCPU_REGS_RSI]),
2186 					&c->dst.val,
2187 					c->dst.bytes, ctxt->vcpu);
2188 		if (rc != X86EMUL_CONTINUE)
2189 			goto done;
2190 		register_address_increment(c, &c->regs[VCPU_REGS_RSI],
2191 				       (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
2192 							   : c->dst.bytes);
2193 		register_address_increment(c, &c->regs[VCPU_REGS_RDI],
2194 				       (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
2195 							   : c->dst.bytes);
2196 		break;
2197 	case 0xa6 ... 0xa7:	/* cmps */
2198 		c->src.type = OP_NONE; /* Disable writeback. */
2199 		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
2200 		c->src.ptr = (unsigned long *)register_address(c,
2201 				       seg_override_base(ctxt, c),
2202 						   c->regs[VCPU_REGS_RSI]);
2203 		rc = ops->read_emulated((unsigned long)c->src.ptr,
2204 					&c->src.val,
2205 					c->src.bytes,
2206 					ctxt->vcpu);
2207 		if (rc != X86EMUL_CONTINUE)
2208 			goto done;
2209 
2210 		c->dst.type = OP_NONE; /* Disable writeback. */
2211 		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
2212 		c->dst.ptr = (unsigned long *)register_address(c,
2213 						   es_base(ctxt),
2214 						   c->regs[VCPU_REGS_RDI]);
2215 		rc = ops->read_emulated((unsigned long)c->dst.ptr,
2216 					&c->dst.val,
2217 					c->dst.bytes,
2218 					ctxt->vcpu);
2219 		if (rc != X86EMUL_CONTINUE)
2220 			goto done;
2221 
2222 		DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr);
2223 
2224 		emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
2225 
2226 		register_address_increment(c, &c->regs[VCPU_REGS_RSI],
2227 				       (ctxt->eflags & EFLG_DF) ? -c->src.bytes
2228 								  : c->src.bytes);
2229 		register_address_increment(c, &c->regs[VCPU_REGS_RDI],
2230 				       (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
2231 								  : c->dst.bytes);
2232 
2233 		break;
2234 	case 0xaa ... 0xab:	/* stos */
2235 		c->dst.type = OP_MEM;
2236 		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
2237 		c->dst.ptr = (unsigned long *)register_address(c,
2238 						   es_base(ctxt),
2239 						   c->regs[VCPU_REGS_RDI]);
2240 		c->dst.val = c->regs[VCPU_REGS_RAX];
2241 		register_address_increment(c, &c->regs[VCPU_REGS_RDI],
2242 				       (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
2243 							   : c->dst.bytes);
2244 		break;
2245 	case 0xac ... 0xad:	/* lods */
2246 		c->dst.type = OP_REG;
2247 		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
2248 		c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
2249 		rc = ops->read_emulated(register_address(c,
2250 						seg_override_base(ctxt, c),
2251 						c->regs[VCPU_REGS_RSI]),
2252 					&c->dst.val,
2253 					c->dst.bytes,
2254 					ctxt->vcpu);
2255 		if (rc != X86EMUL_CONTINUE)
2256 			goto done;
2257 		register_address_increment(c, &c->regs[VCPU_REGS_RSI],
2258 				       (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
2259 							   : c->dst.bytes);
2260 		break;
2261 	case 0xae ... 0xaf:	/* scas */
2262 		DPRINTF("Urk! I don't handle SCAS.\n");
2263 		goto cannot_emulate;
2264 	case 0xb0 ... 0xbf: /* mov r, imm */
2265 		goto mov;
2266 	case 0xc0 ... 0xc1:
2267 		emulate_grp2(ctxt);
2268 		break;
2269 	case 0xc3: /* ret */
2270 		c->dst.type = OP_REG;
2271 		c->dst.ptr = &c->eip;
2272 		c->dst.bytes = c->op_bytes;
2273 		goto pop_instruction;
2274 	case 0xc6 ... 0xc7:	/* mov (sole member of Grp11) */
2275 	mov:
2276 		c->dst.val = c->src.val;
2277 		break;
2278 	case 0xcb:		/* ret far */
2279 		rc = emulate_ret_far(ctxt, ops);
2280 		if (rc)
2281 			goto done;
2282 		break;
2283 	case 0xd0 ... 0xd1:	/* Grp2 */
2284 		c->src.val = 1;
2285 		emulate_grp2(ctxt);
2286 		break;
2287 	case 0xd2 ... 0xd3:	/* Grp2 */
2288 		c->src.val = c->regs[VCPU_REGS_RCX];
2289 		emulate_grp2(ctxt);
2290 		break;
2291 	case 0xe4: 	/* inb */
2292 	case 0xe5: 	/* in */
2293 		port = c->src.val;
2294 		io_dir_in = 1;
2295 		goto do_io;
2296 	case 0xe6: /* outb */
2297 	case 0xe7: /* out */
2298 		port = c->src.val;
2299 		io_dir_in = 0;
2300 		goto do_io;
2301 	case 0xe8: /* call (near) */ {
2302 		long int rel = c->src.val;
2303 		c->src.val = (unsigned long) c->eip;
2304 		jmp_rel(c, rel);
2305 		emulate_push(ctxt);
2306 		break;
2307 	}
2308 	case 0xe9: /* jmp rel */
2309 		goto jmp;
2310 	case 0xea: /* jmp far */
2311 		if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val,
2312 						VCPU_SREG_CS))
2313 			goto done;
2314 
2315 		c->eip = c->src.val;
2316 		break;
2317 	case 0xeb:
2318 	      jmp:		/* jmp rel short */
2319 		jmp_rel(c, c->src.val);
2320 		c->dst.type = OP_NONE; /* Disable writeback. */
2321 		break;
2322 	case 0xec: /* in al,dx */
2323 	case 0xed: /* in (e/r)ax,dx */
2324 		port = c->regs[VCPU_REGS_RDX];
2325 		io_dir_in = 1;
2326 		goto do_io;
2327 	case 0xee: /* out al,dx */
2328 	case 0xef: /* out (e/r)ax,dx */
2329 		port = c->regs[VCPU_REGS_RDX];
2330 		io_dir_in = 0;
2331 	do_io:
2332 		if (!emulator_io_permitted(ctxt, ops, port,
2333 					  (c->d & ByteOp) ? 1 : c->op_bytes)) {
2334 			kvm_inject_gp(ctxt->vcpu, 0);
2335 			goto done;
2336 		}
2337 		if (kvm_emulate_pio(ctxt->vcpu, io_dir_in,
2338 				   (c->d & ByteOp) ? 1 : c->op_bytes,
2339 				   port) != 0) {
2340 			c->eip = saved_eip;
2341 			goto cannot_emulate;
2342 		}
2343 		break;
2344 	case 0xf4:              /* hlt */
2345 		ctxt->vcpu->arch.halt_request = 1;
2346 		break;
2347 	case 0xf5:	/* cmc */
2348 		/* complement carry flag from eflags reg */
2349 		ctxt->eflags ^= EFLG_CF;
2350 		c->dst.type = OP_NONE;	/* Disable writeback. */
2351 		break;
2352 	case 0xf6 ... 0xf7:	/* Grp3 */
2353 		rc = emulate_grp3(ctxt, ops);
2354 		if (rc != 0)
2355 			goto done;
2356 		break;
2357 	case 0xf8: /* clc */
2358 		ctxt->eflags &= ~EFLG_CF;
2359 		c->dst.type = OP_NONE;	/* Disable writeback. */
2360 		break;
2361 	case 0xfa: /* cli */
2362 		if (emulator_bad_iopl(ctxt))
2363 			kvm_inject_gp(ctxt->vcpu, 0);
2364 		else {
2365 			ctxt->eflags &= ~X86_EFLAGS_IF;
2366 			c->dst.type = OP_NONE;	/* Disable writeback. */
2367 		}
2368 		break;
2369 	case 0xfb: /* sti */
2370 		if (emulator_bad_iopl(ctxt))
2371 			kvm_inject_gp(ctxt->vcpu, 0);
2372 		else {
2373 			toggle_interruptibility(ctxt, X86_SHADOW_INT_STI);
2374 			ctxt->eflags |= X86_EFLAGS_IF;
2375 			c->dst.type = OP_NONE;	/* Disable writeback. */
2376 		}
2377 		break;
2378 	case 0xfc: /* cld */
2379 		ctxt->eflags &= ~EFLG_DF;
2380 		c->dst.type = OP_NONE;	/* Disable writeback. */
2381 		break;
2382 	case 0xfd: /* std */
2383 		ctxt->eflags |= EFLG_DF;
2384 		c->dst.type = OP_NONE;	/* Disable writeback. */
2385 		break;
2386 	case 0xfe ... 0xff:	/* Grp4/Grp5 */
2387 		rc = emulate_grp45(ctxt, ops);
2388 		if (rc != 0)
2389 			goto done;
2390 		break;
2391 	}
2392 
2393 writeback:
2394 	rc = writeback(ctxt, ops);
2395 	if (rc != 0)
2396 		goto done;
2397 
2398 	/* Commit shadow register state. */
2399 	memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs);
2400 	kvm_rip_write(ctxt->vcpu, c->eip);
2401 
2402 done:
2403 	if (rc == X86EMUL_UNHANDLEABLE) {
2404 		c->eip = saved_eip;
2405 		return -1;
2406 	}
2407 	return 0;
2408 
2409 twobyte_insn:
2410 	switch (c->b) {
2411 	case 0x01: /* lgdt, lidt, lmsw */
2412 		switch (c->modrm_reg) {
2413 			u16 size;
2414 			unsigned long address;
2415 
2416 		case 0: /* vmcall */
2417 			if (c->modrm_mod != 3 || c->modrm_rm != 1)
2418 				goto cannot_emulate;
2419 
2420 			rc = kvm_fix_hypercall(ctxt->vcpu);
2421 			if (rc)
2422 				goto done;
2423 
2424 			/* Let the processor re-execute the fixed hypercall */
2425 			c->eip = kvm_rip_read(ctxt->vcpu);
2426 			/* Disable writeback. */
2427 			c->dst.type = OP_NONE;
2428 			break;
2429 		case 2: /* lgdt */
2430 			rc = read_descriptor(ctxt, ops, c->src.ptr,
2431 					     &size, &address, c->op_bytes);
2432 			if (rc)
2433 				goto done;
2434 			realmode_lgdt(ctxt->vcpu, size, address);
2435 			/* Disable writeback. */
2436 			c->dst.type = OP_NONE;
2437 			break;
2438 		case 3: /* lidt/vmmcall */
2439 			if (c->modrm_mod == 3) {
2440 				switch (c->modrm_rm) {
2441 				case 1:
2442 					rc = kvm_fix_hypercall(ctxt->vcpu);
2443 					if (rc)
2444 						goto done;
2445 					break;
2446 				default:
2447 					goto cannot_emulate;
2448 				}
2449 			} else {
2450 				rc = read_descriptor(ctxt, ops, c->src.ptr,
2451 						     &size, &address,
2452 						     c->op_bytes);
2453 				if (rc)
2454 					goto done;
2455 				realmode_lidt(ctxt->vcpu, size, address);
2456 			}
2457 			/* Disable writeback. */
2458 			c->dst.type = OP_NONE;
2459 			break;
2460 		case 4: /* smsw */
2461 			c->dst.bytes = 2;
2462 			c->dst.val = realmode_get_cr(ctxt->vcpu, 0);
2463 			break;
2464 		case 6: /* lmsw */
2465 			realmode_lmsw(ctxt->vcpu, (u16)c->src.val,
2466 				      &ctxt->eflags);
2467 			c->dst.type = OP_NONE;
2468 			break;
2469 		case 7: /* invlpg */
2470 			emulate_invlpg(ctxt->vcpu, memop);
2471 			/* Disable writeback. */
2472 			c->dst.type = OP_NONE;
2473 			break;
2474 		default:
2475 			goto cannot_emulate;
2476 		}
2477 		break;
2478 	case 0x05: 		/* syscall */
2479 		rc = emulate_syscall(ctxt);
2480 		if (rc != X86EMUL_CONTINUE)
2481 			goto done;
2482 		else
2483 			goto writeback;
2484 		break;
2485 	case 0x06:
2486 		emulate_clts(ctxt->vcpu);
2487 		c->dst.type = OP_NONE;
2488 		break;
2489 	case 0x08:		/* invd */
2490 	case 0x09:		/* wbinvd */
2491 	case 0x0d:		/* GrpP (prefetch) */
2492 	case 0x18:		/* Grp16 (prefetch/nop) */
2493 		c->dst.type = OP_NONE;
2494 		break;
2495 	case 0x20: /* mov cr, reg */
2496 		if (c->modrm_mod != 3)
2497 			goto cannot_emulate;
2498 		c->regs[c->modrm_rm] =
2499 				realmode_get_cr(ctxt->vcpu, c->modrm_reg);
2500 		c->dst.type = OP_NONE;	/* no writeback */
2501 		break;
2502 	case 0x21: /* mov from dr to reg */
2503 		if (c->modrm_mod != 3)
2504 			goto cannot_emulate;
2505 		rc = emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]);
2506 		if (rc)
2507 			goto cannot_emulate;
2508 		c->dst.type = OP_NONE;	/* no writeback */
2509 		break;
2510 	case 0x22: /* mov reg, cr */
2511 		if (c->modrm_mod != 3)
2512 			goto cannot_emulate;
2513 		realmode_set_cr(ctxt->vcpu,
2514 				c->modrm_reg, c->modrm_val, &ctxt->eflags);
2515 		c->dst.type = OP_NONE;
2516 		break;
2517 	case 0x23: /* mov from reg to dr */
2518 		if (c->modrm_mod != 3)
2519 			goto cannot_emulate;
2520 		rc = emulator_set_dr(ctxt, c->modrm_reg,
2521 				     c->regs[c->modrm_rm]);
2522 		if (rc)
2523 			goto cannot_emulate;
2524 		c->dst.type = OP_NONE;	/* no writeback */
2525 		break;
2526 	case 0x30:
2527 		/* wrmsr */
2528 		msr_data = (u32)c->regs[VCPU_REGS_RAX]
2529 			| ((u64)c->regs[VCPU_REGS_RDX] << 32);
2530 		rc = kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data);
2531 		if (rc) {
2532 			kvm_inject_gp(ctxt->vcpu, 0);
2533 			c->eip = kvm_rip_read(ctxt->vcpu);
2534 		}
2535 		rc = X86EMUL_CONTINUE;
2536 		c->dst.type = OP_NONE;
2537 		break;
2538 	case 0x32:
2539 		/* rdmsr */
2540 		rc = kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data);
2541 		if (rc) {
2542 			kvm_inject_gp(ctxt->vcpu, 0);
2543 			c->eip = kvm_rip_read(ctxt->vcpu);
2544 		} else {
2545 			c->regs[VCPU_REGS_RAX] = (u32)msr_data;
2546 			c->regs[VCPU_REGS_RDX] = msr_data >> 32;
2547 		}
2548 		rc = X86EMUL_CONTINUE;
2549 		c->dst.type = OP_NONE;
2550 		break;
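
		/*
		 * Illustration: WRMSR/RDMSR move a 64-bit value through
		 * EDX:EAX, as assembled/split above. E.g. for the value
		 * 0x0000000200000001, EDX = 0x00000002 and EAX = 0x00000001.
		 */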
2551 	case 0x34:		/* sysenter */
2552 		rc = emulate_sysenter(ctxt);
2553 		if (rc != X86EMUL_CONTINUE)
2554 			goto done;
2555 		else
2556 			goto writeback;
2557 		break;
2558 	case 0x35:		/* sysexit */
2559 		rc = emulate_sysexit(ctxt);
2560 		if (rc != X86EMUL_CONTINUE)
2561 			goto done;
2562 		else
2563 			goto writeback;
2564 		break;
2565 	case 0x40 ... 0x4f:	/* cmov */
2566 		c->dst.val = c->dst.orig_val = c->src.val;
2567 		if (!test_cc(c->b, ctxt->eflags))
2568 			c->dst.type = OP_NONE; /* no writeback */
2569 		break;
2570 	case 0x80 ... 0x8f: /* jcc (near) */
2571 		if (test_cc(c->b, ctxt->eflags))
2572 			jmp_rel(c, c->src.val);
2573 		c->dst.type = OP_NONE;
2574 		break;
2575 	case 0xa0:	  /* push fs */
2576 		emulate_push_sreg(ctxt, VCPU_SREG_FS);
2577 		break;
2578 	case 0xa1:	 /* pop fs */
2579 		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS);
2580 		if (rc != 0)
2581 			goto done;
2582 		break;
2583 	case 0xa3:
2584 	      bt:		/* bt */
2585 		c->dst.type = OP_NONE;
2586 		/* only subword offset */
2587 		c->src.val &= (c->dst.bytes << 3) - 1;
2588 		emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags);
2589 		break;
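
		/*
		 * Illustration: the subword masking above keeps only the
		 * bit offset within the operand, matching the register form
		 * of BT: with 32-bit operands, "bt eax, 37" tests bit
		 * 37 & 31 = 5.
		 */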
2590 	case 0xa4: /* shld imm8, r, r/m */
2591 	case 0xa5: /* shld cl, r, r/m */
2592 		emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags);
2593 		break;
2594 	case 0xa8:	/* push gs */
2595 		emulate_push_sreg(ctxt, VCPU_SREG_GS);
2596 		break;
2597 	case 0xa9:	/* pop gs */
2598 		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS);
2599 		if (rc != 0)
2600 			goto done;
2601 		break;
2602 	case 0xab:
2603 	      bts:		/* bts */
2604 		/* only subword offset */
2605 		c->src.val &= (c->dst.bytes << 3) - 1;
2606 		emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags);
2607 		break;
2608 	case 0xac: /* shrd imm8, r, r/m */
2609 	case 0xad: /* shrd cl, r, r/m */
2610 		emulate_2op_cl("shrd", c->src2, c->src, c->dst, ctxt->eflags);
2611 		break;
2612 	case 0xae:              /* clflush */
2613 		break;
2614 	case 0xb0 ... 0xb1:	/* cmpxchg */
2615 		/*
2616 		 * Save real source value, then compare EAX against
2617 		 * destination.
2618 		 */
2619 		c->src.orig_val = c->src.val;
2620 		c->src.val = c->regs[VCPU_REGS_RAX];
2621 		emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
2622 		if (ctxt->eflags & EFLG_ZF) {
2623 			/* Success: write back to memory. */
2624 			c->dst.val = c->src.orig_val;
2625 		} else {
2626 			/* Failure: write the value we saw to EAX. */
2627 			c->dst.type = OP_REG;
2628 			c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
2629 		}
2630 		break;
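
		/*
		 * Illustration: the sequence above implements the usual
		 * CMPXCHG semantics:
		 *
		 *	if (accumulator == dst) { ZF = 1; dst = src; }
		 *	else                    { ZF = 0; accumulator = dst; }
		 */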
2631 	case 0xb3:
2632 	      btr:		/* btr */
2633 		/* only subword offset */
2634 		c->src.val &= (c->dst.bytes << 3) - 1;
2635 		emulate_2op_SrcV_nobyte("btr", c->src, c->dst, ctxt->eflags);
2636 		break;
2637 	case 0xb6 ... 0xb7:	/* movzx */
2638 		c->dst.bytes = c->op_bytes;
2639 		c->dst.val = (c->d & ByteOp) ? (u8) c->src.val
2640 						       : (u16) c->src.val;
2641 		break;
2642 	case 0xba:		/* Grp8 */
2643 		switch (c->modrm_reg & 3) {
2644 		case 0:
2645 			goto bt;
2646 		case 1:
2647 			goto bts;
2648 		case 2:
2649 			goto btr;
2650 		case 3:
2651 			goto btc;
2652 		}
2653 		break;
2654 	case 0xbb:
2655 	      btc:		/* btc */
2656 		/* only subword offset */
2657 		c->src.val &= (c->dst.bytes << 3) - 1;
2658 		emulate_2op_SrcV_nobyte("btc", c->src, c->dst, ctxt->eflags);
2659 		break;
2660 	case 0xbe ... 0xbf:	/* movsx */
2661 		c->dst.bytes = c->op_bytes;
2662 		c->dst.val = (c->d & ByteOp) ? (s8) c->src.val :
2663 							(s16) c->src.val;
2664 		break;
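
		/*
		 * Illustration of the contrast with movzx above: for source
		 * byte 0x80, movzx yields 0x00000080 while movsx sign-
		 * extends to 0xffffff80.
		 */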
2665 	case 0xc3:		/* movnti */
2666 		c->dst.bytes = c->op_bytes;
2667 		c->dst.val = (c->op_bytes == 4) ? (u32) c->src.val :
2668 							(u64) c->src.val;
2669 		break;
2670 	case 0xc7:		/* Grp9 (cmpxchg8b) */
2671 		rc = emulate_grp9(ctxt, ops, memop);
2672 		if (rc != 0)
2673 			goto done;
2674 		c->dst.type = OP_NONE;
2675 		break;
2676 	}
2677 	goto writeback;
2678 
2679 cannot_emulate:
2680 	DPRINTF("Cannot emulate %02x\n", c->b);
2681 	c->eip = saved_eip;
2682 	return -1;
2683 }
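
/*
 * Illustration (not part of the original file): a hypothetical caller
 * sketching how the decode/execute pair is typically driven; in KVM this
 * is handled by emulate_instruction() in x86.c, with more elaborate error
 * handling than shown here.
 */
#if 0
static int run_emulator_sketch(struct x86_emulate_ctxt *ctxt,
			       struct x86_emulate_ops *ops)
{
	int rc;

	rc = x86_decode_insn(ctxt, ops);	/* fill ctxt->decode */
	if (rc != 0)
		return rc;			/* decode failed */

	return x86_emulate_insn(ctxt, ops);	/* 0 on success, -1 otherwise */
}
#endif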
2684