xref: /openbmc/linux/arch/x86/kvm/emulate.c (revision a09d2831)
1 /******************************************************************************
2  * emulate.c
3  *
4  * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
5  *
6  * Copyright (c) 2005 Keir Fraser
7  *
8  * Linux coding style, mod r/m decoder, segment base fixes, real-mode
9  * privileged instructions:
10  *
11  * Copyright (C) 2006 Qumranet
12  *
13  *   Avi Kivity <avi@qumranet.com>
14  *   Yaniv Kamay <yaniv@qumranet.com>
15  *
16  * This work is licensed under the terms of the GNU GPL, version 2.  See
17  * the COPYING file in the top-level directory.
18  *
19  * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
20  */
21 
22 #ifndef __KERNEL__
23 #include <stdio.h>
24 #include <stdint.h>
25 #include <public/xen.h>
26 #define DPRINTF(_f, _a ...) printf(_f , ## _a)
27 #else
28 #include <linux/kvm_host.h>
29 #include "kvm_cache_regs.h"
30 #define DPRINTF(x...) do {} while (0)
31 #endif
32 #include <linux/module.h>
33 #include <asm/kvm_emulate.h>
34 
35 #include "mmu.h"		/* for is_long_mode() */
36 
37 /*
38  * Opcode effective-address decode tables.
39  * Note that we only emulate instructions that have at least one memory
40  * operand (excluding implicit stack references). We assume that stack
41  * references and instruction fetches will never occur in special memory
42  * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
43  * not be handled.
44  */
45 
46 /* Operand sizes: 8-bit operands or specified/overridden size. */
47 #define ByteOp      (1<<0)	/* 8-bit operands. */
48 /* Destination operand type. */
49 #define ImplicitOps (1<<1)	/* Implicit in opcode. No generic decode. */
50 #define DstReg      (2<<1)	/* Register operand. */
51 #define DstMem      (3<<1)	/* Memory operand. */
52 #define DstAcc      (4<<1)      /* Destination Accumulator */
53 #define DstMask     (7<<1)
54 /* Source operand type. */
55 #define SrcNone     (0<<4)	/* No source operand. */
56 #define SrcImplicit (0<<4)	/* Source operand is implicit in the opcode. */
57 #define SrcReg      (1<<4)	/* Register operand. */
58 #define SrcMem      (2<<4)	/* Memory operand. */
59 #define SrcMem16    (3<<4)	/* Memory operand (16-bit). */
60 #define SrcMem32    (4<<4)	/* Memory operand (32-bit). */
61 #define SrcImm      (5<<4)	/* Immediate operand. */
62 #define SrcImmByte  (6<<4)	/* 8-bit sign-extended immediate operand. */
63 #define SrcOne      (7<<4)	/* Implied '1' */
64 #define SrcImmUByte (8<<4)      /* 8-bit unsigned immediate operand. */
65 #define SrcImmU     (9<<4)      /* Immediate operand, unsigned */
66 #define SrcMask     (0xf<<4)
67 /* Generic ModRM decode. */
68 #define ModRM       (1<<8)
69 /* Destination is only written; never read. */
70 #define Mov         (1<<9)
71 #define BitOp       (1<<10)
72 #define MemAbs      (1<<11)      /* Memory operand is absolute displacement */
73 #define String      (1<<12)     /* String instruction (rep capable) */
74 #define Stack       (1<<13)     /* Stack instruction (push/pop) */
75 #define Group       (1<<14)     /* Bits 5:3 of modrm byte extend opcode */
76 #define GroupDual   (1<<15)     /* Alternate decoding of mod == 3 */
77 #define GroupMask   0xff        /* Group number stored in bits 0:7 */
78 /* Misc flags */
79 #define No64	    (1<<28)
80 /* Source 2 operand type */
81 #define Src2None    (0<<29)
82 #define Src2CL      (1<<29)
83 #define Src2ImmByte (2<<29)
84 #define Src2One     (3<<29)
85 #define Src2Imm16   (4<<29)
86 #define Src2Mask    (7<<29)
87 
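/*
 * A decode-table entry packs several independent fields into one u32:
 * bit 0 is ByteOp, bits 3:1 the destination type, bits 7:4 the source
 * type, bits 31:29 the second-source type, with the single-purpose
 * flags in between.  A minimal sketch of pulling those fields back out
 * (illustrative only, not built):
 */
#if 0
#include <stdio.h>

static void dump_decode_flags(u32 d)
{
	printf("byteop=%u dst=%u src=%u src2=%u modrm=%u\n",
	       d & ByteOp,
	       (d & DstMask) >> 1,
	       (d & SrcMask) >> 4,
	       (d & Src2Mask) >> 29,
	       !!(d & ModRM));
}
#endif
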
88 enum {
89 	Group1_80, Group1_81, Group1_82, Group1_83,
90 	Group1A, Group3_Byte, Group3, Group4, Group5, Group7,
91 };
92 
93 static u32 opcode_table[256] = {
94 	/* 0x00 - 0x07 */
95 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
96 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
97 	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
98 	ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
99 	/* 0x08 - 0x0F */
100 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
101 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
102 	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
103 	ImplicitOps | Stack | No64, 0,
104 	/* 0x10 - 0x17 */
105 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
106 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
107 	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
108 	ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
109 	/* 0x18 - 0x1F */
110 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
111 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
112 	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
113 	ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
114 	/* 0x20 - 0x27 */
115 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
116 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
117 	DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0,
118 	/* 0x28 - 0x2F */
119 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
120 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
121 	0, 0, 0, 0,
122 	/* 0x30 - 0x37 */
123 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
124 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
125 	0, 0, 0, 0,
126 	/* 0x38 - 0x3F */
127 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
128 	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
129 	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
130 	0, 0,
131 	/* 0x40 - 0x47 */
132 	DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
133 	/* 0x48 - 0x4F */
134 	DstReg, DstReg, DstReg, DstReg,	DstReg, DstReg, DstReg, DstReg,
135 	/* 0x50 - 0x57 */
136 	SrcReg | Stack, SrcReg | Stack, SrcReg | Stack, SrcReg | Stack,
137 	SrcReg | Stack, SrcReg | Stack, SrcReg | Stack, SrcReg | Stack,
138 	/* 0x58 - 0x5F */
139 	DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
140 	DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
141 	/* 0x60 - 0x67 */
142 	ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
143 	0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ ,
144 	0, 0, 0, 0,
145 	/* 0x68 - 0x6F */
146 	SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0,
147 	SrcNone  | ByteOp  | ImplicitOps, SrcNone  | ImplicitOps, /* insb, insw/insd */
148 	SrcNone  | ByteOp  | ImplicitOps, SrcNone  | ImplicitOps, /* outsb, outsw/outsd */
149 	/* 0x70 - 0x77 */
150 	SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
151 	SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
152 	/* 0x78 - 0x7F */
153 	SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
154 	SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
155 	/* 0x80 - 0x87 */
156 	Group | Group1_80, Group | Group1_81,
157 	Group | Group1_82, Group | Group1_83,
158 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
159 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
160 	/* 0x88 - 0x8F */
161 	ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov,
162 	ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
163 	DstMem | SrcReg | ModRM | Mov, ModRM | DstReg,
164 	DstReg | SrcMem | ModRM | Mov, Group | Group1A,
165 	/* 0x90 - 0x97 */
166 	DstReg, DstReg, DstReg, DstReg,	DstReg, DstReg, DstReg, DstReg,
167 	/* 0x98 - 0x9F */
168 	0, 0, SrcImm | Src2Imm16 | No64, 0,
169 	ImplicitOps | Stack, ImplicitOps | Stack, 0, 0,
170 	/* 0xA0 - 0xA7 */
171 	ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs,
172 	ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs,
173 	ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String,
174 	ByteOp | ImplicitOps | String, ImplicitOps | String,
175 	/* 0xA8 - 0xAF */
176 	0, 0, ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String,
177 	ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String,
178 	ByteOp | ImplicitOps | String, ImplicitOps | String,
179 	/* 0xB0 - 0xB7 */
180 	ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
181 	ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
182 	ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
183 	ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
184 	/* 0xB8 - 0xBF */
185 	DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
186 	DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
187 	DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
188 	DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
189 	/* 0xC0 - 0xC7 */
190 	ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM,
191 	0, ImplicitOps | Stack, 0, 0,
192 	ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
193 	/* 0xC8 - 0xCF */
194 	0, 0, 0, ImplicitOps | Stack,
195 	ImplicitOps, SrcImmByte, ImplicitOps | No64, ImplicitOps,
196 	/* 0xD0 - 0xD7 */
197 	ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
198 	ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
199 	0, 0, 0, 0,
200 	/* 0xD8 - 0xDF */
201 	0, 0, 0, 0, 0, 0, 0, 0,
202 	/* 0xE0 - 0xE7 */
203 	0, 0, 0, 0,
204 	ByteOp | SrcImmUByte, SrcImmUByte,
205 	ByteOp | SrcImmUByte, SrcImmUByte,
206 	/* 0xE8 - 0xEF */
207 	SrcImm | Stack, SrcImm | ImplicitOps,
208 	SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps,
209 	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
210 	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
211 	/* 0xF0 - 0xF7 */
212 	0, 0, 0, 0,
213 	ImplicitOps, ImplicitOps, Group | Group3_Byte, Group | Group3,
214 	/* 0xF8 - 0xFF */
215 	ImplicitOps, 0, ImplicitOps, ImplicitOps,
216 	ImplicitOps, ImplicitOps, Group | Group4, Group | Group5,
217 };
218 
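/*
 * Worked example: opcode 0x88 is "mov r/m8, r8", so opcode_table[0x88]
 * (the first entry in the 0x88 - 0x8F row above) reads
 * ByteOp | DstMem | SrcReg | ModRM | Mov: a byte-sized, write-only move
 * from a register into a ModRM-decoded destination.
 */
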
219 static u32 twobyte_table[256] = {
220 	/* 0x00 - 0x0F */
221 	0, Group | GroupDual | Group7, 0, 0, 0, ImplicitOps, ImplicitOps, 0,
222 	ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0,
223 	/* 0x10 - 0x1F */
224 	0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
225 	/* 0x20 - 0x2F */
226 	ModRM | ImplicitOps, ModRM, ModRM | ImplicitOps, ModRM, 0, 0, 0, 0,
227 	0, 0, 0, 0, 0, 0, 0, 0,
228 	/* 0x30 - 0x3F */
229 	ImplicitOps, 0, ImplicitOps, 0,
230 	ImplicitOps, ImplicitOps, 0, 0,
231 	0, 0, 0, 0, 0, 0, 0, 0,
232 	/* 0x40 - 0x47 */
233 	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
234 	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
235 	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
236 	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
237 	/* 0x48 - 0x4F */
238 	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
239 	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
240 	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
241 	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
242 	/* 0x50 - 0x5F */
243 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
244 	/* 0x60 - 0x6F */
245 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
246 	/* 0x70 - 0x7F */
247 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
248 	/* 0x80 - 0x8F */
249 	SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm,
250 	SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm,
251 	/* 0x90 - 0x9F */
252 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
253 	/* 0xA0 - 0xA7 */
254 	ImplicitOps | Stack, ImplicitOps | Stack,
255 	0, DstMem | SrcReg | ModRM | BitOp,
256 	DstMem | SrcReg | Src2ImmByte | ModRM,
257 	DstMem | SrcReg | Src2CL | ModRM, 0, 0,
258 	/* 0xA8 - 0xAF */
259 	ImplicitOps | Stack, ImplicitOps | Stack,
260 	0, DstMem | SrcReg | ModRM | BitOp,
261 	DstMem | SrcReg | Src2ImmByte | ModRM,
262 	DstMem | SrcReg | Src2CL | ModRM,
263 	ModRM, 0,
264 	/* 0xB0 - 0xB7 */
265 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0,
266 	    DstMem | SrcReg | ModRM | BitOp,
267 	0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
268 	    DstReg | SrcMem16 | ModRM | Mov,
269 	/* 0xB8 - 0xBF */
270 	0, 0, DstMem | SrcImmByte | ModRM, DstMem | SrcReg | ModRM | BitOp,
271 	0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
272 	    DstReg | SrcMem16 | ModRM | Mov,
273 	/* 0xC0 - 0xCF */
274 	0, 0, 0, DstMem | SrcReg | ModRM | Mov, 0, 0, 0, ImplicitOps | ModRM,
275 	0, 0, 0, 0, 0, 0, 0, 0,
276 	/* 0xD0 - 0xDF */
277 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
278 	/* 0xE0 - 0xEF */
279 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
280 	/* 0xF0 - 0xFF */
281 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
282 };
283 
284 static u32 group_table[] = {
285 	[Group1_80*8] =
286 	ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
287 	ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
288 	ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
289 	ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
290 	[Group1_81*8] =
291 	DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
292 	DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
293 	DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
294 	DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
295 	[Group1_82*8] =
296 	ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
297 	ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
298 	ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
299 	ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
300 	[Group1_83*8] =
301 	DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
302 	DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
303 	DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
304 	DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
305 	[Group1A*8] =
306 	DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0,
307 	[Group3_Byte*8] =
308 	ByteOp | SrcImm | DstMem | ModRM, 0,
309 	ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
310 	0, 0, 0, 0,
311 	[Group3*8] =
312 	DstMem | SrcImm | ModRM, 0,
313 	DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
314 	0, 0, 0, 0,
315 	[Group4*8] =
316 	ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
317 	0, 0, 0, 0, 0, 0,
318 	[Group5*8] =
319 	DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
320 	SrcMem | ModRM | Stack, 0,
321 	SrcMem | ModRM | Stack, 0, SrcMem | ModRM | Stack, 0,
322 	[Group7*8] =
323 	0, 0, ModRM | SrcMem, ModRM | SrcMem,
324 	SrcNone | ModRM | DstMem | Mov, 0,
325 	SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp,
326 };
327 
328 static u32 group2_table[] = {
329 	[Group7*8] =
330 	SrcNone | ModRM, 0, 0, SrcNone | ModRM,
331 	SrcNone | ModRM | DstMem | Mov, 0,
332 	SrcMem16 | ModRM | Mov, 0,
333 };
334 
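/*
 * The group tables are indexed by (group number * 8) + the ModRM reg
 * field; see the Group handling in x86_decode_insn().  A sketch of the
 * lookup (illustrative only, not built):
 */
#if 0
	/* Opcode 0xff with reg == 6 is "push r/m": modrm 0x30 has
	 * mod=00 reg=110 rm=000, so the lookup lands on the seventh
	 * Group5 entry above, SrcMem | ModRM | Stack. */
	u8 modrm = 0x30;
	u32 d = group_table[(Group5 << 3) + ((modrm >> 3) & 7)];
#endif
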
335 /* EFLAGS bit definitions. */
336 #define EFLG_VM (1<<17)
337 #define EFLG_RF (1<<16)
338 #define EFLG_OF (1<<11)
339 #define EFLG_DF (1<<10)
340 #define EFLG_IF (1<<9)
341 #define EFLG_SF (1<<7)
342 #define EFLG_ZF (1<<6)
343 #define EFLG_AF (1<<4)
344 #define EFLG_PF (1<<2)
345 #define EFLG_CF (1<<0)
346 
347 /*
348  * Instruction emulation:
349  * Most instructions are emulated directly via a fragment of inline assembly
350  * code. This allows us to save/restore EFLAGS and thus very easily pick up
351  * any modified flags.
352  */
353 
354 #if defined(CONFIG_X86_64)
355 #define _LO32 "k"		/* force 32-bit operand */
356 #define _STK  "%%rsp"		/* stack pointer */
357 #elif defined(__i386__)
358 #define _LO32 ""		/* force 32-bit operand */
359 #define _STK  "%%esp"		/* stack pointer */
360 #endif
361 
362 /*
363  * These EFLAGS bits are restored from saved value during emulation, and
364  * any changes are written back to the saved value after emulation.
365  */
366 #define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)
367 
368 /* Before executing instruction: restore necessary bits in EFLAGS. */
369 #define _PRE_EFLAGS(_sav, _msk, _tmp)					\
370 	/* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */ \
371 	"movl %"_sav",%"_LO32 _tmp"; "                                  \
372 	"push %"_tmp"; "                                                \
373 	"push %"_tmp"; "                                                \
374 	"movl %"_msk",%"_LO32 _tmp"; "                                  \
375 	"andl %"_LO32 _tmp",("_STK"); "                                 \
376 	"pushf; "                                                       \
377 	"notl %"_LO32 _tmp"; "                                          \
378 	"andl %"_LO32 _tmp",("_STK"); "                                 \
379 	"andl %"_LO32 _tmp","__stringify(BITS_PER_LONG/4)"("_STK"); "	\
380 	"pop  %"_tmp"; "                                                \
381 	"orl  %"_LO32 _tmp",("_STK"); "                                 \
382 	"popf; "                                                        \
383 	"pop  %"_sav"; "
384 
385 /* After executing instruction: write-back necessary bits in EFLAGS. */
386 #define _POST_EFLAGS(_sav, _msk, _tmp) \
387 	/* _sav |= EFLAGS & _msk; */		\
388 	"pushf; "				\
389 	"pop  %"_tmp"; "			\
390 	"andl %"_msk",%"_LO32 _tmp"; "		\
391 	"orl  %"_LO32 _tmp",%"_sav"; "
392 
393 #ifdef CONFIG_X86_64
394 #define ON64(x) x
395 #else
396 #define ON64(x)
397 #endif
398 
399 #define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix)	\
400 	do {								\
401 		__asm__ __volatile__ (					\
402 			_PRE_EFLAGS("0", "4", "2")			\
403 			_op _suffix " %"_x"3,%1; "			\
404 			_POST_EFLAGS("0", "4", "2")			\
405 			: "=m" (_eflags), "=m" ((_dst).val),		\
406 			  "=&r" (_tmp)					\
407 			: _y ((_src).val), "i" (EFLAGS_MASK));		\
408 	} while (0)
409 
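/*
 * A user-space sketch of the flag-capture half of this technique
 * (assumes x86-64 and GCC-style inline asm; not built here): run one
 * arithmetic instruction, then grab the resulting EFLAGS with
 * pushf/pop, the way _POST_EFLAGS does after the emulated operation.
 */
#if 0
#include <stdio.h>

int main(void)
{
	unsigned long flags, dst = 0xffffffffUL, src = 1;

	asm volatile("addl %k2, %k1; pushf; pop %0"
		     : "=r" (flags), "+r" (dst)
		     : "r" (src)
		     : "cc");
	/* 0xffffffff + 1 wraps the 32-bit add: expect CF=1, ZF=1 */
	printf("CF=%lu ZF=%lu\n", flags & 1, (flags >> 6) & 1);
	return 0;
}
#endif
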
410 
411 /* Raw emulation: instruction has two explicit operands. */
412 #define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \
413 	do {								\
414 		unsigned long _tmp;					\
415 									\
416 		switch ((_dst).bytes) {					\
417 		case 2:							\
418 			____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w"); \
419 			break;						\
420 		case 4:							\
421 			____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l"); \
422 			break;						\
423 		case 8:							\
424 			ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q")); \
425 			break;						\
426 		}							\
427 	} while (0)
428 
429 #define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
430 	do {								     \
431 		unsigned long _tmp;					     \
432 		switch ((_dst).bytes) {				             \
433 		case 1:							     \
434 			____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b");  \
435 			break;						     \
436 		default:						     \
437 			__emulate_2op_nobyte(_op, _src, _dst, _eflags,	     \
438 					     _wx, _wy, _lx, _ly, _qx, _qy);  \
439 			break;						     \
440 		}							     \
441 	} while (0)
442 
443 /* Source operand is byte-sized and may be restricted to just %cl. */
444 #define emulate_2op_SrcB(_op, _src, _dst, _eflags)                      \
445 	__emulate_2op(_op, _src, _dst, _eflags,				\
446 		      "b", "c", "b", "c", "b", "c", "b", "c")
447 
448 /* Source operand is byte, word, long or quad sized. */
449 #define emulate_2op_SrcV(_op, _src, _dst, _eflags)                      \
450 	__emulate_2op(_op, _src, _dst, _eflags,				\
451 		      "b", "q", "w", "r", _LO32, "r", "", "r")
452 
453 /* Source operand is word, long or quad sized. */
454 #define emulate_2op_SrcV_nobyte(_op, _src, _dst, _eflags)               \
455 	__emulate_2op_nobyte(_op, _src, _dst, _eflags,			\
456 			     "w", "r", _LO32, "r", "", "r")
457 
458 /* Instruction has three operands and one operand is stored in ECX register */
459 #define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) 	\
460 	do {									\
461 		unsigned long _tmp;						\
462 		_type _clv  = (_cl).val;  					\
463 		_type _srcv = (_src).val;    					\
464 		_type _dstv = (_dst).val;					\
465 										\
466 		__asm__ __volatile__ (						\
467 			_PRE_EFLAGS("0", "5", "2")				\
468 			_op _suffix " %4,%1 \n"					\
469 			_POST_EFLAGS("0", "5", "2")				\
470 			: "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp)		\
471 			: "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK)		\
472 			); 							\
473 										\
474 		(_cl).val  = (unsigned long) _clv;				\
475 		(_src).val = (unsigned long) _srcv;				\
476 		(_dst).val = (unsigned long) _dstv;				\
477 	} while (0)
478 
479 #define emulate_2op_cl(_op, _cl, _src, _dst, _eflags)				\
480 	do {									\
481 		switch ((_dst).bytes) {						\
482 		case 2:								\
483 			__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,  	\
484 						"w", unsigned short);         	\
485 			break;							\
486 		case 4: 							\
487 			__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,  	\
488 						"l", unsigned int);           	\
489 			break;							\
490 		case 8:								\
491 			ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,	\
492 						"q", unsigned long));  		\
493 			break;							\
494 		}								\
495 	} while (0)
496 
497 #define __emulate_1op(_op, _dst, _eflags, _suffix)			\
498 	do {								\
499 		unsigned long _tmp;					\
500 									\
501 		__asm__ __volatile__ (					\
502 			_PRE_EFLAGS("0", "3", "2")			\
503 			_op _suffix " %1; "				\
504 			_POST_EFLAGS("0", "3", "2")			\
505 			: "=m" (_eflags), "+m" ((_dst).val),		\
506 			  "=&r" (_tmp)					\
507 			: "i" (EFLAGS_MASK));				\
508 	} while (0)
509 
510 /* Instruction has only one explicit operand (no source operand). */
511 #define emulate_1op(_op, _dst, _eflags)                                    \
512 	do {								\
513 		switch ((_dst).bytes) {				        \
514 		case 1:	__emulate_1op(_op, _dst, _eflags, "b"); break;	\
515 		case 2:	__emulate_1op(_op, _dst, _eflags, "w"); break;	\
516 		case 4:	__emulate_1op(_op, _dst, _eflags, "l"); break;	\
517 		case 8:	ON64(__emulate_1op(_op, _dst, _eflags, "q")); break; \
518 		}							\
519 	} while (0)
520 
521 /* Fetch next part of the instruction being emulated. */
522 #define insn_fetch(_type, _size, _eip)                                  \
523 ({	unsigned long _x;						\
524 	rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size));		\
525 	if (rc != 0)							\
526 		goto done;						\
527 	(_eip) += (_size);						\
528 	(_type)_x;							\
529 })
530 
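/*
 * insn_fetch() is a GNU statement expression: it evaluates to the
 * fetched value but relies on 'rc', 'ctxt', 'ops' and a 'done:' label
 * being in scope in the caller, as in:
 *
 *	c->modrm = insn_fetch(u8, 1, c->eip);
 */
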
531 static inline unsigned long ad_mask(struct decode_cache *c)
532 {
533 	return (1UL << (c->ad_bytes << 3)) - 1;
534 }
535 
536 /* Access/update address held in a register, based on addressing mode. */
537 static inline unsigned long
538 address_mask(struct decode_cache *c, unsigned long reg)
539 {
540 	if (c->ad_bytes == sizeof(unsigned long))
541 		return reg;
542 	else
543 		return reg & ad_mask(c);
544 }
545 
546 static inline unsigned long
547 register_address(struct decode_cache *c, unsigned long base, unsigned long reg)
548 {
549 	return base + address_mask(c, reg);
550 }
551 
552 static inline void
553 register_address_increment(struct decode_cache *c, unsigned long *reg, int inc)
554 {
555 	if (c->ad_bytes == sizeof(unsigned long))
556 		*reg += inc;
557 	else
558 		*reg = (*reg & ~ad_mask(c)) | ((*reg + inc) & ad_mask(c));
559 }
560 
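/*
 * With 16-bit addressing (ad_bytes == 2), ad_mask() is 0xffff and an
 * increment wraps inside the low word while the upper register bits
 * are preserved.  A sketch, assuming a decode_cache c with
 * ad_bytes == 2 (illustrative only, not built):
 */
#if 0
	unsigned long reg = 0x1234ffff;
	register_address_increment(c, &reg, 1);	/* reg == 0x12340000 */
#endif
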
561 static inline void jmp_rel(struct decode_cache *c, int rel)
562 {
563 	register_address_increment(c, &c->eip, rel);
564 }
565 
566 static void set_seg_override(struct decode_cache *c, int seg)
567 {
568 	c->has_seg_override = true;
569 	c->seg_override = seg;
570 }
571 
572 static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
573 {
574 	if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
575 		return 0;
576 
577 	return kvm_x86_ops->get_segment_base(ctxt->vcpu, seg);
578 }
579 
580 static unsigned long seg_override_base(struct x86_emulate_ctxt *ctxt,
581 				       struct decode_cache *c)
582 {
583 	if (!c->has_seg_override)
584 		return 0;
585 
586 	return seg_base(ctxt, c->seg_override);
587 }
588 
589 static unsigned long es_base(struct x86_emulate_ctxt *ctxt)
590 {
591 	return seg_base(ctxt, VCPU_SREG_ES);
592 }
593 
594 static unsigned long ss_base(struct x86_emulate_ctxt *ctxt)
595 {
596 	return seg_base(ctxt, VCPU_SREG_SS);
597 }
598 
599 static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
600 			      struct x86_emulate_ops *ops,
601 			      unsigned long linear, u8 *dest)
602 {
603 	struct fetch_cache *fc = &ctxt->decode.fetch;
604 	int rc;
605 	int size;
606 
607 	if (linear < fc->start || linear >= fc->end) {
608 		size = min(15UL, PAGE_SIZE - offset_in_page(linear));
609 		rc = ops->read_std(linear, fc->data, size, ctxt->vcpu);
610 		if (rc)
611 			return rc;
612 		fc->start = linear;
613 		fc->end = linear + size;
614 	}
615 	*dest = fc->data[linear - fc->start];
616 	return 0;
617 }
618 
619 static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
620 			 struct x86_emulate_ops *ops,
621 			 unsigned long eip, void *dest, unsigned size)
622 {
623 	int rc = 0;
624 
625 	/* x86 instructions are limited to 15 bytes. */
626 	if (eip + size - ctxt->decode.eip_orig > 15)
627 		return X86EMUL_UNHANDLEABLE;
628 	eip += ctxt->cs_base;
629 	while (size--) {
630 		rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++);
631 		if (rc)
632 			return rc;
633 	}
634 	return 0;
635 }
636 
637 /*
638  * Given the 'reg' portion of a ModRM byte, and a register block, return a
639  * pointer into the block that addresses the relevant register.
640  * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
641  */
642 static void *decode_register(u8 modrm_reg, unsigned long *regs,
643 			     int highbyte_regs)
644 {
645 	void *p;
646 
647 	p = &regs[modrm_reg];
648 	if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
649 		p = (unsigned char *)&regs[modrm_reg & 3] + 1;
650 	return p;
651 }
652 
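/*
 * Without a REX prefix, byte registers 4-7 name AH/CH/DH/BH, i.e. bits
 * 15:8 of RAX/RCX/RDX/RBX; on a little-endian host that is byte 1 of
 * the register slot, hence the "& 3" and "+ 1" above.  For example
 * (illustrative only, not built):
 */
#if 0
	/* reg 4 with highbyte_regs set resolves to AH */
	u8 *ah = decode_register(4, c->regs, 1);
	/* ah == (u8 *)&c->regs[VCPU_REGS_RAX] + 1 */
#endif
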
653 static int read_descriptor(struct x86_emulate_ctxt *ctxt,
654 			   struct x86_emulate_ops *ops,
655 			   void *ptr,
656 			   u16 *size, unsigned long *address, int op_bytes)
657 {
658 	int rc;
659 
660 	if (op_bytes == 2)
661 		op_bytes = 3;
662 	*address = 0;
663 	rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2,
664 			   ctxt->vcpu);
665 	if (rc)
666 		return rc;
667 	rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes,
668 			   ctxt->vcpu);
669 	return rc;
670 }
671 
672 static int test_cc(unsigned int condition, unsigned int flags)
673 {
674 	int rc = 0;
675 
676 	switch ((condition & 15) >> 1) {
677 	case 0: /* o */
678 		rc |= (flags & EFLG_OF);
679 		break;
680 	case 1: /* b/c/nae */
681 		rc |= (flags & EFLG_CF);
682 		break;
683 	case 2: /* z/e */
684 		rc |= (flags & EFLG_ZF);
685 		break;
686 	case 3: /* be/na */
687 		rc |= (flags & (EFLG_CF|EFLG_ZF));
688 		break;
689 	case 4: /* s */
690 		rc |= (flags & EFLG_SF);
691 		break;
692 	case 5: /* p/pe */
693 		rc |= (flags & EFLG_PF);
694 		break;
695 	case 7: /* le/ng */
696 		rc |= (flags & EFLG_ZF);
697 		/* fall through */
698 	case 6: /* l/nge */
699 		rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF));
700 		break;
701 	}
702 
703 	/* Odd condition identifiers (lsb == 1) have inverted sense. */
704 	return (!!rc ^ (condition & 1));
705 }
706 
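/*
 * Condition codes come in complementary pairs: (condition & 15) >> 1
 * picks the pair and the low bit inverts the sense.  E.g. jnz (opcode
 * 0x75) has nibble 5: pair 2 tests ZF, and the set low bit flips it,
 * so test_cc(0x75, flags) is 1 exactly when ZF is clear.
 */
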
707 static void decode_register_operand(struct operand *op,
708 				    struct decode_cache *c,
709 				    int inhibit_bytereg)
710 {
711 	unsigned reg = c->modrm_reg;
712 	int highbyte_regs = c->rex_prefix == 0;
713 
714 	if (!(c->d & ModRM))
715 		reg = (c->b & 7) | ((c->rex_prefix & 1) << 3);
716 	op->type = OP_REG;
717 	if ((c->d & ByteOp) && !inhibit_bytereg) {
718 		op->ptr = decode_register(reg, c->regs, highbyte_regs);
719 		op->val = *(u8 *)op->ptr;
720 		op->bytes = 1;
721 	} else {
722 		op->ptr = decode_register(reg, c->regs, 0);
723 		op->bytes = c->op_bytes;
724 		switch (op->bytes) {
725 		case 2:
726 			op->val = *(u16 *)op->ptr;
727 			break;
728 		case 4:
729 			op->val = *(u32 *)op->ptr;
730 			break;
731 		case 8:
732 			op->val = *(u64 *) op->ptr;
733 			break;
734 		}
735 	}
736 	op->orig_val = op->val;
737 }
738 
739 static int decode_modrm(struct x86_emulate_ctxt *ctxt,
740 			struct x86_emulate_ops *ops)
741 {
742 	struct decode_cache *c = &ctxt->decode;
743 	u8 sib;
744 	int index_reg = 0, base_reg = 0, scale;
745 	int rc = 0;
746 
747 	if (c->rex_prefix) {
748 		c->modrm_reg = (c->rex_prefix & 4) << 1;	/* REX.R */
749 		index_reg = (c->rex_prefix & 2) << 2; /* REX.X */
750 		c->modrm_rm = base_reg = (c->rex_prefix & 1) << 3; /* REX.B */
751 	}
752 
753 	c->modrm = insn_fetch(u8, 1, c->eip);
754 	c->modrm_mod |= (c->modrm & 0xc0) >> 6;
755 	c->modrm_reg |= (c->modrm & 0x38) >> 3;
756 	c->modrm_rm |= (c->modrm & 0x07);
757 	c->modrm_ea = 0;
758 	c->use_modrm_ea = 1;
759 
760 	if (c->modrm_mod == 3) {
761 		c->modrm_ptr = decode_register(c->modrm_rm,
762 					       c->regs, c->d & ByteOp);
763 		c->modrm_val = *(unsigned long *)c->modrm_ptr;
764 		return rc;
765 	}
766 
767 	if (c->ad_bytes == 2) {
768 		unsigned bx = c->regs[VCPU_REGS_RBX];
769 		unsigned bp = c->regs[VCPU_REGS_RBP];
770 		unsigned si = c->regs[VCPU_REGS_RSI];
771 		unsigned di = c->regs[VCPU_REGS_RDI];
772 
773 		/* 16-bit ModR/M decode. */
774 		switch (c->modrm_mod) {
775 		case 0:
776 			if (c->modrm_rm == 6)
777 				c->modrm_ea += insn_fetch(u16, 2, c->eip);
778 			break;
779 		case 1:
780 			c->modrm_ea += insn_fetch(s8, 1, c->eip);
781 			break;
782 		case 2:
783 			c->modrm_ea += insn_fetch(u16, 2, c->eip);
784 			break;
785 		}
786 		switch (c->modrm_rm) {
787 		case 0:
788 			c->modrm_ea += bx + si;
789 			break;
790 		case 1:
791 			c->modrm_ea += bx + di;
792 			break;
793 		case 2:
794 			c->modrm_ea += bp + si;
795 			break;
796 		case 3:
797 			c->modrm_ea += bp + di;
798 			break;
799 		case 4:
800 			c->modrm_ea += si;
801 			break;
802 		case 5:
803 			c->modrm_ea += di;
804 			break;
805 		case 6:
806 			if (c->modrm_mod != 0)
807 				c->modrm_ea += bp;
808 			break;
809 		case 7:
810 			c->modrm_ea += bx;
811 			break;
812 		}
813 		if (c->modrm_rm == 2 || c->modrm_rm == 3 ||
814 		    (c->modrm_rm == 6 && c->modrm_mod != 0))
815 			if (!c->has_seg_override)
816 				set_seg_override(c, VCPU_SREG_SS);
817 		c->modrm_ea = (u16)c->modrm_ea;
818 	} else {
819 		/* 32/64-bit ModR/M decode. */
820 		if ((c->modrm_rm & 7) == 4) {
821 			sib = insn_fetch(u8, 1, c->eip);
822 			index_reg |= (sib >> 3) & 7;
823 			base_reg |= sib & 7;
824 			scale = sib >> 6;
825 
826 			if ((base_reg & 7) == 5 && c->modrm_mod == 0)
827 				c->modrm_ea += insn_fetch(s32, 4, c->eip);
828 			else
829 				c->modrm_ea += c->regs[base_reg];
830 			if (index_reg != 4)
831 				c->modrm_ea += c->regs[index_reg] << scale;
832 		} else if ((c->modrm_rm & 7) == 5 && c->modrm_mod == 0) {
833 			if (ctxt->mode == X86EMUL_MODE_PROT64)
834 				c->rip_relative = 1;
835 		} else
836 			c->modrm_ea += c->regs[c->modrm_rm];
837 		switch (c->modrm_mod) {
838 		case 0:
839 			if (c->modrm_rm == 5)
840 				c->modrm_ea += insn_fetch(s32, 4, c->eip);
841 			break;
842 		case 1:
843 			c->modrm_ea += insn_fetch(s8, 1, c->eip);
844 			break;
845 		case 2:
846 			c->modrm_ea += insn_fetch(s32, 4, c->eip);
847 			break;
848 		}
849 	}
850 done:
851 	return rc;
852 }
853 
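/*
 * 16-bit worked example: modrm 0x42 (mod=01 reg=000 rm=010) followed
 * by a disp8 of 0x10 decodes to an effective address of BP + SI + 0x10,
 * and rm == 2 makes SS the default segment, per the has_seg_override
 * check above.
 */
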
854 static int decode_abs(struct x86_emulate_ctxt *ctxt,
855 		      struct x86_emulate_ops *ops)
856 {
857 	struct decode_cache *c = &ctxt->decode;
858 	int rc = 0;
859 
860 	switch (c->ad_bytes) {
861 	case 2:
862 		c->modrm_ea = insn_fetch(u16, 2, c->eip);
863 		break;
864 	case 4:
865 		c->modrm_ea = insn_fetch(u32, 4, c->eip);
866 		break;
867 	case 8:
868 		c->modrm_ea = insn_fetch(u64, 8, c->eip);
869 		break;
870 	}
871 done:
872 	return rc;
873 }
874 
875 int
876 x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
877 {
878 	struct decode_cache *c = &ctxt->decode;
879 	int rc = 0;
880 	int mode = ctxt->mode;
881 	int def_op_bytes, def_ad_bytes, group;
882 
883 	/* Shadow copy of register state. Committed on successful emulation. */
884 
885 	memset(c, 0, sizeof(struct decode_cache));
886 	c->eip = c->eip_orig = kvm_rip_read(ctxt->vcpu);
887 	ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS);
888 	memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
889 
890 	switch (mode) {
891 	case X86EMUL_MODE_REAL:
892 	case X86EMUL_MODE_PROT16:
893 		def_op_bytes = def_ad_bytes = 2;
894 		break;
895 	case X86EMUL_MODE_PROT32:
896 		def_op_bytes = def_ad_bytes = 4;
897 		break;
898 #ifdef CONFIG_X86_64
899 	case X86EMUL_MODE_PROT64:
900 		def_op_bytes = 4;
901 		def_ad_bytes = 8;
902 		break;
903 #endif
904 	default:
905 		return -1;
906 	}
907 
908 	c->op_bytes = def_op_bytes;
909 	c->ad_bytes = def_ad_bytes;
910 
911 	/* Legacy prefixes. */
912 	for (;;) {
913 		switch (c->b = insn_fetch(u8, 1, c->eip)) {
914 		case 0x66:	/* operand-size override */
915 			/* switch between 2/4 bytes */
916 			c->op_bytes = def_op_bytes ^ 6;
917 			break;
918 		case 0x67:	/* address-size override */
919 			if (mode == X86EMUL_MODE_PROT64)
920 				/* switch between 4/8 bytes */
921 				c->ad_bytes = def_ad_bytes ^ 12;
922 			else
923 				/* switch between 2/4 bytes */
924 				c->ad_bytes = def_ad_bytes ^ 6;
925 			break;
926 		case 0x26:	/* ES override */
927 		case 0x2e:	/* CS override */
928 		case 0x36:	/* SS override */
929 		case 0x3e:	/* DS override */
930 			set_seg_override(c, (c->b >> 3) & 3);
931 			break;
932 		case 0x64:	/* FS override */
933 		case 0x65:	/* GS override */
934 			set_seg_override(c, c->b & 7);
935 			break;
936 		case 0x40 ... 0x4f: /* REX */
937 			if (mode != X86EMUL_MODE_PROT64)
938 				goto done_prefixes;
939 			c->rex_prefix = c->b;
940 			continue;
941 		case 0xf0:	/* LOCK */
942 			c->lock_prefix = 1;
943 			break;
944 		case 0xf2:	/* REPNE/REPNZ */
945 			c->rep_prefix = REPNE_PREFIX;
946 			break;
947 		case 0xf3:	/* REP/REPE/REPZ */
948 			c->rep_prefix = REPE_PREFIX;
949 			break;
950 		default:
951 			goto done_prefixes;
952 		}
953 
954 		/* Any legacy prefix after a REX prefix nullifies its effect. */
955 
956 		c->rex_prefix = 0;
957 	}
958 
959 done_prefixes:
960 
961 	/* REX prefix. */
962 	if (c->rex_prefix)
963 		if (c->rex_prefix & 8)
964 			c->op_bytes = 8;	/* REX.W */
965 
966 	/* Opcode byte(s). */
967 	c->d = opcode_table[c->b];
968 	if (c->d == 0) {
969 		/* Two-byte opcode? */
970 		if (c->b == 0x0f) {
971 			c->twobyte = 1;
972 			c->b = insn_fetch(u8, 1, c->eip);
973 			c->d = twobyte_table[c->b];
974 		}
975 	}
976 
977 	if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) {
978 		kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction");
979 		return -1;
980 	}
981 
982 	if (c->d & Group) {
983 		group = c->d & GroupMask;
984 		c->modrm = insn_fetch(u8, 1, c->eip);
985 		--c->eip;
986 
987 		group = (group << 3) + ((c->modrm >> 3) & 7);
988 		if ((c->d & GroupDual) && (c->modrm >> 6) == 3)
989 			c->d = group2_table[group];
990 		else
991 			c->d = group_table[group];
992 	}
993 
994 	/* Unrecognised? */
995 	if (c->d == 0) {
996 		DPRINTF("Cannot emulate %02x\n", c->b);
997 		return -1;
998 	}
999 
1000 	if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack))
1001 		c->op_bytes = 8;
1002 
1003 	/* ModRM and SIB bytes. */
1004 	if (c->d & ModRM)
1005 		rc = decode_modrm(ctxt, ops);
1006 	else if (c->d & MemAbs)
1007 		rc = decode_abs(ctxt, ops);
1008 	if (rc)
1009 		goto done;
1010 
1011 	if (!c->has_seg_override)
1012 		set_seg_override(c, VCPU_SREG_DS);
1013 
1014 	if (!(!c->twobyte && c->b == 0x8d)) /* all but lea (0x8d) use a segment base */
1015 		c->modrm_ea += seg_override_base(ctxt, c);
1016 
1017 	if (c->ad_bytes != 8)
1018 		c->modrm_ea = (u32)c->modrm_ea;
1019 	/*
1020 	 * Decode and fetch the source operand: register, memory
1021 	 * or immediate.
1022 	 */
1023 	switch (c->d & SrcMask) {
1024 	case SrcNone:
1025 		break;
1026 	case SrcReg:
1027 		decode_register_operand(&c->src, c, 0);
1028 		break;
1029 	case SrcMem16:
1030 		c->src.bytes = 2;
1031 		goto srcmem_common;
1032 	case SrcMem32:
1033 		c->src.bytes = 4;
1034 		goto srcmem_common;
1035 	case SrcMem:
1036 		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1038 		/* Don't fetch the address for invlpg: it could be unmapped. */
1039 		if (c->twobyte && c->b == 0x01 && c->modrm_reg == 7)
1040 			break;
1041 	srcmem_common:
1042 		/*
1043 		 * For instructions with a ModR/M byte, switch to register
1044 		 * access if Mod = 3.
1045 		 */
1046 		if ((c->d & ModRM) && c->modrm_mod == 3) {
1047 			c->src.type = OP_REG;
1048 			c->src.val = c->modrm_val;
1049 			c->src.ptr = c->modrm_ptr;
1050 			break;
1051 		}
1052 		c->src.type = OP_MEM;
1053 		break;
1054 	case SrcImm:
1055 	case SrcImmU:
1056 		c->src.type = OP_IMM;
1057 		c->src.ptr = (unsigned long *)c->eip;
1058 		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1059 		if (c->src.bytes == 8)
1060 			c->src.bytes = 4;
1061 		/* NB. Immediates are sign-extended as necessary. */
1062 		switch (c->src.bytes) {
1063 		case 1:
1064 			c->src.val = insn_fetch(s8, 1, c->eip);
1065 			break;
1066 		case 2:
1067 			c->src.val = insn_fetch(s16, 2, c->eip);
1068 			break;
1069 		case 4:
1070 			c->src.val = insn_fetch(s32, 4, c->eip);
1071 			break;
1072 		}
1073 		if ((c->d & SrcMask) == SrcImmU) {
1074 			switch (c->src.bytes) {
1075 			case 1:
1076 				c->src.val &= 0xff;
1077 				break;
1078 			case 2:
1079 				c->src.val &= 0xffff;
1080 				break;
1081 			case 4:
1082 				c->src.val &= 0xffffffff;
1083 				break;
1084 			}
1085 		}
1086 		break;
1087 	case SrcImmByte:
1088 	case SrcImmUByte:
1089 		c->src.type = OP_IMM;
1090 		c->src.ptr = (unsigned long *)c->eip;
1091 		c->src.bytes = 1;
1092 		if ((c->d & SrcMask) == SrcImmByte)
1093 			c->src.val = insn_fetch(s8, 1, c->eip);
1094 		else
1095 			c->src.val = insn_fetch(u8, 1, c->eip);
1096 		break;
1097 	case SrcOne:
1098 		c->src.bytes = 1;
1099 		c->src.val = 1;
1100 		break;
1101 	}
1102 
1103 	/*
1104 	 * Decode and fetch the second source operand: register, memory
1105 	 * or immediate.
1106 	 */
1107 	switch (c->d & Src2Mask) {
1108 	case Src2None:
1109 		break;
1110 	case Src2CL:
1111 		c->src2.bytes = 1;
1112 		c->src2.val = c->regs[VCPU_REGS_RCX] & 0xff; /* CL is the low byte of RCX */
1113 		break;
1114 	case Src2ImmByte:
1115 		c->src2.type = OP_IMM;
1116 		c->src2.ptr = (unsigned long *)c->eip;
1117 		c->src2.bytes = 1;
1118 		c->src2.val = insn_fetch(u8, 1, c->eip);
1119 		break;
1120 	case Src2Imm16:
1121 		c->src2.type = OP_IMM;
1122 		c->src2.ptr = (unsigned long *)c->eip;
1123 		c->src2.bytes = 2;
1124 		c->src2.val = insn_fetch(u16, 2, c->eip);
1125 		break;
1126 	case Src2One:
1127 		c->src2.bytes = 1;
1128 		c->src2.val = 1;
1129 		break;
1130 	}
1131 
1132 	/* Decode and fetch the destination operand: register or memory. */
1133 	switch (c->d & DstMask) {
1134 	case ImplicitOps:
1135 		/* Special instructions do their own operand decoding. */
1136 		return 0;
1137 	case DstReg:
1138 		decode_register_operand(&c->dst, c,
1139 			 c->twobyte && (c->b == 0xb6 || c->b == 0xb7));
1140 		break;
1141 	case DstMem:
1142 		if ((c->d & ModRM) && c->modrm_mod == 3) {
1143 			c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1144 			c->dst.type = OP_REG;
1145 			c->dst.val = c->dst.orig_val = c->modrm_val;
1146 			c->dst.ptr = c->modrm_ptr;
1147 			break;
1148 		}
1149 		c->dst.type = OP_MEM;
1150 		break;
1151 	case DstAcc:
1152 		c->dst.type = OP_REG;
1153 		c->dst.bytes = c->op_bytes;
1154 		c->dst.ptr = &c->regs[VCPU_REGS_RAX];
1155 		switch (c->op_bytes) {
1156 			case 1:
1157 				c->dst.val = *(u8 *)c->dst.ptr;
1158 				break;
1159 			case 2:
1160 				c->dst.val = *(u16 *)c->dst.ptr;
1161 				break;
1162 			case 4:
1163 				c->dst.val = *(u32 *)c->dst.ptr;
1164 				break;
1165 		}
1166 		c->dst.orig_val = c->dst.val;
1167 		break;
1168 	}
1169 
1170 	if (c->rip_relative)
1171 		c->modrm_ea += c->eip;
1172 
1173 done:
1174 	return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
1175 }
1176 
1177 static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
1178 {
1179 	struct decode_cache *c = &ctxt->decode;
1180 
1181 	c->dst.type  = OP_MEM;
1182 	c->dst.bytes = c->op_bytes;
1183 	c->dst.val = c->src.val;
1184 	register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes);
1185 	c->dst.ptr = (void *) register_address(c, ss_base(ctxt),
1186 					       c->regs[VCPU_REGS_RSP]);
1187 }
1188 
1189 static int emulate_pop(struct x86_emulate_ctxt *ctxt,
1190 		       struct x86_emulate_ops *ops,
1191 		       void *dest, int len)
1192 {
1193 	struct decode_cache *c = &ctxt->decode;
1194 	int rc;
1195 
1196 	rc = ops->read_emulated(register_address(c, ss_base(ctxt),
1197 						 c->regs[VCPU_REGS_RSP]),
1198 				dest, len, ctxt->vcpu);
1199 	if (rc != 0)
1200 		return rc;
1201 
1202 	register_address_increment(c, &c->regs[VCPU_REGS_RSP], len);
1203 	return rc;
1204 }
1205 
1206 static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg)
1207 {
1208 	struct decode_cache *c = &ctxt->decode;
1209 	struct kvm_segment segment;
1210 
1211 	kvm_x86_ops->get_segment(ctxt->vcpu, &segment, seg);
1212 
1213 	c->src.val = segment.selector;
1214 	emulate_push(ctxt);
1215 }
1216 
1217 static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
1218 			     struct x86_emulate_ops *ops, int seg)
1219 {
1220 	struct decode_cache *c = &ctxt->decode;
1221 	unsigned long selector;
1222 	int rc;
1223 
1224 	rc = emulate_pop(ctxt, ops, &selector, c->op_bytes);
1225 	if (rc != 0)
1226 		return rc;
1227 
1228 	rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, 1, seg);
1229 	return rc;
1230 }
1231 
1232 static void emulate_pusha(struct x86_emulate_ctxt *ctxt)
1233 {
1234 	struct decode_cache *c = &ctxt->decode;
1235 	unsigned long old_esp = c->regs[VCPU_REGS_RSP];
1236 	int reg = VCPU_REGS_RAX;
1237 
1238 	while (reg <= VCPU_REGS_RDI) {
1239 		c->src.val = (reg == VCPU_REGS_RSP) ? old_esp : c->regs[reg];
1241 
1242 		emulate_push(ctxt);
1243 		++reg;
1244 	}
1245 }
1246 
1247 static int emulate_popa(struct x86_emulate_ctxt *ctxt,
1248 			struct x86_emulate_ops *ops)
1249 {
1250 	struct decode_cache *c = &ctxt->decode;
1251 	int rc = 0;
1252 	int reg = VCPU_REGS_RDI;
1253 
1254 	while (reg >= VCPU_REGS_RAX) {
1255 		if (reg == VCPU_REGS_RSP) {
1256 			register_address_increment(c, &c->regs[VCPU_REGS_RSP],
1257 							c->op_bytes);
1258 			--reg;
1259 		}
1260 
1261 		rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes);
1262 		if (rc != 0)
1263 			break;
1264 		--reg;
1265 	}
1266 	return rc;
1267 }
1268 
1269 static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
1270 				struct x86_emulate_ops *ops)
1271 {
1272 	struct decode_cache *c = &ctxt->decode;
1273 	int rc;
1274 
1275 	rc = emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes);
1276 	if (rc != 0)
1277 		return rc;
1278 	return 0;
1279 }
1280 
1281 static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt)
1282 {
1283 	struct decode_cache *c = &ctxt->decode;
1284 	switch (c->modrm_reg) {
1285 	case 0:	/* rol */
1286 		emulate_2op_SrcB("rol", c->src, c->dst, ctxt->eflags);
1287 		break;
1288 	case 1:	/* ror */
1289 		emulate_2op_SrcB("ror", c->src, c->dst, ctxt->eflags);
1290 		break;
1291 	case 2:	/* rcl */
1292 		emulate_2op_SrcB("rcl", c->src, c->dst, ctxt->eflags);
1293 		break;
1294 	case 3:	/* rcr */
1295 		emulate_2op_SrcB("rcr", c->src, c->dst, ctxt->eflags);
1296 		break;
1297 	case 4:	/* sal/shl */
1298 	case 6:	/* sal/shl */
1299 		emulate_2op_SrcB("sal", c->src, c->dst, ctxt->eflags);
1300 		break;
1301 	case 5:	/* shr */
1302 		emulate_2op_SrcB("shr", c->src, c->dst, ctxt->eflags);
1303 		break;
1304 	case 7:	/* sar */
1305 		emulate_2op_SrcB("sar", c->src, c->dst, ctxt->eflags);
1306 		break;
1307 	}
1308 }
1309 
1310 static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
1311 			       struct x86_emulate_ops *ops)
1312 {
1313 	struct decode_cache *c = &ctxt->decode;
1314 	int rc = 0;
1315 
1316 	switch (c->modrm_reg) {
1317 	case 0 ... 1:	/* test */
1318 		emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
1319 		break;
1320 	case 2:	/* not */
1321 		c->dst.val = ~c->dst.val;
1322 		break;
1323 	case 3:	/* neg */
1324 		emulate_1op("neg", c->dst, ctxt->eflags);
1325 		break;
1326 	default:
1327 		DPRINTF("Cannot emulate %02x\n", c->b);
1328 		rc = X86EMUL_UNHANDLEABLE;
1329 		break;
1330 	}
1331 	return rc;
1332 }
1333 
1334 static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
1335 			       struct x86_emulate_ops *ops)
1336 {
1337 	struct decode_cache *c = &ctxt->decode;
1338 
1339 	switch (c->modrm_reg) {
1340 	case 0:	/* inc */
1341 		emulate_1op("inc", c->dst, ctxt->eflags);
1342 		break;
1343 	case 1:	/* dec */
1344 		emulate_1op("dec", c->dst, ctxt->eflags);
1345 		break;
1346 	case 2: /* call near abs */ {
1347 		long int old_eip;
1348 		old_eip = c->eip;
1349 		c->eip = c->src.val;
1350 		c->src.val = old_eip;
1351 		emulate_push(ctxt);
1352 		break;
1353 	}
1354 	case 4: /* jmp abs */
1355 		c->eip = c->src.val;
1356 		break;
1357 	case 6:	/* push */
1358 		emulate_push(ctxt);
1359 		break;
1360 	}
1361 	return 0;
1362 }
1363 
1364 static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
1365 			       struct x86_emulate_ops *ops,
1366 			       unsigned long memop)
1367 {
1368 	struct decode_cache *c = &ctxt->decode;
1369 	u64 old, new;
1370 	int rc;
1371 
1372 	rc = ops->read_emulated(memop, &old, 8, ctxt->vcpu);
1373 	if (rc != 0)
1374 		return rc;
1375 
1376 	if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) ||
1377 	    ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) {
1378 
1379 		c->regs[VCPU_REGS_RAX] = (u32) (old >> 0);
1380 		c->regs[VCPU_REGS_RDX] = (u32) (old >> 32);
1381 		ctxt->eflags &= ~EFLG_ZF;
1382 
1383 	} else {
1384 		new = ((u64)c->regs[VCPU_REGS_RCX] << 32) |
1385 		       (u32) c->regs[VCPU_REGS_RBX];
1386 
1387 		rc = ops->cmpxchg_emulated(memop, &old, &new, 8, ctxt->vcpu);
1388 		if (rc != 0)
1389 			return rc;
1390 		ctxt->eflags |= EFLG_ZF;
1391 	}
1392 	return 0;
1393 }
1394 
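/*
 * emulate_grp9() above implements cmpxchg8b: compare EDX:EAX against
 * the 64-bit memory operand; on a match store ECX:EBX and set ZF,
 * otherwise load the old value into EDX:EAX and clear ZF, which are
 * exactly the two branches of the if above.
 */
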
1395 static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
1396 			   struct x86_emulate_ops *ops)
1397 {
1398 	struct decode_cache *c = &ctxt->decode;
1399 	int rc;
1400 	unsigned long cs;
1401 
1402 	rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes);
1403 	if (rc)
1404 		return rc;
1405 	if (c->op_bytes == 4)
1406 		c->eip = (u32)c->eip;
1407 	rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
1408 	if (rc)
1409 		return rc;
1410 	rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, 1, VCPU_SREG_CS);
1411 	return rc;
1412 }
1413 
1414 static inline int writeback(struct x86_emulate_ctxt *ctxt,
1415 			    struct x86_emulate_ops *ops)
1416 {
1417 	int rc;
1418 	struct decode_cache *c = &ctxt->decode;
1419 
1420 	switch (c->dst.type) {
1421 	case OP_REG:
1422 		/* The 4-byte case *is* correct:
1423 		 * in 64-bit mode we zero-extend.
1424 		 */
1425 		switch (c->dst.bytes) {
1426 		case 1:
1427 			*(u8 *)c->dst.ptr = (u8)c->dst.val;
1428 			break;
1429 		case 2:
1430 			*(u16 *)c->dst.ptr = (u16)c->dst.val;
1431 			break;
1432 		case 4:
1433 			*c->dst.ptr = (u32)c->dst.val;
1434 			break;	/* 64b: zero-ext */
1435 		case 8:
1436 			*c->dst.ptr = c->dst.val;
1437 			break;
1438 		}
1439 		break;
1440 	case OP_MEM:
1441 		if (c->lock_prefix)
1442 			rc = ops->cmpxchg_emulated(
1443 					(unsigned long)c->dst.ptr,
1444 					&c->dst.orig_val,
1445 					&c->dst.val,
1446 					c->dst.bytes,
1447 					ctxt->vcpu);
1448 		else
1449 			rc = ops->write_emulated(
1450 					(unsigned long)c->dst.ptr,
1451 					&c->dst.val,
1452 					c->dst.bytes,
1453 					ctxt->vcpu);
1454 		if (rc != 0)
1455 			return rc;
1456 		break;
1457 	case OP_NONE:
1458 		/* no writeback */
1459 		break;
1460 	default:
1461 		break;
1462 	}
1463 	return 0;
1464 }
1465 
1466 static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask)
1467 {
1468 	u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu, mask);
1469 	/*
1470 	 * An "sti; sti" sequence only disables interrupts for the first
1471 	 * instruction.  So if the last instruction, emulated or not, left
1472 	 * the system with the INT_STI flag enabled, that last instruction
1473 	 * was an sti and we should not leave the flag on in this case.
1474 	 * The same goes for mov ss.
1475 	 */
1476 	if (!(int_shadow & mask))
1477 		ctxt->interruptibility = mask;
1478 }
1479 
1480 static inline void
1481 setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
1482 	struct kvm_segment *cs, struct kvm_segment *ss)
1483 {
1484 	memset(cs, 0, sizeof(struct kvm_segment));
1485 	kvm_x86_ops->get_segment(ctxt->vcpu, cs, VCPU_SREG_CS);
1486 	memset(ss, 0, sizeof(struct kvm_segment));
1487 
1488 	cs->l = 0;		/* will be adjusted later */
1489 	cs->base = 0;		/* flat segment */
1490 	cs->g = 1;		/* 4kb granularity */
1491 	cs->limit = 0xffffffff;	/* 4GB limit */
1492 	cs->type = 0x0b;	/* Read, Execute, Accessed */
1493 	cs->s = 1;
1494 	cs->dpl = 0;		/* will be adjusted later */
1495 	cs->present = 1;
1496 	cs->db = 1;
1497 
1498 	ss->unusable = 0;
1499 	ss->base = 0;		/* flat segment */
1500 	ss->limit = 0xffffffff;	/* 4GB limit */
1501 	ss->g = 1;		/* 4kb granularity */
1502 	ss->s = 1;
1503 	ss->type = 0x03;	/* Read/Write, Accessed */
1504 	ss->db = 1;		/* 32bit stack segment */
1505 	ss->dpl = 0;
1506 	ss->present = 1;
1507 }
1508 
1509 static int
1510 emulate_syscall(struct x86_emulate_ctxt *ctxt)
1511 {
1512 	struct decode_cache *c = &ctxt->decode;
1513 	struct kvm_segment cs, ss;
1514 	u64 msr_data;
1515 
1516 	/* syscall is not available in real mode */
1517 	if (c->lock_prefix || ctxt->mode == X86EMUL_MODE_REAL
1518 		|| !(ctxt->vcpu->arch.cr0 & X86_CR0_PE))
1519 		return -1;
1520 
1521 	setup_syscalls_segments(ctxt, &cs, &ss);
1522 
1523 	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
1524 	msr_data >>= 32;
1525 	cs.selector = (u16)(msr_data & 0xfffc);
1526 	ss.selector = (u16)(msr_data + 8);
1527 
1528 	if (is_long_mode(ctxt->vcpu)) {
1529 		cs.db = 0;
1530 		cs.l = 1;
1531 	}
1532 	kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
1533 	kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);
1534 
1535 	c->regs[VCPU_REGS_RCX] = c->eip;
1536 	if (is_long_mode(ctxt->vcpu)) {
1537 #ifdef CONFIG_X86_64
1538 		c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF;
1539 
1540 		kvm_x86_ops->get_msr(ctxt->vcpu,
1541 			ctxt->mode == X86EMUL_MODE_PROT64 ?
1542 			MSR_LSTAR : MSR_CSTAR, &msr_data);
1543 		c->eip = msr_data;
1544 
1545 		kvm_x86_ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data);
1546 		ctxt->eflags &= ~(msr_data | EFLG_RF);
1547 #endif
1548 	} else {
1549 		/* legacy mode */
1550 		kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
1551 		c->eip = (u32)msr_data;
1552 
1553 		ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
1554 	}
1555 
1556 	return 0;
1557 }
1558 
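/*
 * MSR_STAR layout, per the SYSCALL definition: bits 31:0 hold the
 * legacy-mode SYSCALL target EIP, bits 47:32 the SYSCALL CS selector
 * (SS is implicitly that value + 8), and bits 63:48 the SYSRET CS
 * selector, which is why emulate_syscall() shifts by 32 and adds 8.
 */
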
1559 static int
1560 emulate_sysenter(struct x86_emulate_ctxt *ctxt)
1561 {
1562 	struct decode_cache *c = &ctxt->decode;
1563 	struct kvm_segment cs, ss;
1564 	u64 msr_data;
1565 
1566 	/* inject #UD if LOCK prefix is used */
1567 	if (c->lock_prefix)
1568 		return -1;
1569 
1570 	/* inject #GP if in real mode or paging is disabled */
1571 	if (ctxt->mode == X86EMUL_MODE_REAL ||
1572 		!(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) {
1573 		kvm_inject_gp(ctxt->vcpu, 0);
1574 		return -1;
1575 	}
1576 
1577 	/* XXX sysenter/sysexit have not been tested in 64-bit mode, so we
1578 	 * inject an #UD.
1579 	 */
1580 	if (ctxt->mode == X86EMUL_MODE_PROT64)
1581 		return -1;
1582 
1583 	setup_syscalls_segments(ctxt, &cs, &ss);
1584 
1585 	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
1586 	switch (ctxt->mode) {
1587 	case X86EMUL_MODE_PROT32:
1588 		if ((msr_data & 0xfffc) == 0x0) {
1589 			kvm_inject_gp(ctxt->vcpu, 0);
1590 			return -1;
1591 		}
1592 		break;
1593 	case X86EMUL_MODE_PROT64:
1594 		if (msr_data == 0x0) {
1595 			kvm_inject_gp(ctxt->vcpu, 0);
1596 			return -1;
1597 		}
1598 		break;
1599 	}
1600 
1601 	ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
1602 	cs.selector = (u16)msr_data;
1603 	cs.selector &= ~SELECTOR_RPL_MASK;
1604 	ss.selector = cs.selector + 8;
1605 	ss.selector &= ~SELECTOR_RPL_MASK;
1606 	if (ctxt->mode == X86EMUL_MODE_PROT64
1607 		|| is_long_mode(ctxt->vcpu)) {
1608 		cs.db = 0;
1609 		cs.l = 1;
1610 	}
1611 
1612 	kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
1613 	kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);
1614 
1615 	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data);
1616 	c->eip = msr_data;
1617 
1618 	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data);
1619 	c->regs[VCPU_REGS_RSP] = msr_data;
1620 
1621 	return 0;
1622 }
1623 
1624 static int
1625 emulate_sysexit(struct x86_emulate_ctxt *ctxt)
1626 {
1627 	struct decode_cache *c = &ctxt->decode;
1628 	struct kvm_segment cs, ss;
1629 	u64 msr_data;
1630 	int usermode;
1631 
1632 	/* inject #UD if LOCK prefix is used */
1633 	if (c->lock_prefix)
1634 		return -1;
1635 
1636 	/* inject #GP if in real mode or paging is disabled */
1637 	if (ctxt->mode == X86EMUL_MODE_REAL
1638 		|| !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) {
1639 		kvm_inject_gp(ctxt->vcpu, 0);
1640 		return -1;
1641 	}
1642 
1643 	/* sysexit must be called from CPL 0 */
1644 	if (kvm_x86_ops->get_cpl(ctxt->vcpu) != 0) {
1645 		kvm_inject_gp(ctxt->vcpu, 0);
1646 		return -1;
1647 	}
1648 
1649 	setup_syscalls_segments(ctxt, &cs, &ss);
1650 
1651 	if ((c->rex_prefix & 0x8) != 0x0)
1652 		usermode = X86EMUL_MODE_PROT64;
1653 	else
1654 		usermode = X86EMUL_MODE_PROT32;
1655 
1656 	cs.dpl = 3;
1657 	ss.dpl = 3;
1658 	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
1659 	switch (usermode) {
1660 	case X86EMUL_MODE_PROT32:
1661 		cs.selector = (u16)(msr_data + 16);
1662 		if ((msr_data & 0xfffc) == 0x0) {
1663 			kvm_inject_gp(ctxt->vcpu, 0);
1664 			return -1;
1665 		}
1666 		ss.selector = (u16)(msr_data + 24);
1667 		break;
1668 	case X86EMUL_MODE_PROT64:
1669 		cs.selector = (u16)(msr_data + 32);
1670 		if (msr_data == 0x0) {
1671 			kvm_inject_gp(ctxt->vcpu, 0);
1672 			return -1;
1673 		}
1674 		ss.selector = cs.selector + 8;
1675 		cs.db = 0;
1676 		cs.l = 1;
1677 		break;
1678 	}
1679 	cs.selector |= SELECTOR_RPL_MASK;
1680 	ss.selector |= SELECTOR_RPL_MASK;
1681 
1682 	kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
1683 	kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);
1684 
1685 	c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX];
1686 	c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX];
1687 
1688 	return 0;
1689 }
1690 
1691 int
1692 x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
1693 {
1694 	unsigned long memop = 0;
1695 	u64 msr_data;
1696 	unsigned long saved_eip = 0;
1697 	struct decode_cache *c = &ctxt->decode;
1698 	unsigned int port;
1699 	int io_dir_in;
1700 	int rc = 0;
1701 
1702 	ctxt->interruptibility = 0;
1703 
1704 	/* Shadow copy of register state. Committed on successful emulation.
1705 	 * NOTE: we can copy them from vcpu as x86_decode_insn() doesn't
1706 	 * modify them.
1707 	 */
1708 
1709 	memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
1710 	saved_eip = c->eip;
1711 
1712 	if (((c->d & ModRM) && (c->modrm_mod != 3)) || (c->d & MemAbs))
1713 		memop = c->modrm_ea;
1714 
1715 	if (c->rep_prefix && (c->d & String)) {
1716 		/* All REP prefixes have the same first termination condition */
1717 		if (c->regs[VCPU_REGS_RCX] == 0) {
1718 			kvm_rip_write(ctxt->vcpu, c->eip);
1719 			goto done;
1720 		}
1721 		/* The second termination condition only applies for REPE
1722 		 * and REPNE. Test if the repeat string operation prefix is
1723 		 * REPE/REPZ or REPNE/REPNZ and if it's the case it tests the
1724 		 * corresponding termination condition according to:
1725 		 * 	- if REPE/REPZ and ZF = 0 then done
1726 		 * 	- if REPNE/REPNZ and ZF = 1 then done
1727 		 */
1728 		if ((c->b == 0xa6) || (c->b == 0xa7) ||
1729 				(c->b == 0xae) || (c->b == 0xaf)) {
1730 			if ((c->rep_prefix == REPE_PREFIX) &&
1731 				((ctxt->eflags & EFLG_ZF) == 0)) {
1732 					kvm_rip_write(ctxt->vcpu, c->eip);
1733 					goto done;
1734 			}
1735 			if ((c->rep_prefix == REPNE_PREFIX) &&
1736 				((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) {
1737 				kvm_rip_write(ctxt->vcpu, c->eip);
1738 				goto done;
1739 			}
1740 		}
1741 		c->regs[VCPU_REGS_RCX]--;
1742 		c->eip = kvm_rip_read(ctxt->vcpu);
1743 	}
1744 
1745 	if (c->src.type == OP_MEM) {
1746 		c->src.ptr = (unsigned long *)memop;
1747 		c->src.val = 0;
1748 		rc = ops->read_emulated((unsigned long)c->src.ptr,
1749 					&c->src.val,
1750 					c->src.bytes,
1751 					ctxt->vcpu);
1752 		if (rc != 0)
1753 			goto done;
1754 		c->src.orig_val = c->src.val;
1755 	}
1756 
1757 	if ((c->d & DstMask) == ImplicitOps)
1758 		goto special_insn;
1759 
1760 
1761 	if (c->dst.type == OP_MEM) {
1762 		c->dst.ptr = (unsigned long *)memop;
1763 		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
1764 		c->dst.val = 0;
1765 		if (c->d & BitOp) {
1766 			unsigned long mask = ~(c->dst.bytes * 8 - 1);
1767 
1768 			c->dst.ptr = (void *)c->dst.ptr +
1769 						   (c->src.val & mask) / 8;
1770 		}
1771 		if (!(c->d & Mov) &&
1772 				   /* optimisation - avoid slow emulated read */
1773 		    ((rc = ops->read_emulated((unsigned long)c->dst.ptr,
1774 					   &c->dst.val,
1775 					  c->dst.bytes, ctxt->vcpu)) != 0))
1776 			goto done;
1777 	}
1778 	c->dst.orig_val = c->dst.val;
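	/* dst.orig_val snapshots the destination before it is modified, so
	 * that the writeback path can use it as the compare value when a
	 * LOCK-prefixed result is committed with an emulated cmpxchg. */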
1779 
1780 special_insn:
1781 
1782 	if (c->twobyte)
1783 		goto twobyte_insn;
1784 
1785 	switch (c->b) {
1786 	case 0x00 ... 0x05:
1787 	      add:		/* add */
1788 		emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags);
1789 		break;
1790 	case 0x06:		/* push es */
1791 		emulate_push_sreg(ctxt, VCPU_SREG_ES);
1792 		break;
1793 	case 0x07:		/* pop es */
1794 		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES);
1795 		if (rc != 0)
1796 			goto done;
1797 		break;
1798 	case 0x08 ... 0x0d:
1799 	      or:		/* or */
1800 		emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags);
1801 		break;
1802 	case 0x0e:		/* push cs */
1803 		emulate_push_sreg(ctxt, VCPU_SREG_CS);
1804 		break;
1805 	case 0x10 ... 0x15:
1806 	      adc:		/* adc */
1807 		emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags);
1808 		break;
1809 	case 0x16:		/* push ss */
1810 		emulate_push_sreg(ctxt, VCPU_SREG_SS);
1811 		break;
1812 	case 0x17:		/* pop ss */
1813 		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS);
1814 		if (rc != 0)
1815 			goto done;
1816 		break;
1817 	case 0x18 ... 0x1d:
1818 	      sbb:		/* sbb */
1819 		emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags);
1820 		break;
1821 	case 0x1e:		/* push ds */
1822 		emulate_push_sreg(ctxt, VCPU_SREG_DS);
1823 		break;
1824 	case 0x1f:		/* pop ds */
1825 		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS);
1826 		if (rc != 0)
1827 			goto done;
1828 		break;
1829 	case 0x20 ... 0x25:
1830 	      and:		/* and */
1831 		emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags);
1832 		break;
1833 	case 0x28 ... 0x2d:
1834 	      sub:		/* sub */
1835 		emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags);
1836 		break;
1837 	case 0x30 ... 0x35:
1838 	      xor:		/* xor */
1839 		emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags);
1840 		break;
1841 	case 0x38 ... 0x3d:
1842 	      cmp:		/* cmp */
1843 		emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
1844 		break;
1845 	case 0x40 ... 0x47: /* inc r16/r32 */
1846 		emulate_1op("inc", c->dst, ctxt->eflags);
1847 		break;
1848 	case 0x48 ... 0x4f: /* dec r16/r32 */
1849 		emulate_1op("dec", c->dst, ctxt->eflags);
1850 		break;
1851 	case 0x50 ... 0x57:  /* push reg */
1852 		emulate_push(ctxt);
1853 		break;
1854 	case 0x58 ... 0x5f: /* pop reg */
1855 	pop_instruction:
1856 		rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes);
1857 		if (rc != 0)
1858 			goto done;
1859 		break;
1860 	case 0x60:	/* pusha */
1861 		emulate_pusha(ctxt);
1862 		break;
1863 	case 0x61:	/* popa */
1864 		rc = emulate_popa(ctxt, ops);
1865 		if (rc != 0)
1866 			goto done;
1867 		break;
1868 	case 0x63:		/* movsxd */
1869 		if (ctxt->mode != X86EMUL_MODE_PROT64)
1870 			goto cannot_emulate;
1871 		c->dst.val = (s32) c->src.val;
1872 		break;
1873 	case 0x68: /* push imm */
1874 	case 0x6a: /* push imm8 */
1875 		emulate_push(ctxt);
1876 		break;
1877 	case 0x6c:		/* insb */
1878 	case 0x6d:		/* insw/insd */
1879 		if (kvm_emulate_pio_string(ctxt->vcpu,
1880 				1,
1881 				(c->d & ByteOp) ? 1 : c->op_bytes,
1882 				c->rep_prefix ?
1883 				address_mask(c, c->regs[VCPU_REGS_RCX]) : 1,
1884 				(ctxt->eflags & EFLG_DF),
1885 				register_address(c, es_base(ctxt),
1886 						 c->regs[VCPU_REGS_RDI]),
1887 				c->rep_prefix,
1888 				c->regs[VCPU_REGS_RDX]) == 0) {
1889 			c->eip = saved_eip;
1890 			return -1;
1891 		}
1892 		return 0;
1893 	case 0x6e:		/* outsb */
1894 	case 0x6f:		/* outsw/outsd */
1895 		if (kvm_emulate_pio_string(ctxt->vcpu,
1896 				0,
1897 				(c->d & ByteOp) ? 1 : c->op_bytes,
1898 				c->rep_prefix ?
1899 				address_mask(c, c->regs[VCPU_REGS_RCX]) : 1,
1900 				(ctxt->eflags & EFLG_DF),
1901 					 register_address(c,
1902 					  seg_override_base(ctxt, c),
1903 						 c->regs[VCPU_REGS_RSI]),
1904 				c->rep_prefix,
1905 				c->regs[VCPU_REGS_RDX]) == 0) {
1906 			c->eip = saved_eip;
1907 			return -1;
1908 		}
1909 		return 0;
1910 	case 0x70 ... 0x7f: /* jcc (short) */
1911 		if (test_cc(c->b, ctxt->eflags))
1912 			jmp_rel(c, c->src.val);
1913 		break;
1914 	case 0x80 ... 0x83:	/* Grp1 */
1915 		switch (c->modrm_reg) {
1916 		case 0:
1917 			goto add;
1918 		case 1:
1919 			goto or;
1920 		case 2:
1921 			goto adc;
1922 		case 3:
1923 			goto sbb;
1924 		case 4:
1925 			goto and;
1926 		case 5:
1927 			goto sub;
1928 		case 6:
1929 			goto xor;
1930 		case 7:
1931 			goto cmp;
1932 		}
1933 		break;
1934 	case 0x84 ... 0x85:
1935 		emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
1936 		break;
1937 	case 0x86 ... 0x87:	/* xchg */
1938 	xchg:
1939 		/* Write back the register source. */
1940 		switch (c->dst.bytes) {
1941 		case 1:
1942 			*(u8 *) c->src.ptr = (u8) c->dst.val;
1943 			break;
1944 		case 2:
1945 			*(u16 *) c->src.ptr = (u16) c->dst.val;
1946 			break;
1947 		case 4:
1948 			*c->src.ptr = (u32) c->dst.val;
1949 			break;	/* 64b reg: zero-extend */
1950 		case 8:
1951 			*c->src.ptr = c->dst.val;
1952 			break;
1953 		}
1954 		/*
1955 		 * Write back the memory destination with implicit LOCK
1956 		 * prefix.
1957 		 */
1958 		c->dst.val = c->src.val;
1959 		c->lock_prefix = 1;
1960 		break;
1961 	case 0x88 ... 0x8b:	/* mov */
1962 		goto mov;
1963 	case 0x8c: { /* mov r/m, sreg */
1964 		struct kvm_segment segreg;
1965 
1966 		if (c->modrm_reg <= 5)
1967 			kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg);
1968 		else {
1969 			printk(KERN_INFO "0x8c: Invalid segreg in modrm byte 0x%02x\n",
1970 			       c->modrm);
1971 			goto cannot_emulate;
1972 		}
1973 		c->dst.val = segreg.selector;
1974 		break;
1975 	}
1976 	case 0x8d: /* lea r16/r32, m */
1977 		c->dst.val = c->modrm_ea;
1978 		break;
1979 	case 0x8e: { /* mov seg, r/m16 */
1980 		uint16_t sel;
1981 		int type_bits;
1982 		int err;
1983 
1984 		sel = c->src.val;
1985 		if (c->modrm_reg == VCPU_SREG_SS)
1986 			toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS);
1987 
1988 		if (c->modrm_reg <= 5) {
1989 			type_bits = (c->modrm_reg == 1) ? 9 : 1;
1990 			err = kvm_load_segment_descriptor(ctxt->vcpu, sel,
1991 							  type_bits, c->modrm_reg);
1992 		} else {
1993 			printk(KERN_INFO "Invalid segreg in modrm byte 0x%02x\n",
1994 					c->modrm);
1995 			goto cannot_emulate;
1996 		}
1997 
1998 		if (err < 0)
1999 			goto cannot_emulate;
2000 
2001 		c->dst.type = OP_NONE;  /* Disable writeback. */
2002 		break;
2003 	}
2004 	case 0x8f:		/* pop (sole member of Grp1a) */
2005 		rc = emulate_grp1a(ctxt, ops);
2006 		if (rc != 0)
2007 			goto done;
2008 		break;
2009 	case 0x90: /* nop / xchg r8,rax */
2010 		if (!(c->rex_prefix & 1)) { /* nop */
2011 			c->dst.type = OP_NONE;
2012 			break;
2013 		}
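		/* fall through: with REX.B set, 0x90 is xchg r8,rax */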
2014 	case 0x91 ... 0x97: /* xchg reg,rax */
2015 		c->src.type = c->dst.type = OP_REG;
2016 		c->src.bytes = c->dst.bytes = c->op_bytes;
2017 		c->src.ptr = (unsigned long *) &c->regs[VCPU_REGS_RAX];
2018 		c->src.val = *(c->src.ptr);
2019 		goto xchg;
2020 	case 0x9c: /* pushf */
2021 		c->src.val = (unsigned long) ctxt->eflags;
2022 		emulate_push(ctxt);
2023 		break;
2024 	case 0x9d: /* popf */
2025 		c->dst.type = OP_REG;
2026 		c->dst.ptr = (unsigned long *) &ctxt->eflags;
2027 		c->dst.bytes = c->op_bytes;
2028 		goto pop_instruction;
2029 	case 0xa0 ... 0xa1:	/* mov */
2030 		c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
2031 		c->dst.val = c->src.val;
2032 		break;
2033 	case 0xa2 ... 0xa3:	/* mov */
2034 		c->dst.val = (unsigned long)c->regs[VCPU_REGS_RAX];
2035 		break;
2036 	case 0xa4 ... 0xa5:	/* movs */
2037 		c->dst.type = OP_MEM;
2038 		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
2039 		c->dst.ptr = (unsigned long *)register_address(c,
2040 						   es_base(ctxt),
2041 						   c->regs[VCPU_REGS_RDI]);
2042 		if ((rc = ops->read_emulated(register_address(c,
2043 					   seg_override_base(ctxt, c),
2044 					c->regs[VCPU_REGS_RSI]),
2045 					&c->dst.val,
2046 					c->dst.bytes, ctxt->vcpu)) != 0)
2047 			goto done;
2048 		register_address_increment(c, &c->regs[VCPU_REGS_RSI],
2049 				       (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
2050 							   : c->dst.bytes);
2051 		register_address_increment(c, &c->regs[VCPU_REGS_RDI],
2052 				       (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
2053 							   : c->dst.bytes);
2054 		break;
2055 	case 0xa6 ... 0xa7:	/* cmps */
2056 		c->src.type = OP_NONE; /* Disable writeback. */
2057 		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
2058 		c->src.ptr = (unsigned long *)register_address(c,
2059 				       seg_override_base(ctxt, c),
2060 						   c->regs[VCPU_REGS_RSI]);
2061 		if ((rc = ops->read_emulated((unsigned long)c->src.ptr,
2062 						&c->src.val,
2063 						c->src.bytes,
2064 						ctxt->vcpu)) != 0)
2065 			goto done;
2066 
2067 		c->dst.type = OP_NONE; /* Disable writeback. */
2068 		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
2069 		c->dst.ptr = (unsigned long *)register_address(c,
2070 						   es_base(ctxt),
2071 						   c->regs[VCPU_REGS_RDI]);
2072 		if ((rc = ops->read_emulated((unsigned long)c->dst.ptr,
2073 						&c->dst.val,
2074 						c->dst.bytes,
2075 						ctxt->vcpu)) != 0)
2076 			goto done;
2077 
2078 		DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr);
2079 
2080 		emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
2081 
2082 		register_address_increment(c, &c->regs[VCPU_REGS_RSI],
2083 				       (ctxt->eflags & EFLG_DF) ? -c->src.bytes
2084 								  : c->src.bytes);
2085 		register_address_increment(c, &c->regs[VCPU_REGS_RDI],
2086 				       (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
2087 								  : c->dst.bytes);
2088 
2089 		break;
2090 	case 0xaa ... 0xab:	/* stos */
2091 		c->dst.type = OP_MEM;
2092 		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
2093 		c->dst.ptr = (unsigned long *)register_address(c,
2094 						   es_base(ctxt),
2095 						   c->regs[VCPU_REGS_RDI]);
2096 		c->dst.val = c->regs[VCPU_REGS_RAX];
2097 		register_address_increment(c, &c->regs[VCPU_REGS_RDI],
2098 				       (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
2099 							   : c->dst.bytes);
2100 		break;
2101 	case 0xac ... 0xad:	/* lods */
2102 		c->dst.type = OP_REG;
2103 		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
2104 		c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
2105 		if ((rc = ops->read_emulated(register_address(c,
2106 						 seg_override_base(ctxt, c),
2107 						 c->regs[VCPU_REGS_RSI]),
2108 						 &c->dst.val,
2109 						 c->dst.bytes,
2110 						 ctxt->vcpu)) != 0)
2111 			goto done;
2112 		register_address_increment(c, &c->regs[VCPU_REGS_RSI],
2113 				       (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
2114 							   : c->dst.bytes);
2115 		break;
2116 	case 0xae ... 0xaf:	/* scas */
2117 		DPRINTF("Urk! I don't handle SCAS.\n");
2118 		goto cannot_emulate;
2119 	case 0xb0 ... 0xbf: /* mov r, imm */
2120 		goto mov;
2121 	case 0xc0 ... 0xc1:
2122 		emulate_grp2(ctxt);
2123 		break;
2124 	case 0xc3: /* ret */
2125 		c->dst.type = OP_REG;
2126 		c->dst.ptr = &c->eip;
2127 		c->dst.bytes = c->op_bytes;
2128 		goto pop_instruction;
2129 	case 0xc6 ... 0xc7:	/* mov (sole member of Grp11) */
2130 	mov:
2131 		c->dst.val = c->src.val;
2132 		break;
2133 	case 0xcb:		/* ret far */
2134 		rc = emulate_ret_far(ctxt, ops);
2135 		if (rc)
2136 			goto done;
2137 		break;
2138 	case 0xd0 ... 0xd1:	/* Grp2 */
2139 		c->src.val = 1;
2140 		emulate_grp2(ctxt);
2141 		break;
2142 	case 0xd2 ... 0xd3:	/* Grp2 */
2143 		c->src.val = c->regs[VCPU_REGS_RCX];
2144 		emulate_grp2(ctxt);
2145 		break;
2146 	case 0xe4: 	/* inb */
2147 	case 0xe5: 	/* in */
2148 		port = c->src.val;
2149 		io_dir_in = 1;
2150 		goto do_io;
2151 	case 0xe6: /* outb */
2152 	case 0xe7: /* out */
2153 		port = c->src.val;
2154 		io_dir_in = 0;
2155 		goto do_io;
2156 	case 0xe8: /* call (near) */ {
2157 		long int rel = c->src.val;
2158 		c->src.val = (unsigned long) c->eip;
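		/* c->eip has already been advanced past the call instruction
		 * by the decoder, so the value saved here is the return
		 * address that emulate_push() pushes after the jump is
		 * applied. */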
2159 		jmp_rel(c, rel);
2160 		emulate_push(ctxt);
2161 		break;
2162 	}
2163 	case 0xe9: /* jmp rel */
2164 		goto jmp;
2165 	case 0xea: /* jmp far */
2166 		if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val, 9,
2167 					VCPU_SREG_CS) < 0) {
2168 			DPRINTF("jmp far: Failed to load CS descriptor\n");
2169 			goto cannot_emulate;
2170 		}
2171 
2172 		c->eip = c->src.val;
2173 		break;
2174 	case 0xeb:
2175 	      jmp:		/* jmp rel short */
2176 		jmp_rel(c, c->src.val);
2177 		c->dst.type = OP_NONE; /* Disable writeback. */
2178 		break;
2179 	case 0xec: /* in al,dx */
2180 	case 0xed: /* in (e/r)ax,dx */
2181 		port = c->regs[VCPU_REGS_RDX];
2182 		io_dir_in = 1;
2183 		goto do_io;
2184 	case 0xee: /* out al,dx */
2185 	case 0xef: /* out (e/r)ax,dx */
2186 		port = c->regs[VCPU_REGS_RDX];
2187 		io_dir_in = 0;
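	/* All four in/out forms above funnel into do_io: the port number
	 * comes from an immediate byte (0xe4-0xe7) or from DX (0xec-0xef),
	 * and io_dir_in selects the transfer direction. */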
2188 	do_io:	if (kvm_emulate_pio(ctxt->vcpu, io_dir_in,
2189 				   (c->d & ByteOp) ? 1 : c->op_bytes,
2190 				   port) != 0) {
2191 			c->eip = saved_eip;
2192 			goto cannot_emulate;
2193 		}
2194 		break;
2195 	case 0xf4:              /* hlt */
2196 		ctxt->vcpu->arch.halt_request = 1;
2197 		break;
2198 	case 0xf5:	/* cmc */
2199 		/* complement the carry flag in eflags */
2200 		ctxt->eflags ^= EFLG_CF;
2201 		c->dst.type = OP_NONE;	/* Disable writeback. */
2202 		break;
2203 	case 0xf6 ... 0xf7:	/* Grp3 */
2204 		rc = emulate_grp3(ctxt, ops);
2205 		if (rc != 0)
2206 			goto done;
2207 		break;
2208 	case 0xf8: /* clc */
2209 		ctxt->eflags &= ~EFLG_CF;
2210 		c->dst.type = OP_NONE;	/* Disable writeback. */
2211 		break;
2212 	case 0xfa: /* cli */
2213 		ctxt->eflags &= ~X86_EFLAGS_IF;
2214 		c->dst.type = OP_NONE;	/* Disable writeback. */
2215 		break;
2216 	case 0xfb: /* sti */
2217 		toggle_interruptibility(ctxt, X86_SHADOW_INT_STI);
2218 		ctxt->eflags |= X86_EFLAGS_IF;
2219 		c->dst.type = OP_NONE;	/* Disable writeback. */
2220 		break;
2221 	case 0xfc: /* cld */
2222 		ctxt->eflags &= ~EFLG_DF;
2223 		c->dst.type = OP_NONE;	/* Disable writeback. */
2224 		break;
2225 	case 0xfd: /* std */
2226 		ctxt->eflags |= EFLG_DF;
2227 		c->dst.type = OP_NONE;	/* Disable writeback. */
2228 		break;
2229 	case 0xfe ... 0xff:	/* Grp4/Grp5 */
2230 		rc = emulate_grp45(ctxt, ops);
2231 		if (rc != 0)
2232 			goto done;
2233 		break;
2234 	}
2235 
2236 writeback:
2237 	rc = writeback(ctxt, ops);
2238 	if (rc != 0)
2239 		goto done;
2240 
2241 	/* Commit shadow register state. */
2242 	memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs);
2243 	kvm_rip_write(ctxt->vcpu, c->eip);
2244 
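/*
 * Only an unhandleable instruction restores the saved eip and reports
 * failure; any other outcome (including a fault that has already been
 * injected into the guest) counts as successful emulation from the
 * caller's point of view.
 */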
2245 done:
2246 	if (rc == X86EMUL_UNHANDLEABLE) {
2247 		c->eip = saved_eip;
2248 		return -1;
2249 	}
2250 	return 0;
2251 
2252 twobyte_insn:
2253 	switch (c->b) {
2254 	case 0x01: /* lgdt, lidt, lmsw */
2255 		switch (c->modrm_reg) {
2256 			u16 size;
2257 			unsigned long address;
2258 
2259 		case 0: /* vmcall */
2260 			if (c->modrm_mod != 3 || c->modrm_rm != 1)
2261 				goto cannot_emulate;
2262 
2263 			rc = kvm_fix_hypercall(ctxt->vcpu);
2264 			if (rc)
2265 				goto done;
2266 
2267 			/* Let the processor re-execute the fixed hypercall */
2268 			c->eip = kvm_rip_read(ctxt->vcpu);
2269 			/* Disable writeback. */
2270 			c->dst.type = OP_NONE;
2271 			break;
2272 		case 2: /* lgdt */
2273 			rc = read_descriptor(ctxt, ops, c->src.ptr,
2274 					     &size, &address, c->op_bytes);
2275 			if (rc)
2276 				goto done;
2277 			realmode_lgdt(ctxt->vcpu, size, address);
2278 			/* Disable writeback. */
2279 			c->dst.type = OP_NONE;
2280 			break;
2281 		case 3: /* lidt/vmmcall */
2282 			if (c->modrm_mod == 3) {
2283 				switch (c->modrm_rm) {
2284 				case 1:
2285 					rc = kvm_fix_hypercall(ctxt->vcpu);
2286 					if (rc)
2287 						goto done;
2288 					break;
2289 				default:
2290 					goto cannot_emulate;
2291 				}
2292 			} else {
2293 				rc = read_descriptor(ctxt, ops, c->src.ptr,
2294 						     &size, &address,
2295 						     c->op_bytes);
2296 				if (rc)
2297 					goto done;
2298 				realmode_lidt(ctxt->vcpu, size, address);
2299 			}
2300 			/* Disable writeback. */
2301 			c->dst.type = OP_NONE;
2302 			break;
2303 		case 4: /* smsw */
2304 			c->dst.bytes = 2;
2305 			c->dst.val = realmode_get_cr(ctxt->vcpu, 0);
2306 			break;
2307 		case 6: /* lmsw */
2308 			realmode_lmsw(ctxt->vcpu, (u16)c->src.val,
2309 				      &ctxt->eflags);
2310 			c->dst.type = OP_NONE;
2311 			break;
2312 		case 7: /* invlpg */
2313 			emulate_invlpg(ctxt->vcpu, memop);
2314 			/* Disable writeback. */
2315 			c->dst.type = OP_NONE;
2316 			break;
2317 		default:
2318 			goto cannot_emulate;
2319 		}
2320 		break;
2321 	case 0x05: 		/* syscall */
2322 		if (emulate_syscall(ctxt) == -1)
2323 			goto cannot_emulate;
2324 		else
2325 			goto writeback;
2326 		break;
2327 	case 0x06:
2328 		emulate_clts(ctxt->vcpu);
2329 		c->dst.type = OP_NONE;
2330 		break;
2331 	case 0x08:		/* invd */
2332 	case 0x09:		/* wbinvd */
2333 	case 0x0d:		/* GrpP (prefetch) */
2334 	case 0x18:		/* Grp16 (prefetch/nop) */
2335 		c->dst.type = OP_NONE;
2336 		break;
2337 	case 0x20: /* mov cr, reg */
2338 		if (c->modrm_mod != 3)
2339 			goto cannot_emulate;
2340 		c->regs[c->modrm_rm] =
2341 				realmode_get_cr(ctxt->vcpu, c->modrm_reg);
2342 		c->dst.type = OP_NONE;	/* no writeback */
2343 		break;
2344 	case 0x21: /* mov from dr to reg */
2345 		if (c->modrm_mod != 3)
2346 			goto cannot_emulate;
2347 		rc = emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]);
2348 		if (rc)
2349 			goto cannot_emulate;
2350 		c->dst.type = OP_NONE;	/* no writeback */
2351 		break;
2352 	case 0x22: /* mov reg, cr */
2353 		if (c->modrm_mod != 3)
2354 			goto cannot_emulate;
2355 		realmode_set_cr(ctxt->vcpu,
2356 				c->modrm_reg, c->modrm_val, &ctxt->eflags);
2357 		c->dst.type = OP_NONE;
2358 		break;
2359 	case 0x23: /* mov from reg to dr */
2360 		if (c->modrm_mod != 3)
2361 			goto cannot_emulate;
2362 		rc = emulator_set_dr(ctxt, c->modrm_reg,
2363 				     c->regs[c->modrm_rm]);
2364 		if (rc)
2365 			goto cannot_emulate;
2366 		c->dst.type = OP_NONE;	/* no writeback */
2367 		break;
2368 	case 0x30:
2369 		/* wrmsr */
2370 		msr_data = (u32)c->regs[VCPU_REGS_RAX]
2371 			| ((u64)c->regs[VCPU_REGS_RDX] << 32);
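		/* WRMSR takes its 64-bit payload from EDX:EAX; for example
		 * EDX = 0x00000001, EAX = 0x00000002 yields 0x100000002. */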
2372 		rc = kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data);
2373 		if (rc) {
2374 			kvm_inject_gp(ctxt->vcpu, 0);
2375 			c->eip = kvm_rip_read(ctxt->vcpu);
2376 		}
2377 		rc = X86EMUL_CONTINUE;
2378 		c->dst.type = OP_NONE;
2379 		break;
2380 	case 0x32:
2381 		/* rdmsr */
2382 		rc = kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data);
2383 		if (rc) {
2384 			kvm_inject_gp(ctxt->vcpu, 0);
2385 			c->eip = kvm_rip_read(ctxt->vcpu);
2386 		} else {
2387 			c->regs[VCPU_REGS_RAX] = (u32)msr_data;
2388 			c->regs[VCPU_REGS_RDX] = msr_data >> 32;
2389 		}
2390 		rc = X86EMUL_CONTINUE;
2391 		c->dst.type = OP_NONE;
2392 		break;
2393 	case 0x34:		/* sysenter */
2394 		if (emulate_sysenter(ctxt) == -1)
2395 			goto cannot_emulate;
2396 		else
2397 			goto writeback;
2398 		break;
2399 	case 0x35:		/* sysexit */
2400 		if (emulate_sysexit(ctxt) == -1)
2401 			goto cannot_emulate;
2402 		else
2403 			goto writeback;
2404 		break;
2405 	case 0x40 ... 0x4f:	/* cmov */
2406 		c->dst.val = c->dst.orig_val = c->src.val;
2407 		if (!test_cc(c->b, ctxt->eflags))
2408 			c->dst.type = OP_NONE; /* no writeback */
2409 		break;
2410 	case 0x80 ... 0x8f: /* jnz rel, etc */
2411 		if (test_cc(c->b, ctxt->eflags))
2412 			jmp_rel(c, c->src.val);
2413 		c->dst.type = OP_NONE;
2414 		break;
2415 	case 0xa0:	  /* push fs */
2416 		emulate_push_sreg(ctxt, VCPU_SREG_FS);
2417 		break;
2418 	case 0xa1:	 /* pop fs */
2419 		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS);
2420 		if (rc != 0)
2421 			goto done;
2422 		break;
2423 	case 0xa3:
2424 	      bt:		/* bt */
2425 		c->dst.type = OP_NONE;
2426 		/* only subword offset */
2427 		c->src.val &= (c->dst.bytes << 3) - 1;
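		/* (c->dst.bytes << 3) - 1 is 15, 31 or 63 for 16-, 32- or
		 * 64-bit operands; bts/btr/btc below apply the same mask,
		 * and any out-of-range bit offset was already folded into
		 * the memory address during operand decode. */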
2428 		emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags);
2429 		break;
2430 	case 0xa4: /* shld imm8, r, r/m */
2431 	case 0xa5: /* shld cl, r, r/m */
2432 		emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags);
2433 		break;
2434 	case 0xa8:	/* push gs */
2435 		emulate_push_sreg(ctxt, VCPU_SREG_GS);
2436 		break;
2437 	case 0xa9:	/* pop gs */
2438 		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS);
2439 		if (rc != 0)
2440 			goto done;
2441 		break;
2442 	case 0xab:
2443 	      bts:		/* bts */
2444 		/* only subword offset */
2445 		c->src.val &= (c->dst.bytes << 3) - 1;
2446 		emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags);
2447 		break;
2448 	case 0xac: /* shrd imm8, r, r/m */
2449 	case 0xad: /* shrd cl, r, r/m */
2450 		emulate_2op_cl("shrd", c->src2, c->src, c->dst, ctxt->eflags);
2451 		break;
2452 	case 0xae:              /* clflush */
2453 		break;
2454 	case 0xb0 ... 0xb1:	/* cmpxchg */
2455 		/*
2456 		 * Save real source value, then compare EAX against
2457 		 * destination.
2458 		 */
2459 		c->src.orig_val = c->src.val;
2460 		c->src.val = c->regs[VCPU_REGS_RAX];
2461 		emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
2462 		if (ctxt->eflags & EFLG_ZF) {
2463 			/* Success: write back to memory. */
2464 			c->dst.val = c->src.orig_val;
2465 		} else {
2466 			/* Failure: write the value we saw to EAX. */
2467 			c->dst.type = OP_REG;
2468 			c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
2469 		}
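		/* The emulated `cmp` above sets ZF iff the accumulator
		 * equals the destination, which selects between the two
		 * architectural outcomes of CMPXCHG. */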
2470 		break;
2471 	case 0xb3:
2472 	      btr:		/* btr */
2473 		/* only subword offset */
2474 		c->src.val &= (c->dst.bytes << 3) - 1;
2475 		emulate_2op_SrcV_nobyte("btr", c->src, c->dst, ctxt->eflags);
2476 		break;
2477 	case 0xb6 ... 0xb7:	/* movzx */
2478 		c->dst.bytes = c->op_bytes;
2479 		c->dst.val = (c->d & ByteOp) ? (u8) c->src.val
2480 						       : (u16) c->src.val;
2481 		break;
2482 	case 0xba:		/* Grp8 */
2483 		switch (c->modrm_reg & 3) {
2484 		case 0:
2485 			goto bt;
2486 		case 1:
2487 			goto bts;
2488 		case 2:
2489 			goto btr;
2490 		case 3:
2491 			goto btc;
2492 		}
2493 		break;
2494 	case 0xbb:
2495 	      btc:		/* btc */
2496 		/* only subword offset */
2497 		c->src.val &= (c->dst.bytes << 3) - 1;
2498 		emulate_2op_SrcV_nobyte("btc", c->src, c->dst, ctxt->eflags);
2499 		break;
2500 	case 0xbe ... 0xbf:	/* movsx */
2501 		c->dst.bytes = c->op_bytes;
2502 		c->dst.val = (c->d & ByteOp) ? (s8) c->src.val :
2503 							(s16) c->src.val;
2504 		break;
2505 	case 0xc3:		/* movnti */
2506 		c->dst.bytes = c->op_bytes;
2507 		c->dst.val = (c->op_bytes == 4) ? (u32) c->src.val :
2508 							(u64) c->src.val;
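		/* The non-temporal hint has no software-visible effect, so
		 * movnti is emulated as an ordinary store. */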
2509 		break;
2510 	case 0xc7:		/* Grp9 (cmpxchg8b) */
2511 		rc = emulate_grp9(ctxt, ops, memop);
2512 		if (rc != 0)
2513 			goto done;
2514 		c->dst.type = OP_NONE;
2515 		break;
2516 	}
2517 	goto writeback;
2518 
2519 cannot_emulate:
2520 	DPRINTF("Cannot emulate %02x\n", c->b);
2521 	c->eip = saved_eip;
2522 	return -1;
2523 }
2524