xref: /openbmc/linux/arch/x86/kvm/emulate.c (revision 565d76cb)
1 /******************************************************************************
2  * emulate.c
3  *
4  * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
5  *
6  * Copyright (c) 2005 Keir Fraser
7  *
8  * Linux coding style, mod r/m decoder, segment base fixes, real-mode
9  * privileged instructions:
10  *
11  * Copyright (C) 2006 Qumranet
12  * Copyright 2010 Red Hat, Inc. and/or its affiliates.
13  *
14  *   Avi Kivity <avi@qumranet.com>
15  *   Yaniv Kamay <yaniv@qumranet.com>
16  *
17  * This work is licensed under the terms of the GNU GPL, version 2.  See
18  * the COPYING file in the top-level directory.
19  *
20  * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
21  */
22 
23 #include <linux/kvm_host.h>
24 #include "kvm_cache_regs.h"
25 #include <linux/module.h>
26 #include <asm/kvm_emulate.h>
27 
28 #include "x86.h"
29 #include "tss.h"
30 
31 /*
32  * Opcode effective-address decode tables.
33  * Note that we only emulate instructions that have at least one memory
34  * operand (excluding implicit stack references). We assume that stack
35  * references and instruction fetches will never occur in special memory
36  * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
37  * not be handled.
38  */
39 
/*
 * Per-opcode decode flags.  They are OR-ed together into one 32-bit word
 * and tested in this file against the decode cache's 'd' field
 * (e.g. "c->d & ByteOp", "c->d & ModRM").
 *
 * Bit layout as defined below:
 *   bit  0      ByteOp
 *   bits 1-3    destination operand type (DstMask)
 *   bits 4-7    source operand type (SrcMask)
 *   bits 8-15   single-bit decode modifiers
 *   bits 22-28  miscellaneous single-bit flags
 *   bits 29-31  second source operand type (Src2Mask)
 */

/* Operand sizes: 8-bit operands or specified/overridden size. */
#define ByteOp      (1<<0)	/* 8-bit operands. */
/* Destination operand type. */
#define ImplicitOps (1<<1)	/* Implicit in opcode. No generic decode. */
#define DstReg      (2<<1)	/* Register operand. */
#define DstMem      (3<<1)	/* Memory operand. */
#define DstAcc      (4<<1)	/* Destination Accumulator */
#define DstDI       (5<<1)	/* Destination is in ES:(E)DI */
#define DstMem64    (6<<1)	/* 64bit memory operand */
#define DstImmUByte (7<<1)	/* 8-bit unsigned immediate operand */
#define DstMask     (7<<1)
/* Source operand type. */
#define SrcNone     (0<<4)	/* No source operand. */
#define SrcReg      (1<<4)	/* Register operand. */
#define SrcMem      (2<<4)	/* Memory operand. */
#define SrcMem16    (3<<4)	/* Memory operand (16-bit). */
#define SrcMem32    (4<<4)	/* Memory operand (32-bit). */
#define SrcImm      (5<<4)	/* Immediate operand. */
#define SrcImmByte  (6<<4)	/* 8-bit sign-extended immediate operand. */
#define SrcOne      (7<<4)	/* Implied '1' */
#define SrcImmUByte (8<<4)      /* 8-bit unsigned immediate operand. */
#define SrcImmU     (9<<4)      /* Immediate operand, unsigned */
#define SrcSI       (0xa<<4)	/* Source is in the DS:RSI */
#define SrcImmFAddr (0xb<<4)	/* Source is immediate far address */
#define SrcMemFAddr (0xc<<4)	/* Source is far address in memory */
#define SrcAcc      (0xd<<4)	/* Source Accumulator */
#define SrcImmU16   (0xe<<4)    /* Immediate operand, unsigned, 16 bits */
#define SrcMask     (0xf<<4)
/* Generic ModRM decode. */
#define ModRM       (1<<8)
/* Destination is only written; never read. */
#define Mov         (1<<9)
/* NOTE(review): presumably marks bit-test style instructions - confirm. */
#define BitOp       (1<<10)
#define MemAbs      (1<<11)      /* Memory operand is absolute displacement */
#define String      (1<<12)     /* String instruction (rep capable) */
#define Stack       (1<<13)     /* Stack instruction (push/pop) */
#define Group       (1<<14)     /* Bits 3:5 of modrm byte extend opcode */
#define GroupDual   (1<<15)     /* Alternate decoding of mod == 3 */
/* Misc flags */
#define VendorSpecific (1<<22) /* Vendor specific instruction */
#define NoAccess    (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
#define Op3264      (1<<24) /* Operand is 64b in long mode, 32b otherwise */
#define Undefined   (1<<25) /* No Such Instruction */
#define Lock        (1<<26) /* lock prefix is allowed for the instruction */
#define Priv        (1<<27) /* instruction generates #GP if current CPL != 0 */
/* NOTE(review): presumably "not valid in 64-bit mode" - confirm at use site. */
#define No64	    (1<<28)
/* Source 2 operand type */
#define Src2None    (0<<29)
#define Src2CL      (1<<29)
#define Src2ImmByte (2<<29)
#define Src2One     (3<<29)
#define Src2Imm     (4<<29)
#define Src2Mask    (7<<29)
93 
/*
 * Repetition helpers: Xn(x) expands to n comma-separated copies of x.
 * Larger counts are built from smaller ones (e.g. X7 = X4 + X3).
 */
#define X2(x...) x, x
#define X3(x...) X2(x), x
#define X4(x...) X2(x), X2(x)
#define X5(x...) X4(x), x
#define X6(x...) X4(x), X2(x)
#define X7(x...) X4(x), X3(x)
#define X8(x...) X4(x), X4(x)
#define X16(x...) X8(x), X8(x)
102 
/*
 * One opcode table entry: a word of decode flags plus a payload.
 * NOTE(review): which union member is valid is presumably selected by the
 * Group / GroupDual flag bits - confirm against the table users.
 */
struct opcode {
	u32 flags;	/* OR of the decode flag bits defined above */
	union {
		/* Handler callback taking the emulation context. */
		int (*execute)(struct x86_emulate_ctxt *ctxt);
		/* Pointer to a further table of opcode entries. */
		struct opcode *group;
		/* Pointer to a pair of tables, see struct group_dual. */
		struct group_dual *gdual;
	} u;
};
111 
/*
 * Two eight-entry opcode tables.  Going by the member names, mod012 is for
 * ModRM.mod values 0-2 and mod3 for ModRM.mod == 3 (the selection code is
 * not in this part of the file).
 */
struct group_dual {
	struct opcode mod012[8];
	struct opcode mod3[8];
};
116 
/* EFLAGS bit definitions. */
#define EFLG_ID (1<<21)
#define EFLG_VIP (1<<20)
#define EFLG_VIF (1<<19)
#define EFLG_AC (1<<18)
#define EFLG_VM (1<<17)
#define EFLG_RF (1<<16)
#define EFLG_IOPL (3<<12)	/* two-bit field, bits 12-13 */
#define EFLG_NT (1<<14)
#define EFLG_OF (1<<11)
#define EFLG_DF (1<<10)
#define EFLG_IF (1<<9)
#define EFLG_TF (1<<8)
#define EFLG_SF (1<<7)
#define EFLG_ZF (1<<6)
#define EFLG_AF (1<<4)
#define EFLG_PF (1<<2)
#define EFLG_CF (1<<0)

/*
 * NOTE(review): going by the names, these are the EFLAGS bits that must
 * read as zero / as one; the users are outside this part of the file.
 */
#define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
#define EFLG_RESERVED_ONE_MASK 2
138 
139 /*
140  * Instruction emulation:
141  * Most instructions are emulated directly via a fragment of inline assembly
142  * code. This allows us to save/restore EFLAGS and thus very easily pick up
143  * any modified flags.
144  */
145 
/*
 * String fragments pasted into the inline-asm templates below:
 *   _LO32 - operand modifier placed between '%' and the operand number
 *   _STK  - name of the stack pointer register
 * Neither is defined when the build is neither x86_64 nor i386.
 */
#if defined(CONFIG_X86_64)
#define _LO32 "k"		/* force 32-bit operand */
#define _STK  "%%rsp"		/* stack pointer */
#elif defined(__i386__)
#define _LO32 ""		/* force 32-bit operand */
#define _STK  "%%esp"		/* stack pointer */
#endif
153 
/*
 * These EFLAGS bits are restored from saved value during emulation, and
 * any changes are written back to the saved value after emulation.
 * The set is the six arithmetic status flags: OF, SF, ZF, AF, PF and CF.
 */
#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)
159 
/*
 * Before executing instruction: restore necessary bits in EFLAGS.
 *
 * _sav, _msk and _tmp are asm operand numbers given as strings.
 * Step by step:
 *   1. _tmp = _sav; push it twice (two stack slots holding _sav).
 *   2. _tmp = _msk; AND it into the top slot      -> top   = _sav & _msk
 *   3. pushf                                      -> new top = host EFLAGS
 *   4. _tmp = ~_msk; AND it into the top slot     -> top   = EFLAGS & ~_msk
 *      and into the slot two words further up     -> slot  = _sav & ~_msk
 *      (BITS_PER_LONG/4 is the byte offset of two stack words)
 *   5. pop _tmp; OR it into the new top slot      -> top   = merged flags
 *   6. popf loads the merged flags; the last pop stores _sav & ~_msk
 *      back into _sav.
 */
#define _PRE_EFLAGS(_sav, _msk, _tmp)					\
	/* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */ \
	"movl %"_sav",%"_LO32 _tmp"; "                                  \
	"push %"_tmp"; "                                                \
	"push %"_tmp"; "                                                \
	"movl %"_msk",%"_LO32 _tmp"; "                                  \
	"andl %"_LO32 _tmp",("_STK"); "                                 \
	"pushf; "                                                       \
	"notl %"_LO32 _tmp"; "                                          \
	"andl %"_LO32 _tmp",("_STK"); "                                 \
	"andl %"_LO32 _tmp","__stringify(BITS_PER_LONG/4)"("_STK"); "	\
	"pop  %"_tmp"; "                                                \
	"orl  %"_LO32 _tmp",("_STK"); "                                 \
	"popf; "                                                        \
	"pop  %"_sav"; "
176 
/*
 * After executing instruction: write-back necessary bits in EFLAGS.
 *
 * Reads the current EFLAGS into _tmp via pushf/pop, keeps only the bits in
 * _msk and ORs them into _sav.  _PRE_EFLAGS cleared those bits in _sav
 * beforehand, so the OR stores the new values.
 */
#define _POST_EFLAGS(_sav, _msk, _tmp) \
	/* _sav |= EFLAGS & _msk; */		\
	"pushf; "				\
	"pop  %"_tmp"; "			\
	"andl %"_msk",%"_LO32 _tmp"; "		\
	"orl  %"_LO32 _tmp",%"_sav"; "
184 
/*
 * ON64(x) expands to x on x86_64 builds and to nothing otherwise.  It is
 * used below to drop the 8-byte operand cases from non-64-bit builds.
 */
#ifdef CONFIG_X86_64
#define ON64(x) x
#else
#define ON64(x)
#endif
190 
/*
 * Run one two-operand instruction natively on emulated operand values.
 *
 *   _op       instruction mnemonic string (without size suffix)
 *   _suffix   size suffix string ("b", "w", "l", "q")
 *   _x        operand modifier string applied to the source operand
 *   _y        constraint string for the source operand
 *   _dsttype  C type through which (_dst).val is accessed
 *
 * Asm operands: %0 = _eflags (memory), %1 = destination value,
 * %2 = _tmp scratch, %3 = (_src).val, %4 = EFLAGS_MASK immediate.
 * _tmp is not declared here; the expanding macro must provide it.
 */
#define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix, _dsttype) \
	do {								\
		__asm__ __volatile__ (					\
			_PRE_EFLAGS("0", "4", "2")			\
			_op _suffix " %"_x"3,%1; "			\
			_POST_EFLAGS("0", "4", "2")			\
			: "=m" (_eflags), "+q" (*(_dsttype*)&(_dst).val),\
			  "=&r" (_tmp)					\
			: _y ((_src).val), "i" (EFLAGS_MASK));		\
	} while (0)
201 
202 
/*
 * Raw emulation: instruction has two explicit operands.
 *
 * Dispatches on (_dst).bytes for 2-, 4- and 8-byte destinations; any other
 * size does nothing.  The _wx/_wy, _lx/_ly and _qx/_qy pairs are the source
 * operand modifier and constraint strings for each size.  The 8-byte case
 * is compiled only on 64-bit builds (ON64).
 */
#define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \
	do {								\
		unsigned long _tmp;					\
									\
		switch ((_dst).bytes) {					\
		case 2:							\
			____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w",u16);\
			break;						\
		case 4:							\
			____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l",u32);\
			break;						\
		case 8:							\
			ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q",u64)); \
			break;						\
		}							\
	} while (0)
220 
/*
 * Two-operand emulation including the byte-sized form.  A 1-byte
 * destination is handled here with the _bx/_by modifier and constraint;
 * every other size is forwarded to __emulate_2op_nobyte().
 */
#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
	do {								     \
		unsigned long _tmp;					     \
		switch ((_dst).bytes) {				             \
		case 1:							     \
			____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b",u8); \
			break;						     \
		default:						     \
			__emulate_2op_nobyte(_op, _src, _dst, _eflags,	     \
					     _wx, _wy, _lx, _ly, _qx, _qy);  \
			break;						     \
		}							     \
	} while (0)
234 
/*
 * Source operand is byte-sized and may be restricted to just %cl.
 * For every destination size the source uses the "b" operand modifier
 * and the "c" constraint.
 */
#define emulate_2op_SrcB(_op, _src, _dst, _eflags)                      \
	__emulate_2op(_op, _src, _dst, _eflags,				\
		      "b", "c", "b", "c", "b", "c", "b", "c")
239 
/*
 * Source operand is byte, word, long or quad sized.
 * The (modifier, constraint) pairs per size are: byte ("b", "q"),
 * word ("w", "r"), long (_LO32, "r"), quad ("", "r").
 */
#define emulate_2op_SrcV(_op, _src, _dst, _eflags)                      \
	__emulate_2op(_op, _src, _dst, _eflags,				\
		      "b", "q", "w", "r", _LO32, "r", "", "r")
244 
/*
 * Source operand is word, long or quad sized.
 * Same as emulate_2op_SrcV() but without a byte-sized case.
 */
#define emulate_2op_SrcV_nobyte(_op, _src, _dst, _eflags)               \
	__emulate_2op_nobyte(_op, _src, _dst, _eflags,			\
			     "w", "r", _LO32, "r", "", "r")
249 
/*
 * Instruction has three operands and one operand is stored in ECX register.
 *
 * The three operand values are copied into locals of type _type, the
 * instruction is run as "_op_suffix %4,%1", and all three locals are
 * copied back afterwards.
 *
 * Asm operands: %0 = _eflags (memory), %1 = _dstv, %2 = _tmp scratch,
 * %3 = _clv (pinned by the "c" constraint, not named in the template),
 * %4 = _srcv, %5 = EFLAGS_MASK immediate.
 */
#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) 	\
	do {									\
		unsigned long _tmp;						\
		_type _clv  = (_cl).val;  					\
		_type _srcv = (_src).val;    					\
		_type _dstv = (_dst).val;					\
										\
		__asm__ __volatile__ (						\
			_PRE_EFLAGS("0", "5", "2")				\
			_op _suffix " %4,%1 \n"					\
			_POST_EFLAGS("0", "5", "2")				\
			: "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp)		\
			: "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK)		\
			); 							\
										\
		(_cl).val  = (unsigned long) _clv;				\
		(_src).val = (unsigned long) _srcv;				\
		(_dst).val = (unsigned long) _dstv;				\
	} while (0)
270 
/*
 * Size dispatcher for __emulate_2op_cl(): picks the suffix and C type from
 * (_dst).bytes.  Only 2, 4 and 8 are handled; the 8-byte case is compiled
 * only on 64-bit builds, and any other size does nothing.
 */
#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags)				\
	do {									\
		switch ((_dst).bytes) {						\
		case 2:								\
			__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,  	\
						"w", unsigned short);         	\
			break;							\
		case 4: 							\
			__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,  	\
						"l", unsigned int);           	\
			break;							\
		case 8:								\
			ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,	\
						"q", unsigned long));  		\
			break;							\
		}								\
	} while (0)
288 
/*
 * Run a one-operand instruction directly on (_dst).val in memory.
 *
 * Asm operands: %0 = _eflags (memory), %1 = (_dst).val (read-write memory),
 * %2 = _tmp scratch, %3 = EFLAGS_MASK immediate.
 */
#define __emulate_1op(_op, _dst, _eflags, _suffix)			\
	do {								\
		unsigned long _tmp;					\
									\
		__asm__ __volatile__ (					\
			_PRE_EFLAGS("0", "3", "2")			\
			_op _suffix " %1; "				\
			_POST_EFLAGS("0", "3", "2")			\
			: "=m" (_eflags), "+m" ((_dst).val),		\
			  "=&r" (_tmp)					\
			: "i" (EFLAGS_MASK));				\
	} while (0)
301 
/*
 * Instruction has only one explicit operand (no source operand).
 * Dispatches on (_dst).bytes to choose the size suffix; the 8-byte case is
 * compiled only on 64-bit builds and any other size does nothing.
 */
#define emulate_1op(_op, _dst, _eflags)                                    \
	do {								\
		switch ((_dst).bytes) {				        \
		case 1:	__emulate_1op(_op, _dst, _eflags, "b"); break;	\
		case 2:	__emulate_1op(_op, _dst, _eflags, "w"); break;	\
		case 4:	__emulate_1op(_op, _dst, _eflags, "l"); break;	\
		case 8:	ON64(__emulate_1op(_op, _dst, _eflags, "q")); break; \
		}							\
	} while (0)
312 
/*
 * Run an instruction whose only explicit operand is a memory source and
 * which works implicitly on the a/d registers.
 *
 * Asm operands: %0 = _eflags (memory), %1 = _tmp scratch, %2 = _rax
 * ("+a"), %3 = _rdx ("+d"), %4 = EFLAGS_MASK immediate, %5 = (_src).val
 * (memory).  _rax and _rdx are also listed a second time as plain inputs.
 */
#define __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, _suffix)		\
	do {								\
		unsigned long _tmp;					\
									\
		__asm__ __volatile__ (					\
			_PRE_EFLAGS("0", "4", "1")			\
			_op _suffix " %5; "				\
			_POST_EFLAGS("0", "4", "1")			\
			: "=m" (_eflags), "=&r" (_tmp),			\
			  "+a" (_rax), "+d" (_rdx)			\
			: "i" (EFLAGS_MASK), "m" ((_src).val),		\
			  "a" (_rax), "d" (_rdx));			\
	} while (0)
326 
/*
 * Variant of __emulate_1op_rax_rdx() that survives a fault in the emulated
 * instruction.
 *
 * The instruction sits at local label 1.  An exception-table entry
 * (_ASM_EXTABLE(1b, 3b)) sends a fault there to the fixup code at label 3,
 * which stores 1 into _ex and jumps to label 2, just after the
 * instruction, so the flags write-back still runs.
 *
 * Asm operands: %0 = _eflags, %1 = _tmp, %2 = _rax, %3 = _rdx, %4 = _ex,
 * %5 = EFLAGS_MASK immediate, %6 = (_src).val (memory).
 */
#define __emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, _eflags, _suffix, _ex) \
	do {								\
		unsigned long _tmp;					\
									\
		__asm__ __volatile__ (					\
			_PRE_EFLAGS("0", "5", "1")			\
			"1: \n\t"					\
			_op _suffix " %6; "				\
			"2: \n\t"					\
			_POST_EFLAGS("0", "5", "1")			\
			".pushsection .fixup,\"ax\" \n\t"		\
			"3: movb $1, %4 \n\t"				\
			"jmp 2b \n\t"					\
			".popsection \n\t"				\
			_ASM_EXTABLE(1b, 3b)				\
			: "=m" (_eflags), "=&r" (_tmp),			\
			  "+a" (_rax), "+d" (_rdx), "+qm"(_ex)		\
			: "i" (EFLAGS_MASK), "m" ((_src).val),		\
			  "a" (_rax), "d" (_rdx));			\
	} while (0)
347 
/*
 * Instruction has only one source operand, destination is implicit
 * (e.g. mul, div, imul, idiv).  Dispatches on (_src).bytes to choose the
 * size suffix; the 8-byte case is compiled only on 64-bit builds and any
 * other size does nothing.
 */
#define emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags)			\
	do {									\
		switch((_src).bytes) {						\
		case 1: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "b"); break; \
		case 2: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx,  _eflags, "w"); break; \
		case 4: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "l"); break; \
		case 8: ON64(__emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "q")); break; \
		}							\
	} while (0)
358 
/*
 * Size dispatcher for __emulate_1op_rax_rdx_ex(), keyed on (_src).bytes.
 * _ex is passed through unchanged; the fixup path of the inner macro
 * stores 1 into it when the instruction faults.  The 8-byte case is
 * compiled only on 64-bit builds.
 */
#define emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, _eflags, _ex)	\
	do {								\
		switch((_src).bytes) {					\
		case 1:							\
			__emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx,	\
						 _eflags, "b", _ex);	\
			break;						\
		case 2:							\
			__emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, \
						 _eflags, "w", _ex);	\
			break;						\
		case 4:							\
			__emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, \
						 _eflags, "l", _ex);	\
			break;						\
		case 8: ON64(						\
			__emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, \
						 _eflags, "q", _ex));	\
			break;						\
		}							\
	} while (0)
380 
/*
 * Fetch next part of the instruction being emulated.
 *
 * Statement expression that reads _size bytes at _eip through
 * do_insn_fetch(), advances _eip by _size and yields the value cast to
 * _type.
 *
 * Hidden requirements on the expanding function: it must have 'ctxt',
 * 'ops' and an assignable 'rc' in scope, plus a 'done' label.  On any
 * fetch failure the macro stores the status in rc and jumps to done.
 * _eip is evaluated more than once.
 */
#define insn_fetch(_type, _size, _eip)                                  \
({	unsigned long _x;						\
	rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size));		\
	if (rc != X86EMUL_CONTINUE)					\
		goto done;						\
	(_eip) += (_size);						\
	(_type)_x;							\
})
390 
/*
 * Like insn_fetch(), but copies _size instruction bytes into the buffer
 * _arr instead of producing a value.  It has the same hidden requirements:
 * 'ctxt', 'ops', 'rc' and a 'done' label in the expanding function.
 */
#define insn_fetch_arr(_arr, _size, _eip)                                \
({	rc = do_insn_fetch(ctxt, ops, (_eip), _arr, (_size));		\
	if (rc != X86EMUL_CONTINUE)					\
		goto done;						\
	(_eip) += (_size);						\
})
397 
398 static inline unsigned long ad_mask(struct decode_cache *c)
399 {
400 	return (1UL << (c->ad_bytes << 3)) - 1;
401 }
402 
403 /* Access/update address held in a register, based on addressing mode. */
404 static inline unsigned long
405 address_mask(struct decode_cache *c, unsigned long reg)
406 {
407 	if (c->ad_bytes == sizeof(unsigned long))
408 		return reg;
409 	else
410 		return reg & ad_mask(c);
411 }
412 
/* Value of a register when used as an address: see address_mask(). */
static inline unsigned long
register_address(struct decode_cache *c, unsigned long reg)
{
	unsigned long masked = address_mask(c, reg);

	return masked;
}
418 
419 static inline void
420 register_address_increment(struct decode_cache *c, unsigned long *reg, int inc)
421 {
422 	if (c->ad_bytes == sizeof(unsigned long))
423 		*reg += inc;
424 	else
425 		*reg = (*reg & ~ad_mask(c)) | ((*reg + inc) & ad_mask(c));
426 }
427 
428 static inline void jmp_rel(struct decode_cache *c, int rel)
429 {
430 	register_address_increment(c, &c->eip, rel);
431 }
432 
433 static void set_seg_override(struct decode_cache *c, int seg)
434 {
435 	c->has_seg_override = true;
436 	c->seg_override = seg;
437 }
438 
439 static unsigned long seg_base(struct x86_emulate_ctxt *ctxt,
440 			      struct x86_emulate_ops *ops, int seg)
441 {
442 	if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
443 		return 0;
444 
445 	return ops->get_cached_segment_base(seg, ctxt->vcpu);
446 }
447 
448 static unsigned seg_override(struct x86_emulate_ctxt *ctxt,
449 			     struct x86_emulate_ops *ops,
450 			     struct decode_cache *c)
451 {
452 	if (!c->has_seg_override)
453 		return 0;
454 
455 	return c->seg_override;
456 }
457 
458 static ulong linear(struct x86_emulate_ctxt *ctxt,
459 		    struct segmented_address addr)
460 {
461 	struct decode_cache *c = &ctxt->decode;
462 	ulong la;
463 
464 	la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea;
465 	if (c->ad_bytes != 8)
466 		la &= (u32)-1;
467 	return la;
468 }
469 
470 static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
471 			     u32 error, bool valid)
472 {
473 	ctxt->exception.vector = vec;
474 	ctxt->exception.error_code = error;
475 	ctxt->exception.error_code_valid = valid;
476 	return X86EMUL_PROPAGATE_FAULT;
477 }
478 
479 static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
480 {
481 	return emulate_exception(ctxt, GP_VECTOR, err, true);
482 }
483 
484 static int emulate_ud(struct x86_emulate_ctxt *ctxt)
485 {
486 	return emulate_exception(ctxt, UD_VECTOR, 0, false);
487 }
488 
489 static int emulate_ts(struct x86_emulate_ctxt *ctxt, int err)
490 {
491 	return emulate_exception(ctxt, TS_VECTOR, err, true);
492 }
493 
494 static int emulate_de(struct x86_emulate_ctxt *ctxt)
495 {
496 	return emulate_exception(ctxt, DE_VECTOR, 0, false);
497 }
498 
499 static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
500 			      struct x86_emulate_ops *ops,
501 			      unsigned long eip, u8 *dest)
502 {
503 	struct fetch_cache *fc = &ctxt->decode.fetch;
504 	int rc;
505 	int size, cur_size;
506 
507 	if (eip == fc->end) {
508 		cur_size = fc->end - fc->start;
509 		size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip));
510 		rc = ops->fetch(ctxt->cs_base + eip, fc->data + cur_size,
511 				size, ctxt->vcpu, &ctxt->exception);
512 		if (rc != X86EMUL_CONTINUE)
513 			return rc;
514 		fc->end += size;
515 	}
516 	*dest = fc->data[eip - fc->start];
517 	return X86EMUL_CONTINUE;
518 }
519 
520 static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
521 			 struct x86_emulate_ops *ops,
522 			 unsigned long eip, void *dest, unsigned size)
523 {
524 	int rc;
525 
526 	/* x86 instructions are limited to 15 bytes. */
527 	if (eip + size - ctxt->eip > 15)
528 		return X86EMUL_UNHANDLEABLE;
529 	while (size--) {
530 		rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++);
531 		if (rc != X86EMUL_CONTINUE)
532 			return rc;
533 	}
534 	return X86EMUL_CONTINUE;
535 }
536 
537 /*
538  * Given the 'reg' portion of a ModRM byte, and a register block, return a
539  * pointer into the block that addresses the relevant register.
540  * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
541  */
542 static void *decode_register(u8 modrm_reg, unsigned long *regs,
543 			     int highbyte_regs)
544 {
545 	void *p;
546 
547 	p = &regs[modrm_reg];
548 	if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
549 		p = (unsigned char *)&regs[modrm_reg & 3] + 1;
550 	return p;
551 }
552 
553 static int read_descriptor(struct x86_emulate_ctxt *ctxt,
554 			   struct x86_emulate_ops *ops,
555 			   struct segmented_address addr,
556 			   u16 *size, unsigned long *address, int op_bytes)
557 {
558 	int rc;
559 
560 	if (op_bytes == 2)
561 		op_bytes = 3;
562 	*address = 0;
563 	rc = ops->read_std(linear(ctxt, addr), (unsigned long *)size, 2,
564 			   ctxt->vcpu, &ctxt->exception);
565 	if (rc != X86EMUL_CONTINUE)
566 		return rc;
567 	addr.ea += 2;
568 	rc = ops->read_std(linear(ctxt, addr), address, op_bytes,
569 			   ctxt->vcpu, &ctxt->exception);
570 	return rc;
571 }
572 
573 static int test_cc(unsigned int condition, unsigned int flags)
574 {
575 	int rc = 0;
576 
577 	switch ((condition & 15) >> 1) {
578 	case 0: /* o */
579 		rc |= (flags & EFLG_OF);
580 		break;
581 	case 1: /* b/c/nae */
582 		rc |= (flags & EFLG_CF);
583 		break;
584 	case 2: /* z/e */
585 		rc |= (flags & EFLG_ZF);
586 		break;
587 	case 3: /* be/na */
588 		rc |= (flags & (EFLG_CF|EFLG_ZF));
589 		break;
590 	case 4: /* s */
591 		rc |= (flags & EFLG_SF);
592 		break;
593 	case 5: /* p/pe */
594 		rc |= (flags & EFLG_PF);
595 		break;
596 	case 7: /* le/ng */
597 		rc |= (flags & EFLG_ZF);
598 		/* fall through */
599 	case 6: /* l/nge */
600 		rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF));
601 		break;
602 	}
603 
604 	/* Odd condition identifiers (lsb == 1) have inverted sense. */
605 	return (!!rc ^ (condition & 1));
606 }
607 
608 static void fetch_register_operand(struct operand *op)
609 {
610 	switch (op->bytes) {
611 	case 1:
612 		op->val = *(u8 *)op->addr.reg;
613 		break;
614 	case 2:
615 		op->val = *(u16 *)op->addr.reg;
616 		break;
617 	case 4:
618 		op->val = *(u32 *)op->addr.reg;
619 		break;
620 	case 8:
621 		op->val = *(u64 *)op->addr.reg;
622 		break;
623 	}
624 }
625 
626 static void decode_register_operand(struct operand *op,
627 				    struct decode_cache *c,
628 				    int inhibit_bytereg)
629 {
630 	unsigned reg = c->modrm_reg;
631 	int highbyte_regs = c->rex_prefix == 0;
632 
633 	if (!(c->d & ModRM))
634 		reg = (c->b & 7) | ((c->rex_prefix & 1) << 3);
635 	op->type = OP_REG;
636 	if ((c->d & ByteOp) && !inhibit_bytereg) {
637 		op->addr.reg = decode_register(reg, c->regs, highbyte_regs);
638 		op->bytes = 1;
639 	} else {
640 		op->addr.reg = decode_register(reg, c->regs, 0);
641 		op->bytes = c->op_bytes;
642 	}
643 	fetch_register_operand(op);
644 	op->orig_val = op->val;
645 }
646 
647 static int decode_modrm(struct x86_emulate_ctxt *ctxt,
648 			struct x86_emulate_ops *ops,
649 			struct operand *op)
650 {
651 	struct decode_cache *c = &ctxt->decode;
652 	u8 sib;
653 	int index_reg = 0, base_reg = 0, scale;
654 	int rc = X86EMUL_CONTINUE;
655 	ulong modrm_ea = 0;
656 
657 	if (c->rex_prefix) {
658 		c->modrm_reg = (c->rex_prefix & 4) << 1;	/* REX.R */
659 		index_reg = (c->rex_prefix & 2) << 2; /* REX.X */
660 		c->modrm_rm = base_reg = (c->rex_prefix & 1) << 3; /* REG.B */
661 	}
662 
663 	c->modrm = insn_fetch(u8, 1, c->eip);
664 	c->modrm_mod |= (c->modrm & 0xc0) >> 6;
665 	c->modrm_reg |= (c->modrm & 0x38) >> 3;
666 	c->modrm_rm |= (c->modrm & 0x07);
667 	c->modrm_seg = VCPU_SREG_DS;
668 
669 	if (c->modrm_mod == 3) {
670 		op->type = OP_REG;
671 		op->bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
672 		op->addr.reg = decode_register(c->modrm_rm,
673 					       c->regs, c->d & ByteOp);
674 		fetch_register_operand(op);
675 		return rc;
676 	}
677 
678 	op->type = OP_MEM;
679 
680 	if (c->ad_bytes == 2) {
681 		unsigned bx = c->regs[VCPU_REGS_RBX];
682 		unsigned bp = c->regs[VCPU_REGS_RBP];
683 		unsigned si = c->regs[VCPU_REGS_RSI];
684 		unsigned di = c->regs[VCPU_REGS_RDI];
685 
686 		/* 16-bit ModR/M decode. */
687 		switch (c->modrm_mod) {
688 		case 0:
689 			if (c->modrm_rm == 6)
690 				modrm_ea += insn_fetch(u16, 2, c->eip);
691 			break;
692 		case 1:
693 			modrm_ea += insn_fetch(s8, 1, c->eip);
694 			break;
695 		case 2:
696 			modrm_ea += insn_fetch(u16, 2, c->eip);
697 			break;
698 		}
699 		switch (c->modrm_rm) {
700 		case 0:
701 			modrm_ea += bx + si;
702 			break;
703 		case 1:
704 			modrm_ea += bx + di;
705 			break;
706 		case 2:
707 			modrm_ea += bp + si;
708 			break;
709 		case 3:
710 			modrm_ea += bp + di;
711 			break;
712 		case 4:
713 			modrm_ea += si;
714 			break;
715 		case 5:
716 			modrm_ea += di;
717 			break;
718 		case 6:
719 			if (c->modrm_mod != 0)
720 				modrm_ea += bp;
721 			break;
722 		case 7:
723 			modrm_ea += bx;
724 			break;
725 		}
726 		if (c->modrm_rm == 2 || c->modrm_rm == 3 ||
727 		    (c->modrm_rm == 6 && c->modrm_mod != 0))
728 			c->modrm_seg = VCPU_SREG_SS;
729 		modrm_ea = (u16)modrm_ea;
730 	} else {
731 		/* 32/64-bit ModR/M decode. */
732 		if ((c->modrm_rm & 7) == 4) {
733 			sib = insn_fetch(u8, 1, c->eip);
734 			index_reg |= (sib >> 3) & 7;
735 			base_reg |= sib & 7;
736 			scale = sib >> 6;
737 
738 			if ((base_reg & 7) == 5 && c->modrm_mod == 0)
739 				modrm_ea += insn_fetch(s32, 4, c->eip);
740 			else
741 				modrm_ea += c->regs[base_reg];
742 			if (index_reg != 4)
743 				modrm_ea += c->regs[index_reg] << scale;
744 		} else if ((c->modrm_rm & 7) == 5 && c->modrm_mod == 0) {
745 			if (ctxt->mode == X86EMUL_MODE_PROT64)
746 				c->rip_relative = 1;
747 		} else
748 			modrm_ea += c->regs[c->modrm_rm];
749 		switch (c->modrm_mod) {
750 		case 0:
751 			if (c->modrm_rm == 5)
752 				modrm_ea += insn_fetch(s32, 4, c->eip);
753 			break;
754 		case 1:
755 			modrm_ea += insn_fetch(s8, 1, c->eip);
756 			break;
757 		case 2:
758 			modrm_ea += insn_fetch(s32, 4, c->eip);
759 			break;
760 		}
761 	}
762 	op->addr.mem.ea = modrm_ea;
763 done:
764 	return rc;
765 }
766 
767 static int decode_abs(struct x86_emulate_ctxt *ctxt,
768 		      struct x86_emulate_ops *ops,
769 		      struct operand *op)
770 {
771 	struct decode_cache *c = &ctxt->decode;
772 	int rc = X86EMUL_CONTINUE;
773 
774 	op->type = OP_MEM;
775 	switch (c->ad_bytes) {
776 	case 2:
777 		op->addr.mem.ea = insn_fetch(u16, 2, c->eip);
778 		break;
779 	case 4:
780 		op->addr.mem.ea = insn_fetch(u32, 4, c->eip);
781 		break;
782 	case 8:
783 		op->addr.mem.ea = insn_fetch(u64, 8, c->eip);
784 		break;
785 	}
786 done:
787 	return rc;
788 }
789 
790 static void fetch_bit_operand(struct decode_cache *c)
791 {
792 	long sv = 0, mask;
793 
794 	if (c->dst.type == OP_MEM && c->src.type == OP_REG) {
795 		mask = ~(c->dst.bytes * 8 - 1);
796 
797 		if (c->src.bytes == 2)
798 			sv = (s16)c->src.val & (s16)mask;
799 		else if (c->src.bytes == 4)
800 			sv = (s32)c->src.val & (s32)mask;
801 
802 		c->dst.addr.mem.ea += (sv >> 3);
803 	}
804 
805 	/* only subword offset */
806 	c->src.val &= (c->dst.bytes << 3) - 1;
807 }
808 
809 static int read_emulated(struct x86_emulate_ctxt *ctxt,
810 			 struct x86_emulate_ops *ops,
811 			 unsigned long addr, void *dest, unsigned size)
812 {
813 	int rc;
814 	struct read_cache *mc = &ctxt->decode.mem_read;
815 
816 	while (size) {
817 		int n = min(size, 8u);
818 		size -= n;
819 		if (mc->pos < mc->end)
820 			goto read_cached;
821 
822 		rc = ops->read_emulated(addr, mc->data + mc->end, n,
823 					&ctxt->exception, ctxt->vcpu);
824 		if (rc != X86EMUL_CONTINUE)
825 			return rc;
826 		mc->end += n;
827 
828 	read_cached:
829 		memcpy(dest, mc->data + mc->pos, n);
830 		mc->pos += n;
831 		dest += n;
832 		addr += n;
833 	}
834 	return X86EMUL_CONTINUE;
835 }
836 
837 static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
838 			   struct x86_emulate_ops *ops,
839 			   unsigned int size, unsigned short port,
840 			   void *dest)
841 {
842 	struct read_cache *rc = &ctxt->decode.io_read;
843 
844 	if (rc->pos == rc->end) { /* refill pio read ahead */
845 		struct decode_cache *c = &ctxt->decode;
846 		unsigned int in_page, n;
847 		unsigned int count = c->rep_prefix ?
848 			address_mask(c, c->regs[VCPU_REGS_RCX]) : 1;
849 		in_page = (ctxt->eflags & EFLG_DF) ?
850 			offset_in_page(c->regs[VCPU_REGS_RDI]) :
851 			PAGE_SIZE - offset_in_page(c->regs[VCPU_REGS_RDI]);
852 		n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size,
853 			count);
854 		if (n == 0)
855 			n = 1;
856 		rc->pos = rc->end = 0;
857 		if (!ops->pio_in_emulated(size, port, rc->data, n, ctxt->vcpu))
858 			return 0;
859 		rc->end = n * size;
860 	}
861 
862 	memcpy(dest, rc->data + rc->pos, size);
863 	rc->pos += size;
864 	return 1;
865 }
866 
867 static u32 desc_limit_scaled(struct desc_struct *desc)
868 {
869 	u32 limit = get_desc_limit(desc);
870 
871 	return desc->g ? (limit << 12) | 0xfff : limit;
872 }
873 
874 static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
875 				     struct x86_emulate_ops *ops,
876 				     u16 selector, struct desc_ptr *dt)
877 {
878 	if (selector & 1 << 2) {
879 		struct desc_struct desc;
880 		memset (dt, 0, sizeof *dt);
881 		if (!ops->get_cached_descriptor(&desc, NULL, VCPU_SREG_LDTR,
882 						ctxt->vcpu))
883 			return;
884 
885 		dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
886 		dt->address = get_desc_base(&desc);
887 	} else
888 		ops->get_gdt(dt, ctxt->vcpu);
889 }
890 
891 /* allowed just for 8 bytes segments */
892 static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
893 				   struct x86_emulate_ops *ops,
894 				   u16 selector, struct desc_struct *desc)
895 {
896 	struct desc_ptr dt;
897 	u16 index = selector >> 3;
898 	int ret;
899 	ulong addr;
900 
901 	get_descriptor_table_ptr(ctxt, ops, selector, &dt);
902 
903 	if (dt.size < index * 8 + 7)
904 		return emulate_gp(ctxt, selector & 0xfffc);
905 	addr = dt.address + index * 8;
906 	ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu,
907 			    &ctxt->exception);
908 
909        return ret;
910 }
911 
912 /* allowed just for 8 bytes segments */
913 static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
914 				    struct x86_emulate_ops *ops,
915 				    u16 selector, struct desc_struct *desc)
916 {
917 	struct desc_ptr dt;
918 	u16 index = selector >> 3;
919 	ulong addr;
920 	int ret;
921 
922 	get_descriptor_table_ptr(ctxt, ops, selector, &dt);
923 
924 	if (dt.size < index * 8 + 7)
925 		return emulate_gp(ctxt, selector & 0xfffc);
926 
927 	addr = dt.address + index * 8;
928 	ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu,
929 			     &ctxt->exception);
930 
931 	return ret;
932 }
933 
/*
 * Load segment register 'seg' with 'selector', performing the checks
 * below.  Does not support long mode.
 *
 * In real mode, and in VM86 mode for segments up to VCPU_SREG_GS, a
 * real-mode style descriptor (base = selector << 4, limit 0xffff) is
 * built and loaded without further checks.
 *
 * Otherwise the descriptor is read from the descriptor table and checked
 * for type, presence and privilege according to the target register.  For
 * code/data descriptors the accessed bit is set and written back before
 * the segment is loaded.
 *
 * Returns X86EMUL_CONTINUE on success, the status of a failed descriptor
 * read/write, or X86EMUL_PROPAGATE_FAULT after queueing an exception
 * (GP by default; SS or NP for a not-present segment).
 */
static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				   struct x86_emulate_ops *ops,
				   u16 selector, int seg)
{
	struct desc_struct seg_desc;
	u8 dpl, rpl, cpl;
	unsigned err_vec = GP_VECTOR;
	u32 err_code = 0;
	bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
	int ret;

	memset(&seg_desc, 0, sizeof seg_desc);

	if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86)
	    || ctxt->mode == X86EMUL_MODE_REAL) {
		/* set real mode segment descriptor */
		set_desc_base(&seg_desc, selector << 4);
		set_desc_limit(&seg_desc, 0xffff);
		seg_desc.type = 3;
		seg_desc.p = 1;
		seg_desc.s = 1;
		goto load;
	}

	/* NULL selector is not valid for TR, CS and SS */
	if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR)
	    && null_selector)
		goto exception;

	/* TR should be in GDT only */
	if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
		goto exception;

	if (null_selector) /* for NULL selector skip all following checks */
		goto load;

	ret = read_segment_descriptor(ctxt, ops, selector, &seg_desc);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	/* From here on, faults report the selector with RPL bits cleared. */
	err_code = selector & 0xfffc;
	err_vec = GP_VECTOR;

	/* can't load system descriptor into segment selector */
	if (seg <= VCPU_SREG_GS && !seg_desc.s)
		goto exception;

	if (!seg_desc.p) {
		err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
		goto exception;
	}

	rpl = selector & 3;
	dpl = seg_desc.dpl;
	cpl = ops->cpl(ctxt->vcpu);

	switch (seg) {
	case VCPU_SREG_SS:
		/*
		 * segment is not a writable data segment or segment
		 * selector's RPL != CPL or segment descriptor's DPL != CPL
		 */
		if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
			goto exception;
		break;
	case VCPU_SREG_CS:
		/* must be a code segment (type bit 3 set) */
		if (!(seg_desc.type & 8))
			goto exception;

		if (seg_desc.type & 4) {
			/* conforming */
			if (dpl > cpl)
				goto exception;
		} else {
			/* nonconforming */
			if (rpl > cpl || dpl != cpl)
				goto exception;
		}
		/* CS(RPL) <- CPL */
		selector = (selector & 0xfffc) | cpl;
		break;
	case VCPU_SREG_TR:
		/* must be a system descriptor of type 1 or 9 */
		if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
			goto exception;
		break;
	case VCPU_SREG_LDTR:
		/* must be a system descriptor of type 2 */
		if (seg_desc.s || seg_desc.type != 2)
			goto exception;
		break;
	default: /*  DS, ES, FS, or GS */
		/*
		 * segment is not a data or readable code segment or
		 * ((segment is a data or nonconforming code segment)
		 * and (both RPL and CPL > DPL))
		 */
		if ((seg_desc.type & 0xa) == 0x8 ||
		    (((seg_desc.type & 0xc) != 0xc) &&
		     (rpl > dpl && cpl > dpl)))
			goto exception;
		break;
	}

	if (seg_desc.s) {
		/* mark segment as accessed */
		seg_desc.type |= 1;
		ret = write_segment_descriptor(ctxt, ops, selector, &seg_desc);
		if (ret != X86EMUL_CONTINUE)
			return ret;
	}
load:
	ops->set_segment_selector(selector, seg, ctxt->vcpu);
	ops->set_cached_descriptor(&seg_desc, 0, seg, ctxt->vcpu);
	return X86EMUL_CONTINUE;
exception:
	emulate_exception(ctxt, err_vec, err_code, true);
	return X86EMUL_PROPAGATE_FAULT;
}
1052 
1053 static void write_register_operand(struct operand *op)
1054 {
1055 	/* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
1056 	switch (op->bytes) {
1057 	case 1:
1058 		*(u8 *)op->addr.reg = (u8)op->val;
1059 		break;
1060 	case 2:
1061 		*(u16 *)op->addr.reg = (u16)op->val;
1062 		break;
1063 	case 4:
1064 		*op->addr.reg = (u32)op->val;
1065 		break;	/* 64b: zero-extend */
1066 	case 8:
1067 		*op->addr.reg = op->val;
1068 		break;
1069 	}
1070 }
1071 
1072 static inline int writeback(struct x86_emulate_ctxt *ctxt,
1073 			    struct x86_emulate_ops *ops)
1074 {
1075 	int rc;
1076 	struct decode_cache *c = &ctxt->decode;
1077 
1078 	switch (c->dst.type) {
1079 	case OP_REG:
1080 		write_register_operand(&c->dst);
1081 		break;
1082 	case OP_MEM:
1083 		if (c->lock_prefix)
1084 			rc = ops->cmpxchg_emulated(
1085 					linear(ctxt, c->dst.addr.mem),
1086 					&c->dst.orig_val,
1087 					&c->dst.val,
1088 					c->dst.bytes,
1089 					&ctxt->exception,
1090 					ctxt->vcpu);
1091 		else
1092 			rc = ops->write_emulated(
1093 					linear(ctxt, c->dst.addr.mem),
1094 					&c->dst.val,
1095 					c->dst.bytes,
1096 					&ctxt->exception,
1097 					ctxt->vcpu);
1098 		if (rc != X86EMUL_CONTINUE)
1099 			return rc;
1100 		break;
1101 	case OP_NONE:
1102 		/* no writeback */
1103 		break;
1104 	default:
1105 		break;
1106 	}
1107 	return X86EMUL_CONTINUE;
1108 }
1109 
1110 static inline void emulate_push(struct x86_emulate_ctxt *ctxt,
1111 				struct x86_emulate_ops *ops)
1112 {
1113 	struct decode_cache *c = &ctxt->decode;
1114 
1115 	c->dst.type  = OP_MEM;
1116 	c->dst.bytes = c->op_bytes;
1117 	c->dst.val = c->src.val;
1118 	register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes);
1119 	c->dst.addr.mem.ea = register_address(c, c->regs[VCPU_REGS_RSP]);
1120 	c->dst.addr.mem.seg = VCPU_SREG_SS;
1121 }
1122 
1123 static int emulate_pop(struct x86_emulate_ctxt *ctxt,
1124 		       struct x86_emulate_ops *ops,
1125 		       void *dest, int len)
1126 {
1127 	struct decode_cache *c = &ctxt->decode;
1128 	int rc;
1129 	struct segmented_address addr;
1130 
1131 	addr.ea = register_address(c, c->regs[VCPU_REGS_RSP]);
1132 	addr.seg = VCPU_SREG_SS;
1133 	rc = read_emulated(ctxt, ops, linear(ctxt, addr), dest, len);
1134 	if (rc != X86EMUL_CONTINUE)
1135 		return rc;
1136 
1137 	register_address_increment(c, &c->regs[VCPU_REGS_RSP], len);
1138 	return rc;
1139 }
1140 
1141 static int emulate_popf(struct x86_emulate_ctxt *ctxt,
1142 		       struct x86_emulate_ops *ops,
1143 		       void *dest, int len)
1144 {
1145 	int rc;
1146 	unsigned long val, change_mask;
1147 	int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
1148 	int cpl = ops->cpl(ctxt->vcpu);
1149 
1150 	rc = emulate_pop(ctxt, ops, &val, len);
1151 	if (rc != X86EMUL_CONTINUE)
1152 		return rc;
1153 
1154 	change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF
1155 		| EFLG_TF | EFLG_DF | EFLG_NT | EFLG_RF | EFLG_AC | EFLG_ID;
1156 
1157 	switch(ctxt->mode) {
1158 	case X86EMUL_MODE_PROT64:
1159 	case X86EMUL_MODE_PROT32:
1160 	case X86EMUL_MODE_PROT16:
1161 		if (cpl == 0)
1162 			change_mask |= EFLG_IOPL;
1163 		if (cpl <= iopl)
1164 			change_mask |= EFLG_IF;
1165 		break;
1166 	case X86EMUL_MODE_VM86:
1167 		if (iopl < 3)
1168 			return emulate_gp(ctxt, 0);
1169 		change_mask |= EFLG_IF;
1170 		break;
1171 	default: /* real mode */
1172 		change_mask |= (EFLG_IOPL | EFLG_IF);
1173 		break;
1174 	}
1175 
1176 	*(unsigned long *)dest =
1177 		(ctxt->eflags & ~change_mask) | (val & change_mask);
1178 
1179 	return rc;
1180 }
1181 
1182 static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt,
1183 			      struct x86_emulate_ops *ops, int seg)
1184 {
1185 	struct decode_cache *c = &ctxt->decode;
1186 
1187 	c->src.val = ops->get_segment_selector(seg, ctxt->vcpu);
1188 
1189 	emulate_push(ctxt, ops);
1190 }
1191 
1192 static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
1193 			     struct x86_emulate_ops *ops, int seg)
1194 {
1195 	struct decode_cache *c = &ctxt->decode;
1196 	unsigned long selector;
1197 	int rc;
1198 
1199 	rc = emulate_pop(ctxt, ops, &selector, c->op_bytes);
1200 	if (rc != X86EMUL_CONTINUE)
1201 		return rc;
1202 
1203 	rc = load_segment_descriptor(ctxt, ops, (u16)selector, seg);
1204 	return rc;
1205 }
1206 
1207 static int emulate_pusha(struct x86_emulate_ctxt *ctxt,
1208 			  struct x86_emulate_ops *ops)
1209 {
1210 	struct decode_cache *c = &ctxt->decode;
1211 	unsigned long old_esp = c->regs[VCPU_REGS_RSP];
1212 	int rc = X86EMUL_CONTINUE;
1213 	int reg = VCPU_REGS_RAX;
1214 
1215 	while (reg <= VCPU_REGS_RDI) {
1216 		(reg == VCPU_REGS_RSP) ?
1217 		(c->src.val = old_esp) : (c->src.val = c->regs[reg]);
1218 
1219 		emulate_push(ctxt, ops);
1220 
1221 		rc = writeback(ctxt, ops);
1222 		if (rc != X86EMUL_CONTINUE)
1223 			return rc;
1224 
1225 		++reg;
1226 	}
1227 
1228 	/* Disable writeback. */
1229 	c->dst.type = OP_NONE;
1230 
1231 	return rc;
1232 }
1233 
1234 static int emulate_popa(struct x86_emulate_ctxt *ctxt,
1235 			struct x86_emulate_ops *ops)
1236 {
1237 	struct decode_cache *c = &ctxt->decode;
1238 	int rc = X86EMUL_CONTINUE;
1239 	int reg = VCPU_REGS_RDI;
1240 
1241 	while (reg >= VCPU_REGS_RAX) {
1242 		if (reg == VCPU_REGS_RSP) {
1243 			register_address_increment(c, &c->regs[VCPU_REGS_RSP],
1244 							c->op_bytes);
1245 			--reg;
1246 		}
1247 
1248 		rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes);
1249 		if (rc != X86EMUL_CONTINUE)
1250 			break;
1251 		--reg;
1252 	}
1253 	return rc;
1254 }
1255 
1256 int emulate_int_real(struct x86_emulate_ctxt *ctxt,
1257 			       struct x86_emulate_ops *ops, int irq)
1258 {
1259 	struct decode_cache *c = &ctxt->decode;
1260 	int rc;
1261 	struct desc_ptr dt;
1262 	gva_t cs_addr;
1263 	gva_t eip_addr;
1264 	u16 cs, eip;
1265 
1266 	/* TODO: Add limit checks */
1267 	c->src.val = ctxt->eflags;
1268 	emulate_push(ctxt, ops);
1269 	rc = writeback(ctxt, ops);
1270 	if (rc != X86EMUL_CONTINUE)
1271 		return rc;
1272 
1273 	ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC);
1274 
1275 	c->src.val = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
1276 	emulate_push(ctxt, ops);
1277 	rc = writeback(ctxt, ops);
1278 	if (rc != X86EMUL_CONTINUE)
1279 		return rc;
1280 
1281 	c->src.val = c->eip;
1282 	emulate_push(ctxt, ops);
1283 	rc = writeback(ctxt, ops);
1284 	if (rc != X86EMUL_CONTINUE)
1285 		return rc;
1286 
1287 	c->dst.type = OP_NONE;
1288 
1289 	ops->get_idt(&dt, ctxt->vcpu);
1290 
1291 	eip_addr = dt.address + (irq << 2);
1292 	cs_addr = dt.address + (irq << 2) + 2;
1293 
1294 	rc = ops->read_std(cs_addr, &cs, 2, ctxt->vcpu, &ctxt->exception);
1295 	if (rc != X86EMUL_CONTINUE)
1296 		return rc;
1297 
1298 	rc = ops->read_std(eip_addr, &eip, 2, ctxt->vcpu, &ctxt->exception);
1299 	if (rc != X86EMUL_CONTINUE)
1300 		return rc;
1301 
1302 	rc = load_segment_descriptor(ctxt, ops, cs, VCPU_SREG_CS);
1303 	if (rc != X86EMUL_CONTINUE)
1304 		return rc;
1305 
1306 	c->eip = eip;
1307 
1308 	return rc;
1309 }
1310 
1311 static int emulate_int(struct x86_emulate_ctxt *ctxt,
1312 		       struct x86_emulate_ops *ops, int irq)
1313 {
1314 	switch(ctxt->mode) {
1315 	case X86EMUL_MODE_REAL:
1316 		return emulate_int_real(ctxt, ops, irq);
1317 	case X86EMUL_MODE_VM86:
1318 	case X86EMUL_MODE_PROT16:
1319 	case X86EMUL_MODE_PROT32:
1320 	case X86EMUL_MODE_PROT64:
1321 	default:
1322 		/* Protected mode interrupts unimplemented yet */
1323 		return X86EMUL_UNHANDLEABLE;
1324 	}
1325 }
1326 
1327 static int emulate_iret_real(struct x86_emulate_ctxt *ctxt,
1328 			     struct x86_emulate_ops *ops)
1329 {
1330 	struct decode_cache *c = &ctxt->decode;
1331 	int rc = X86EMUL_CONTINUE;
1332 	unsigned long temp_eip = 0;
1333 	unsigned long temp_eflags = 0;
1334 	unsigned long cs = 0;
1335 	unsigned long mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_TF |
1336 			     EFLG_IF | EFLG_DF | EFLG_OF | EFLG_IOPL | EFLG_NT | EFLG_RF |
1337 			     EFLG_AC | EFLG_ID | (1 << 1); /* Last one is the reserved bit */
1338 	unsigned long vm86_mask = EFLG_VM | EFLG_VIF | EFLG_VIP;
1339 
1340 	/* TODO: Add stack limit check */
1341 
1342 	rc = emulate_pop(ctxt, ops, &temp_eip, c->op_bytes);
1343 
1344 	if (rc != X86EMUL_CONTINUE)
1345 		return rc;
1346 
1347 	if (temp_eip & ~0xffff)
1348 		return emulate_gp(ctxt, 0);
1349 
1350 	rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
1351 
1352 	if (rc != X86EMUL_CONTINUE)
1353 		return rc;
1354 
1355 	rc = emulate_pop(ctxt, ops, &temp_eflags, c->op_bytes);
1356 
1357 	if (rc != X86EMUL_CONTINUE)
1358 		return rc;
1359 
1360 	rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS);
1361 
1362 	if (rc != X86EMUL_CONTINUE)
1363 		return rc;
1364 
1365 	c->eip = temp_eip;
1366 
1367 
1368 	if (c->op_bytes == 4)
1369 		ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
1370 	else if (c->op_bytes == 2) {
1371 		ctxt->eflags &= ~0xffff;
1372 		ctxt->eflags |= temp_eflags;
1373 	}
1374 
1375 	ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
1376 	ctxt->eflags |= EFLG_RESERVED_ONE_MASK;
1377 
1378 	return rc;
1379 }
1380 
1381 static inline int emulate_iret(struct x86_emulate_ctxt *ctxt,
1382 				    struct x86_emulate_ops* ops)
1383 {
1384 	switch(ctxt->mode) {
1385 	case X86EMUL_MODE_REAL:
1386 		return emulate_iret_real(ctxt, ops);
1387 	case X86EMUL_MODE_VM86:
1388 	case X86EMUL_MODE_PROT16:
1389 	case X86EMUL_MODE_PROT32:
1390 	case X86EMUL_MODE_PROT64:
1391 	default:
1392 		/* iret from protected mode unimplemented yet */
1393 		return X86EMUL_UNHANDLEABLE;
1394 	}
1395 }
1396 
1397 static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
1398 				struct x86_emulate_ops *ops)
1399 {
1400 	struct decode_cache *c = &ctxt->decode;
1401 
1402 	return emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes);
1403 }
1404 
1405 static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt)
1406 {
1407 	struct decode_cache *c = &ctxt->decode;
1408 	switch (c->modrm_reg) {
1409 	case 0:	/* rol */
1410 		emulate_2op_SrcB("rol", c->src, c->dst, ctxt->eflags);
1411 		break;
1412 	case 1:	/* ror */
1413 		emulate_2op_SrcB("ror", c->src, c->dst, ctxt->eflags);
1414 		break;
1415 	case 2:	/* rcl */
1416 		emulate_2op_SrcB("rcl", c->src, c->dst, ctxt->eflags);
1417 		break;
1418 	case 3:	/* rcr */
1419 		emulate_2op_SrcB("rcr", c->src, c->dst, ctxt->eflags);
1420 		break;
1421 	case 4:	/* sal/shl */
1422 	case 6:	/* sal/shl */
1423 		emulate_2op_SrcB("sal", c->src, c->dst, ctxt->eflags);
1424 		break;
1425 	case 5:	/* shr */
1426 		emulate_2op_SrcB("shr", c->src, c->dst, ctxt->eflags);
1427 		break;
1428 	case 7:	/* sar */
1429 		emulate_2op_SrcB("sar", c->src, c->dst, ctxt->eflags);
1430 		break;
1431 	}
1432 }
1433 
1434 static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
1435 			       struct x86_emulate_ops *ops)
1436 {
1437 	struct decode_cache *c = &ctxt->decode;
1438 	unsigned long *rax = &c->regs[VCPU_REGS_RAX];
1439 	unsigned long *rdx = &c->regs[VCPU_REGS_RDX];
1440 	u8 de = 0;
1441 
1442 	switch (c->modrm_reg) {
1443 	case 0 ... 1:	/* test */
1444 		emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
1445 		break;
1446 	case 2:	/* not */
1447 		c->dst.val = ~c->dst.val;
1448 		break;
1449 	case 3:	/* neg */
1450 		emulate_1op("neg", c->dst, ctxt->eflags);
1451 		break;
1452 	case 4: /* mul */
1453 		emulate_1op_rax_rdx("mul", c->src, *rax, *rdx, ctxt->eflags);
1454 		break;
1455 	case 5: /* imul */
1456 		emulate_1op_rax_rdx("imul", c->src, *rax, *rdx, ctxt->eflags);
1457 		break;
1458 	case 6: /* div */
1459 		emulate_1op_rax_rdx_ex("div", c->src, *rax, *rdx,
1460 				       ctxt->eflags, de);
1461 		break;
1462 	case 7: /* idiv */
1463 		emulate_1op_rax_rdx_ex("idiv", c->src, *rax, *rdx,
1464 				       ctxt->eflags, de);
1465 		break;
1466 	default:
1467 		return X86EMUL_UNHANDLEABLE;
1468 	}
1469 	if (de)
1470 		return emulate_de(ctxt);
1471 	return X86EMUL_CONTINUE;
1472 }
1473 
1474 static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
1475 			       struct x86_emulate_ops *ops)
1476 {
1477 	struct decode_cache *c = &ctxt->decode;
1478 
1479 	switch (c->modrm_reg) {
1480 	case 0:	/* inc */
1481 		emulate_1op("inc", c->dst, ctxt->eflags);
1482 		break;
1483 	case 1:	/* dec */
1484 		emulate_1op("dec", c->dst, ctxt->eflags);
1485 		break;
1486 	case 2: /* call near abs */ {
1487 		long int old_eip;
1488 		old_eip = c->eip;
1489 		c->eip = c->src.val;
1490 		c->src.val = old_eip;
1491 		emulate_push(ctxt, ops);
1492 		break;
1493 	}
1494 	case 4: /* jmp abs */
1495 		c->eip = c->src.val;
1496 		break;
1497 	case 6:	/* push */
1498 		emulate_push(ctxt, ops);
1499 		break;
1500 	}
1501 	return X86EMUL_CONTINUE;
1502 }
1503 
1504 static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
1505 			       struct x86_emulate_ops *ops)
1506 {
1507 	struct decode_cache *c = &ctxt->decode;
1508 	u64 old = c->dst.orig_val64;
1509 
1510 	if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) ||
1511 	    ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) {
1512 		c->regs[VCPU_REGS_RAX] = (u32) (old >> 0);
1513 		c->regs[VCPU_REGS_RDX] = (u32) (old >> 32);
1514 		ctxt->eflags &= ~EFLG_ZF;
1515 	} else {
1516 		c->dst.val64 = ((u64)c->regs[VCPU_REGS_RCX] << 32) |
1517 			(u32) c->regs[VCPU_REGS_RBX];
1518 
1519 		ctxt->eflags |= EFLG_ZF;
1520 	}
1521 	return X86EMUL_CONTINUE;
1522 }
1523 
1524 static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
1525 			   struct x86_emulate_ops *ops)
1526 {
1527 	struct decode_cache *c = &ctxt->decode;
1528 	int rc;
1529 	unsigned long cs;
1530 
1531 	rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes);
1532 	if (rc != X86EMUL_CONTINUE)
1533 		return rc;
1534 	if (c->op_bytes == 4)
1535 		c->eip = (u32)c->eip;
1536 	rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
1537 	if (rc != X86EMUL_CONTINUE)
1538 		return rc;
1539 	rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS);
1540 	return rc;
1541 }
1542 
1543 static int emulate_load_segment(struct x86_emulate_ctxt *ctxt,
1544 			   struct x86_emulate_ops *ops, int seg)
1545 {
1546 	struct decode_cache *c = &ctxt->decode;
1547 	unsigned short sel;
1548 	int rc;
1549 
1550 	memcpy(&sel, c->src.valptr + c->op_bytes, 2);
1551 
1552 	rc = load_segment_descriptor(ctxt, ops, sel, seg);
1553 	if (rc != X86EMUL_CONTINUE)
1554 		return rc;
1555 
1556 	c->dst.val = c->src.val;
1557 	return rc;
1558 }
1559 
1560 static inline void
1561 setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
1562 			struct x86_emulate_ops *ops, struct desc_struct *cs,
1563 			struct desc_struct *ss)
1564 {
1565 	memset(cs, 0, sizeof(struct desc_struct));
1566 	ops->get_cached_descriptor(cs, NULL, VCPU_SREG_CS, ctxt->vcpu);
1567 	memset(ss, 0, sizeof(struct desc_struct));
1568 
1569 	cs->l = 0;		/* will be adjusted later */
1570 	set_desc_base(cs, 0);	/* flat segment */
1571 	cs->g = 1;		/* 4kb granularity */
1572 	set_desc_limit(cs, 0xfffff);	/* 4GB limit */
1573 	cs->type = 0x0b;	/* Read, Execute, Accessed */
1574 	cs->s = 1;
1575 	cs->dpl = 0;		/* will be adjusted later */
1576 	cs->p = 1;
1577 	cs->d = 1;
1578 
1579 	set_desc_base(ss, 0);	/* flat segment */
1580 	set_desc_limit(ss, 0xfffff);	/* 4GB limit */
1581 	ss->g = 1;		/* 4kb granularity */
1582 	ss->s = 1;
1583 	ss->type = 0x03;	/* Read/Write, Accessed */
1584 	ss->d = 1;		/* 32bit stack segment */
1585 	ss->dpl = 0;
1586 	ss->p = 1;
1587 }
1588 
1589 static int
1590 emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
1591 {
1592 	struct decode_cache *c = &ctxt->decode;
1593 	struct desc_struct cs, ss;
1594 	u64 msr_data;
1595 	u16 cs_sel, ss_sel;
1596 
1597 	/* syscall is not available in real mode */
1598 	if (ctxt->mode == X86EMUL_MODE_REAL ||
1599 	    ctxt->mode == X86EMUL_MODE_VM86)
1600 		return emulate_ud(ctxt);
1601 
1602 	setup_syscalls_segments(ctxt, ops, &cs, &ss);
1603 
1604 	ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
1605 	msr_data >>= 32;
1606 	cs_sel = (u16)(msr_data & 0xfffc);
1607 	ss_sel = (u16)(msr_data + 8);
1608 
1609 	if (is_long_mode(ctxt->vcpu)) {
1610 		cs.d = 0;
1611 		cs.l = 1;
1612 	}
1613 	ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu);
1614 	ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu);
1615 	ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu);
1616 	ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu);
1617 
1618 	c->regs[VCPU_REGS_RCX] = c->eip;
1619 	if (is_long_mode(ctxt->vcpu)) {
1620 #ifdef CONFIG_X86_64
1621 		c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF;
1622 
1623 		ops->get_msr(ctxt->vcpu,
1624 			     ctxt->mode == X86EMUL_MODE_PROT64 ?
1625 			     MSR_LSTAR : MSR_CSTAR, &msr_data);
1626 		c->eip = msr_data;
1627 
1628 		ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data);
1629 		ctxt->eflags &= ~(msr_data | EFLG_RF);
1630 #endif
1631 	} else {
1632 		/* legacy mode */
1633 		ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
1634 		c->eip = (u32)msr_data;
1635 
1636 		ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
1637 	}
1638 
1639 	return X86EMUL_CONTINUE;
1640 }
1641 
1642 static int
1643 emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
1644 {
1645 	struct decode_cache *c = &ctxt->decode;
1646 	struct desc_struct cs, ss;
1647 	u64 msr_data;
1648 	u16 cs_sel, ss_sel;
1649 
1650 	/* inject #GP if in real mode */
1651 	if (ctxt->mode == X86EMUL_MODE_REAL)
1652 		return emulate_gp(ctxt, 0);
1653 
1654 	/* XXX sysenter/sysexit have not been tested in 64bit mode.
1655 	* Therefore, we inject an #UD.
1656 	*/
1657 	if (ctxt->mode == X86EMUL_MODE_PROT64)
1658 		return emulate_ud(ctxt);
1659 
1660 	setup_syscalls_segments(ctxt, ops, &cs, &ss);
1661 
1662 	ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
1663 	switch (ctxt->mode) {
1664 	case X86EMUL_MODE_PROT32:
1665 		if ((msr_data & 0xfffc) == 0x0)
1666 			return emulate_gp(ctxt, 0);
1667 		break;
1668 	case X86EMUL_MODE_PROT64:
1669 		if (msr_data == 0x0)
1670 			return emulate_gp(ctxt, 0);
1671 		break;
1672 	}
1673 
1674 	ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
1675 	cs_sel = (u16)msr_data;
1676 	cs_sel &= ~SELECTOR_RPL_MASK;
1677 	ss_sel = cs_sel + 8;
1678 	ss_sel &= ~SELECTOR_RPL_MASK;
1679 	if (ctxt->mode == X86EMUL_MODE_PROT64
1680 		|| is_long_mode(ctxt->vcpu)) {
1681 		cs.d = 0;
1682 		cs.l = 1;
1683 	}
1684 
1685 	ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu);
1686 	ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu);
1687 	ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu);
1688 	ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu);
1689 
1690 	ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data);
1691 	c->eip = msr_data;
1692 
1693 	ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data);
1694 	c->regs[VCPU_REGS_RSP] = msr_data;
1695 
1696 	return X86EMUL_CONTINUE;
1697 }
1698 
1699 static int
1700 emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
1701 {
1702 	struct decode_cache *c = &ctxt->decode;
1703 	struct desc_struct cs, ss;
1704 	u64 msr_data;
1705 	int usermode;
1706 	u16 cs_sel, ss_sel;
1707 
1708 	/* inject #GP if in real mode or Virtual 8086 mode */
1709 	if (ctxt->mode == X86EMUL_MODE_REAL ||
1710 	    ctxt->mode == X86EMUL_MODE_VM86)
1711 		return emulate_gp(ctxt, 0);
1712 
1713 	setup_syscalls_segments(ctxt, ops, &cs, &ss);
1714 
1715 	if ((c->rex_prefix & 0x8) != 0x0)
1716 		usermode = X86EMUL_MODE_PROT64;
1717 	else
1718 		usermode = X86EMUL_MODE_PROT32;
1719 
1720 	cs.dpl = 3;
1721 	ss.dpl = 3;
1722 	ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
1723 	switch (usermode) {
1724 	case X86EMUL_MODE_PROT32:
1725 		cs_sel = (u16)(msr_data + 16);
1726 		if ((msr_data & 0xfffc) == 0x0)
1727 			return emulate_gp(ctxt, 0);
1728 		ss_sel = (u16)(msr_data + 24);
1729 		break;
1730 	case X86EMUL_MODE_PROT64:
1731 		cs_sel = (u16)(msr_data + 32);
1732 		if (msr_data == 0x0)
1733 			return emulate_gp(ctxt, 0);
1734 		ss_sel = cs_sel + 8;
1735 		cs.d = 0;
1736 		cs.l = 1;
1737 		break;
1738 	}
1739 	cs_sel |= SELECTOR_RPL_MASK;
1740 	ss_sel |= SELECTOR_RPL_MASK;
1741 
1742 	ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu);
1743 	ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu);
1744 	ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu);
1745 	ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu);
1746 
1747 	c->eip = c->regs[VCPU_REGS_RDX];
1748 	c->regs[VCPU_REGS_RSP] = c->regs[VCPU_REGS_RCX];
1749 
1750 	return X86EMUL_CONTINUE;
1751 }
1752 
1753 static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt,
1754 			      struct x86_emulate_ops *ops)
1755 {
1756 	int iopl;
1757 	if (ctxt->mode == X86EMUL_MODE_REAL)
1758 		return false;
1759 	if (ctxt->mode == X86EMUL_MODE_VM86)
1760 		return true;
1761 	iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
1762 	return ops->cpl(ctxt->vcpu) > iopl;
1763 }
1764 
1765 static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
1766 					    struct x86_emulate_ops *ops,
1767 					    u16 port, u16 len)
1768 {
1769 	struct desc_struct tr_seg;
1770 	u32 base3;
1771 	int r;
1772 	u16 io_bitmap_ptr, perm, bit_idx = port & 0x7;
1773 	unsigned mask = (1 << len) - 1;
1774 	unsigned long base;
1775 
1776 	ops->get_cached_descriptor(&tr_seg, &base3, VCPU_SREG_TR, ctxt->vcpu);
1777 	if (!tr_seg.p)
1778 		return false;
1779 	if (desc_limit_scaled(&tr_seg) < 103)
1780 		return false;
1781 	base = get_desc_base(&tr_seg);
1782 #ifdef CONFIG_X86_64
1783 	base |= ((u64)base3) << 32;
1784 #endif
1785 	r = ops->read_std(base + 102, &io_bitmap_ptr, 2, ctxt->vcpu, NULL);
1786 	if (r != X86EMUL_CONTINUE)
1787 		return false;
1788 	if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
1789 		return false;
1790 	r = ops->read_std(base + io_bitmap_ptr + port/8, &perm, 2, ctxt->vcpu,
1791 			  NULL);
1792 	if (r != X86EMUL_CONTINUE)
1793 		return false;
1794 	if ((perm >> bit_idx) & mask)
1795 		return false;
1796 	return true;
1797 }
1798 
1799 static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
1800 				 struct x86_emulate_ops *ops,
1801 				 u16 port, u16 len)
1802 {
1803 	if (ctxt->perm_ok)
1804 		return true;
1805 
1806 	if (emulator_bad_iopl(ctxt, ops))
1807 		if (!emulator_io_port_access_allowed(ctxt, ops, port, len))
1808 			return false;
1809 
1810 	ctxt->perm_ok = true;
1811 
1812 	return true;
1813 }
1814 
1815 static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
1816 				struct x86_emulate_ops *ops,
1817 				struct tss_segment_16 *tss)
1818 {
1819 	struct decode_cache *c = &ctxt->decode;
1820 
1821 	tss->ip = c->eip;
1822 	tss->flag = ctxt->eflags;
1823 	tss->ax = c->regs[VCPU_REGS_RAX];
1824 	tss->cx = c->regs[VCPU_REGS_RCX];
1825 	tss->dx = c->regs[VCPU_REGS_RDX];
1826 	tss->bx = c->regs[VCPU_REGS_RBX];
1827 	tss->sp = c->regs[VCPU_REGS_RSP];
1828 	tss->bp = c->regs[VCPU_REGS_RBP];
1829 	tss->si = c->regs[VCPU_REGS_RSI];
1830 	tss->di = c->regs[VCPU_REGS_RDI];
1831 
1832 	tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu);
1833 	tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
1834 	tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu);
1835 	tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu);
1836 	tss->ldt = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu);
1837 }
1838 
1839 static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
1840 				 struct x86_emulate_ops *ops,
1841 				 struct tss_segment_16 *tss)
1842 {
1843 	struct decode_cache *c = &ctxt->decode;
1844 	int ret;
1845 
1846 	c->eip = tss->ip;
1847 	ctxt->eflags = tss->flag | 2;
1848 	c->regs[VCPU_REGS_RAX] = tss->ax;
1849 	c->regs[VCPU_REGS_RCX] = tss->cx;
1850 	c->regs[VCPU_REGS_RDX] = tss->dx;
1851 	c->regs[VCPU_REGS_RBX] = tss->bx;
1852 	c->regs[VCPU_REGS_RSP] = tss->sp;
1853 	c->regs[VCPU_REGS_RBP] = tss->bp;
1854 	c->regs[VCPU_REGS_RSI] = tss->si;
1855 	c->regs[VCPU_REGS_RDI] = tss->di;
1856 
1857 	/*
1858 	 * SDM says that segment selectors are loaded before segment
1859 	 * descriptors
1860 	 */
1861 	ops->set_segment_selector(tss->ldt, VCPU_SREG_LDTR, ctxt->vcpu);
1862 	ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu);
1863 	ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu);
1864 	ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu);
1865 	ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu);
1866 
1867 	/*
1868 	 * Now load segment descriptors. If fault happenes at this stage
1869 	 * it is handled in a context of new task
1870 	 */
1871 	ret = load_segment_descriptor(ctxt, ops, tss->ldt, VCPU_SREG_LDTR);
1872 	if (ret != X86EMUL_CONTINUE)
1873 		return ret;
1874 	ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES);
1875 	if (ret != X86EMUL_CONTINUE)
1876 		return ret;
1877 	ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS);
1878 	if (ret != X86EMUL_CONTINUE)
1879 		return ret;
1880 	ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS);
1881 	if (ret != X86EMUL_CONTINUE)
1882 		return ret;
1883 	ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS);
1884 	if (ret != X86EMUL_CONTINUE)
1885 		return ret;
1886 
1887 	return X86EMUL_CONTINUE;
1888 }
1889 
1890 static int task_switch_16(struct x86_emulate_ctxt *ctxt,
1891 			  struct x86_emulate_ops *ops,
1892 			  u16 tss_selector, u16 old_tss_sel,
1893 			  ulong old_tss_base, struct desc_struct *new_desc)
1894 {
1895 	struct tss_segment_16 tss_seg;
1896 	int ret;
1897 	u32 new_tss_base = get_desc_base(new_desc);
1898 
1899 	ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
1900 			    &ctxt->exception);
1901 	if (ret != X86EMUL_CONTINUE)
1902 		/* FIXME: need to provide precise fault address */
1903 		return ret;
1904 
1905 	save_state_to_tss16(ctxt, ops, &tss_seg);
1906 
1907 	ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
1908 			     &ctxt->exception);
1909 	if (ret != X86EMUL_CONTINUE)
1910 		/* FIXME: need to provide precise fault address */
1911 		return ret;
1912 
1913 	ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
1914 			    &ctxt->exception);
1915 	if (ret != X86EMUL_CONTINUE)
1916 		/* FIXME: need to provide precise fault address */
1917 		return ret;
1918 
1919 	if (old_tss_sel != 0xffff) {
1920 		tss_seg.prev_task_link = old_tss_sel;
1921 
1922 		ret = ops->write_std(new_tss_base,
1923 				     &tss_seg.prev_task_link,
1924 				     sizeof tss_seg.prev_task_link,
1925 				     ctxt->vcpu, &ctxt->exception);
1926 		if (ret != X86EMUL_CONTINUE)
1927 			/* FIXME: need to provide precise fault address */
1928 			return ret;
1929 	}
1930 
1931 	return load_state_from_tss16(ctxt, ops, &tss_seg);
1932 }
1933 
1934 static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
1935 				struct x86_emulate_ops *ops,
1936 				struct tss_segment_32 *tss)
1937 {
1938 	struct decode_cache *c = &ctxt->decode;
1939 
1940 	tss->cr3 = ops->get_cr(3, ctxt->vcpu);
1941 	tss->eip = c->eip;
1942 	tss->eflags = ctxt->eflags;
1943 	tss->eax = c->regs[VCPU_REGS_RAX];
1944 	tss->ecx = c->regs[VCPU_REGS_RCX];
1945 	tss->edx = c->regs[VCPU_REGS_RDX];
1946 	tss->ebx = c->regs[VCPU_REGS_RBX];
1947 	tss->esp = c->regs[VCPU_REGS_RSP];
1948 	tss->ebp = c->regs[VCPU_REGS_RBP];
1949 	tss->esi = c->regs[VCPU_REGS_RSI];
1950 	tss->edi = c->regs[VCPU_REGS_RDI];
1951 
1952 	tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu);
1953 	tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
1954 	tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu);
1955 	tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu);
1956 	tss->fs = ops->get_segment_selector(VCPU_SREG_FS, ctxt->vcpu);
1957 	tss->gs = ops->get_segment_selector(VCPU_SREG_GS, ctxt->vcpu);
1958 	tss->ldt_selector = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu);
1959 }
1960 
1961 static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
1962 				 struct x86_emulate_ops *ops,
1963 				 struct tss_segment_32 *tss)
1964 {
1965 	struct decode_cache *c = &ctxt->decode;
1966 	int ret;
1967 
1968 	if (ops->set_cr(3, tss->cr3, ctxt->vcpu))
1969 		return emulate_gp(ctxt, 0);
1970 	c->eip = tss->eip;
1971 	ctxt->eflags = tss->eflags | 2;
1972 	c->regs[VCPU_REGS_RAX] = tss->eax;
1973 	c->regs[VCPU_REGS_RCX] = tss->ecx;
1974 	c->regs[VCPU_REGS_RDX] = tss->edx;
1975 	c->regs[VCPU_REGS_RBX] = tss->ebx;
1976 	c->regs[VCPU_REGS_RSP] = tss->esp;
1977 	c->regs[VCPU_REGS_RBP] = tss->ebp;
1978 	c->regs[VCPU_REGS_RSI] = tss->esi;
1979 	c->regs[VCPU_REGS_RDI] = tss->edi;
1980 
1981 	/*
1982 	 * SDM says that segment selectors are loaded before segment
1983 	 * descriptors
1984 	 */
1985 	ops->set_segment_selector(tss->ldt_selector, VCPU_SREG_LDTR, ctxt->vcpu);
1986 	ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu);
1987 	ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu);
1988 	ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu);
1989 	ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu);
1990 	ops->set_segment_selector(tss->fs, VCPU_SREG_FS, ctxt->vcpu);
1991 	ops->set_segment_selector(tss->gs, VCPU_SREG_GS, ctxt->vcpu);
1992 
1993 	/*
1994 	 * Now load segment descriptors. If fault happenes at this stage
1995 	 * it is handled in a context of new task
1996 	 */
1997 	ret = load_segment_descriptor(ctxt, ops, tss->ldt_selector, VCPU_SREG_LDTR);
1998 	if (ret != X86EMUL_CONTINUE)
1999 		return ret;
2000 	ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES);
2001 	if (ret != X86EMUL_CONTINUE)
2002 		return ret;
2003 	ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS);
2004 	if (ret != X86EMUL_CONTINUE)
2005 		return ret;
2006 	ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS);
2007 	if (ret != X86EMUL_CONTINUE)
2008 		return ret;
2009 	ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS);
2010 	if (ret != X86EMUL_CONTINUE)
2011 		return ret;
2012 	ret = load_segment_descriptor(ctxt, ops, tss->fs, VCPU_SREG_FS);
2013 	if (ret != X86EMUL_CONTINUE)
2014 		return ret;
2015 	ret = load_segment_descriptor(ctxt, ops, tss->gs, VCPU_SREG_GS);
2016 	if (ret != X86EMUL_CONTINUE)
2017 		return ret;
2018 
2019 	return X86EMUL_CONTINUE;
2020 }
2021 
2022 static int task_switch_32(struct x86_emulate_ctxt *ctxt,
2023 			  struct x86_emulate_ops *ops,
2024 			  u16 tss_selector, u16 old_tss_sel,
2025 			  ulong old_tss_base, struct desc_struct *new_desc)
2026 {
2027 	struct tss_segment_32 tss_seg;
2028 	int ret;
2029 	u32 new_tss_base = get_desc_base(new_desc);
2030 
2031 	ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2032 			    &ctxt->exception);
2033 	if (ret != X86EMUL_CONTINUE)
2034 		/* FIXME: need to provide precise fault address */
2035 		return ret;
2036 
2037 	save_state_to_tss32(ctxt, ops, &tss_seg);
2038 
2039 	ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2040 			     &ctxt->exception);
2041 	if (ret != X86EMUL_CONTINUE)
2042 		/* FIXME: need to provide precise fault address */
2043 		return ret;
2044 
2045 	ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2046 			    &ctxt->exception);
2047 	if (ret != X86EMUL_CONTINUE)
2048 		/* FIXME: need to provide precise fault address */
2049 		return ret;
2050 
2051 	if (old_tss_sel != 0xffff) {
2052 		tss_seg.prev_task_link = old_tss_sel;
2053 
2054 		ret = ops->write_std(new_tss_base,
2055 				     &tss_seg.prev_task_link,
2056 				     sizeof tss_seg.prev_task_link,
2057 				     ctxt->vcpu, &ctxt->exception);
2058 		if (ret != X86EMUL_CONTINUE)
2059 			/* FIXME: need to provide precise fault address */
2060 			return ret;
2061 	}
2062 
2063 	return load_state_from_tss32(ctxt, ops, &tss_seg);
2064 }
2065 
2066 static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
2067 				   struct x86_emulate_ops *ops,
2068 				   u16 tss_selector, int reason,
2069 				   bool has_error_code, u32 error_code)
2070 {
2071 	struct desc_struct curr_tss_desc, next_tss_desc;
2072 	int ret;
2073 	u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu);
2074 	ulong old_tss_base =
2075 		ops->get_cached_segment_base(VCPU_SREG_TR, ctxt->vcpu);
2076 	u32 desc_limit;
2077 
2078 	/* FIXME: old_tss_base == ~0 ? */
2079 
2080 	ret = read_segment_descriptor(ctxt, ops, tss_selector, &next_tss_desc);
2081 	if (ret != X86EMUL_CONTINUE)
2082 		return ret;
2083 	ret = read_segment_descriptor(ctxt, ops, old_tss_sel, &curr_tss_desc);
2084 	if (ret != X86EMUL_CONTINUE)
2085 		return ret;
2086 
2087 	/* FIXME: check that next_tss_desc is tss */
2088 
2089 	if (reason != TASK_SWITCH_IRET) {
2090 		if ((tss_selector & 3) > next_tss_desc.dpl ||
2091 		    ops->cpl(ctxt->vcpu) > next_tss_desc.dpl)
2092 			return emulate_gp(ctxt, 0);
2093 	}
2094 
2095 	desc_limit = desc_limit_scaled(&next_tss_desc);
2096 	if (!next_tss_desc.p ||
2097 	    ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
2098 	     desc_limit < 0x2b)) {
2099 		emulate_ts(ctxt, tss_selector & 0xfffc);
2100 		return X86EMUL_PROPAGATE_FAULT;
2101 	}
2102 
2103 	if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
2104 		curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
2105 		write_segment_descriptor(ctxt, ops, old_tss_sel,
2106 					 &curr_tss_desc);
2107 	}
2108 
2109 	if (reason == TASK_SWITCH_IRET)
2110 		ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;
2111 
2112 	/* set back link to prev task only if NT bit is set in eflags
2113 	   note that old_tss_sel is not used afetr this point */
2114 	if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
2115 		old_tss_sel = 0xffff;
2116 
2117 	if (next_tss_desc.type & 8)
2118 		ret = task_switch_32(ctxt, ops, tss_selector, old_tss_sel,
2119 				     old_tss_base, &next_tss_desc);
2120 	else
2121 		ret = task_switch_16(ctxt, ops, tss_selector, old_tss_sel,
2122 				     old_tss_base, &next_tss_desc);
2123 	if (ret != X86EMUL_CONTINUE)
2124 		return ret;
2125 
2126 	if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
2127 		ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;
2128 
2129 	if (reason != TASK_SWITCH_IRET) {
2130 		next_tss_desc.type |= (1 << 1); /* set busy flag */
2131 		write_segment_descriptor(ctxt, ops, tss_selector,
2132 					 &next_tss_desc);
2133 	}
2134 
2135 	ops->set_cr(0,  ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu);
2136 	ops->set_cached_descriptor(&next_tss_desc, 0, VCPU_SREG_TR, ctxt->vcpu);
2137 	ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu);
2138 
2139 	if (has_error_code) {
2140 		struct decode_cache *c = &ctxt->decode;
2141 
2142 		c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
2143 		c->lock_prefix = 0;
2144 		c->src.val = (unsigned long) error_code;
2145 		emulate_push(ctxt, ops);
2146 	}
2147 
2148 	return ret;
2149 }
2150 
2151 int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
2152 			 u16 tss_selector, int reason,
2153 			 bool has_error_code, u32 error_code)
2154 {
2155 	struct x86_emulate_ops *ops = ctxt->ops;
2156 	struct decode_cache *c = &ctxt->decode;
2157 	int rc;
2158 
2159 	c->eip = ctxt->eip;
2160 	c->dst.type = OP_NONE;
2161 
2162 	rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason,
2163 				     has_error_code, error_code);
2164 
2165 	if (rc == X86EMUL_CONTINUE) {
2166 		rc = writeback(ctxt, ops);
2167 		if (rc == X86EMUL_CONTINUE)
2168 			ctxt->eip = c->eip;
2169 	}
2170 
2171 	return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
2172 }
2173 
2174 static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg,
2175 			    int reg, struct operand *op)
2176 {
2177 	struct decode_cache *c = &ctxt->decode;
2178 	int df = (ctxt->eflags & EFLG_DF) ? -1 : 1;
2179 
2180 	register_address_increment(c, &c->regs[reg], df * op->bytes);
2181 	op->addr.mem.ea = register_address(c, c->regs[reg]);
2182 	op->addr.mem.seg = seg;
2183 }
2184 
2185 static int em_push(struct x86_emulate_ctxt *ctxt)
2186 {
2187 	emulate_push(ctxt, ctxt->ops);
2188 	return X86EMUL_CONTINUE;
2189 }
2190 
2191 static int em_das(struct x86_emulate_ctxt *ctxt)
2192 {
2193 	struct decode_cache *c = &ctxt->decode;
2194 	u8 al, old_al;
2195 	bool af, cf, old_cf;
2196 
2197 	cf = ctxt->eflags & X86_EFLAGS_CF;
2198 	al = c->dst.val;
2199 
2200 	old_al = al;
2201 	old_cf = cf;
2202 	cf = false;
2203 	af = ctxt->eflags & X86_EFLAGS_AF;
2204 	if ((al & 0x0f) > 9 || af) {
2205 		al -= 6;
2206 		cf = old_cf | (al >= 250);
2207 		af = true;
2208 	} else {
2209 		af = false;
2210 	}
2211 	if (old_al > 0x99 || old_cf) {
2212 		al -= 0x60;
2213 		cf = true;
2214 	}
2215 
2216 	c->dst.val = al;
2217 	/* Set PF, ZF, SF */
2218 	c->src.type = OP_IMM;
2219 	c->src.val = 0;
2220 	c->src.bytes = 1;
2221 	emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags);
2222 	ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
2223 	if (cf)
2224 		ctxt->eflags |= X86_EFLAGS_CF;
2225 	if (af)
2226 		ctxt->eflags |= X86_EFLAGS_AF;
2227 	return X86EMUL_CONTINUE;
2228 }
2229 
2230 static int em_call_far(struct x86_emulate_ctxt *ctxt)
2231 {
2232 	struct decode_cache *c = &ctxt->decode;
2233 	u16 sel, old_cs;
2234 	ulong old_eip;
2235 	int rc;
2236 
2237 	old_cs = ctxt->ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
2238 	old_eip = c->eip;
2239 
2240 	memcpy(&sel, c->src.valptr + c->op_bytes, 2);
2241 	if (load_segment_descriptor(ctxt, ctxt->ops, sel, VCPU_SREG_CS))
2242 		return X86EMUL_CONTINUE;
2243 
2244 	c->eip = 0;
2245 	memcpy(&c->eip, c->src.valptr, c->op_bytes);
2246 
2247 	c->src.val = old_cs;
2248 	emulate_push(ctxt, ctxt->ops);
2249 	rc = writeback(ctxt, ctxt->ops);
2250 	if (rc != X86EMUL_CONTINUE)
2251 		return rc;
2252 
2253 	c->src.val = old_eip;
2254 	emulate_push(ctxt, ctxt->ops);
2255 	rc = writeback(ctxt, ctxt->ops);
2256 	if (rc != X86EMUL_CONTINUE)
2257 		return rc;
2258 
2259 	c->dst.type = OP_NONE;
2260 
2261 	return X86EMUL_CONTINUE;
2262 }
2263 
2264 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
2265 {
2266 	struct decode_cache *c = &ctxt->decode;
2267 	int rc;
2268 
2269 	c->dst.type = OP_REG;
2270 	c->dst.addr.reg = &c->eip;
2271 	c->dst.bytes = c->op_bytes;
2272 	rc = emulate_pop(ctxt, ctxt->ops, &c->dst.val, c->op_bytes);
2273 	if (rc != X86EMUL_CONTINUE)
2274 		return rc;
2275 	register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.val);
2276 	return X86EMUL_CONTINUE;
2277 }
2278 
2279 static int em_imul(struct x86_emulate_ctxt *ctxt)
2280 {
2281 	struct decode_cache *c = &ctxt->decode;
2282 
2283 	emulate_2op_SrcV_nobyte("imul", c->src, c->dst, ctxt->eflags);
2284 	return X86EMUL_CONTINUE;
2285 }
2286 
2287 static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
2288 {
2289 	struct decode_cache *c = &ctxt->decode;
2290 
2291 	c->dst.val = c->src2.val;
2292 	return em_imul(ctxt);
2293 }
2294 
2295 static int em_cwd(struct x86_emulate_ctxt *ctxt)
2296 {
2297 	struct decode_cache *c = &ctxt->decode;
2298 
2299 	c->dst.type = OP_REG;
2300 	c->dst.bytes = c->src.bytes;
2301 	c->dst.addr.reg = &c->regs[VCPU_REGS_RDX];
2302 	c->dst.val = ~((c->src.val >> (c->src.bytes * 8 - 1)) - 1);
2303 
2304 	return X86EMUL_CONTINUE;
2305 }
2306 
2307 static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
2308 {
2309 	unsigned cpl = ctxt->ops->cpl(ctxt->vcpu);
2310 	struct decode_cache *c = &ctxt->decode;
2311 	u64 tsc = 0;
2312 
2313 	if (cpl > 0 && (ctxt->ops->get_cr(4, ctxt->vcpu) & X86_CR4_TSD))
2314 		return emulate_gp(ctxt, 0);
2315 	ctxt->ops->get_msr(ctxt->vcpu, MSR_IA32_TSC, &tsc);
2316 	c->regs[VCPU_REGS_RAX] = (u32)tsc;
2317 	c->regs[VCPU_REGS_RDX] = tsc >> 32;
2318 	return X86EMUL_CONTINUE;
2319 }
2320 
2321 static int em_mov(struct x86_emulate_ctxt *ctxt)
2322 {
2323 	struct decode_cache *c = &ctxt->decode;
2324 	c->dst.val = c->src.val;
2325 	return X86EMUL_CONTINUE;
2326 }
2327 
/*
 * Shorthand used to build the opcode tables below:
 *   D(flags)          - entry with decode flags only
 *   N                 - entry with no flags (flags == 0)
 *   G(flags, group)   - entry that dispatches through a group table
 *   GD(flags, gdual)  - entry that dispatches through a dual group table
 *   I(flags, fn)      - entry with decode flags and an execute callback
 */
#define D(_flags) { .flags = (_flags) }
#define N    D(0)
#define G(_flags, _grp) { .flags = ((_flags) | Group), .u.group = (_grp) }
#define GD(_flags, _grp) { .flags = ((_flags) | Group | GroupDual), .u.gdual = (_grp) }
#define I(_flags, _exec) { .flags = (_flags), .u.execute = (_exec) }

/* Two consecutive entries: the ByteOp form followed by the full-size form. */
#define D2bv(_flags)        D((_flags) | ByteOp), D(_flags)
#define I2bv(_flags, _exec) I((_flags) | ByteOp, _exec), I(_flags, _exec)

/*
 * Six consecutive entries: mem<-reg, reg<-mem and acc<-imm, each as a
 * D2bv pair.  Lock is stripped from the two forms whose destination is
 * not memory.
 */
#define D6ALU(_flags) D2bv((_flags) | DstMem | SrcReg | ModRM),		\
		D2bv(((_flags) | DstReg | SrcMem | ModRM) & ~Lock),	\
		D2bv(((_flags) & ~Lock) | DstAcc | SrcImm)
2340 
2341 
2342 static struct opcode group1[] = {
2343 	X7(D(Lock)), N
2344 };
2345 
2346 static struct opcode group1A[] = {
2347 	D(DstMem | SrcNone | ModRM | Mov | Stack), N, N, N, N, N, N, N,
2348 };
2349 
2350 static struct opcode group3[] = {
2351 	D(DstMem | SrcImm | ModRM), D(DstMem | SrcImm | ModRM),
2352 	D(DstMem | SrcNone | ModRM | Lock), D(DstMem | SrcNone | ModRM | Lock),
2353 	X4(D(SrcMem | ModRM)),
2354 };
2355 
2356 static struct opcode group4[] = {
2357 	D(ByteOp | DstMem | SrcNone | ModRM | Lock), D(ByteOp | DstMem | SrcNone | ModRM | Lock),
2358 	N, N, N, N, N, N,
2359 };
2360 
2361 static struct opcode group5[] = {
2362 	D(DstMem | SrcNone | ModRM | Lock), D(DstMem | SrcNone | ModRM | Lock),
2363 	D(SrcMem | ModRM | Stack),
2364 	I(SrcMemFAddr | ModRM | ImplicitOps | Stack, em_call_far),
2365 	D(SrcMem | ModRM | Stack), D(SrcMemFAddr | ModRM | ImplicitOps),
2366 	D(SrcMem | ModRM | Stack), N,
2367 };
2368 
2369 static struct group_dual group7 = { {
2370 	N, N, D(ModRM | SrcMem | Priv), D(ModRM | SrcMem | Priv),
2371 	D(SrcNone | ModRM | DstMem | Mov), N,
2372 	D(SrcMem16 | ModRM | Mov | Priv),
2373 	D(SrcMem | ModRM | ByteOp | Priv | NoAccess),
2374 }, {
2375 	D(SrcNone | ModRM | Priv | VendorSpecific), N,
2376 	N, D(SrcNone | ModRM | Priv | VendorSpecific),
2377 	D(SrcNone | ModRM | DstMem | Mov), N,
2378 	D(SrcMem16 | ModRM | Mov | Priv), N,
2379 } };
2380 
2381 static struct opcode group8[] = {
2382 	N, N, N, N,
2383 	D(DstMem | SrcImmByte | ModRM), D(DstMem | SrcImmByte | ModRM | Lock),
2384 	D(DstMem | SrcImmByte | ModRM | Lock), D(DstMem | SrcImmByte | ModRM | Lock),
2385 };
2386 
2387 static struct group_dual group9 = { {
2388 	N, D(DstMem64 | ModRM | Lock), N, N, N, N, N, N,
2389 }, {
2390 	N, N, N, N, N, N, N, N,
2391 } };
2392 
2393 static struct opcode group11[] = {
2394 	I(DstMem | SrcImm | ModRM | Mov, em_mov), X7(D(Undefined)),
2395 };
2396 
2397 static struct opcode opcode_table[256] = {
2398 	/* 0x00 - 0x07 */
2399 	D6ALU(Lock),
2400 	D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64),
2401 	/* 0x08 - 0x0F */
2402 	D6ALU(Lock),
2403 	D(ImplicitOps | Stack | No64), N,
2404 	/* 0x10 - 0x17 */
2405 	D6ALU(Lock),
2406 	D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64),
2407 	/* 0x18 - 0x1F */
2408 	D6ALU(Lock),
2409 	D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64),
2410 	/* 0x20 - 0x27 */
2411 	D6ALU(Lock), N, N,
2412 	/* 0x28 - 0x2F */
2413 	D6ALU(Lock), N, I(ByteOp | DstAcc | No64, em_das),
2414 	/* 0x30 - 0x37 */
2415 	D6ALU(Lock), N, N,
2416 	/* 0x38 - 0x3F */
2417 	D6ALU(0), N, N,
2418 	/* 0x40 - 0x4F */
2419 	X16(D(DstReg)),
2420 	/* 0x50 - 0x57 */
2421 	X8(I(SrcReg | Stack, em_push)),
2422 	/* 0x58 - 0x5F */
2423 	X8(D(DstReg | Stack)),
2424 	/* 0x60 - 0x67 */
2425 	D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64),
2426 	N, D(DstReg | SrcMem32 | ModRM | Mov) /* movsxd (x86/64) */ ,
2427 	N, N, N, N,
2428 	/* 0x68 - 0x6F */
2429 	I(SrcImm | Mov | Stack, em_push),
2430 	I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
2431 	I(SrcImmByte | Mov | Stack, em_push),
2432 	I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
2433 	D2bv(DstDI | Mov | String), /* insb, insw/insd */
2434 	D2bv(SrcSI | ImplicitOps | String), /* outsb, outsw/outsd */
2435 	/* 0x70 - 0x7F */
2436 	X16(D(SrcImmByte)),
2437 	/* 0x80 - 0x87 */
2438 	G(ByteOp | DstMem | SrcImm | ModRM | Group, group1),
2439 	G(DstMem | SrcImm | ModRM | Group, group1),
2440 	G(ByteOp | DstMem | SrcImm | ModRM | No64 | Group, group1),
2441 	G(DstMem | SrcImmByte | ModRM | Group, group1),
2442 	D2bv(DstMem | SrcReg | ModRM), D2bv(DstMem | SrcReg | ModRM | Lock),
2443 	/* 0x88 - 0x8F */
2444 	I2bv(DstMem | SrcReg | ModRM | Mov, em_mov),
2445 	I2bv(DstReg | SrcMem | ModRM | Mov, em_mov),
2446 	D(DstMem | SrcNone | ModRM | Mov), D(ModRM | SrcMem | NoAccess | DstReg),
2447 	D(ImplicitOps | SrcMem16 | ModRM), G(0, group1A),
2448 	/* 0x90 - 0x97 */
2449 	X8(D(SrcAcc | DstReg)),
2450 	/* 0x98 - 0x9F */
2451 	D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
2452 	I(SrcImmFAddr | No64, em_call_far), N,
2453 	D(ImplicitOps | Stack), D(ImplicitOps | Stack), N, N,
2454 	/* 0xA0 - 0xA7 */
2455 	I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
2456 	I2bv(DstMem | SrcAcc | Mov | MemAbs, em_mov),
2457 	I2bv(SrcSI | DstDI | Mov | String, em_mov),
2458 	D2bv(SrcSI | DstDI | String),
2459 	/* 0xA8 - 0xAF */
2460 	D2bv(DstAcc | SrcImm),
2461 	I2bv(SrcAcc | DstDI | Mov | String, em_mov),
2462 	I2bv(SrcSI | DstAcc | Mov | String, em_mov),
2463 	D2bv(SrcAcc | DstDI | String),
2464 	/* 0xB0 - 0xB7 */
2465 	X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
2466 	/* 0xB8 - 0xBF */
2467 	X8(I(DstReg | SrcImm | Mov, em_mov)),
2468 	/* 0xC0 - 0xC7 */
2469 	D2bv(DstMem | SrcImmByte | ModRM),
2470 	I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm),
2471 	D(ImplicitOps | Stack),
2472 	D(DstReg | SrcMemFAddr | ModRM | No64), D(DstReg | SrcMemFAddr | ModRM | No64),
2473 	G(ByteOp, group11), G(0, group11),
2474 	/* 0xC8 - 0xCF */
2475 	N, N, N, D(ImplicitOps | Stack),
2476 	D(ImplicitOps), D(SrcImmByte), D(ImplicitOps | No64), D(ImplicitOps),
2477 	/* 0xD0 - 0xD7 */
2478 	D2bv(DstMem | SrcOne | ModRM), D2bv(DstMem | ModRM),
2479 	N, N, N, N,
2480 	/* 0xD8 - 0xDF */
2481 	N, N, N, N, N, N, N, N,
2482 	/* 0xE0 - 0xE7 */
2483 	X4(D(SrcImmByte)),
2484 	D2bv(SrcImmUByte | DstAcc), D2bv(SrcAcc | DstImmUByte),
2485 	/* 0xE8 - 0xEF */
2486 	D(SrcImm | Stack), D(SrcImm | ImplicitOps),
2487 	D(SrcImmFAddr | No64), D(SrcImmByte | ImplicitOps),
2488 	D2bv(SrcNone | DstAcc),	D2bv(SrcAcc | ImplicitOps),
2489 	/* 0xF0 - 0xF7 */
2490 	N, N, N, N,
2491 	D(ImplicitOps | Priv), D(ImplicitOps), G(ByteOp, group3), G(0, group3),
2492 	/* 0xF8 - 0xFF */
2493 	D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), D(ImplicitOps),
2494 	D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
2495 };
2496 
2497 static struct opcode twobyte_table[256] = {
2498 	/* 0x00 - 0x0F */
2499 	N, GD(0, &group7), N, N,
2500 	N, D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv), N,
2501 	D(ImplicitOps | Priv), D(ImplicitOps | Priv), N, N,
2502 	N, D(ImplicitOps | ModRM), N, N,
2503 	/* 0x10 - 0x1F */
2504 	N, N, N, N, N, N, N, N, D(ImplicitOps | ModRM), N, N, N, N, N, N, N,
2505 	/* 0x20 - 0x2F */
2506 	D(ModRM | DstMem | Priv | Op3264), D(ModRM | DstMem | Priv | Op3264),
2507 	D(ModRM | SrcMem | Priv | Op3264), D(ModRM | SrcMem | Priv | Op3264),
2508 	N, N, N, N,
2509 	N, N, N, N, N, N, N, N,
2510 	/* 0x30 - 0x3F */
2511 	D(ImplicitOps | Priv), I(ImplicitOps, em_rdtsc),
2512 	D(ImplicitOps | Priv), N,
2513 	D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv | VendorSpecific),
2514 	N, N,
2515 	N, N, N, N, N, N, N, N,
2516 	/* 0x40 - 0x4F */
2517 	X16(D(DstReg | SrcMem | ModRM | Mov)),
2518 	/* 0x50 - 0x5F */
2519 	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
2520 	/* 0x60 - 0x6F */
2521 	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
2522 	/* 0x70 - 0x7F */
2523 	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
2524 	/* 0x80 - 0x8F */
2525 	X16(D(SrcImm)),
2526 	/* 0x90 - 0x9F */
2527 	X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
2528 	/* 0xA0 - 0xA7 */
2529 	D(ImplicitOps | Stack), D(ImplicitOps | Stack),
2530 	N, D(DstMem | SrcReg | ModRM | BitOp),
2531 	D(DstMem | SrcReg | Src2ImmByte | ModRM),
2532 	D(DstMem | SrcReg | Src2CL | ModRM), N, N,
2533 	/* 0xA8 - 0xAF */
2534 	D(ImplicitOps | Stack), D(ImplicitOps | Stack),
2535 	N, D(DstMem | SrcReg | ModRM | BitOp | Lock),
2536 	D(DstMem | SrcReg | Src2ImmByte | ModRM),
2537 	D(DstMem | SrcReg | Src2CL | ModRM),
2538 	D(ModRM), I(DstReg | SrcMem | ModRM, em_imul),
2539 	/* 0xB0 - 0xB7 */
2540 	D2bv(DstMem | SrcReg | ModRM | Lock),
2541 	D(DstReg | SrcMemFAddr | ModRM), D(DstMem | SrcReg | ModRM | BitOp | Lock),
2542 	D(DstReg | SrcMemFAddr | ModRM), D(DstReg | SrcMemFAddr | ModRM),
2543 	D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
2544 	/* 0xB8 - 0xBF */
2545 	N, N,
2546 	G(BitOp, group8), D(DstMem | SrcReg | ModRM | BitOp | Lock),
2547 	D(DstReg | SrcMem | ModRM), D(DstReg | SrcMem | ModRM),
2548 	D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
2549 	/* 0xC0 - 0xCF */
2550 	D2bv(DstMem | SrcReg | ModRM | Lock),
2551 	N, D(DstMem | SrcReg | ModRM | Mov),
2552 	N, N, N, GD(0, &group9),
2553 	N, N, N, N, N, N, N, N,
2554 	/* 0xD0 - 0xDF */
2555 	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
2556 	/* 0xE0 - 0xEF */
2557 	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
2558 	/* 0xF0 - 0xFF */
2559 	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
2560 };
2561 
/* The table-building shorthand is not needed past this point. */
#undef D6ALU
#undef I2bv
#undef D2bv

#undef I
#undef GD
#undef G
#undef N
#undef D
2571 
2572 static unsigned imm_size(struct decode_cache *c)
2573 {
2574 	unsigned size;
2575 
2576 	size = (c->d & ByteOp) ? 1 : c->op_bytes;
2577 	if (size == 8)
2578 		size = 4;
2579 	return size;
2580 }
2581 
2582 static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
2583 		      unsigned size, bool sign_extension)
2584 {
2585 	struct decode_cache *c = &ctxt->decode;
2586 	struct x86_emulate_ops *ops = ctxt->ops;
2587 	int rc = X86EMUL_CONTINUE;
2588 
2589 	op->type = OP_IMM;
2590 	op->bytes = size;
2591 	op->addr.mem.ea = c->eip;
2592 	/* NB. Immediates are sign-extended as necessary. */
2593 	switch (op->bytes) {
2594 	case 1:
2595 		op->val = insn_fetch(s8, 1, c->eip);
2596 		break;
2597 	case 2:
2598 		op->val = insn_fetch(s16, 2, c->eip);
2599 		break;
2600 	case 4:
2601 		op->val = insn_fetch(s32, 4, c->eip);
2602 		break;
2603 	}
2604 	if (!sign_extension) {
2605 		switch (op->bytes) {
2606 		case 1:
2607 			op->val &= 0xff;
2608 			break;
2609 		case 2:
2610 			op->val &= 0xffff;
2611 			break;
2612 		case 4:
2613 			op->val &= 0xffffffff;
2614 			break;
2615 		}
2616 	}
2617 done:
2618 	return rc;
2619 }
2620 
2621 int
2622 x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
2623 {
2624 	struct x86_emulate_ops *ops = ctxt->ops;
2625 	struct decode_cache *c = &ctxt->decode;
2626 	int rc = X86EMUL_CONTINUE;
2627 	int mode = ctxt->mode;
2628 	int def_op_bytes, def_ad_bytes, dual, goffset;
2629 	struct opcode opcode, *g_mod012, *g_mod3;
2630 	struct operand memop = { .type = OP_NONE };
2631 
2632 	c->eip = ctxt->eip;
2633 	c->fetch.start = c->eip;
2634 	c->fetch.end = c->fetch.start + insn_len;
2635 	if (insn_len > 0)
2636 		memcpy(c->fetch.data, insn, insn_len);
2637 	ctxt->cs_base = seg_base(ctxt, ops, VCPU_SREG_CS);
2638 
2639 	switch (mode) {
2640 	case X86EMUL_MODE_REAL:
2641 	case X86EMUL_MODE_VM86:
2642 	case X86EMUL_MODE_PROT16:
2643 		def_op_bytes = def_ad_bytes = 2;
2644 		break;
2645 	case X86EMUL_MODE_PROT32:
2646 		def_op_bytes = def_ad_bytes = 4;
2647 		break;
2648 #ifdef CONFIG_X86_64
2649 	case X86EMUL_MODE_PROT64:
2650 		def_op_bytes = 4;
2651 		def_ad_bytes = 8;
2652 		break;
2653 #endif
2654 	default:
2655 		return -1;
2656 	}
2657 
2658 	c->op_bytes = def_op_bytes;
2659 	c->ad_bytes = def_ad_bytes;
2660 
2661 	/* Legacy prefixes. */
2662 	for (;;) {
2663 		switch (c->b = insn_fetch(u8, 1, c->eip)) {
2664 		case 0x66:	/* operand-size override */
2665 			/* switch between 2/4 bytes */
2666 			c->op_bytes = def_op_bytes ^ 6;
2667 			break;
2668 		case 0x67:	/* address-size override */
2669 			if (mode == X86EMUL_MODE_PROT64)
2670 				/* switch between 4/8 bytes */
2671 				c->ad_bytes = def_ad_bytes ^ 12;
2672 			else
2673 				/* switch between 2/4 bytes */
2674 				c->ad_bytes = def_ad_bytes ^ 6;
2675 			break;
2676 		case 0x26:	/* ES override */
2677 		case 0x2e:	/* CS override */
2678 		case 0x36:	/* SS override */
2679 		case 0x3e:	/* DS override */
2680 			set_seg_override(c, (c->b >> 3) & 3);
2681 			break;
2682 		case 0x64:	/* FS override */
2683 		case 0x65:	/* GS override */
2684 			set_seg_override(c, c->b & 7);
2685 			break;
2686 		case 0x40 ... 0x4f: /* REX */
2687 			if (mode != X86EMUL_MODE_PROT64)
2688 				goto done_prefixes;
2689 			c->rex_prefix = c->b;
2690 			continue;
2691 		case 0xf0:	/* LOCK */
2692 			c->lock_prefix = 1;
2693 			break;
2694 		case 0xf2:	/* REPNE/REPNZ */
2695 			c->rep_prefix = REPNE_PREFIX;
2696 			break;
2697 		case 0xf3:	/* REP/REPE/REPZ */
2698 			c->rep_prefix = REPE_PREFIX;
2699 			break;
2700 		default:
2701 			goto done_prefixes;
2702 		}
2703 
2704 		/* Any legacy prefix after a REX prefix nullifies its effect. */
2705 
2706 		c->rex_prefix = 0;
2707 	}
2708 
2709 done_prefixes:
2710 
2711 	/* REX prefix. */
2712 	if (c->rex_prefix & 8)
2713 		c->op_bytes = 8;	/* REX.W */
2714 
2715 	/* Opcode byte(s). */
2716 	opcode = opcode_table[c->b];
2717 	/* Two-byte opcode? */
2718 	if (c->b == 0x0f) {
2719 		c->twobyte = 1;
2720 		c->b = insn_fetch(u8, 1, c->eip);
2721 		opcode = twobyte_table[c->b];
2722 	}
2723 	c->d = opcode.flags;
2724 
2725 	if (c->d & Group) {
2726 		dual = c->d & GroupDual;
2727 		c->modrm = insn_fetch(u8, 1, c->eip);
2728 		--c->eip;
2729 
2730 		if (c->d & GroupDual) {
2731 			g_mod012 = opcode.u.gdual->mod012;
2732 			g_mod3 = opcode.u.gdual->mod3;
2733 		} else
2734 			g_mod012 = g_mod3 = opcode.u.group;
2735 
2736 		c->d &= ~(Group | GroupDual);
2737 
2738 		goffset = (c->modrm >> 3) & 7;
2739 
2740 		if ((c->modrm >> 6) == 3)
2741 			opcode = g_mod3[goffset];
2742 		else
2743 			opcode = g_mod012[goffset];
2744 		c->d |= opcode.flags;
2745 	}
2746 
2747 	c->execute = opcode.u.execute;
2748 
2749 	/* Unrecognised? */
2750 	if (c->d == 0 || (c->d & Undefined))
2751 		return -1;
2752 
2753 	if (!(c->d & VendorSpecific) && ctxt->only_vendor_specific_insn)
2754 		return -1;
2755 
2756 	if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack))
2757 		c->op_bytes = 8;
2758 
2759 	if (c->d & Op3264) {
2760 		if (mode == X86EMUL_MODE_PROT64)
2761 			c->op_bytes = 8;
2762 		else
2763 			c->op_bytes = 4;
2764 	}
2765 
2766 	/* ModRM and SIB bytes. */
2767 	if (c->d & ModRM) {
2768 		rc = decode_modrm(ctxt, ops, &memop);
2769 		if (!c->has_seg_override)
2770 			set_seg_override(c, c->modrm_seg);
2771 	} else if (c->d & MemAbs)
2772 		rc = decode_abs(ctxt, ops, &memop);
2773 	if (rc != X86EMUL_CONTINUE)
2774 		goto done;
2775 
2776 	if (!c->has_seg_override)
2777 		set_seg_override(c, VCPU_SREG_DS);
2778 
2779 	memop.addr.mem.seg = seg_override(ctxt, ops, c);
2780 
2781 	if (memop.type == OP_MEM && c->ad_bytes != 8)
2782 		memop.addr.mem.ea = (u32)memop.addr.mem.ea;
2783 
2784 	if (memop.type == OP_MEM && c->rip_relative)
2785 		memop.addr.mem.ea += c->eip;
2786 
2787 	/*
2788 	 * Decode and fetch the source operand: register, memory
2789 	 * or immediate.
2790 	 */
2791 	switch (c->d & SrcMask) {
2792 	case SrcNone:
2793 		break;
2794 	case SrcReg:
2795 		decode_register_operand(&c->src, c, 0);
2796 		break;
2797 	case SrcMem16:
2798 		memop.bytes = 2;
2799 		goto srcmem_common;
2800 	case SrcMem32:
2801 		memop.bytes = 4;
2802 		goto srcmem_common;
2803 	case SrcMem:
2804 		memop.bytes = (c->d & ByteOp) ? 1 :
2805 							   c->op_bytes;
2806 	srcmem_common:
2807 		c->src = memop;
2808 		break;
2809 	case SrcImmU16:
2810 		rc = decode_imm(ctxt, &c->src, 2, false);
2811 		break;
2812 	case SrcImm:
2813 		rc = decode_imm(ctxt, &c->src, imm_size(c), true);
2814 		break;
2815 	case SrcImmU:
2816 		rc = decode_imm(ctxt, &c->src, imm_size(c), false);
2817 		break;
2818 	case SrcImmByte:
2819 		rc = decode_imm(ctxt, &c->src, 1, true);
2820 		break;
2821 	case SrcImmUByte:
2822 		rc = decode_imm(ctxt, &c->src, 1, false);
2823 		break;
2824 	case SrcAcc:
2825 		c->src.type = OP_REG;
2826 		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
2827 		c->src.addr.reg = &c->regs[VCPU_REGS_RAX];
2828 		fetch_register_operand(&c->src);
2829 		break;
2830 	case SrcOne:
2831 		c->src.bytes = 1;
2832 		c->src.val = 1;
2833 		break;
2834 	case SrcSI:
2835 		c->src.type = OP_MEM;
2836 		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
2837 		c->src.addr.mem.ea =
2838 			register_address(c, c->regs[VCPU_REGS_RSI]);
2839 		c->src.addr.mem.seg = seg_override(ctxt, ops, c),
2840 		c->src.val = 0;
2841 		break;
2842 	case SrcImmFAddr:
2843 		c->src.type = OP_IMM;
2844 		c->src.addr.mem.ea = c->eip;
2845 		c->src.bytes = c->op_bytes + 2;
2846 		insn_fetch_arr(c->src.valptr, c->src.bytes, c->eip);
2847 		break;
2848 	case SrcMemFAddr:
2849 		memop.bytes = c->op_bytes + 2;
2850 		goto srcmem_common;
2851 		break;
2852 	}
2853 
2854 	if (rc != X86EMUL_CONTINUE)
2855 		goto done;
2856 
2857 	/*
2858 	 * Decode and fetch the second source operand: register, memory
2859 	 * or immediate.
2860 	 */
2861 	switch (c->d & Src2Mask) {
2862 	case Src2None:
2863 		break;
2864 	case Src2CL:
2865 		c->src2.bytes = 1;
2866 		c->src2.val = c->regs[VCPU_REGS_RCX] & 0x8;
2867 		break;
2868 	case Src2ImmByte:
2869 		rc = decode_imm(ctxt, &c->src2, 1, true);
2870 		break;
2871 	case Src2One:
2872 		c->src2.bytes = 1;
2873 		c->src2.val = 1;
2874 		break;
2875 	case Src2Imm:
2876 		rc = decode_imm(ctxt, &c->src2, imm_size(c), true);
2877 		break;
2878 	}
2879 
2880 	if (rc != X86EMUL_CONTINUE)
2881 		goto done;
2882 
2883 	/* Decode and fetch the destination operand: register or memory. */
2884 	switch (c->d & DstMask) {
2885 	case DstReg:
2886 		decode_register_operand(&c->dst, c,
2887 			 c->twobyte && (c->b == 0xb6 || c->b == 0xb7));
2888 		break;
2889 	case DstImmUByte:
2890 		c->dst.type = OP_IMM;
2891 		c->dst.addr.mem.ea = c->eip;
2892 		c->dst.bytes = 1;
2893 		c->dst.val = insn_fetch(u8, 1, c->eip);
2894 		break;
2895 	case DstMem:
2896 	case DstMem64:
2897 		c->dst = memop;
2898 		if ((c->d & DstMask) == DstMem64)
2899 			c->dst.bytes = 8;
2900 		else
2901 			c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
2902 		if (c->d & BitOp)
2903 			fetch_bit_operand(c);
2904 		c->dst.orig_val = c->dst.val;
2905 		break;
2906 	case DstAcc:
2907 		c->dst.type = OP_REG;
2908 		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
2909 		c->dst.addr.reg = &c->regs[VCPU_REGS_RAX];
2910 		fetch_register_operand(&c->dst);
2911 		c->dst.orig_val = c->dst.val;
2912 		break;
2913 	case DstDI:
2914 		c->dst.type = OP_MEM;
2915 		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
2916 		c->dst.addr.mem.ea =
2917 			register_address(c, c->regs[VCPU_REGS_RDI]);
2918 		c->dst.addr.mem.seg = VCPU_SREG_ES;
2919 		c->dst.val = 0;
2920 		break;
2921 	case ImplicitOps:
2922 		/* Special instructions do their own operand decoding. */
2923 	default:
2924 		c->dst.type = OP_NONE; /* Disable writeback. */
2925 		return 0;
2926 	}
2927 
2928 done:
2929 	return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
2930 }
2931 
2932 static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
2933 {
2934 	struct decode_cache *c = &ctxt->decode;
2935 
2936 	/* The second termination condition only applies for REPE
2937 	 * and REPNE. Test if the repeat string operation prefix is
2938 	 * REPE/REPZ or REPNE/REPNZ and if it's the case it tests the
2939 	 * corresponding termination condition according to:
2940 	 * 	- if REPE/REPZ and ZF = 0 then done
2941 	 * 	- if REPNE/REPNZ and ZF = 1 then done
2942 	 */
2943 	if (((c->b == 0xa6) || (c->b == 0xa7) ||
2944 	     (c->b == 0xae) || (c->b == 0xaf))
2945 	    && (((c->rep_prefix == REPE_PREFIX) &&
2946 		 ((ctxt->eflags & EFLG_ZF) == 0))
2947 		|| ((c->rep_prefix == REPNE_PREFIX) &&
2948 		    ((ctxt->eflags & EFLG_ZF) == EFLG_ZF))))
2949 		return true;
2950 
2951 	return false;
2952 }
2953 
2954 int
2955 x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
2956 {
2957 	struct x86_emulate_ops *ops = ctxt->ops;
2958 	u64 msr_data;
2959 	struct decode_cache *c = &ctxt->decode;
2960 	int rc = X86EMUL_CONTINUE;
2961 	int saved_dst_type = c->dst.type;
2962 	int irq; /* Used for int 3, int, and into */
2963 
2964 	ctxt->decode.mem_read.pos = 0;
2965 
2966 	if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) {
2967 		rc = emulate_ud(ctxt);
2968 		goto done;
2969 	}
2970 
2971 	/* LOCK prefix is allowed only with some instructions */
2972 	if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) {
2973 		rc = emulate_ud(ctxt);
2974 		goto done;
2975 	}
2976 
2977 	if ((c->d & SrcMask) == SrcMemFAddr && c->src.type != OP_MEM) {
2978 		rc = emulate_ud(ctxt);
2979 		goto done;
2980 	}
2981 
2982 	/* Privileged instruction can be executed only in CPL=0 */
2983 	if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) {
2984 		rc = emulate_gp(ctxt, 0);
2985 		goto done;
2986 	}
2987 
2988 	if (c->rep_prefix && (c->d & String)) {
2989 		/* All REP prefixes have the same first termination condition */
2990 		if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) {
2991 			ctxt->eip = c->eip;
2992 			goto done;
2993 		}
2994 	}
2995 
2996 	if ((c->src.type == OP_MEM) && !(c->d & NoAccess)) {
2997 		rc = read_emulated(ctxt, ops, linear(ctxt, c->src.addr.mem),
2998 					c->src.valptr, c->src.bytes);
2999 		if (rc != X86EMUL_CONTINUE)
3000 			goto done;
3001 		c->src.orig_val64 = c->src.val64;
3002 	}
3003 
3004 	if (c->src2.type == OP_MEM) {
3005 		rc = read_emulated(ctxt, ops, linear(ctxt, c->src2.addr.mem),
3006 					&c->src2.val, c->src2.bytes);
3007 		if (rc != X86EMUL_CONTINUE)
3008 			goto done;
3009 	}
3010 
3011 	if ((c->d & DstMask) == ImplicitOps)
3012 		goto special_insn;
3013 
3014 
3015 	if ((c->dst.type == OP_MEM) && !(c->d & Mov)) {
3016 		/* optimisation - avoid slow emulated read if Mov */
3017 		rc = read_emulated(ctxt, ops, linear(ctxt, c->dst.addr.mem),
3018 				   &c->dst.val, c->dst.bytes);
3019 		if (rc != X86EMUL_CONTINUE)
3020 			goto done;
3021 	}
3022 	c->dst.orig_val = c->dst.val;
3023 
3024 special_insn:
3025 
3026 	if (c->execute) {
3027 		rc = c->execute(ctxt);
3028 		if (rc != X86EMUL_CONTINUE)
3029 			goto done;
3030 		goto writeback;
3031 	}
3032 
3033 	if (c->twobyte)
3034 		goto twobyte_insn;
3035 
3036 	switch (c->b) {
3037 	case 0x00 ... 0x05:
3038 	      add:		/* add */
3039 		emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags);
3040 		break;
3041 	case 0x06:		/* push es */
3042 		emulate_push_sreg(ctxt, ops, VCPU_SREG_ES);
3043 		break;
3044 	case 0x07:		/* pop es */
3045 		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES);
3046 		break;
3047 	case 0x08 ... 0x0d:
3048 	      or:		/* or */
3049 		emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags);
3050 		break;
3051 	case 0x0e:		/* push cs */
3052 		emulate_push_sreg(ctxt, ops, VCPU_SREG_CS);
3053 		break;
3054 	case 0x10 ... 0x15:
3055 	      adc:		/* adc */
3056 		emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags);
3057 		break;
3058 	case 0x16:		/* push ss */
3059 		emulate_push_sreg(ctxt, ops, VCPU_SREG_SS);
3060 		break;
3061 	case 0x17:		/* pop ss */
3062 		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS);
3063 		break;
3064 	case 0x18 ... 0x1d:
3065 	      sbb:		/* sbb */
3066 		emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags);
3067 		break;
3068 	case 0x1e:		/* push ds */
3069 		emulate_push_sreg(ctxt, ops, VCPU_SREG_DS);
3070 		break;
3071 	case 0x1f:		/* pop ds */
3072 		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS);
3073 		break;
3074 	case 0x20 ... 0x25:
3075 	      and:		/* and */
3076 		emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags);
3077 		break;
3078 	case 0x28 ... 0x2d:
3079 	      sub:		/* sub */
3080 		emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags);
3081 		break;
3082 	case 0x30 ... 0x35:
3083 	      xor:		/* xor */
3084 		emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags);
3085 		break;
3086 	case 0x38 ... 0x3d:
3087 	      cmp:		/* cmp */
3088 		emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
3089 		break;
3090 	case 0x40 ... 0x47: /* inc r16/r32 */
3091 		emulate_1op("inc", c->dst, ctxt->eflags);
3092 		break;
3093 	case 0x48 ... 0x4f: /* dec r16/r32 */
3094 		emulate_1op("dec", c->dst, ctxt->eflags);
3095 		break;
3096 	case 0x58 ... 0x5f: /* pop reg */
3097 	pop_instruction:
3098 		rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes);
3099 		break;
3100 	case 0x60:	/* pusha */
3101 		rc = emulate_pusha(ctxt, ops);
3102 		break;
3103 	case 0x61:	/* popa */
3104 		rc = emulate_popa(ctxt, ops);
3105 		break;
3106 	case 0x63:		/* movsxd */
3107 		if (ctxt->mode != X86EMUL_MODE_PROT64)
3108 			goto cannot_emulate;
3109 		c->dst.val = (s32) c->src.val;
3110 		break;
3111 	case 0x6c:		/* insb */
3112 	case 0x6d:		/* insw/insd */
3113 		c->src.val = c->regs[VCPU_REGS_RDX];
3114 		goto do_io_in;
3115 	case 0x6e:		/* outsb */
3116 	case 0x6f:		/* outsw/outsd */
3117 		c->dst.val = c->regs[VCPU_REGS_RDX];
3118 		goto do_io_out;
3119 		break;
3120 	case 0x70 ... 0x7f: /* jcc (short) */
3121 		if (test_cc(c->b, ctxt->eflags))
3122 			jmp_rel(c, c->src.val);
3123 		break;
3124 	case 0x80 ... 0x83:	/* Grp1 */
3125 		switch (c->modrm_reg) {
3126 		case 0:
3127 			goto add;
3128 		case 1:
3129 			goto or;
3130 		case 2:
3131 			goto adc;
3132 		case 3:
3133 			goto sbb;
3134 		case 4:
3135 			goto and;
3136 		case 5:
3137 			goto sub;
3138 		case 6:
3139 			goto xor;
3140 		case 7:
3141 			goto cmp;
3142 		}
3143 		break;
3144 	case 0x84 ... 0x85:
3145 	test:
3146 		emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
3147 		break;
3148 	case 0x86 ... 0x87:	/* xchg */
3149 	xchg:
3150 		/* Write back the register source. */
3151 		c->src.val = c->dst.val;
3152 		write_register_operand(&c->src);
3153 		/*
3154 		 * Write back the memory destination with implicit LOCK
3155 		 * prefix.
3156 		 */
3157 		c->dst.val = c->src.orig_val;
3158 		c->lock_prefix = 1;
3159 		break;
3160 	case 0x8c:  /* mov r/m, sreg */
3161 		if (c->modrm_reg > VCPU_SREG_GS) {
3162 			rc = emulate_ud(ctxt);
3163 			goto done;
3164 		}
3165 		c->dst.val = ops->get_segment_selector(c->modrm_reg, ctxt->vcpu);
3166 		break;
3167 	case 0x8d: /* lea r16/r32, m */
3168 		c->dst.val = c->src.addr.mem.ea;
3169 		break;
3170 	case 0x8e: { /* mov seg, r/m16 */
3171 		uint16_t sel;
3172 
3173 		sel = c->src.val;
3174 
3175 		if (c->modrm_reg == VCPU_SREG_CS ||
3176 		    c->modrm_reg > VCPU_SREG_GS) {
3177 			rc = emulate_ud(ctxt);
3178 			goto done;
3179 		}
3180 
3181 		if (c->modrm_reg == VCPU_SREG_SS)
3182 			ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
3183 
3184 		rc = load_segment_descriptor(ctxt, ops, sel, c->modrm_reg);
3185 
3186 		c->dst.type = OP_NONE;  /* Disable writeback. */
3187 		break;
3188 	}
3189 	case 0x8f:		/* pop (sole member of Grp1a) */
3190 		rc = emulate_grp1a(ctxt, ops);
3191 		break;
3192 	case 0x90 ... 0x97: /* nop / xchg reg, rax */
3193 		if (c->dst.addr.reg == &c->regs[VCPU_REGS_RAX])
3194 			break;
3195 		goto xchg;
3196 	case 0x98: /* cbw/cwde/cdqe */
3197 		switch (c->op_bytes) {
3198 		case 2: c->dst.val = (s8)c->dst.val; break;
3199 		case 4: c->dst.val = (s16)c->dst.val; break;
3200 		case 8: c->dst.val = (s32)c->dst.val; break;
3201 		}
3202 		break;
3203 	case 0x9c: /* pushf */
3204 		c->src.val =  (unsigned long) ctxt->eflags;
3205 		emulate_push(ctxt, ops);
3206 		break;
3207 	case 0x9d: /* popf */
3208 		c->dst.type = OP_REG;
3209 		c->dst.addr.reg = &ctxt->eflags;
3210 		c->dst.bytes = c->op_bytes;
3211 		rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes);
3212 		break;
3213 	case 0xa6 ... 0xa7:	/* cmps */
3214 		c->dst.type = OP_NONE; /* Disable writeback. */
3215 		goto cmp;
3216 	case 0xa8 ... 0xa9:	/* test ax, imm */
3217 		goto test;
3218 	case 0xae ... 0xaf:	/* scas */
3219 		goto cmp;
3220 	case 0xc0 ... 0xc1:
3221 		emulate_grp2(ctxt);
3222 		break;
3223 	case 0xc3: /* ret */
3224 		c->dst.type = OP_REG;
3225 		c->dst.addr.reg = &c->eip;
3226 		c->dst.bytes = c->op_bytes;
3227 		goto pop_instruction;
3228 	case 0xc4:		/* les */
3229 		rc = emulate_load_segment(ctxt, ops, VCPU_SREG_ES);
3230 		break;
3231 	case 0xc5:		/* lds */
3232 		rc = emulate_load_segment(ctxt, ops, VCPU_SREG_DS);
3233 		break;
3234 	case 0xcb:		/* ret far */
3235 		rc = emulate_ret_far(ctxt, ops);
3236 		break;
3237 	case 0xcc:		/* int3 */
3238 		irq = 3;
3239 		goto do_interrupt;
3240 	case 0xcd:		/* int n */
3241 		irq = c->src.val;
3242 	do_interrupt:
3243 		rc = emulate_int(ctxt, ops, irq);
3244 		break;
3245 	case 0xce:		/* into */
3246 		if (ctxt->eflags & EFLG_OF) {
3247 			irq = 4;
3248 			goto do_interrupt;
3249 		}
3250 		break;
3251 	case 0xcf:		/* iret */
3252 		rc = emulate_iret(ctxt, ops);
3253 		break;
3254 	case 0xd0 ... 0xd1:	/* Grp2 */
3255 		emulate_grp2(ctxt);
3256 		break;
3257 	case 0xd2 ... 0xd3:	/* Grp2 */
3258 		c->src.val = c->regs[VCPU_REGS_RCX];
3259 		emulate_grp2(ctxt);
3260 		break;
3261 	case 0xe0 ... 0xe2:	/* loop/loopz/loopnz */
3262 		register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1);
3263 		if (address_mask(c, c->regs[VCPU_REGS_RCX]) != 0 &&
3264 		    (c->b == 0xe2 || test_cc(c->b ^ 0x5, ctxt->eflags)))
3265 			jmp_rel(c, c->src.val);
3266 		break;
3267 	case 0xe3:	/* jcxz/jecxz/jrcxz */
3268 		if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0)
3269 			jmp_rel(c, c->src.val);
3270 		break;
3271 	case 0xe4: 	/* inb */
3272 	case 0xe5: 	/* in */
3273 		goto do_io_in;
3274 	case 0xe6: /* outb */
3275 	case 0xe7: /* out */
3276 		goto do_io_out;
3277 	case 0xe8: /* call (near) */ {
3278 		long int rel = c->src.val;
3279 		c->src.val = (unsigned long) c->eip;
3280 		jmp_rel(c, rel);
3281 		emulate_push(ctxt, ops);
3282 		break;
3283 	}
3284 	case 0xe9: /* jmp rel */
3285 		goto jmp;
3286 	case 0xea: { /* jmp far */
3287 		unsigned short sel;
3288 	jump_far:
3289 		memcpy(&sel, c->src.valptr + c->op_bytes, 2);
3290 
3291 		if (load_segment_descriptor(ctxt, ops, sel, VCPU_SREG_CS))
3292 			goto done;
3293 
3294 		c->eip = 0;
3295 		memcpy(&c->eip, c->src.valptr, c->op_bytes);
3296 		break;
3297 	}
3298 	case 0xeb:
3299 	      jmp:		/* jmp rel short */
3300 		jmp_rel(c, c->src.val);
3301 		c->dst.type = OP_NONE; /* Disable writeback. */
3302 		break;
3303 	case 0xec: /* in al,dx */
3304 	case 0xed: /* in (e/r)ax,dx */
3305 		c->src.val = c->regs[VCPU_REGS_RDX];
3306 	do_io_in:
3307 		c->dst.bytes = min(c->dst.bytes, 4u);
3308 		if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) {
3309 			rc = emulate_gp(ctxt, 0);
3310 			goto done;
3311 		}
3312 		if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val,
3313 				     &c->dst.val))
3314 			goto done; /* IO is needed */
3315 		break;
3316 	case 0xee: /* out dx,al */
3317 	case 0xef: /* out dx,(e/r)ax */
3318 		c->dst.val = c->regs[VCPU_REGS_RDX];
3319 	do_io_out:
3320 		c->src.bytes = min(c->src.bytes, 4u);
3321 		if (!emulator_io_permited(ctxt, ops, c->dst.val,
3322 					  c->src.bytes)) {
3323 			rc = emulate_gp(ctxt, 0);
3324 			goto done;
3325 		}
3326 		ops->pio_out_emulated(c->src.bytes, c->dst.val,
3327 				      &c->src.val, 1, ctxt->vcpu);
3328 		c->dst.type = OP_NONE;	/* Disable writeback. */
3329 		break;
3330 	case 0xf4:              /* hlt */
3331 		ctxt->vcpu->arch.halt_request = 1;
3332 		break;
3333 	case 0xf5:	/* cmc */
3334 		/* complement carry flag from eflags reg */
3335 		ctxt->eflags ^= EFLG_CF;
3336 		break;
3337 	case 0xf6 ... 0xf7:	/* Grp3 */
3338 		rc = emulate_grp3(ctxt, ops);
3339 		break;
3340 	case 0xf8: /* clc */
3341 		ctxt->eflags &= ~EFLG_CF;
3342 		break;
3343 	case 0xf9: /* stc */
3344 		ctxt->eflags |= EFLG_CF;
3345 		break;
3346 	case 0xfa: /* cli */
3347 		if (emulator_bad_iopl(ctxt, ops)) {
3348 			rc = emulate_gp(ctxt, 0);
3349 			goto done;
3350 		} else
3351 			ctxt->eflags &= ~X86_EFLAGS_IF;
3352 		break;
3353 	case 0xfb: /* sti */
3354 		if (emulator_bad_iopl(ctxt, ops)) {
3355 			rc = emulate_gp(ctxt, 0);
3356 			goto done;
3357 		} else {
3358 			ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;
3359 			ctxt->eflags |= X86_EFLAGS_IF;
3360 		}
3361 		break;
3362 	case 0xfc: /* cld */
3363 		ctxt->eflags &= ~EFLG_DF;
3364 		break;
3365 	case 0xfd: /* std */
3366 		ctxt->eflags |= EFLG_DF;
3367 		break;
3368 	case 0xfe: /* Grp4 */
3369 	grp45:
3370 		rc = emulate_grp45(ctxt, ops);
3371 		break;
3372 	case 0xff: /* Grp5 */
3373 		if (c->modrm_reg == 5)
3374 			goto jump_far;
3375 		goto grp45;
3376 	default:
3377 		goto cannot_emulate;
3378 	}
3379 
3380 	if (rc != X86EMUL_CONTINUE)
3381 		goto done;
3382 
3383 writeback:
3384 	rc = writeback(ctxt, ops);
3385 	if (rc != X86EMUL_CONTINUE)
3386 		goto done;
3387 
3388 	/*
3389 	 * Restore dst type in case the decoding will be reused
3390 	 * (happens for string instructions).
3391 	 */
3392 	c->dst.type = saved_dst_type;
3393 
3394 	if ((c->d & SrcMask) == SrcSI)
3395 		string_addr_inc(ctxt, seg_override(ctxt, ops, c),
3396 				VCPU_REGS_RSI, &c->src);
3397 
3398 	if ((c->d & DstMask) == DstDI)
3399 		string_addr_inc(ctxt, VCPU_SREG_ES, VCPU_REGS_RDI,
3400 				&c->dst);
3401 
3402 	if (c->rep_prefix && (c->d & String)) {
3403 		struct read_cache *r = &ctxt->decode.io_read;
3404 		register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1);
3405 
3406 		if (!string_insn_completed(ctxt)) {
3407 			/*
3408 			 * Re-enter guest when pio read-ahead buffer is empty
3409 			 * or, if it is not used, after every 1024 iterations.
3410 			 */
3411 			if ((r->end != 0 || c->regs[VCPU_REGS_RCX] & 0x3ff) &&
3412 			    (r->end == 0 || r->end != r->pos)) {
3413 				/*
3414 				 * Reset read cache. Usually happens before
3415 				 * decode, but since instruction is restarted
3416 				 * we have to do it here.
3417 				 */
3418 				ctxt->decode.mem_read.end = 0;
3419 				return EMULATION_RESTART;
3420 			}
3421 			goto done; /* skip rip writeback */
3422 		}
3423 	}
3424 
3425 	ctxt->eip = c->eip;
3426 
3427 done:
3428 	if (rc == X86EMUL_PROPAGATE_FAULT)
3429 		ctxt->have_exception = true;
3430 	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
3431 
3432 twobyte_insn:
3433 	switch (c->b) {
3434 	case 0x01: /* lgdt, lidt, lmsw */
3435 		switch (c->modrm_reg) {
3436 			u16 size;
3437 			unsigned long address;
3438 
3439 		case 0: /* vmcall */
3440 			if (c->modrm_mod != 3 || c->modrm_rm != 1)
3441 				goto cannot_emulate;
3442 
3443 			rc = kvm_fix_hypercall(ctxt->vcpu);
3444 			if (rc != X86EMUL_CONTINUE)
3445 				goto done;
3446 
3447 			/* Let the processor re-execute the fixed hypercall */
3448 			c->eip = ctxt->eip;
3449 			/* Disable writeback. */
3450 			c->dst.type = OP_NONE;
3451 			break;
3452 		case 2: /* lgdt */
3453 			rc = read_descriptor(ctxt, ops, c->src.addr.mem,
3454 					     &size, &address, c->op_bytes);
3455 			if (rc != X86EMUL_CONTINUE)
3456 				goto done;
3457 			realmode_lgdt(ctxt->vcpu, size, address);
3458 			/* Disable writeback. */
3459 			c->dst.type = OP_NONE;
3460 			break;
3461 		case 3: /* lidt/vmmcall */
3462 			if (c->modrm_mod == 3) {
3463 				switch (c->modrm_rm) {
3464 				case 1:
3465 					rc = kvm_fix_hypercall(ctxt->vcpu);
3466 					break;
3467 				default:
3468 					goto cannot_emulate;
3469 				}
3470 			} else {
3471 				rc = read_descriptor(ctxt, ops, c->src.addr.mem,
3472 						     &size, &address,
3473 						     c->op_bytes);
3474 				if (rc != X86EMUL_CONTINUE)
3475 					goto done;
3476 				realmode_lidt(ctxt->vcpu, size, address);
3477 			}
3478 			/* Disable writeback. */
3479 			c->dst.type = OP_NONE;
3480 			break;
3481 		case 4: /* smsw */
3482 			c->dst.bytes = 2;
3483 			c->dst.val = ops->get_cr(0, ctxt->vcpu);
3484 			break;
3485 		case 6: /* lmsw */
3486 			ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0eul) |
3487 				    (c->src.val & 0x0f), ctxt->vcpu);
3488 			c->dst.type = OP_NONE;
3489 			break;
3490 		case 5: /* not defined */
3491 			emulate_ud(ctxt);
3492 			rc = X86EMUL_PROPAGATE_FAULT;
3493 			goto done;
3494 		case 7: /* invlpg*/
3495 			emulate_invlpg(ctxt->vcpu,
3496 				       linear(ctxt, c->src.addr.mem));
3497 			/* Disable writeback. */
3498 			c->dst.type = OP_NONE;
3499 			break;
3500 		default:
3501 			goto cannot_emulate;
3502 		}
3503 		break;
3504 	case 0x05: 		/* syscall */
3505 		rc = emulate_syscall(ctxt, ops);
3506 		break;
3507 	case 0x06:
3508 		emulate_clts(ctxt->vcpu);
3509 		break;
3510 	case 0x09:		/* wbinvd */
3511 		kvm_emulate_wbinvd(ctxt->vcpu);
3512 		break;
3513 	case 0x08:		/* invd */
3514 	case 0x0d:		/* GrpP (prefetch) */
3515 	case 0x18:		/* Grp16 (prefetch/nop) */
3516 		break;
3517 	case 0x20: /* mov cr, reg */
3518 		switch (c->modrm_reg) {
3519 		case 1:
3520 		case 5 ... 7:
3521 		case 9 ... 15:
3522 			emulate_ud(ctxt);
3523 			rc = X86EMUL_PROPAGATE_FAULT;
3524 			goto done;
3525 		}
3526 		c->dst.val = ops->get_cr(c->modrm_reg, ctxt->vcpu);
3527 		break;
3528 	case 0x21: /* mov from dr to reg */
3529 		if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&
3530 		    (c->modrm_reg == 4 || c->modrm_reg == 5)) {
3531 			emulate_ud(ctxt);
3532 			rc = X86EMUL_PROPAGATE_FAULT;
3533 			goto done;
3534 		}
3535 		ops->get_dr(c->modrm_reg, &c->dst.val, ctxt->vcpu);
3536 		break;
3537 	case 0x22: /* mov reg, cr */
3538 		if (ops->set_cr(c->modrm_reg, c->src.val, ctxt->vcpu)) {
3539 			emulate_gp(ctxt, 0);
3540 			rc = X86EMUL_PROPAGATE_FAULT;
3541 			goto done;
3542 		}
3543 		c->dst.type = OP_NONE;
3544 		break;
3545 	case 0x23: /* mov from reg to dr */
3546 		if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&
3547 		    (c->modrm_reg == 4 || c->modrm_reg == 5)) {
3548 			emulate_ud(ctxt);
3549 			rc = X86EMUL_PROPAGATE_FAULT;
3550 			goto done;
3551 		}
3552 
3553 		if (ops->set_dr(c->modrm_reg, c->src.val &
3554 				((ctxt->mode == X86EMUL_MODE_PROT64) ?
3555 				 ~0ULL : ~0U), ctxt->vcpu) < 0) {
3556 			/* #UD condition is already handled by the code above */
3557 			emulate_gp(ctxt, 0);
3558 			rc = X86EMUL_PROPAGATE_FAULT;
3559 			goto done;
3560 		}
3561 
3562 		c->dst.type = OP_NONE;	/* no writeback */
3563 		break;
3564 	case 0x30:
3565 		/* wrmsr */
3566 		msr_data = (u32)c->regs[VCPU_REGS_RAX]
3567 			| ((u64)c->regs[VCPU_REGS_RDX] << 32);
3568 		if (ops->set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) {
3569 			emulate_gp(ctxt, 0);
3570 			rc = X86EMUL_PROPAGATE_FAULT;
3571 			goto done;
3572 		}
3573 		rc = X86EMUL_CONTINUE;
3574 		break;
3575 	case 0x32:
3576 		/* rdmsr */
3577 		if (ops->get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) {
3578 			emulate_gp(ctxt, 0);
3579 			rc = X86EMUL_PROPAGATE_FAULT;
3580 			goto done;
3581 		} else {
3582 			c->regs[VCPU_REGS_RAX] = (u32)msr_data;
3583 			c->regs[VCPU_REGS_RDX] = msr_data >> 32;
3584 		}
3585 		rc = X86EMUL_CONTINUE;
3586 		break;
3587 	case 0x34:		/* sysenter */
3588 		rc = emulate_sysenter(ctxt, ops);
3589 		break;
3590 	case 0x35:		/* sysexit */
3591 		rc = emulate_sysexit(ctxt, ops);
3592 		break;
3593 	case 0x40 ... 0x4f:	/* cmov */
3594 		c->dst.val = c->dst.orig_val = c->src.val;
3595 		if (!test_cc(c->b, ctxt->eflags))
3596 			c->dst.type = OP_NONE; /* no writeback */
3597 		break;
3598 	case 0x80 ... 0x8f: /* jnz rel, etc*/
3599 		if (test_cc(c->b, ctxt->eflags))
3600 			jmp_rel(c, c->src.val);
3601 		break;
3602 	case 0x90 ... 0x9f:     /* setcc r/m8 */
3603 		c->dst.val = test_cc(c->b, ctxt->eflags);
3604 		break;
3605 	case 0xa0:	  /* push fs */
3606 		emulate_push_sreg(ctxt, ops, VCPU_SREG_FS);
3607 		break;
3608 	case 0xa1:	 /* pop fs */
3609 		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS);
3610 		break;
3611 	case 0xa3:
3612 	      bt:		/* bt */
3613 		c->dst.type = OP_NONE;
3614 		/* only subword offset */
3615 		c->src.val &= (c->dst.bytes << 3) - 1;
3616 		emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags);
3617 		break;
3618 	case 0xa4: /* shld imm8, r, r/m */
3619 	case 0xa5: /* shld cl, r, r/m */
3620 		emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags);
3621 		break;
3622 	case 0xa8:	/* push gs */
3623 		emulate_push_sreg(ctxt, ops, VCPU_SREG_GS);
3624 		break;
3625 	case 0xa9:	/* pop gs */
3626 		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS);
3627 		break;
3628 	case 0xab:
3629 	      bts:		/* bts */
3630 		emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags);
3631 		break;
3632 	case 0xac: /* shrd imm8, r, r/m */
3633 	case 0xad: /* shrd cl, r, r/m */
3634 		emulate_2op_cl("shrd", c->src2, c->src, c->dst, ctxt->eflags);
3635 		break;
3636 	case 0xae:              /* clflush */
3637 		break;
3638 	case 0xb0 ... 0xb1:	/* cmpxchg */
3639 		/*
3640 		 * Save real source value, then compare EAX against
3641 		 * destination.
3642 		 */
3643 		c->src.orig_val = c->src.val;
3644 		c->src.val = c->regs[VCPU_REGS_RAX];
3645 		emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
3646 		if (ctxt->eflags & EFLG_ZF) {
3647 			/* Success: write back to memory. */
3648 			c->dst.val = c->src.orig_val;
3649 		} else {
3650 			/* Failure: write the value we saw to EAX. */
3651 			c->dst.type = OP_REG;
3652 			c->dst.addr.reg = (unsigned long *)&c->regs[VCPU_REGS_RAX];
3653 		}
3654 		break;
3655 	case 0xb2:		/* lss */
3656 		rc = emulate_load_segment(ctxt, ops, VCPU_SREG_SS);
3657 		break;
3658 	case 0xb3:
3659 	      btr:		/* btr */
3660 		emulate_2op_SrcV_nobyte("btr", c->src, c->dst, ctxt->eflags);
3661 		break;
3662 	case 0xb4:		/* lfs */
3663 		rc = emulate_load_segment(ctxt, ops, VCPU_SREG_FS);
3664 		break;
3665 	case 0xb5:		/* lgs */
3666 		rc = emulate_load_segment(ctxt, ops, VCPU_SREG_GS);
3667 		break;
3668 	case 0xb6 ... 0xb7:	/* movzx */
3669 		c->dst.bytes = c->op_bytes;
3670 		c->dst.val = (c->d & ByteOp) ? (u8) c->src.val
3671 						       : (u16) c->src.val;
3672 		break;
3673 	case 0xba:		/* Grp8 */
3674 		switch (c->modrm_reg & 3) {
3675 		case 0:
3676 			goto bt;
3677 		case 1:
3678 			goto bts;
3679 		case 2:
3680 			goto btr;
3681 		case 3:
3682 			goto btc;
3683 		}
3684 		break;
3685 	case 0xbb:
3686 	      btc:		/* btc */
3687 		emulate_2op_SrcV_nobyte("btc", c->src, c->dst, ctxt->eflags);
3688 		break;
3689 	case 0xbc: {		/* bsf */
3690 		u8 zf;
3691 		__asm__ ("bsf %2, %0; setz %1"
3692 			 : "=r"(c->dst.val), "=q"(zf)
3693 			 : "r"(c->src.val));
3694 		ctxt->eflags &= ~X86_EFLAGS_ZF;
3695 		if (zf) {
3696 			ctxt->eflags |= X86_EFLAGS_ZF;
3697 			c->dst.type = OP_NONE;	/* Disable writeback. */
3698 		}
3699 		break;
3700 	}
3701 	case 0xbd: {		/* bsr */
3702 		u8 zf;
3703 		__asm__ ("bsr %2, %0; setz %1"
3704 			 : "=r"(c->dst.val), "=q"(zf)
3705 			 : "r"(c->src.val));
3706 		ctxt->eflags &= ~X86_EFLAGS_ZF;
3707 		if (zf) {
3708 			ctxt->eflags |= X86_EFLAGS_ZF;
3709 			c->dst.type = OP_NONE;	/* Disable writeback. */
3710 		}
3711 		break;
3712 	}
3713 	case 0xbe ... 0xbf:	/* movsx */
3714 		c->dst.bytes = c->op_bytes;
3715 		c->dst.val = (c->d & ByteOp) ? (s8) c->src.val :
3716 							(s16) c->src.val;
3717 		break;
3718 	case 0xc0 ... 0xc1:	/* xadd */
3719 		emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags);
3720 		/* Write back the register source. */
3721 		c->src.val = c->dst.orig_val;
3722 		write_register_operand(&c->src);
3723 		break;
3724 	case 0xc3:		/* movnti */
3725 		c->dst.bytes = c->op_bytes;
3726 		c->dst.val = (c->op_bytes == 4) ? (u32) c->src.val :
3727 							(u64) c->src.val;
3728 		break;
3729 	case 0xc7:		/* Grp9 (cmpxchg8b) */
3730 		rc = emulate_grp9(ctxt, ops);
3731 		break;
3732 	default:
3733 		goto cannot_emulate;
3734 	}
3735 
3736 	if (rc != X86EMUL_CONTINUE)
3737 		goto done;
3738 
3739 	goto writeback;
3740 
3741 cannot_emulate:
3742 	return -1;
3743 }
3744