xref: /openbmc/linux/arch/x86/kvm/emulate.c (revision df2634f43f5106947f3735a0b61a6527a4b278cd)
1 /******************************************************************************
2  * emulate.c
3  *
4  * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
5  *
6  * Copyright (c) 2005 Keir Fraser
7  *
8  * Linux coding style, mod r/m decoder, segment base fixes, real-mode
9  * privileged instructions:
10  *
11  * Copyright (C) 2006 Qumranet
12  * Copyright 2010 Red Hat, Inc. and/or its affiliates.
13  *
14  *   Avi Kivity <avi@qumranet.com>
15  *   Yaniv Kamay <yaniv@qumranet.com>
16  *
17  * This work is licensed under the terms of the GNU GPL, version 2.  See
18  * the COPYING file in the top-level directory.
19  *
20  * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
21  */
22 
23 #include <linux/kvm_host.h>
24 #include "kvm_cache_regs.h"
25 #include <linux/module.h>
26 #include <asm/kvm_emulate.h>
27 
28 #include "x86.h"
29 #include "tss.h"
30 
31 /*
32  * Opcode effective-address decode tables.
33  * Note that we only emulate instructions that have at least one memory
34  * operand (excluding implicit stack references). We assume that stack
35  * references and instruction fetches will never occur in special memory
36  * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
37  * not be handled.
38  */
39 
40 /* Operand sizes: 8-bit operands or specified/overridden size. */
41 #define ByteOp      (1<<0)	/* 8-bit operands. */
42 /* Destination operand type. */
43 #define ImplicitOps (1<<1)	/* Implicit in opcode. No generic decode. */
44 #define DstReg      (2<<1)	/* Register operand. */
45 #define DstMem      (3<<1)	/* Memory operand. */
46 #define DstAcc      (4<<1)	/* Destination Accumulator */
47 #define DstDI       (5<<1)	/* Destination is in ES:(E)DI */
48 #define DstMem64    (6<<1)	/* 64bit memory operand */
49 #define DstImmUByte (7<<1)	/* 8-bit unsigned immediate operand */
50 #define DstMask     (7<<1)
51 /* Source operand type. */
52 #define SrcNone     (0<<4)	/* No source operand. */
53 #define SrcReg      (1<<4)	/* Register operand. */
54 #define SrcMem      (2<<4)	/* Memory operand. */
55 #define SrcMem16    (3<<4)	/* Memory operand (16-bit). */
56 #define SrcMem32    (4<<4)	/* Memory operand (32-bit). */
57 #define SrcImm      (5<<4)	/* Immediate operand. */
58 #define SrcImmByte  (6<<4)	/* 8-bit sign-extended immediate operand. */
59 #define SrcOne      (7<<4)	/* Implied '1' */
60 #define SrcImmUByte (8<<4)      /* 8-bit unsigned immediate operand. */
61 #define SrcImmU     (9<<4)      /* Immediate operand, unsigned */
62 #define SrcSI       (0xa<<4)	/* Source is in the DS:RSI */
63 #define SrcImmFAddr (0xb<<4)	/* Source is immediate far address */
64 #define SrcMemFAddr (0xc<<4)	/* Source is far address in memory */
65 #define SrcAcc      (0xd<<4)	/* Source Accumulator */
66 #define SrcImmU16   (0xe<<4)    /* Immediate operand, unsigned, 16 bits */
67 #define SrcMask     (0xf<<4)
68 /* Generic ModRM decode. */
69 #define ModRM       (1<<8)
70 /* Destination is only written; never read. */
71 #define Mov         (1<<9)
72 #define BitOp       (1<<10)
73 #define MemAbs      (1<<11)      /* Memory operand is absolute displacement */
74 #define String      (1<<12)     /* String instruction (rep capable) */
75 #define Stack       (1<<13)     /* Stack instruction (push/pop) */
76 #define Group       (1<<14)     /* Bits 3:5 of modrm byte extend opcode */
77 #define GroupDual   (1<<15)     /* Alternate decoding of mod == 3 */
78 /* Misc flags */
79 #define NoAccess    (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
80 #define Op3264      (1<<24) /* Operand is 64b in long mode, 32b otherwise */
81 #define Undefined   (1<<25) /* No Such Instruction */
82 #define Lock        (1<<26) /* lock prefix is allowed for the instruction */
83 #define Priv        (1<<27) /* instruction generates #GP if current CPL != 0 */
84 #define No64	    (1<<28)
85 /* Source 2 operand type */
86 #define Src2None    (0<<29)
87 #define Src2CL      (1<<29)
88 #define Src2ImmByte (2<<29)
89 #define Src2One     (3<<29)
90 #define Src2Imm     (4<<29)
91 #define Src2Mask    (7<<29)
92 
93 #define X2(x...) x, x
94 #define X3(x...) X2(x), x
95 #define X4(x...) X2(x), X2(x)
96 #define X5(x...) X4(x), x
97 #define X6(x...) X4(x), X2(x)
98 #define X7(x...) X4(x), X3(x)
99 #define X8(x...) X4(x), X4(x)
100 #define X16(x...) X8(x), X8(x)
101 
102 struct opcode {
103 	u32 flags;
104 	union {
105 		int (*execute)(struct x86_emulate_ctxt *ctxt);
106 		struct opcode *group;
107 		struct group_dual *gdual;
108 	} u;
109 };
110 
111 struct group_dual {
112 	struct opcode mod012[8];
113 	struct opcode mod3[8];
114 };
115 
116 /* EFLAGS bit definitions. */
117 #define EFLG_ID (1<<21)
118 #define EFLG_VIP (1<<20)
119 #define EFLG_VIF (1<<19)
120 #define EFLG_AC (1<<18)
121 #define EFLG_VM (1<<17)
122 #define EFLG_RF (1<<16)
123 #define EFLG_IOPL (3<<12)
124 #define EFLG_NT (1<<14)
125 #define EFLG_OF (1<<11)
126 #define EFLG_DF (1<<10)
127 #define EFLG_IF (1<<9)
128 #define EFLG_TF (1<<8)
129 #define EFLG_SF (1<<7)
130 #define EFLG_ZF (1<<6)
131 #define EFLG_AF (1<<4)
132 #define EFLG_PF (1<<2)
133 #define EFLG_CF (1<<0)
134 
135 #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
136 #define EFLG_RESERVED_ONE_MASK 2
137 
138 /*
139  * Instruction emulation:
140  * Most instructions are emulated directly via a fragment of inline assembly
141  * code. This allows us to save/restore EFLAGS and thus very easily pick up
142  * any modified flags.
143  */
144 
145 #if defined(CONFIG_X86_64)
146 #define _LO32 "k"		/* force 32-bit operand */
147 #define _STK  "%%rsp"		/* stack pointer */
148 #elif defined(__i386__)
149 #define _LO32 ""		/* force 32-bit operand */
150 #define _STK  "%%esp"		/* stack pointer */
151 #endif
152 
153 /*
154  * These EFLAGS bits are restored from saved value during emulation, and
155  * any changes are written back to the saved value after emulation.
156  */
157 #define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)
158 
159 /* Before executing instruction: restore necessary bits in EFLAGS. */
160 #define _PRE_EFLAGS(_sav, _msk, _tmp)					\
161 	/* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */ \
162 	"movl %"_sav",%"_LO32 _tmp"; "                                  \
163 	"push %"_tmp"; "                                                \
164 	"push %"_tmp"; "                                                \
165 	"movl %"_msk",%"_LO32 _tmp"; "                                  \
166 	"andl %"_LO32 _tmp",("_STK"); "                                 \
167 	"pushf; "                                                       \
168 	"notl %"_LO32 _tmp"; "                                          \
169 	"andl %"_LO32 _tmp",("_STK"); "                                 \
170 	"andl %"_LO32 _tmp","__stringify(BITS_PER_LONG/4)"("_STK"); "	\
171 	"pop  %"_tmp"; "                                                \
172 	"orl  %"_LO32 _tmp",("_STK"); "                                 \
173 	"popf; "                                                        \
174 	"pop  %"_sav"; "
175 
176 /* After executing instruction: write-back necessary bits in EFLAGS. */
177 #define _POST_EFLAGS(_sav, _msk, _tmp) \
178 	/* _sav |= EFLAGS & _msk; */		\
179 	"pushf; "				\
180 	"pop  %"_tmp"; "			\
181 	"andl %"_msk",%"_LO32 _tmp"; "		\
182 	"orl  %"_LO32 _tmp",%"_sav"; "
183 
184 #ifdef CONFIG_X86_64
185 #define ON64(x) x
186 #else
187 #define ON64(x)
188 #endif
189 
190 #define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix, _dsttype) \
191 	do {								\
192 		__asm__ __volatile__ (					\
193 			_PRE_EFLAGS("0", "4", "2")			\
194 			_op _suffix " %"_x"3,%1; "			\
195 			_POST_EFLAGS("0", "4", "2")			\
196 			: "=m" (_eflags), "+q" (*(_dsttype*)&(_dst).val),\
197 			  "=&r" (_tmp)					\
198 			: _y ((_src).val), "i" (EFLAGS_MASK));		\
199 	} while (0)
200 
201 
202 /* Raw emulation: instruction has two explicit operands. */
203 #define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \
204 	do {								\
205 		unsigned long _tmp;					\
206 									\
207 		switch ((_dst).bytes) {					\
208 		case 2:							\
209 			____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w",u16);\
210 			break;						\
211 		case 4:							\
212 			____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l",u32);\
213 			break;						\
214 		case 8:							\
215 			ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q",u64)); \
216 			break;						\
217 		}							\
218 	} while (0)
219 
220 #define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
221 	do {								     \
222 		unsigned long _tmp;					     \
223 		switch ((_dst).bytes) {				             \
224 		case 1:							     \
225 			____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b",u8); \
226 			break;						     \
227 		default:						     \
228 			__emulate_2op_nobyte(_op, _src, _dst, _eflags,	     \
229 					     _wx, _wy, _lx, _ly, _qx, _qy);  \
230 			break;						     \
231 		}							     \
232 	} while (0)
233 
234 /* Source operand is byte-sized and may be restricted to just %cl. */
235 #define emulate_2op_SrcB(_op, _src, _dst, _eflags)                      \
236 	__emulate_2op(_op, _src, _dst, _eflags,				\
237 		      "b", "c", "b", "c", "b", "c", "b", "c")
238 
239 /* Source operand is byte, word, long or quad sized. */
240 #define emulate_2op_SrcV(_op, _src, _dst, _eflags)                      \
241 	__emulate_2op(_op, _src, _dst, _eflags,				\
242 		      "b", "q", "w", "r", _LO32, "r", "", "r")
243 
244 /* Source operand is word, long or quad sized. */
245 #define emulate_2op_SrcV_nobyte(_op, _src, _dst, _eflags)               \
246 	__emulate_2op_nobyte(_op, _src, _dst, _eflags,			\
247 			     "w", "r", _LO32, "r", "", "r")
248 
249 /* Instruction has three operands and one operand is stored in ECX register */
250 #define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) 	\
251 	do {									\
252 		unsigned long _tmp;						\
253 		_type _clv  = (_cl).val;  					\
254 		_type _srcv = (_src).val;    					\
255 		_type _dstv = (_dst).val;					\
256 										\
257 		__asm__ __volatile__ (						\
258 			_PRE_EFLAGS("0", "5", "2")				\
259 			_op _suffix " %4,%1 \n"					\
260 			_POST_EFLAGS("0", "5", "2")				\
261 			: "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp)		\
262 			: "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK)		\
263 			); 							\
264 										\
265 		(_cl).val  = (unsigned long) _clv;				\
266 		(_src).val = (unsigned long) _srcv;				\
267 		(_dst).val = (unsigned long) _dstv;				\
268 	} while (0)
269 
270 #define emulate_2op_cl(_op, _cl, _src, _dst, _eflags)				\
271 	do {									\
272 		switch ((_dst).bytes) {						\
273 		case 2:								\
274 			__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,  	\
275 						"w", unsigned short);         	\
276 			break;							\
277 		case 4: 							\
278 			__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,  	\
279 						"l", unsigned int);           	\
280 			break;							\
281 		case 8:								\
282 			ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,	\
283 						"q", unsigned long));  		\
284 			break;							\
285 		}								\
286 	} while (0)
287 
288 #define __emulate_1op(_op, _dst, _eflags, _suffix)			\
289 	do {								\
290 		unsigned long _tmp;					\
291 									\
292 		__asm__ __volatile__ (					\
293 			_PRE_EFLAGS("0", "3", "2")			\
294 			_op _suffix " %1; "				\
295 			_POST_EFLAGS("0", "3", "2")			\
296 			: "=m" (_eflags), "+m" ((_dst).val),		\
297 			  "=&r" (_tmp)					\
298 			: "i" (EFLAGS_MASK));				\
299 	} while (0)
300 
301 /* Instruction has only one explicit operand (no source operand). */
302 #define emulate_1op(_op, _dst, _eflags)                                    \
303 	do {								\
304 		switch ((_dst).bytes) {				        \
305 		case 1:	__emulate_1op(_op, _dst, _eflags, "b"); break;	\
306 		case 2:	__emulate_1op(_op, _dst, _eflags, "w"); break;	\
307 		case 4:	__emulate_1op(_op, _dst, _eflags, "l"); break;	\
308 		case 8:	ON64(__emulate_1op(_op, _dst, _eflags, "q")); break; \
309 		}							\
310 	} while (0)
311 
312 #define __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, _suffix)		\
313 	do {								\
314 		unsigned long _tmp;					\
315 									\
316 		__asm__ __volatile__ (					\
317 			_PRE_EFLAGS("0", "4", "1")			\
318 			_op _suffix " %5; "				\
319 			_POST_EFLAGS("0", "4", "1")			\
320 			: "=m" (_eflags), "=&r" (_tmp),			\
321 			  "+a" (_rax), "+d" (_rdx)			\
322 			: "i" (EFLAGS_MASK), "m" ((_src).val),		\
323 			  "a" (_rax), "d" (_rdx));			\
324 	} while (0)
325 
326 #define __emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, _eflags, _suffix, _ex) \
327 	do {								\
328 		unsigned long _tmp;					\
329 									\
330 		__asm__ __volatile__ (					\
331 			_PRE_EFLAGS("0", "5", "1")			\
332 			"1: \n\t"					\
333 			_op _suffix " %6; "				\
334 			"2: \n\t"					\
335 			_POST_EFLAGS("0", "5", "1")			\
336 			".pushsection .fixup,\"ax\" \n\t"		\
337 			"3: movb $1, %4 \n\t"				\
338 			"jmp 2b \n\t"					\
339 			".popsection \n\t"				\
340 			_ASM_EXTABLE(1b, 3b)				\
341 			: "=m" (_eflags), "=&r" (_tmp),			\
342 			  "+a" (_rax), "+d" (_rdx), "+qm"(_ex)		\
343 			: "i" (EFLAGS_MASK), "m" ((_src).val),		\
344 			  "a" (_rax), "d" (_rdx));			\
345 	} while (0)
346 
347 /* instruction has only one source operand, destination is implicit (e.g. mul, div, imul, idiv) */
348 #define emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags)			\
349 	do {									\
350 		switch((_src).bytes) {						\
351 		case 1: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "b"); break; \
352 		case 2: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx,  _eflags, "w"); break; \
353 		case 4: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "l"); break; \
354 		case 8: ON64(__emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "q")); break; \
355 		}							\
356 	} while (0)
357 
358 #define emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, _eflags, _ex)	\
359 	do {								\
360 		switch((_src).bytes) {					\
361 		case 1:							\
362 			__emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx,	\
363 						 _eflags, "b", _ex);	\
364 			break;						\
365 		case 2:							\
366 			__emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, \
367 						 _eflags, "w", _ex);	\
368 			break;						\
369 		case 4:							\
370 			__emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, \
371 						 _eflags, "l", _ex);	\
372 			break;						\
373 		case 8: ON64(						\
374 			__emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, \
375 						 _eflags, "q", _ex));	\
376 			break;						\
377 		}							\
378 	} while (0)
379 
380 /* Fetch next part of the instruction being emulated. */
381 #define insn_fetch(_type, _size, _eip)                                  \
382 ({	unsigned long _x;						\
383 	rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size));		\
384 	if (rc != X86EMUL_CONTINUE)					\
385 		goto done;						\
386 	(_eip) += (_size);						\
387 	(_type)_x;							\
388 })
389 
390 #define insn_fetch_arr(_arr, _size, _eip)                                \
391 ({	rc = do_insn_fetch(ctxt, ops, (_eip), _arr, (_size));		\
392 	if (rc != X86EMUL_CONTINUE)					\
393 		goto done;						\
394 	(_eip) += (_size);						\
395 })
396 
397 static inline unsigned long ad_mask(struct decode_cache *c)
398 {
399 	return (1UL << (c->ad_bytes << 3)) - 1;
400 }
401 
402 /* Access/update address held in a register, based on addressing mode. */
403 static inline unsigned long
404 address_mask(struct decode_cache *c, unsigned long reg)
405 {
406 	if (c->ad_bytes == sizeof(unsigned long))
407 		return reg;
408 	else
409 		return reg & ad_mask(c);
410 }
411 
412 static inline unsigned long
413 register_address(struct decode_cache *c, unsigned long reg)
414 {
415 	return address_mask(c, reg);
416 }
417 
418 static inline void
419 register_address_increment(struct decode_cache *c, unsigned long *reg, int inc)
420 {
421 	if (c->ad_bytes == sizeof(unsigned long))
422 		*reg += inc;
423 	else
424 		*reg = (*reg & ~ad_mask(c)) | ((*reg + inc) & ad_mask(c));
425 }
426 
427 static inline void jmp_rel(struct decode_cache *c, int rel)
428 {
429 	register_address_increment(c, &c->eip, rel);
430 }
431 
432 static void set_seg_override(struct decode_cache *c, int seg)
433 {
434 	c->has_seg_override = true;
435 	c->seg_override = seg;
436 }
437 
438 static unsigned long seg_base(struct x86_emulate_ctxt *ctxt,
439 			      struct x86_emulate_ops *ops, int seg)
440 {
441 	if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
442 		return 0;
443 
444 	return ops->get_cached_segment_base(seg, ctxt->vcpu);
445 }
446 
447 static unsigned seg_override(struct x86_emulate_ctxt *ctxt,
448 			     struct x86_emulate_ops *ops,
449 			     struct decode_cache *c)
450 {
451 	if (!c->has_seg_override)
452 		return 0;
453 
454 	return c->seg_override;
455 }
456 
457 static ulong linear(struct x86_emulate_ctxt *ctxt,
458 		    struct segmented_address addr)
459 {
460 	struct decode_cache *c = &ctxt->decode;
461 	ulong la;
462 
463 	la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea;
464 	if (c->ad_bytes != 8)
465 		la &= (u32)-1;
466 	return la;
467 }
468 
469 static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
470 			     u32 error, bool valid)
471 {
472 	ctxt->exception.vector = vec;
473 	ctxt->exception.error_code = error;
474 	ctxt->exception.error_code_valid = valid;
475 	return X86EMUL_PROPAGATE_FAULT;
476 }
477 
478 static int emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
479 {
480 	return emulate_exception(ctxt, GP_VECTOR, err, true);
481 }
482 
483 static int emulate_ud(struct x86_emulate_ctxt *ctxt)
484 {
485 	return emulate_exception(ctxt, UD_VECTOR, 0, false);
486 }
487 
488 static int emulate_ts(struct x86_emulate_ctxt *ctxt, int err)
489 {
490 	return emulate_exception(ctxt, TS_VECTOR, err, true);
491 }
492 
493 static int emulate_de(struct x86_emulate_ctxt *ctxt)
494 {
495 	return emulate_exception(ctxt, DE_VECTOR, 0, false);
496 }
497 
498 static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
499 			      struct x86_emulate_ops *ops,
500 			      unsigned long eip, u8 *dest)
501 {
502 	struct fetch_cache *fc = &ctxt->decode.fetch;
503 	int rc;
504 	int size, cur_size;
505 
506 	if (eip == fc->end) {
507 		cur_size = fc->end - fc->start;
508 		size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip));
509 		rc = ops->fetch(ctxt->cs_base + eip, fc->data + cur_size,
510 				size, ctxt->vcpu, &ctxt->exception);
511 		if (rc != X86EMUL_CONTINUE)
512 			return rc;
513 		fc->end += size;
514 	}
515 	*dest = fc->data[eip - fc->start];
516 	return X86EMUL_CONTINUE;
517 }
518 
519 static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
520 			 struct x86_emulate_ops *ops,
521 			 unsigned long eip, void *dest, unsigned size)
522 {
523 	int rc;
524 
525 	/* x86 instructions are limited to 15 bytes. */
526 	if (eip + size - ctxt->eip > 15)
527 		return X86EMUL_UNHANDLEABLE;
528 	while (size--) {
529 		rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++);
530 		if (rc != X86EMUL_CONTINUE)
531 			return rc;
532 	}
533 	return X86EMUL_CONTINUE;
534 }
535 
536 /*
537  * Given the 'reg' portion of a ModRM byte, and a register block, return a
538  * pointer into the block that addresses the relevant register.
539  * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
540  */
541 static void *decode_register(u8 modrm_reg, unsigned long *regs,
542 			     int highbyte_regs)
543 {
544 	void *p;
545 
546 	p = &regs[modrm_reg];
547 	if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
548 		p = (unsigned char *)&regs[modrm_reg & 3] + 1;
549 	return p;
550 }
551 
552 static int read_descriptor(struct x86_emulate_ctxt *ctxt,
553 			   struct x86_emulate_ops *ops,
554 			   struct segmented_address addr,
555 			   u16 *size, unsigned long *address, int op_bytes)
556 {
557 	int rc;
558 
559 	if (op_bytes == 2)
560 		op_bytes = 3;
561 	*address = 0;
562 	rc = ops->read_std(linear(ctxt, addr), (unsigned long *)size, 2,
563 			   ctxt->vcpu, &ctxt->exception);
564 	if (rc != X86EMUL_CONTINUE)
565 		return rc;
566 	addr.ea += 2;
567 	rc = ops->read_std(linear(ctxt, addr), address, op_bytes,
568 			   ctxt->vcpu, &ctxt->exception);
569 	return rc;
570 }
571 
572 static int test_cc(unsigned int condition, unsigned int flags)
573 {
574 	int rc = 0;
575 
576 	switch ((condition & 15) >> 1) {
577 	case 0: /* o */
578 		rc |= (flags & EFLG_OF);
579 		break;
580 	case 1: /* b/c/nae */
581 		rc |= (flags & EFLG_CF);
582 		break;
583 	case 2: /* z/e */
584 		rc |= (flags & EFLG_ZF);
585 		break;
586 	case 3: /* be/na */
587 		rc |= (flags & (EFLG_CF|EFLG_ZF));
588 		break;
589 	case 4: /* s */
590 		rc |= (flags & EFLG_SF);
591 		break;
592 	case 5: /* p/pe */
593 		rc |= (flags & EFLG_PF);
594 		break;
595 	case 7: /* le/ng */
596 		rc |= (flags & EFLG_ZF);
597 		/* fall through */
598 	case 6: /* l/nge */
599 		rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF));
600 		break;
601 	}
602 
603 	/* Odd condition identifiers (lsb == 1) have inverted sense. */
604 	return (!!rc ^ (condition & 1));
605 }
606 
607 static void fetch_register_operand(struct operand *op)
608 {
609 	switch (op->bytes) {
610 	case 1:
611 		op->val = *(u8 *)op->addr.reg;
612 		break;
613 	case 2:
614 		op->val = *(u16 *)op->addr.reg;
615 		break;
616 	case 4:
617 		op->val = *(u32 *)op->addr.reg;
618 		break;
619 	case 8:
620 		op->val = *(u64 *)op->addr.reg;
621 		break;
622 	}
623 }
624 
625 static void decode_register_operand(struct operand *op,
626 				    struct decode_cache *c,
627 				    int inhibit_bytereg)
628 {
629 	unsigned reg = c->modrm_reg;
630 	int highbyte_regs = c->rex_prefix == 0;
631 
632 	if (!(c->d & ModRM))
633 		reg = (c->b & 7) | ((c->rex_prefix & 1) << 3);
634 	op->type = OP_REG;
635 	if ((c->d & ByteOp) && !inhibit_bytereg) {
636 		op->addr.reg = decode_register(reg, c->regs, highbyte_regs);
637 		op->bytes = 1;
638 	} else {
639 		op->addr.reg = decode_register(reg, c->regs, 0);
640 		op->bytes = c->op_bytes;
641 	}
642 	fetch_register_operand(op);
643 	op->orig_val = op->val;
644 }
645 
646 static int decode_modrm(struct x86_emulate_ctxt *ctxt,
647 			struct x86_emulate_ops *ops,
648 			struct operand *op)
649 {
650 	struct decode_cache *c = &ctxt->decode;
651 	u8 sib;
652 	int index_reg = 0, base_reg = 0, scale;
653 	int rc = X86EMUL_CONTINUE;
654 	ulong modrm_ea = 0;
655 
656 	if (c->rex_prefix) {
657 		c->modrm_reg = (c->rex_prefix & 4) << 1;	/* REX.R */
658 		index_reg = (c->rex_prefix & 2) << 2; /* REX.X */
659 		c->modrm_rm = base_reg = (c->rex_prefix & 1) << 3; /* REG.B */
660 	}
661 
662 	c->modrm = insn_fetch(u8, 1, c->eip);
663 	c->modrm_mod |= (c->modrm & 0xc0) >> 6;
664 	c->modrm_reg |= (c->modrm & 0x38) >> 3;
665 	c->modrm_rm |= (c->modrm & 0x07);
666 	c->modrm_seg = VCPU_SREG_DS;
667 
668 	if (c->modrm_mod == 3) {
669 		op->type = OP_REG;
670 		op->bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
671 		op->addr.reg = decode_register(c->modrm_rm,
672 					       c->regs, c->d & ByteOp);
673 		fetch_register_operand(op);
674 		return rc;
675 	}
676 
677 	op->type = OP_MEM;
678 
679 	if (c->ad_bytes == 2) {
680 		unsigned bx = c->regs[VCPU_REGS_RBX];
681 		unsigned bp = c->regs[VCPU_REGS_RBP];
682 		unsigned si = c->regs[VCPU_REGS_RSI];
683 		unsigned di = c->regs[VCPU_REGS_RDI];
684 
685 		/* 16-bit ModR/M decode. */
686 		switch (c->modrm_mod) {
687 		case 0:
688 			if (c->modrm_rm == 6)
689 				modrm_ea += insn_fetch(u16, 2, c->eip);
690 			break;
691 		case 1:
692 			modrm_ea += insn_fetch(s8, 1, c->eip);
693 			break;
694 		case 2:
695 			modrm_ea += insn_fetch(u16, 2, c->eip);
696 			break;
697 		}
698 		switch (c->modrm_rm) {
699 		case 0:
700 			modrm_ea += bx + si;
701 			break;
702 		case 1:
703 			modrm_ea += bx + di;
704 			break;
705 		case 2:
706 			modrm_ea += bp + si;
707 			break;
708 		case 3:
709 			modrm_ea += bp + di;
710 			break;
711 		case 4:
712 			modrm_ea += si;
713 			break;
714 		case 5:
715 			modrm_ea += di;
716 			break;
717 		case 6:
718 			if (c->modrm_mod != 0)
719 				modrm_ea += bp;
720 			break;
721 		case 7:
722 			modrm_ea += bx;
723 			break;
724 		}
725 		if (c->modrm_rm == 2 || c->modrm_rm == 3 ||
726 		    (c->modrm_rm == 6 && c->modrm_mod != 0))
727 			c->modrm_seg = VCPU_SREG_SS;
728 		modrm_ea = (u16)modrm_ea;
729 	} else {
730 		/* 32/64-bit ModR/M decode. */
731 		if ((c->modrm_rm & 7) == 4) {
732 			sib = insn_fetch(u8, 1, c->eip);
733 			index_reg |= (sib >> 3) & 7;
734 			base_reg |= sib & 7;
735 			scale = sib >> 6;
736 
737 			if ((base_reg & 7) == 5 && c->modrm_mod == 0)
738 				modrm_ea += insn_fetch(s32, 4, c->eip);
739 			else
740 				modrm_ea += c->regs[base_reg];
741 			if (index_reg != 4)
742 				modrm_ea += c->regs[index_reg] << scale;
743 		} else if ((c->modrm_rm & 7) == 5 && c->modrm_mod == 0) {
744 			if (ctxt->mode == X86EMUL_MODE_PROT64)
745 				c->rip_relative = 1;
746 		} else
747 			modrm_ea += c->regs[c->modrm_rm];
748 		switch (c->modrm_mod) {
749 		case 0:
750 			if (c->modrm_rm == 5)
751 				modrm_ea += insn_fetch(s32, 4, c->eip);
752 			break;
753 		case 1:
754 			modrm_ea += insn_fetch(s8, 1, c->eip);
755 			break;
756 		case 2:
757 			modrm_ea += insn_fetch(s32, 4, c->eip);
758 			break;
759 		}
760 	}
761 	op->addr.mem.ea = modrm_ea;
762 done:
763 	return rc;
764 }
765 
766 static int decode_abs(struct x86_emulate_ctxt *ctxt,
767 		      struct x86_emulate_ops *ops,
768 		      struct operand *op)
769 {
770 	struct decode_cache *c = &ctxt->decode;
771 	int rc = X86EMUL_CONTINUE;
772 
773 	op->type = OP_MEM;
774 	switch (c->ad_bytes) {
775 	case 2:
776 		op->addr.mem.ea = insn_fetch(u16, 2, c->eip);
777 		break;
778 	case 4:
779 		op->addr.mem.ea = insn_fetch(u32, 4, c->eip);
780 		break;
781 	case 8:
782 		op->addr.mem.ea = insn_fetch(u64, 8, c->eip);
783 		break;
784 	}
785 done:
786 	return rc;
787 }
788 
789 static void fetch_bit_operand(struct decode_cache *c)
790 {
791 	long sv = 0, mask;
792 
793 	if (c->dst.type == OP_MEM && c->src.type == OP_REG) {
794 		mask = ~(c->dst.bytes * 8 - 1);
795 
796 		if (c->src.bytes == 2)
797 			sv = (s16)c->src.val & (s16)mask;
798 		else if (c->src.bytes == 4)
799 			sv = (s32)c->src.val & (s32)mask;
800 
801 		c->dst.addr.mem.ea += (sv >> 3);
802 	}
803 
804 	/* only subword offset */
805 	c->src.val &= (c->dst.bytes << 3) - 1;
806 }
807 
808 static int read_emulated(struct x86_emulate_ctxt *ctxt,
809 			 struct x86_emulate_ops *ops,
810 			 unsigned long addr, void *dest, unsigned size)
811 {
812 	int rc;
813 	struct read_cache *mc = &ctxt->decode.mem_read;
814 
815 	while (size) {
816 		int n = min(size, 8u);
817 		size -= n;
818 		if (mc->pos < mc->end)
819 			goto read_cached;
820 
821 		rc = ops->read_emulated(addr, mc->data + mc->end, n,
822 					&ctxt->exception, ctxt->vcpu);
823 		if (rc != X86EMUL_CONTINUE)
824 			return rc;
825 		mc->end += n;
826 
827 	read_cached:
828 		memcpy(dest, mc->data + mc->pos, n);
829 		mc->pos += n;
830 		dest += n;
831 		addr += n;
832 	}
833 	return X86EMUL_CONTINUE;
834 }
835 
836 static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
837 			   struct x86_emulate_ops *ops,
838 			   unsigned int size, unsigned short port,
839 			   void *dest)
840 {
841 	struct read_cache *rc = &ctxt->decode.io_read;
842 
843 	if (rc->pos == rc->end) { /* refill pio read ahead */
844 		struct decode_cache *c = &ctxt->decode;
845 		unsigned int in_page, n;
846 		unsigned int count = c->rep_prefix ?
847 			address_mask(c, c->regs[VCPU_REGS_RCX]) : 1;
848 		in_page = (ctxt->eflags & EFLG_DF) ?
849 			offset_in_page(c->regs[VCPU_REGS_RDI]) :
850 			PAGE_SIZE - offset_in_page(c->regs[VCPU_REGS_RDI]);
851 		n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size,
852 			count);
853 		if (n == 0)
854 			n = 1;
855 		rc->pos = rc->end = 0;
856 		if (!ops->pio_in_emulated(size, port, rc->data, n, ctxt->vcpu))
857 			return 0;
858 		rc->end = n * size;
859 	}
860 
861 	memcpy(dest, rc->data + rc->pos, size);
862 	rc->pos += size;
863 	return 1;
864 }
865 
866 static u32 desc_limit_scaled(struct desc_struct *desc)
867 {
868 	u32 limit = get_desc_limit(desc);
869 
870 	return desc->g ? (limit << 12) | 0xfff : limit;
871 }
872 
873 static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
874 				     struct x86_emulate_ops *ops,
875 				     u16 selector, struct desc_ptr *dt)
876 {
877 	if (selector & 1 << 2) {
878 		struct desc_struct desc;
879 		memset (dt, 0, sizeof *dt);
880 		if (!ops->get_cached_descriptor(&desc, VCPU_SREG_LDTR, ctxt->vcpu))
881 			return;
882 
883 		dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
884 		dt->address = get_desc_base(&desc);
885 	} else
886 		ops->get_gdt(dt, ctxt->vcpu);
887 }
888 
889 /* allowed just for 8 bytes segments */
890 static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
891 				   struct x86_emulate_ops *ops,
892 				   u16 selector, struct desc_struct *desc)
893 {
894 	struct desc_ptr dt;
895 	u16 index = selector >> 3;
896 	int ret;
897 	ulong addr;
898 
899 	get_descriptor_table_ptr(ctxt, ops, selector, &dt);
900 
901 	if (dt.size < index * 8 + 7)
902 		return emulate_gp(ctxt, selector & 0xfffc);
903 	addr = dt.address + index * 8;
904 	ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu,
905 			    &ctxt->exception);
906 
907        return ret;
908 }
909 
910 /* allowed just for 8 bytes segments */
911 static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
912 				    struct x86_emulate_ops *ops,
913 				    u16 selector, struct desc_struct *desc)
914 {
915 	struct desc_ptr dt;
916 	u16 index = selector >> 3;
917 	ulong addr;
918 	int ret;
919 
920 	get_descriptor_table_ptr(ctxt, ops, selector, &dt);
921 
922 	if (dt.size < index * 8 + 7)
923 		return emulate_gp(ctxt, selector & 0xfffc);
924 
925 	addr = dt.address + index * 8;
926 	ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu,
927 			     &ctxt->exception);
928 
929 	return ret;
930 }
931 
932 static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
933 				   struct x86_emulate_ops *ops,
934 				   u16 selector, int seg)
935 {
936 	struct desc_struct seg_desc;
937 	u8 dpl, rpl, cpl;
938 	unsigned err_vec = GP_VECTOR;
939 	u32 err_code = 0;
940 	bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
941 	int ret;
942 
943 	memset(&seg_desc, 0, sizeof seg_desc);
944 
945 	if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86)
946 	    || ctxt->mode == X86EMUL_MODE_REAL) {
947 		/* set real mode segment descriptor */
948 		set_desc_base(&seg_desc, selector << 4);
949 		set_desc_limit(&seg_desc, 0xffff);
950 		seg_desc.type = 3;
951 		seg_desc.p = 1;
952 		seg_desc.s = 1;
953 		goto load;
954 	}
955 
956 	/* NULL selector is not valid for TR, CS and SS */
957 	if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR)
958 	    && null_selector)
959 		goto exception;
960 
961 	/* TR should be in GDT only */
962 	if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
963 		goto exception;
964 
965 	if (null_selector) /* for NULL selector skip all following checks */
966 		goto load;
967 
968 	ret = read_segment_descriptor(ctxt, ops, selector, &seg_desc);
969 	if (ret != X86EMUL_CONTINUE)
970 		return ret;
971 
972 	err_code = selector & 0xfffc;
973 	err_vec = GP_VECTOR;
974 
975 	/* can't load system descriptor into segment selecor */
976 	if (seg <= VCPU_SREG_GS && !seg_desc.s)
977 		goto exception;
978 
979 	if (!seg_desc.p) {
980 		err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
981 		goto exception;
982 	}
983 
984 	rpl = selector & 3;
985 	dpl = seg_desc.dpl;
986 	cpl = ops->cpl(ctxt->vcpu);
987 
988 	switch (seg) {
989 	case VCPU_SREG_SS:
990 		/*
991 		 * segment is not a writable data segment or segment
992 		 * selector's RPL != CPL or segment selector's RPL != CPL
993 		 */
994 		if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
995 			goto exception;
996 		break;
997 	case VCPU_SREG_CS:
998 		if (!(seg_desc.type & 8))
999 			goto exception;
1000 
1001 		if (seg_desc.type & 4) {
1002 			/* conforming */
1003 			if (dpl > cpl)
1004 				goto exception;
1005 		} else {
1006 			/* nonconforming */
1007 			if (rpl > cpl || dpl != cpl)
1008 				goto exception;
1009 		}
1010 		/* CS(RPL) <- CPL */
1011 		selector = (selector & 0xfffc) | cpl;
1012 		break;
1013 	case VCPU_SREG_TR:
1014 		if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
1015 			goto exception;
1016 		break;
1017 	case VCPU_SREG_LDTR:
1018 		if (seg_desc.s || seg_desc.type != 2)
1019 			goto exception;
1020 		break;
1021 	default: /*  DS, ES, FS, or GS */
1022 		/*
1023 		 * segment is not a data or readable code segment or
1024 		 * ((segment is a data or nonconforming code segment)
1025 		 * and (both RPL and CPL > DPL))
1026 		 */
1027 		if ((seg_desc.type & 0xa) == 0x8 ||
1028 		    (((seg_desc.type & 0xc) != 0xc) &&
1029 		     (rpl > dpl && cpl > dpl)))
1030 			goto exception;
1031 		break;
1032 	}
1033 
1034 	if (seg_desc.s) {
1035 		/* mark segment as accessed */
1036 		seg_desc.type |= 1;
1037 		ret = write_segment_descriptor(ctxt, ops, selector, &seg_desc);
1038 		if (ret != X86EMUL_CONTINUE)
1039 			return ret;
1040 	}
1041 load:
1042 	ops->set_segment_selector(selector, seg, ctxt->vcpu);
1043 	ops->set_cached_descriptor(&seg_desc, seg, ctxt->vcpu);
1044 	return X86EMUL_CONTINUE;
1045 exception:
1046 	emulate_exception(ctxt, err_vec, err_code, true);
1047 	return X86EMUL_PROPAGATE_FAULT;
1048 }
1049 
1050 static void write_register_operand(struct operand *op)
1051 {
1052 	/* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
1053 	switch (op->bytes) {
1054 	case 1:
1055 		*(u8 *)op->addr.reg = (u8)op->val;
1056 		break;
1057 	case 2:
1058 		*(u16 *)op->addr.reg = (u16)op->val;
1059 		break;
1060 	case 4:
1061 		*op->addr.reg = (u32)op->val;
1062 		break;	/* 64b: zero-extend */
1063 	case 8:
1064 		*op->addr.reg = op->val;
1065 		break;
1066 	}
1067 }
1068 
1069 static inline int writeback(struct x86_emulate_ctxt *ctxt,
1070 			    struct x86_emulate_ops *ops)
1071 {
1072 	int rc;
1073 	struct decode_cache *c = &ctxt->decode;
1074 
1075 	switch (c->dst.type) {
1076 	case OP_REG:
1077 		write_register_operand(&c->dst);
1078 		break;
1079 	case OP_MEM:
1080 		if (c->lock_prefix)
1081 			rc = ops->cmpxchg_emulated(
1082 					linear(ctxt, c->dst.addr.mem),
1083 					&c->dst.orig_val,
1084 					&c->dst.val,
1085 					c->dst.bytes,
1086 					&ctxt->exception,
1087 					ctxt->vcpu);
1088 		else
1089 			rc = ops->write_emulated(
1090 					linear(ctxt, c->dst.addr.mem),
1091 					&c->dst.val,
1092 					c->dst.bytes,
1093 					&ctxt->exception,
1094 					ctxt->vcpu);
1095 		if (rc != X86EMUL_CONTINUE)
1096 			return rc;
1097 		break;
1098 	case OP_NONE:
1099 		/* no writeback */
1100 		break;
1101 	default:
1102 		break;
1103 	}
1104 	return X86EMUL_CONTINUE;
1105 }
1106 
1107 static inline void emulate_push(struct x86_emulate_ctxt *ctxt,
1108 				struct x86_emulate_ops *ops)
1109 {
1110 	struct decode_cache *c = &ctxt->decode;
1111 
1112 	c->dst.type  = OP_MEM;
1113 	c->dst.bytes = c->op_bytes;
1114 	c->dst.val = c->src.val;
1115 	register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes);
1116 	c->dst.addr.mem.ea = register_address(c, c->regs[VCPU_REGS_RSP]);
1117 	c->dst.addr.mem.seg = VCPU_SREG_SS;
1118 }
1119 
1120 static int emulate_pop(struct x86_emulate_ctxt *ctxt,
1121 		       struct x86_emulate_ops *ops,
1122 		       void *dest, int len)
1123 {
1124 	struct decode_cache *c = &ctxt->decode;
1125 	int rc;
1126 	struct segmented_address addr;
1127 
1128 	addr.ea = register_address(c, c->regs[VCPU_REGS_RSP]);
1129 	addr.seg = VCPU_SREG_SS;
1130 	rc = read_emulated(ctxt, ops, linear(ctxt, addr), dest, len);
1131 	if (rc != X86EMUL_CONTINUE)
1132 		return rc;
1133 
1134 	register_address_increment(c, &c->regs[VCPU_REGS_RSP], len);
1135 	return rc;
1136 }
1137 
1138 static int emulate_popf(struct x86_emulate_ctxt *ctxt,
1139 		       struct x86_emulate_ops *ops,
1140 		       void *dest, int len)
1141 {
1142 	int rc;
1143 	unsigned long val, change_mask;
1144 	int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
1145 	int cpl = ops->cpl(ctxt->vcpu);
1146 
1147 	rc = emulate_pop(ctxt, ops, &val, len);
1148 	if (rc != X86EMUL_CONTINUE)
1149 		return rc;
1150 
1151 	change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF
1152 		| EFLG_TF | EFLG_DF | EFLG_NT | EFLG_RF | EFLG_AC | EFLG_ID;
1153 
1154 	switch(ctxt->mode) {
1155 	case X86EMUL_MODE_PROT64:
1156 	case X86EMUL_MODE_PROT32:
1157 	case X86EMUL_MODE_PROT16:
1158 		if (cpl == 0)
1159 			change_mask |= EFLG_IOPL;
1160 		if (cpl <= iopl)
1161 			change_mask |= EFLG_IF;
1162 		break;
1163 	case X86EMUL_MODE_VM86:
1164 		if (iopl < 3)
1165 			return emulate_gp(ctxt, 0);
1166 		change_mask |= EFLG_IF;
1167 		break;
1168 	default: /* real mode */
1169 		change_mask |= (EFLG_IOPL | EFLG_IF);
1170 		break;
1171 	}
1172 
1173 	*(unsigned long *)dest =
1174 		(ctxt->eflags & ~change_mask) | (val & change_mask);
1175 
1176 	return rc;
1177 }
1178 
1179 static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt,
1180 			      struct x86_emulate_ops *ops, int seg)
1181 {
1182 	struct decode_cache *c = &ctxt->decode;
1183 
1184 	c->src.val = ops->get_segment_selector(seg, ctxt->vcpu);
1185 
1186 	emulate_push(ctxt, ops);
1187 }
1188 
1189 static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
1190 			     struct x86_emulate_ops *ops, int seg)
1191 {
1192 	struct decode_cache *c = &ctxt->decode;
1193 	unsigned long selector;
1194 	int rc;
1195 
1196 	rc = emulate_pop(ctxt, ops, &selector, c->op_bytes);
1197 	if (rc != X86EMUL_CONTINUE)
1198 		return rc;
1199 
1200 	rc = load_segment_descriptor(ctxt, ops, (u16)selector, seg);
1201 	return rc;
1202 }
1203 
1204 static int emulate_pusha(struct x86_emulate_ctxt *ctxt,
1205 			  struct x86_emulate_ops *ops)
1206 {
1207 	struct decode_cache *c = &ctxt->decode;
1208 	unsigned long old_esp = c->regs[VCPU_REGS_RSP];
1209 	int rc = X86EMUL_CONTINUE;
1210 	int reg = VCPU_REGS_RAX;
1211 
1212 	while (reg <= VCPU_REGS_RDI) {
1213 		(reg == VCPU_REGS_RSP) ?
1214 		(c->src.val = old_esp) : (c->src.val = c->regs[reg]);
1215 
1216 		emulate_push(ctxt, ops);
1217 
1218 		rc = writeback(ctxt, ops);
1219 		if (rc != X86EMUL_CONTINUE)
1220 			return rc;
1221 
1222 		++reg;
1223 	}
1224 
1225 	/* Disable writeback. */
1226 	c->dst.type = OP_NONE;
1227 
1228 	return rc;
1229 }
1230 
1231 static int emulate_popa(struct x86_emulate_ctxt *ctxt,
1232 			struct x86_emulate_ops *ops)
1233 {
1234 	struct decode_cache *c = &ctxt->decode;
1235 	int rc = X86EMUL_CONTINUE;
1236 	int reg = VCPU_REGS_RDI;
1237 
1238 	while (reg >= VCPU_REGS_RAX) {
1239 		if (reg == VCPU_REGS_RSP) {
1240 			register_address_increment(c, &c->regs[VCPU_REGS_RSP],
1241 							c->op_bytes);
1242 			--reg;
1243 		}
1244 
1245 		rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes);
1246 		if (rc != X86EMUL_CONTINUE)
1247 			break;
1248 		--reg;
1249 	}
1250 	return rc;
1251 }
1252 
1253 int emulate_int_real(struct x86_emulate_ctxt *ctxt,
1254 			       struct x86_emulate_ops *ops, int irq)
1255 {
1256 	struct decode_cache *c = &ctxt->decode;
1257 	int rc;
1258 	struct desc_ptr dt;
1259 	gva_t cs_addr;
1260 	gva_t eip_addr;
1261 	u16 cs, eip;
1262 
1263 	/* TODO: Add limit checks */
1264 	c->src.val = ctxt->eflags;
1265 	emulate_push(ctxt, ops);
1266 	rc = writeback(ctxt, ops);
1267 	if (rc != X86EMUL_CONTINUE)
1268 		return rc;
1269 
1270 	ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC);
1271 
1272 	c->src.val = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
1273 	emulate_push(ctxt, ops);
1274 	rc = writeback(ctxt, ops);
1275 	if (rc != X86EMUL_CONTINUE)
1276 		return rc;
1277 
1278 	c->src.val = c->eip;
1279 	emulate_push(ctxt, ops);
1280 	rc = writeback(ctxt, ops);
1281 	if (rc != X86EMUL_CONTINUE)
1282 		return rc;
1283 
1284 	c->dst.type = OP_NONE;
1285 
1286 	ops->get_idt(&dt, ctxt->vcpu);
1287 
1288 	eip_addr = dt.address + (irq << 2);
1289 	cs_addr = dt.address + (irq << 2) + 2;
1290 
1291 	rc = ops->read_std(cs_addr, &cs, 2, ctxt->vcpu, &ctxt->exception);
1292 	if (rc != X86EMUL_CONTINUE)
1293 		return rc;
1294 
1295 	rc = ops->read_std(eip_addr, &eip, 2, ctxt->vcpu, &ctxt->exception);
1296 	if (rc != X86EMUL_CONTINUE)
1297 		return rc;
1298 
1299 	rc = load_segment_descriptor(ctxt, ops, cs, VCPU_SREG_CS);
1300 	if (rc != X86EMUL_CONTINUE)
1301 		return rc;
1302 
1303 	c->eip = eip;
1304 
1305 	return rc;
1306 }
1307 
1308 static int emulate_int(struct x86_emulate_ctxt *ctxt,
1309 		       struct x86_emulate_ops *ops, int irq)
1310 {
1311 	switch(ctxt->mode) {
1312 	case X86EMUL_MODE_REAL:
1313 		return emulate_int_real(ctxt, ops, irq);
1314 	case X86EMUL_MODE_VM86:
1315 	case X86EMUL_MODE_PROT16:
1316 	case X86EMUL_MODE_PROT32:
1317 	case X86EMUL_MODE_PROT64:
1318 	default:
1319 		/* Protected mode interrupts unimplemented yet */
1320 		return X86EMUL_UNHANDLEABLE;
1321 	}
1322 }
1323 
1324 static int emulate_iret_real(struct x86_emulate_ctxt *ctxt,
1325 			     struct x86_emulate_ops *ops)
1326 {
1327 	struct decode_cache *c = &ctxt->decode;
1328 	int rc = X86EMUL_CONTINUE;
1329 	unsigned long temp_eip = 0;
1330 	unsigned long temp_eflags = 0;
1331 	unsigned long cs = 0;
1332 	unsigned long mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_TF |
1333 			     EFLG_IF | EFLG_DF | EFLG_OF | EFLG_IOPL | EFLG_NT | EFLG_RF |
1334 			     EFLG_AC | EFLG_ID | (1 << 1); /* Last one is the reserved bit */
1335 	unsigned long vm86_mask = EFLG_VM | EFLG_VIF | EFLG_VIP;
1336 
1337 	/* TODO: Add stack limit check */
1338 
1339 	rc = emulate_pop(ctxt, ops, &temp_eip, c->op_bytes);
1340 
1341 	if (rc != X86EMUL_CONTINUE)
1342 		return rc;
1343 
1344 	if (temp_eip & ~0xffff)
1345 		return emulate_gp(ctxt, 0);
1346 
1347 	rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
1348 
1349 	if (rc != X86EMUL_CONTINUE)
1350 		return rc;
1351 
1352 	rc = emulate_pop(ctxt, ops, &temp_eflags, c->op_bytes);
1353 
1354 	if (rc != X86EMUL_CONTINUE)
1355 		return rc;
1356 
1357 	rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS);
1358 
1359 	if (rc != X86EMUL_CONTINUE)
1360 		return rc;
1361 
1362 	c->eip = temp_eip;
1363 
1364 
1365 	if (c->op_bytes == 4)
1366 		ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
1367 	else if (c->op_bytes == 2) {
1368 		ctxt->eflags &= ~0xffff;
1369 		ctxt->eflags |= temp_eflags;
1370 	}
1371 
1372 	ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
1373 	ctxt->eflags |= EFLG_RESERVED_ONE_MASK;
1374 
1375 	return rc;
1376 }
1377 
1378 static inline int emulate_iret(struct x86_emulate_ctxt *ctxt,
1379 				    struct x86_emulate_ops* ops)
1380 {
1381 	switch(ctxt->mode) {
1382 	case X86EMUL_MODE_REAL:
1383 		return emulate_iret_real(ctxt, ops);
1384 	case X86EMUL_MODE_VM86:
1385 	case X86EMUL_MODE_PROT16:
1386 	case X86EMUL_MODE_PROT32:
1387 	case X86EMUL_MODE_PROT64:
1388 	default:
1389 		/* iret from protected mode unimplemented yet */
1390 		return X86EMUL_UNHANDLEABLE;
1391 	}
1392 }
1393 
1394 static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
1395 				struct x86_emulate_ops *ops)
1396 {
1397 	struct decode_cache *c = &ctxt->decode;
1398 
1399 	return emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes);
1400 }
1401 
1402 static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt)
1403 {
1404 	struct decode_cache *c = &ctxt->decode;
1405 	switch (c->modrm_reg) {
1406 	case 0:	/* rol */
1407 		emulate_2op_SrcB("rol", c->src, c->dst, ctxt->eflags);
1408 		break;
1409 	case 1:	/* ror */
1410 		emulate_2op_SrcB("ror", c->src, c->dst, ctxt->eflags);
1411 		break;
1412 	case 2:	/* rcl */
1413 		emulate_2op_SrcB("rcl", c->src, c->dst, ctxt->eflags);
1414 		break;
1415 	case 3:	/* rcr */
1416 		emulate_2op_SrcB("rcr", c->src, c->dst, ctxt->eflags);
1417 		break;
1418 	case 4:	/* sal/shl */
1419 	case 6:	/* sal/shl */
1420 		emulate_2op_SrcB("sal", c->src, c->dst, ctxt->eflags);
1421 		break;
1422 	case 5:	/* shr */
1423 		emulate_2op_SrcB("shr", c->src, c->dst, ctxt->eflags);
1424 		break;
1425 	case 7:	/* sar */
1426 		emulate_2op_SrcB("sar", c->src, c->dst, ctxt->eflags);
1427 		break;
1428 	}
1429 }
1430 
1431 static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
1432 			       struct x86_emulate_ops *ops)
1433 {
1434 	struct decode_cache *c = &ctxt->decode;
1435 	unsigned long *rax = &c->regs[VCPU_REGS_RAX];
1436 	unsigned long *rdx = &c->regs[VCPU_REGS_RDX];
1437 	u8 de = 0;
1438 
1439 	switch (c->modrm_reg) {
1440 	case 0 ... 1:	/* test */
1441 		emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
1442 		break;
1443 	case 2:	/* not */
1444 		c->dst.val = ~c->dst.val;
1445 		break;
1446 	case 3:	/* neg */
1447 		emulate_1op("neg", c->dst, ctxt->eflags);
1448 		break;
1449 	case 4: /* mul */
1450 		emulate_1op_rax_rdx("mul", c->src, *rax, *rdx, ctxt->eflags);
1451 		break;
1452 	case 5: /* imul */
1453 		emulate_1op_rax_rdx("imul", c->src, *rax, *rdx, ctxt->eflags);
1454 		break;
1455 	case 6: /* div */
1456 		emulate_1op_rax_rdx_ex("div", c->src, *rax, *rdx,
1457 				       ctxt->eflags, de);
1458 		break;
1459 	case 7: /* idiv */
1460 		emulate_1op_rax_rdx_ex("idiv", c->src, *rax, *rdx,
1461 				       ctxt->eflags, de);
1462 		break;
1463 	default:
1464 		return X86EMUL_UNHANDLEABLE;
1465 	}
1466 	if (de)
1467 		return emulate_de(ctxt);
1468 	return X86EMUL_CONTINUE;
1469 }
1470 
1471 static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
1472 			       struct x86_emulate_ops *ops)
1473 {
1474 	struct decode_cache *c = &ctxt->decode;
1475 
1476 	switch (c->modrm_reg) {
1477 	case 0:	/* inc */
1478 		emulate_1op("inc", c->dst, ctxt->eflags);
1479 		break;
1480 	case 1:	/* dec */
1481 		emulate_1op("dec", c->dst, ctxt->eflags);
1482 		break;
1483 	case 2: /* call near abs */ {
1484 		long int old_eip;
1485 		old_eip = c->eip;
1486 		c->eip = c->src.val;
1487 		c->src.val = old_eip;
1488 		emulate_push(ctxt, ops);
1489 		break;
1490 	}
1491 	case 4: /* jmp abs */
1492 		c->eip = c->src.val;
1493 		break;
1494 	case 6:	/* push */
1495 		emulate_push(ctxt, ops);
1496 		break;
1497 	}
1498 	return X86EMUL_CONTINUE;
1499 }
1500 
1501 static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
1502 			       struct x86_emulate_ops *ops)
1503 {
1504 	struct decode_cache *c = &ctxt->decode;
1505 	u64 old = c->dst.orig_val64;
1506 
1507 	if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) ||
1508 	    ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) {
1509 		c->regs[VCPU_REGS_RAX] = (u32) (old >> 0);
1510 		c->regs[VCPU_REGS_RDX] = (u32) (old >> 32);
1511 		ctxt->eflags &= ~EFLG_ZF;
1512 	} else {
1513 		c->dst.val64 = ((u64)c->regs[VCPU_REGS_RCX] << 32) |
1514 			(u32) c->regs[VCPU_REGS_RBX];
1515 
1516 		ctxt->eflags |= EFLG_ZF;
1517 	}
1518 	return X86EMUL_CONTINUE;
1519 }
1520 
1521 static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
1522 			   struct x86_emulate_ops *ops)
1523 {
1524 	struct decode_cache *c = &ctxt->decode;
1525 	int rc;
1526 	unsigned long cs;
1527 
1528 	rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes);
1529 	if (rc != X86EMUL_CONTINUE)
1530 		return rc;
1531 	if (c->op_bytes == 4)
1532 		c->eip = (u32)c->eip;
1533 	rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
1534 	if (rc != X86EMUL_CONTINUE)
1535 		return rc;
1536 	rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS);
1537 	return rc;
1538 }
1539 
1540 static int emulate_load_segment(struct x86_emulate_ctxt *ctxt,
1541 			   struct x86_emulate_ops *ops, int seg)
1542 {
1543 	struct decode_cache *c = &ctxt->decode;
1544 	unsigned short sel;
1545 	int rc;
1546 
1547 	memcpy(&sel, c->src.valptr + c->op_bytes, 2);
1548 
1549 	rc = load_segment_descriptor(ctxt, ops, sel, seg);
1550 	if (rc != X86EMUL_CONTINUE)
1551 		return rc;
1552 
1553 	c->dst.val = c->src.val;
1554 	return rc;
1555 }
1556 
1557 static inline void
1558 setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
1559 			struct x86_emulate_ops *ops, struct desc_struct *cs,
1560 			struct desc_struct *ss)
1561 {
1562 	memset(cs, 0, sizeof(struct desc_struct));
1563 	ops->get_cached_descriptor(cs, VCPU_SREG_CS, ctxt->vcpu);
1564 	memset(ss, 0, sizeof(struct desc_struct));
1565 
1566 	cs->l = 0;		/* will be adjusted later */
1567 	set_desc_base(cs, 0);	/* flat segment */
1568 	cs->g = 1;		/* 4kb granularity */
1569 	set_desc_limit(cs, 0xfffff);	/* 4GB limit */
1570 	cs->type = 0x0b;	/* Read, Execute, Accessed */
1571 	cs->s = 1;
1572 	cs->dpl = 0;		/* will be adjusted later */
1573 	cs->p = 1;
1574 	cs->d = 1;
1575 
1576 	set_desc_base(ss, 0);	/* flat segment */
1577 	set_desc_limit(ss, 0xfffff);	/* 4GB limit */
1578 	ss->g = 1;		/* 4kb granularity */
1579 	ss->s = 1;
1580 	ss->type = 0x03;	/* Read/Write, Accessed */
1581 	ss->d = 1;		/* 32bit stack segment */
1582 	ss->dpl = 0;
1583 	ss->p = 1;
1584 }
1585 
1586 static int
1587 emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
1588 {
1589 	struct decode_cache *c = &ctxt->decode;
1590 	struct desc_struct cs, ss;
1591 	u64 msr_data;
1592 	u16 cs_sel, ss_sel;
1593 
1594 	/* syscall is not available in real mode */
1595 	if (ctxt->mode == X86EMUL_MODE_REAL ||
1596 	    ctxt->mode == X86EMUL_MODE_VM86)
1597 		return emulate_ud(ctxt);
1598 
1599 	setup_syscalls_segments(ctxt, ops, &cs, &ss);
1600 
1601 	ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
1602 	msr_data >>= 32;
1603 	cs_sel = (u16)(msr_data & 0xfffc);
1604 	ss_sel = (u16)(msr_data + 8);
1605 
1606 	if (is_long_mode(ctxt->vcpu)) {
1607 		cs.d = 0;
1608 		cs.l = 1;
1609 	}
1610 	ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu);
1611 	ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu);
1612 	ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu);
1613 	ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu);
1614 
1615 	c->regs[VCPU_REGS_RCX] = c->eip;
1616 	if (is_long_mode(ctxt->vcpu)) {
1617 #ifdef CONFIG_X86_64
1618 		c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF;
1619 
1620 		ops->get_msr(ctxt->vcpu,
1621 			     ctxt->mode == X86EMUL_MODE_PROT64 ?
1622 			     MSR_LSTAR : MSR_CSTAR, &msr_data);
1623 		c->eip = msr_data;
1624 
1625 		ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data);
1626 		ctxt->eflags &= ~(msr_data | EFLG_RF);
1627 #endif
1628 	} else {
1629 		/* legacy mode */
1630 		ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
1631 		c->eip = (u32)msr_data;
1632 
1633 		ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
1634 	}
1635 
1636 	return X86EMUL_CONTINUE;
1637 }
1638 
1639 static int
1640 emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
1641 {
1642 	struct decode_cache *c = &ctxt->decode;
1643 	struct desc_struct cs, ss;
1644 	u64 msr_data;
1645 	u16 cs_sel, ss_sel;
1646 
1647 	/* inject #GP if in real mode */
1648 	if (ctxt->mode == X86EMUL_MODE_REAL)
1649 		return emulate_gp(ctxt, 0);
1650 
1651 	/* XXX sysenter/sysexit have not been tested in 64bit mode.
1652 	* Therefore, we inject an #UD.
1653 	*/
1654 	if (ctxt->mode == X86EMUL_MODE_PROT64)
1655 		return emulate_ud(ctxt);
1656 
1657 	setup_syscalls_segments(ctxt, ops, &cs, &ss);
1658 
1659 	ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
1660 	switch (ctxt->mode) {
1661 	case X86EMUL_MODE_PROT32:
1662 		if ((msr_data & 0xfffc) == 0x0)
1663 			return emulate_gp(ctxt, 0);
1664 		break;
1665 	case X86EMUL_MODE_PROT64:
1666 		if (msr_data == 0x0)
1667 			return emulate_gp(ctxt, 0);
1668 		break;
1669 	}
1670 
1671 	ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
1672 	cs_sel = (u16)msr_data;
1673 	cs_sel &= ~SELECTOR_RPL_MASK;
1674 	ss_sel = cs_sel + 8;
1675 	ss_sel &= ~SELECTOR_RPL_MASK;
1676 	if (ctxt->mode == X86EMUL_MODE_PROT64
1677 		|| is_long_mode(ctxt->vcpu)) {
1678 		cs.d = 0;
1679 		cs.l = 1;
1680 	}
1681 
1682 	ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu);
1683 	ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu);
1684 	ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu);
1685 	ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu);
1686 
1687 	ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data);
1688 	c->eip = msr_data;
1689 
1690 	ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data);
1691 	c->regs[VCPU_REGS_RSP] = msr_data;
1692 
1693 	return X86EMUL_CONTINUE;
1694 }
1695 
1696 static int
1697 emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
1698 {
1699 	struct decode_cache *c = &ctxt->decode;
1700 	struct desc_struct cs, ss;
1701 	u64 msr_data;
1702 	int usermode;
1703 	u16 cs_sel, ss_sel;
1704 
1705 	/* inject #GP if in real mode or Virtual 8086 mode */
1706 	if (ctxt->mode == X86EMUL_MODE_REAL ||
1707 	    ctxt->mode == X86EMUL_MODE_VM86)
1708 		return emulate_gp(ctxt, 0);
1709 
1710 	setup_syscalls_segments(ctxt, ops, &cs, &ss);
1711 
1712 	if ((c->rex_prefix & 0x8) != 0x0)
1713 		usermode = X86EMUL_MODE_PROT64;
1714 	else
1715 		usermode = X86EMUL_MODE_PROT32;
1716 
1717 	cs.dpl = 3;
1718 	ss.dpl = 3;
1719 	ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
1720 	switch (usermode) {
1721 	case X86EMUL_MODE_PROT32:
1722 		cs_sel = (u16)(msr_data + 16);
1723 		if ((msr_data & 0xfffc) == 0x0)
1724 			return emulate_gp(ctxt, 0);
1725 		ss_sel = (u16)(msr_data + 24);
1726 		break;
1727 	case X86EMUL_MODE_PROT64:
1728 		cs_sel = (u16)(msr_data + 32);
1729 		if (msr_data == 0x0)
1730 			return emulate_gp(ctxt, 0);
1731 		ss_sel = cs_sel + 8;
1732 		cs.d = 0;
1733 		cs.l = 1;
1734 		break;
1735 	}
1736 	cs_sel |= SELECTOR_RPL_MASK;
1737 	ss_sel |= SELECTOR_RPL_MASK;
1738 
1739 	ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu);
1740 	ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu);
1741 	ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu);
1742 	ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu);
1743 
1744 	c->eip = c->regs[VCPU_REGS_RDX];
1745 	c->regs[VCPU_REGS_RSP] = c->regs[VCPU_REGS_RCX];
1746 
1747 	return X86EMUL_CONTINUE;
1748 }
1749 
1750 static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt,
1751 			      struct x86_emulate_ops *ops)
1752 {
1753 	int iopl;
1754 	if (ctxt->mode == X86EMUL_MODE_REAL)
1755 		return false;
1756 	if (ctxt->mode == X86EMUL_MODE_VM86)
1757 		return true;
1758 	iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
1759 	return ops->cpl(ctxt->vcpu) > iopl;
1760 }
1761 
1762 static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
1763 					    struct x86_emulate_ops *ops,
1764 					    u16 port, u16 len)
1765 {
1766 	struct desc_struct tr_seg;
1767 	int r;
1768 	u16 io_bitmap_ptr;
1769 	u8 perm, bit_idx = port & 0x7;
1770 	unsigned mask = (1 << len) - 1;
1771 
1772 	ops->get_cached_descriptor(&tr_seg, VCPU_SREG_TR, ctxt->vcpu);
1773 	if (!tr_seg.p)
1774 		return false;
1775 	if (desc_limit_scaled(&tr_seg) < 103)
1776 		return false;
1777 	r = ops->read_std(get_desc_base(&tr_seg) + 102, &io_bitmap_ptr, 2,
1778 			  ctxt->vcpu, NULL);
1779 	if (r != X86EMUL_CONTINUE)
1780 		return false;
1781 	if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
1782 		return false;
1783 	r = ops->read_std(get_desc_base(&tr_seg) + io_bitmap_ptr + port/8,
1784 			  &perm, 1, ctxt->vcpu, NULL);
1785 	if (r != X86EMUL_CONTINUE)
1786 		return false;
1787 	if ((perm >> bit_idx) & mask)
1788 		return false;
1789 	return true;
1790 }
1791 
1792 static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
1793 				 struct x86_emulate_ops *ops,
1794 				 u16 port, u16 len)
1795 {
1796 	if (ctxt->perm_ok)
1797 		return true;
1798 
1799 	if (emulator_bad_iopl(ctxt, ops))
1800 		if (!emulator_io_port_access_allowed(ctxt, ops, port, len))
1801 			return false;
1802 
1803 	ctxt->perm_ok = true;
1804 
1805 	return true;
1806 }
1807 
1808 static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
1809 				struct x86_emulate_ops *ops,
1810 				struct tss_segment_16 *tss)
1811 {
1812 	struct decode_cache *c = &ctxt->decode;
1813 
1814 	tss->ip = c->eip;
1815 	tss->flag = ctxt->eflags;
1816 	tss->ax = c->regs[VCPU_REGS_RAX];
1817 	tss->cx = c->regs[VCPU_REGS_RCX];
1818 	tss->dx = c->regs[VCPU_REGS_RDX];
1819 	tss->bx = c->regs[VCPU_REGS_RBX];
1820 	tss->sp = c->regs[VCPU_REGS_RSP];
1821 	tss->bp = c->regs[VCPU_REGS_RBP];
1822 	tss->si = c->regs[VCPU_REGS_RSI];
1823 	tss->di = c->regs[VCPU_REGS_RDI];
1824 
1825 	tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu);
1826 	tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
1827 	tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu);
1828 	tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu);
1829 	tss->ldt = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu);
1830 }
1831 
1832 static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
1833 				 struct x86_emulate_ops *ops,
1834 				 struct tss_segment_16 *tss)
1835 {
1836 	struct decode_cache *c = &ctxt->decode;
1837 	int ret;
1838 
1839 	c->eip = tss->ip;
1840 	ctxt->eflags = tss->flag | 2;
1841 	c->regs[VCPU_REGS_RAX] = tss->ax;
1842 	c->regs[VCPU_REGS_RCX] = tss->cx;
1843 	c->regs[VCPU_REGS_RDX] = tss->dx;
1844 	c->regs[VCPU_REGS_RBX] = tss->bx;
1845 	c->regs[VCPU_REGS_RSP] = tss->sp;
1846 	c->regs[VCPU_REGS_RBP] = tss->bp;
1847 	c->regs[VCPU_REGS_RSI] = tss->si;
1848 	c->regs[VCPU_REGS_RDI] = tss->di;
1849 
1850 	/*
1851 	 * SDM says that segment selectors are loaded before segment
1852 	 * descriptors
1853 	 */
1854 	ops->set_segment_selector(tss->ldt, VCPU_SREG_LDTR, ctxt->vcpu);
1855 	ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu);
1856 	ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu);
1857 	ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu);
1858 	ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu);
1859 
1860 	/*
1861 	 * Now load segment descriptors. If fault happenes at this stage
1862 	 * it is handled in a context of new task
1863 	 */
1864 	ret = load_segment_descriptor(ctxt, ops, tss->ldt, VCPU_SREG_LDTR);
1865 	if (ret != X86EMUL_CONTINUE)
1866 		return ret;
1867 	ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES);
1868 	if (ret != X86EMUL_CONTINUE)
1869 		return ret;
1870 	ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS);
1871 	if (ret != X86EMUL_CONTINUE)
1872 		return ret;
1873 	ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS);
1874 	if (ret != X86EMUL_CONTINUE)
1875 		return ret;
1876 	ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS);
1877 	if (ret != X86EMUL_CONTINUE)
1878 		return ret;
1879 
1880 	return X86EMUL_CONTINUE;
1881 }
1882 
1883 static int task_switch_16(struct x86_emulate_ctxt *ctxt,
1884 			  struct x86_emulate_ops *ops,
1885 			  u16 tss_selector, u16 old_tss_sel,
1886 			  ulong old_tss_base, struct desc_struct *new_desc)
1887 {
1888 	struct tss_segment_16 tss_seg;
1889 	int ret;
1890 	u32 new_tss_base = get_desc_base(new_desc);
1891 
1892 	ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
1893 			    &ctxt->exception);
1894 	if (ret != X86EMUL_CONTINUE)
1895 		/* FIXME: need to provide precise fault address */
1896 		return ret;
1897 
1898 	save_state_to_tss16(ctxt, ops, &tss_seg);
1899 
1900 	ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
1901 			     &ctxt->exception);
1902 	if (ret != X86EMUL_CONTINUE)
1903 		/* FIXME: need to provide precise fault address */
1904 		return ret;
1905 
1906 	ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
1907 			    &ctxt->exception);
1908 	if (ret != X86EMUL_CONTINUE)
1909 		/* FIXME: need to provide precise fault address */
1910 		return ret;
1911 
1912 	if (old_tss_sel != 0xffff) {
1913 		tss_seg.prev_task_link = old_tss_sel;
1914 
1915 		ret = ops->write_std(new_tss_base,
1916 				     &tss_seg.prev_task_link,
1917 				     sizeof tss_seg.prev_task_link,
1918 				     ctxt->vcpu, &ctxt->exception);
1919 		if (ret != X86EMUL_CONTINUE)
1920 			/* FIXME: need to provide precise fault address */
1921 			return ret;
1922 	}
1923 
1924 	return load_state_from_tss16(ctxt, ops, &tss_seg);
1925 }
1926 
1927 static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
1928 				struct x86_emulate_ops *ops,
1929 				struct tss_segment_32 *tss)
1930 {
1931 	struct decode_cache *c = &ctxt->decode;
1932 
1933 	tss->cr3 = ops->get_cr(3, ctxt->vcpu);
1934 	tss->eip = c->eip;
1935 	tss->eflags = ctxt->eflags;
1936 	tss->eax = c->regs[VCPU_REGS_RAX];
1937 	tss->ecx = c->regs[VCPU_REGS_RCX];
1938 	tss->edx = c->regs[VCPU_REGS_RDX];
1939 	tss->ebx = c->regs[VCPU_REGS_RBX];
1940 	tss->esp = c->regs[VCPU_REGS_RSP];
1941 	tss->ebp = c->regs[VCPU_REGS_RBP];
1942 	tss->esi = c->regs[VCPU_REGS_RSI];
1943 	tss->edi = c->regs[VCPU_REGS_RDI];
1944 
1945 	tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu);
1946 	tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
1947 	tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu);
1948 	tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu);
1949 	tss->fs = ops->get_segment_selector(VCPU_SREG_FS, ctxt->vcpu);
1950 	tss->gs = ops->get_segment_selector(VCPU_SREG_GS, ctxt->vcpu);
1951 	tss->ldt_selector = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu);
1952 }
1953 
1954 static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
1955 				 struct x86_emulate_ops *ops,
1956 				 struct tss_segment_32 *tss)
1957 {
1958 	struct decode_cache *c = &ctxt->decode;
1959 	int ret;
1960 
1961 	if (ops->set_cr(3, tss->cr3, ctxt->vcpu))
1962 		return emulate_gp(ctxt, 0);
1963 	c->eip = tss->eip;
1964 	ctxt->eflags = tss->eflags | 2;
1965 	c->regs[VCPU_REGS_RAX] = tss->eax;
1966 	c->regs[VCPU_REGS_RCX] = tss->ecx;
1967 	c->regs[VCPU_REGS_RDX] = tss->edx;
1968 	c->regs[VCPU_REGS_RBX] = tss->ebx;
1969 	c->regs[VCPU_REGS_RSP] = tss->esp;
1970 	c->regs[VCPU_REGS_RBP] = tss->ebp;
1971 	c->regs[VCPU_REGS_RSI] = tss->esi;
1972 	c->regs[VCPU_REGS_RDI] = tss->edi;
1973 
1974 	/*
1975 	 * SDM says that segment selectors are loaded before segment
1976 	 * descriptors
1977 	 */
1978 	ops->set_segment_selector(tss->ldt_selector, VCPU_SREG_LDTR, ctxt->vcpu);
1979 	ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu);
1980 	ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu);
1981 	ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu);
1982 	ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu);
1983 	ops->set_segment_selector(tss->fs, VCPU_SREG_FS, ctxt->vcpu);
1984 	ops->set_segment_selector(tss->gs, VCPU_SREG_GS, ctxt->vcpu);
1985 
1986 	/*
1987 	 * Now load segment descriptors. If fault happenes at this stage
1988 	 * it is handled in a context of new task
1989 	 */
1990 	ret = load_segment_descriptor(ctxt, ops, tss->ldt_selector, VCPU_SREG_LDTR);
1991 	if (ret != X86EMUL_CONTINUE)
1992 		return ret;
1993 	ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES);
1994 	if (ret != X86EMUL_CONTINUE)
1995 		return ret;
1996 	ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS);
1997 	if (ret != X86EMUL_CONTINUE)
1998 		return ret;
1999 	ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS);
2000 	if (ret != X86EMUL_CONTINUE)
2001 		return ret;
2002 	ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS);
2003 	if (ret != X86EMUL_CONTINUE)
2004 		return ret;
2005 	ret = load_segment_descriptor(ctxt, ops, tss->fs, VCPU_SREG_FS);
2006 	if (ret != X86EMUL_CONTINUE)
2007 		return ret;
2008 	ret = load_segment_descriptor(ctxt, ops, tss->gs, VCPU_SREG_GS);
2009 	if (ret != X86EMUL_CONTINUE)
2010 		return ret;
2011 
2012 	return X86EMUL_CONTINUE;
2013 }
2014 
2015 static int task_switch_32(struct x86_emulate_ctxt *ctxt,
2016 			  struct x86_emulate_ops *ops,
2017 			  u16 tss_selector, u16 old_tss_sel,
2018 			  ulong old_tss_base, struct desc_struct *new_desc)
2019 {
2020 	struct tss_segment_32 tss_seg;
2021 	int ret;
2022 	u32 new_tss_base = get_desc_base(new_desc);
2023 
2024 	ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2025 			    &ctxt->exception);
2026 	if (ret != X86EMUL_CONTINUE)
2027 		/* FIXME: need to provide precise fault address */
2028 		return ret;
2029 
2030 	save_state_to_tss32(ctxt, ops, &tss_seg);
2031 
2032 	ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2033 			     &ctxt->exception);
2034 	if (ret != X86EMUL_CONTINUE)
2035 		/* FIXME: need to provide precise fault address */
2036 		return ret;
2037 
2038 	ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2039 			    &ctxt->exception);
2040 	if (ret != X86EMUL_CONTINUE)
2041 		/* FIXME: need to provide precise fault address */
2042 		return ret;
2043 
2044 	if (old_tss_sel != 0xffff) {
2045 		tss_seg.prev_task_link = old_tss_sel;
2046 
2047 		ret = ops->write_std(new_tss_base,
2048 				     &tss_seg.prev_task_link,
2049 				     sizeof tss_seg.prev_task_link,
2050 				     ctxt->vcpu, &ctxt->exception);
2051 		if (ret != X86EMUL_CONTINUE)
2052 			/* FIXME: need to provide precise fault address */
2053 			return ret;
2054 	}
2055 
2056 	return load_state_from_tss32(ctxt, ops, &tss_seg);
2057 }
2058 
2059 static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
2060 				   struct x86_emulate_ops *ops,
2061 				   u16 tss_selector, int reason,
2062 				   bool has_error_code, u32 error_code)
2063 {
2064 	struct desc_struct curr_tss_desc, next_tss_desc;
2065 	int ret;
2066 	u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu);
2067 	ulong old_tss_base =
2068 		ops->get_cached_segment_base(VCPU_SREG_TR, ctxt->vcpu);
2069 	u32 desc_limit;
2070 
2071 	/* FIXME: old_tss_base == ~0 ? */
2072 
2073 	ret = read_segment_descriptor(ctxt, ops, tss_selector, &next_tss_desc);
2074 	if (ret != X86EMUL_CONTINUE)
2075 		return ret;
2076 	ret = read_segment_descriptor(ctxt, ops, old_tss_sel, &curr_tss_desc);
2077 	if (ret != X86EMUL_CONTINUE)
2078 		return ret;
2079 
2080 	/* FIXME: check that next_tss_desc is tss */
2081 
2082 	if (reason != TASK_SWITCH_IRET) {
2083 		if ((tss_selector & 3) > next_tss_desc.dpl ||
2084 		    ops->cpl(ctxt->vcpu) > next_tss_desc.dpl)
2085 			return emulate_gp(ctxt, 0);
2086 	}
2087 
2088 	desc_limit = desc_limit_scaled(&next_tss_desc);
2089 	if (!next_tss_desc.p ||
2090 	    ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
2091 	     desc_limit < 0x2b)) {
2092 		emulate_ts(ctxt, tss_selector & 0xfffc);
2093 		return X86EMUL_PROPAGATE_FAULT;
2094 	}
2095 
2096 	if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
2097 		curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
2098 		write_segment_descriptor(ctxt, ops, old_tss_sel,
2099 					 &curr_tss_desc);
2100 	}
2101 
2102 	if (reason == TASK_SWITCH_IRET)
2103 		ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;
2104 
2105 	/* set back link to prev task only if NT bit is set in eflags
2106 	   note that old_tss_sel is not used afetr this point */
2107 	if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
2108 		old_tss_sel = 0xffff;
2109 
2110 	if (next_tss_desc.type & 8)
2111 		ret = task_switch_32(ctxt, ops, tss_selector, old_tss_sel,
2112 				     old_tss_base, &next_tss_desc);
2113 	else
2114 		ret = task_switch_16(ctxt, ops, tss_selector, old_tss_sel,
2115 				     old_tss_base, &next_tss_desc);
2116 	if (ret != X86EMUL_CONTINUE)
2117 		return ret;
2118 
2119 	if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
2120 		ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;
2121 
2122 	if (reason != TASK_SWITCH_IRET) {
2123 		next_tss_desc.type |= (1 << 1); /* set busy flag */
2124 		write_segment_descriptor(ctxt, ops, tss_selector,
2125 					 &next_tss_desc);
2126 	}
2127 
2128 	ops->set_cr(0,  ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu);
2129 	ops->set_cached_descriptor(&next_tss_desc, VCPU_SREG_TR, ctxt->vcpu);
2130 	ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu);
2131 
2132 	if (has_error_code) {
2133 		struct decode_cache *c = &ctxt->decode;
2134 
2135 		c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
2136 		c->lock_prefix = 0;
2137 		c->src.val = (unsigned long) error_code;
2138 		emulate_push(ctxt, ops);
2139 	}
2140 
2141 	return ret;
2142 }
2143 
2144 int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
2145 			 u16 tss_selector, int reason,
2146 			 bool has_error_code, u32 error_code)
2147 {
2148 	struct x86_emulate_ops *ops = ctxt->ops;
2149 	struct decode_cache *c = &ctxt->decode;
2150 	int rc;
2151 
2152 	c->eip = ctxt->eip;
2153 	c->dst.type = OP_NONE;
2154 
2155 	rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason,
2156 				     has_error_code, error_code);
2157 
2158 	if (rc == X86EMUL_CONTINUE) {
2159 		rc = writeback(ctxt, ops);
2160 		if (rc == X86EMUL_CONTINUE)
2161 			ctxt->eip = c->eip;
2162 	}
2163 
2164 	return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
2165 }
2166 
2167 static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg,
2168 			    int reg, struct operand *op)
2169 {
2170 	struct decode_cache *c = &ctxt->decode;
2171 	int df = (ctxt->eflags & EFLG_DF) ? -1 : 1;
2172 
2173 	register_address_increment(c, &c->regs[reg], df * op->bytes);
2174 	op->addr.mem.ea = register_address(c, c->regs[reg]);
2175 	op->addr.mem.seg = seg;
2176 }
2177 
2178 static int em_push(struct x86_emulate_ctxt *ctxt)
2179 {
2180 	emulate_push(ctxt, ctxt->ops);
2181 	return X86EMUL_CONTINUE;
2182 }
2183 
2184 static int em_das(struct x86_emulate_ctxt *ctxt)
2185 {
2186 	struct decode_cache *c = &ctxt->decode;
2187 	u8 al, old_al;
2188 	bool af, cf, old_cf;
2189 
2190 	cf = ctxt->eflags & X86_EFLAGS_CF;
2191 	al = c->dst.val;
2192 
2193 	old_al = al;
2194 	old_cf = cf;
2195 	cf = false;
2196 	af = ctxt->eflags & X86_EFLAGS_AF;
2197 	if ((al & 0x0f) > 9 || af) {
2198 		al -= 6;
2199 		cf = old_cf | (al >= 250);
2200 		af = true;
2201 	} else {
2202 		af = false;
2203 	}
2204 	if (old_al > 0x99 || old_cf) {
2205 		al -= 0x60;
2206 		cf = true;
2207 	}
2208 
2209 	c->dst.val = al;
2210 	/* Set PF, ZF, SF */
2211 	c->src.type = OP_IMM;
2212 	c->src.val = 0;
2213 	c->src.bytes = 1;
2214 	emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags);
2215 	ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
2216 	if (cf)
2217 		ctxt->eflags |= X86_EFLAGS_CF;
2218 	if (af)
2219 		ctxt->eflags |= X86_EFLAGS_AF;
2220 	return X86EMUL_CONTINUE;
2221 }
2222 
2223 static int em_call_far(struct x86_emulate_ctxt *ctxt)
2224 {
2225 	struct decode_cache *c = &ctxt->decode;
2226 	u16 sel, old_cs;
2227 	ulong old_eip;
2228 	int rc;
2229 
2230 	old_cs = ctxt->ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
2231 	old_eip = c->eip;
2232 
2233 	memcpy(&sel, c->src.valptr + c->op_bytes, 2);
2234 	if (load_segment_descriptor(ctxt, ctxt->ops, sel, VCPU_SREG_CS))
2235 		return X86EMUL_CONTINUE;
2236 
2237 	c->eip = 0;
2238 	memcpy(&c->eip, c->src.valptr, c->op_bytes);
2239 
2240 	c->src.val = old_cs;
2241 	emulate_push(ctxt, ctxt->ops);
2242 	rc = writeback(ctxt, ctxt->ops);
2243 	if (rc != X86EMUL_CONTINUE)
2244 		return rc;
2245 
2246 	c->src.val = old_eip;
2247 	emulate_push(ctxt, ctxt->ops);
2248 	rc = writeback(ctxt, ctxt->ops);
2249 	if (rc != X86EMUL_CONTINUE)
2250 		return rc;
2251 
2252 	c->dst.type = OP_NONE;
2253 
2254 	return X86EMUL_CONTINUE;
2255 }
2256 
2257 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
2258 {
2259 	struct decode_cache *c = &ctxt->decode;
2260 	int rc;
2261 
2262 	c->dst.type = OP_REG;
2263 	c->dst.addr.reg = &c->eip;
2264 	c->dst.bytes = c->op_bytes;
2265 	rc = emulate_pop(ctxt, ctxt->ops, &c->dst.val, c->op_bytes);
2266 	if (rc != X86EMUL_CONTINUE)
2267 		return rc;
2268 	register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.val);
2269 	return X86EMUL_CONTINUE;
2270 }
2271 
2272 static int em_imul(struct x86_emulate_ctxt *ctxt)
2273 {
2274 	struct decode_cache *c = &ctxt->decode;
2275 
2276 	emulate_2op_SrcV_nobyte("imul", c->src, c->dst, ctxt->eflags);
2277 	return X86EMUL_CONTINUE;
2278 }
2279 
2280 static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
2281 {
2282 	struct decode_cache *c = &ctxt->decode;
2283 
2284 	c->dst.val = c->src2.val;
2285 	return em_imul(ctxt);
2286 }
2287 
2288 static int em_cwd(struct x86_emulate_ctxt *ctxt)
2289 {
2290 	struct decode_cache *c = &ctxt->decode;
2291 
2292 	c->dst.type = OP_REG;
2293 	c->dst.bytes = c->src.bytes;
2294 	c->dst.addr.reg = &c->regs[VCPU_REGS_RDX];
2295 	c->dst.val = ~((c->src.val >> (c->src.bytes * 8 - 1)) - 1);
2296 
2297 	return X86EMUL_CONTINUE;
2298 }
2299 
2300 static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
2301 {
2302 	unsigned cpl = ctxt->ops->cpl(ctxt->vcpu);
2303 	struct decode_cache *c = &ctxt->decode;
2304 	u64 tsc = 0;
2305 
2306 	if (cpl > 0 && (ctxt->ops->get_cr(4, ctxt->vcpu) & X86_CR4_TSD))
2307 		return emulate_gp(ctxt, 0);
2308 	ctxt->ops->get_msr(ctxt->vcpu, MSR_IA32_TSC, &tsc);
2309 	c->regs[VCPU_REGS_RAX] = (u32)tsc;
2310 	c->regs[VCPU_REGS_RDX] = tsc >> 32;
2311 	return X86EMUL_CONTINUE;
2312 }
2313 
2314 static int em_mov(struct x86_emulate_ctxt *ctxt)
2315 {
2316 	struct decode_cache *c = &ctxt->decode;
2317 	c->dst.val = c->src.val;
2318 	return X86EMUL_CONTINUE;
2319 }
2320 
2321 #define D(_y) { .flags = (_y) }
2322 #define N    D(0)
2323 #define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) }
2324 #define GD(_f, _g) { .flags = ((_f) | Group | GroupDual), .u.gdual = (_g) }
2325 #define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
2326 
2327 #define D2bv(_f)      D((_f) | ByteOp), D(_f)
2328 #define I2bv(_f, _e)  I((_f) | ByteOp, _e), I(_f, _e)
2329 
2330 #define D6ALU(_f) D2bv((_f) | DstMem | SrcReg | ModRM),			\
2331 		D2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock),		\
2332 		D2bv(((_f) & ~Lock) | DstAcc | SrcImm)
2333 
2334 
2335 static struct opcode group1[] = {
2336 	X7(D(Lock)), N
2337 };
2338 
2339 static struct opcode group1A[] = {
2340 	D(DstMem | SrcNone | ModRM | Mov | Stack), N, N, N, N, N, N, N,
2341 };
2342 
2343 static struct opcode group3[] = {
2344 	D(DstMem | SrcImm | ModRM), D(DstMem | SrcImm | ModRM),
2345 	D(DstMem | SrcNone | ModRM | Lock), D(DstMem | SrcNone | ModRM | Lock),
2346 	X4(D(SrcMem | ModRM)),
2347 };
2348 
2349 static struct opcode group4[] = {
2350 	D(ByteOp | DstMem | SrcNone | ModRM | Lock), D(ByteOp | DstMem | SrcNone | ModRM | Lock),
2351 	N, N, N, N, N, N,
2352 };
2353 
2354 static struct opcode group5[] = {
2355 	D(DstMem | SrcNone | ModRM | Lock), D(DstMem | SrcNone | ModRM | Lock),
2356 	D(SrcMem | ModRM | Stack),
2357 	I(SrcMemFAddr | ModRM | ImplicitOps | Stack, em_call_far),
2358 	D(SrcMem | ModRM | Stack), D(SrcMemFAddr | ModRM | ImplicitOps),
2359 	D(SrcMem | ModRM | Stack), N,
2360 };
2361 
2362 static struct group_dual group7 = { {
2363 	N, N, D(ModRM | SrcMem | Priv), D(ModRM | SrcMem | Priv),
2364 	D(SrcNone | ModRM | DstMem | Mov), N,
2365 	D(SrcMem16 | ModRM | Mov | Priv),
2366 	D(SrcMem | ModRM | ByteOp | Priv | NoAccess),
2367 }, {
2368 	D(SrcNone | ModRM | Priv), N, N, D(SrcNone | ModRM | Priv),
2369 	D(SrcNone | ModRM | DstMem | Mov), N,
2370 	D(SrcMem16 | ModRM | Mov | Priv), N,
2371 } };
2372 
2373 static struct opcode group8[] = {
2374 	N, N, N, N,
2375 	D(DstMem | SrcImmByte | ModRM), D(DstMem | SrcImmByte | ModRM | Lock),
2376 	D(DstMem | SrcImmByte | ModRM | Lock), D(DstMem | SrcImmByte | ModRM | Lock),
2377 };
2378 
2379 static struct group_dual group9 = { {
2380 	N, D(DstMem64 | ModRM | Lock), N, N, N, N, N, N,
2381 }, {
2382 	N, N, N, N, N, N, N, N,
2383 } };
2384 
2385 static struct opcode group11[] = {
2386 	I(DstMem | SrcImm | ModRM | Mov, em_mov), X7(D(Undefined)),
2387 };
2388 
2389 static struct opcode opcode_table[256] = {
2390 	/* 0x00 - 0x07 */
2391 	D6ALU(Lock),
2392 	D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64),
2393 	/* 0x08 - 0x0F */
2394 	D6ALU(Lock),
2395 	D(ImplicitOps | Stack | No64), N,
2396 	/* 0x10 - 0x17 */
2397 	D6ALU(Lock),
2398 	D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64),
2399 	/* 0x18 - 0x1F */
2400 	D6ALU(Lock),
2401 	D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64),
2402 	/* 0x20 - 0x27 */
2403 	D6ALU(Lock), N, N,
2404 	/* 0x28 - 0x2F */
2405 	D6ALU(Lock), N, I(ByteOp | DstAcc | No64, em_das),
2406 	/* 0x30 - 0x37 */
2407 	D6ALU(Lock), N, N,
2408 	/* 0x38 - 0x3F */
2409 	D6ALU(0), N, N,
2410 	/* 0x40 - 0x4F */
2411 	X16(D(DstReg)),
2412 	/* 0x50 - 0x57 */
2413 	X8(I(SrcReg | Stack, em_push)),
2414 	/* 0x58 - 0x5F */
2415 	X8(D(DstReg | Stack)),
2416 	/* 0x60 - 0x67 */
2417 	D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64),
2418 	N, D(DstReg | SrcMem32 | ModRM | Mov) /* movsxd (x86/64) */ ,
2419 	N, N, N, N,
2420 	/* 0x68 - 0x6F */
2421 	I(SrcImm | Mov | Stack, em_push),
2422 	I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
2423 	I(SrcImmByte | Mov | Stack, em_push),
2424 	I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
2425 	D2bv(DstDI | Mov | String), /* insb, insw/insd */
2426 	D2bv(SrcSI | ImplicitOps | String), /* outsb, outsw/outsd */
2427 	/* 0x70 - 0x7F */
2428 	X16(D(SrcImmByte)),
2429 	/* 0x80 - 0x87 */
2430 	G(ByteOp | DstMem | SrcImm | ModRM | Group, group1),
2431 	G(DstMem | SrcImm | ModRM | Group, group1),
2432 	G(ByteOp | DstMem | SrcImm | ModRM | No64 | Group, group1),
2433 	G(DstMem | SrcImmByte | ModRM | Group, group1),
2434 	D2bv(DstMem | SrcReg | ModRM), D2bv(DstMem | SrcReg | ModRM | Lock),
2435 	/* 0x88 - 0x8F */
2436 	I2bv(DstMem | SrcReg | ModRM | Mov, em_mov),
2437 	I2bv(DstReg | SrcMem | ModRM | Mov, em_mov),
2438 	D(DstMem | SrcNone | ModRM | Mov), D(ModRM | SrcMem | NoAccess | DstReg),
2439 	D(ImplicitOps | SrcMem16 | ModRM), G(0, group1A),
2440 	/* 0x90 - 0x97 */
2441 	X8(D(SrcAcc | DstReg)),
2442 	/* 0x98 - 0x9F */
2443 	D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
2444 	I(SrcImmFAddr | No64, em_call_far), N,
2445 	D(ImplicitOps | Stack), D(ImplicitOps | Stack), N, N,
2446 	/* 0xA0 - 0xA7 */
2447 	I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
2448 	I2bv(DstMem | SrcAcc | Mov | MemAbs, em_mov),
2449 	I2bv(SrcSI | DstDI | Mov | String, em_mov),
2450 	D2bv(SrcSI | DstDI | String),
2451 	/* 0xA8 - 0xAF */
2452 	D2bv(DstAcc | SrcImm),
2453 	I2bv(SrcAcc | DstDI | Mov | String, em_mov),
2454 	I2bv(SrcSI | DstAcc | Mov | String, em_mov),
2455 	D2bv(SrcAcc | DstDI | String),
2456 	/* 0xB0 - 0xB7 */
2457 	X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
2458 	/* 0xB8 - 0xBF */
2459 	X8(I(DstReg | SrcImm | Mov, em_mov)),
2460 	/* 0xC0 - 0xC7 */
2461 	D2bv(DstMem | SrcImmByte | ModRM),
2462 	I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm),
2463 	D(ImplicitOps | Stack),
2464 	D(DstReg | SrcMemFAddr | ModRM | No64), D(DstReg | SrcMemFAddr | ModRM | No64),
2465 	G(ByteOp, group11), G(0, group11),
2466 	/* 0xC8 - 0xCF */
2467 	N, N, N, D(ImplicitOps | Stack),
2468 	D(ImplicitOps), D(SrcImmByte), D(ImplicitOps | No64), D(ImplicitOps),
2469 	/* 0xD0 - 0xD7 */
2470 	D2bv(DstMem | SrcOne | ModRM), D2bv(DstMem | ModRM),
2471 	N, N, N, N,
2472 	/* 0xD8 - 0xDF */
2473 	N, N, N, N, N, N, N, N,
2474 	/* 0xE0 - 0xE7 */
2475 	X4(D(SrcImmByte)),
2476 	D2bv(SrcImmUByte | DstAcc), D2bv(SrcAcc | DstImmUByte),
2477 	/* 0xE8 - 0xEF */
2478 	D(SrcImm | Stack), D(SrcImm | ImplicitOps),
2479 	D(SrcImmFAddr | No64), D(SrcImmByte | ImplicitOps),
2480 	D2bv(SrcNone | DstAcc),	D2bv(SrcAcc | ImplicitOps),
2481 	/* 0xF0 - 0xF7 */
2482 	N, N, N, N,
2483 	D(ImplicitOps | Priv), D(ImplicitOps), G(ByteOp, group3), G(0, group3),
2484 	/* 0xF8 - 0xFF */
2485 	D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), D(ImplicitOps),
2486 	D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
2487 };
2488 
2489 static struct opcode twobyte_table[256] = {
2490 	/* 0x00 - 0x0F */
2491 	N, GD(0, &group7), N, N,
2492 	N, D(ImplicitOps), D(ImplicitOps | Priv), N,
2493 	D(ImplicitOps | Priv), D(ImplicitOps | Priv), N, N,
2494 	N, D(ImplicitOps | ModRM), N, N,
2495 	/* 0x10 - 0x1F */
2496 	N, N, N, N, N, N, N, N, D(ImplicitOps | ModRM), N, N, N, N, N, N, N,
2497 	/* 0x20 - 0x2F */
2498 	D(ModRM | DstMem | Priv | Op3264), D(ModRM | DstMem | Priv | Op3264),
2499 	D(ModRM | SrcMem | Priv | Op3264), D(ModRM | SrcMem | Priv | Op3264),
2500 	N, N, N, N,
2501 	N, N, N, N, N, N, N, N,
2502 	/* 0x30 - 0x3F */
2503 	D(ImplicitOps | Priv), I(ImplicitOps, em_rdtsc),
2504 	D(ImplicitOps | Priv), N,
2505 	D(ImplicitOps), D(ImplicitOps | Priv), N, N,
2506 	N, N, N, N, N, N, N, N,
2507 	/* 0x40 - 0x4F */
2508 	X16(D(DstReg | SrcMem | ModRM | Mov)),
2509 	/* 0x50 - 0x5F */
2510 	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
2511 	/* 0x60 - 0x6F */
2512 	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
2513 	/* 0x70 - 0x7F */
2514 	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
2515 	/* 0x80 - 0x8F */
2516 	X16(D(SrcImm)),
2517 	/* 0x90 - 0x9F */
2518 	X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
2519 	/* 0xA0 - 0xA7 */
2520 	D(ImplicitOps | Stack), D(ImplicitOps | Stack),
2521 	N, D(DstMem | SrcReg | ModRM | BitOp),
2522 	D(DstMem | SrcReg | Src2ImmByte | ModRM),
2523 	D(DstMem | SrcReg | Src2CL | ModRM), N, N,
2524 	/* 0xA8 - 0xAF */
2525 	D(ImplicitOps | Stack), D(ImplicitOps | Stack),
2526 	N, D(DstMem | SrcReg | ModRM | BitOp | Lock),
2527 	D(DstMem | SrcReg | Src2ImmByte | ModRM),
2528 	D(DstMem | SrcReg | Src2CL | ModRM),
2529 	D(ModRM), I(DstReg | SrcMem | ModRM, em_imul),
2530 	/* 0xB0 - 0xB7 */
2531 	D2bv(DstMem | SrcReg | ModRM | Lock),
2532 	D(DstReg | SrcMemFAddr | ModRM), D(DstMem | SrcReg | ModRM | BitOp | Lock),
2533 	D(DstReg | SrcMemFAddr | ModRM), D(DstReg | SrcMemFAddr | ModRM),
2534 	D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
2535 	/* 0xB8 - 0xBF */
2536 	N, N,
2537 	G(BitOp, group8), D(DstMem | SrcReg | ModRM | BitOp | Lock),
2538 	D(DstReg | SrcMem | ModRM), D(DstReg | SrcMem | ModRM),
2539 	D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
2540 	/* 0xC0 - 0xCF */
2541 	D2bv(DstMem | SrcReg | ModRM | Lock),
2542 	N, D(DstMem | SrcReg | ModRM | Mov),
2543 	N, N, N, GD(0, &group9),
2544 	N, N, N, N, N, N, N, N,
2545 	/* 0xD0 - 0xDF */
2546 	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
2547 	/* 0xE0 - 0xEF */
2548 	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
2549 	/* 0xF0 - 0xFF */
2550 	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
2551 };
2552 
2553 #undef D
2554 #undef N
2555 #undef G
2556 #undef GD
2557 #undef I
2558 
2559 #undef D2bv
2560 #undef I2bv
2561 #undef D6ALU
2562 
2563 static unsigned imm_size(struct decode_cache *c)
2564 {
2565 	unsigned size;
2566 
2567 	size = (c->d & ByteOp) ? 1 : c->op_bytes;
2568 	if (size == 8)
2569 		size = 4;
2570 	return size;
2571 }
2572 
2573 static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
2574 		      unsigned size, bool sign_extension)
2575 {
2576 	struct decode_cache *c = &ctxt->decode;
2577 	struct x86_emulate_ops *ops = ctxt->ops;
2578 	int rc = X86EMUL_CONTINUE;
2579 
2580 	op->type = OP_IMM;
2581 	op->bytes = size;
2582 	op->addr.mem.ea = c->eip;
2583 	/* NB. Immediates are sign-extended as necessary. */
2584 	switch (op->bytes) {
2585 	case 1:
2586 		op->val = insn_fetch(s8, 1, c->eip);
2587 		break;
2588 	case 2:
2589 		op->val = insn_fetch(s16, 2, c->eip);
2590 		break;
2591 	case 4:
2592 		op->val = insn_fetch(s32, 4, c->eip);
2593 		break;
2594 	}
2595 	if (!sign_extension) {
2596 		switch (op->bytes) {
2597 		case 1:
2598 			op->val &= 0xff;
2599 			break;
2600 		case 2:
2601 			op->val &= 0xffff;
2602 			break;
2603 		case 4:
2604 			op->val &= 0xffffffff;
2605 			break;
2606 		}
2607 	}
2608 done:
2609 	return rc;
2610 }
2611 
2612 int
2613 x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
2614 {
2615 	struct x86_emulate_ops *ops = ctxt->ops;
2616 	struct decode_cache *c = &ctxt->decode;
2617 	int rc = X86EMUL_CONTINUE;
2618 	int mode = ctxt->mode;
2619 	int def_op_bytes, def_ad_bytes, dual, goffset;
2620 	struct opcode opcode, *g_mod012, *g_mod3;
2621 	struct operand memop = { .type = OP_NONE };
2622 
2623 	c->eip = ctxt->eip;
2624 	c->fetch.start = c->eip;
2625 	c->fetch.end = c->fetch.start + insn_len;
2626 	if (insn_len > 0)
2627 		memcpy(c->fetch.data, insn, insn_len);
2628 	ctxt->cs_base = seg_base(ctxt, ops, VCPU_SREG_CS);
2629 
2630 	switch (mode) {
2631 	case X86EMUL_MODE_REAL:
2632 	case X86EMUL_MODE_VM86:
2633 	case X86EMUL_MODE_PROT16:
2634 		def_op_bytes = def_ad_bytes = 2;
2635 		break;
2636 	case X86EMUL_MODE_PROT32:
2637 		def_op_bytes = def_ad_bytes = 4;
2638 		break;
2639 #ifdef CONFIG_X86_64
2640 	case X86EMUL_MODE_PROT64:
2641 		def_op_bytes = 4;
2642 		def_ad_bytes = 8;
2643 		break;
2644 #endif
2645 	default:
2646 		return -1;
2647 	}
2648 
2649 	c->op_bytes = def_op_bytes;
2650 	c->ad_bytes = def_ad_bytes;
2651 
2652 	/* Legacy prefixes. */
2653 	for (;;) {
2654 		switch (c->b = insn_fetch(u8, 1, c->eip)) {
2655 		case 0x66:	/* operand-size override */
2656 			/* switch between 2/4 bytes */
2657 			c->op_bytes = def_op_bytes ^ 6;
2658 			break;
2659 		case 0x67:	/* address-size override */
2660 			if (mode == X86EMUL_MODE_PROT64)
2661 				/* switch between 4/8 bytes */
2662 				c->ad_bytes = def_ad_bytes ^ 12;
2663 			else
2664 				/* switch between 2/4 bytes */
2665 				c->ad_bytes = def_ad_bytes ^ 6;
2666 			break;
2667 		case 0x26:	/* ES override */
2668 		case 0x2e:	/* CS override */
2669 		case 0x36:	/* SS override */
2670 		case 0x3e:	/* DS override */
2671 			set_seg_override(c, (c->b >> 3) & 3);
2672 			break;
2673 		case 0x64:	/* FS override */
2674 		case 0x65:	/* GS override */
2675 			set_seg_override(c, c->b & 7);
2676 			break;
2677 		case 0x40 ... 0x4f: /* REX */
2678 			if (mode != X86EMUL_MODE_PROT64)
2679 				goto done_prefixes;
2680 			c->rex_prefix = c->b;
2681 			continue;
2682 		case 0xf0:	/* LOCK */
2683 			c->lock_prefix = 1;
2684 			break;
2685 		case 0xf2:	/* REPNE/REPNZ */
2686 			c->rep_prefix = REPNE_PREFIX;
2687 			break;
2688 		case 0xf3:	/* REP/REPE/REPZ */
2689 			c->rep_prefix = REPE_PREFIX;
2690 			break;
2691 		default:
2692 			goto done_prefixes;
2693 		}
2694 
2695 		/* Any legacy prefix after a REX prefix nullifies its effect. */
2696 
2697 		c->rex_prefix = 0;
2698 	}
2699 
2700 done_prefixes:
2701 
2702 	/* REX prefix. */
2703 	if (c->rex_prefix & 8)
2704 		c->op_bytes = 8;	/* REX.W */
2705 
2706 	/* Opcode byte(s). */
2707 	opcode = opcode_table[c->b];
2708 	/* Two-byte opcode? */
2709 	if (c->b == 0x0f) {
2710 		c->twobyte = 1;
2711 		c->b = insn_fetch(u8, 1, c->eip);
2712 		opcode = twobyte_table[c->b];
2713 	}
2714 	c->d = opcode.flags;
2715 
2716 	if (c->d & Group) {
2717 		dual = c->d & GroupDual;
2718 		c->modrm = insn_fetch(u8, 1, c->eip);
2719 		--c->eip;
2720 
2721 		if (c->d & GroupDual) {
2722 			g_mod012 = opcode.u.gdual->mod012;
2723 			g_mod3 = opcode.u.gdual->mod3;
2724 		} else
2725 			g_mod012 = g_mod3 = opcode.u.group;
2726 
2727 		c->d &= ~(Group | GroupDual);
2728 
2729 		goffset = (c->modrm >> 3) & 7;
2730 
2731 		if ((c->modrm >> 6) == 3)
2732 			opcode = g_mod3[goffset];
2733 		else
2734 			opcode = g_mod012[goffset];
2735 		c->d |= opcode.flags;
2736 	}
2737 
2738 	c->execute = opcode.u.execute;
2739 
2740 	/* Unrecognised? */
2741 	if (c->d == 0 || (c->d & Undefined))
2742 		return -1;
2743 
2744 	if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack))
2745 		c->op_bytes = 8;
2746 
2747 	if (c->d & Op3264) {
2748 		if (mode == X86EMUL_MODE_PROT64)
2749 			c->op_bytes = 8;
2750 		else
2751 			c->op_bytes = 4;
2752 	}
2753 
2754 	/* ModRM and SIB bytes. */
2755 	if (c->d & ModRM) {
2756 		rc = decode_modrm(ctxt, ops, &memop);
2757 		if (!c->has_seg_override)
2758 			set_seg_override(c, c->modrm_seg);
2759 	} else if (c->d & MemAbs)
2760 		rc = decode_abs(ctxt, ops, &memop);
2761 	if (rc != X86EMUL_CONTINUE)
2762 		goto done;
2763 
2764 	if (!c->has_seg_override)
2765 		set_seg_override(c, VCPU_SREG_DS);
2766 
2767 	memop.addr.mem.seg = seg_override(ctxt, ops, c);
2768 
2769 	if (memop.type == OP_MEM && c->ad_bytes != 8)
2770 		memop.addr.mem.ea = (u32)memop.addr.mem.ea;
2771 
2772 	if (memop.type == OP_MEM && c->rip_relative)
2773 		memop.addr.mem.ea += c->eip;
2774 
2775 	/*
2776 	 * Decode and fetch the source operand: register, memory
2777 	 * or immediate.
2778 	 */
2779 	switch (c->d & SrcMask) {
2780 	case SrcNone:
2781 		break;
2782 	case SrcReg:
2783 		decode_register_operand(&c->src, c, 0);
2784 		break;
2785 	case SrcMem16:
2786 		memop.bytes = 2;
2787 		goto srcmem_common;
2788 	case SrcMem32:
2789 		memop.bytes = 4;
2790 		goto srcmem_common;
2791 	case SrcMem:
2792 		memop.bytes = (c->d & ByteOp) ? 1 :
2793 							   c->op_bytes;
2794 	srcmem_common:
2795 		c->src = memop;
2796 		break;
2797 	case SrcImmU16:
2798 		rc = decode_imm(ctxt, &c->src, 2, false);
2799 		break;
2800 	case SrcImm:
2801 		rc = decode_imm(ctxt, &c->src, imm_size(c), true);
2802 		break;
2803 	case SrcImmU:
2804 		rc = decode_imm(ctxt, &c->src, imm_size(c), false);
2805 		break;
2806 	case SrcImmByte:
2807 		rc = decode_imm(ctxt, &c->src, 1, true);
2808 		break;
2809 	case SrcImmUByte:
2810 		rc = decode_imm(ctxt, &c->src, 1, false);
2811 		break;
2812 	case SrcAcc:
2813 		c->src.type = OP_REG;
2814 		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
2815 		c->src.addr.reg = &c->regs[VCPU_REGS_RAX];
2816 		fetch_register_operand(&c->src);
2817 		break;
2818 	case SrcOne:
2819 		c->src.bytes = 1;
2820 		c->src.val = 1;
2821 		break;
2822 	case SrcSI:
2823 		c->src.type = OP_MEM;
2824 		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
2825 		c->src.addr.mem.ea =
2826 			register_address(c, c->regs[VCPU_REGS_RSI]);
2827 		c->src.addr.mem.seg = seg_override(ctxt, ops, c),
2828 		c->src.val = 0;
2829 		break;
2830 	case SrcImmFAddr:
2831 		c->src.type = OP_IMM;
2832 		c->src.addr.mem.ea = c->eip;
2833 		c->src.bytes = c->op_bytes + 2;
2834 		insn_fetch_arr(c->src.valptr, c->src.bytes, c->eip);
2835 		break;
2836 	case SrcMemFAddr:
2837 		memop.bytes = c->op_bytes + 2;
2838 		goto srcmem_common;
2839 		break;
2840 	}
2841 
2842 	if (rc != X86EMUL_CONTINUE)
2843 		goto done;
2844 
2845 	/*
2846 	 * Decode and fetch the second source operand: register, memory
2847 	 * or immediate.
2848 	 */
2849 	switch (c->d & Src2Mask) {
2850 	case Src2None:
2851 		break;
2852 	case Src2CL:
2853 		c->src2.bytes = 1;
2854 		c->src2.val = c->regs[VCPU_REGS_RCX] & 0x8;
2855 		break;
2856 	case Src2ImmByte:
2857 		rc = decode_imm(ctxt, &c->src2, 1, true);
2858 		break;
2859 	case Src2One:
2860 		c->src2.bytes = 1;
2861 		c->src2.val = 1;
2862 		break;
2863 	case Src2Imm:
2864 		rc = decode_imm(ctxt, &c->src2, imm_size(c), true);
2865 		break;
2866 	}
2867 
2868 	if (rc != X86EMUL_CONTINUE)
2869 		goto done;
2870 
2871 	/* Decode and fetch the destination operand: register or memory. */
2872 	switch (c->d & DstMask) {
2873 	case DstReg:
2874 		decode_register_operand(&c->dst, c,
2875 			 c->twobyte && (c->b == 0xb6 || c->b == 0xb7));
2876 		break;
2877 	case DstImmUByte:
2878 		c->dst.type = OP_IMM;
2879 		c->dst.addr.mem.ea = c->eip;
2880 		c->dst.bytes = 1;
2881 		c->dst.val = insn_fetch(u8, 1, c->eip);
2882 		break;
2883 	case DstMem:
2884 	case DstMem64:
2885 		c->dst = memop;
2886 		if ((c->d & DstMask) == DstMem64)
2887 			c->dst.bytes = 8;
2888 		else
2889 			c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
2890 		if (c->d & BitOp)
2891 			fetch_bit_operand(c);
2892 		c->dst.orig_val = c->dst.val;
2893 		break;
2894 	case DstAcc:
2895 		c->dst.type = OP_REG;
2896 		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
2897 		c->dst.addr.reg = &c->regs[VCPU_REGS_RAX];
2898 		fetch_register_operand(&c->dst);
2899 		c->dst.orig_val = c->dst.val;
2900 		break;
2901 	case DstDI:
2902 		c->dst.type = OP_MEM;
2903 		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
2904 		c->dst.addr.mem.ea =
2905 			register_address(c, c->regs[VCPU_REGS_RDI]);
2906 		c->dst.addr.mem.seg = VCPU_SREG_ES;
2907 		c->dst.val = 0;
2908 		break;
2909 	case ImplicitOps:
2910 		/* Special instructions do their own operand decoding. */
2911 	default:
2912 		c->dst.type = OP_NONE; /* Disable writeback. */
2913 		return 0;
2914 	}
2915 
2916 done:
2917 	return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
2918 }
2919 
2920 static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
2921 {
2922 	struct decode_cache *c = &ctxt->decode;
2923 
2924 	/* The second termination condition only applies for REPE
2925 	 * and REPNE. Test if the repeat string operation prefix is
2926 	 * REPE/REPZ or REPNE/REPNZ and if it's the case it tests the
2927 	 * corresponding termination condition according to:
2928 	 * 	- if REPE/REPZ and ZF = 0 then done
2929 	 * 	- if REPNE/REPNZ and ZF = 1 then done
2930 	 */
2931 	if (((c->b == 0xa6) || (c->b == 0xa7) ||
2932 	     (c->b == 0xae) || (c->b == 0xaf))
2933 	    && (((c->rep_prefix == REPE_PREFIX) &&
2934 		 ((ctxt->eflags & EFLG_ZF) == 0))
2935 		|| ((c->rep_prefix == REPNE_PREFIX) &&
2936 		    ((ctxt->eflags & EFLG_ZF) == EFLG_ZF))))
2937 		return true;
2938 
2939 	return false;
2940 }
2941 
2942 int
2943 x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
2944 {
2945 	struct x86_emulate_ops *ops = ctxt->ops;
2946 	u64 msr_data;
2947 	struct decode_cache *c = &ctxt->decode;
2948 	int rc = X86EMUL_CONTINUE;
2949 	int saved_dst_type = c->dst.type;
2950 	int irq; /* Used for int 3, int, and into */
2951 
2952 	ctxt->decode.mem_read.pos = 0;
2953 
2954 	if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) {
2955 		rc = emulate_ud(ctxt);
2956 		goto done;
2957 	}
2958 
2959 	/* LOCK prefix is allowed only with some instructions */
2960 	if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) {
2961 		rc = emulate_ud(ctxt);
2962 		goto done;
2963 	}
2964 
2965 	if ((c->d & SrcMask) == SrcMemFAddr && c->src.type != OP_MEM) {
2966 		rc = emulate_ud(ctxt);
2967 		goto done;
2968 	}
2969 
2970 	/* Privileged instruction can be executed only in CPL=0 */
2971 	if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) {
2972 		rc = emulate_gp(ctxt, 0);
2973 		goto done;
2974 	}
2975 
2976 	if (c->rep_prefix && (c->d & String)) {
2977 		/* All REP prefixes have the same first termination condition */
2978 		if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) {
2979 			ctxt->eip = c->eip;
2980 			goto done;
2981 		}
2982 	}
2983 
2984 	if ((c->src.type == OP_MEM) && !(c->d & NoAccess)) {
2985 		rc = read_emulated(ctxt, ops, linear(ctxt, c->src.addr.mem),
2986 					c->src.valptr, c->src.bytes);
2987 		if (rc != X86EMUL_CONTINUE)
2988 			goto done;
2989 		c->src.orig_val64 = c->src.val64;
2990 	}
2991 
2992 	if (c->src2.type == OP_MEM) {
2993 		rc = read_emulated(ctxt, ops, linear(ctxt, c->src2.addr.mem),
2994 					&c->src2.val, c->src2.bytes);
2995 		if (rc != X86EMUL_CONTINUE)
2996 			goto done;
2997 	}
2998 
2999 	if ((c->d & DstMask) == ImplicitOps)
3000 		goto special_insn;
3001 
3002 
3003 	if ((c->dst.type == OP_MEM) && !(c->d & Mov)) {
3004 		/* optimisation - avoid slow emulated read if Mov */
3005 		rc = read_emulated(ctxt, ops, linear(ctxt, c->dst.addr.mem),
3006 				   &c->dst.val, c->dst.bytes);
3007 		if (rc != X86EMUL_CONTINUE)
3008 			goto done;
3009 	}
3010 	c->dst.orig_val = c->dst.val;
3011 
3012 special_insn:
3013 
3014 	if (c->execute) {
3015 		rc = c->execute(ctxt);
3016 		if (rc != X86EMUL_CONTINUE)
3017 			goto done;
3018 		goto writeback;
3019 	}
3020 
3021 	if (c->twobyte)
3022 		goto twobyte_insn;
3023 
3024 	switch (c->b) {
3025 	case 0x00 ... 0x05:
3026 	      add:		/* add */
3027 		emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags);
3028 		break;
3029 	case 0x06:		/* push es */
3030 		emulate_push_sreg(ctxt, ops, VCPU_SREG_ES);
3031 		break;
3032 	case 0x07:		/* pop es */
3033 		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES);
3034 		break;
3035 	case 0x08 ... 0x0d:
3036 	      or:		/* or */
3037 		emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags);
3038 		break;
3039 	case 0x0e:		/* push cs */
3040 		emulate_push_sreg(ctxt, ops, VCPU_SREG_CS);
3041 		break;
3042 	case 0x10 ... 0x15:
3043 	      adc:		/* adc */
3044 		emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags);
3045 		break;
3046 	case 0x16:		/* push ss */
3047 		emulate_push_sreg(ctxt, ops, VCPU_SREG_SS);
3048 		break;
3049 	case 0x17:		/* pop ss */
3050 		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS);
3051 		break;
3052 	case 0x18 ... 0x1d:
3053 	      sbb:		/* sbb */
3054 		emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags);
3055 		break;
3056 	case 0x1e:		/* push ds */
3057 		emulate_push_sreg(ctxt, ops, VCPU_SREG_DS);
3058 		break;
3059 	case 0x1f:		/* pop ds */
3060 		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS);
3061 		break;
3062 	case 0x20 ... 0x25:
3063 	      and:		/* and */
3064 		emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags);
3065 		break;
3066 	case 0x28 ... 0x2d:
3067 	      sub:		/* sub */
3068 		emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags);
3069 		break;
3070 	case 0x30 ... 0x35:
3071 	      xor:		/* xor */
3072 		emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags);
3073 		break;
3074 	case 0x38 ... 0x3d:
3075 	      cmp:		/* cmp */
3076 		emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
3077 		break;
3078 	case 0x40 ... 0x47: /* inc r16/r32 */
3079 		emulate_1op("inc", c->dst, ctxt->eflags);
3080 		break;
3081 	case 0x48 ... 0x4f: /* dec r16/r32 */
3082 		emulate_1op("dec", c->dst, ctxt->eflags);
3083 		break;
3084 	case 0x58 ... 0x5f: /* pop reg */
3085 	pop_instruction:
3086 		rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes);
3087 		break;
3088 	case 0x60:	/* pusha */
3089 		rc = emulate_pusha(ctxt, ops);
3090 		break;
3091 	case 0x61:	/* popa */
3092 		rc = emulate_popa(ctxt, ops);
3093 		break;
3094 	case 0x63:		/* movsxd */
3095 		if (ctxt->mode != X86EMUL_MODE_PROT64)
3096 			goto cannot_emulate;
3097 		c->dst.val = (s32) c->src.val;
3098 		break;
3099 	case 0x6c:		/* insb */
3100 	case 0x6d:		/* insw/insd */
3101 		c->src.val = c->regs[VCPU_REGS_RDX];
3102 		goto do_io_in;
3103 	case 0x6e:		/* outsb */
3104 	case 0x6f:		/* outsw/outsd */
3105 		c->dst.val = c->regs[VCPU_REGS_RDX];
3106 		goto do_io_out;
3107 		break;
3108 	case 0x70 ... 0x7f: /* jcc (short) */
3109 		if (test_cc(c->b, ctxt->eflags))
3110 			jmp_rel(c, c->src.val);
3111 		break;
3112 	case 0x80 ... 0x83:	/* Grp1 */
3113 		switch (c->modrm_reg) {
3114 		case 0:
3115 			goto add;
3116 		case 1:
3117 			goto or;
3118 		case 2:
3119 			goto adc;
3120 		case 3:
3121 			goto sbb;
3122 		case 4:
3123 			goto and;
3124 		case 5:
3125 			goto sub;
3126 		case 6:
3127 			goto xor;
3128 		case 7:
3129 			goto cmp;
3130 		}
3131 		break;
3132 	case 0x84 ... 0x85:
3133 	test:
3134 		emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
3135 		break;
3136 	case 0x86 ... 0x87:	/* xchg */
3137 	xchg:
3138 		/* Write back the register source. */
3139 		c->src.val = c->dst.val;
3140 		write_register_operand(&c->src);
3141 		/*
3142 		 * Write back the memory destination with implicit LOCK
3143 		 * prefix.
3144 		 */
3145 		c->dst.val = c->src.orig_val;
3146 		c->lock_prefix = 1;
3147 		break;
3148 	case 0x8c:  /* mov r/m, sreg */
3149 		if (c->modrm_reg > VCPU_SREG_GS) {
3150 			rc = emulate_ud(ctxt);
3151 			goto done;
3152 		}
3153 		c->dst.val = ops->get_segment_selector(c->modrm_reg, ctxt->vcpu);
3154 		break;
3155 	case 0x8d: /* lea r16/r32, m */
3156 		c->dst.val = c->src.addr.mem.ea;
3157 		break;
3158 	case 0x8e: { /* mov seg, r/m16 */
3159 		uint16_t sel;
3160 
3161 		sel = c->src.val;
3162 
3163 		if (c->modrm_reg == VCPU_SREG_CS ||
3164 		    c->modrm_reg > VCPU_SREG_GS) {
3165 			rc = emulate_ud(ctxt);
3166 			goto done;
3167 		}
3168 
3169 		if (c->modrm_reg == VCPU_SREG_SS)
3170 			ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
3171 
3172 		rc = load_segment_descriptor(ctxt, ops, sel, c->modrm_reg);
3173 
3174 		c->dst.type = OP_NONE;  /* Disable writeback. */
3175 		break;
3176 	}
3177 	case 0x8f:		/* pop (sole member of Grp1a) */
3178 		rc = emulate_grp1a(ctxt, ops);
3179 		break;
3180 	case 0x90 ... 0x97: /* nop / xchg reg, rax */
3181 		if (c->dst.addr.reg == &c->regs[VCPU_REGS_RAX])
3182 			break;
3183 		goto xchg;
3184 	case 0x98: /* cbw/cwde/cdqe */
3185 		switch (c->op_bytes) {
3186 		case 2: c->dst.val = (s8)c->dst.val; break;
3187 		case 4: c->dst.val = (s16)c->dst.val; break;
3188 		case 8: c->dst.val = (s32)c->dst.val; break;
3189 		}
3190 		break;
3191 	case 0x9c: /* pushf */
3192 		c->src.val =  (unsigned long) ctxt->eflags;
3193 		emulate_push(ctxt, ops);
3194 		break;
3195 	case 0x9d: /* popf */
3196 		c->dst.type = OP_REG;
3197 		c->dst.addr.reg = &ctxt->eflags;
3198 		c->dst.bytes = c->op_bytes;
3199 		rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes);
3200 		break;
3201 	case 0xa6 ... 0xa7:	/* cmps */
3202 		c->dst.type = OP_NONE; /* Disable writeback. */
3203 		goto cmp;
3204 	case 0xa8 ... 0xa9:	/* test ax, imm */
3205 		goto test;
3206 	case 0xae ... 0xaf:	/* scas */
3207 		goto cmp;
3208 	case 0xc0 ... 0xc1:
3209 		emulate_grp2(ctxt);
3210 		break;
3211 	case 0xc3: /* ret */
3212 		c->dst.type = OP_REG;
3213 		c->dst.addr.reg = &c->eip;
3214 		c->dst.bytes = c->op_bytes;
3215 		goto pop_instruction;
3216 	case 0xc4:		/* les */
3217 		rc = emulate_load_segment(ctxt, ops, VCPU_SREG_ES);
3218 		break;
3219 	case 0xc5:		/* lds */
3220 		rc = emulate_load_segment(ctxt, ops, VCPU_SREG_DS);
3221 		break;
3222 	case 0xcb:		/* ret far */
3223 		rc = emulate_ret_far(ctxt, ops);
3224 		break;
3225 	case 0xcc:		/* int3 */
3226 		irq = 3;
3227 		goto do_interrupt;
3228 	case 0xcd:		/* int n */
3229 		irq = c->src.val;
3230 	do_interrupt:
3231 		rc = emulate_int(ctxt, ops, irq);
3232 		break;
3233 	case 0xce:		/* into */
3234 		if (ctxt->eflags & EFLG_OF) {
3235 			irq = 4;
3236 			goto do_interrupt;
3237 		}
3238 		break;
3239 	case 0xcf:		/* iret */
3240 		rc = emulate_iret(ctxt, ops);
3241 		break;
3242 	case 0xd0 ... 0xd1:	/* Grp2 */
3243 		emulate_grp2(ctxt);
3244 		break;
3245 	case 0xd2 ... 0xd3:	/* Grp2 */
3246 		c->src.val = c->regs[VCPU_REGS_RCX];
3247 		emulate_grp2(ctxt);
3248 		break;
3249 	case 0xe0 ... 0xe2:	/* loop/loopz/loopnz */
3250 		register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1);
3251 		if (address_mask(c, c->regs[VCPU_REGS_RCX]) != 0 &&
3252 		    (c->b == 0xe2 || test_cc(c->b ^ 0x5, ctxt->eflags)))
3253 			jmp_rel(c, c->src.val);
3254 		break;
3255 	case 0xe3:	/* jcxz/jecxz/jrcxz */
3256 		if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0)
3257 			jmp_rel(c, c->src.val);
3258 		break;
3259 	case 0xe4: 	/* inb */
3260 	case 0xe5: 	/* in */
3261 		goto do_io_in;
3262 	case 0xe6: /* outb */
3263 	case 0xe7: /* out */
3264 		goto do_io_out;
3265 	case 0xe8: /* call (near) */ {
3266 		long int rel = c->src.val;
3267 		c->src.val = (unsigned long) c->eip;
3268 		jmp_rel(c, rel);
3269 		emulate_push(ctxt, ops);
3270 		break;
3271 	}
3272 	case 0xe9: /* jmp rel */
3273 		goto jmp;
3274 	case 0xea: { /* jmp far */
3275 		unsigned short sel;
3276 	jump_far:
3277 		memcpy(&sel, c->src.valptr + c->op_bytes, 2);
3278 
3279 		if (load_segment_descriptor(ctxt, ops, sel, VCPU_SREG_CS))
3280 			goto done;
3281 
3282 		c->eip = 0;
3283 		memcpy(&c->eip, c->src.valptr, c->op_bytes);
3284 		break;
3285 	}
3286 	case 0xeb:
3287 	      jmp:		/* jmp rel short */
3288 		jmp_rel(c, c->src.val);
3289 		c->dst.type = OP_NONE; /* Disable writeback. */
3290 		break;
3291 	case 0xec: /* in al,dx */
3292 	case 0xed: /* in (e/r)ax,dx */
3293 		c->src.val = c->regs[VCPU_REGS_RDX];
3294 	do_io_in:
3295 		c->dst.bytes = min(c->dst.bytes, 4u);
3296 		if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) {
3297 			rc = emulate_gp(ctxt, 0);
3298 			goto done;
3299 		}
3300 		if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val,
3301 				     &c->dst.val))
3302 			goto done; /* IO is needed */
3303 		break;
3304 	case 0xee: /* out dx,al */
3305 	case 0xef: /* out dx,(e/r)ax */
3306 		c->dst.val = c->regs[VCPU_REGS_RDX];
3307 	do_io_out:
3308 		c->src.bytes = min(c->src.bytes, 4u);
3309 		if (!emulator_io_permited(ctxt, ops, c->dst.val,
3310 					  c->src.bytes)) {
3311 			rc = emulate_gp(ctxt, 0);
3312 			goto done;
3313 		}
3314 		ops->pio_out_emulated(c->src.bytes, c->dst.val,
3315 				      &c->src.val, 1, ctxt->vcpu);
3316 		c->dst.type = OP_NONE;	/* Disable writeback. */
3317 		break;
3318 	case 0xf4:              /* hlt */
3319 		ctxt->vcpu->arch.halt_request = 1;
3320 		break;
3321 	case 0xf5:	/* cmc */
3322 		/* complement carry flag from eflags reg */
3323 		ctxt->eflags ^= EFLG_CF;
3324 		break;
3325 	case 0xf6 ... 0xf7:	/* Grp3 */
3326 		rc = emulate_grp3(ctxt, ops);
3327 		break;
3328 	case 0xf8: /* clc */
3329 		ctxt->eflags &= ~EFLG_CF;
3330 		break;
3331 	case 0xf9: /* stc */
3332 		ctxt->eflags |= EFLG_CF;
3333 		break;
3334 	case 0xfa: /* cli */
3335 		if (emulator_bad_iopl(ctxt, ops)) {
3336 			rc = emulate_gp(ctxt, 0);
3337 			goto done;
3338 		} else
3339 			ctxt->eflags &= ~X86_EFLAGS_IF;
3340 		break;
3341 	case 0xfb: /* sti */
3342 		if (emulator_bad_iopl(ctxt, ops)) {
3343 			rc = emulate_gp(ctxt, 0);
3344 			goto done;
3345 		} else {
3346 			ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;
3347 			ctxt->eflags |= X86_EFLAGS_IF;
3348 		}
3349 		break;
3350 	case 0xfc: /* cld */
3351 		ctxt->eflags &= ~EFLG_DF;
3352 		break;
3353 	case 0xfd: /* std */
3354 		ctxt->eflags |= EFLG_DF;
3355 		break;
3356 	case 0xfe: /* Grp4 */
3357 	grp45:
3358 		rc = emulate_grp45(ctxt, ops);
3359 		break;
3360 	case 0xff: /* Grp5 */
3361 		if (c->modrm_reg == 5)
3362 			goto jump_far;
3363 		goto grp45;
3364 	default:
3365 		goto cannot_emulate;
3366 	}
3367 
3368 	if (rc != X86EMUL_CONTINUE)
3369 		goto done;
3370 
3371 writeback:
3372 	rc = writeback(ctxt, ops);
3373 	if (rc != X86EMUL_CONTINUE)
3374 		goto done;
3375 
3376 	/*
3377 	 * restore dst type in case the decoding will be reused
3378 	 * (happens for string instruction )
3379 	 */
3380 	c->dst.type = saved_dst_type;
3381 
3382 	if ((c->d & SrcMask) == SrcSI)
3383 		string_addr_inc(ctxt, seg_override(ctxt, ops, c),
3384 				VCPU_REGS_RSI, &c->src);
3385 
3386 	if ((c->d & DstMask) == DstDI)
3387 		string_addr_inc(ctxt, VCPU_SREG_ES, VCPU_REGS_RDI,
3388 				&c->dst);
3389 
3390 	if (c->rep_prefix && (c->d & String)) {
3391 		struct read_cache *r = &ctxt->decode.io_read;
3392 		register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1);
3393 
3394 		if (!string_insn_completed(ctxt)) {
3395 			/*
3396 			 * Re-enter guest when pio read ahead buffer is empty
3397 			 * or, if it is not used, after each 1024 iteration.
3398 			 */
3399 			if ((r->end != 0 || c->regs[VCPU_REGS_RCX] & 0x3ff) &&
3400 			    (r->end == 0 || r->end != r->pos)) {
3401 				/*
3402 				 * Reset read cache. Usually happens before
3403 				 * decode, but since instruction is restarted
3404 				 * we have to do it here.
3405 				 */
3406 				ctxt->decode.mem_read.end = 0;
3407 				return EMULATION_RESTART;
3408 			}
3409 			goto done; /* skip rip writeback */
3410 		}
3411 	}
3412 
3413 	ctxt->eip = c->eip;
3414 
3415 done:
3416 	if (rc == X86EMUL_PROPAGATE_FAULT)
3417 		ctxt->have_exception = true;
3418 	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
3419 
3420 twobyte_insn:
3421 	switch (c->b) {
3422 	case 0x01: /* lgdt, lidt, lmsw */
3423 		switch (c->modrm_reg) {
3424 			u16 size;
3425 			unsigned long address;
3426 
3427 		case 0: /* vmcall */
3428 			if (c->modrm_mod != 3 || c->modrm_rm != 1)
3429 				goto cannot_emulate;
3430 
3431 			rc = kvm_fix_hypercall(ctxt->vcpu);
3432 			if (rc != X86EMUL_CONTINUE)
3433 				goto done;
3434 
3435 			/* Let the processor re-execute the fixed hypercall */
3436 			c->eip = ctxt->eip;
3437 			/* Disable writeback. */
3438 			c->dst.type = OP_NONE;
3439 			break;
3440 		case 2: /* lgdt */
3441 			rc = read_descriptor(ctxt, ops, c->src.addr.mem,
3442 					     &size, &address, c->op_bytes);
3443 			if (rc != X86EMUL_CONTINUE)
3444 				goto done;
3445 			realmode_lgdt(ctxt->vcpu, size, address);
3446 			/* Disable writeback. */
3447 			c->dst.type = OP_NONE;
3448 			break;
3449 		case 3: /* lidt/vmmcall */
3450 			if (c->modrm_mod == 3) {
3451 				switch (c->modrm_rm) {
3452 				case 1:
3453 					rc = kvm_fix_hypercall(ctxt->vcpu);
3454 					break;
3455 				default:
3456 					goto cannot_emulate;
3457 				}
3458 			} else {
3459 				rc = read_descriptor(ctxt, ops, c->src.addr.mem,
3460 						     &size, &address,
3461 						     c->op_bytes);
3462 				if (rc != X86EMUL_CONTINUE)
3463 					goto done;
3464 				realmode_lidt(ctxt->vcpu, size, address);
3465 			}
3466 			/* Disable writeback. */
3467 			c->dst.type = OP_NONE;
3468 			break;
3469 		case 4: /* smsw */
3470 			c->dst.bytes = 2;
3471 			c->dst.val = ops->get_cr(0, ctxt->vcpu);
3472 			break;
3473 		case 6: /* lmsw */
3474 			ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0eul) |
3475 				    (c->src.val & 0x0f), ctxt->vcpu);
3476 			c->dst.type = OP_NONE;
3477 			break;
3478 		case 5: /* not defined */
3479 			emulate_ud(ctxt);
3480 			rc = X86EMUL_PROPAGATE_FAULT;
3481 			goto done;
3482 		case 7: /* invlpg*/
3483 			emulate_invlpg(ctxt->vcpu,
3484 				       linear(ctxt, c->src.addr.mem));
3485 			/* Disable writeback. */
3486 			c->dst.type = OP_NONE;
3487 			break;
3488 		default:
3489 			goto cannot_emulate;
3490 		}
3491 		break;
3492 	case 0x05: 		/* syscall */
3493 		rc = emulate_syscall(ctxt, ops);
3494 		break;
3495 	case 0x06:
3496 		emulate_clts(ctxt->vcpu);
3497 		break;
3498 	case 0x09:		/* wbinvd */
3499 		kvm_emulate_wbinvd(ctxt->vcpu);
3500 		break;
3501 	case 0x08:		/* invd */
3502 	case 0x0d:		/* GrpP (prefetch) */
3503 	case 0x18:		/* Grp16 (prefetch/nop) */
3504 		break;
3505 	case 0x20: /* mov cr, reg */
3506 		switch (c->modrm_reg) {
3507 		case 1:
3508 		case 5 ... 7:
3509 		case 9 ... 15:
3510 			emulate_ud(ctxt);
3511 			rc = X86EMUL_PROPAGATE_FAULT;
3512 			goto done;
3513 		}
3514 		c->dst.val = ops->get_cr(c->modrm_reg, ctxt->vcpu);
3515 		break;
3516 	case 0x21: /* mov from dr to reg */
3517 		if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&
3518 		    (c->modrm_reg == 4 || c->modrm_reg == 5)) {
3519 			emulate_ud(ctxt);
3520 			rc = X86EMUL_PROPAGATE_FAULT;
3521 			goto done;
3522 		}
3523 		ops->get_dr(c->modrm_reg, &c->dst.val, ctxt->vcpu);
3524 		break;
3525 	case 0x22: /* mov reg, cr */
3526 		if (ops->set_cr(c->modrm_reg, c->src.val, ctxt->vcpu)) {
3527 			emulate_gp(ctxt, 0);
3528 			rc = X86EMUL_PROPAGATE_FAULT;
3529 			goto done;
3530 		}
3531 		c->dst.type = OP_NONE;
3532 		break;
3533 	case 0x23: /* mov from reg to dr */
3534 		if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&
3535 		    (c->modrm_reg == 4 || c->modrm_reg == 5)) {
3536 			emulate_ud(ctxt);
3537 			rc = X86EMUL_PROPAGATE_FAULT;
3538 			goto done;
3539 		}
3540 
3541 		if (ops->set_dr(c->modrm_reg, c->src.val &
3542 				((ctxt->mode == X86EMUL_MODE_PROT64) ?
3543 				 ~0ULL : ~0U), ctxt->vcpu) < 0) {
3544 			/* #UD condition is already handled by the code above */
3545 			emulate_gp(ctxt, 0);
3546 			rc = X86EMUL_PROPAGATE_FAULT;
3547 			goto done;
3548 		}
3549 
3550 		c->dst.type = OP_NONE;	/* no writeback */
3551 		break;
3552 	case 0x30:
3553 		/* wrmsr */
3554 		msr_data = (u32)c->regs[VCPU_REGS_RAX]
3555 			| ((u64)c->regs[VCPU_REGS_RDX] << 32);
3556 		if (ops->set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) {
3557 			emulate_gp(ctxt, 0);
3558 			rc = X86EMUL_PROPAGATE_FAULT;
3559 			goto done;
3560 		}
3561 		rc = X86EMUL_CONTINUE;
3562 		break;
3563 	case 0x32:
3564 		/* rdmsr */
3565 		if (ops->get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) {
3566 			emulate_gp(ctxt, 0);
3567 			rc = X86EMUL_PROPAGATE_FAULT;
3568 			goto done;
3569 		} else {
3570 			c->regs[VCPU_REGS_RAX] = (u32)msr_data;
3571 			c->regs[VCPU_REGS_RDX] = msr_data >> 32;
3572 		}
3573 		rc = X86EMUL_CONTINUE;
3574 		break;
3575 	case 0x34:		/* sysenter */
3576 		rc = emulate_sysenter(ctxt, ops);
3577 		break;
3578 	case 0x35:		/* sysexit */
3579 		rc = emulate_sysexit(ctxt, ops);
3580 		break;
3581 	case 0x40 ... 0x4f:	/* cmov */
3582 		c->dst.val = c->dst.orig_val = c->src.val;
3583 		if (!test_cc(c->b, ctxt->eflags))
3584 			c->dst.type = OP_NONE; /* no writeback */
3585 		break;
3586 	case 0x80 ... 0x8f: /* jnz rel, etc*/
3587 		if (test_cc(c->b, ctxt->eflags))
3588 			jmp_rel(c, c->src.val);
3589 		break;
3590 	case 0x90 ... 0x9f:     /* setcc r/m8 */
3591 		c->dst.val = test_cc(c->b, ctxt->eflags);
3592 		break;
3593 	case 0xa0:	  /* push fs */
3594 		emulate_push_sreg(ctxt, ops, VCPU_SREG_FS);
3595 		break;
3596 	case 0xa1:	 /* pop fs */
3597 		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS);
3598 		break;
3599 	case 0xa3:
3600 	      bt:		/* bt */
3601 		c->dst.type = OP_NONE;
3602 		/* only subword offset */
3603 		c->src.val &= (c->dst.bytes << 3) - 1;
3604 		emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags);
3605 		break;
3606 	case 0xa4: /* shld imm8, r, r/m */
3607 	case 0xa5: /* shld cl, r, r/m */
3608 		emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags);
3609 		break;
3610 	case 0xa8:	/* push gs */
3611 		emulate_push_sreg(ctxt, ops, VCPU_SREG_GS);
3612 		break;
3613 	case 0xa9:	/* pop gs */
3614 		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS);
3615 		break;
3616 	case 0xab:
3617 	      bts:		/* bts */
3618 		emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags);
3619 		break;
3620 	case 0xac: /* shrd imm8, r, r/m */
3621 	case 0xad: /* shrd cl, r, r/m */
3622 		emulate_2op_cl("shrd", c->src2, c->src, c->dst, ctxt->eflags);
3623 		break;
3624 	case 0xae:              /* clflush */
3625 		break;
3626 	case 0xb0 ... 0xb1:	/* cmpxchg */
3627 		/*
3628 		 * Save real source value, then compare EAX against
3629 		 * destination.
3630 		 */
3631 		c->src.orig_val = c->src.val;
3632 		c->src.val = c->regs[VCPU_REGS_RAX];
3633 		emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
3634 		if (ctxt->eflags & EFLG_ZF) {
3635 			/* Success: write back to memory. */
3636 			c->dst.val = c->src.orig_val;
3637 		} else {
3638 			/* Failure: write the value we saw to EAX. */
3639 			c->dst.type = OP_REG;
3640 			c->dst.addr.reg = (unsigned long *)&c->regs[VCPU_REGS_RAX];
3641 		}
3642 		break;
3643 	case 0xb2:		/* lss */
3644 		rc = emulate_load_segment(ctxt, ops, VCPU_SREG_SS);
3645 		break;
3646 	case 0xb3:
3647 	      btr:		/* btr */
3648 		emulate_2op_SrcV_nobyte("btr", c->src, c->dst, ctxt->eflags);
3649 		break;
3650 	case 0xb4:		/* lfs */
3651 		rc = emulate_load_segment(ctxt, ops, VCPU_SREG_FS);
3652 		break;
3653 	case 0xb5:		/* lgs */
3654 		rc = emulate_load_segment(ctxt, ops, VCPU_SREG_GS);
3655 		break;
3656 	case 0xb6 ... 0xb7:	/* movzx */
3657 		c->dst.bytes = c->op_bytes;
3658 		c->dst.val = (c->d & ByteOp) ? (u8) c->src.val
3659 						       : (u16) c->src.val;
3660 		break;
3661 	case 0xba:		/* Grp8 */
3662 		switch (c->modrm_reg & 3) {
3663 		case 0:
3664 			goto bt;
3665 		case 1:
3666 			goto bts;
3667 		case 2:
3668 			goto btr;
3669 		case 3:
3670 			goto btc;
3671 		}
3672 		break;
3673 	case 0xbb:
3674 	      btc:		/* btc */
3675 		emulate_2op_SrcV_nobyte("btc", c->src, c->dst, ctxt->eflags);
3676 		break;
3677 	case 0xbc: {		/* bsf */
3678 		u8 zf;
3679 		__asm__ ("bsf %2, %0; setz %1"
3680 			 : "=r"(c->dst.val), "=q"(zf)
3681 			 : "r"(c->src.val));
3682 		ctxt->eflags &= ~X86_EFLAGS_ZF;
3683 		if (zf) {
3684 			ctxt->eflags |= X86_EFLAGS_ZF;
3685 			c->dst.type = OP_NONE;	/* Disable writeback. */
3686 		}
3687 		break;
3688 	}
3689 	case 0xbd: {		/* bsr */
3690 		u8 zf;
3691 		__asm__ ("bsr %2, %0; setz %1"
3692 			 : "=r"(c->dst.val), "=q"(zf)
3693 			 : "r"(c->src.val));
3694 		ctxt->eflags &= ~X86_EFLAGS_ZF;
3695 		if (zf) {
3696 			ctxt->eflags |= X86_EFLAGS_ZF;
3697 			c->dst.type = OP_NONE;	/* Disable writeback. */
3698 		}
3699 		break;
3700 	}
3701 	case 0xbe ... 0xbf:	/* movsx */
3702 		c->dst.bytes = c->op_bytes;
3703 		c->dst.val = (c->d & ByteOp) ? (s8) c->src.val :
3704 							(s16) c->src.val;
3705 		break;
3706 	case 0xc0 ... 0xc1:	/* xadd */
3707 		emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags);
3708 		/* Write back the register source. */
3709 		c->src.val = c->dst.orig_val;
3710 		write_register_operand(&c->src);
3711 		break;
3712 	case 0xc3:		/* movnti */
3713 		c->dst.bytes = c->op_bytes;
3714 		c->dst.val = (c->op_bytes == 4) ? (u32) c->src.val :
3715 							(u64) c->src.val;
3716 		break;
3717 	case 0xc7:		/* Grp9 (cmpxchg8b) */
3718 		rc = emulate_grp9(ctxt, ops);
3719 		break;
3720 	default:
3721 		goto cannot_emulate;
3722 	}
3723 
3724 	if (rc != X86EMUL_CONTINUE)
3725 		goto done;
3726 
3727 	goto writeback;
3728 
3729 cannot_emulate:
3730 	return -1;
3731 }
3732