/******************************************************************************
 * emulate.c
 *
 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
 *
 * Copyright (c) 2005 Keir Fraser
 *
 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
 * privileged instructions:
 *
 * Copyright (C) 2006 Qumranet
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 *   Avi Kivity <avi@qumranet.com>
 *   Yaniv Kamay <yaniv@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
 */

#ifndef __KERNEL__
#include <stdio.h>
#include <stdint.h>
#include <public/xen.h>
#define DPRINTF(_f, _a ...) printf(_f , ## _a)
#else
#include <linux/kvm_host.h>
#include "kvm_cache_regs.h"
#define DPRINTF(x...) do {} while (0)
#endif
#include <linux/module.h>
#include <asm/kvm_emulate.h>

#include "x86.h"
#include "tss.h"

/*
 * Opcode effective-address decode tables.
 * Note that we only emulate instructions that have at least one memory
 * operand (excluding implicit stack references). We assume that stack
 * references and instruction fetches will never occur in special memory
 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
 * not be handled.
 */

/* Operand sizes: 8-bit operands or specified/overridden size. */
#define ByteOp      (1<<0)	/* 8-bit operands. */
/* Destination operand type. */
#define ImplicitOps (1<<1)	/* Implicit in opcode. No generic decode. */
#define DstReg      (2<<1)	/* Register operand. */
#define DstMem      (3<<1)	/* Memory operand. */
#define DstAcc      (4<<1)	/* Destination Accumulator */
#define DstDI       (5<<1)	/* Destination is in ES:(E)DI */
#define DstMem64    (6<<1)	/* 64bit memory operand */
#define DstImmUByte (7<<1)	/* 8-bit unsigned immediate operand */
#define DstMask     (7<<1)
/* Source operand type. */
#define SrcNone     (0<<4)	/* No source operand. */
#define SrcReg      (1<<4)	/* Register operand. */
#define SrcMem      (2<<4)	/* Memory operand. */
#define SrcMem16    (3<<4)	/* Memory operand (16-bit). */
#define SrcMem32    (4<<4)	/* Memory operand (32-bit). */
#define SrcImm      (5<<4)	/* Immediate operand. */
#define SrcImmByte  (6<<4)	/* 8-bit sign-extended immediate operand. */
#define SrcOne      (7<<4)	/* Implied '1' */
#define SrcImmUByte (8<<4)      /* 8-bit unsigned immediate operand. */
#define SrcImmU     (9<<4)      /* Immediate operand, unsigned */
#define SrcSI       (0xa<<4)	/* Source is in the DS:RSI */
#define SrcImmFAddr (0xb<<4)	/* Source is immediate far address */
#define SrcMemFAddr (0xc<<4)	/* Source is far address in memory */
#define SrcAcc      (0xd<<4)	/* Source Accumulator */
#define SrcImmU16   (0xe<<4)    /* Immediate operand, unsigned, 16 bits */
#define SrcMask     (0xf<<4)
/* Generic ModRM decode. */
#define ModRM       (1<<8)
/* Destination is only written; never read. */
#define Mov         (1<<9)
#define BitOp       (1<<10)
#define MemAbs      (1<<11)      /* Memory operand is absolute displacement */
#define String      (1<<12)     /* String instruction (rep capable) */
#define Stack       (1<<13)     /* Stack instruction (push/pop) */
#define Group       (1<<14)     /* Bits 3:5 of modrm byte extend opcode */
#define GroupDual   (1<<15)     /* Alternate decoding of mod == 3 */
/* Misc flags */
#define NoAccess    (1<<23) /* Don't access memory (lea/invlpg/verr etc) */
#define Op3264      (1<<24) /* Operand is 64b in long mode, 32b otherwise */
#define Undefined   (1<<25) /* No Such Instruction */
#define Lock        (1<<26) /* lock prefix is allowed for the instruction */
#define Priv        (1<<27) /* instruction generates #GP if current CPL != 0 */
#define No64	    (1<<28)
/* Source 2 operand type */
#define Src2None    (0<<29)
#define Src2CL      (1<<29)
#define Src2ImmByte (2<<29)
#define Src2One     (3<<29)
#define Src2Imm     (4<<29)
#define Src2Mask    (7<<29)

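/*
 * Illustrative sketch (not from the original source): the flags above
 * pack several decode fields into one u32.  A hypothetical table entry
 * declared as
 *
 *	DstReg | SrcMem | ModRM
 *
 * combines (2<<1) "destination is a register", (2<<4) "source is a
 * memory operand" and (1<<8) "run the generic ModRM decoder"; the
 * decoder later separates the fields again with DstMask and SrcMask.
 */
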
#define X2(x...) x, x
#define X3(x...) X2(x), x
#define X4(x...) X2(x), X2(x)
#define X5(x...) X4(x), x
#define X6(x...) X4(x), X2(x)
#define X7(x...) X4(x), X3(x)
#define X8(x...) X4(x), X4(x)
#define X16(x...) X8(x), X8(x)

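/*
 * Illustrative sketch: the X* macros simply repeat their argument list,
 * e.g. X4(0x90) expands to "0x90, 0x90, 0x90, 0x90".  The opcode tables
 * use them so that a run of opcodes sharing one decode template needs
 * only a single initializer.
 */
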
struct opcode {
	u32 flags;
	union {
		int (*execute)(struct x86_emulate_ctxt *ctxt);
		struct opcode *group;
		struct group_dual *gdual;
	} u;
};

struct group_dual {
	struct opcode mod012[8];
	struct opcode mod3[8];
};

/* EFLAGS bit definitions. */
#define EFLG_ID (1<<21)
#define EFLG_VIP (1<<20)
#define EFLG_VIF (1<<19)
#define EFLG_AC (1<<18)
#define EFLG_VM (1<<17)
#define EFLG_RF (1<<16)
#define EFLG_IOPL (3<<12)
#define EFLG_NT (1<<14)
#define EFLG_OF (1<<11)
#define EFLG_DF (1<<10)
#define EFLG_IF (1<<9)
#define EFLG_TF (1<<8)
#define EFLG_SF (1<<7)
#define EFLG_ZF (1<<6)
#define EFLG_AF (1<<4)
#define EFLG_PF (1<<2)
#define EFLG_CF (1<<0)

#define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
#define EFLG_RESERVED_ONE_MASK 2

/*
 * Instruction emulation:
 * Most instructions are emulated directly via a fragment of inline assembly
 * code. This allows us to save/restore EFLAGS and thus very easily pick up
 * any modified flags.
 */

#if defined(CONFIG_X86_64)
#define _LO32 "k"		/* force 32-bit operand */
#define _STK  "%%rsp"		/* stack pointer */
#elif defined(__i386__)
#define _LO32 ""		/* force 32-bit operand */
#define _STK  "%%esp"		/* stack pointer */
#endif

/*
 * These EFLAGS bits are restored from saved value during emulation, and
 * any changes are written back to the saved value after emulation.
 */
#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)

/* Before executing instruction: restore necessary bits in EFLAGS. */
#define _PRE_EFLAGS(_sav, _msk, _tmp)					\
	/* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */ \
	"movl %"_sav",%"_LO32 _tmp"; "                                  \
	"push %"_tmp"; "                                                \
	"push %"_tmp"; "                                                \
	"movl %"_msk",%"_LO32 _tmp"; "                                  \
	"andl %"_LO32 _tmp",("_STK"); "                                 \
	"pushf; "                                                       \
	"notl %"_LO32 _tmp"; "                                          \
	"andl %"_LO32 _tmp",("_STK"); "                                 \
	"andl %"_LO32 _tmp","__stringify(BITS_PER_LONG/4)"("_STK"); "	\
	"pop  %"_tmp"; "                                                \
	"orl  %"_LO32 _tmp",("_STK"); "                                 \
	"popf; "                                                        \
	"pop  %"_sav"; "

/* After executing instruction: write-back necessary bits in EFLAGS. */
#define _POST_EFLAGS(_sav, _msk, _tmp) \
	/* _sav |= EFLAGS & _msk; */		\
	"pushf; "				\
	"pop  %"_tmp"; "			\
	"andl %"_msk",%"_LO32 _tmp"; "		\
	"orl  %"_LO32 _tmp",%"_sav"; "
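
/*
 * Illustrative summary (restating the macros' own comments) of what the
 * two fragments above do, written as plain C:
 *
 *	before:	EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk;
 *	after:	_sav |= EFLAGS & _msk;
 *
 * i.e. the guest's arithmetic flags are installed before the emulated
 * instruction runs and harvested again afterwards, leaving the host's
 * other EFLAGS bits untouched.
 */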

#ifdef CONFIG_X86_64
#define ON64(x) x
#else
#define ON64(x)
#endif

#define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix, _dsttype) \
	do {								\
		__asm__ __volatile__ (					\
			_PRE_EFLAGS("0", "4", "2")			\
			_op _suffix " %"_x"3,%1; "			\
			_POST_EFLAGS("0", "4", "2")			\
			: "=m" (_eflags), "+q" (*(_dsttype*)&(_dst).val),\
			  "=&r" (_tmp)					\
			: _y ((_src).val), "i" (EFLAGS_MASK));		\
	} while (0)


/* Raw emulation: instruction has two explicit operands. */
#define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \
	do {								\
		unsigned long _tmp;					\
									\
		switch ((_dst).bytes) {					\
		case 2:							\
			____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w",u16);\
			break;						\
		case 4:							\
			____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l",u32);\
			break;						\
		case 8:							\
			ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q",u64)); \
			break;						\
		}							\
	} while (0)

#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
	do {								     \
		unsigned long _tmp;					     \
		switch ((_dst).bytes) {				             \
		case 1:							     \
			____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b",u8); \
			break;						     \
		default:						     \
			__emulate_2op_nobyte(_op, _src, _dst, _eflags,	     \
					     _wx, _wy, _lx, _ly, _qx, _qy);  \
			break;						     \
		}							     \
	} while (0)

/* Source operand is byte-sized and may be restricted to just %cl. */
#define emulate_2op_SrcB(_op, _src, _dst, _eflags)                      \
	__emulate_2op(_op, _src, _dst, _eflags,				\
		      "b", "c", "b", "c", "b", "c", "b", "c")

/* Source operand is byte, word, long or quad sized. */
#define emulate_2op_SrcV(_op, _src, _dst, _eflags)                      \
	__emulate_2op(_op, _src, _dst, _eflags,				\
		      "b", "q", "w", "r", _LO32, "r", "", "r")

/* Source operand is word, long or quad sized. */
#define emulate_2op_SrcV_nobyte(_op, _src, _dst, _eflags)               \
	__emulate_2op_nobyte(_op, _src, _dst, _eflags,			\
			     "w", "r", _LO32, "r", "", "r")
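
/*
 * Illustrative usage sketch: a handler built on these macros is usually
 * a one-liner, e.g.
 *
 *	emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags);
 *
 * which picks the operand-size suffix from c->dst.bytes and emits a
 * single "add" instruction wrapped in the EFLAGS save/restore above.
 */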

/* Instruction has three operands and one operand is stored in ECX register */
#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type)	\
	do {									\
		unsigned long _tmp;						\
		_type _clv  = (_cl).val;					\
		_type _srcv = (_src).val;					\
		_type _dstv = (_dst).val;					\
										\
		__asm__ __volatile__ (						\
			_PRE_EFLAGS("0", "5", "2")				\
			_op _suffix " %4,%1 \n"					\
			_POST_EFLAGS("0", "5", "2")				\
			: "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp)		\
			: "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK)		\
			);							\
										\
		(_cl).val  = (unsigned long) _clv;				\
		(_src).val = (unsigned long) _srcv;				\
		(_dst).val = (unsigned long) _dstv;				\
	} while (0)

#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags)				\
	do {									\
		switch ((_dst).bytes) {						\
		case 2:								\
			__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,		\
						"w", unsigned short);		\
			break;							\
		case 4:								\
			__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,		\
						"l", unsigned int);		\
			break;							\
		case 8:								\
			ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,	\
						"q", unsigned long));		\
			break;							\
		}								\
	} while (0)

#define __emulate_1op(_op, _dst, _eflags, _suffix)			\
	do {								\
		unsigned long _tmp;					\
									\
		__asm__ __volatile__ (					\
			_PRE_EFLAGS("0", "3", "2")			\
			_op _suffix " %1; "				\
			_POST_EFLAGS("0", "3", "2")			\
			: "=m" (_eflags), "+m" ((_dst).val),		\
			  "=&r" (_tmp)					\
			: "i" (EFLAGS_MASK));				\
	} while (0)

/* Instruction has only one explicit operand (no source operand). */
#define emulate_1op(_op, _dst, _eflags)                                    \
	do {								\
		switch ((_dst).bytes) {				        \
		case 1:	__emulate_1op(_op, _dst, _eflags, "b"); break;	\
		case 2:	__emulate_1op(_op, _dst, _eflags, "w"); break;	\
		case 4:	__emulate_1op(_op, _dst, _eflags, "l"); break;	\
		case 8:	ON64(__emulate_1op(_op, _dst, _eflags, "q")); break; \
		}							\
	} while (0)

#define __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, _suffix)		\
	do {								\
		unsigned long _tmp;					\
									\
		__asm__ __volatile__ (					\
			_PRE_EFLAGS("0", "4", "1")			\
			_op _suffix " %5; "				\
			_POST_EFLAGS("0", "4", "1")			\
			: "=m" (_eflags), "=&r" (_tmp),			\
			  "+a" (_rax), "+d" (_rdx)			\
			: "i" (EFLAGS_MASK), "m" ((_src).val),		\
			  "a" (_rax), "d" (_rdx));			\
	} while (0)

#define __emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, _eflags, _suffix, _ex) \
	do {								\
		unsigned long _tmp;					\
									\
		__asm__ __volatile__ (					\
			_PRE_EFLAGS("0", "5", "1")			\
			"1: \n\t"					\
			_op _suffix " %6; "				\
			"2: \n\t"					\
			_POST_EFLAGS("0", "5", "1")			\
			".pushsection .fixup,\"ax\" \n\t"		\
			"3: movb $1, %4 \n\t"				\
			"jmp 2b \n\t"					\
			".popsection \n\t"				\
			_ASM_EXTABLE(1b, 3b)				\
			: "=m" (_eflags), "=&r" (_tmp),			\
			  "+a" (_rax), "+d" (_rdx), "+qm"(_ex)		\
			: "i" (EFLAGS_MASK), "m" ((_src).val),		\
			  "a" (_rax), "d" (_rdx));			\
	} while (0)

/* instruction has only one source operand, destination is implicit (e.g. mul, div, imul, idiv) */
#define emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags)			\
	do {									\
		switch((_src).bytes) {						\
		case 1: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "b"); break; \
		case 2: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx,  _eflags, "w"); break; \
		case 4: __emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "l"); break; \
		case 8: ON64(__emulate_1op_rax_rdx(_op, _src, _rax, _rdx, _eflags, "q")); break; \
		}							\
	} while (0)

#define emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, _eflags, _ex)	\
	do {								\
		switch((_src).bytes) {					\
		case 1:							\
			__emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx,	\
						 _eflags, "b", _ex);	\
			break;						\
		case 2:							\
			__emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, \
						 _eflags, "w", _ex);	\
			break;						\
		case 4:							\
			__emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, \
						 _eflags, "l", _ex);	\
			break;						\
		case 8: ON64(						\
			__emulate_1op_rax_rdx_ex(_op, _src, _rax, _rdx, \
						 _eflags, "q", _ex));	\
			break;						\
		}							\
	} while (0)
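
/*
 * Illustrative note: the _ex variants route a faulting instruction (e.g.
 * "div" with a zero divisor) through the .fixup stub at label 3:, which
 * sets *_ex instead of letting the host take the fault; emulate_grp3()
 * below turns a non-zero _ex into a guest #DE via emulate_de().
 */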

/* Fetch next part of the instruction being emulated. */
#define insn_fetch(_type, _size, _eip)                                  \
({	unsigned long _x;						\
	rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size));		\
	if (rc != X86EMUL_CONTINUE)					\
		goto done;						\
	(_eip) += (_size);						\
	(_type)_x;							\
})

#define insn_fetch_arr(_arr, _size, _eip)                                \
({	rc = do_insn_fetch(ctxt, ops, (_eip), _arr, (_size));		\
	if (rc != X86EMUL_CONTINUE)					\
		goto done;						\
	(_eip) += (_size);						\
})
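
/*
 * Illustrative usage sketch: the decoder below calls, for example,
 *
 *	c->modrm = insn_fetch(u8, 1, c->eip);
 *
 * which fetches one byte at c->eip, advances c->eip, and on failure
 * jumps to the calling function's "done:" label via the hidden goto.
 */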

static inline unsigned long ad_mask(struct decode_cache *c)
{
	return (1UL << (c->ad_bytes << 3)) - 1;
}

/* Access/update address held in a register, based on addressing mode. */
static inline unsigned long
address_mask(struct decode_cache *c, unsigned long reg)
{
	if (c->ad_bytes == sizeof(unsigned long))
		return reg;
	else
		return reg & ad_mask(c);
}

static inline unsigned long
register_address(struct decode_cache *c, unsigned long base, unsigned long reg)
{
	return base + address_mask(c, reg);
}

static inline void
register_address_increment(struct decode_cache *c, unsigned long *reg, int inc)
{
	if (c->ad_bytes == sizeof(unsigned long))
		*reg += inc;
	else
		*reg = (*reg & ~ad_mask(c)) | ((*reg + inc) & ad_mask(c));
}
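
/*
 * Illustrative sketch: with c->ad_bytes == 2, ad_mask() is 0xffff, so
 * register_address_increment(c, &reg, 1) applied to reg == 0xffff wraps
 * the low 16 bits to 0 while leaving the upper bits alone -- exactly
 * the wrap-around behaviour of 16-bit addressing.
 */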

static inline void jmp_rel(struct decode_cache *c, int rel)
{
	register_address_increment(c, &c->eip, rel);
}

static void set_seg_override(struct decode_cache *c, int seg)
{
	c->has_seg_override = true;
	c->seg_override = seg;
}

static unsigned long seg_base(struct x86_emulate_ctxt *ctxt,
			      struct x86_emulate_ops *ops, int seg)
{
	if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
		return 0;

	return ops->get_cached_segment_base(seg, ctxt->vcpu);
}

static unsigned long seg_override_base(struct x86_emulate_ctxt *ctxt,
				       struct x86_emulate_ops *ops,
				       struct decode_cache *c)
{
	if (!c->has_seg_override)
		return 0;

	return seg_base(ctxt, ops, c->seg_override);
}

static unsigned long es_base(struct x86_emulate_ctxt *ctxt,
			     struct x86_emulate_ops *ops)
{
	return seg_base(ctxt, ops, VCPU_SREG_ES);
}

static unsigned long ss_base(struct x86_emulate_ctxt *ctxt,
			     struct x86_emulate_ops *ops)
{
	return seg_base(ctxt, ops, VCPU_SREG_SS);
}

static void emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
				      u32 error, bool valid)
{
	ctxt->exception = vec;
	ctxt->error_code = error;
	ctxt->error_code_valid = valid;
}

static void emulate_gp(struct x86_emulate_ctxt *ctxt, int err)
{
	emulate_exception(ctxt, GP_VECTOR, err, true);
}

static void emulate_pf(struct x86_emulate_ctxt *ctxt)
{
	emulate_exception(ctxt, PF_VECTOR, 0, true);
}

static void emulate_ud(struct x86_emulate_ctxt *ctxt)
{
	emulate_exception(ctxt, UD_VECTOR, 0, false);
}

static void emulate_ts(struct x86_emulate_ctxt *ctxt, int err)
{
	emulate_exception(ctxt, TS_VECTOR, err, true);
}

static int emulate_de(struct x86_emulate_ctxt *ctxt)
{
	emulate_exception(ctxt, DE_VECTOR, 0, false);
	return X86EMUL_PROPAGATE_FAULT;
}

static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
			      struct x86_emulate_ops *ops,
			      unsigned long eip, u8 *dest)
{
	struct fetch_cache *fc = &ctxt->decode.fetch;
	int rc;
	int size, cur_size;

	if (eip == fc->end) {
		cur_size = fc->end - fc->start;
		size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip));
		rc = ops->fetch(ctxt->cs_base + eip, fc->data + cur_size,
				size, ctxt->vcpu, NULL);
		if (rc != X86EMUL_CONTINUE)
			return rc;
		fc->end += size;
	}
	*dest = fc->data[eip - fc->start];
	return X86EMUL_CONTINUE;
}

static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
			 struct x86_emulate_ops *ops,
			 unsigned long eip, void *dest, unsigned size)
{
	int rc;

	/* x86 instructions are limited to 15 bytes. */
	if (eip + size - ctxt->eip > 15)
		return X86EMUL_UNHANDLEABLE;
	while (size--) {
		rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++);
		if (rc != X86EMUL_CONTINUE)
			return rc;
	}
	return X86EMUL_CONTINUE;
}

/*
 * Given the 'reg' portion of a ModRM byte, and a register block, return a
 * pointer into the block that addresses the relevant register.
 * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
 */
static void *decode_register(u8 modrm_reg, unsigned long *regs,
			     int highbyte_regs)
{
	void *p;

	p = &regs[modrm_reg];
	if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
		p = (unsigned char *)&regs[modrm_reg & 3] + 1;
	return p;
}
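
/*
 * Illustrative sketch: with highbyte_regs set, modrm_reg == 4 does not
 * name register slot 4 (RSP); it resolves to byte 1 of regs[0] (RAX),
 * i.e. AH, matching the legacy encoding of AH/CH/DH/BH as registers 4-7
 * when no REX prefix is present.
 */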

static int read_descriptor(struct x86_emulate_ctxt *ctxt,
			   struct x86_emulate_ops *ops,
			   ulong addr,
			   u16 *size, unsigned long *address, int op_bytes)
{
	int rc;

	if (op_bytes == 2)
		op_bytes = 3;
	*address = 0;
	rc = ops->read_std(addr, (unsigned long *)size, 2, ctxt->vcpu, NULL);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	rc = ops->read_std(addr + 2, address, op_bytes, ctxt->vcpu, NULL);
	return rc;
}

static int test_cc(unsigned int condition, unsigned int flags)
{
	int rc = 0;

	switch ((condition & 15) >> 1) {
	case 0: /* o */
		rc |= (flags & EFLG_OF);
		break;
	case 1: /* b/c/nae */
		rc |= (flags & EFLG_CF);
		break;
	case 2: /* z/e */
		rc |= (flags & EFLG_ZF);
		break;
	case 3: /* be/na */
		rc |= (flags & (EFLG_CF|EFLG_ZF));
		break;
	case 4: /* s */
		rc |= (flags & EFLG_SF);
		break;
	case 5: /* p/pe */
		rc |= (flags & EFLG_PF);
		break;
	case 7: /* le/ng */
		rc |= (flags & EFLG_ZF);
		/* fall through */
	case 6: /* l/nge */
		rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF));
		break;
	}

	/* Odd condition identifiers (lsb == 1) have inverted sense. */
	return (!!rc ^ (condition & 1));
}
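
/*
 * Illustrative sketch: for "jne" the condition nibble is 5; bits 3:1
 * select case 2 (ZF) and the odd low bit inverts the result, so test_cc
 * returns 1 exactly when ZF is clear.
 */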

static void fetch_register_operand(struct operand *op)
{
	switch (op->bytes) {
	case 1:
		op->val = *(u8 *)op->addr.reg;
		break;
	case 2:
		op->val = *(u16 *)op->addr.reg;
		break;
	case 4:
		op->val = *(u32 *)op->addr.reg;
		break;
	case 8:
		op->val = *(u64 *)op->addr.reg;
		break;
	}
}

static void decode_register_operand(struct operand *op,
				    struct decode_cache *c,
				    int inhibit_bytereg)
{
	unsigned reg = c->modrm_reg;
	int highbyte_regs = c->rex_prefix == 0;

	if (!(c->d & ModRM))
		reg = (c->b & 7) | ((c->rex_prefix & 1) << 3);
	op->type = OP_REG;
	if ((c->d & ByteOp) && !inhibit_bytereg) {
		op->addr.reg = decode_register(reg, c->regs, highbyte_regs);
		op->bytes = 1;
	} else {
		op->addr.reg = decode_register(reg, c->regs, 0);
		op->bytes = c->op_bytes;
	}
	fetch_register_operand(op);
	op->orig_val = op->val;
}

static int decode_modrm(struct x86_emulate_ctxt *ctxt,
			struct x86_emulate_ops *ops,
			struct operand *op)
{
	struct decode_cache *c = &ctxt->decode;
	u8 sib;
	int index_reg = 0, base_reg = 0, scale;
	int rc = X86EMUL_CONTINUE;
	ulong modrm_ea = 0;

	if (c->rex_prefix) {
		c->modrm_reg = (c->rex_prefix & 4) << 1;	/* REX.R */
		index_reg = (c->rex_prefix & 2) << 2; /* REX.X */
		c->modrm_rm = base_reg = (c->rex_prefix & 1) << 3; /* REX.B */
	}

	c->modrm = insn_fetch(u8, 1, c->eip);
	c->modrm_mod |= (c->modrm & 0xc0) >> 6;
	c->modrm_reg |= (c->modrm & 0x38) >> 3;
	c->modrm_rm |= (c->modrm & 0x07);
	c->modrm_seg = VCPU_SREG_DS;

	if (c->modrm_mod == 3) {
		op->type = OP_REG;
		op->bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
		op->addr.reg = decode_register(c->modrm_rm,
					       c->regs, c->d & ByteOp);
		fetch_register_operand(op);
		return rc;
	}

	op->type = OP_MEM;

	if (c->ad_bytes == 2) {
		unsigned bx = c->regs[VCPU_REGS_RBX];
		unsigned bp = c->regs[VCPU_REGS_RBP];
		unsigned si = c->regs[VCPU_REGS_RSI];
		unsigned di = c->regs[VCPU_REGS_RDI];

		/* 16-bit ModR/M decode. */
		switch (c->modrm_mod) {
		case 0:
			if (c->modrm_rm == 6)
				modrm_ea += insn_fetch(u16, 2, c->eip);
			break;
		case 1:
			modrm_ea += insn_fetch(s8, 1, c->eip);
			break;
		case 2:
			modrm_ea += insn_fetch(u16, 2, c->eip);
			break;
		}
		switch (c->modrm_rm) {
		case 0:
			modrm_ea += bx + si;
			break;
		case 1:
			modrm_ea += bx + di;
			break;
		case 2:
			modrm_ea += bp + si;
			break;
		case 3:
			modrm_ea += bp + di;
			break;
		case 4:
			modrm_ea += si;
			break;
		case 5:
			modrm_ea += di;
			break;
		case 6:
			if (c->modrm_mod != 0)
				modrm_ea += bp;
			break;
		case 7:
			modrm_ea += bx;
			break;
		}
		if (c->modrm_rm == 2 || c->modrm_rm == 3 ||
		    (c->modrm_rm == 6 && c->modrm_mod != 0))
			c->modrm_seg = VCPU_SREG_SS;
		modrm_ea = (u16)modrm_ea;
	} else {
		/* 32/64-bit ModR/M decode. */
		if ((c->modrm_rm & 7) == 4) {
			sib = insn_fetch(u8, 1, c->eip);
			index_reg |= (sib >> 3) & 7;
			base_reg |= sib & 7;
			scale = sib >> 6;

			if ((base_reg & 7) == 5 && c->modrm_mod == 0)
				modrm_ea += insn_fetch(s32, 4, c->eip);
			else
				modrm_ea += c->regs[base_reg];
			if (index_reg != 4)
				modrm_ea += c->regs[index_reg] << scale;
		} else if ((c->modrm_rm & 7) == 5 && c->modrm_mod == 0) {
			if (ctxt->mode == X86EMUL_MODE_PROT64)
				c->rip_relative = 1;
		} else
			modrm_ea += c->regs[c->modrm_rm];
		switch (c->modrm_mod) {
		case 0:
			if (c->modrm_rm == 5)
				modrm_ea += insn_fetch(s32, 4, c->eip);
			break;
		case 1:
			modrm_ea += insn_fetch(s8, 1, c->eip);
			break;
		case 2:
			modrm_ea += insn_fetch(s32, 4, c->eip);
			break;
		}
	}
	op->addr.mem = modrm_ea;
done:
	return rc;
}
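
/*
 * Illustrative worked example: for the 16-bit instruction
 * "mov ax, [bx+si+0x10]" the ModRM byte is 0x40 (mod=1, reg=0, rm=0);
 * decode_modrm() takes the mod==1 arm (one s8 displacement, +0x10) and
 * the rm==0 arm (bx + si), producing op->addr.mem = (u16)(bx + si + 0x10)
 * with the default DS segment.
 */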

static int decode_abs(struct x86_emulate_ctxt *ctxt,
		      struct x86_emulate_ops *ops,
		      struct operand *op)
{
	struct decode_cache *c = &ctxt->decode;
	int rc = X86EMUL_CONTINUE;

	op->type = OP_MEM;
	switch (c->ad_bytes) {
	case 2:
		op->addr.mem = insn_fetch(u16, 2, c->eip);
		break;
	case 4:
		op->addr.mem = insn_fetch(u32, 4, c->eip);
		break;
	case 8:
		op->addr.mem = insn_fetch(u64, 8, c->eip);
		break;
	}
done:
	return rc;
}

static void fetch_bit_operand(struct decode_cache *c)
{
	long sv = 0, mask;

	if (c->dst.type == OP_MEM && c->src.type == OP_REG) {
		mask = ~(c->dst.bytes * 8 - 1);

		if (c->src.bytes == 2)
			sv = (s16)c->src.val & (s16)mask;
		else if (c->src.bytes == 4)
			sv = (s32)c->src.val & (s32)mask;

		c->dst.addr.mem += (sv >> 3);
	}

	/* only subword offset */
	c->src.val &= (c->dst.bytes << 3) - 1;
}
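
/*
 * Illustrative sketch: for a 4-byte "bt [mem], reg" with a bit index of
 * 100, mask is ~31, sv becomes 96, and the effective address advances
 * 96 >> 3 == 12 bytes; the residual in-dword offset 100 & 31 == 4 is
 * left in c->src.val.
 */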

static int read_emulated(struct x86_emulate_ctxt *ctxt,
			 struct x86_emulate_ops *ops,
			 unsigned long addr, void *dest, unsigned size)
{
	int rc;
	struct read_cache *mc = &ctxt->decode.mem_read;
	u32 err;

	while (size) {
		int n = min(size, 8u);
		size -= n;
		if (mc->pos < mc->end)
			goto read_cached;

		rc = ops->read_emulated(addr, mc->data + mc->end, n, &err,
					ctxt->vcpu);
		if (rc == X86EMUL_PROPAGATE_FAULT)
			emulate_pf(ctxt);
		if (rc != X86EMUL_CONTINUE)
			return rc;
		mc->end += n;

	read_cached:
		memcpy(dest, mc->data + mc->pos, n);
		mc->pos += n;
		dest += n;
		addr += n;
	}
	return X86EMUL_CONTINUE;
}

static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
			   struct x86_emulate_ops *ops,
			   unsigned int size, unsigned short port,
			   void *dest)
{
	struct read_cache *rc = &ctxt->decode.io_read;

	if (rc->pos == rc->end) { /* refill pio read ahead */
		struct decode_cache *c = &ctxt->decode;
		unsigned int in_page, n;
		unsigned int count = c->rep_prefix ?
			address_mask(c, c->regs[VCPU_REGS_RCX]) : 1;
		in_page = (ctxt->eflags & EFLG_DF) ?
			offset_in_page(c->regs[VCPU_REGS_RDI]) :
			PAGE_SIZE - offset_in_page(c->regs[VCPU_REGS_RDI]);
		n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size,
			count);
		if (n == 0)
			n = 1;
		rc->pos = rc->end = 0;
		if (!ops->pio_in_emulated(size, port, rc->data, n, ctxt->vcpu))
			return 0;
		rc->end = n * size;
	}

	memcpy(dest, rc->data + rc->pos, size);
	rc->pos += size;
	return 1;
}

static u32 desc_limit_scaled(struct desc_struct *desc)
{
	u32 limit = get_desc_limit(desc);

	return desc->g ? (limit << 12) | 0xfff : limit;
}
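
/*
 * Illustrative sketch: with the granularity bit set, a raw limit of
 * 0xfffff scales to (0xfffff << 12) | 0xfff == 0xffffffff, i.e. a 4GB
 * segment.
 */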

static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
				     struct x86_emulate_ops *ops,
				     u16 selector, struct desc_ptr *dt)
{
	if (selector & 1 << 2) {
		struct desc_struct desc;
		memset(dt, 0, sizeof *dt);
		if (!ops->get_cached_descriptor(&desc, VCPU_SREG_LDTR, ctxt->vcpu))
			return;

		dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
		dt->address = get_desc_base(&desc);
	} else
		ops->get_gdt(dt, ctxt->vcpu);
}

/* allowed just for 8-byte segment descriptors */
static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				   struct x86_emulate_ops *ops,
				   u16 selector, struct desc_struct *desc)
{
	struct desc_ptr dt;
	u16 index = selector >> 3;
	int ret;
	u32 err;
	ulong addr;

	get_descriptor_table_ptr(ctxt, ops, selector, &dt);

	if (dt.size < index * 8 + 7) {
		emulate_gp(ctxt, selector & 0xfffc);
		return X86EMUL_PROPAGATE_FAULT;
	}
	addr = dt.address + index * 8;
	ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, &err);
	if (ret == X86EMUL_PROPAGATE_FAULT)
		emulate_pf(ctxt);

	return ret;
}

/* allowed just for 8-byte segment descriptors */
static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				    struct x86_emulate_ops *ops,
				    u16 selector, struct desc_struct *desc)
{
	struct desc_ptr dt;
	u16 index = selector >> 3;
	u32 err;
	ulong addr;
	int ret;

	get_descriptor_table_ptr(ctxt, ops, selector, &dt);

	if (dt.size < index * 8 + 7) {
		emulate_gp(ctxt, selector & 0xfffc);
		return X86EMUL_PROPAGATE_FAULT;
	}

	addr = dt.address + index * 8;
	ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, &err);
	if (ret == X86EMUL_PROPAGATE_FAULT)
		emulate_pf(ctxt);

	return ret;
}

static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				   struct x86_emulate_ops *ops,
				   u16 selector, int seg)
{
	struct desc_struct seg_desc;
	u8 dpl, rpl, cpl;
	unsigned err_vec = GP_VECTOR;
	u32 err_code = 0;
	bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
	int ret;

	memset(&seg_desc, 0, sizeof seg_desc);

	if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86)
	    || ctxt->mode == X86EMUL_MODE_REAL) {
		/* set real mode segment descriptor */
		set_desc_base(&seg_desc, selector << 4);
		set_desc_limit(&seg_desc, 0xffff);
		seg_desc.type = 3;
		seg_desc.p = 1;
		seg_desc.s = 1;
		goto load;
	}

	/* NULL selector is not valid for TR, CS and SS */
	if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR)
	    && null_selector)
		goto exception;

	/* TR should be in GDT only */
	if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
		goto exception;

	if (null_selector) /* for NULL selector skip all following checks */
		goto load;

	ret = read_segment_descriptor(ctxt, ops, selector, &seg_desc);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	err_code = selector & 0xfffc;
	err_vec = GP_VECTOR;

	/* can't load system descriptor into segment selector */
	if (seg <= VCPU_SREG_GS && !seg_desc.s)
		goto exception;

	if (!seg_desc.p) {
		err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
		goto exception;
	}

	rpl = selector & 3;
	dpl = seg_desc.dpl;
	cpl = ops->cpl(ctxt->vcpu);

	switch (seg) {
	case VCPU_SREG_SS:
		/*
		 * segment is not a writable data segment or segment
		 * selector's RPL != CPL or descriptor's DPL != CPL
		 */
		if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
			goto exception;
		break;
	case VCPU_SREG_CS:
		if (!(seg_desc.type & 8))
			goto exception;

		if (seg_desc.type & 4) {
			/* conforming */
			if (dpl > cpl)
				goto exception;
		} else {
			/* nonconforming */
			if (rpl > cpl || dpl != cpl)
				goto exception;
		}
		/* CS(RPL) <- CPL */
		selector = (selector & 0xfffc) | cpl;
		break;
	case VCPU_SREG_TR:
		if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
			goto exception;
		break;
	case VCPU_SREG_LDTR:
		if (seg_desc.s || seg_desc.type != 2)
			goto exception;
		break;
	default: /*  DS, ES, FS, or GS */
		/*
		 * segment is not a data or readable code segment or
		 * ((segment is a data or nonconforming code segment)
		 * and (both RPL and CPL > DPL))
		 */
		if ((seg_desc.type & 0xa) == 0x8 ||
		    (((seg_desc.type & 0xc) != 0xc) &&
		     (rpl > dpl && cpl > dpl)))
			goto exception;
		break;
	}

	if (seg_desc.s) {
		/* mark segment as accessed */
		seg_desc.type |= 1;
		ret = write_segment_descriptor(ctxt, ops, selector, &seg_desc);
		if (ret != X86EMUL_CONTINUE)
			return ret;
	}
load:
	ops->set_segment_selector(selector, seg, ctxt->vcpu);
	ops->set_cached_descriptor(&seg_desc, seg, ctxt->vcpu);
	return X86EMUL_CONTINUE;
exception:
	emulate_exception(ctxt, err_vec, err_code, true);
	return X86EMUL_PROPAGATE_FAULT;
}

static void write_register_operand(struct operand *op)
{
	/* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
	switch (op->bytes) {
	case 1:
		*(u8 *)op->addr.reg = (u8)op->val;
		break;
	case 2:
		*(u16 *)op->addr.reg = (u16)op->val;
		break;
	case 4:
		*op->addr.reg = (u32)op->val;
		break;	/* 64b: zero-extend */
	case 8:
		*op->addr.reg = op->val;
		break;
	}
}

static inline int writeback(struct x86_emulate_ctxt *ctxt,
			    struct x86_emulate_ops *ops)
{
	int rc;
	struct decode_cache *c = &ctxt->decode;
	u32 err;

	switch (c->dst.type) {
	case OP_REG:
		write_register_operand(&c->dst);
		break;
	case OP_MEM:
		if (c->lock_prefix)
			rc = ops->cmpxchg_emulated(
					c->dst.addr.mem,
					&c->dst.orig_val,
					&c->dst.val,
					c->dst.bytes,
					&err,
					ctxt->vcpu);
		else
			rc = ops->write_emulated(
					c->dst.addr.mem,
					&c->dst.val,
					c->dst.bytes,
					&err,
					ctxt->vcpu);
		if (rc == X86EMUL_PROPAGATE_FAULT)
			emulate_pf(ctxt);
		if (rc != X86EMUL_CONTINUE)
			return rc;
		break;
	case OP_NONE:
		/* no writeback */
		break;
	default:
		break;
	}
	return X86EMUL_CONTINUE;
}

static inline void emulate_push(struct x86_emulate_ctxt *ctxt,
				struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;

	c->dst.type  = OP_MEM;
	c->dst.bytes = c->op_bytes;
	c->dst.val = c->src.val;
	register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes);
	c->dst.addr.mem = register_address(c, ss_base(ctxt, ops),
					   c->regs[VCPU_REGS_RSP]);
}

static int emulate_pop(struct x86_emulate_ctxt *ctxt,
		       struct x86_emulate_ops *ops,
		       void *dest, int len)
{
	struct decode_cache *c = &ctxt->decode;
	int rc;

	rc = read_emulated(ctxt, ops, register_address(c, ss_base(ctxt, ops),
						       c->regs[VCPU_REGS_RSP]),
			   dest, len);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	register_address_increment(c, &c->regs[VCPU_REGS_RSP], len);
	return rc;
}

static int emulate_popf(struct x86_emulate_ctxt *ctxt,
		       struct x86_emulate_ops *ops,
		       void *dest, int len)
{
	int rc;
	unsigned long val, change_mask;
	int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
	int cpl = ops->cpl(ctxt->vcpu);

	rc = emulate_pop(ctxt, ops, &val, len);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF
		| EFLG_TF | EFLG_DF | EFLG_NT | EFLG_RF | EFLG_AC | EFLG_ID;

	switch(ctxt->mode) {
	case X86EMUL_MODE_PROT64:
	case X86EMUL_MODE_PROT32:
	case X86EMUL_MODE_PROT16:
		if (cpl == 0)
			change_mask |= EFLG_IOPL;
		if (cpl <= iopl)
			change_mask |= EFLG_IF;
		break;
	case X86EMUL_MODE_VM86:
		if (iopl < 3) {
			emulate_gp(ctxt, 0);
			return X86EMUL_PROPAGATE_FAULT;
		}
		change_mask |= EFLG_IF;
		break;
	default: /* real mode */
		change_mask |= (EFLG_IOPL | EFLG_IF);
		break;
	}

	*(unsigned long *)dest =
		(ctxt->eflags & ~change_mask) | (val & change_mask);

	if (rc == X86EMUL_PROPAGATE_FAULT)
		emulate_pf(ctxt);

	return rc;
}

static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt,
			      struct x86_emulate_ops *ops, int seg)
{
	struct decode_cache *c = &ctxt->decode;

	c->src.val = ops->get_segment_selector(seg, ctxt->vcpu);

	emulate_push(ctxt, ops);
}

static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
			     struct x86_emulate_ops *ops, int seg)
{
	struct decode_cache *c = &ctxt->decode;
	unsigned long selector;
	int rc;

	rc = emulate_pop(ctxt, ops, &selector, c->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = load_segment_descriptor(ctxt, ops, (u16)selector, seg);
	return rc;
}

static int emulate_pusha(struct x86_emulate_ctxt *ctxt,
			  struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
	unsigned long old_esp = c->regs[VCPU_REGS_RSP];
	int rc = X86EMUL_CONTINUE;
	int reg = VCPU_REGS_RAX;

	while (reg <= VCPU_REGS_RDI) {
		(reg == VCPU_REGS_RSP) ?
		(c->src.val = old_esp) : (c->src.val = c->regs[reg]);

		emulate_push(ctxt, ops);

		rc = writeback(ctxt, ops);
		if (rc != X86EMUL_CONTINUE)
			return rc;

		++reg;
	}

	/* Disable writeback. */
	c->dst.type = OP_NONE;

	return rc;
}

static int emulate_popa(struct x86_emulate_ctxt *ctxt,
			struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
	int rc = X86EMUL_CONTINUE;
	int reg = VCPU_REGS_RDI;

	while (reg >= VCPU_REGS_RAX) {
		if (reg == VCPU_REGS_RSP) {
			register_address_increment(c, &c->regs[VCPU_REGS_RSP],
							c->op_bytes);
			--reg;
		}

		rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes);
		if (rc != X86EMUL_CONTINUE)
			break;
		--reg;
	}
	return rc;
}

int emulate_int_real(struct x86_emulate_ctxt *ctxt,
			       struct x86_emulate_ops *ops, int irq)
{
	struct decode_cache *c = &ctxt->decode;
	int rc;
	struct desc_ptr dt;
	gva_t cs_addr;
	gva_t eip_addr;
	u16 cs, eip;
	u32 err;

	/* TODO: Add limit checks */
	c->src.val = ctxt->eflags;
	emulate_push(ctxt, ops);
	rc = writeback(ctxt, ops);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC);

	c->src.val = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
	emulate_push(ctxt, ops);
	rc = writeback(ctxt, ops);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	c->src.val = c->eip;
	emulate_push(ctxt, ops);
	rc = writeback(ctxt, ops);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	c->dst.type = OP_NONE;

	ops->get_idt(&dt, ctxt->vcpu);

	eip_addr = dt.address + (irq << 2);
	cs_addr = dt.address + (irq << 2) + 2;

	rc = ops->read_std(cs_addr, &cs, 2, ctxt->vcpu, &err);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = ops->read_std(eip_addr, &eip, 2, ctxt->vcpu, &err);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = load_segment_descriptor(ctxt, ops, cs, VCPU_SREG_CS);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	c->eip = eip;

	return rc;
}

static int emulate_int(struct x86_emulate_ctxt *ctxt,
		       struct x86_emulate_ops *ops, int irq)
{
	switch(ctxt->mode) {
	case X86EMUL_MODE_REAL:
		return emulate_int_real(ctxt, ops, irq);
	case X86EMUL_MODE_VM86:
	case X86EMUL_MODE_PROT16:
	case X86EMUL_MODE_PROT32:
	case X86EMUL_MODE_PROT64:
	default:
		/* Protected mode interrupts are not implemented yet */
		return X86EMUL_UNHANDLEABLE;
	}
}

static int emulate_iret_real(struct x86_emulate_ctxt *ctxt,
			     struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
	int rc = X86EMUL_CONTINUE;
	unsigned long temp_eip = 0;
	unsigned long temp_eflags = 0;
	unsigned long cs = 0;
	unsigned long mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_TF |
			     EFLG_IF | EFLG_DF | EFLG_OF | EFLG_IOPL | EFLG_NT | EFLG_RF |
			     EFLG_AC | EFLG_ID | (1 << 1); /* Last one is the reserved bit */
	unsigned long vm86_mask = EFLG_VM | EFLG_VIF | EFLG_VIP;

	/* TODO: Add stack limit check */

	rc = emulate_pop(ctxt, ops, &temp_eip, c->op_bytes);

	if (rc != X86EMUL_CONTINUE)
		return rc;

	if (temp_eip & ~0xffff) {
		emulate_gp(ctxt, 0);
		return X86EMUL_PROPAGATE_FAULT;
	}

	rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);

	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = emulate_pop(ctxt, ops, &temp_eflags, c->op_bytes);

	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS);

	if (rc != X86EMUL_CONTINUE)
		return rc;

	c->eip = temp_eip;

	if (c->op_bytes == 4)
		ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
	else if (c->op_bytes == 2) {
		ctxt->eflags &= ~0xffff;
		ctxt->eflags |= temp_eflags;
	}

	ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
	ctxt->eflags |= EFLG_RESERVED_ONE_MASK;

	return rc;
}

static inline int emulate_iret(struct x86_emulate_ctxt *ctxt,
				    struct x86_emulate_ops* ops)
{
	switch(ctxt->mode) {
	case X86EMUL_MODE_REAL:
		return emulate_iret_real(ctxt, ops);
	case X86EMUL_MODE_VM86:
	case X86EMUL_MODE_PROT16:
	case X86EMUL_MODE_PROT32:
	case X86EMUL_MODE_PROT64:
	default:
		/* iret from protected mode is not implemented yet */
		return X86EMUL_UNHANDLEABLE;
	}
}

static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
				struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;

	return emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes);
}

static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt)
{
	struct decode_cache *c = &ctxt->decode;
	switch (c->modrm_reg) {
	case 0:	/* rol */
		emulate_2op_SrcB("rol", c->src, c->dst, ctxt->eflags);
		break;
	case 1:	/* ror */
		emulate_2op_SrcB("ror", c->src, c->dst, ctxt->eflags);
		break;
	case 2:	/* rcl */
		emulate_2op_SrcB("rcl", c->src, c->dst, ctxt->eflags);
		break;
	case 3:	/* rcr */
		emulate_2op_SrcB("rcr", c->src, c->dst, ctxt->eflags);
		break;
	case 4:	/* sal/shl */
	case 6:	/* sal/shl */
		emulate_2op_SrcB("sal", c->src, c->dst, ctxt->eflags);
		break;
	case 5:	/* shr */
		emulate_2op_SrcB("shr", c->src, c->dst, ctxt->eflags);
		break;
	case 7:	/* sar */
		emulate_2op_SrcB("sar", c->src, c->dst, ctxt->eflags);
		break;
	}
}

static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
			       struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
	unsigned long *rax = &c->regs[VCPU_REGS_RAX];
	unsigned long *rdx = &c->regs[VCPU_REGS_RDX];
	u8 de = 0;

	switch (c->modrm_reg) {
	case 0 ... 1:	/* test */
		emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
		break;
	case 2:	/* not */
		c->dst.val = ~c->dst.val;
		break;
	case 3:	/* neg */
		emulate_1op("neg", c->dst, ctxt->eflags);
		break;
	case 4: /* mul */
		emulate_1op_rax_rdx("mul", c->src, *rax, *rdx, ctxt->eflags);
		break;
	case 5: /* imul */
		emulate_1op_rax_rdx("imul", c->src, *rax, *rdx, ctxt->eflags);
		break;
	case 6: /* div */
		emulate_1op_rax_rdx_ex("div", c->src, *rax, *rdx,
				       ctxt->eflags, de);
		break;
	case 7: /* idiv */
		emulate_1op_rax_rdx_ex("idiv", c->src, *rax, *rdx,
				       ctxt->eflags, de);
		break;
	default:
		return X86EMUL_UNHANDLEABLE;
	}
	if (de)
		return emulate_de(ctxt);
	return X86EMUL_CONTINUE;
}

static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
			       struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;

	switch (c->modrm_reg) {
	case 0:	/* inc */
		emulate_1op("inc", c->dst, ctxt->eflags);
		break;
	case 1:	/* dec */
		emulate_1op("dec", c->dst, ctxt->eflags);
		break;
	case 2: /* call near abs */ {
		long int old_eip;
		old_eip = c->eip;
		c->eip = c->src.val;
		c->src.val = old_eip;
		emulate_push(ctxt, ops);
		break;
	}
	case 4: /* jmp abs */
		c->eip = c->src.val;
		break;
	case 6:	/* push */
		emulate_push(ctxt, ops);
		break;
	}
	return X86EMUL_CONTINUE;
}

static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
			       struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
	u64 old = c->dst.orig_val64;

	if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) ||
	    ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) {
		c->regs[VCPU_REGS_RAX] = (u32) (old >> 0);
		c->regs[VCPU_REGS_RDX] = (u32) (old >> 32);
		ctxt->eflags &= ~EFLG_ZF;
	} else {
		c->dst.val64 = ((u64)c->regs[VCPU_REGS_RCX] << 32) |
			(u32) c->regs[VCPU_REGS_RBX];

		ctxt->eflags |= EFLG_ZF;
	}
	return X86EMUL_CONTINUE;
}
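
/*
 * Illustrative note: the grp9 handler above implements cmpxchg8b
 * semantics: if EDX:EAX matches the 64-bit destination, ECX:EBX is
 * written back and ZF is set; otherwise the destination is loaded into
 * EDX:EAX and ZF is cleared.
 */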

static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
			   struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
	int rc;
	unsigned long cs;

	rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	if (c->op_bytes == 4)
		c->eip = (u32)c->eip;
	rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS);
	return rc;
}

static int emulate_load_segment(struct x86_emulate_ctxt *ctxt,
			   struct x86_emulate_ops *ops, int seg)
{
	struct decode_cache *c = &ctxt->decode;
	unsigned short sel;
	int rc;

	memcpy(&sel, c->src.valptr + c->op_bytes, 2);

	rc = load_segment_descriptor(ctxt, ops, sel, seg);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	c->dst.val = c->src.val;
	return rc;
}

static inline void
setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
			struct x86_emulate_ops *ops, struct desc_struct *cs,
			struct desc_struct *ss)
{
	memset(cs, 0, sizeof(struct desc_struct));
	ops->get_cached_descriptor(cs, VCPU_SREG_CS, ctxt->vcpu);
	memset(ss, 0, sizeof(struct desc_struct));

	cs->l = 0;		/* will be adjusted later */
	set_desc_base(cs, 0);	/* flat segment */
	cs->g = 1;		/* 4kb granularity */
	set_desc_limit(cs, 0xfffff);	/* 4GB limit */
	cs->type = 0x0b;	/* Read, Execute, Accessed */
	cs->s = 1;
	cs->dpl = 0;		/* will be adjusted later */
	cs->p = 1;
	cs->d = 1;

	set_desc_base(ss, 0);	/* flat segment */
	set_desc_limit(ss, 0xfffff);	/* 4GB limit */
	ss->g = 1;		/* 4kb granularity */
	ss->s = 1;
	ss->type = 0x03;	/* Read/Write, Accessed */
	ss->d = 1;		/* 32bit stack segment */
	ss->dpl = 0;
	ss->p = 1;
}

static int
emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
	struct desc_struct cs, ss;
	u64 msr_data;
	u16 cs_sel, ss_sel;

	/* syscall is not available in real mode */
	if (ctxt->mode == X86EMUL_MODE_REAL ||
	    ctxt->mode == X86EMUL_MODE_VM86) {
		emulate_ud(ctxt);
		return X86EMUL_PROPAGATE_FAULT;
	}

	setup_syscalls_segments(ctxt, ops, &cs, &ss);

	ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
	msr_data >>= 32;
	cs_sel = (u16)(msr_data & 0xfffc);
	ss_sel = (u16)(msr_data + 8);

	if (is_long_mode(ctxt->vcpu)) {
		cs.d = 0;
		cs.l = 1;
	}
	ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu);
	ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu);
	ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu);
	ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu);

	c->regs[VCPU_REGS_RCX] = c->eip;
	if (is_long_mode(ctxt->vcpu)) {
#ifdef CONFIG_X86_64
		c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF;

		ops->get_msr(ctxt->vcpu,
			     ctxt->mode == X86EMUL_MODE_PROT64 ?
			     MSR_LSTAR : MSR_CSTAR, &msr_data);
		c->eip = msr_data;

		ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data);
		ctxt->eflags &= ~(msr_data | EFLG_RF);
#endif
	} else {
		/* legacy mode */
		ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
		c->eip = (u32)msr_data;

		ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
	}

	return X86EMUL_CONTINUE;
}
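
/*
 * Illustrative sketch: MSR_STAR carries the SYSCALL CS/SS base selector
 * in bits 47:32, which is why the code above shifts msr_data right by
 * 32; cs_sel is that selector with RPL forced to 0 and ss_sel is the
 * next descriptor (selector + 8), per the hardware SYSCALL convention.
 */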

static int
emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
	struct desc_struct cs, ss;
	u64 msr_data;
	u16 cs_sel, ss_sel;

	/* inject #GP if in real mode */
	if (ctxt->mode == X86EMUL_MODE_REAL) {
		emulate_gp(ctxt, 0);
		return X86EMUL_PROPAGATE_FAULT;
	}

	/* XXX sysenter/sysexit have not been tested in 64bit mode.
	 * Therefore, we inject an #UD.
	 */
	if (ctxt->mode == X86EMUL_MODE_PROT64) {
		emulate_ud(ctxt);
		return X86EMUL_PROPAGATE_FAULT;
	}

	setup_syscalls_segments(ctxt, ops, &cs, &ss);

	ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
	switch (ctxt->mode) {
	case X86EMUL_MODE_PROT32:
		if ((msr_data & 0xfffc) == 0x0) {
			emulate_gp(ctxt, 0);
			return X86EMUL_PROPAGATE_FAULT;
		}
		break;
	case X86EMUL_MODE_PROT64:
		if (msr_data == 0x0) {
			emulate_gp(ctxt, 0);
			return X86EMUL_PROPAGATE_FAULT;
		}
		break;
	}

	ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
	cs_sel = (u16)msr_data;
	cs_sel &= ~SELECTOR_RPL_MASK;
	ss_sel = cs_sel + 8;
	ss_sel &= ~SELECTOR_RPL_MASK;
	if (ctxt->mode == X86EMUL_MODE_PROT64
		|| is_long_mode(ctxt->vcpu)) {
		cs.d = 0;
		cs.l = 1;
	}

	ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu);
	ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu);
	ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu);
	ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu);

	ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data);
	c->eip = msr_data;

	ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data);
	c->regs[VCPU_REGS_RSP] = msr_data;

	return X86EMUL_CONTINUE;
}

static int
emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
	struct desc_struct cs, ss;
	u64 msr_data;
	int usermode;
	u16 cs_sel, ss_sel;

	/* inject #GP if in real mode or Virtual 8086 mode */
	if (ctxt->mode == X86EMUL_MODE_REAL ||
	    ctxt->mode == X86EMUL_MODE_VM86) {
		emulate_gp(ctxt, 0);
		return X86EMUL_PROPAGATE_FAULT;
	}

	setup_syscalls_segments(ctxt, ops, &cs, &ss);

	if ((c->rex_prefix & 0x8) != 0x0)
		usermode = X86EMUL_MODE_PROT64;
	else
		usermode = X86EMUL_MODE_PROT32;

	cs.dpl = 3;
	ss.dpl = 3;
	ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
	switch (usermode) {
	case X86EMUL_MODE_PROT32:
		cs_sel = (u16)(msr_data + 16);
		if ((msr_data & 0xfffc) == 0x0) {
			emulate_gp(ctxt, 0);
			return X86EMUL_PROPAGATE_FAULT;
		}
		ss_sel = (u16)(msr_data + 24);
		break;
	case X86EMUL_MODE_PROT64:
		cs_sel = (u16)(msr_data + 32);
		if (msr_data == 0x0) {
			emulate_gp(ctxt, 0);
			return X86EMUL_PROPAGATE_FAULT;
		}
		ss_sel = cs_sel + 8;
		cs.d = 0;
		cs.l = 1;
		break;
	}
	cs_sel |= SELECTOR_RPL_MASK;
	ss_sel |= SELECTOR_RPL_MASK;

	ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu);
	ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu);
	ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu);
	ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu);

	c->eip = c->regs[VCPU_REGS_RDX];
	c->regs[VCPU_REGS_RSP] = c->regs[VCPU_REGS_RCX];

	return X86EMUL_CONTINUE;
}

static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt,
			      struct x86_emulate_ops *ops)
{
	int iopl;
	if (ctxt->mode == X86EMUL_MODE_REAL)
		return false;
	if (ctxt->mode == X86EMUL_MODE_VM86)
		return true;
	iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
	return ops->cpl(ctxt->vcpu) > iopl;
}

static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
					    struct x86_emulate_ops *ops,
					    u16 port, u16 len)
{
	struct desc_struct tr_seg;
	int r;
	u16 io_bitmap_ptr;
	u8 perm, bit_idx = port & 0x7;
	unsigned mask = (1 << len) - 1;

	ops->get_cached_descriptor(&tr_seg, VCPU_SREG_TR, ctxt->vcpu);
	if (!tr_seg.p)
		return false;
	if (desc_limit_scaled(&tr_seg) < 103)
		return false;
	r = ops->read_std(get_desc_base(&tr_seg) + 102, &io_bitmap_ptr, 2,
			  ctxt->vcpu, NULL);
	if (r != X86EMUL_CONTINUE)
		return false;
	if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
		return false;
	r = ops->read_std(get_desc_base(&tr_seg) + io_bitmap_ptr + port/8,
			  &perm, 1, ctxt->vcpu, NULL);
	if (r != X86EMUL_CONTINUE)
		return false;
	if ((perm >> bit_idx) & mask)
		return false;
	return true;
}
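
/*
 * Illustrative sketch: for a two-byte access to port 0x3f9, bit_idx is
 * 1 and mask is 0x3, so the check above reads the TSS I/O-bitmap byte
 * covering ports 0x3f8-0x3ff and refuses the access if either of bits
 * 1-2 is set.
 */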
1838 
1839 static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
1840 				 struct x86_emulate_ops *ops,
1841 				 u16 port, u16 len)
1842 {
1843 	if (ctxt->perm_ok)
1844 		return true;
1845 
1846 	if (emulator_bad_iopl(ctxt, ops))
1847 		if (!emulator_io_port_access_allowed(ctxt, ops, port, len))
1848 			return false;
1849 
1850 	ctxt->perm_ok = true;
1851 
1852 	return true;
1853 }
1854 
1855 static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
1856 				struct x86_emulate_ops *ops,
1857 				struct tss_segment_16 *tss)
1858 {
1859 	struct decode_cache *c = &ctxt->decode;
1860 
1861 	tss->ip = c->eip;
1862 	tss->flag = ctxt->eflags;
1863 	tss->ax = c->regs[VCPU_REGS_RAX];
1864 	tss->cx = c->regs[VCPU_REGS_RCX];
1865 	tss->dx = c->regs[VCPU_REGS_RDX];
1866 	tss->bx = c->regs[VCPU_REGS_RBX];
1867 	tss->sp = c->regs[VCPU_REGS_RSP];
1868 	tss->bp = c->regs[VCPU_REGS_RBP];
1869 	tss->si = c->regs[VCPU_REGS_RSI];
1870 	tss->di = c->regs[VCPU_REGS_RDI];
1871 
1872 	tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu);
1873 	tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
1874 	tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu);
1875 	tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu);
1876 	tss->ldt = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu);
1877 }
1878 
1879 static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
1880 				 struct x86_emulate_ops *ops,
1881 				 struct tss_segment_16 *tss)
1882 {
1883 	struct decode_cache *c = &ctxt->decode;
1884 	int ret;
1885 
1886 	c->eip = tss->ip;
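	/* bit 1 of EFLAGS is reserved and always reads as set */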
1887 	ctxt->eflags = tss->flag | 2;
1888 	c->regs[VCPU_REGS_RAX] = tss->ax;
1889 	c->regs[VCPU_REGS_RCX] = tss->cx;
1890 	c->regs[VCPU_REGS_RDX] = tss->dx;
1891 	c->regs[VCPU_REGS_RBX] = tss->bx;
1892 	c->regs[VCPU_REGS_RSP] = tss->sp;
1893 	c->regs[VCPU_REGS_RBP] = tss->bp;
1894 	c->regs[VCPU_REGS_RSI] = tss->si;
1895 	c->regs[VCPU_REGS_RDI] = tss->di;
1896 
1897 	/*
1898 	 * SDM says that segment selectors are loaded before segment
1899 	 * descriptors
1900 	 */
1901 	ops->set_segment_selector(tss->ldt, VCPU_SREG_LDTR, ctxt->vcpu);
1902 	ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu);
1903 	ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu);
1904 	ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu);
1905 	ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu);
1906 
1907 	/*
1908 	 * Now load segment descriptors. If a fault happens at this stage,
1909 	 * it is handled in the context of the new task.
1910 	 */
1911 	ret = load_segment_descriptor(ctxt, ops, tss->ldt, VCPU_SREG_LDTR);
1912 	if (ret != X86EMUL_CONTINUE)
1913 		return ret;
1914 	ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES);
1915 	if (ret != X86EMUL_CONTINUE)
1916 		return ret;
1917 	ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS);
1918 	if (ret != X86EMUL_CONTINUE)
1919 		return ret;
1920 	ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS);
1921 	if (ret != X86EMUL_CONTINUE)
1922 		return ret;
1923 	ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS);
1924 	if (ret != X86EMUL_CONTINUE)
1925 		return ret;
1926 
1927 	return X86EMUL_CONTINUE;
1928 }
1929 
1930 static int task_switch_16(struct x86_emulate_ctxt *ctxt,
1931 			  struct x86_emulate_ops *ops,
1932 			  u16 tss_selector, u16 old_tss_sel,
1933 			  ulong old_tss_base, struct desc_struct *new_desc)
1934 {
1935 	struct tss_segment_16 tss_seg;
1936 	int ret;
1937 	u32 err, new_tss_base = get_desc_base(new_desc);
1938 
1939 	ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
1940 			    &err);
1941 	if (ret == X86EMUL_PROPAGATE_FAULT) {
1942 		/* FIXME: need to provide precise fault address */
1943 		emulate_pf(ctxt);
1944 		return ret;
1945 	}
1946 
1947 	save_state_to_tss16(ctxt, ops, &tss_seg);
1948 
1949 	ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
1950 			     &err);
1951 	if (ret == X86EMUL_PROPAGATE_FAULT) {
1952 		/* FIXME: need to provide precise fault address */
1953 		emulate_pf(ctxt);
1954 		return ret;
1955 	}
1956 
1957 	ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
1958 			    &err);
1959 	if (ret == X86EMUL_PROPAGATE_FAULT) {
1960 		/* FIXME: need to provide precise fault address */
1961 		emulate_pf(ctxt);
1962 		return ret;
1963 	}
1964 
1965 	if (old_tss_sel != 0xffff) {
1966 		tss_seg.prev_task_link = old_tss_sel;
1967 
1968 		ret = ops->write_std(new_tss_base,
1969 				     &tss_seg.prev_task_link,
1970 				     sizeof tss_seg.prev_task_link,
1971 				     ctxt->vcpu, &err);
1972 		if (ret == X86EMUL_PROPAGATE_FAULT) {
1973 			/* FIXME: need to provide precise fault address */
1974 			emulate_pf(ctxt);
1975 			return ret;
1976 		}
1977 	}
1978 
1979 	return load_state_from_tss16(ctxt, ops, &tss_seg);
1980 }
1981 
1982 static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
1983 				struct x86_emulate_ops *ops,
1984 				struct tss_segment_32 *tss)
1985 {
1986 	struct decode_cache *c = &ctxt->decode;
1987 
1988 	tss->cr3 = ops->get_cr(3, ctxt->vcpu);
1989 	tss->eip = c->eip;
1990 	tss->eflags = ctxt->eflags;
1991 	tss->eax = c->regs[VCPU_REGS_RAX];
1992 	tss->ecx = c->regs[VCPU_REGS_RCX];
1993 	tss->edx = c->regs[VCPU_REGS_RDX];
1994 	tss->ebx = c->regs[VCPU_REGS_RBX];
1995 	tss->esp = c->regs[VCPU_REGS_RSP];
1996 	tss->ebp = c->regs[VCPU_REGS_RBP];
1997 	tss->esi = c->regs[VCPU_REGS_RSI];
1998 	tss->edi = c->regs[VCPU_REGS_RDI];
1999 
2000 	tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu);
2001 	tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
2002 	tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu);
2003 	tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu);
2004 	tss->fs = ops->get_segment_selector(VCPU_SREG_FS, ctxt->vcpu);
2005 	tss->gs = ops->get_segment_selector(VCPU_SREG_GS, ctxt->vcpu);
2006 	tss->ldt_selector = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu);
2007 }
2008 
2009 static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
2010 				 struct x86_emulate_ops *ops,
2011 				 struct tss_segment_32 *tss)
2012 {
2013 	struct decode_cache *c = &ctxt->decode;
2014 	int ret;
2015 
2016 	if (ops->set_cr(3, tss->cr3, ctxt->vcpu)) {
2017 		emulate_gp(ctxt, 0);
2018 		return X86EMUL_PROPAGATE_FAULT;
2019 	}
2020 	c->eip = tss->eip;
2021 	ctxt->eflags = tss->eflags | 2;
2022 	c->regs[VCPU_REGS_RAX] = tss->eax;
2023 	c->regs[VCPU_REGS_RCX] = tss->ecx;
2024 	c->regs[VCPU_REGS_RDX] = tss->edx;
2025 	c->regs[VCPU_REGS_RBX] = tss->ebx;
2026 	c->regs[VCPU_REGS_RSP] = tss->esp;
2027 	c->regs[VCPU_REGS_RBP] = tss->ebp;
2028 	c->regs[VCPU_REGS_RSI] = tss->esi;
2029 	c->regs[VCPU_REGS_RDI] = tss->edi;
2030 
2031 	/*
2032 	 * SDM says that segment selectors are loaded before segment
2033 	 * descriptors
2034 	 */
2035 	ops->set_segment_selector(tss->ldt_selector, VCPU_SREG_LDTR, ctxt->vcpu);
2036 	ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu);
2037 	ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu);
2038 	ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu);
2039 	ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu);
2040 	ops->set_segment_selector(tss->fs, VCPU_SREG_FS, ctxt->vcpu);
2041 	ops->set_segment_selector(tss->gs, VCPU_SREG_GS, ctxt->vcpu);
2042 
2043 	/*
2044 	 * Now load segment descriptors. If a fault happens at this stage,
2045 	 * it is handled in the context of the new task.
2046 	 */
2047 	ret = load_segment_descriptor(ctxt, ops, tss->ldt_selector, VCPU_SREG_LDTR);
2048 	if (ret != X86EMUL_CONTINUE)
2049 		return ret;
2050 	ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES);
2051 	if (ret != X86EMUL_CONTINUE)
2052 		return ret;
2053 	ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS);
2054 	if (ret != X86EMUL_CONTINUE)
2055 		return ret;
2056 	ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS);
2057 	if (ret != X86EMUL_CONTINUE)
2058 		return ret;
2059 	ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS);
2060 	if (ret != X86EMUL_CONTINUE)
2061 		return ret;
2062 	ret = load_segment_descriptor(ctxt, ops, tss->fs, VCPU_SREG_FS);
2063 	if (ret != X86EMUL_CONTINUE)
2064 		return ret;
2065 	ret = load_segment_descriptor(ctxt, ops, tss->gs, VCPU_SREG_GS);
2066 	if (ret != X86EMUL_CONTINUE)
2067 		return ret;
2068 
2069 	return X86EMUL_CONTINUE;
2070 }
2071 
2072 static int task_switch_32(struct x86_emulate_ctxt *ctxt,
2073 			  struct x86_emulate_ops *ops,
2074 			  u16 tss_selector, u16 old_tss_sel,
2075 			  ulong old_tss_base, struct desc_struct *new_desc)
2076 {
2077 	struct tss_segment_32 tss_seg;
2078 	int ret;
2079 	u32 err, new_tss_base = get_desc_base(new_desc);
2080 
2081 	ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2082 			    &err);
2083 	if (ret == X86EMUL_PROPAGATE_FAULT) {
2084 		/* FIXME: need to provide precise fault address */
2085 		emulate_pf(ctxt);
2086 		return ret;
2087 	}
2088 
2089 	save_state_to_tss32(ctxt, ops, &tss_seg);
2090 
2091 	ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2092 			     &err);
2093 	if (ret == X86EMUL_PROPAGATE_FAULT) {
2094 		/* FIXME: need to provide precise fault address */
2095 		emulate_pf(ctxt);
2096 		return ret;
2097 	}
2098 
2099 	ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
2100 			    &err);
2101 	if (ret == X86EMUL_PROPAGATE_FAULT) {
2102 		/* FIXME: need to provide precise fault address */
2103 		emulate_pf(ctxt);
2104 		return ret;
2105 	}
2106 
2107 	if (old_tss_sel != 0xffff) {
2108 		tss_seg.prev_task_link = old_tss_sel;
2109 
2110 		ret = ops->write_std(new_tss_base,
2111 				     &tss_seg.prev_task_link,
2112 				     sizeof tss_seg.prev_task_link,
2113 				     ctxt->vcpu, &err);
2114 		if (ret == X86EMUL_PROPAGATE_FAULT) {
2115 			/* FIXME: need to provide precise fault address */
2116 			emulate_pf(ctxt);
2117 			return ret;
2118 		}
2119 	}
2120 
2121 	return load_state_from_tss32(ctxt, ops, &tss_seg);
2122 }
2123 
2124 static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
2125 				   struct x86_emulate_ops *ops,
2126 				   u16 tss_selector, int reason,
2127 				   bool has_error_code, u32 error_code)
2128 {
2129 	struct desc_struct curr_tss_desc, next_tss_desc;
2130 	int ret;
2131 	u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu);
2132 	ulong old_tss_base =
2133 		ops->get_cached_segment_base(VCPU_SREG_TR, ctxt->vcpu);
2134 	u32 desc_limit;
2135 
2136 	/* FIXME: old_tss_base == ~0 ? */
2137 
2138 	ret = read_segment_descriptor(ctxt, ops, tss_selector, &next_tss_desc);
2139 	if (ret != X86EMUL_CONTINUE)
2140 		return ret;
2141 	ret = read_segment_descriptor(ctxt, ops, old_tss_sel, &curr_tss_desc);
2142 	if (ret != X86EMUL_CONTINUE)
2143 		return ret;
2144 
2145 	/* FIXME: check that next_tss_desc is tss */
2146 
2147 	if (reason != TASK_SWITCH_IRET) {
2148 		if ((tss_selector & 3) > next_tss_desc.dpl ||
2149 		    ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) {
2150 			emulate_gp(ctxt, 0);
2151 			return X86EMUL_PROPAGATE_FAULT;
2152 		}
2153 	}
2154 
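	/* minimum TSS limit: 0x67 for a 32-bit TSS, 0x2b for a 16-bit one */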
2155 	desc_limit = desc_limit_scaled(&next_tss_desc);
2156 	if (!next_tss_desc.p ||
2157 	    ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
2158 	     desc_limit < 0x2b)) {
2159 		emulate_ts(ctxt, tss_selector & 0xfffc);
2160 		return X86EMUL_PROPAGATE_FAULT;
2161 	}
2162 
2163 	if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
2164 		curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
2165 		write_segment_descriptor(ctxt, ops, old_tss_sel,
2166 					 &curr_tss_desc);
2167 	}
2168 
2169 	if (reason == TASK_SWITCH_IRET)
2170 		ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;
2171 
2172 	/* Set the back link to the previous task only if the NT bit is set
2173 	   in eflags; note that old_tss_sel is not used after this point. */
2174 	if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
2175 		old_tss_sel = 0xffff;
2176 
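	/* descriptor type bit 3 distinguishes a 32-bit TSS from a 16-bit one */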
2177 	if (next_tss_desc.type & 8)
2178 		ret = task_switch_32(ctxt, ops, tss_selector, old_tss_sel,
2179 				     old_tss_base, &next_tss_desc);
2180 	else
2181 		ret = task_switch_16(ctxt, ops, tss_selector, old_tss_sel,
2182 				     old_tss_base, &next_tss_desc);
2183 	if (ret != X86EMUL_CONTINUE)
2184 		return ret;
2185 
2186 	if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
2187 		ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;
2188 
2189 	if (reason != TASK_SWITCH_IRET) {
2190 		next_tss_desc.type |= (1 << 1); /* set busy flag */
2191 		write_segment_descriptor(ctxt, ops, tss_selector,
2192 					 &next_tss_desc);
2193 	}
2194 
2195 	ops->set_cr(0,  ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu);
2196 	ops->set_cached_descriptor(&next_tss_desc, VCPU_SREG_TR, ctxt->vcpu);
2197 	ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu);
2198 
2199 	if (has_error_code) {
2200 		struct decode_cache *c = &ctxt->decode;
2201 
2202 		c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
2203 		c->lock_prefix = 0;
2204 		c->src.val = (unsigned long) error_code;
2205 		emulate_push(ctxt, ops);
2206 	}
2207 
2208 	return ret;
2209 }
2210 
2211 int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
2212 			 u16 tss_selector, int reason,
2213 			 bool has_error_code, u32 error_code)
2214 {
2215 	struct x86_emulate_ops *ops = ctxt->ops;
2216 	struct decode_cache *c = &ctxt->decode;
2217 	int rc;
2218 
2219 	c->eip = ctxt->eip;
2220 	c->dst.type = OP_NONE;
2221 
2222 	rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason,
2223 				     has_error_code, error_code);
2224 
2225 	if (rc == X86EMUL_CONTINUE) {
2226 		rc = writeback(ctxt, ops);
2227 		if (rc == X86EMUL_CONTINUE)
2228 			ctxt->eip = c->eip;
2229 	}
2230 
2231 	return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
2232 }
2233 
2234 static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned long base,
2235 			    int reg, struct operand *op)
2236 {
2237 	struct decode_cache *c = &ctxt->decode;
2238 	int df = (ctxt->eflags & EFLG_DF) ? -1 : 1;
2239 
2240 	register_address_increment(c, &c->regs[reg], df * op->bytes);
2241 	op->addr.mem = register_address(c,  base, c->regs[reg]);
2242 }
2243 
2244 static int em_push(struct x86_emulate_ctxt *ctxt)
2245 {
2246 	emulate_push(ctxt, ctxt->ops);
2247 	return X86EMUL_CONTINUE;
2248 }
2249 
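/*
 * DAS adjusts AL after a packed-BCD subtraction: subtract 6 if the low
 * nibble exceeds 9 (or AF was set), then 0x60 more if AL exceeded 0x99
 * (or CF was set); CF and AF report which adjustments happened.
 */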
2250 static int em_das(struct x86_emulate_ctxt *ctxt)
2251 {
2252 	struct decode_cache *c = &ctxt->decode;
2253 	u8 al, old_al;
2254 	bool af, cf, old_cf;
2255 
2256 	cf = ctxt->eflags & X86_EFLAGS_CF;
2257 	al = c->dst.val;
2258 
2259 	old_al = al;
2260 	old_cf = cf;
2261 	cf = false;
2262 	af = ctxt->eflags & X86_EFLAGS_AF;
2263 	if ((al & 0x0f) > 9 || af) {
2264 		al -= 6;
2265 		cf = old_cf | (al >= 250);
2266 		af = true;
2267 	} else {
2268 		af = false;
2269 	}
2270 	if (old_al > 0x99 || old_cf) {
2271 		al -= 0x60;
2272 		cf = true;
2273 	}
2274 
2275 	c->dst.val = al;
2276 	/* Set PF, ZF, SF */
2277 	c->src.type = OP_IMM;
2278 	c->src.val = 0;
2279 	c->src.bytes = 1;
2280 	emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags);
2281 	ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF);
2282 	if (cf)
2283 		ctxt->eflags |= X86_EFLAGS_CF;
2284 	if (af)
2285 		ctxt->eflags |= X86_EFLAGS_AF;
2286 	return X86EMUL_CONTINUE;
2287 }
2288 
2289 static int em_call_far(struct x86_emulate_ctxt *ctxt)
2290 {
2291 	struct decode_cache *c = &ctxt->decode;
2292 	u16 sel, old_cs;
2293 	ulong old_eip;
2294 	int rc;
2295 
2296 	old_cs = ctxt->ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
2297 	old_eip = c->eip;
2298 
2299 	memcpy(&sel, c->src.valptr + c->op_bytes, 2);
2300 	if (load_segment_descriptor(ctxt, ctxt->ops, sel, VCPU_SREG_CS))
2301 		return X86EMUL_CONTINUE;
2302 
2303 	c->eip = 0;
2304 	memcpy(&c->eip, c->src.valptr, c->op_bytes);
2305 
2306 	c->src.val = old_cs;
2307 	emulate_push(ctxt, ctxt->ops);
2308 	rc = writeback(ctxt, ctxt->ops);
2309 	if (rc != X86EMUL_CONTINUE)
2310 		return rc;
2311 
2312 	c->src.val = old_eip;
2313 	emulate_push(ctxt, ctxt->ops);
2314 	rc = writeback(ctxt, ctxt->ops);
2315 	if (rc != X86EMUL_CONTINUE)
2316 		return rc;
2317 
2318 	c->dst.type = OP_NONE;
2319 
2320 	return X86EMUL_CONTINUE;
2321 }
2322 
2323 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
2324 {
2325 	struct decode_cache *c = &ctxt->decode;
2326 	int rc;
2327 
2328 	c->dst.type = OP_REG;
2329 	c->dst.addr.reg = &c->eip;
2330 	c->dst.bytes = c->op_bytes;
2331 	rc = emulate_pop(ctxt, ctxt->ops, &c->dst.val, c->op_bytes);
2332 	if (rc != X86EMUL_CONTINUE)
2333 		return rc;
2334 	register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.val);
2335 	return X86EMUL_CONTINUE;
2336 }
2337 
2338 static int em_imul(struct x86_emulate_ctxt *ctxt)
2339 {
2340 	struct decode_cache *c = &ctxt->decode;
2341 
2342 	emulate_2op_SrcV_nobyte("imul", c->src, c->dst, ctxt->eflags);
2343 	return X86EMUL_CONTINUE;
2344 }
2345 
2346 static int em_imul_3op(struct x86_emulate_ctxt *ctxt)
2347 {
2348 	struct decode_cache *c = &ctxt->decode;
2349 
2350 	c->dst.val = c->src2.val;
2351 	return em_imul(ctxt);
2352 }
2353 
2354 static int em_cwd(struct x86_emulate_ctxt *ctxt)
2355 {
2356 	struct decode_cache *c = &ctxt->decode;
2357 
2358 	c->dst.type = OP_REG;
2359 	c->dst.bytes = c->src.bytes;
2360 	c->dst.addr.reg = &c->regs[VCPU_REGS_RDX];
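	/*
	 * Broadcast the sign bit of the source across DX/EDX/RDX:
	 * a sign bit of 1 yields ~(1 - 1) = ~0, a sign bit of 0
	 * yields ~(0 - 1) = 0.
	 */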
2361 	c->dst.val = ~((c->src.val >> (c->src.bytes * 8 - 1)) - 1);
2362 
2363 	return X86EMUL_CONTINUE;
2364 }
2365 
2366 static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
2367 {
2368 	unsigned cpl = ctxt->ops->cpl(ctxt->vcpu);
2369 	struct decode_cache *c = &ctxt->decode;
2370 	u64 tsc = 0;
2371 
2372 	if (cpl > 0 && (ctxt->ops->get_cr(4, ctxt->vcpu) & X86_CR4_TSD)) {
2373 		emulate_gp(ctxt, 0);
2374 		return X86EMUL_PROPAGATE_FAULT;
2375 	}
2376 	ctxt->ops->get_msr(ctxt->vcpu, MSR_IA32_TSC, &tsc);
2377 	c->regs[VCPU_REGS_RAX] = (u32)tsc;
2378 	c->regs[VCPU_REGS_RDX] = tsc >> 32;
2379 	return X86EMUL_CONTINUE;
2380 }
2381 
2382 static int em_mov(struct x86_emulate_ctxt *ctxt)
2383 {
2384 	struct decode_cache *c = &ctxt->decode;
2385 	c->dst.val = c->src.val;
2386 	return X86EMUL_CONTINUE;
2387 }
2388 
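/*
 * Opcode-table shorthand: D() carries decode flags only, N marks an
 * undefined/unemulated slot, G()/GD() dispatch through a group (the
 * opcode extension in ModRM.reg, GD with a separate mod==3 table),
 * and I() pairs decode flags with an execution callback.
 */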
2389 #define D(_y) { .flags = (_y) }
2390 #define N    D(0)
2391 #define G(_f, _g) { .flags = ((_f) | Group), .u.group = (_g) }
2392 #define GD(_f, _g) { .flags = ((_f) | Group | GroupDual), .u.gdual = (_g) }
2393 #define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
2394 
2395 #define D2bv(_f)      D((_f) | ByteOp), D(_f)
2396 #define I2bv(_f, _e)  I((_f) | ByteOp, _e), I(_f, _e)
2397 
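/*
 * D6ALU() expands to the six encodings every classic ALU family
 * shares: r/m,r and r,r/m (byte and full-size forms) plus the two
 * accumulator,immediate forms.
 */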
2398 #define D6ALU(_f) D2bv((_f) | DstMem | SrcReg | ModRM),			\
2399 		D2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock),		\
2400 		D2bv(((_f) & ~Lock) | DstAcc | SrcImm)
2401 
2403 static struct opcode group1[] = {
2404 	X7(D(Lock)), N
2405 };
2406 
2407 static struct opcode group1A[] = {
2408 	D(DstMem | SrcNone | ModRM | Mov | Stack), N, N, N, N, N, N, N,
2409 };
2410 
2411 static struct opcode group3[] = {
2412 	D(DstMem | SrcImm | ModRM), D(DstMem | SrcImm | ModRM),
2413 	D(DstMem | SrcNone | ModRM | Lock), D(DstMem | SrcNone | ModRM | Lock),
2414 	X4(D(SrcMem | ModRM)),
2415 };
2416 
2417 static struct opcode group4[] = {
2418 	D(ByteOp | DstMem | SrcNone | ModRM | Lock), D(ByteOp | DstMem | SrcNone | ModRM | Lock),
2419 	N, N, N, N, N, N,
2420 };
2421 
2422 static struct opcode group5[] = {
2423 	D(DstMem | SrcNone | ModRM | Lock), D(DstMem | SrcNone | ModRM | Lock),
2424 	D(SrcMem | ModRM | Stack),
2425 	I(SrcMemFAddr | ModRM | ImplicitOps | Stack, em_call_far),
2426 	D(SrcMem | ModRM | Stack), D(SrcMemFAddr | ModRM | ImplicitOps),
2427 	D(SrcMem | ModRM | Stack), N,
2428 };
2429 
2430 static struct group_dual group7 = { {
2431 	N, N, D(ModRM | SrcMem | Priv), D(ModRM | SrcMem | Priv),
2432 	D(SrcNone | ModRM | DstMem | Mov), N,
2433 	D(SrcMem16 | ModRM | Mov | Priv),
2434 	D(SrcMem | ModRM | ByteOp | Priv | NoAccess),
2435 }, {
2436 	D(SrcNone | ModRM | Priv), N, N, D(SrcNone | ModRM | Priv),
2437 	D(SrcNone | ModRM | DstMem | Mov), N,
2438 	D(SrcMem16 | ModRM | Mov | Priv), N,
2439 } };
2440 
2441 static struct opcode group8[] = {
2442 	N, N, N, N,
2443 	D(DstMem | SrcImmByte | ModRM), D(DstMem | SrcImmByte | ModRM | Lock),
2444 	D(DstMem | SrcImmByte | ModRM | Lock), D(DstMem | SrcImmByte | ModRM | Lock),
2445 };
2446 
2447 static struct group_dual group9 = { {
2448 	N, D(DstMem64 | ModRM | Lock), N, N, N, N, N, N,
2449 }, {
2450 	N, N, N, N, N, N, N, N,
2451 } };
2452 
2453 static struct opcode group11[] = {
2454 	I(DstMem | SrcImm | ModRM | Mov, em_mov), X7(D(Undefined)),
2455 };
2456 
2457 static struct opcode opcode_table[256] = {
2458 	/* 0x00 - 0x07 */
2459 	D6ALU(Lock),
2460 	D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64),
2461 	/* 0x08 - 0x0F */
2462 	D6ALU(Lock),
2463 	D(ImplicitOps | Stack | No64), N,
2464 	/* 0x10 - 0x17 */
2465 	D6ALU(Lock),
2466 	D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64),
2467 	/* 0x18 - 0x1F */
2468 	D6ALU(Lock),
2469 	D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64),
2470 	/* 0x20 - 0x27 */
2471 	D6ALU(Lock), N, N,
2472 	/* 0x28 - 0x2F */
2473 	D6ALU(Lock), N, I(ByteOp | DstAcc | No64, em_das),
2474 	/* 0x30 - 0x37 */
2475 	D6ALU(Lock), N, N,
2476 	/* 0x38 - 0x3F */
2477 	D6ALU(0), N, N,
2478 	/* 0x40 - 0x4F */
2479 	X16(D(DstReg)),
2480 	/* 0x50 - 0x57 */
2481 	X8(I(SrcReg | Stack, em_push)),
2482 	/* 0x58 - 0x5F */
2483 	X8(D(DstReg | Stack)),
2484 	/* 0x60 - 0x67 */
2485 	D(ImplicitOps | Stack | No64), D(ImplicitOps | Stack | No64),
2486 	N, D(DstReg | SrcMem32 | ModRM | Mov) /* movsxd (x86/64) */ ,
2487 	N, N, N, N,
2488 	/* 0x68 - 0x6F */
2489 	I(SrcImm | Mov | Stack, em_push),
2490 	I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
2491 	I(SrcImmByte | Mov | Stack, em_push),
2492 	I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
2493 	D2bv(DstDI | Mov | String), /* insb, insw/insd */
2494 	D2bv(SrcSI | ImplicitOps | String), /* outsb, outsw/outsd */
2495 	/* 0x70 - 0x7F */
2496 	X16(D(SrcImmByte)),
2497 	/* 0x80 - 0x87 */
2498 	G(ByteOp | DstMem | SrcImm | ModRM | Group, group1),
2499 	G(DstMem | SrcImm | ModRM | Group, group1),
2500 	G(ByteOp | DstMem | SrcImm | ModRM | No64 | Group, group1),
2501 	G(DstMem | SrcImmByte | ModRM | Group, group1),
2502 	D2bv(DstMem | SrcReg | ModRM), D2bv(DstMem | SrcReg | ModRM | Lock),
2503 	/* 0x88 - 0x8F */
2504 	I2bv(DstMem | SrcReg | ModRM | Mov, em_mov),
2505 	I2bv(DstReg | SrcMem | ModRM | Mov, em_mov),
2506 	D(DstMem | SrcNone | ModRM | Mov), D(ModRM | SrcMem | NoAccess | DstReg),
2507 	D(ImplicitOps | SrcMem16 | ModRM), G(0, group1A),
2508 	/* 0x90 - 0x97 */
2509 	X8(D(SrcAcc | DstReg)),
2510 	/* 0x98 - 0x9F */
2511 	D(DstAcc | SrcNone), I(ImplicitOps | SrcAcc, em_cwd),
2512 	I(SrcImmFAddr | No64, em_call_far), N,
2513 	D(ImplicitOps | Stack), D(ImplicitOps | Stack), N, N,
2514 	/* 0xA0 - 0xA7 */
2515 	I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
2516 	I2bv(DstMem | SrcAcc | Mov | MemAbs, em_mov),
2517 	I2bv(SrcSI | DstDI | Mov | String, em_mov),
2518 	D2bv(SrcSI | DstDI | String),
2519 	/* 0xA8 - 0xAF */
2520 	D2bv(DstAcc | SrcImm),
2521 	I2bv(SrcAcc | DstDI | Mov | String, em_mov),
2522 	I2bv(SrcSI | DstAcc | Mov | String, em_mov),
2523 	D2bv(SrcAcc | DstDI | String),
2524 	/* 0xB0 - 0xB7 */
2525 	X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
2526 	/* 0xB8 - 0xBF */
2527 	X8(I(DstReg | SrcImm | Mov, em_mov)),
2528 	/* 0xC0 - 0xC7 */
2529 	D2bv(DstMem | SrcImmByte | ModRM),
2530 	I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm),
2531 	D(ImplicitOps | Stack),
2532 	D(DstReg | SrcMemFAddr | ModRM | No64), D(DstReg | SrcMemFAddr | ModRM | No64),
2533 	G(ByteOp, group11), G(0, group11),
2534 	/* 0xC8 - 0xCF */
2535 	N, N, N, D(ImplicitOps | Stack),
2536 	D(ImplicitOps), D(SrcImmByte), D(ImplicitOps | No64), D(ImplicitOps),
2537 	/* 0xD0 - 0xD7 */
2538 	D2bv(DstMem | SrcOne | ModRM), D2bv(DstMem | ModRM),
2539 	N, N, N, N,
2540 	/* 0xD8 - 0xDF */
2541 	N, N, N, N, N, N, N, N,
2542 	/* 0xE0 - 0xE7 */
2543 	X4(D(SrcImmByte)),
2544 	D2bv(SrcImmUByte | DstAcc), D2bv(SrcAcc | DstImmUByte),
2545 	/* 0xE8 - 0xEF */
2546 	D(SrcImm | Stack), D(SrcImm | ImplicitOps),
2547 	D(SrcImmFAddr | No64), D(SrcImmByte | ImplicitOps),
2548 	D2bv(SrcNone | DstAcc),	D2bv(SrcAcc | ImplicitOps),
2549 	/* 0xF0 - 0xF7 */
2550 	N, N, N, N,
2551 	D(ImplicitOps | Priv), D(ImplicitOps), G(ByteOp, group3), G(0, group3),
2552 	/* 0xF8 - 0xFF */
2553 	D(ImplicitOps), D(ImplicitOps), D(ImplicitOps), D(ImplicitOps),
2554 	D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
2555 };
2556 
2557 static struct opcode twobyte_table[256] = {
2558 	/* 0x00 - 0x0F */
2559 	N, GD(0, &group7), N, N,
2560 	N, D(ImplicitOps), D(ImplicitOps | Priv), N,
2561 	D(ImplicitOps | Priv), D(ImplicitOps | Priv), N, N,
2562 	N, D(ImplicitOps | ModRM), N, N,
2563 	/* 0x10 - 0x1F */
2564 	N, N, N, N, N, N, N, N, D(ImplicitOps | ModRM), N, N, N, N, N, N, N,
2565 	/* 0x20 - 0x2F */
2566 	D(ModRM | DstMem | Priv | Op3264), D(ModRM | DstMem | Priv | Op3264),
2567 	D(ModRM | SrcMem | Priv | Op3264), D(ModRM | SrcMem | Priv | Op3264),
2568 	N, N, N, N,
2569 	N, N, N, N, N, N, N, N,
2570 	/* 0x30 - 0x3F */
2571 	D(ImplicitOps | Priv), I(ImplicitOps, em_rdtsc),
2572 	D(ImplicitOps | Priv), N,
2573 	D(ImplicitOps), D(ImplicitOps | Priv), N, N,
2574 	N, N, N, N, N, N, N, N,
2575 	/* 0x40 - 0x4F */
2576 	X16(D(DstReg | SrcMem | ModRM | Mov)),
2577 	/* 0x50 - 0x5F */
2578 	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
2579 	/* 0x60 - 0x6F */
2580 	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
2581 	/* 0x70 - 0x7F */
2582 	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
2583 	/* 0x80 - 0x8F */
2584 	X16(D(SrcImm)),
2585 	/* 0x90 - 0x9F */
2586 	X16(D(ByteOp | DstMem | SrcNone | ModRM | Mov)),
2587 	/* 0xA0 - 0xA7 */
2588 	D(ImplicitOps | Stack), D(ImplicitOps | Stack),
2589 	N, D(DstMem | SrcReg | ModRM | BitOp),
2590 	D(DstMem | SrcReg | Src2ImmByte | ModRM),
2591 	D(DstMem | SrcReg | Src2CL | ModRM), N, N,
2592 	/* 0xA8 - 0xAF */
2593 	D(ImplicitOps | Stack), D(ImplicitOps | Stack),
2594 	N, D(DstMem | SrcReg | ModRM | BitOp | Lock),
2595 	D(DstMem | SrcReg | Src2ImmByte | ModRM),
2596 	D(DstMem | SrcReg | Src2CL | ModRM),
2597 	D(ModRM), I(DstReg | SrcMem | ModRM, em_imul),
2598 	/* 0xB0 - 0xB7 */
2599 	D2bv(DstMem | SrcReg | ModRM | Lock),
2600 	D(DstReg | SrcMemFAddr | ModRM), D(DstMem | SrcReg | ModRM | BitOp | Lock),
2601 	D(DstReg | SrcMemFAddr | ModRM), D(DstReg | SrcMemFAddr | ModRM),
2602 	D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
2603 	/* 0xB8 - 0xBF */
2604 	N, N,
2605 	G(BitOp, group8), D(DstMem | SrcReg | ModRM | BitOp | Lock),
2606 	D(DstReg | SrcMem | ModRM), D(DstReg | SrcMem | ModRM),
2607 	D(ByteOp | DstReg | SrcMem | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
2608 	/* 0xC0 - 0xCF */
2609 	D2bv(DstMem | SrcReg | ModRM | Lock),
2610 	N, D(DstMem | SrcReg | ModRM | Mov),
2611 	N, N, N, GD(0, &group9),
2612 	N, N, N, N, N, N, N, N,
2613 	/* 0xD0 - 0xDF */
2614 	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
2615 	/* 0xE0 - 0xEF */
2616 	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
2617 	/* 0xF0 - 0xFF */
2618 	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
2619 };
2620 
2621 #undef D
2622 #undef N
2623 #undef G
2624 #undef GD
2625 #undef I
2626 
2627 #undef D2bv
2628 #undef I2bv
2629 #undef D6ALU
2630 
2631 static unsigned imm_size(struct decode_cache *c)
2632 {
2633 	unsigned size;
2634 
2635 	size = (c->d & ByteOp) ? 1 : c->op_bytes;
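	/* a 64-bit operand size still takes a 4-byte immediate, sign-extended */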
2636 	if (size == 8)
2637 		size = 4;
2638 	return size;
2639 }
2640 
2641 static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op,
2642 		      unsigned size, bool sign_extension)
2643 {
2644 	struct decode_cache *c = &ctxt->decode;
2645 	struct x86_emulate_ops *ops = ctxt->ops;
2646 	int rc = X86EMUL_CONTINUE;
2647 
2648 	op->type = OP_IMM;
2649 	op->bytes = size;
2650 	op->addr.mem = c->eip;
2651 	/* NB. Immediates are sign-extended as necessary. */
2652 	switch (op->bytes) {
2653 	case 1:
2654 		op->val = insn_fetch(s8, 1, c->eip);
2655 		break;
2656 	case 2:
2657 		op->val = insn_fetch(s16, 2, c->eip);
2658 		break;
2659 	case 4:
2660 		op->val = insn_fetch(s32, 4, c->eip);
2661 		break;
2662 	}
2663 	if (!sign_extension) {
2664 		switch (op->bytes) {
2665 		case 1:
2666 			op->val &= 0xff;
2667 			break;
2668 		case 2:
2669 			op->val &= 0xffff;
2670 			break;
2671 		case 4:
2672 			op->val &= 0xffffffff;
2673 			break;
2674 		}
2675 	}
2676 done:
2677 	return rc;
2678 }
2679 
2680 int
2681 x86_decode_insn(struct x86_emulate_ctxt *ctxt)
2682 {
2683 	struct x86_emulate_ops *ops = ctxt->ops;
2684 	struct decode_cache *c = &ctxt->decode;
2685 	int rc = X86EMUL_CONTINUE;
2686 	int mode = ctxt->mode;
2687 	int def_op_bytes, def_ad_bytes, dual, goffset;
2688 	struct opcode opcode, *g_mod012, *g_mod3;
2689 	struct operand memop = { .type = OP_NONE };
2690 
2691 	c->eip = ctxt->eip;
2692 	c->fetch.start = c->fetch.end = c->eip;
2693 	ctxt->cs_base = seg_base(ctxt, ops, VCPU_SREG_CS);
2694 
2695 	switch (mode) {
2696 	case X86EMUL_MODE_REAL:
2697 	case X86EMUL_MODE_VM86:
2698 	case X86EMUL_MODE_PROT16:
2699 		def_op_bytes = def_ad_bytes = 2;
2700 		break;
2701 	case X86EMUL_MODE_PROT32:
2702 		def_op_bytes = def_ad_bytes = 4;
2703 		break;
2704 #ifdef CONFIG_X86_64
2705 	case X86EMUL_MODE_PROT64:
2706 		def_op_bytes = 4;
2707 		def_ad_bytes = 8;
2708 		break;
2709 #endif
2710 	default:
2711 		return -1;
2712 	}
2713 
2714 	c->op_bytes = def_op_bytes;
2715 	c->ad_bytes = def_ad_bytes;
2716 
2717 	/* Legacy prefixes. */
2718 	for (;;) {
2719 		switch (c->b = insn_fetch(u8, 1, c->eip)) {
2720 		case 0x66:	/* operand-size override */
2721 			/* switch between 2/4 bytes */
2722 			c->op_bytes = def_op_bytes ^ 6;
2723 			break;
2724 		case 0x67:	/* address-size override */
2725 			if (mode == X86EMUL_MODE_PROT64)
2726 				/* switch between 4/8 bytes */
2727 				c->ad_bytes = def_ad_bytes ^ 12;
2728 			else
2729 				/* switch between 2/4 bytes */
2730 				c->ad_bytes = def_ad_bytes ^ 6;
2731 			break;
2732 		case 0x26:	/* ES override */
2733 		case 0x2e:	/* CS override */
2734 		case 0x36:	/* SS override */
2735 		case 0x3e:	/* DS override */
2736 			set_seg_override(c, (c->b >> 3) & 3);
2737 			break;
2738 		case 0x64:	/* FS override */
2739 		case 0x65:	/* GS override */
2740 			set_seg_override(c, c->b & 7);
2741 			break;
2742 		case 0x40 ... 0x4f: /* REX */
2743 			if (mode != X86EMUL_MODE_PROT64)
2744 				goto done_prefixes;
2745 			c->rex_prefix = c->b;
2746 			continue;
2747 		case 0xf0:	/* LOCK */
2748 			c->lock_prefix = 1;
2749 			break;
2750 		case 0xf2:	/* REPNE/REPNZ */
2751 			c->rep_prefix = REPNE_PREFIX;
2752 			break;
2753 		case 0xf3:	/* REP/REPE/REPZ */
2754 			c->rep_prefix = REPE_PREFIX;
2755 			break;
2756 		default:
2757 			goto done_prefixes;
2758 		}
2759 
2760 		/* Any legacy prefix after a REX prefix nullifies its effect. */
2762 		c->rex_prefix = 0;
2763 	}
2764 
2765 done_prefixes:
2766 
2767 	/* REX prefix. */
2768 	if (c->rex_prefix & 8)
2769 		c->op_bytes = 8;	/* REX.W */
2770 
2771 	/* Opcode byte(s). */
2772 	opcode = opcode_table[c->b];
2773 	/* Two-byte opcode? */
2774 	if (c->b == 0x0f) {
2775 		c->twobyte = 1;
2776 		c->b = insn_fetch(u8, 1, c->eip);
2777 		opcode = twobyte_table[c->b];
2778 	}
2779 	c->d = opcode.flags;
2780 
2781 	if (c->d & Group) {
2782 		dual = c->d & GroupDual;
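		/*
		 * Peek at the ModRM byte to select the group entry, then
		 * rewind so the regular ModRM decode below fetches it again.
		 */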
2783 		c->modrm = insn_fetch(u8, 1, c->eip);
2784 		--c->eip;
2785 
2786 		if (c->d & GroupDual) {
2787 			g_mod012 = opcode.u.gdual->mod012;
2788 			g_mod3 = opcode.u.gdual->mod3;
2789 		} else
2790 			g_mod012 = g_mod3 = opcode.u.group;
2791 
2792 		c->d &= ~(Group | GroupDual);
2793 
2794 		goffset = (c->modrm >> 3) & 7;
2795 
2796 		if ((c->modrm >> 6) == 3)
2797 			opcode = g_mod3[goffset];
2798 		else
2799 			opcode = g_mod012[goffset];
2800 		c->d |= opcode.flags;
2801 	}
2802 
2803 	c->execute = opcode.u.execute;
2804 
2805 	/* Unrecognised? */
2806 	if (c->d == 0 || (c->d & Undefined)) {
2807 		DPRINTF("Cannot emulate %02x\n", c->b);
2808 		return -1;
2809 	}
2810 
2811 	if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack))
2812 		c->op_bytes = 8;
2813 
2814 	if (c->d & Op3264) {
2815 		if (mode == X86EMUL_MODE_PROT64)
2816 			c->op_bytes = 8;
2817 		else
2818 			c->op_bytes = 4;
2819 	}
2820 
2821 	/* ModRM and SIB bytes. */
2822 	if (c->d & ModRM) {
2823 		rc = decode_modrm(ctxt, ops, &memop);
2824 		if (!c->has_seg_override)
2825 			set_seg_override(c, c->modrm_seg);
2826 	} else if (c->d & MemAbs)
2827 		rc = decode_abs(ctxt, ops, &memop);
2828 	if (rc != X86EMUL_CONTINUE)
2829 		goto done;
2830 
2831 	if (!c->has_seg_override)
2832 		set_seg_override(c, VCPU_SREG_DS);
2833 
2834 	if (memop.type == OP_MEM && !(!c->twobyte && c->b == 0x8d))
2835 		memop.addr.mem += seg_override_base(ctxt, ops, c);
2836 
2837 	if (memop.type == OP_MEM && c->ad_bytes != 8)
2838 		memop.addr.mem = (u32)memop.addr.mem;
2839 
2840 	if (memop.type == OP_MEM && c->rip_relative)
2841 		memop.addr.mem += c->eip;
2842 
2843 	/*
2844 	 * Decode and fetch the source operand: register, memory
2845 	 * or immediate.
2846 	 */
2847 	switch (c->d & SrcMask) {
2848 	case SrcNone:
2849 		break;
2850 	case SrcReg:
2851 		decode_register_operand(&c->src, c, 0);
2852 		break;
2853 	case SrcMem16:
2854 		memop.bytes = 2;
2855 		goto srcmem_common;
2856 	case SrcMem32:
2857 		memop.bytes = 4;
2858 		goto srcmem_common;
2859 	case SrcMem:
2860 		memop.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
2862 	srcmem_common:
2863 		c->src = memop;
2864 		break;
2865 	case SrcImmU16:
2866 		rc = decode_imm(ctxt, &c->src, 2, false);
2867 		break;
2868 	case SrcImm:
2869 		rc = decode_imm(ctxt, &c->src, imm_size(c), true);
2870 		break;
2871 	case SrcImmU:
2872 		rc = decode_imm(ctxt, &c->src, imm_size(c), false);
2873 		break;
2874 	case SrcImmByte:
2875 		rc = decode_imm(ctxt, &c->src, 1, true);
2876 		break;
2877 	case SrcImmUByte:
2878 		rc = decode_imm(ctxt, &c->src, 1, false);
2879 		break;
2880 	case SrcAcc:
2881 		c->src.type = OP_REG;
2882 		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
2883 		c->src.addr.reg = &c->regs[VCPU_REGS_RAX];
2884 		fetch_register_operand(&c->src);
2885 		break;
2886 	case SrcOne:
2887 		c->src.bytes = 1;
2888 		c->src.val = 1;
2889 		break;
2890 	case SrcSI:
2891 		c->src.type = OP_MEM;
2892 		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
2893 		c->src.addr.mem =
2894 			register_address(c,  seg_override_base(ctxt, ops, c),
2895 					 c->regs[VCPU_REGS_RSI]);
2896 		c->src.val = 0;
2897 		break;
2898 	case SrcImmFAddr:
2899 		c->src.type = OP_IMM;
2900 		c->src.addr.mem = c->eip;
2901 		c->src.bytes = c->op_bytes + 2;
2902 		insn_fetch_arr(c->src.valptr, c->src.bytes, c->eip);
2903 		break;
2904 	case SrcMemFAddr:
2905 		memop.bytes = c->op_bytes + 2;
2906 		goto srcmem_common;
2908 	}
2909 
2910 	if (rc != X86EMUL_CONTINUE)
2911 		goto done;
2912 
2913 	/*
2914 	 * Decode and fetch the second source operand: register, memory
2915 	 * or immediate.
2916 	 */
2917 	switch (c->d & Src2Mask) {
2918 	case Src2None:
2919 		break;
2920 	case Src2CL:
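		/* the shift count for shld/shrd is CL, i.e. bits 7:0 of RCX */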
2921 		c->src2.bytes = 1;
2922 		c->src2.val = c->regs[VCPU_REGS_RCX] & 0xff;
2923 		break;
2924 	case Src2ImmByte:
2925 		rc = decode_imm(ctxt, &c->src2, 1, true);
2926 		break;
2927 	case Src2One:
2928 		c->src2.bytes = 1;
2929 		c->src2.val = 1;
2930 		break;
2931 	case Src2Imm:
2932 		rc = decode_imm(ctxt, &c->src2, imm_size(c), true);
2933 		break;
2934 	}
2935 
2936 	if (rc != X86EMUL_CONTINUE)
2937 		goto done;
2938 
2939 	/* Decode and fetch the destination operand: register or memory. */
2940 	switch (c->d & DstMask) {
2941 	case DstReg:
2942 		decode_register_operand(&c->dst, c,
2943 			 c->twobyte && (c->b == 0xb6 || c->b == 0xb7));
2944 		break;
2945 	case DstImmUByte:
2946 		c->dst.type = OP_IMM;
2947 		c->dst.addr.mem = c->eip;
2948 		c->dst.bytes = 1;
2949 		c->dst.val = insn_fetch(u8, 1, c->eip);
2950 		break;
2951 	case DstMem:
2952 	case DstMem64:
2953 		c->dst = memop;
2954 		if ((c->d & DstMask) == DstMem64)
2955 			c->dst.bytes = 8;
2956 		else
2957 			c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
2958 		if (c->d & BitOp)
2959 			fetch_bit_operand(c);
2960 		c->dst.orig_val = c->dst.val;
2961 		break;
2962 	case DstAcc:
2963 		c->dst.type = OP_REG;
2964 		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
2965 		c->dst.addr.reg = &c->regs[VCPU_REGS_RAX];
2966 		fetch_register_operand(&c->dst);
2967 		c->dst.orig_val = c->dst.val;
2968 		break;
2969 	case DstDI:
2970 		c->dst.type = OP_MEM;
2971 		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
2972 		c->dst.addr.mem =
2973 			register_address(c, es_base(ctxt, ops),
2974 					 c->regs[VCPU_REGS_RDI]);
2975 		c->dst.val = 0;
2976 		break;
2977 	case ImplicitOps:
2978 		/* Special instructions do their own operand decoding. */
2979 	default:
2980 		c->dst.type = OP_NONE; /* Disable writeback. */
2981 		return 0;
2982 	}
2983 
2984 done:
2985 	return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
2986 }
2987 
2988 static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
2989 {
2990 	struct decode_cache *c = &ctxt->decode;
2991 
2992 	/* The second termination condition applies only to REPE
2993 	 * and REPNE. If the repeat-string-operation prefix is
2994 	 * REPE/REPZ or REPNE/REPNZ, test the corresponding
2995 	 * termination condition:
2996 	 * 	- if REPE/REPZ and ZF = 0 then done
2997 	 * 	- if REPNE/REPNZ and ZF = 1 then done
2998 	 */
2999 	if (((c->b == 0xa6) || (c->b == 0xa7) ||
3000 	     (c->b == 0xae) || (c->b == 0xaf))
3001 	    && (((c->rep_prefix == REPE_PREFIX) &&
3002 		 ((ctxt->eflags & EFLG_ZF) == 0))
3003 		|| ((c->rep_prefix == REPNE_PREFIX) &&
3004 		    ((ctxt->eflags & EFLG_ZF) == EFLG_ZF))))
3005 		return true;
3006 
3007 	return false;
3008 }
3009 
3010 int
3011 x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
3012 {
3013 	struct x86_emulate_ops *ops = ctxt->ops;
3014 	u64 msr_data;
3015 	struct decode_cache *c = &ctxt->decode;
3016 	int rc = X86EMUL_CONTINUE;
3017 	int saved_dst_type = c->dst.type;
3018 	int irq; /* Used for int 3, int, and into */
3019 
3020 	ctxt->decode.mem_read.pos = 0;
3021 
3022 	if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) {
3023 		emulate_ud(ctxt);
3024 		goto done;
3025 	}
3026 
3027 	/* LOCK prefix is allowed only with some instructions */
3028 	if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) {
3029 		emulate_ud(ctxt);
3030 		goto done;
3031 	}
3032 
3033 	if ((c->d & SrcMask) == SrcMemFAddr && c->src.type != OP_MEM) {
3034 		emulate_ud(ctxt);
3035 		goto done;
3036 	}
3037 
3038 	/* Privileged instructions can be executed only at CPL 0 */
3039 	if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) {
3040 		emulate_gp(ctxt, 0);
3041 		goto done;
3042 	}
3043 
3044 	if (c->rep_prefix && (c->d & String)) {
3045 		/* All REP prefixes have the same first termination condition */
3046 		if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) {
3047 			ctxt->eip = c->eip;
3048 			goto done;
3049 		}
3050 	}
3051 
3052 	if ((c->src.type == OP_MEM) && !(c->d & NoAccess)) {
3053 		rc = read_emulated(ctxt, ops, c->src.addr.mem,
3054 					c->src.valptr, c->src.bytes);
3055 		if (rc != X86EMUL_CONTINUE)
3056 			goto done;
3057 		c->src.orig_val64 = c->src.val64;
3058 	}
3059 
3060 	if (c->src2.type == OP_MEM) {
3061 		rc = read_emulated(ctxt, ops, c->src2.addr.mem,
3062 					&c->src2.val, c->src2.bytes);
3063 		if (rc != X86EMUL_CONTINUE)
3064 			goto done;
3065 	}
3066 
3067 	if ((c->d & DstMask) == ImplicitOps)
3068 		goto special_insn;
3069 
3071 	if ((c->dst.type == OP_MEM) && !(c->d & Mov)) {
3072 		/* optimisation - avoid slow emulated read if Mov */
3073 		rc = read_emulated(ctxt, ops, c->dst.addr.mem,
3074 				   &c->dst.val, c->dst.bytes);
3075 		if (rc != X86EMUL_CONTINUE)
3076 			goto done;
3077 	}
3078 	c->dst.orig_val = c->dst.val;
3079 
3080 special_insn:
3081 
3082 	if (c->execute) {
3083 		rc = c->execute(ctxt);
3084 		if (rc != X86EMUL_CONTINUE)
3085 			goto done;
3086 		goto writeback;
3087 	}
3088 
3089 	if (c->twobyte)
3090 		goto twobyte_insn;
3091 
3092 	switch (c->b) {
3093 	case 0x00 ... 0x05:
3094 	      add:		/* add */
3095 		emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags);
3096 		break;
3097 	case 0x06:		/* push es */
3098 		emulate_push_sreg(ctxt, ops, VCPU_SREG_ES);
3099 		break;
3100 	case 0x07:		/* pop es */
3101 		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES);
3102 		break;
3103 	case 0x08 ... 0x0d:
3104 	      or:		/* or */
3105 		emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags);
3106 		break;
3107 	case 0x0e:		/* push cs */
3108 		emulate_push_sreg(ctxt, ops, VCPU_SREG_CS);
3109 		break;
3110 	case 0x10 ... 0x15:
3111 	      adc:		/* adc */
3112 		emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags);
3113 		break;
3114 	case 0x16:		/* push ss */
3115 		emulate_push_sreg(ctxt, ops, VCPU_SREG_SS);
3116 		break;
3117 	case 0x17:		/* pop ss */
3118 		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS);
3119 		break;
3120 	case 0x18 ... 0x1d:
3121 	      sbb:		/* sbb */
3122 		emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags);
3123 		break;
3124 	case 0x1e:		/* push ds */
3125 		emulate_push_sreg(ctxt, ops, VCPU_SREG_DS);
3126 		break;
3127 	case 0x1f:		/* pop ds */
3128 		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS);
3129 		break;
3130 	case 0x20 ... 0x25:
3131 	      and:		/* and */
3132 		emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags);
3133 		break;
3134 	case 0x28 ... 0x2d:
3135 	      sub:		/* sub */
3136 		emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags);
3137 		break;
3138 	case 0x30 ... 0x35:
3139 	      xor:		/* xor */
3140 		emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags);
3141 		break;
3142 	case 0x38 ... 0x3d:
3143 	      cmp:		/* cmp */
3144 		emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
3145 		break;
3146 	case 0x40 ... 0x47: /* inc r16/r32 */
3147 		emulate_1op("inc", c->dst, ctxt->eflags);
3148 		break;
3149 	case 0x48 ... 0x4f: /* dec r16/r32 */
3150 		emulate_1op("dec", c->dst, ctxt->eflags);
3151 		break;
3152 	case 0x58 ... 0x5f: /* pop reg */
3153 	pop_instruction:
3154 		rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes);
3155 		break;
3156 	case 0x60:	/* pusha */
3157 		rc = emulate_pusha(ctxt, ops);
3158 		break;
3159 	case 0x61:	/* popa */
3160 		rc = emulate_popa(ctxt, ops);
3161 		break;
3162 	case 0x63:		/* movsxd */
3163 		if (ctxt->mode != X86EMUL_MODE_PROT64)
3164 			goto cannot_emulate;
3165 		c->dst.val = (s32) c->src.val;
3166 		break;
3167 	case 0x6c:		/* insb */
3168 	case 0x6d:		/* insw/insd */
3169 		c->src.val = c->regs[VCPU_REGS_RDX];
3170 		goto do_io_in;
3171 	case 0x6e:		/* outsb */
3172 	case 0x6f:		/* outsw/outsd */
3173 		c->dst.val = c->regs[VCPU_REGS_RDX];
3174 		goto do_io_out;
3176 	case 0x70 ... 0x7f: /* jcc (short) */
3177 		if (test_cc(c->b, ctxt->eflags))
3178 			jmp_rel(c, c->src.val);
3179 		break;
3180 	case 0x80 ... 0x83:	/* Grp1 */
3181 		switch (c->modrm_reg) {
3182 		case 0:
3183 			goto add;
3184 		case 1:
3185 			goto or;
3186 		case 2:
3187 			goto adc;
3188 		case 3:
3189 			goto sbb;
3190 		case 4:
3191 			goto and;
3192 		case 5:
3193 			goto sub;
3194 		case 6:
3195 			goto xor;
3196 		case 7:
3197 			goto cmp;
3198 		}
3199 		break;
3200 	case 0x84 ... 0x85:
3201 	test:
3202 		emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
3203 		break;
3204 	case 0x86 ... 0x87:	/* xchg */
3205 	xchg:
3206 		/* Write back the register source. */
3207 		c->src.val = c->dst.val;
3208 		write_register_operand(&c->src);
3209 		/*
3210 		 * Write back the memory destination with implicit LOCK
3211 		 * prefix.
3212 		 */
3213 		c->dst.val = c->src.orig_val;
3214 		c->lock_prefix = 1;
3215 		break;
3216 	case 0x8c:  /* mov r/m, sreg */
3217 		if (c->modrm_reg > VCPU_SREG_GS) {
3218 			emulate_ud(ctxt);
3219 			goto done;
3220 		}
3221 		c->dst.val = ops->get_segment_selector(c->modrm_reg, ctxt->vcpu);
3222 		break;
3223 	case 0x8d: /* lea r16/r32, m */
3224 		c->dst.val = c->src.addr.mem;
3225 		break;
3226 	case 0x8e: { /* mov seg, r/m16 */
3227 		uint16_t sel;
3228 
3229 		sel = c->src.val;
3230 
3231 		if (c->modrm_reg == VCPU_SREG_CS ||
3232 		    c->modrm_reg > VCPU_SREG_GS) {
3233 			emulate_ud(ctxt);
3234 			goto done;
3235 		}
3236 
3237 		if (c->modrm_reg == VCPU_SREG_SS)
3238 			ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS;
3239 
3240 		rc = load_segment_descriptor(ctxt, ops, sel, c->modrm_reg);
3241 
3242 		c->dst.type = OP_NONE;  /* Disable writeback. */
3243 		break;
3244 	}
3245 	case 0x8f:		/* pop (sole member of Grp1a) */
3246 		rc = emulate_grp1a(ctxt, ops);
3247 		break;
3248 	case 0x90 ... 0x97: /* nop / xchg reg, rax */
3249 		if (c->dst.addr.reg == &c->regs[VCPU_REGS_RAX])
3250 			break;
3251 		goto xchg;
3252 	case 0x98: /* cbw/cwde/cdqe */
3253 		switch (c->op_bytes) {
3254 		case 2: c->dst.val = (s8)c->dst.val; break;
3255 		case 4: c->dst.val = (s16)c->dst.val; break;
3256 		case 8: c->dst.val = (s32)c->dst.val; break;
3257 		}
3258 		break;
3259 	case 0x9c: /* pushf */
3260 		c->src.val =  (unsigned long) ctxt->eflags;
3261 		emulate_push(ctxt, ops);
3262 		break;
3263 	case 0x9d: /* popf */
3264 		c->dst.type = OP_REG;
3265 		c->dst.addr.reg = &ctxt->eflags;
3266 		c->dst.bytes = c->op_bytes;
3267 		rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes);
3268 		break;
3269 	case 0xa6 ... 0xa7:	/* cmps */
3270 		c->dst.type = OP_NONE; /* Disable writeback. */
3271 		DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.addr.mem, c->dst.addr.mem);
3272 		goto cmp;
3273 	case 0xa8 ... 0xa9:	/* test ax, imm */
3274 		goto test;
3275 	case 0xae ... 0xaf:	/* scas */
3276 		goto cmp;
3277 	case 0xc0 ... 0xc1:
3278 		emulate_grp2(ctxt);
3279 		break;
3280 	case 0xc3: /* ret */
3281 		c->dst.type = OP_REG;
3282 		c->dst.addr.reg = &c->eip;
3283 		c->dst.bytes = c->op_bytes;
3284 		goto pop_instruction;
3285 	case 0xc4:		/* les */
3286 		rc = emulate_load_segment(ctxt, ops, VCPU_SREG_ES);
3287 		break;
3288 	case 0xc5:		/* lds */
3289 		rc = emulate_load_segment(ctxt, ops, VCPU_SREG_DS);
3290 		break;
3291 	case 0xcb:		/* ret far */
3292 		rc = emulate_ret_far(ctxt, ops);
3293 		break;
3294 	case 0xcc:		/* int3 */
3295 		irq = 3;
3296 		goto do_interrupt;
3297 	case 0xcd:		/* int n */
3298 		irq = c->src.val;
3299 	do_interrupt:
3300 		rc = emulate_int(ctxt, ops, irq);
3301 		break;
3302 	case 0xce:		/* into */
3303 		if (ctxt->eflags & EFLG_OF) {
3304 			irq = 4;
3305 			goto do_interrupt;
3306 		}
3307 		break;
3308 	case 0xcf:		/* iret */
3309 		rc = emulate_iret(ctxt, ops);
3310 		break;
3311 	case 0xd0 ... 0xd1:	/* Grp2 */
3312 		emulate_grp2(ctxt);
3313 		break;
3314 	case 0xd2 ... 0xd3:	/* Grp2 */
3315 		c->src.val = c->regs[VCPU_REGS_RCX];
3316 		emulate_grp2(ctxt);
3317 		break;
3318 	case 0xe0 ... 0xe2:	/* loop/loopz/loopnz */
3319 		register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1);
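		/*
		 * loopnz (0xe0) and loopz (0xe1) also test ZF; XORing the
		 * opcode with 5 maps them onto the jnz/jz condition codes
		 * understood by test_cc().
		 */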
3320 		if (address_mask(c, c->regs[VCPU_REGS_RCX]) != 0 &&
3321 		    (c->b == 0xe2 || test_cc(c->b ^ 0x5, ctxt->eflags)))
3322 			jmp_rel(c, c->src.val);
3323 		break;
3324 	case 0xe3:	/* jcxz/jecxz/jrcxz */
3325 		if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0)
3326 			jmp_rel(c, c->src.val);
3327 		break;
3328 	case 0xe4: 	/* inb */
3329 	case 0xe5: 	/* in */
3330 		goto do_io_in;
3331 	case 0xe6: /* outb */
3332 	case 0xe7: /* out */
3333 		goto do_io_out;
3334 	case 0xe8: /* call (near) */ {
3335 		long int rel = c->src.val;
3336 		c->src.val = (unsigned long) c->eip;
3337 		jmp_rel(c, rel);
3338 		emulate_push(ctxt, ops);
3339 		break;
3340 	}
3341 	case 0xe9: /* jmp rel */
3342 		goto jmp;
3343 	case 0xea: { /* jmp far */
3344 		unsigned short sel;
3345 	jump_far:
3346 		memcpy(&sel, c->src.valptr + c->op_bytes, 2);
3347 
3348 		if (load_segment_descriptor(ctxt, ops, sel, VCPU_SREG_CS))
3349 			goto done;
3350 
3351 		c->eip = 0;
3352 		memcpy(&c->eip, c->src.valptr, c->op_bytes);
3353 		break;
3354 	}
3355 	case 0xeb:
3356 	      jmp:		/* jmp rel short */
3357 		jmp_rel(c, c->src.val);
3358 		c->dst.type = OP_NONE; /* Disable writeback. */
3359 		break;
3360 	case 0xec: /* in al,dx */
3361 	case 0xed: /* in (e/r)ax,dx */
3362 		c->src.val = c->regs[VCPU_REGS_RDX];
3363 	do_io_in:
3364 		c->dst.bytes = min(c->dst.bytes, 4u);
3365 		if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) {
3366 			emulate_gp(ctxt, 0);
3367 			goto done;
3368 		}
3369 		if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val,
3370 				     &c->dst.val))
3371 			goto done; /* IO is needed */
3372 		break;
3373 	case 0xee: /* out dx,al */
3374 	case 0xef: /* out dx,(e/r)ax */
3375 		c->dst.val = c->regs[VCPU_REGS_RDX];
3376 	do_io_out:
3377 		c->src.bytes = min(c->src.bytes, 4u);
3378 		if (!emulator_io_permited(ctxt, ops, c->dst.val,
3379 					  c->src.bytes)) {
3380 			emulate_gp(ctxt, 0);
3381 			goto done;
3382 		}
3383 		ops->pio_out_emulated(c->src.bytes, c->dst.val,
3384 				      &c->src.val, 1, ctxt->vcpu);
3385 		c->dst.type = OP_NONE;	/* Disable writeback. */
3386 		break;
3387 	case 0xf4:              /* hlt */
3388 		ctxt->vcpu->arch.halt_request = 1;
3389 		break;
3390 	case 0xf5:	/* cmc */
3391 		/* complement carry flag from eflags reg */
3392 		ctxt->eflags ^= EFLG_CF;
3393 		break;
3394 	case 0xf6 ... 0xf7:	/* Grp3 */
3395 		rc = emulate_grp3(ctxt, ops);
3396 		break;
3397 	case 0xf8: /* clc */
3398 		ctxt->eflags &= ~EFLG_CF;
3399 		break;
3400 	case 0xf9: /* stc */
3401 		ctxt->eflags |= EFLG_CF;
3402 		break;
3403 	case 0xfa: /* cli */
3404 		if (emulator_bad_iopl(ctxt, ops)) {
3405 			emulate_gp(ctxt, 0);
3406 			goto done;
3407 		} else
3408 			ctxt->eflags &= ~X86_EFLAGS_IF;
3409 		break;
3410 	case 0xfb: /* sti */
3411 		if (emulator_bad_iopl(ctxt, ops)) {
3412 			emulate_gp(ctxt, 0);
3413 			goto done;
3414 		} else {
3415 			ctxt->interruptibility = KVM_X86_SHADOW_INT_STI;
3416 			ctxt->eflags |= X86_EFLAGS_IF;
3417 		}
3418 		break;
3419 	case 0xfc: /* cld */
3420 		ctxt->eflags &= ~EFLG_DF;
3421 		break;
3422 	case 0xfd: /* std */
3423 		ctxt->eflags |= EFLG_DF;
3424 		break;
3425 	case 0xfe: /* Grp4 */
3426 	grp45:
3427 		rc = emulate_grp45(ctxt, ops);
3428 		break;
3429 	case 0xff: /* Grp5 */
3430 		if (c->modrm_reg == 5)
3431 			goto jump_far;
3432 		goto grp45;
3433 	default:
3434 		goto cannot_emulate;
3435 	}
3436 
3437 	if (rc != X86EMUL_CONTINUE)
3438 		goto done;
3439 
3440 writeback:
3441 	rc = writeback(ctxt, ops);
3442 	if (rc != X86EMUL_CONTINUE)
3443 		goto done;
3444 
3445 	/*
3446 	 * restore dst type in case the decoding will be reused
3447 	 * (happens for string instructions)
3448 	 */
3449 	c->dst.type = saved_dst_type;
3450 
3451 	if ((c->d & SrcMask) == SrcSI)
3452 		string_addr_inc(ctxt, seg_override_base(ctxt, ops, c),
3453 				VCPU_REGS_RSI, &c->src);
3454 
3455 	if ((c->d & DstMask) == DstDI)
3456 		string_addr_inc(ctxt, es_base(ctxt, ops), VCPU_REGS_RDI,
3457 				&c->dst);
3458 
3459 	if (c->rep_prefix && (c->d & String)) {
3460 		struct read_cache *r = &ctxt->decode.io_read;
3461 		register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1);
3462 
3463 		if (!string_insn_completed(ctxt)) {
3464 			/*
3465 			 * Re-enter the guest when the pio read-ahead buffer is
3466 			 * empty or, if it is not used, after every 1024 iterations.
3467 			 */
3468 			if ((r->end != 0 || c->regs[VCPU_REGS_RCX] & 0x3ff) &&
3469 			    (r->end == 0 || r->end != r->pos)) {
3470 				/*
3471 				 * Reset read cache. Usually happens before
3472 				 * decode, but since instruction is restarted
3473 				 * we have to do it here.
3474 				 */
3475 				ctxt->decode.mem_read.end = 0;
3476 				return EMULATION_RESTART;
3477 			}
3478 			goto done; /* skip rip writeback */
3479 		}
3480 	}
3481 
3482 	ctxt->eip = c->eip;
3483 
3484 done:
3485 	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
3486 
3487 twobyte_insn:
3488 	switch (c->b) {
3489 	case 0x01: /* lgdt, lidt, lmsw */
3490 		switch (c->modrm_reg) {
3491 			u16 size;
3492 			unsigned long address;
3493 
3494 		case 0: /* vmcall */
3495 			if (c->modrm_mod != 3 || c->modrm_rm != 1)
3496 				goto cannot_emulate;
3497 
3498 			rc = kvm_fix_hypercall(ctxt->vcpu);
3499 			if (rc != X86EMUL_CONTINUE)
3500 				goto done;
3501 
3502 			/* Let the processor re-execute the fixed hypercall */
3503 			c->eip = ctxt->eip;
3504 			/* Disable writeback. */
3505 			c->dst.type = OP_NONE;
3506 			break;
3507 		case 2: /* lgdt */
3508 			rc = read_descriptor(ctxt, ops, c->src.addr.mem,
3509 					     &size, &address, c->op_bytes);
3510 			if (rc != X86EMUL_CONTINUE)
3511 				goto done;
3512 			realmode_lgdt(ctxt->vcpu, size, address);
3513 			/* Disable writeback. */
3514 			c->dst.type = OP_NONE;
3515 			break;
3516 		case 3: /* lidt/vmmcall */
3517 			if (c->modrm_mod == 3) {
3518 				switch (c->modrm_rm) {
3519 				case 1:
3520 					rc = kvm_fix_hypercall(ctxt->vcpu);
3521 					break;
3522 				default:
3523 					goto cannot_emulate;
3524 				}
3525 			} else {
3526 				rc = read_descriptor(ctxt, ops, c->src.addr.mem,
3527 						     &size, &address,
3528 						     c->op_bytes);
3529 				if (rc != X86EMUL_CONTINUE)
3530 					goto done;
3531 				realmode_lidt(ctxt->vcpu, size, address);
3532 			}
3533 			/* Disable writeback. */
3534 			c->dst.type = OP_NONE;
3535 			break;
3536 		case 4: /* smsw */
3537 			c->dst.bytes = 2;
3538 			c->dst.val = ops->get_cr(0, ctxt->vcpu);
3539 			break;
3540 		case 6: /* lmsw */
3541 			ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0eul) |
3542 				    (c->src.val & 0x0f), ctxt->vcpu);
3543 			c->dst.type = OP_NONE;
3544 			break;
3545 		case 5: /* not defined */
3546 			emulate_ud(ctxt);
3547 			goto done;
3548 		case 7: /* invlpg */
3549 			emulate_invlpg(ctxt->vcpu, c->src.addr.mem);
3550 			/* Disable writeback. */
3551 			c->dst.type = OP_NONE;
3552 			break;
3553 		default:
3554 			goto cannot_emulate;
3555 		}
3556 		break;
3557 	case 0x05: 		/* syscall */
3558 		rc = emulate_syscall(ctxt, ops);
3559 		break;
3560 	case 0x06:
3561 		emulate_clts(ctxt->vcpu);
3562 		break;
3563 	case 0x09:		/* wbinvd */
3564 		kvm_emulate_wbinvd(ctxt->vcpu);
3565 		break;
3566 	case 0x08:		/* invd */
3567 	case 0x0d:		/* GrpP (prefetch) */
3568 	case 0x18:		/* Grp16 (prefetch/nop) */
3569 		break;
3570 	case 0x20: /* mov cr, reg */
3571 		switch (c->modrm_reg) {
3572 		case 1:
3573 		case 5 ... 7:
3574 		case 9 ... 15:
3575 			emulate_ud(ctxt);
3576 			goto done;
3577 		}
3578 		c->dst.val = ops->get_cr(c->modrm_reg, ctxt->vcpu);
3579 		break;
3580 	case 0x21: /* mov from dr to reg */
3581 		if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&
3582 		    (c->modrm_reg == 4 || c->modrm_reg == 5)) {
3583 			emulate_ud(ctxt);
3584 			goto done;
3585 		}
3586 		ops->get_dr(c->modrm_reg, &c->dst.val, ctxt->vcpu);
3587 		break;
3588 	case 0x22: /* mov reg, cr */
3589 		if (ops->set_cr(c->modrm_reg, c->src.val, ctxt->vcpu)) {
3590 			emulate_gp(ctxt, 0);
3591 			goto done;
3592 		}
3593 		c->dst.type = OP_NONE;
3594 		break;
3595 	case 0x23: /* mov from reg to dr */
3596 		if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&
3597 		    (c->modrm_reg == 4 || c->modrm_reg == 5)) {
3598 			emulate_ud(ctxt);
3599 			goto done;
3600 		}
3601 
3602 		if (ops->set_dr(c->modrm_reg, c->src.val &
3603 				((ctxt->mode == X86EMUL_MODE_PROT64) ?
3604 				 ~0ULL : ~0U), ctxt->vcpu) < 0) {
3605 			/* #UD condition is already handled by the code above */
3606 			emulate_gp(ctxt, 0);
3607 			goto done;
3608 		}
3609 
3610 		c->dst.type = OP_NONE;	/* no writeback */
3611 		break;
3612 	case 0x30:
3613 		/* wrmsr */
3614 		msr_data = (u32)c->regs[VCPU_REGS_RAX]
3615 			| ((u64)c->regs[VCPU_REGS_RDX] << 32);
3616 		if (ops->set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) {
3617 			emulate_gp(ctxt, 0);
3618 			goto done;
3619 		}
3620 		rc = X86EMUL_CONTINUE;
3621 		break;
3622 	case 0x32:
3623 		/* rdmsr */
3624 		if (ops->get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) {
3625 			emulate_gp(ctxt, 0);
3626 			goto done;
3627 		} else {
3628 			c->regs[VCPU_REGS_RAX] = (u32)msr_data;
3629 			c->regs[VCPU_REGS_RDX] = msr_data >> 32;
3630 		}
3631 		rc = X86EMUL_CONTINUE;
3632 		break;
3633 	case 0x34:		/* sysenter */
3634 		rc = emulate_sysenter(ctxt, ops);
3635 		break;
3636 	case 0x35:		/* sysexit */
3637 		rc = emulate_sysexit(ctxt, ops);
3638 		break;
3639 	case 0x40 ... 0x4f:	/* cmov */
3640 		c->dst.val = c->dst.orig_val = c->src.val;
3641 		if (!test_cc(c->b, ctxt->eflags))
3642 			c->dst.type = OP_NONE; /* no writeback */
3643 		break;
3644 	case 0x80 ... 0x8f: /* jnz rel, etc. */
3645 		if (test_cc(c->b, ctxt->eflags))
3646 			jmp_rel(c, c->src.val);
3647 		break;
3648 	case 0x90 ... 0x9f:     /* setcc r/m8 */
3649 		c->dst.val = test_cc(c->b, ctxt->eflags);
3650 		break;
3651 	case 0xa0:	  /* push fs */
3652 		emulate_push_sreg(ctxt, ops, VCPU_SREG_FS);
3653 		break;
3654 	case 0xa1:	 /* pop fs */
3655 		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS);
3656 		break;
3657 	case 0xa3:
3658 	      bt:		/* bt */
3659 		c->dst.type = OP_NONE;
3660 		/* only subword offset */
3661 		c->src.val &= (c->dst.bytes << 3) - 1;
3662 		emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags);
3663 		break;
3664 	case 0xa4: /* shld imm8, r, r/m */
3665 	case 0xa5: /* shld cl, r, r/m */
3666 		emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags);
3667 		break;
3668 	case 0xa8:	/* push gs */
3669 		emulate_push_sreg(ctxt, ops, VCPU_SREG_GS);
3670 		break;
3671 	case 0xa9:	/* pop gs */
3672 		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS);
3673 		break;
3674 	case 0xab:
3675 	      bts:		/* bts */
3676 		emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags);
3677 		break;
3678 	case 0xac: /* shrd imm8, r, r/m */
3679 	case 0xad: /* shrd cl, r, r/m */
3680 		emulate_2op_cl("shrd", c->src2, c->src, c->dst, ctxt->eflags);
3681 		break;
3682 	case 0xae:              /* clflush */
3683 		break;
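	/*
	 * cmpxchg: compare the accumulator with the destination; on a
	 * match (ZF set) the source is stored, otherwise the destination
	 * value is loaded into the accumulator.
	 */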
3684 	case 0xb0 ... 0xb1:	/* cmpxchg */
3685 		/*
3686 		 * Save real source value, then compare EAX against
3687 		 * destination.
3688 		 */
3689 		c->src.orig_val = c->src.val;
3690 		c->src.val = c->regs[VCPU_REGS_RAX];
3691 		emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
3692 		if (ctxt->eflags & EFLG_ZF) {
3693 			/* Success: write back to memory. */
3694 			c->dst.val = c->src.orig_val;
3695 		} else {
3696 			/* Failure: write the value we saw to EAX. */
3697 			c->dst.type = OP_REG;
3698 			c->dst.addr.reg = (unsigned long *)&c->regs[VCPU_REGS_RAX];
3699 		}
3700 		break;
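	/*
	 * lss/lfs/lgs load a far pointer: the offset part goes into the
	 * destination register, the selector into the segment register.
	 */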
3701 	case 0xb2:		/* lss */
3702 		rc = emulate_load_segment(ctxt, ops, VCPU_SREG_SS);
3703 		break;
3704 	case 0xb3:
3705 	      btr:		/* btr */
3706 		emulate_2op_SrcV_nobyte("btr", c->src, c->dst, ctxt->eflags);
3707 		break;
3708 	case 0xb4:		/* lfs */
3709 		rc = emulate_load_segment(ctxt, ops, VCPU_SREG_FS);
3710 		break;
3711 	case 0xb5:		/* lgs */
3712 		rc = emulate_load_segment(ctxt, ops, VCPU_SREG_GS);
3713 		break;
3714 	case 0xb6 ... 0xb7:	/* movzx */
3715 		c->dst.bytes = c->op_bytes;
3716 		c->dst.val = (c->d & ByteOp) ? (u8) c->src.val
3717 						       : (u16) c->src.val;
3718 		break;
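	/*
	 * Grp8 is bt/bts/btr/btc with an immediate bit offset; ModRM reg
	 * values 4-7 select the operation, hence the & 3 below (the
	 * undefined encodings 0-3 end up aliased rather than faulting).
	 */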
3719 	case 0xba:		/* Grp8 */
3720 		switch (c->modrm_reg & 3) {
3721 		case 0:
3722 			goto bt;
3723 		case 1:
3724 			goto bts;
3725 		case 2:
3726 			goto btr;
3727 		case 3:
3728 			goto btc;
3729 		}
3730 		break;
3731 	case 0xbb:
3732 	      btc:		/* btc */
3733 		emulate_2op_SrcV_nobyte("btc", c->src, c->dst, ctxt->eflags);
3734 		break;
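	/*
	 * bsf/bsr set ZF and leave the destination undefined when the
	 * source is zero, so run the host instruction and suppress
	 * writeback in that case.
	 */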
3735 	case 0xbc: {		/* bsf */
3736 		u8 zf;
3737 		__asm__ ("bsf %2, %0; setz %1"
3738 			 : "=r"(c->dst.val), "=q"(zf)
3739 			 : "r"(c->src.val));
3740 		ctxt->eflags &= ~X86_EFLAGS_ZF;
3741 		if (zf) {
3742 			ctxt->eflags |= X86_EFLAGS_ZF;
3743 			c->dst.type = OP_NONE;	/* Disable writeback. */
3744 		}
3745 		break;
3746 	}
3747 	case 0xbd: {		/* bsr */
3748 		u8 zf;
3749 		__asm__ ("bsr %2, %0; setz %1"
3750 			 : "=r"(c->dst.val), "=q"(zf)
3751 			 : "r"(c->src.val));
3752 		ctxt->eflags &= ~X86_EFLAGS_ZF;
3753 		if (zf) {
3754 			ctxt->eflags |= X86_EFLAGS_ZF;
3755 			c->dst.type = OP_NONE;	/* Disable writeback. */
3756 		}
3757 		break;
3758 	}
3759 	case 0xbe ... 0xbf:	/* movsx */
3760 		c->dst.bytes = c->op_bytes;
3761 		c->dst.val = (c->d & ByteOp) ? (s8) c->src.val :
3762 							(s16) c->src.val;
3763 		break;
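	/*
	 * xadd: the sum replaces the destination while the destination's
	 * original value is written back to the register source.
	 */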
3764 	case 0xc0 ... 0xc1:	/* xadd */
3765 		emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags);
3766 		/* Write back the register source. */
3767 		c->src.val = c->dst.orig_val;
3768 		write_register_operand(&c->src);
3769 		break;
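	/*
	 * movnti is a non-temporal store; the caching hint is irrelevant
	 * under emulation, so treat it as a plain move.
	 */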
3770 	case 0xc3:		/* movnti */
3771 		c->dst.bytes = c->op_bytes;
3772 		c->dst.val = (c->op_bytes == 4) ? (u32) c->src.val :
3773 							(u64) c->src.val;
3774 		break;
3775 	case 0xc7:		/* Grp9 (cmpxchg8b) */
3776 		rc = emulate_grp9(ctxt, ops);
3777 		break;
3778 	default:
3779 		goto cannot_emulate;
3780 	}
3781 
3782 	if (rc != X86EMUL_CONTINUE)
3783 		goto done;
3784 
3785 	goto writeback;
3786 
3787 cannot_emulate:
3788 	DPRINTF("Cannot emulate %02x\n", c->b);
3789 	return -1;
3790 }
3791