/* SPDX-License-Identifier: MIT */
/******************************************************************************
 * arch-x86/xen.h
 *
 * Guest OS interface to x86 Xen.
 *
 * Copyright (c) 2004-2006, K A Fraser
 */

#include "../xen.h"

#ifndef __XEN_PUBLIC_ARCH_X86_XEN_H__
#define __XEN_PUBLIC_ARCH_X86_XEN_H__

/* Structural guest handles introduced in 0x00030201. */
#if __XEN_INTERFACE_VERSION__ >= 0x00030201
#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \
    typedef struct { type *p; } __guest_handle_ ## name
#else
#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \
    typedef type * __guest_handle_ ## name
#endif

/*
 * XEN_GUEST_HANDLE represents a guest pointer, when passed as a field
 * in a struct in memory.
 * XEN_GUEST_HANDLE_PARAM represents a guest pointer, when passed as a
 * hypercall argument.
 * XEN_GUEST_HANDLE_PARAM and XEN_GUEST_HANDLE are the same on X86 but
 * they might not be on other architectures.
 */
#define __DEFINE_XEN_GUEST_HANDLE(name, type) \
    ___DEFINE_XEN_GUEST_HANDLE(name, type);   \
    ___DEFINE_XEN_GUEST_HANDLE(const_##name, const type)
#define DEFINE_XEN_GUEST_HANDLE(name)   __DEFINE_XEN_GUEST_HANDLE(name, name)
#define __XEN_GUEST_HANDLE(name)        __guest_handle_ ## name
#define XEN_GUEST_HANDLE(name)          __XEN_GUEST_HANDLE(name)
#define XEN_GUEST_HANDLE_PARAM(name)    XEN_GUEST_HANDLE(name)
#define set_xen_guest_handle_raw(hnd, val)  do { (hnd).p = val; } while (0)
#define set_xen_guest_handle(hnd, val) set_xen_guest_handle_raw(hnd, val)
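/*
 * Illustrative sketch (not part of the interface): a hypothetical operation
 * structure embedding a guest handle, and how a guest points it at a local
 * buffer before passing the structure to a hypercall.  `example_op' and
 * `frames' are made-up names; a handle type for xen_pfn_t is assumed to have
 * been instantiated elsewhere with DEFINE_XEN_GUEST_HANDLE(xen_pfn_t).
 *
 *     struct example_op {
 *         uint32_t nr_entries;
 *         XEN_GUEST_HANDLE(xen_pfn_t) frame_list;
 *     };
 *
 *     xen_pfn_t frames[16];
 *     struct example_op op = { .nr_entries = 16 };
 *     set_xen_guest_handle(op.frame_list, frames);
 */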

#if defined(__i386__)
# ifdef __XEN__
__DeFiNe__ __DECL_REG_LO8(which) uint32_t e ## which ## x
__DeFiNe__ __DECL_REG_LO16(name) union { uint32_t e ## name; }
# endif
#include "xen-x86_32.h"
# ifdef __XEN__
__UnDeF__ __DECL_REG_LO8
__UnDeF__ __DECL_REG_LO16
__DeFiNe__ __DECL_REG_LO8(which) e ## which ## x
__DeFiNe__ __DECL_REG_LO16(name) e ## name
# endif
#elif defined(__x86_64__)
#include "xen-x86_64.h"
#endif

#ifndef __ASSEMBLY__
typedef unsigned long xen_pfn_t;
#define PRI_xen_pfn "lx"
#define PRIu_xen_pfn "lu"
#endif

#define XEN_HAVE_PV_GUEST_ENTRY 1

#define XEN_HAVE_PV_UPCALL_MASK 1

/*
 * `incontents 200 segdesc Segment Descriptor Tables
 */
/*
 * ` enum neg_errnoval
 * ` HYPERVISOR_set_gdt(const xen_pfn_t frames[], unsigned int entries);
 * `
 */
/*
 * A number of GDT entries are reserved by Xen. These are not situated at the
 * start of the GDT because some stupid OSes export hard-coded selector values
 * in their ABI. These hard-coded values are always near the start of the GDT,
 * so Xen places itself out of the way, at the far end of the GDT.
 *
 * NB The LDT is set using the MMUEXT_SET_LDT op of HYPERVISOR_mmuext_op
 */
#define FIRST_RESERVED_GDT_PAGE  14
#define FIRST_RESERVED_GDT_BYTE  (FIRST_RESERVED_GDT_PAGE * 4096)
#define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8)
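
/*
 * Worked example: with FIRST_RESERVED_GDT_PAGE == 14, the reserved area
 * starts at byte 14 * 4096 = 57344, i.e. at GDT entry 57344 / 8 = 7168.  A
 * PV guest may populate selectors below that entry and leaves pages 14-15
 * of the 16-page GDT area to Xen.  A minimal registration sketch, assuming
 * a guest-provided HYPERVISOR_set_gdt() wrapper; `gdt_mfn' (machine frame
 * of the guest's GDT page) and `nr_entries' are made-up names:
 *
 *     xen_pfn_t gdt_frames[1] = { gdt_mfn };
 *     int rc = HYPERVISOR_set_gdt(gdt_frames, nr_entries);
 */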


/*
 * ` enum neg_errnoval
 * ` HYPERVISOR_update_descriptor(u64 pa, u64 desc);
 * `
 * ` @pa   The machine physical address of the descriptor to
 * `       update. Must be either a descriptor page or writable.
 * ` @desc The descriptor value to update, in the same format as a
 * `       native descriptor table entry.
 */
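
/*
 * Illustrative sketch (PV guest, assuming a guest-provided
 * HYPERVISOR_update_descriptor() wrapper): rewriting one GDT entry in
 * place.  `gdt_mfn', `idx' and `new_desc' are made-up names; the machine
 * address is the frame's machine address plus the byte offset of the entry.
 *
 *     uint64_t pa = ((uint64_t)gdt_mfn << 12) | (idx * 8);
 *     int rc = HYPERVISOR_update_descriptor(pa, new_desc);
 */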

/* Maximum number of virtual CPUs in legacy multi-processor guests. */
#define XEN_LEGACY_MAX_VCPUS 32

#ifndef __ASSEMBLY__

typedef unsigned long xen_ulong_t;
#define PRI_xen_ulong "lx"

/*
 * ` enum neg_errnoval
 * ` HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp);
 * `
 * Sets the stack segment and pointer for the current vcpu.
 */
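
/*
 * Illustrative sketch (PV guest): the PV analogue of updating SS0/ESP0 in a
 * native TSS, telling Xen which kernel stack to load on a user-to-kernel
 * transition.  `kernel_stack_top' is a made-up symbol; the hypercall
 * wrapper is assumed to be provided by the guest OS.
 *
 *     HYPERVISOR_stack_switch(FLAT_KERNEL_SS,
 *                             (unsigned long)kernel_stack_top);
 */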

/*
 * ` enum neg_errnoval
 * ` HYPERVISOR_set_trap_table(const struct trap_info traps[]);
 * `
 */
/*
 * Send an array of these to HYPERVISOR_set_trap_table().
 * Terminate the array with a sentinel entry, with traps[].address==0.
 * The privilege level specifies which modes may enter a trap via a software
 * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate
 * privilege levels as follows:
 *  Level == 0: No one may enter
 *  Level == 1: Kernel may enter
 *  Level == 2: Kernel may enter
 *  Level == 3: Everyone may enter
 *
 * Note: For compatibility with kernels not setting up exception handlers
 *       early enough, Xen will avoid trying to inject #GP (and hence crash
 *       the domain) when an RDMSR would require this, but no handler was
 *       set yet. The precise conditions are implementation specific, and
 *       new code may not rely on such behavior anyway.
 */
#define TI_GET_DPL(_ti)      ((_ti)->flags & 3)
#define TI_GET_IF(_ti)       ((_ti)->flags & 4)
#define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl))
#define TI_SET_IF(_ti,_if)   ((_ti)->flags |= ((!!(_if))<<2))
struct trap_info {
    uint8_t       vector;  /* exception vector                              */
    uint8_t       flags;   /* 0-3: privilege level; 4: clear event enable?  */
    uint16_t      cs;      /* code selector                                 */
    unsigned long address; /* code offset                                   */
};
typedef struct trap_info trap_info_t;
DEFINE_XEN_GUEST_HANDLE(trap_info_t);
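
/*
 * Illustrative sketch (PV guest): registering a small virtual IDT.  The
 * handler symbols and the HYPERVISOR_set_trap_table() wrapper are assumed
 * to be provided by the guest OS; only the sentinel-terminated array layout
 * comes from this interface.
 *
 *     static const struct trap_info traps[] = {
 *         { 14, 0, FLAT_KERNEL_CS, (unsigned long)page_fault_handler },
 *         {  3, 3, FLAT_KERNEL_CS, (unsigned long)int3_handler       },
 *         {  0, 0, 0, 0 }   // sentinel: address == 0 terminates the array
 *     };
 *     HYPERVISOR_set_trap_table(traps);
 */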

typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */

/*
 * The following is all CPU context. Note that the fpu_ctxt block is filled
 * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
 *
 * Also note that when calling DOMCTL_setvcpucontext for HVM guests, not all
 * information in this structure is updated; the fields read include: fpu_ctxt
 * (if VGCF_I387_VALID is set), flags, user_regs and debugreg[*].
 *
 * Note: VCPUOP_initialise for HVM guests is non-symmetric with
 * DOMCTL_setvcpucontext, and uses struct vcpu_hvm_context from hvm/hvm_vcpu.h
 */
struct vcpu_guest_context {
    /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
    struct { char x[512]; } fpu_ctxt;       /* User-level FPU registers     */
#define VGCF_I387_VALID                (1<<0)
#define VGCF_IN_KERNEL                 (1<<2)
#define _VGCF_i387_valid               0
#define VGCF_i387_valid                (1<<_VGCF_i387_valid)
#define _VGCF_in_kernel                2
#define VGCF_in_kernel                 (1<<_VGCF_in_kernel)
#define _VGCF_failsafe_disables_events 3
#define VGCF_failsafe_disables_events  (1<<_VGCF_failsafe_disables_events)
#define _VGCF_syscall_disables_events  4
#define VGCF_syscall_disables_events   (1<<_VGCF_syscall_disables_events)
#define _VGCF_online                   5
#define VGCF_online                    (1<<_VGCF_online)
    unsigned long flags;                    /* VGCF_* flags                 */
    struct cpu_user_regs user_regs;         /* User-level CPU registers     */
    struct trap_info trap_ctxt[256];        /* Virtual IDT                  */
    unsigned long ldt_base, ldt_ents;       /* LDT (linear address, # ents) */
    unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
    unsigned long kernel_ss, kernel_sp;     /* Virtual TSS (only SS1/SP1)   */
    /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */
    unsigned long ctrlreg[8];               /* CR0-CR7 (control registers)  */
    unsigned long debugreg[8];              /* DB0-DB7 (debug registers)    */
#ifdef __i386__
    unsigned long event_callback_cs;        /* CS:EIP of event callback     */
    unsigned long event_callback_eip;
    unsigned long failsafe_callback_cs;     /* CS:EIP of failsafe callback  */
    unsigned long failsafe_callback_eip;
#else
    unsigned long event_callback_eip;
    unsigned long failsafe_callback_eip;
#ifdef __XEN__
    union {
        unsigned long syscall_callback_eip;
        struct {
            unsigned int event_callback_cs;    /* compat CS of event cb     */
            unsigned int failsafe_callback_cs; /* compat CS of failsafe cb  */
        };
    };
#else
    unsigned long syscall_callback_eip;
#endif
#endif
    unsigned long vm_assist;                /* VMASST_TYPE_* bitmap */
#ifdef __x86_64__
    /* Segment base addresses. */
    uint64_t      fs_base;
    uint64_t      gs_base_kernel;
    uint64_t      gs_base_user;
#endif
};
typedef struct vcpu_guest_context vcpu_guest_context_t;
DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
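
/*
 * Illustrative sketch (guest kernel bringing up a secondary PV vCPU): only
 * the fields needed to start execution are shown, and the register names
 * assume x86-32 (rip/rsp on x86-64).  `start_ip', `start_stack',
 * `pgtable_mfn' and `vcpu_id' are made-up names; VCPUOP_initialise comes
 * from vcpu.h and the hypercall wrapper from the guest OS.
 *
 *     vcpu_guest_context_t ctxt;
 *     memset(&ctxt, 0, sizeof(ctxt));
 *     ctxt.flags = VGCF_in_kernel;
 *     ctxt.user_regs.cs  = FLAT_KERNEL_CS;
 *     ctxt.user_regs.eip = start_ip;
 *     ctxt.user_regs.ss  = FLAT_KERNEL_SS;
 *     ctxt.user_regs.esp = start_stack;
 *     ctxt.ctrlreg[3]    = xen_pfn_to_cr3(pgtable_mfn);
 *     HYPERVISOR_vcpu_op(VCPUOP_initialise, vcpu_id, &ctxt);
 */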

struct arch_shared_info {
    /*
     * Number of valid entries in the p2m table(s) anchored at
     * pfn_to_mfn_frame_list_list and/or p2m_vaddr.
     */
    unsigned long max_pfn;
    /*
     * Frame containing list of mfns containing list of mfns containing p2m.
     * A value of 0 indicates it has not yet been set up; ~0 indicates it has
     * been set to invalid, e.g. due to the p2m being too large for the
     * 3-level p2m tree. In this case the linear p2m list anchored at
     * p2m_vaddr is to be used.
     */
    xen_pfn_t     pfn_to_mfn_frame_list_list;
    unsigned long nmi_reason;
    /*
     * Following three fields are valid if p2m_cr3 contains a value different
     * from 0.
     * p2m_cr3 is the root of the address space where p2m_vaddr is valid.
     * p2m_cr3 is in the same format as a cr3 value in the vcpu register state
     * and holds the folded machine frame number (via xen_pfn_to_cr3) of a
     * L3 or L4 page table.
     * p2m_vaddr holds the virtual address of the linear p2m list. All entries
     * in the range [0...max_pfn[ are accessible via this pointer.
     * p2m_generation will be incremented by the guest before and after each
     * change of the mappings of the p2m list. p2m_generation starts at 0 and
     * a value with the least significant bit set indicates that a mapping
     * update is in progress. This allows software external to the guest
     * (e.g. in Dom0) to verify that mappings it has read are consistent and
     * to detect whether they have changed since the last check.
     * Modifying a p2m element in the linear p2m list is allowed via an atomic
     * write only.
     */
    unsigned long p2m_cr3;         /* cr3 value of the p2m address space */
    unsigned long p2m_vaddr;       /* virtual address of the p2m list */
    unsigned long p2m_generation;  /* generation count of p2m mapping */
#ifdef __i386__
    /* There's no room for this field in the generic structure. */
    uint32_t wc_sec_hi;
#endif
};
typedef struct arch_shared_info arch_shared_info_t;
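
/*
 * Illustrative sketch (software outside the guest, e.g. in Dom0): using
 * p2m_generation as a sequence count when reading the linear p2m list.
 * `arch_info' is a pointer to the guest's struct arch_shared_info obtained
 * by the caller; `read_p2m_entry()' and `rmb()' are made-up helpers
 * standing in for the caller's mapping and read-barrier primitives.
 *
 *     unsigned long gen, mfn;
 *     do {
 *         gen = arch_info->p2m_generation;
 *         rmb();
 *         mfn = read_p2m_entry(arch_info->p2m_cr3,
 *                              arch_info->p2m_vaddr, pfn);
 *         rmb();
 *     } while ( (gen & 1) || gen != arch_info->p2m_generation );
 */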

#if defined(__XEN__) || defined(__XEN_TOOLS__)
/*
 * struct xen_arch_domainconfig's ABI is covered by
 * XEN_DOMCTL_INTERFACE_VERSION.
 */
struct xen_arch_domainconfig {
#define _XEN_X86_EMU_LAPIC          0
#define XEN_X86_EMU_LAPIC           (1U<<_XEN_X86_EMU_LAPIC)
#define _XEN_X86_EMU_HPET           1
#define XEN_X86_EMU_HPET            (1U<<_XEN_X86_EMU_HPET)
#define _XEN_X86_EMU_PM             2
#define XEN_X86_EMU_PM              (1U<<_XEN_X86_EMU_PM)
#define _XEN_X86_EMU_RTC            3
#define XEN_X86_EMU_RTC             (1U<<_XEN_X86_EMU_RTC)
#define _XEN_X86_EMU_IOAPIC         4
#define XEN_X86_EMU_IOAPIC          (1U<<_XEN_X86_EMU_IOAPIC)
#define _XEN_X86_EMU_PIC            5
#define XEN_X86_EMU_PIC             (1U<<_XEN_X86_EMU_PIC)
#define _XEN_X86_EMU_VGA            6
#define XEN_X86_EMU_VGA             (1U<<_XEN_X86_EMU_VGA)
#define _XEN_X86_EMU_IOMMU          7
#define XEN_X86_EMU_IOMMU           (1U<<_XEN_X86_EMU_IOMMU)
#define _XEN_X86_EMU_PIT            8
#define XEN_X86_EMU_PIT             (1U<<_XEN_X86_EMU_PIT)
#define _XEN_X86_EMU_USE_PIRQ       9
#define XEN_X86_EMU_USE_PIRQ        (1U<<_XEN_X86_EMU_USE_PIRQ)
#define _XEN_X86_EMU_VPCI           10
#define XEN_X86_EMU_VPCI            (1U<<_XEN_X86_EMU_VPCI)

#define XEN_X86_EMU_ALL             (XEN_X86_EMU_LAPIC | XEN_X86_EMU_HPET |  \
                                     XEN_X86_EMU_PM | XEN_X86_EMU_RTC |      \
                                     XEN_X86_EMU_IOAPIC | XEN_X86_EMU_PIC |  \
                                     XEN_X86_EMU_VGA | XEN_X86_EMU_IOMMU |   \
                                     XEN_X86_EMU_PIT | XEN_X86_EMU_USE_PIRQ |\
                                     XEN_X86_EMU_VPCI)
    uint32_t emulation_flags;

/*
 * Select relaxed behaviour for accesses to MSRs not explicitly handled by
 * Xen, instead of injecting a #GP fault into the guest. Note this option
 * doesn't allow the guest to read or write the underlying MSR.
 */
#define XEN_X86_MSR_RELAXED (1u << 0)
    uint32_t misc_flags;
};

/* Max XEN_X86_* constant. Used for ABI checking. */
#define XEN_X86_MISC_FLAGS_MAX XEN_X86_MSR_RELAXED
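
/*
 * Illustrative sketch (toolstack side): a domain wanting the full set of
 * emulated devices plus relaxed MSR handling.  Only the two flag fields are
 * shown; the enclosing domain-creation structure is omitted.
 *
 *     struct xen_arch_domainconfig cfg = {
 *         .emulation_flags = XEN_X86_EMU_ALL,
 *         .misc_flags      = XEN_X86_MSR_RELAXED,
 *     };
 */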

#endif

/*
 * Representations of architectural CPUID and MSR information.  Used as the
 * serialised version of Xen's internal representation.
 */
typedef struct xen_cpuid_leaf {
#define XEN_CPUID_NO_SUBLEAF 0xffffffffu
    uint32_t leaf, subleaf;
    uint32_t a, b, c, d;
} xen_cpuid_leaf_t;
DEFINE_XEN_GUEST_HANDLE(xen_cpuid_leaf_t);

typedef struct xen_msr_entry {
    uint32_t idx;
    uint32_t flags; /* Reserved MBZ. */
    uint64_t val;
} xen_msr_entry_t;
DEFINE_XEN_GUEST_HANDLE(xen_msr_entry_t);
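
/*
 * Illustrative sketch: one serialised CPUID leaf (leaf 1, which has no
 * subleaves) and one serialised MSR (0xc0000080, EFER).  The register and
 * MSR values shown are examples only.
 *
 *     xen_cpuid_leaf_t leaf = {
 *         .leaf = 1, .subleaf = XEN_CPUID_NO_SUBLEAF,
 *         .a = 0x000306c3, .b = 0, .c = 0, .d = 0,
 *     };
 *     xen_msr_entry_t msr = {
 *         .idx = 0xc0000080, .flags = 0, .val = 0,
 *     };
 */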

#endif /* !__ASSEMBLY__ */

/*
 * ` enum neg_errnoval
 * ` HYPERVISOR_fpu_taskswitch(int set);
 * `
 * Sets (if set!=0) or clears (if set==0) CR0.TS.
 */
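
/*
 * Illustrative sketch (PV guest, lazy FPU switching): setting CR0.TS on a
 * context switch so that the next FPU instruction raises #NM and the FPU
 * state can be restored on demand.  The hypercall wrapper is assumed to be
 * provided by the guest OS.
 *
 *     HYPERVISOR_fpu_taskswitch(1);   // set TS on context switch
 *     ...
 *     HYPERVISOR_fpu_taskswitch(0);   // clear TS before restoring FPU state
 */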

/*
 * ` enum neg_errnoval
 * ` HYPERVISOR_set_debugreg(int regno, unsigned long value);
 *
 * ` unsigned long
 * ` HYPERVISOR_get_debugreg(int regno);
 * For 0<=regno<=7, returns the debug register value.
 * For other values of regno, returns ((unsigned long)-EINVAL).
 * (Unfortunately, this interface is defective.)
 */
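
/*
 * Illustrative sketch (PV guest): programming a hardware breakpoint address
 * into DR0 and reading it back.  `watch_addr' is a made-up variable; the
 * hypercall wrappers are assumed to be provided by the guest OS.
 *
 *     HYPERVISOR_set_debugreg(0, watch_addr);
 *     unsigned long cur = HYPERVISOR_get_debugreg(0);
 */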

/*
 * Prefix forces emulation of some non-trapping instructions.
 * Currently only CPUID.
 */
#ifdef __ASSEMBLY__
#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ;
#define XEN_CPUID          XEN_EMULATE_PREFIX cpuid
#else
#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; "
#define XEN_CPUID          XEN_EMULATE_PREFIX "cpuid"
#endif
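
/*
 * Illustrative sketch: issuing a forcibly-emulated CPUID from C, so that
 * Xen rather than the hardware answers the query (e.g. to probe the Xen
 * hypervisor leaves starting at 0x40000000).
 *
 *     uint32_t eax = 0x40000000, ebx, ecx = 0, edx;
 *     asm volatile ( XEN_CPUID
 *                    : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
 *                    : "0" (eax), "2" (ecx) );
 */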

/*
 * Debug console IO port, also called "port E9 hack". Each character written
 * to this IO port will be printed on the hypervisor console, subject to log
 * level restrictions.
 */
#define XEN_HVM_DEBUGCONS_IOPORT 0xe9
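
/*
 * Illustrative sketch (guest side): emitting one character to the debug
 * console with a raw port write.
 *
 *     asm volatile ( "outb %b0, %w1"
 *                    : : "a" ((uint8_t)'x'),
 *                        "Nd" ((uint16_t)XEN_HVM_DEBUGCONS_IOPORT) );
 */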

#endif /* __XEN_PUBLIC_ARCH_X86_XEN_H__ */

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */