xref: /openbmc/qemu/include/system/kvm_int.h (revision f96b157ebb93f94cd56ebbc99bc20982b8fd86ef)
1 /*
2  * Internal definitions for a target's KVM support
3  *
4  * This work is licensed under the terms of the GNU GPL, version 2 or later.
5  * See the COPYING file in the top-level directory.
6  *
7  */
8 
9 #ifndef QEMU_KVM_INT_H
10 #define QEMU_KVM_INT_H
11 
12 #include "system/memory.h"
13 #include "qapi/qapi-types-common.h"
14 #include "qemu/accel.h"
15 #include "qemu/queue.h"
16 #include "system/kvm.h"
17 #include "accel/accel-ops.h"
18 #include "hw/boards.h"
19 #include "hw/i386/topology.h"
20 #include "io/channel-socket.h"
21 
/*
 * KVMSlot: bookkeeping record for a single KVM memory slot.
 * KVMMemoryListener refers to these records through its 'slots' pointer.
 *
 * NOTE(review): the hedged field notes below are inferred from names and
 * types only; confirm them against the code that fills the slot in.
 */
22 typedef struct KVMSlot
23 {
24     hwaddr start_addr; /* presumably the slot's guest-physical base -- TODO confirm */
25     ram_addr_t memory_size; /* size of the slot; presumably in bytes -- TODO confirm */
26     void *ram; /* presumably the host mapping backing the slot -- TODO confirm */
27     int slot; /* slot number; NOTE(review): numbering scheme is not visible here */
28     int flags; /* slot flags; NOTE(review): bit meanings are defined elsewhere */
29     int old_flags; /* presumably the flags value before the latest change -- TODO confirm */
30     /* Dirty bitmap cache for the slot */
31     unsigned long *dirty_bmap;
32     unsigned long dirty_bmap_size; /* size of dirty_bmap; NOTE(review): unit (bytes vs. bits) not stated here */
33     /* Cache of the address space ID */
34     int as_id;
35     /* Cache of the offset in ram address space */
36     ram_addr_t ram_start_offset;
37     int guest_memfd; /* presumably a guest_memfd file descriptor -- TODO confirm */
38     hwaddr guest_memfd_offset; /* presumably the slot's offset inside guest_memfd -- TODO confirm */
39 } KVMSlot;
40 
/*
 * KVMMemoryUpdate: singly-linked queue node (QSIMPLEQ) that carries one
 * MemoryRegionSection by value. KVMMemoryListener keeps two queues of these
 * nodes: 'transaction_add' and 'transaction_del'.
 */
41 typedef struct KVMMemoryUpdate {
42     QSIMPLEQ_ENTRY(KVMMemoryUpdate) next; /* queue linkage */
43     MemoryRegionSection section; /* the section this update refers to */
44 } KVMMemoryUpdate;
45 
/*
 * KVMMemoryListener: a MemoryListener extended with KVM slot bookkeeping
 * and two queues of pending KVMMemoryUpdate nodes. KVMState embeds one
 * instance as 'memory_listener', and struct KVMAs points at instances
 * through its 'ml' member.
 */
46 typedef struct KVMMemoryListener {
47     MemoryListener listener; /* embedded generic listener (first member) */
48     KVMSlot *slots; /* slot records; NOTE(review): presumably an array sized by nr_slots_allocated */
49     unsigned int nr_slots_used; /* count of slots in use -- presumably <= nr_slots_allocated, TODO confirm */
50     unsigned int nr_slots_allocated; /* count of slot records allocated -- TODO confirm */
51     int as_id; /* address space ID associated with this listener */
52     QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_add; /* queued updates named "add" */
53     QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_del; /* queued updates named "del" */
54 } KVMMemoryListener;
55 
/*
 * NOTE(review): not referenced anywhere else in the visible part of this
 * header; judging by the name it is presumably the bucket count of an MSI
 * route hash table -- confirm against its users.
 */
56 #define KVM_MSI_HASHTAB_SIZE    256
57 
/*
 * KVMHostTopoInfo: host package/CPU topology summary. struct KVMMsrEnergy
 * embeds one instance as 'host_topo'.
 */
58 typedef struct KVMHostTopoInfo {
59     /* Number of packages on the host */
60     unsigned int maxpkgs;
61     /* Number of CPUs on the host */
62     unsigned int maxcpus;
63     /* Number of CPUs in each package (NOTE(review): presumably indexed by package -- confirm) */
64     unsigned int *pkg_cpu_count;
65     /* Each package can have a different maxticks value (NOTE(review): presumably one entry per package -- confirm) */
66     unsigned int *maxticks;
67 } KVMHostTopoInfo;
68 
/*
 * KVMMsrEnergy: state grouped under the "MSR energy" name. KVMState embeds
 * one instance as 'msr_energy'. It bundles a socket path and socket
 * channel, a thread handle, guest and host topology data, and several
 * 64-bit MSR-related values.
 *
 * NOTE(review): none of the fields are documented in this header; the
 * hedged notes below are inferred from names and types and need to be
 * confirmed against the code that uses them.
 */
69 struct KVMMsrEnergy {
70     pid_t pid; /* a process ID; NOTE(review): which process is not visible here */
71     bool enable; /* presumably whether the feature is turned on -- TODO confirm */
72     char *socket_path; /* presumably the path used to set up 'sioc' -- TODO confirm */
73     QIOChannelSocket *sioc; /* socket channel handle */
74     QemuThread msr_thr; /* thread handle; NOTE(review): thread body is defined elsewhere */
75     unsigned int guest_vcpus; /* presumably the guest vCPU count -- TODO confirm */
76     unsigned int guest_vsockets; /* presumably the guest virtual socket count -- TODO confirm */
77     X86CPUTopoInfo guest_topo_info; /* guest topology description (x86 type) */
78     KVMHostTopoInfo host_topo; /* host topology summary, embedded by value */
79     const CPUArchIdList *guest_cpu_list; /* read-only pointer; NOTE(review): ownership not visible here */
80     uint64_t *msr_value; /* pointer to 64-bit values; NOTE(review): element count and indexing not visible here */
81     uint64_t msr_unit; /* NOTE(review): meaning/encoding not documented here */
82     uint64_t msr_limit; /* NOTE(review): meaning/encoding not documented here */
83     uint64_t msr_info; /* NOTE(review): meaning/encoding not documented here */
84 };
85 
/* States stored in KVMDirtyRingReaper.reaper_state. */
86 enum KVMDirtyRingReaperState {
87     KVM_DIRTY_RING_REAPER_NONE = 0, /* explicitly 0, so zero-filled storage reads as NONE */
88     /* The reaper is sleeping */
89     KVM_DIRTY_RING_REAPER_WAIT,
90     /* The reaper is reaping for dirty pages */
91     KVM_DIRTY_RING_REAPER_REAPING,
92 };
93 
94 /*
95  * KVM reaper instance, responsible for collecting the KVM dirty bits
96  * via the dirty ring.
 *
 * KVMState embeds one instance of this struct as its 'reaper' member.
 *
 * NOTE(review): both non-thread members are declared volatile. volatile
 * alone does not order or make atomic any access between threads, so
 * confirm how the reaper thread and its observers synchronize.
97  */
98 struct KVMDirtyRingReaper {
99     /* The reaper thread */
100     QemuThread reaper_thr;
101     volatile uint64_t reaper_iteration; /* iteration number of reaper thr */
102     volatile enum KVMDirtyRingReaperState reaper_state; /* reap thr state */
103 };
/*
 * KVMState: the KVM state structure. Its first member is an AccelState
 * ('parent_obj'); the rest aggregates file descriptors, coalesced-I/O
 * state, IRQ routing state, the embedded memory listener, dirty-ring
 * configuration, the dirty-ring reaper, MSR energy state and Xen-related
 * values.
 *
 * Two groups of members exist only under preprocessor conditions
 * (TARGET_KVM_HAVE_GUEST_DEBUG and KVM_CAP_IRQ_ROUTING), so the layout of
 * this struct depends on how the including file is built.
 *
 * NOTE(review): hedged field notes below are inferred from names and types
 * only; confirm against the code that uses them.
 */
104 struct KVMState
105 {
106     AccelState parent_obj; /* embedded base state; must stay the first member if code casts between the two -- TODO confirm */
107     /* Max number of KVM slots supported */
108     int nr_slots_max;
109     int fd; /* presumably the KVM device file descriptor -- TODO confirm */
110     int vmfd; /* presumably the per-VM file descriptor -- TODO confirm */
111     int coalesced_mmio;
112     int coalesced_pio;
113     struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
114     bool coalesced_flush_in_progress;
115     int vcpu_events;
116 #ifdef TARGET_KVM_HAVE_GUEST_DEBUG
117     QTAILQ_HEAD(, kvm_sw_breakpoint) kvm_sw_breakpoints; /* present only when TARGET_KVM_HAVE_GUEST_DEBUG is defined */
118 #endif
119     int max_nested_state_len;
120     int kvm_shadow_mem;
121     bool kernel_irqchip_allowed;
122     bool kernel_irqchip_required;
123     OnOffAuto kernel_irqchip_split; /* tri-state (OnOffAuto) rather than bool */
124     bool sync_mmu;
125     bool guest_state_protected;
126     uint64_t manual_dirty_log_protect;
127     /*
128      * Older POSIX says that ioctl numbers are signed int, but in
129      * practice they are not. (Newer POSIX doesn't specify ioctl
130      * at all.) Linux, glibc and *BSD all treat ioctl numbers as
131      * unsigned, and real-world ioctl values like KVM_GET_XSAVE have
132      * bit 31 set, which means that passing them via an 'int' will
133      * result in sign-extension when they get converted back to the
134      * 'unsigned long' which the ioctl() prototype uses. Luckily Linux
135      * always treats the argument as an unsigned 32-bit int, so any
136      * possible sign-extension is deliberately ignored, but for
137      * consistency we keep to the same type that glibc is using.
138      */
139     unsigned long irq_set_ioctl;
140     unsigned int sigmask_len;
141     GHashTable *gsimap;
142 #ifdef KVM_CAP_IRQ_ROUTING
143     struct kvm_irq_routing *irq_routes; /* this and the next three members exist only when KVM_CAP_IRQ_ROUTING is defined */
144     int nr_allocated_irq_routes;
145     unsigned long *used_gsi_bitmap;
146     unsigned int gsi_count;
147 #endif
148     KVMMemoryListener memory_listener; /* embedded by value */
149     QLIST_HEAD(, KVMParkedVcpu) kvm_parked_vcpus;
150 
151     /* For "info mtree -f" to tell if an MR is registered in KVM */
152     int nr_as;
153     struct KVMAs {
154         KVMMemoryListener *ml;
155         AddressSpace *as;
156     } *as; /* NOTE(review): presumably an array with nr_as entries -- confirm */
157     uint64_t kvm_dirty_ring_bytes;  /* Size of the per-vcpu dirty ring */
158     uint32_t kvm_dirty_ring_size;   /* Number of dirty GFNs per ring */
159     bool kvm_dirty_ring_with_bitmap;
160     uint64_t kvm_eager_split_size;  /* Eager Page Splitting chunk size */
161     struct KVMDirtyRingReaper reaper; /* embedded by value */
162     struct KVMMsrEnergy msr_energy; /* embedded by value */
163     NotifyVmexitOption notify_vmexit;
164     uint32_t notify_window;
165     uint32_t xen_version;
166     uint32_t xen_caps;
167     uint16_t xen_gnttab_max_frames;
168     uint16_t xen_evtchn_max_pirq;
169     char *device; /* NOTE(review): presumably the KVM device path; ownership not visible here */
170 };
171 
/*
 * kvm_memory_listener_register:
 *
 * Prototype only; the implementation is not part of this header.
 *
 * Parameters:
 *  @s: the KVMState involved
 *  @kml: the KVMMemoryListener to register
 *  @as: the AddressSpace involved
 *  @as_id: integer address space ID (KVMMemoryListener and KVMSlot both
 *          carry an 'as_id' member)
 *  @name: a name string; NOTE(review): what it names and whether it is
 *         copied are not visible here
 *
 * NOTE(review): presumably attaches @kml to @as on behalf of @s -- confirm
 * against the definition.
 *
 * Return: None.
 */
172 void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
173                                   AddressSpace *as, int as_id, const char *name);
174 
/*
 * kvm_set_max_memslot_size:
 *
 * Prototype only; the implementation is not part of this header.
 *
 * Parameters:
 *  @max_slot_size: the maximum memslot size to set, as a hwaddr
 *                  (NOTE(review): unit and alignment requirements are not
 *                  visible here)
 *
 * Return: None.
 */
174 void kvm_set_max_memslot_size(hwaddr max_slot_size);
176 
177 /**
178  * kvm_hwpoison_page_add:
179  *
180  * Parameters:
181  *  @ram_addr: the address in the RAM for the poisoned page
182  *
183  * Add a poisoned page to the list
184  *
185  * Return: None.
186  */
187 void kvm_hwpoison_page_add(ram_addr_t ram_addr);
188 #endif
189