1 /*
2 * Xen HVM emulation support in KVM
3 *
4 * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
5 * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2 or later.
8 * See the COPYING file in the top-level directory.
9 *
10 */
11
12 #include "qemu/osdep.h"
13 #include "qemu/log.h"
14 #include "qemu/main-loop.h"
15 #include "qemu/error-report.h"
16 #include "hw/xen/xen.h"
17 #include "sysemu/kvm_int.h"
18 #include "sysemu/kvm_xen.h"
19 #include "kvm/kvm_i386.h"
20 #include "exec/address-spaces.h"
21 #include "xen-emu.h"
22 #include "trace.h"
23 #include "sysemu/runstate.h"
24
25 #include "hw/pci/msi.h"
26 #include "hw/i386/apic-msidef.h"
27 #include "hw/i386/e820_memory_layout.h"
28 #include "hw/i386/kvm/xen_overlay.h"
29 #include "hw/i386/kvm/xen_evtchn.h"
30 #include "hw/i386/kvm/xen_gnttab.h"
31 #include "hw/i386/kvm/xen_primary_console.h"
32 #include "hw/i386/kvm/xen_xenstore.h"
33
34 #include "hw/xen/interface/version.h"
35 #include "hw/xen/interface/sched.h"
36 #include "hw/xen/interface/memory.h"
37 #include "hw/xen/interface/hvm/hvm_op.h"
38 #include "hw/xen/interface/hvm/params.h"
39 #include "hw/xen/interface/vcpu.h"
40 #include "hw/xen/interface/event_channel.h"
41 #include "hw/xen/interface/grant_table.h"
42
43 #include "xen-compat.h"
44
45 static void xen_vcpu_singleshot_timer_event(void *opaque);
46 static void xen_vcpu_periodic_timer_event(void *opaque);
47 static int vcpuop_stop_singleshot_timer(CPUState *cs);
48
49 #ifdef TARGET_X86_64
50 #define hypercall_compat32(longmode) (!(longmode))
51 #else
52 #define hypercall_compat32(longmode) (false)
53 #endif
54
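/*
 * Translate a guest virtual address to a guest physical address using
 * KVM_TRANSLATE. If @len is non-NULL, it is set to the number of bytes
 * remaining in the page so that callers can iterate page by page.
 */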
55 static bool kvm_gva_to_gpa(CPUState *cs, uint64_t gva, uint64_t *gpa,
56 size_t *len, bool is_write)
57 {
58 struct kvm_translation tr = {
59 .linear_address = gva,
60 };
61
62 if (len) {
63 *len = TARGET_PAGE_SIZE - (gva & ~TARGET_PAGE_MASK);
64 }
65
66 if (kvm_vcpu_ioctl(cs, KVM_TRANSLATE, &tr) || !tr.valid ||
67 (is_write && !tr.writeable)) {
68 return false;
69 }
70 *gpa = tr.physical_address;
71 return true;
72 }
73
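/*
 * Copy between guest virtual memory and a host buffer, crossing page
 * boundaries as needed via kvm_gva_to_gpa().
 */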
74 static int kvm_gva_rw(CPUState *cs, uint64_t gva, void *_buf, size_t sz,
75 bool is_write)
76 {
77 uint8_t *buf = (uint8_t *)_buf;
78 uint64_t gpa;
79 size_t len;
80
81 while (sz) {
82 if (!kvm_gva_to_gpa(cs, gva, &gpa, &len, is_write)) {
83 return -EFAULT;
84 }
85 if (len > sz) {
86 len = sz;
87 }
88
89 cpu_physical_memory_rw(gpa, buf, len, is_write);
90
91 buf += len;
92 sz -= len;
93 gva += len;
94 }
95
96 return 0;
97 }
98
99 static inline int kvm_copy_from_gva(CPUState *cs, uint64_t gva, void *buf,
100 size_t sz)
101 {
102 return kvm_gva_rw(cs, gva, buf, sz, false);
103 }
104
105 static inline int kvm_copy_to_gva(CPUState *cs, uint64_t gva, void *buf,
106 size_t sz)
107 {
108 return kvm_gva_rw(cs, gva, buf, sz, true);
109 }
110
111 int kvm_xen_init(KVMState *s, uint32_t hypercall_msr)
112 {
113 const int required_caps = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
114 KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | KVM_XEN_HVM_CONFIG_SHARED_INFO;
115 struct kvm_xen_hvm_config cfg = {
116 .msr = hypercall_msr,
117 .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
118 };
119 int xen_caps, ret;
120
121 xen_caps = kvm_check_extension(s, KVM_CAP_XEN_HVM);
122 if (required_caps & ~xen_caps) {
123 error_report("kvm: Xen HVM guest support not present or insufficient");
124 return -ENOSYS;
125 }
126
127 if (xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND) {
128 struct kvm_xen_hvm_attr ha = {
129 .type = KVM_XEN_ATTR_TYPE_XEN_VERSION,
130 .u.xen_version = s->xen_version,
131 };
132 (void)kvm_vm_ioctl(s, KVM_XEN_HVM_SET_ATTR, &ha);
133
134 cfg.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
135 }
136
137 ret = kvm_vm_ioctl(s, KVM_XEN_HVM_CONFIG, &cfg);
138 if (ret < 0) {
139 error_report("kvm: Failed to enable Xen HVM support: %s",
140 strerror(-ret));
141 return ret;
142 }
143
144 /* If called a second time, don't repeat the rest of the setup. */
145 if (s->xen_caps) {
146 return 0;
147 }
148
149 /*
150 * Event channel delivery via GSI/PCI_INTX needs to poll the vcpu_info
151 * of vCPU0 to deassert the IRQ when ->evtchn_upcall_pending is cleared.
152 *
153 * In the kernel, there's a notifier hook on the PIC/IOAPIC which allows
154 * such things to be polled at precisely the right time. We *could* do
155 * it nicely in the kernel: check vcpu_info[0]->evtchn_upcall_pending at
156 * the moment the IRQ is acked, and see if it should be reasserted.
157 *
158 * But the in-kernel irqchip is deprecated, so we're unlikely to add
159 * that support in the kernel. Insist on using the split irqchip mode
160 * instead.
161 *
162 * This leaves us polling for the level going low in QEMU, which lacks
163 * the appropriate hooks in its PIC/IOAPIC code. Even VFIO is sending a
164 * spurious 'ack' to an INTX IRQ every time there's any MMIO access to
165 * the device (for which it has to unmap the device and trap access, for
166 * some period after an IRQ!!). In the Xen case, we do it on exit from
167 * KVM_RUN, if the flag is set to say that the GSI is currently asserted.
168 * Which is kind of icky, but less so than the VFIO one. I may fix them
169 * both later...
170 */
171 if (!kvm_kernel_irqchip_split()) {
172 error_report("kvm: Xen support requires kernel-irqchip=split");
173 return -EINVAL;
174 }
175
176 s->xen_caps = xen_caps;
177
178 /* Tell fw_cfg to notify the BIOS to reserve the range. */
179 e820_add_entry(XEN_SPECIAL_AREA_ADDR, XEN_SPECIAL_AREA_SIZE, E820_RESERVED);
180
181 /* The pages couldn't be overlaid until KVM was initialized */
182 xen_primary_console_reset();
183 xen_xenstore_reset();
184
185 return 0;
186 }
187
188 int kvm_xen_init_vcpu(CPUState *cs)
189 {
190 X86CPU *cpu = X86_CPU(cs);
191 CPUX86State *env = &cpu->env;
192 int err;
193
194 /*
195 * The kernel needs to know the Xen/ACPI vCPU ID because that's
196 * what the guest uses in hypercalls such as timers. It doesn't
197 * match the APIC ID which is generally used for talking to the
198 * kernel about vCPUs. And if vCPU threads race with creating
199 * their KVM vCPUs out of order, it doesn't necessarily match
200 * with the kernel's internal vCPU indices either.
201 */
202 if (kvm_xen_has_cap(EVTCHN_SEND)) {
203 struct kvm_xen_vcpu_attr va = {
204 .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID,
205 .u.vcpu_id = cs->cpu_index,
206 };
207 err = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &va);
208 if (err) {
209 error_report("kvm: Failed to set Xen vCPU ID attribute: %s",
210 strerror(-err));
211 return err;
212 }
213 }
214
215 env->xen_vcpu_info_gpa = INVALID_GPA;
216 env->xen_vcpu_info_default_gpa = INVALID_GPA;
217 env->xen_vcpu_time_info_gpa = INVALID_GPA;
218 env->xen_vcpu_runstate_gpa = INVALID_GPA;
219
220 qemu_mutex_init(&env->xen_timers_lock);
221 env->xen_singleshot_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
222 xen_vcpu_singleshot_timer_event,
223 cpu);
224 if (!env->xen_singleshot_timer) {
225 return -ENOMEM;
226 }
227 env->xen_singleshot_timer->opaque = cs;
228
229 env->xen_periodic_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
230 xen_vcpu_periodic_timer_event,
231 cpu);
232 if (!env->xen_periodic_timer) {
233 return -ENOMEM;
234 }
235 env->xen_periodic_timer->opaque = cs;
236
237 return 0;
238 }
239
240 uint32_t kvm_xen_get_caps(void)
241 {
242 return kvm_state->xen_caps;
243 }
244
245 static bool kvm_xen_hcall_xen_version(struct kvm_xen_exit *exit, X86CPU *cpu,
246 int cmd, uint64_t arg)
247 {
248 int err = 0;
249
250 switch (cmd) {
251 case XENVER_get_features: {
252 struct xen_feature_info fi;
253
254 /* No need for 32/64 compat handling */
255 qemu_build_assert(sizeof(fi) == 8);
256
257 err = kvm_copy_from_gva(CPU(cpu), arg, &fi, sizeof(fi));
258 if (err) {
259 break;
260 }
261
262 fi.submap = 0;
263 if (fi.submap_idx == 0) {
264 fi.submap |= 1 << XENFEAT_writable_page_tables |
265 1 << XENFEAT_writable_descriptor_tables |
266 1 << XENFEAT_auto_translated_physmap |
267 1 << XENFEAT_hvm_callback_vector |
268 1 << XENFEAT_hvm_safe_pvclock |
269 1 << XENFEAT_hvm_pirqs;
270 }
271
272 err = kvm_copy_to_gva(CPU(cpu), arg, &fi, sizeof(fi));
273 break;
274 }
275
276 default:
277 return false;
278 }
279
280 exit->u.hcall.result = err;
281 return true;
282 }
283
284 static int kvm_xen_set_vcpu_attr(CPUState *cs, uint16_t type, uint64_t gpa)
285 {
286 struct kvm_xen_vcpu_attr xhsi;
287
288 xhsi.type = type;
289 xhsi.u.gpa = gpa;
290
291 trace_kvm_xen_set_vcpu_attr(cs->cpu_index, type, gpa);
292
293 return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &xhsi);
294 }
295
296 static int kvm_xen_set_vcpu_callback_vector(CPUState *cs)
297 {
298 uint8_t vector = X86_CPU(cs)->env.xen_vcpu_callback_vector;
299 struct kvm_xen_vcpu_attr xva;
300
301 xva.type = KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR;
302 xva.u.vector = vector;
303
304 trace_kvm_xen_set_vcpu_callback(cs->cpu_index, vector);
305
306 return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &xva);
307 }
308
309 static void do_set_vcpu_callback_vector(CPUState *cs, run_on_cpu_data data)
310 {
311 X86CPU *cpu = X86_CPU(cs);
312 CPUX86State *env = &cpu->env;
313
314 env->xen_vcpu_callback_vector = data.host_int;
315
316 if (kvm_xen_has_cap(EVTCHN_SEND)) {
317 kvm_xen_set_vcpu_callback_vector(cs);
318 }
319 }
320
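/*
 * Tell KVM the GPA of this vCPU's vcpu_info, and cache a host mapping of
 * it so QEMU can read evtchn_upcall_pending directly on the fast path.
 */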
321 static int set_vcpu_info(CPUState *cs, uint64_t gpa)
322 {
323 X86CPU *cpu = X86_CPU(cs);
324 CPUX86State *env = &cpu->env;
325 MemoryRegionSection mrs = { .mr = NULL };
326 void *vcpu_info_hva = NULL;
327 int ret;
328
329 ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, gpa);
330 if (ret || gpa == INVALID_GPA) {
331 goto out;
332 }
333
334 mrs = memory_region_find(get_system_memory(), gpa,
335 sizeof(struct vcpu_info));
336 if (mrs.mr && mrs.mr->ram_block &&
337 !int128_lt(mrs.size, int128_make64(sizeof(struct vcpu_info)))) {
338 vcpu_info_hva = qemu_map_ram_ptr(mrs.mr->ram_block,
339 mrs.offset_within_region);
340 }
341 if (!vcpu_info_hva) {
342 if (mrs.mr) {
343 memory_region_unref(mrs.mr);
344 mrs.mr = NULL;
345 }
346 ret = -EINVAL;
347 }
348
349 out:
350 if (env->xen_vcpu_info_mr) {
351 memory_region_unref(env->xen_vcpu_info_mr);
352 }
353 env->xen_vcpu_info_hva = vcpu_info_hva;
354 env->xen_vcpu_info_mr = mrs.mr;
355 return ret;
356 }
357
358 static void do_set_vcpu_info_default_gpa(CPUState *cs, run_on_cpu_data data)
359 {
360 X86CPU *cpu = X86_CPU(cs);
361 CPUX86State *env = &cpu->env;
362
363 env->xen_vcpu_info_default_gpa = data.host_ulong;
364
365 /* Changing the default does nothing if a vcpu_info was explicitly set. */
366 if (env->xen_vcpu_info_gpa == INVALID_GPA) {
367 set_vcpu_info(cs, env->xen_vcpu_info_default_gpa);
368 }
369 }
370
371 static void do_set_vcpu_info_gpa(CPUState *cs, run_on_cpu_data data)
372 {
373 X86CPU *cpu = X86_CPU(cs);
374 CPUX86State *env = &cpu->env;
375
376 env->xen_vcpu_info_gpa = data.host_ulong;
377
378 set_vcpu_info(cs, env->xen_vcpu_info_gpa);
379 }
380
381 void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id)
382 {
383 CPUState *cs = qemu_get_cpu(vcpu_id);
384 if (!cs) {
385 return NULL;
386 }
387
388 return X86_CPU(cs)->env.xen_vcpu_info_hva;
389 }
390
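/*
 * Polled after KVM_RUN while the GSI/PCI_INTX callback is asserted: once
 * the guest has cleared evtchn_upcall_pending in vCPU0's vcpu_info, the
 * callback IRQ level is lowered again.
 */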
391 void kvm_xen_maybe_deassert_callback(CPUState *cs)
392 {
393 CPUX86State *env = &X86_CPU(cs)->env;
394 struct vcpu_info *vi = env->xen_vcpu_info_hva;
395 if (!vi) {
396 return;
397 }
398
399 /* If the evtchn_upcall_pending flag is cleared, turn the GSI off. */
400 if (!vi->evtchn_upcall_pending) {
401 bql_lock();
402 /*
403 * Check again now we have the lock, because it may have been
404 * asserted in the interim. And we don't want to take the lock
405 * every time because this is a fast path.
406 */
407 if (!vi->evtchn_upcall_pending) {
408 X86_CPU(cs)->env.xen_callback_asserted = false;
409 xen_evtchn_set_callback_level(0);
410 }
411 bql_unlock();
412 }
413 }
414
415 void kvm_xen_set_callback_asserted(void)
416 {
417 CPUState *cs = qemu_get_cpu(0);
418
419 if (cs) {
420 X86_CPU(cs)->env.xen_callback_asserted = true;
421 }
422 }
423
424 bool kvm_xen_has_vcpu_callback_vector(void)
425 {
426 CPUState *cs = qemu_get_cpu(0);
427
428 return cs && !!X86_CPU(cs)->env.xen_vcpu_callback_vector;
429 }
430
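/*
 * Deliver the event channel upcall to a vCPU: as an MSI if a per-vCPU
 * vector is registered, otherwise by kicking the vCPU (for the
 * HVM_PARAM_CALLBACK_TYPE_VECTOR method) or by asserting the GSI/PCI_INTX
 * level for vCPU0.
 */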
431 void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type)
432 {
433 CPUState *cs = qemu_get_cpu(vcpu_id);
434 uint8_t vector;
435
436 if (!cs) {
437 return;
438 }
439
440 vector = X86_CPU(cs)->env.xen_vcpu_callback_vector;
441 if (vector) {
442 /*
443 * The per-vCPU callback vector is injected via the local APIC. Just
444 * deliver it as an MSI.
445 */
446 MSIMessage msg = {
447 .address = APIC_DEFAULT_ADDRESS |
448 (X86_CPU(cs)->apic_id << MSI_ADDR_DEST_ID_SHIFT),
449 .data = vector | (1UL << MSI_DATA_LEVEL_SHIFT),
450 };
451 kvm_irqchip_send_msi(kvm_state, msg);
452 return;
453 }
454
455 switch (type) {
456 case HVM_PARAM_CALLBACK_TYPE_VECTOR:
457 /*
458 * If the evtchn_upcall_pending field in the vcpu_info is set, then
459 * KVM will automatically deliver the vector on entering the vCPU
460 * so all we have to do is kick it out.
461 */
462 qemu_cpu_kick(cs);
463 break;
464
465 case HVM_PARAM_CALLBACK_TYPE_GSI:
466 case HVM_PARAM_CALLBACK_TYPE_PCI_INTX:
467 if (vcpu_id == 0) {
468 xen_evtchn_set_callback_level(1);
469 }
470 break;
471 }
472 }
473
474 /* Must always be called with xen_timers_lock held */
475 static int kvm_xen_set_vcpu_timer(CPUState *cs)
476 {
477 X86CPU *cpu = X86_CPU(cs);
478 CPUX86State *env = &cpu->env;
479
480 struct kvm_xen_vcpu_attr va = {
481 .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
482 .u.timer.port = env->xen_virq[VIRQ_TIMER],
483 .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
484 .u.timer.expires_ns = env->xen_singleshot_timer_ns,
485 };
486
487 return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &va);
488 }
489
490 static void do_set_vcpu_timer_virq(CPUState *cs, run_on_cpu_data data)
491 {
492 QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock);
493 kvm_xen_set_vcpu_timer(cs);
494 }
495
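/*
 * Bind a VIRQ to an event channel port for the given vCPU. For VIRQ_TIMER
 * the port is also handed to the kernel, if it handles event channel
 * delivery, so that timer events can be raised directly.
 */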
496 int kvm_xen_set_vcpu_virq(uint32_t vcpu_id, uint16_t virq, uint16_t port)
497 {
498 CPUState *cs = qemu_get_cpu(vcpu_id);
499
500 if (!cs) {
501 return -ENOENT;
502 }
503
504 /* cpu.h doesn't include the actual Xen header. */
505 qemu_build_assert(NR_VIRQS == XEN_NR_VIRQS);
506
507 if (virq >= NR_VIRQS) {
508 return -EINVAL;
509 }
510
511 if (port && X86_CPU(cs)->env.xen_virq[virq]) {
512 return -EEXIST;
513 }
514
515 X86_CPU(cs)->env.xen_virq[virq] = port;
516 if (virq == VIRQ_TIMER && kvm_xen_has_cap(EVTCHN_SEND)) {
517 async_run_on_cpu(cs, do_set_vcpu_timer_virq,
518 RUN_ON_CPU_HOST_INT(port));
519 }
520 return 0;
521 }
522
523 static void do_set_vcpu_time_info_gpa(CPUState *cs, run_on_cpu_data data)
524 {
525 X86CPU *cpu = X86_CPU(cs);
526 CPUX86State *env = &cpu->env;
527
528 env->xen_vcpu_time_info_gpa = data.host_ulong;
529
530 kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
531 env->xen_vcpu_time_info_gpa);
532 }
533
534 static void do_set_vcpu_runstate_gpa(CPUState *cs, run_on_cpu_data data)
535 {
536 X86CPU *cpu = X86_CPU(cs);
537 CPUX86State *env = &cpu->env;
538
539 env->xen_vcpu_runstate_gpa = data.host_ulong;
540
541 kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
542 env->xen_vcpu_runstate_gpa);
543 }
544
545 static void do_vcpu_soft_reset(CPUState *cs, run_on_cpu_data data)
546 {
547 X86CPU *cpu = X86_CPU(cs);
548 CPUX86State *env = &cpu->env;
549
550 env->xen_vcpu_info_gpa = INVALID_GPA;
551 env->xen_vcpu_info_default_gpa = INVALID_GPA;
552 env->xen_vcpu_time_info_gpa = INVALID_GPA;
553 env->xen_vcpu_runstate_gpa = INVALID_GPA;
554 env->xen_vcpu_callback_vector = 0;
555 memset(env->xen_virq, 0, sizeof(env->xen_virq));
556
557 set_vcpu_info(cs, INVALID_GPA);
558 kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
559 INVALID_GPA);
560 kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
561 INVALID_GPA);
562 if (kvm_xen_has_cap(EVTCHN_SEND)) {
563 kvm_xen_set_vcpu_callback_vector(cs);
564
565 QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock);
566 env->xen_singleshot_timer_ns = 0;
567 kvm_xen_set_vcpu_timer(cs);
568 } else {
569 vcpuop_stop_singleshot_timer(cs);
570 }
571
572 }
573
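/*
 * Map the shared_info page at the given GFN, and point each of the first
 * XEN_LEGACY_MAX_VCPUS vCPUs at the vcpu_info embedded in it unless they
 * have explicitly registered their own.
 */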
574 static int xen_set_shared_info(uint64_t gfn)
575 {
576 uint64_t gpa = gfn << TARGET_PAGE_BITS;
577 int i, err;
578
579 BQL_LOCK_GUARD();
580
581 /*
582 * The xen_overlay device tells KVM about it too, since it had to
583 * do that on migration load anyway (unless we're going to jump
584 * through lots of hoops to maintain the fiction that this isn't
585 * KVM-specific).
586 */
587 err = xen_overlay_map_shinfo_page(gpa);
588 if (err) {
589 return err;
590 }
591
592 trace_kvm_xen_set_shared_info(gfn);
593
594 for (i = 0; i < XEN_LEGACY_MAX_VCPUS; i++) {
595 CPUState *cpu = qemu_get_cpu(i);
596 if (cpu) {
597 async_run_on_cpu(cpu, do_set_vcpu_info_default_gpa,
598 RUN_ON_CPU_HOST_ULONG(gpa));
599 }
600 gpa += sizeof(vcpu_info_t);
601 }
602
603 return err;
604 }
605
606 static int add_to_physmap_one(uint32_t space, uint64_t idx, uint64_t gfn)
607 {
608 switch (space) {
609 case XENMAPSPACE_shared_info:
610 if (idx > 0) {
611 return -EINVAL;
612 }
613 return xen_set_shared_info(gfn);
614
615 case XENMAPSPACE_grant_table:
616 return xen_gnttab_map_page(idx, gfn);
617
618 case XENMAPSPACE_gmfn:
619 case XENMAPSPACE_gmfn_range:
620 return -ENOTSUP;
621
622 case XENMAPSPACE_gmfn_foreign:
623 case XENMAPSPACE_dev_mmio:
624 return -EPERM;
625
626 default:
627 return -EINVAL;
628 }
629 }
630
631 static int do_add_to_physmap(struct kvm_xen_exit *exit, X86CPU *cpu,
632 uint64_t arg)
633 {
634 struct xen_add_to_physmap xatp;
635 CPUState *cs = CPU(cpu);
636
637 if (hypercall_compat32(exit->u.hcall.longmode)) {
638 struct compat_xen_add_to_physmap xatp32;
639
640 qemu_build_assert(sizeof(struct compat_xen_add_to_physmap) == 16);
641 if (kvm_copy_from_gva(cs, arg, &xatp32, sizeof(xatp32))) {
642 return -EFAULT;
643 }
644 xatp.domid = xatp32.domid;
645 xatp.size = xatp32.size;
646 xatp.space = xatp32.space;
647 xatp.idx = xatp32.idx;
648 xatp.gpfn = xatp32.gpfn;
649 } else {
650 if (kvm_copy_from_gva(cs, arg, &xatp, sizeof(xatp))) {
651 return -EFAULT;
652 }
653 }
654
655 if (xatp.domid != DOMID_SELF && xatp.domid != xen_domid) {
656 return -ESRCH;
657 }
658
659 return add_to_physmap_one(xatp.space, xatp.idx, xatp.gpfn);
660 }
661
662 static int do_add_to_physmap_batch(struct kvm_xen_exit *exit, X86CPU *cpu,
663 uint64_t arg)
664 {
665 struct xen_add_to_physmap_batch xatpb;
666 unsigned long idxs_gva, gpfns_gva, errs_gva;
667 CPUState *cs = CPU(cpu);
668 size_t op_sz;
669
670 if (hypercall_compat32(exit->u.hcall.longmode)) {
671 struct compat_xen_add_to_physmap_batch xatpb32;
672
673 qemu_build_assert(sizeof(struct compat_xen_add_to_physmap_batch) == 20);
674 if (kvm_copy_from_gva(cs, arg, &xatpb32, sizeof(xatpb32))) {
675 return -EFAULT;
676 }
677 xatpb.domid = xatpb32.domid;
678 xatpb.space = xatpb32.space;
679 xatpb.size = xatpb32.size;
680
681 idxs_gva = xatpb32.idxs.c;
682 gpfns_gva = xatpb32.gpfns.c;
683 errs_gva = xatpb32.errs.c;
684 op_sz = sizeof(uint32_t);
685 } else {
686 if (kvm_copy_from_gva(cs, arg, &xatpb, sizeof(xatpb))) {
687 return -EFAULT;
688 }
689 op_sz = sizeof(unsigned long);
690 idxs_gva = (unsigned long)xatpb.idxs.p;
691 gpfns_gva = (unsigned long)xatpb.gpfns.p;
692 errs_gva = (unsigned long)xatpb.errs.p;
693 }
694
695 if (xatpb.domid != DOMID_SELF && xatpb.domid != xen_domid) {
696 return -ESRCH;
697 }
698
699 /* Explicitly invalid for the batch op. Not that we implement it anyway. */
700 if (xatpb.space == XENMAPSPACE_gmfn_range) {
701 return -EINVAL;
702 }
703
704 while (xatpb.size--) {
705 unsigned long idx = 0;
706 unsigned long gpfn = 0;
707 int err;
708
709 /* For 32-bit compat this only copies the low 32 bits of each */
710 if (kvm_copy_from_gva(cs, idxs_gva, &idx, op_sz) ||
711 kvm_copy_from_gva(cs, gpfns_gva, &gpfn, op_sz)) {
712 return -EFAULT;
713 }
714 idxs_gva += op_sz;
715 gpfns_gva += op_sz;
716
717 err = add_to_physmap_one(xatpb.space, idx, gpfn);
718
719 if (kvm_copy_to_gva(cs, errs_gva, &err, sizeof(err))) {
720 return -EFAULT;
721 }
722 errs_gva += sizeof(err);
723 }
724 return 0;
725 }
726
727 static bool kvm_xen_hcall_memory_op(struct kvm_xen_exit *exit, X86CPU *cpu,
728 int cmd, uint64_t arg)
729 {
730 int err;
731
732 switch (cmd) {
733 case XENMEM_add_to_physmap:
734 err = do_add_to_physmap(exit, cpu, arg);
735 break;
736
737 case XENMEM_add_to_physmap_batch:
738 err = do_add_to_physmap_batch(exit, cpu, arg);
739 break;
740
741 default:
742 return false;
743 }
744
745 exit->u.hcall.result = err;
746 return true;
747 }
748
749 static bool handle_set_param(struct kvm_xen_exit *exit, X86CPU *cpu,
750 uint64_t arg)
751 {
752 CPUState *cs = CPU(cpu);
753 struct xen_hvm_param hp;
754 int err = 0;
755
756 /* No need for 32/64 compat handling */
757 qemu_build_assert(sizeof(hp) == 16);
758
759 if (kvm_copy_from_gva(cs, arg, &hp, sizeof(hp))) {
760 err = -EFAULT;
761 goto out;
762 }
763
764 if (hp.domid != DOMID_SELF && hp.domid != xen_domid) {
765 err = -ESRCH;
766 goto out;
767 }
768
769 switch (hp.index) {
770 case HVM_PARAM_CALLBACK_IRQ:
771 bql_lock();
772 err = xen_evtchn_set_callback_param(hp.value);
773 bql_unlock();
774 xen_set_long_mode(exit->u.hcall.longmode);
775 break;
776 default:
777 return false;
778 }
779
780 out:
781 exit->u.hcall.result = err;
782 return true;
783 }
784
785 static bool handle_get_param(struct kvm_xen_exit *exit, X86CPU *cpu,
786 uint64_t arg)
787 {
788 CPUState *cs = CPU(cpu);
789 struct xen_hvm_param hp;
790 int err = 0;
791
792 /* No need for 32/64 compat handling */
793 qemu_build_assert(sizeof(hp) == 16);
794
795 if (kvm_copy_from_gva(cs, arg, &hp, sizeof(hp))) {
796 err = -EFAULT;
797 goto out;
798 }
799
800 if (hp.domid != DOMID_SELF && hp.domid != xen_domid) {
801 err = -ESRCH;
802 goto out;
803 }
804
805 switch (hp.index) {
806 case HVM_PARAM_STORE_PFN:
807 hp.value = XEN_SPECIAL_PFN(XENSTORE);
808 break;
809 case HVM_PARAM_STORE_EVTCHN:
810 hp.value = xen_xenstore_get_port();
811 break;
812 case HVM_PARAM_CONSOLE_PFN:
813 hp.value = xen_primary_console_get_pfn();
814 if (!hp.value) {
815 err = -EINVAL;
816 }
817 break;
818 case HVM_PARAM_CONSOLE_EVTCHN:
819 hp.value = xen_primary_console_get_port();
820 if (!hp.value) {
821 err = -EINVAL;
822 }
823 break;
824 default:
825 return false;
826 }
827
828 if (!err && kvm_copy_to_gva(cs, arg, &hp, sizeof(hp))) {
829 err = -EFAULT;
830 }
831 out:
832 exit->u.hcall.result = err;
833 return true;
834 }
835
836 static int kvm_xen_hcall_evtchn_upcall_vector(struct kvm_xen_exit *exit,
837 X86CPU *cpu, uint64_t arg)
838 {
839 struct xen_hvm_evtchn_upcall_vector up;
840 CPUState *target_cs;
841
842 /* No need for 32/64 compat handling */
843 qemu_build_assert(sizeof(up) == 8);
844
845 if (kvm_copy_from_gva(CPU(cpu), arg, &up, sizeof(up))) {
846 return -EFAULT;
847 }
848
849 if (up.vector < 0x10) {
850 return -EINVAL;
851 }
852
853 target_cs = qemu_get_cpu(up.vcpu);
854 if (!target_cs) {
855 return -EINVAL;
856 }
857
858 async_run_on_cpu(target_cs, do_set_vcpu_callback_vector,
859 RUN_ON_CPU_HOST_INT(up.vector));
860 return 0;
861 }
862
863 static bool kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit, X86CPU *cpu,
864 int cmd, uint64_t arg)
865 {
866 int ret = -ENOSYS;
867 switch (cmd) {
868 case HVMOP_set_evtchn_upcall_vector:
869 ret = kvm_xen_hcall_evtchn_upcall_vector(exit, cpu, arg);
870 break;
871
872 case HVMOP_pagetable_dying:
873 ret = -ENOSYS;
874 break;
875
876 case HVMOP_set_param:
877 return handle_set_param(exit, cpu, arg);
878
879 case HVMOP_get_param:
880 return handle_get_param(exit, cpu, arg);
881
882 default:
883 return false;
884 }
885
886 exit->u.hcall.result = ret;
887 return true;
888 }
889
890 static int vcpuop_register_vcpu_info(CPUState *cs, CPUState *target,
891 uint64_t arg)
892 {
893 struct vcpu_register_vcpu_info rvi;
894 uint64_t gpa;
895
896 /* No need for 32/64 compat handling */
897 qemu_build_assert(sizeof(rvi) == 16);
898 qemu_build_assert(sizeof(struct vcpu_info) == 64);
899
900 if (!target) {
901 return -ENOENT;
902 }
903
904 if (kvm_copy_from_gva(cs, arg, &rvi, sizeof(rvi))) {
905 return -EFAULT;
906 }
907
908 if (rvi.offset > TARGET_PAGE_SIZE - sizeof(struct vcpu_info)) {
909 return -EINVAL;
910 }
911
912 gpa = ((rvi.mfn << TARGET_PAGE_BITS) + rvi.offset);
913 async_run_on_cpu(target, do_set_vcpu_info_gpa, RUN_ON_CPU_HOST_ULONG(gpa));
914 return 0;
915 }
916
917 static int vcpuop_register_vcpu_time_info(CPUState *cs, CPUState *target,
918 uint64_t arg)
919 {
920 struct vcpu_register_time_memory_area tma;
921 uint64_t gpa;
922 size_t len;
923
924 /* No need for 32/64 compat handling */
925 qemu_build_assert(sizeof(tma) == 8);
926 qemu_build_assert(sizeof(struct vcpu_time_info) == 32);
927
928 if (!target) {
929 return -ENOENT;
930 }
931
932 if (kvm_copy_from_gva(cs, arg, &tma, sizeof(tma))) {
933 return -EFAULT;
934 }
935
936 /*
937 * Xen actually uses the GVA and does the translation through the guest
938 * page tables each time. But Linux/KVM uses the GPA, on the assumption
939 * that guests only ever use *global* addresses (kernel virtual addresses)
940 * for it. If Linux is changed to redo the GVA→GPA translation each time,
941 * it will offer a new vCPU attribute for that, and we'll use it instead.
942 */
943 if (!kvm_gva_to_gpa(cs, tma.addr.p, &gpa, &len, false) ||
944 len < sizeof(struct vcpu_time_info)) {
945 return -EFAULT;
946 }
947
948 async_run_on_cpu(target, do_set_vcpu_time_info_gpa,
949 RUN_ON_CPU_HOST_ULONG(gpa));
950 return 0;
951 }
952
953 static int vcpuop_register_runstate_info(CPUState *cs, CPUState *target,
954 uint64_t arg)
955 {
956 struct vcpu_register_runstate_memory_area rma;
957 uint64_t gpa;
958 size_t len;
959
960 /* No need for 32/64 compat handling */
961 qemu_build_assert(sizeof(rma) == 8);
962 /* The runstate area actually does change size, but Linux copes. */
963
964 if (!target) {
965 return -ENOENT;
966 }
967
968 if (kvm_copy_from_gva(cs, arg, &rma, sizeof(rma))) {
969 return -EFAULT;
970 }
971
972 /* As with vcpu_time_info, Xen actually uses the GVA but KVM doesn't. */
973 if (!kvm_gva_to_gpa(cs, rma.addr.p, &gpa, &len, false)) {
974 return -EFAULT;
975 }
976
977 async_run_on_cpu(target, do_set_vcpu_runstate_gpa,
978 RUN_ON_CPU_HOST_ULONG(gpa));
979 return 0;
980 }
981
982 static uint64_t kvm_get_current_ns(void)
983 {
984 struct kvm_clock_data data;
985 int ret;
986
987 ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
988 if (ret < 0) {
989 fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
990 abort();
991 }
992
993 return data.clock;
994 }
995
996 static void xen_vcpu_singleshot_timer_event(void *opaque)
997 {
998 CPUState *cpu = opaque;
999 CPUX86State *env = &X86_CPU(cpu)->env;
1000 uint16_t port = env->xen_virq[VIRQ_TIMER];
1001
1002 if (likely(port)) {
1003 xen_evtchn_set_port(port);
1004 }
1005
1006 qemu_mutex_lock(&env->xen_timers_lock);
1007 env->xen_singleshot_timer_ns = 0;
1008 qemu_mutex_unlock(&env->xen_timers_lock);
1009 }
1010
1011 static void xen_vcpu_periodic_timer_event(void *opaque)
1012 {
1013 CPUState *cpu = opaque;
1014 CPUX86State *env = &X86_CPU(cpu)->env;
1015 uint16_t port = env->xen_virq[VIRQ_TIMER];
1016 int64_t qemu_now;
1017
1018 if (likely(port)) {
1019 xen_evtchn_set_port(port);
1020 }
1021
1022 qemu_mutex_lock(&env->xen_timers_lock);
1023
1024 qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
1025 timer_mod_ns(env->xen_periodic_timer,
1026 qemu_now + env->xen_periodic_timer_period);
1027
1028 qemu_mutex_unlock(&env->xen_timers_lock);
1029 }
1030
1031 static int do_set_periodic_timer(CPUState *target, uint64_t period_ns)
1032 {
1033 CPUX86State *tenv = &X86_CPU(target)->env;
1034 int64_t qemu_now;
1035
1036 timer_del(tenv->xen_periodic_timer);
1037
1038 qemu_mutex_lock(&tenv->xen_timers_lock);
1039
1040 qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
1041 timer_mod_ns(tenv->xen_periodic_timer, qemu_now + period_ns);
1042 tenv->xen_periodic_timer_period = period_ns;
1043
1044 qemu_mutex_unlock(&tenv->xen_timers_lock);
1045 return 0;
1046 }
1047
1048 #define MILLISECS(_ms) ((int64_t)((_ms) * 1000000ULL))
1049 #define MICROSECS(_us) ((int64_t)((_us) * 1000ULL))
1050 #define STIME_MAX ((time_t)((int64_t)~0ull >> 1))
1051 /* Chosen so (NOW() + delta) won't overflow without an uptime of 200 years */
1052 #define STIME_DELTA_MAX ((int64_t)((uint64_t)~0ull >> 2))
1053
1054 static int vcpuop_set_periodic_timer(CPUState *cs, CPUState *target,
1055 uint64_t arg)
1056 {
1057 struct vcpu_set_periodic_timer spt;
1058
1059 qemu_build_assert(sizeof(spt) == 8);
1060 if (kvm_copy_from_gva(cs, arg, &spt, sizeof(spt))) {
1061 return -EFAULT;
1062 }
1063
1064 if (spt.period_ns < MILLISECS(1) || spt.period_ns > STIME_DELTA_MAX) {
1065 return -EINVAL;
1066 }
1067
1068 return do_set_periodic_timer(target, spt.period_ns);
1069 }
1070
1071 static int vcpuop_stop_periodic_timer(CPUState *target)
1072 {
1073 CPUX86State *tenv = &X86_CPU(target)->env;
1074
1075 qemu_mutex_lock(&tenv->xen_timers_lock);
1076
1077 timer_del(tenv->xen_periodic_timer);
1078 tenv->xen_periodic_timer_period = 0;
1079
1080 qemu_mutex_unlock(&tenv->xen_timers_lock);
1081 return 0;
1082 }
1083
1084 /*
1085 * Userspace handling of timer, for older kernels.
1086 * Must always be called with xen_timers_lock held.
1087 */
1088 static int do_set_singleshot_timer(CPUState *cs, uint64_t timeout_abs,
1089 bool linux_wa)
1090 {
1091 CPUX86State *env = &X86_CPU(cs)->env;
1092 int64_t now = kvm_get_current_ns();
1093 int64_t qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
1094 int64_t delta = timeout_abs - now;
1095
1096 if (linux_wa && unlikely((int64_t)timeout_abs < 0 ||
1097 (delta > 0 && (uint32_t)(delta >> 50) != 0))) {
1098 /*
1099 * Xen has a 'Linux workaround' in do_set_timer_op() which checks
1100 * for negative absolute timeout values (caused by integer
1101 * overflow), and for values about 13 days in the future (2^50ns)
1102 * which would be caused by jiffies overflow. For those cases, it
1103 * sets the timeout 100ms in the future (not *too* soon, since if
1104 * a guest really did set a long timeout on purpose we don't want
1105 * to keep churning CPU time by waking it up).
1106 */
1107 delta = (100 * SCALE_MS);
1108 timeout_abs = now + delta;
1109 }
1110
1111 timer_mod_ns(env->xen_singleshot_timer, qemu_now + delta);
1112 env->xen_singleshot_timer_ns = now + delta;
1113 return 0;
1114 }
1115
1116 static int vcpuop_set_singleshot_timer(CPUState *cs, uint64_t arg)
1117 {
1118 struct vcpu_set_singleshot_timer sst = { 0 };
1119
1120 /*
1121 * The struct is a uint64_t followed by a uint32_t. On 32-bit that
1122 * makes it 12 bytes. On 64-bit it gets padded to 16. The parts
1123 * that get used are identical, and there's four bytes of padding
1124 * unused at the end. For true Xen compatibility we should attempt
1125 * to copy the full 16 bytes from 64-bit guests, and return -EFAULT
1126 * if we can't get the padding too. But that's daft. Just copy what
1127 * we need.
1128 */
1129 qemu_build_assert(offsetof(struct vcpu_set_singleshot_timer, flags) == 8);
1130 qemu_build_assert(sizeof(sst) >= 12);
1131
1132 if (kvm_copy_from_gva(cs, arg, &sst, 12)) {
1133 return -EFAULT;
1134 }
1135
1136 QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock);
1137
1138 /*
1139 * We ignore the VCPU_SSHOTTMR_future flag, just as Xen now does.
1140 * The only guest that ever used it, got it wrong.
1141 * https://xenbits.xen.org/gitweb/?p=xen.git;a=commitdiff;h=19c6cbd909
1142 */
1143 return do_set_singleshot_timer(cs, sst.timeout_abs_ns, false);
1144 }
1145
1146 static int vcpuop_stop_singleshot_timer(CPUState *cs)
1147 {
1148 CPUX86State *env = &X86_CPU(cs)->env;
1149
1150 qemu_mutex_lock(&env->xen_timers_lock);
1151
1152 timer_del(env->xen_singleshot_timer);
1153 env->xen_singleshot_timer_ns = 0;
1154
1155 qemu_mutex_unlock(&env->xen_timers_lock);
1156 return 0;
1157 }
1158
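/* set_timer_op(): a timeout of zero cancels the singleshot timer. */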
1159 static bool kvm_xen_hcall_set_timer_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1160 uint64_t timeout)
1161 {
1162 int err;
1163
1164 if (unlikely(timeout == 0)) {
1165 err = vcpuop_stop_singleshot_timer(CPU(cpu));
1166 } else {
1167 QEMU_LOCK_GUARD(&X86_CPU(cpu)->env.xen_timers_lock);
1168 err = do_set_singleshot_timer(CPU(cpu), timeout, true);
1169 }
1170 exit->u.hcall.result = err;
1171 return true;
1172 }
1173
1174 static bool kvm_xen_hcall_vcpu_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1175 int cmd, int vcpu_id, uint64_t arg)
1176 {
1177 CPUState *cs = CPU(cpu);
1178 CPUState *dest = cs->cpu_index == vcpu_id ? cs : qemu_get_cpu(vcpu_id);
1179 int err;
1180
1181 if (!dest) {
1182 err = -ENOENT;
1183 goto out;
1184 }
1185
1186 switch (cmd) {
1187 case VCPUOP_register_runstate_memory_area:
1188 err = vcpuop_register_runstate_info(cs, dest, arg);
1189 break;
1190 case VCPUOP_register_vcpu_time_memory_area:
1191 err = vcpuop_register_vcpu_time_info(cs, dest, arg);
1192 break;
1193 case VCPUOP_register_vcpu_info:
1194 err = vcpuop_register_vcpu_info(cs, dest, arg);
1195 break;
1196 case VCPUOP_set_singleshot_timer: {
1197 if (cs->cpu_index == vcpu_id) {
1198 err = vcpuop_set_singleshot_timer(dest, arg);
1199 } else {
1200 err = -EINVAL;
1201 }
1202 break;
1203 }
1204 case VCPUOP_stop_singleshot_timer:
1205 if (cs->cpu_index == vcpu_id) {
1206 err = vcpuop_stop_singleshot_timer(dest);
1207 } else {
1208 err = -EINVAL;
1209 }
1210 break;
1211 case VCPUOP_set_periodic_timer: {
1212 err = vcpuop_set_periodic_timer(cs, dest, arg);
1213 break;
1214 }
1215 case VCPUOP_stop_periodic_timer:
1216 err = vcpuop_stop_periodic_timer(dest);
1217 break;
1218
1219 default:
1220 return false;
1221 }
1222
1223 out:
1224 exit->u.hcall.result = err;
1225 return true;
1226 }
1227
1228 static bool kvm_xen_hcall_evtchn_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1229 int cmd, uint64_t arg)
1230 {
1231 CPUState *cs = CPU(cpu);
1232 int err = -ENOSYS;
1233
1234 switch (cmd) {
1235 case EVTCHNOP_init_control:
1236 case EVTCHNOP_expand_array:
1237 case EVTCHNOP_set_priority:
1238 /* We do not support FIFO channels at this point */
1239 err = -ENOSYS;
1240 break;
1241
1242 case EVTCHNOP_status: {
1243 struct evtchn_status status;
1244
1245 qemu_build_assert(sizeof(status) == 24);
1246 if (kvm_copy_from_gva(cs, arg, &status, sizeof(status))) {
1247 err = -EFAULT;
1248 break;
1249 }
1250
1251 err = xen_evtchn_status_op(&status);
1252 if (!err && kvm_copy_to_gva(cs, arg, &status, sizeof(status))) {
1253 err = -EFAULT;
1254 }
1255 break;
1256 }
1257 case EVTCHNOP_close: {
1258 struct evtchn_close close;
1259
1260 qemu_build_assert(sizeof(close) == 4);
1261 if (kvm_copy_from_gva(cs, arg, &close, sizeof(close))) {
1262 err = -EFAULT;
1263 break;
1264 }
1265
1266 err = xen_evtchn_close_op(&close);
1267 break;
1268 }
1269 case EVTCHNOP_unmask: {
1270 struct evtchn_unmask unmask;
1271
1272 qemu_build_assert(sizeof(unmask) == 4);
1273 if (kvm_copy_from_gva(cs, arg, &unmask, sizeof(unmask))) {
1274 err = -EFAULT;
1275 break;
1276 }
1277
1278 err = xen_evtchn_unmask_op(&unmask);
1279 break;
1280 }
1281 case EVTCHNOP_bind_virq: {
1282 struct evtchn_bind_virq virq;
1283
1284 qemu_build_assert(sizeof(virq) == 12);
1285 if (kvm_copy_from_gva(cs, arg, &virq, sizeof(virq))) {
1286 err = -EFAULT;
1287 break;
1288 }
1289
1290 err = xen_evtchn_bind_virq_op(&virq);
1291 if (!err && kvm_copy_to_gva(cs, arg, &virq, sizeof(virq))) {
1292 err = -EFAULT;
1293 }
1294 break;
1295 }
1296 case EVTCHNOP_bind_pirq: {
1297 struct evtchn_bind_pirq pirq;
1298
1299 qemu_build_assert(sizeof(pirq) == 12);
1300 if (kvm_copy_from_gva(cs, arg, &pirq, sizeof(pirq))) {
1301 err = -EFAULT;
1302 break;
1303 }
1304
1305 err = xen_evtchn_bind_pirq_op(&pirq);
1306 if (!err && kvm_copy_to_gva(cs, arg, &pirq, sizeof(pirq))) {
1307 err = -EFAULT;
1308 }
1309 break;
1310 }
1311 case EVTCHNOP_bind_ipi: {
1312 struct evtchn_bind_ipi ipi;
1313
1314 qemu_build_assert(sizeof(ipi) == 8);
1315 if (kvm_copy_from_gva(cs, arg, &ipi, sizeof(ipi))) {
1316 err = -EFAULT;
1317 break;
1318 }
1319
1320 err = xen_evtchn_bind_ipi_op(&ipi);
1321 if (!err && kvm_copy_to_gva(cs, arg, &ipi, sizeof(ipi))) {
1322 err = -EFAULT;
1323 }
1324 break;
1325 }
1326 case EVTCHNOP_send: {
1327 struct evtchn_send send;
1328
1329 qemu_build_assert(sizeof(send) == 4);
1330 if (kvm_copy_from_gva(cs, arg, &send, sizeof(send))) {
1331 err = -EFAULT;
1332 break;
1333 }
1334
1335 err = xen_evtchn_send_op(&send);
1336 break;
1337 }
1338 case EVTCHNOP_alloc_unbound: {
1339 struct evtchn_alloc_unbound alloc;
1340
1341 qemu_build_assert(sizeof(alloc) == 8);
1342 if (kvm_copy_from_gva(cs, arg, &alloc, sizeof(alloc))) {
1343 err = -EFAULT;
1344 break;
1345 }
1346
1347 err = xen_evtchn_alloc_unbound_op(&alloc);
1348 if (!err && kvm_copy_to_gva(cs, arg, &alloc, sizeof(alloc))) {
1349 err = -EFAULT;
1350 }
1351 break;
1352 }
1353 case EVTCHNOP_bind_interdomain: {
1354 struct evtchn_bind_interdomain interdomain;
1355
1356 qemu_build_assert(sizeof(interdomain) == 12);
1357 if (kvm_copy_from_gva(cs, arg, &interdomain, sizeof(interdomain))) {
1358 err = -EFAULT;
1359 break;
1360 }
1361
1362 err = xen_evtchn_bind_interdomain_op(&interdomain);
1363 if (!err &&
1364 kvm_copy_to_gva(cs, arg, &interdomain, sizeof(interdomain))) {
1365 err = -EFAULT;
1366 }
1367 break;
1368 }
1369 case EVTCHNOP_bind_vcpu: {
1370 struct evtchn_bind_vcpu vcpu;
1371
1372 qemu_build_assert(sizeof(vcpu) == 8);
1373 if (kvm_copy_from_gva(cs, arg, &vcpu, sizeof(vcpu))) {
1374 err = -EFAULT;
1375 break;
1376 }
1377
1378 err = xen_evtchn_bind_vcpu_op(&vcpu);
1379 break;
1380 }
1381 case EVTCHNOP_reset: {
1382 struct evtchn_reset reset;
1383
1384 qemu_build_assert(sizeof(reset) == 2);
1385 if (kvm_copy_from_gva(cs, arg, &reset, sizeof(reset))) {
1386 err = -EFAULT;
1387 break;
1388 }
1389
1390 err = xen_evtchn_reset_op(&reset);
1391 break;
1392 }
1393 default:
1394 return false;
1395 }
1396
1397 exit->u.hcall.result = err;
1398 return true;
1399 }
1400
1401 int kvm_xen_soft_reset(void)
1402 {
1403 CPUState *cpu;
1404 int err;
1405
1406 assert(bql_locked());
1407
1408 trace_kvm_xen_soft_reset();
1409
1410 err = xen_evtchn_soft_reset();
1411 if (err) {
1412 return err;
1413 }
1414
1415 /*
1416 * Zero is the reset/startup state for HVM_PARAM_CALLBACK_IRQ. Strictly,
1417 * it maps to HVM_PARAM_CALLBACK_TYPE_GSI with GSI#0, but Xen refuses to
1418 * deliver to the timer interrupt and treats that as 'disabled'.
1419 */
1420 err = xen_evtchn_set_callback_param(0);
1421 if (err) {
1422 return err;
1423 }
1424
1425 CPU_FOREACH(cpu) {
1426 async_run_on_cpu(cpu, do_vcpu_soft_reset, RUN_ON_CPU_NULL);
1427 }
1428
1429 err = xen_overlay_map_shinfo_page(INVALID_GFN);
1430 if (err) {
1431 return err;
1432 }
1433
1434 err = xen_gnttab_reset();
1435 if (err) {
1436 return err;
1437 }
1438
1439 err = xen_primary_console_reset();
1440 if (err) {
1441 return err;
1442 }
1443
1444 err = xen_xenstore_reset();
1445 if (err) {
1446 return err;
1447 }
1448
1449 return 0;
1450 }
1451
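/*
 * SCHEDOP_shutdown: handle crash, reboot, poweroff and soft_reset requests
 * from the guest.
 */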
1452 static int schedop_shutdown(CPUState *cs, uint64_t arg)
1453 {
1454 struct sched_shutdown shutdown;
1455 int ret = 0;
1456
1457 /* No need for 32/64 compat handling */
1458 qemu_build_assert(sizeof(shutdown) == 4);
1459
1460 if (kvm_copy_from_gva(cs, arg, &shutdown, sizeof(shutdown))) {
1461 return -EFAULT;
1462 }
1463
1464 switch (shutdown.reason) {
1465 case SHUTDOWN_crash:
1466 cpu_dump_state(cs, stderr, CPU_DUMP_CODE);
1467 qemu_system_guest_panicked(NULL);
1468 break;
1469
1470 case SHUTDOWN_reboot:
1471 qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
1472 break;
1473
1474 case SHUTDOWN_poweroff:
1475 qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
1476 break;
1477
1478 case SHUTDOWN_soft_reset:
1479 bql_lock();
1480 ret = kvm_xen_soft_reset();
1481 bql_unlock();
1482 break;
1483
1484 default:
1485 ret = -EINVAL;
1486 break;
1487 }
1488
1489 return ret;
1490 }
1491
1492 static bool kvm_xen_hcall_sched_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1493 int cmd, uint64_t arg)
1494 {
1495 CPUState *cs = CPU(cpu);
1496 int err = -ENOSYS;
1497
1498 switch (cmd) {
1499 case SCHEDOP_shutdown:
1500 err = schedop_shutdown(cs, arg);
1501 break;
1502
1503 case SCHEDOP_poll:
1504 /*
1505 * Linux will panic if this doesn't work. Just yield; it's not
1506 * worth overthinking it because with event channel handling
1507 * in KVM, the kernel will intercept this and it will never
1508 * reach QEMU anyway. The semantics of the hypercall explicitly
1509 * permit spurious wakeups.
1510 */
1511 case SCHEDOP_yield:
1512 sched_yield();
1513 err = 0;
1514 break;
1515
1516 default:
1517 return false;
1518 }
1519
1520 exit->u.hcall.result = err;
1521 return true;
1522 }
1523
1524 static bool kvm_xen_hcall_gnttab_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1525 int cmd, uint64_t arg, int count)
1526 {
1527 CPUState *cs = CPU(cpu);
1528 int err;
1529
1530 switch (cmd) {
1531 case GNTTABOP_set_version: {
1532 struct gnttab_set_version set;
1533
1534 qemu_build_assert(sizeof(set) == 4);
1535 if (kvm_copy_from_gva(cs, arg, &set, sizeof(set))) {
1536 err = -EFAULT;
1537 break;
1538 }
1539
1540 err = xen_gnttab_set_version_op(&set);
1541 if (!err && kvm_copy_to_gva(cs, arg, &set, sizeof(set))) {
1542 err = -EFAULT;
1543 }
1544 break;
1545 }
1546 case GNTTABOP_get_version: {
1547 struct gnttab_get_version get;
1548
1549 qemu_build_assert(sizeof(get) == 8);
1550 if (kvm_copy_from_gva(cs, arg, &get, sizeof(get))) {
1551 err = -EFAULT;
1552 break;
1553 }
1554
1555 err = xen_gnttab_get_version_op(&get);
1556 if (!err && kvm_copy_to_gva(cs, arg, &get, sizeof(get))) {
1557 err = -EFAULT;
1558 }
1559 break;
1560 }
1561 case GNTTABOP_query_size: {
1562 struct gnttab_query_size size;
1563
1564 qemu_build_assert(sizeof(size) == 16);
1565 if (kvm_copy_from_gva(cs, arg, &size, sizeof(size))) {
1566 err = -EFAULT;
1567 break;
1568 }
1569
1570 err = xen_gnttab_query_size_op(&size);
1571 if (!err && kvm_copy_to_gva(cs, arg, &size, sizeof(size))) {
1572 err = -EFAULT;
1573 }
1574 break;
1575 }
1576 case GNTTABOP_setup_table:
1577 case GNTTABOP_copy:
1578 case GNTTABOP_map_grant_ref:
1579 case GNTTABOP_unmap_grant_ref:
1580 case GNTTABOP_swap_grant_ref:
1581 return false;
1582
1583 default:
1584 /* Xen explicitly returns -ENOSYS to HVM guests for all others */
1585 err = -ENOSYS;
1586 break;
1587 }
1588
1589 exit->u.hcall.result = err;
1590 return true;
1591 }
1592
1593 static bool kvm_xen_hcall_physdev_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1594 int cmd, uint64_t arg)
1595 {
1596 CPUState *cs = CPU(cpu);
1597 int err;
1598
1599 switch (cmd) {
1600 case PHYSDEVOP_map_pirq: {
1601 struct physdev_map_pirq map;
1602
1603 if (hypercall_compat32(exit->u.hcall.longmode)) {
1604 struct compat_physdev_map_pirq *map32 = (void *)&map;
1605
1606 if (kvm_copy_from_gva(cs, arg, map32, sizeof(*map32))) {
1607 err = -EFAULT;
break;
1608 }
1609
1610 /*
1611 * The only thing that's different is the alignment of the
1612 * uint64_t table_base at the end, which gets padding to make
1613 * it 64-bit aligned in the 64-bit version.
1614 */
1615 qemu_build_assert(sizeof(*map32) == 36);
1616 qemu_build_assert(offsetof(struct physdev_map_pirq, entry_nr) ==
1617 offsetof(struct compat_physdev_map_pirq, entry_nr));
1618 memmove(&map.table_base, &map32->table_base, sizeof(map.table_base));
1619 } else {
1620 if (kvm_copy_from_gva(cs, arg, &map, sizeof(map))) {
1621 err = -EFAULT;
1622 break;
1623 }
1624 }
1625 err = xen_physdev_map_pirq(&map);
1626 /*
1627 * Since table_base is an IN parameter and won't be changed, just
1628 * copy the size of the compat structure back to the guest.
1629 */
1630 if (!err && kvm_copy_to_gva(cs, arg, &map,
1631 sizeof(struct compat_physdev_map_pirq))) {
1632 err = -EFAULT;
1633 }
1634 break;
1635 }
1636 case PHYSDEVOP_unmap_pirq: {
1637 struct physdev_unmap_pirq unmap;
1638
1639 qemu_build_assert(sizeof(unmap) == 8);
1640 if (kvm_copy_from_gva(cs, arg, &unmap, sizeof(unmap))) {
1641 err = -EFAULT;
1642 break;
1643 }
1644
1645 err = xen_physdev_unmap_pirq(&unmap);
1646 if (!err && kvm_copy_to_gva(cs, arg, &unmap, sizeof(unmap))) {
1647 err = -EFAULT;
1648 }
1649 break;
1650 }
1651 case PHYSDEVOP_eoi: {
1652 struct physdev_eoi eoi;
1653
1654 qemu_build_assert(sizeof(eoi) == 4);
1655 if (kvm_copy_from_gva(cs, arg, &eoi, sizeof(eoi))) {
1656 err = -EFAULT;
1657 break;
1658 }
1659
1660 err = xen_physdev_eoi_pirq(&eoi);
1661 if (!err && kvm_copy_to_gva(cs, arg, &eoi, sizeof(eoi))) {
1662 err = -EFAULT;
1663 }
1664 break;
1665 }
1666 case PHYSDEVOP_irq_status_query: {
1667 struct physdev_irq_status_query query;
1668
1669 qemu_build_assert(sizeof(query) == 8);
1670 if (kvm_copy_from_gva(cs, arg, &query, sizeof(query))) {
1671 err = -EFAULT;
1672 break;
1673 }
1674
1675 err = xen_physdev_query_pirq(&query);
1676 if (!err && kvm_copy_to_gva(cs, arg, &query, sizeof(query))) {
1677 err = -EFAULT;
1678 }
1679 break;
1680 }
1681 case PHYSDEVOP_get_free_pirq: {
1682 struct physdev_get_free_pirq get;
1683
1684 qemu_build_assert(sizeof(get) == 8);
1685 if (kvm_copy_from_gva(cs, arg, &get, sizeof(get))) {
1686 err = -EFAULT;
1687 break;
1688 }
1689
1690 err = xen_physdev_get_free_pirq(&get);
1691 if (!err && kvm_copy_to_gva(cs, arg, &get, sizeof(get))) {
1692 err = -EFAULT;
1693 }
1694 break;
1695 }
1696 case PHYSDEVOP_pirq_eoi_gmfn_v2: /* FreeBSD 13 makes this hypercall */
1697 err = -ENOSYS;
1698 break;
1699
1700 default:
1701 return false;
1702 }
1703
1704 exit->u.hcall.result = err;
1705 return true;
1706 }
1707
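/*
 * Dispatch a hypercall to the appropriate handler. Returns false if the
 * hypercall is unrecognized, so the caller reports -ENOSYS to the guest.
 */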
1708 static bool do_kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
1709 {
1710 uint16_t code = exit->u.hcall.input;
1711
1712 if (exit->u.hcall.cpl > 0) {
1713 exit->u.hcall.result = -EPERM;
1714 return true;
1715 }
1716
1717 switch (code) {
1718 case __HYPERVISOR_set_timer_op:
1719 if (exit->u.hcall.longmode) {
1720 return kvm_xen_hcall_set_timer_op(exit, cpu,
1721 exit->u.hcall.params[0]);
1722 } else {
1723 /* In 32-bit mode, the 64-bit timer value is in two args. */
1724 uint64_t val = ((uint64_t)exit->u.hcall.params[1]) << 32 |
1725 (uint32_t)exit->u.hcall.params[0];
1726 return kvm_xen_hcall_set_timer_op(exit, cpu, val);
1727 }
1728 case __HYPERVISOR_grant_table_op:
1729 return kvm_xen_hcall_gnttab_op(exit, cpu, exit->u.hcall.params[0],
1730 exit->u.hcall.params[1],
1731 exit->u.hcall.params[2]);
1732 case __HYPERVISOR_sched_op:
1733 return kvm_xen_hcall_sched_op(exit, cpu, exit->u.hcall.params[0],
1734 exit->u.hcall.params[1]);
1735 case __HYPERVISOR_event_channel_op:
1736 return kvm_xen_hcall_evtchn_op(exit, cpu, exit->u.hcall.params[0],
1737 exit->u.hcall.params[1]);
1738 case __HYPERVISOR_vcpu_op:
1739 return kvm_xen_hcall_vcpu_op(exit, cpu,
1740 exit->u.hcall.params[0],
1741 exit->u.hcall.params[1],
1742 exit->u.hcall.params[2]);
1743 case __HYPERVISOR_hvm_op:
1744 return kvm_xen_hcall_hvm_op(exit, cpu, exit->u.hcall.params[0],
1745 exit->u.hcall.params[1]);
1746 case __HYPERVISOR_memory_op:
1747 return kvm_xen_hcall_memory_op(exit, cpu, exit->u.hcall.params[0],
1748 exit->u.hcall.params[1]);
1749 case __HYPERVISOR_physdev_op:
1750 return kvm_xen_hcall_physdev_op(exit, cpu, exit->u.hcall.params[0],
1751 exit->u.hcall.params[1]);
1752 case __HYPERVISOR_xen_version:
1753 return kvm_xen_hcall_xen_version(exit, cpu, exit->u.hcall.params[0],
1754 exit->u.hcall.params[1]);
1755 default:
1756 return false;
1757 }
1758 }
1759
1760 int kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
1761 {
1762 if (exit->type != KVM_EXIT_XEN_HCALL) {
1763 return -1;
1764 }
1765
1766 /*
1767 * The kernel latches the guest 32/64 mode when the MSR is used to fill
1768 * the hypercall page. So if we see a hypercall in a mode that doesn't
1769 * match our own idea of the guest mode, fetch the kernel's idea of the
1770 * "long mode" to remain in sync.
1771 */
1772 if (exit->u.hcall.longmode != xen_is_long_mode()) {
1773 xen_sync_long_mode();
1774 }
1775
1776 if (!do_kvm_xen_handle_exit(cpu, exit)) {
1777 /*
1778 * Some hypercalls will be deliberately "implemented" by returning
1779 * -ENOSYS. This case is for hypercalls which are unexpected.
1780 */
1781 exit->u.hcall.result = -ENOSYS;
1782 qemu_log_mask(LOG_UNIMP, "Unimplemented Xen hypercall %"
1783 PRId64 " (0x%" PRIx64 " 0x%" PRIx64 " 0x%" PRIx64 ")\n",
1784 (uint64_t)exit->u.hcall.input,
1785 (uint64_t)exit->u.hcall.params[0],
1786 (uint64_t)exit->u.hcall.params[1],
1787 (uint64_t)exit->u.hcall.params[2]);
1788 }
1789
1790 trace_kvm_xen_hypercall(CPU(cpu)->cpu_index, exit->u.hcall.cpl,
1791 exit->u.hcall.input, exit->u.hcall.params[0],
1792 exit->u.hcall.params[1], exit->u.hcall.params[2],
1793 exit->u.hcall.result);
1794 return 0;
1795 }
1796
1797 uint16_t kvm_xen_get_gnttab_max_frames(void)
1798 {
1799 KVMState *s = KVM_STATE(current_accel());
1800 return s->xen_gnttab_max_frames;
1801 }
1802
1803 uint16_t kvm_xen_get_evtchn_max_pirq(void)
1804 {
1805 KVMState *s = KVM_STATE(current_accel());
1806 return s->xen_evtchn_max_pirq;
1807 }
1808
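/*
 * Restore the per-vCPU Xen state (vcpu_info, time info, runstate area,
 * callback vector and timers) when vCPU state is loaded, e.g. after
 * migration.
 */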
1809 int kvm_put_xen_state(CPUState *cs)
1810 {
1811 X86CPU *cpu = X86_CPU(cs);
1812 CPUX86State *env = &cpu->env;
1813 uint64_t gpa;
1814 int ret;
1815
1816 gpa = env->xen_vcpu_info_gpa;
1817 if (gpa == INVALID_GPA) {
1818 gpa = env->xen_vcpu_info_default_gpa;
1819 }
1820
1821 if (gpa != INVALID_GPA) {
1822 ret = set_vcpu_info(cs, gpa);
1823 if (ret < 0) {
1824 return ret;
1825 }
1826 }
1827
1828 gpa = env->xen_vcpu_time_info_gpa;
1829 if (gpa != INVALID_GPA) {
1830 ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
1831 gpa);
1832 if (ret < 0) {
1833 return ret;
1834 }
1835 }
1836
1837 gpa = env->xen_vcpu_runstate_gpa;
1838 if (gpa != INVALID_GPA) {
1839 ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
1840 gpa);
1841 if (ret < 0) {
1842 return ret;
1843 }
1844 }
1845
1846 if (env->xen_periodic_timer_period) {
1847 ret = do_set_periodic_timer(cs, env->xen_periodic_timer_period);
1848 if (ret < 0) {
1849 return ret;
1850 }
1851 }
1852
1853 if (!kvm_xen_has_cap(EVTCHN_SEND)) {
1854 /*
1855 * If the kernel has EVTCHN_SEND support then it handles timers too,
1856 * so the timer will be restored by kvm_xen_set_vcpu_timer() below.
1857 */
1858 QEMU_LOCK_GUARD(&env->xen_timers_lock);
1859 if (env->xen_singleshot_timer_ns) {
1860 ret = do_set_singleshot_timer(cs, env->xen_singleshot_timer_ns,
1861 false);
1862 if (ret < 0) {
1863 return ret;
1864 }
1865 }
1866 return 0;
1867 }
1868
1869 if (env->xen_vcpu_callback_vector) {
1870 ret = kvm_xen_set_vcpu_callback_vector(cs);
1871 if (ret < 0) {
1872 return ret;
1873 }
1874 }
1875
1876 if (env->xen_virq[VIRQ_TIMER]) {
1877 do_set_vcpu_timer_virq(cs,
1878 RUN_ON_CPU_HOST_INT(env->xen_virq[VIRQ_TIMER]));
1879 }
1880 return 0;
1881 }
1882
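/*
 * Sync per-vCPU Xen state back out before saving it: mark the vcpu_info
 * page dirty (the kernel doesn't), and read back the singleshot timer
 * deadline if the kernel is accelerating the timers.
 */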
1883 int kvm_get_xen_state(CPUState *cs)
1884 {
1885 X86CPU *cpu = X86_CPU(cs);
1886 CPUX86State *env = &cpu->env;
1887 uint64_t gpa;
1888 int ret;
1889
1890 /*
1891 * The kernel does not mark vcpu_info as dirty when it delivers interrupts
1892 * to it. It's up to userspace to *assume* that any page shared thus is
1893 * always considered dirty. The shared_info page is different since it's
1894 * an overlay and migrated separately anyway.
1895 */
1896 gpa = env->xen_vcpu_info_gpa;
1897 if (gpa == INVALID_GPA) {
1898 gpa = env->xen_vcpu_info_default_gpa;
1899 }
1900 if (gpa != INVALID_GPA) {
1901 MemoryRegionSection mrs = memory_region_find(get_system_memory(),
1902 gpa,
1903 sizeof(struct vcpu_info));
1904 if (mrs.mr &&
1905 !int128_lt(mrs.size, int128_make64(sizeof(struct vcpu_info)))) {
1906 memory_region_set_dirty(mrs.mr, mrs.offset_within_region,
1907 sizeof(struct vcpu_info));
1908 }
1909 }
1910
1911 if (!kvm_xen_has_cap(EVTCHN_SEND)) {
1912 return 0;
1913 }
1914
1915 /*
1916 * If the kernel is accelerating timers, read out the current value of the
1917 * singleshot timer deadline.
1918 */
1919 if (env->xen_virq[VIRQ_TIMER]) {
1920 struct kvm_xen_vcpu_attr va = {
1921 .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
1922 };
1923 ret = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_GET_ATTR, &va);
1924 if (ret < 0) {
1925 return ret;
1926 }
1927
1928 /*
1929 * This locking is fairly pointless, and is here to appease Coverity.
1930 * There is an unavoidable race condition if a different vCPU sets a
1931 * timer for this vCPU after the value has been read out. But that's
1932 * OK in practice because *all* the vCPUs need to be stopped before
1933 * we set about migrating their state.
1934 */
1935 QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock);
1936 env->xen_singleshot_timer_ns = va.u.timer.expires_ns;
1937 }
1938
1939 return 0;
1940 }
1941