xref: /openbmc/qemu/hw/i386/kvm/xen_gnttab.c (revision 9468484fe904ab4691de6d9c34616667f377ceac)
1 /*
2  * QEMU Xen emulation: Grant table support
3  *
4  * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
5  *
6  * Authors: David Woodhouse <dwmw2@infradead.org>
7  *
8  * This work is licensed under the terms of the GNU GPL, version 2 or later.
9  * See the COPYING file in the top-level directory.
10  */
11 
12 #include "qemu/osdep.h"
13 #include "qemu/host-utils.h"
14 #include "qemu/module.h"
15 #include "qemu/lockable.h"
16 #include "qemu/main-loop.h"
17 #include "qapi/error.h"
18 #include "qom/object.h"
19 #include "exec/target_page.h"
20 #include "exec/address-spaces.h"
21 #include "migration/vmstate.h"
22 
23 #include "hw/sysbus.h"
24 #include "hw/xen/xen.h"
25 #include "hw/xen/xen_backend_ops.h"
26 #include "xen_overlay.h"
27 #include "xen_gnttab.h"
28 #include "xen_primary_console.h"
29 
30 #include "sysemu/kvm.h"
31 #include "sysemu/kvm_xen.h"
32 
33 #include "hw/xen/interface/memory.h"
34 #include "hw/xen/interface/grant_table.h"
35 
36 #define TYPE_XEN_GNTTAB "xen-gnttab"
37 OBJECT_DECLARE_SIMPLE_TYPE(XenGnttabState, XEN_GNTTAB)
38 
39 #define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))
40 
41 static struct gnttab_backend_ops emu_gnttab_backend_ops;
42 
43 struct XenGnttabState {
44     /*< private >*/
45     SysBusDevice busdev;
46     /*< public >*/
47 
48     QemuMutex gnt_lock;
49 
50     uint32_t nr_frames;
51     uint32_t max_frames;
52 
53     union {
54         grant_entry_v1_t *v1;
55         /* Theoretically, v2 support could be added here. */
56     } entries;
57 
58     MemoryRegion gnt_frames;
59     MemoryRegion *gnt_aliases;
60     uint64_t *gnt_frame_gpas;
61 
62     uint8_t *map_track;
63 };
64 
/* The one grant table instance; NULL until the device has been realized. */
struct XenGnttabState *xen_gnttab_singleton;
66 
xen_gnttab_realize(DeviceState * dev,Error ** errp)67 static void xen_gnttab_realize(DeviceState *dev, Error **errp)
68 {
69     XenGnttabState *s = XEN_GNTTAB(dev);
70     int i;
71 
72     if (xen_mode != XEN_EMULATE) {
73         error_setg(errp, "Xen grant table support is for Xen emulation");
74         return;
75     }
76     s->max_frames = kvm_xen_get_gnttab_max_frames();
77     memory_region_init_ram(&s->gnt_frames, OBJECT(dev), "xen:grant_table",
78                            XEN_PAGE_SIZE * s->max_frames, &error_abort);
79     memory_region_set_enabled(&s->gnt_frames, true);
80     s->entries.v1 = memory_region_get_ram_ptr(&s->gnt_frames);
81 
82     /* Create individual page-sizes aliases for overlays */
83     s->gnt_aliases = (void *)g_new0(MemoryRegion, s->max_frames);
84     s->gnt_frame_gpas = (void *)g_new(uint64_t, s->max_frames);
85     for (i = 0; i < s->max_frames; i++) {
86         memory_region_init_alias(&s->gnt_aliases[i], OBJECT(dev),
87                                  NULL, &s->gnt_frames,
88                                  i * XEN_PAGE_SIZE, XEN_PAGE_SIZE);
89         s->gnt_frame_gpas[i] = INVALID_GPA;
90     }
91 
92     s->nr_frames = 0;
93     memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames);
94     s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access;
95     s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE);
96 
97     qemu_mutex_init(&s->gnt_lock);
98 
99     xen_gnttab_singleton = s;
100 
101     s->map_track = g_new0(uint8_t, s->max_frames * ENTRIES_PER_FRAME_V1);
102 
103     xen_gnttab_ops = &emu_gnttab_backend_ops;
104 }
105 
xen_gnttab_post_load(void * opaque,int version_id)106 static int xen_gnttab_post_load(void *opaque, int version_id)
107 {
108     XenGnttabState *s = XEN_GNTTAB(opaque);
109     uint32_t i;
110 
111     for (i = 0; i < s->nr_frames; i++) {
112         if (s->gnt_frame_gpas[i] != INVALID_GPA) {
113             xen_overlay_do_map_page(&s->gnt_aliases[i], s->gnt_frame_gpas[i]);
114         }
115     }
116     return 0;
117 }
118 
xen_gnttab_is_needed(void * opaque)119 static bool xen_gnttab_is_needed(void *opaque)
120 {
121     return xen_mode == XEN_EMULATE;
122 }
123 
124 static const VMStateDescription xen_gnttab_vmstate = {
125     .name = "xen_gnttab",
126     .version_id = 1,
127     .minimum_version_id = 1,
128     .needed = xen_gnttab_is_needed,
129     .post_load = xen_gnttab_post_load,
130     .fields = (const VMStateField[]) {
131         VMSTATE_UINT32(nr_frames, XenGnttabState),
132         VMSTATE_VARRAY_UINT32(gnt_frame_gpas, XenGnttabState, nr_frames, 0,
133                               vmstate_info_uint64, uint64_t),
134         VMSTATE_END_OF_LIST()
135     }
136 };
137 
/* Class initializer: hook up the realize handler and migration state. */
static void xen_gnttab_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *device_class = DEVICE_CLASS(klass);

    device_class->vmsd = &xen_gnttab_vmstate;
    device_class->realize = xen_gnttab_realize;
}
145 
146 static const TypeInfo xen_gnttab_info = {
147     .name          = TYPE_XEN_GNTTAB,
148     .parent        = TYPE_SYS_BUS_DEVICE,
149     .instance_size = sizeof(XenGnttabState),
150     .class_init    = xen_gnttab_class_init,
151 };
152 
xen_gnttab_create(void)153 void xen_gnttab_create(void)
154 {
155     xen_gnttab_singleton = XEN_GNTTAB(sysbus_create_simple(TYPE_XEN_GNTTAB,
156                                                            -1, NULL));
157 }
158 
xen_gnttab_register_types(void)159 static void xen_gnttab_register_types(void)
160 {
161     type_register_static(&xen_gnttab_info);
162 }
163 
type_init(xen_gnttab_register_types)164 type_init(xen_gnttab_register_types)
165 
166 int xen_gnttab_map_page(uint64_t idx, uint64_t gfn)
167 {
168     XenGnttabState *s = xen_gnttab_singleton;
169     uint64_t gpa = gfn << XEN_PAGE_SHIFT;
170 
171     if (!s) {
172         return -ENOTSUP;
173     }
174 
175     if (idx >= s->max_frames) {
176         return -EINVAL;
177     }
178 
179     BQL_LOCK_GUARD();
180     QEMU_LOCK_GUARD(&s->gnt_lock);
181 
182     xen_overlay_do_map_page(&s->gnt_aliases[idx], gpa);
183 
184     s->gnt_frame_gpas[idx] = gpa;
185 
186     if (s->nr_frames <= idx) {
187         s->nr_frames = idx + 1;
188     }
189 
190     return 0;
191 }
192 
xen_gnttab_set_version_op(struct gnttab_set_version * set)193 int xen_gnttab_set_version_op(struct gnttab_set_version *set)
194 {
195     int ret;
196 
197     switch (set->version) {
198     case 1:
199         ret = 0;
200         break;
201 
202     case 2:
203         /* Behave as before set_version was introduced. */
204         ret = -ENOSYS;
205         break;
206 
207     default:
208         ret = -EINVAL;
209     }
210 
211     set->version = 1;
212     return ret;
213 }
214 
xen_gnttab_get_version_op(struct gnttab_get_version * get)215 int xen_gnttab_get_version_op(struct gnttab_get_version *get)
216 {
217     if (get->dom != DOMID_SELF && get->dom != xen_domid) {
218         return -ESRCH;
219     }
220 
221     get->version = 1;
222     return 0;
223 }
224 
xen_gnttab_query_size_op(struct gnttab_query_size * size)225 int xen_gnttab_query_size_op(struct gnttab_query_size *size)
226 {
227     XenGnttabState *s = xen_gnttab_singleton;
228 
229     if (!s) {
230         return -ENOTSUP;
231     }
232 
233     if (size->dom != DOMID_SELF && size->dom != xen_domid) {
234         size->status = GNTST_bad_domain;
235         return 0;
236     }
237 
238     size->status = GNTST_okay;
239     size->nr_frames = s->nr_frames;
240     size->max_nr_frames = s->max_frames;
241     return 0;
242 }
243 
244 /* Track per-open refs, to allow close() to clean up. */
245 struct active_ref {
246     MemoryRegionSection mrs;
247     void *virtaddr;
248     uint32_t refcnt;
249     int prot;
250 };
251 
/*
 * Release one mapping of grant reference @ref.
 *
 * When @mrs still holds a memory region, the page is marked dirty if it was
 * mapped with PROT_WRITE and the region reference is dropped.  The tracking
 * count for @ref must be non-zero; once it falls to zero the
 * GTF_reading/GTF_writing bits are cleared from the grant entry.
 */
static void gnt_unref(XenGnttabState *s, grant_ref_t ref,
                      MemoryRegionSection *mrs, int prot)
{
    MemoryRegion *mr = mrs ? mrs->mr : NULL;

    if (mr) {
        if (prot & PROT_WRITE) {
            memory_region_set_dirty(mr, mrs->offset_within_region,
                                    XEN_PAGE_SIZE);
        }
        memory_region_unref(mr);
        mrs->mr = NULL;
    }

    assert(s->map_track[ref] != 0);
    s->map_track[ref]--;

    if (s->map_track[ref] == 0) {
        /* Last mapping gone: the entry is no longer in use by us. */
        qatomic_and(&s->entries.v1[ref].flags,
                    (uint16_t)~(GTF_reading | GTF_writing));
    }
}
270 
/*
 * Try to take grant reference @ref for access with protection @prot.
 *
 * The entry must be a plain GTF_permit_access grant to DOMID_QEMU (and must
 * not be read-only when PROT_WRITE is requested).  On success the entry is
 * atomically marked GTF_reading (plus GTF_writing for writes) and the guest
 * physical address of the granted frame is returned.  INVALID_GPA is
 * returned if @ref is out of range, its tracking count is saturated, the
 * entry is unsuitable, or the flags kept changing under us.
 *
 * The tracking count itself is not modified here.
 */
static uint64_t gnt_ref(XenGnttabState *s, grant_ref_t ref, int prot)
{
    const bool want_write = prot & PROT_WRITE;
    uint16_t check_mask = GTF_type_mask | GTF_sub_page;
    grant_entry_v1_t *entry;
    int attempt;

    if (ref >= s->max_frames * ENTRIES_PER_FRAME_V1 ||
        s->map_track[ref] == UINT8_MAX) {
        return INVALID_GPA;
    }

    if (want_write) {
        check_mask |= GTF_readonly;
    }

    entry = &s->entries.v1[ref];

    /*
     * The guest can legitimately be changing the GTF_readonly flag. Allow
     * that, but don't let a malicious guest cause a livelock.
     */
    for (attempt = 0; attempt < 5; attempt++) {
        /* Read the entry before an atomic operation on its flags */
        grant_entry_v1_t snapshot = *(volatile grant_entry_v1_t *)entry;
        uint16_t wanted_flags;

        if ((snapshot.flags & check_mask) != GTF_permit_access ||
            snapshot.domid != DOMID_QEMU) {
            return INVALID_GPA;
        }

        wanted_flags = snapshot.flags | GTF_reading;
        if (want_write) {
            wanted_flags |= GTF_writing;
        }

        /* Only succeeds if the flags are still what we validated above. */
        if (qatomic_cmpxchg(&entry->flags, snapshot.flags, wanted_flags) ==
            snapshot.flags) {
            return (uint64_t)snapshot.frame << XEN_PAGE_SHIFT;
        }
    }

    return INVALID_GPA;
}
315 
316 struct xengntdev_handle {
317     GHashTable *active_maps;
318 };
319 
/* Nothing to configure in emulation: accept any limit and report success. */
static int xen_be_gnttab_set_max_grants(struct xengntdev_handle *xgt,
                                        uint32_t nr_grants)
{
    return 0;
}
325 
xen_be_gnttab_map_refs(struct xengntdev_handle * xgt,uint32_t count,uint32_t domid,uint32_t * refs,int prot)326 static void *xen_be_gnttab_map_refs(struct xengntdev_handle *xgt,
327                                     uint32_t count, uint32_t domid,
328                                     uint32_t *refs, int prot)
329 {
330     XenGnttabState *s = xen_gnttab_singleton;
331     struct active_ref *act;
332 
333     if (!s) {
334         errno = ENOTSUP;
335         return NULL;
336     }
337 
338     if (domid != xen_domid) {
339         errno = EINVAL;
340         return NULL;
341     }
342 
343     if (!count || count > 4096) {
344         errno = EINVAL;
345         return NULL;
346     }
347 
348     /*
349      * Making a contiguous mapping from potentially discontiguous grant
350      * references would be... distinctly non-trivial. We don't support it.
351      * Even changing the API to return an array of pointers, one per page,
352      * wouldn't be simple to use in PV backends because some structures
353      * actually cross page boundaries (e.g. 32-bit blkif_response ring
354      * entries are 12 bytes).
355      */
356     if (count != 1) {
357         errno = EINVAL;
358         return NULL;
359     }
360 
361     QEMU_LOCK_GUARD(&s->gnt_lock);
362 
363     act = g_hash_table_lookup(xgt->active_maps, GINT_TO_POINTER(refs[0]));
364     if (act) {
365         if ((prot & PROT_WRITE) && !(act->prot & PROT_WRITE)) {
366             if (gnt_ref(s, refs[0], prot) == INVALID_GPA) {
367                 return NULL;
368             }
369             act->prot |= PROT_WRITE;
370         }
371         act->refcnt++;
372     } else {
373         uint64_t gpa = gnt_ref(s, refs[0], prot);
374         if (gpa == INVALID_GPA) {
375             errno = EINVAL;
376             return NULL;
377         }
378 
379         act = g_new0(struct active_ref, 1);
380         act->prot = prot;
381         act->refcnt = 1;
382         act->mrs = memory_region_find(get_system_memory(), gpa, XEN_PAGE_SIZE);
383 
384         if (act->mrs.mr &&
385             !int128_lt(act->mrs.size, int128_make64(XEN_PAGE_SIZE)) &&
386             memory_region_get_ram_addr(act->mrs.mr) != RAM_ADDR_INVALID) {
387             act->virtaddr = qemu_map_ram_ptr(act->mrs.mr->ram_block,
388                                              act->mrs.offset_within_region);
389         }
390         if (!act->virtaddr) {
391             gnt_unref(s, refs[0], &act->mrs, 0);
392             g_free(act);
393             errno = EINVAL;
394             return NULL;
395         }
396 
397         s->map_track[refs[0]]++;
398         g_hash_table_insert(xgt->active_maps, GINT_TO_POINTER(refs[0]), act);
399     }
400 
401     return act->virtaddr;
402 }
403 
do_unmap(gpointer key,gpointer value,gpointer user_data)404 static gboolean do_unmap(gpointer key, gpointer value, gpointer user_data)
405 {
406     XenGnttabState *s = user_data;
407     grant_ref_t gref = GPOINTER_TO_INT(key);
408     struct active_ref *act = value;
409 
410     gnt_unref(s, gref, &act->mrs, act->prot);
411     g_free(act);
412     return true;
413 }
414 
xen_be_gnttab_unmap(struct xengntdev_handle * xgt,void * start_address,uint32_t * refs,uint32_t count)415 static int xen_be_gnttab_unmap(struct xengntdev_handle *xgt,
416                                void *start_address, uint32_t *refs,
417                                uint32_t count)
418 {
419     XenGnttabState *s = xen_gnttab_singleton;
420     struct active_ref *act;
421 
422     if (!s) {
423         return -ENOTSUP;
424     }
425 
426     if (count != 1) {
427         return -EINVAL;
428     }
429 
430     QEMU_LOCK_GUARD(&s->gnt_lock);
431 
432     act = g_hash_table_lookup(xgt->active_maps, GINT_TO_POINTER(refs[0]));
433     if (!act) {
434         return -ENOENT;
435     }
436 
437     if (act->virtaddr != start_address) {
438         return -EINVAL;
439     }
440 
441     if (!--act->refcnt) {
442         do_unmap(GINT_TO_POINTER(refs[0]), act, s);
443         g_hash_table_remove(xgt->active_maps, GINT_TO_POINTER(refs[0]));
444     }
445 
446     return 0;
447 }
448 
449 /*
450  * This looks a bit like the one for true Xen in xen-operations.c but
451  * in emulation we don't support multi-page mappings. And under Xen we
452  * *want* the multi-page mappings so we have fewer bounces through the
453  * kernel and the hypervisor. So the code paths end up being similar,
454  * but different.
455  */
xen_be_gnttab_copy(struct xengntdev_handle * xgt,bool to_domain,uint32_t domid,XenGrantCopySegment * segs,uint32_t nr_segs,Error ** errp)456 static int xen_be_gnttab_copy(struct xengntdev_handle *xgt, bool to_domain,
457                               uint32_t domid, XenGrantCopySegment *segs,
458                               uint32_t nr_segs, Error **errp)
459 {
460     int prot = to_domain ? PROT_WRITE : PROT_READ;
461     unsigned int i;
462 
463     for (i = 0; i < nr_segs; i++) {
464         XenGrantCopySegment *seg = &segs[i];
465         void *page;
466         uint32_t ref = to_domain ? seg->dest.foreign.ref :
467             seg->source.foreign.ref;
468 
469         page = xen_be_gnttab_map_refs(xgt, 1, domid, &ref, prot);
470         if (!page) {
471             if (errp) {
472                 error_setg_errno(errp, errno,
473                                  "xen_be_gnttab_map_refs failed");
474             }
475             return -errno;
476         }
477 
478         if (to_domain) {
479             memcpy(page + seg->dest.foreign.offset, seg->source.virt,
480                    seg->len);
481         } else {
482             memcpy(seg->dest.virt, page + seg->source.foreign.offset,
483                    seg->len);
484         }
485 
486         if (xen_be_gnttab_unmap(xgt, page, &ref, 1)) {
487             if (errp) {
488                 error_setg_errno(errp, errno, "xen_be_gnttab_unmap failed");
489             }
490             return -errno;
491         }
492     }
493 
494     return 0;
495 }
496 
xen_be_gnttab_open(void)497 static struct xengntdev_handle *xen_be_gnttab_open(void)
498 {
499     struct xengntdev_handle *xgt = g_new0(struct xengntdev_handle, 1);
500 
501     xgt->active_maps = g_hash_table_new(g_direct_hash, g_direct_equal);
502     return xgt;
503 }
504 
xen_be_gnttab_close(struct xengntdev_handle * xgt)505 static int xen_be_gnttab_close(struct xengntdev_handle *xgt)
506 {
507     XenGnttabState *s = xen_gnttab_singleton;
508 
509     if (!s) {
510         return -ENOTSUP;
511     }
512 
513     g_hash_table_foreach_remove(xgt->active_maps, do_unmap, s);
514     g_hash_table_destroy(xgt->active_maps);
515     g_free(xgt);
516     return 0;
517 }
518 
519 static struct gnttab_backend_ops emu_gnttab_backend_ops = {
520     .open = xen_be_gnttab_open,
521     .close = xen_be_gnttab_close,
522     .grant_copy = xen_be_gnttab_copy,
523     .set_max_grants = xen_be_gnttab_set_max_grants,
524     .map_refs = xen_be_gnttab_map_refs,
525     .unmap = xen_be_gnttab_unmap,
526 };
527 
xen_gnttab_reset(void)528 int xen_gnttab_reset(void)
529 {
530     XenGnttabState *s = xen_gnttab_singleton;
531 
532     if (!s) {
533         return -ENOTSUP;
534     }
535 
536     QEMU_LOCK_GUARD(&s->gnt_lock);
537 
538     s->nr_frames = 0;
539 
540     memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames);
541     s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access;
542     s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE);
543 
544     if (xen_primary_console_get_pfn()) {
545         s->entries.v1[GNTTAB_RESERVED_CONSOLE].flags = GTF_permit_access;
546         s->entries.v1[GNTTAB_RESERVED_CONSOLE].frame = XEN_SPECIAL_PFN(CONSOLE);
547     }
548 
549     return 0;
550 }
551