1 /*
2 * QEMU Xen emulation: Shared/overlay pages support
3 *
4 * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
5 *
6 * Authors: David Woodhouse <dwmw2@infradead.org>
7 *
8 * This work is licensed under the terms of the GNU GPL, version 2 or later.
9 * See the COPYING file in the top-level directory.
10 */
11
12 #include "qemu/osdep.h"
13
14 #include "qemu/host-utils.h"
15 #include "qemu/module.h"
16 #include "qemu/main-loop.h"
17 #include "qemu/cutils.h"
18 #include "qemu/error-report.h"
19 #include "qapi/error.h"
20 #include "qom/object.h"
21 #include "migration/vmstate.h"
22
23 #include "hw/sysbus.h"
24 #include "hw/xen/xen.h"
25 #include "hw/xen/xen_backend_ops.h"
26 #include "xen_overlay.h"
27 #include "xen_evtchn.h"
28 #include "xen_primary_console.h"
29 #include "xen_xenstore.h"
30
31 #include "sysemu/kvm.h"
32 #include "sysemu/kvm_xen.h"
33
34 #include "trace.h"
35
36 #include "xenstore_impl.h"
37
38 #include "hw/xen/interface/io/xs_wire.h"
39 #include "hw/xen/interface/event_channel.h"
40 #include "hw/xen/interface/grant_table.h"
41
42 #define TYPE_XEN_XENSTORE "xen-xenstore"
43 OBJECT_DECLARE_SIMPLE_TYPE(XenXenstoreState, XEN_XENSTORE)
44
45 #define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))
46 #define ENTRIES_PER_FRAME_V2 (XEN_PAGE_SIZE / sizeof(grant_entry_v2_t))
47
48 #define XENSTORE_HEADER_SIZE ((unsigned int)sizeof(struct xsd_sockmsg))
49
50 struct XenXenstoreState {
51 /*< private >*/
52 SysBusDevice busdev;
53 /*< public >*/
54
55 XenstoreImplState *impl;
56 GList *watch_events; /* for the guest */
57
58 MemoryRegion xenstore_page;
59 struct xenstore_domain_interface *xs;
60 uint8_t req_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX];
61 uint8_t rsp_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX];
62 uint32_t req_offset;
63 uint32_t rsp_offset;
64 bool rsp_pending;
65 bool fatal_error;
66
67 evtchn_port_t guest_port;
68 evtchn_port_t be_port;
69 struct xenevtchn_handle *eh;
70
71 uint8_t *impl_state;
72 uint32_t impl_state_size;
73
74 struct xengntdev_handle *gt;
75 void *granted_xs;
76 };
77
78 struct XenXenstoreState *xen_xenstore_singleton;
79
80 static void xen_xenstore_event(void *opaque);
81 static void fire_watch_cb(void *opaque, const char *path, const char *token);
82
83 static struct xenstore_backend_ops emu_xenstore_backend_ops;
84
85 static void G_GNUC_PRINTF (4, 5) relpath_printf(XenXenstoreState *s,
86 GList *perms,
87 const char *relpath,
88 const char *fmt, ...)
89 {
90 gchar *abspath;
91 gchar *value;
92 va_list args;
93 GByteArray *data;
94 int err;
95
96 abspath = g_strdup_printf("/local/domain/%u/%s", xen_domid, relpath);
97 va_start(args, fmt);
98 value = g_strdup_vprintf(fmt, args);
99 va_end(args);
100
101 data = g_byte_array_new_take((void *)value, strlen(value));
102
103 err = xs_impl_write(s->impl, DOMID_QEMU, XBT_NULL, abspath, data);
104 assert(!err);
105
106 g_byte_array_unref(data);
107
108 err = xs_impl_set_perms(s->impl, DOMID_QEMU, XBT_NULL, abspath, perms);
109 assert(!err);
110
111 g_free(abspath);
112 }
113
114 static void xen_xenstore_realize(DeviceState *dev, Error **errp)
115 {
116 XenXenstoreState *s = XEN_XENSTORE(dev);
117 GList *perms;
118
119 if (xen_mode != XEN_EMULATE) {
120 error_setg(errp, "Xen xenstore support is for Xen emulation");
121 return;
122 }
123 memory_region_init_ram(&s->xenstore_page, OBJECT(dev), "xen:xenstore_page",
124 XEN_PAGE_SIZE, &error_abort);
125 memory_region_set_enabled(&s->xenstore_page, true);
126 s->xs = memory_region_get_ram_ptr(&s->xenstore_page);
127 memset(s->xs, 0, XEN_PAGE_SIZE);
128
129 /* We can't map it this early as KVM isn't ready */
130 xen_xenstore_singleton = s;
131
132 s->eh = xen_be_evtchn_open();
133 if (!s->eh) {
134 error_setg(errp, "Xenstore evtchn port init failed");
135 return;
136 }
137 aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh),
138 xen_xenstore_event, NULL, NULL, NULL, s);
139
140 s->impl = xs_impl_create(xen_domid);
141
142 /* Populate the default nodes */
143
144 /* Nodes owned by 'dom0' but readable by the guest */
145 perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, DOMID_QEMU));
146 perms = g_list_append(perms, xs_perm_as_string(XS_PERM_READ, xen_domid));
147
148 relpath_printf(s, perms, "", "%s", "");
149
150 relpath_printf(s, perms, "domid", "%u", xen_domid);
151
152 relpath_printf(s, perms, "control/platform-feature-xs_reset_watches", "%u", 1);
153 relpath_printf(s, perms, "control/platform-feature-multiprocessor-suspend", "%u", 1);
154
155 relpath_printf(s, perms, "platform/acpi", "%u", 1);
156 relpath_printf(s, perms, "platform/acpi_s3", "%u", 1);
157 relpath_printf(s, perms, "platform/acpi_s4", "%u", 1);
158 relpath_printf(s, perms, "platform/acpi_laptop_slate", "%u", 0);
159
160 g_list_free_full(perms, g_free);
161
162 /* Nodes owned by the guest */
163 perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, xen_domid));
164
165 relpath_printf(s, perms, "attr", "%s", "");
166
167 relpath_printf(s, perms, "control/shutdown", "%s", "");
168 relpath_printf(s, perms, "control/feature-poweroff", "%u", 1);
169 relpath_printf(s, perms, "control/feature-reboot", "%u", 1);
170 relpath_printf(s, perms, "control/feature-suspend", "%u", 1);
171 relpath_printf(s, perms, "control/feature-s3", "%u", 1);
172 relpath_printf(s, perms, "control/feature-s4", "%u", 1);
173
174 relpath_printf(s, perms, "data", "%s", "");
175 relpath_printf(s, perms, "device", "%s", "");
176 relpath_printf(s, perms, "drivers", "%s", "");
177 relpath_printf(s, perms, "error", "%s", "");
178 relpath_printf(s, perms, "feature", "%s", "");
179
180 g_list_free_full(perms, g_free);
181
182 xen_xenstore_ops = &emu_xenstore_backend_ops;
183 }
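
/*
 * Illustrative sketch only (not emitted verbatim by the code above): for
 * xen_domid == 1, the default nodes created at realize time look roughly like
 *
 *   /local/domain/1/domid             = "1"   owner DOMID_QEMU, guest read-only
 *   /local/domain/1/platform/acpi     = "1"   owner DOMID_QEMU, guest read-only
 *   /local/domain/1/control/shutdown  = ""    owned by the guest
 *   /local/domain/1/data              = ""    owned by the guest
 */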
184
185 static bool xen_xenstore_is_needed(void *opaque)
186 {
187 return xen_mode == XEN_EMULATE;
188 }
189
190 static int xen_xenstore_pre_save(void *opaque)
191 {
192 XenXenstoreState *s = opaque;
193 GByteArray *save;
194
195 if (s->eh) {
196 s->guest_port = xen_be_evtchn_get_guest_port(s->eh);
197 }
198
199 g_free(s->impl_state);
200 save = xs_impl_serialize(s->impl);
201 s->impl_state = save->data;
202 s->impl_state_size = save->len;
203 g_byte_array_free(save, false);
204
205 return 0;
206 }
207
208 static int xen_xenstore_post_load(void *opaque, int ver)
209 {
210 XenXenstoreState *s = opaque;
211 GByteArray *save;
212 int ret;
213
214 /*
215 * As qemu/dom0, rebind to the guest's port. The Windows drivers may
216 * unbind the XenStore evtchn and rebind to it, having obtained the
217 * "remote" port through EVTCHNOP_status. In the case that migration
218 * occurs while it's unbound, the "remote" port needs to be the same
219 * as before so that the guest can find it, but should remain unbound.
220 */
221 if (s->guest_port) {
222 int be_port = xen_be_evtchn_bind_interdomain(s->eh, xen_domid,
223 s->guest_port);
224 if (be_port < 0) {
225 return be_port;
226 }
227 s->be_port = be_port;
228 }
229
230 save = g_byte_array_new_take(s->impl_state, s->impl_state_size);
231 s->impl_state = NULL;
232 s->impl_state_size = 0;
233
234 ret = xs_impl_deserialize(s->impl, save, xen_domid, fire_watch_cb, s);
235 return ret;
236 }
237
238 static const VMStateDescription xen_xenstore_vmstate = {
239 .name = "xen_xenstore",
240 .unmigratable = 1, /* The PV back ends don't migrate yet */
241 .version_id = 1,
242 .minimum_version_id = 1,
243 .needed = xen_xenstore_is_needed,
244 .pre_save = xen_xenstore_pre_save,
245 .post_load = xen_xenstore_post_load,
246 .fields = (const VMStateField[]) {
247 VMSTATE_UINT8_ARRAY(req_data, XenXenstoreState,
248 sizeof_field(XenXenstoreState, req_data)),
249 VMSTATE_UINT8_ARRAY(rsp_data, XenXenstoreState,
250 sizeof_field(XenXenstoreState, rsp_data)),
251 VMSTATE_UINT32(req_offset, XenXenstoreState),
252 VMSTATE_UINT32(rsp_offset, XenXenstoreState),
253 VMSTATE_BOOL(rsp_pending, XenXenstoreState),
254 VMSTATE_UINT32(guest_port, XenXenstoreState),
255 VMSTATE_BOOL(fatal_error, XenXenstoreState),
256 VMSTATE_UINT32(impl_state_size, XenXenstoreState),
257 VMSTATE_VARRAY_UINT32_ALLOC(impl_state, XenXenstoreState,
258 impl_state_size, 0,
259 vmstate_info_uint8, uint8_t),
260 VMSTATE_END_OF_LIST()
261 }
262 };
263
264 static void xen_xenstore_class_init(ObjectClass *klass, void *data)
265 {
266 DeviceClass *dc = DEVICE_CLASS(klass);
267
268 dc->realize = xen_xenstore_realize;
269 dc->vmsd = &xen_xenstore_vmstate;
270 }
271
272 static const TypeInfo xen_xenstore_info = {
273 .name = TYPE_XEN_XENSTORE,
274 .parent = TYPE_SYS_BUS_DEVICE,
275 .instance_size = sizeof(XenXenstoreState),
276 .class_init = xen_xenstore_class_init,
277 };
278
279 void xen_xenstore_create(void)
280 {
281 DeviceState *dev = sysbus_create_simple(TYPE_XEN_XENSTORE, -1, NULL);
282
283 xen_xenstore_singleton = XEN_XENSTORE(dev);
284
285 /*
286 * Defer the init (xen_xenstore_reset()) until KVM is set up and the
287 * overlay page can be mapped.
288 */
289 }
290
291 static void xen_xenstore_register_types(void)
292 {
293 type_register_static(&xen_xenstore_info);
294 }
295
296 type_init(xen_xenstore_register_types)
297
298 uint16_t xen_xenstore_get_port(void)
299 {
300 XenXenstoreState *s = xen_xenstore_singleton;
301 if (!s) {
302 return 0;
303 }
304 return s->guest_port;
305 }
306
307 static bool req_pending(XenXenstoreState *s)
308 {
309 struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;
310
311 return s->req_offset == XENSTORE_HEADER_SIZE + req->len;
312 }
313
314 static void reset_req(XenXenstoreState *s)
315 {
316 memset(s->req_data, 0, sizeof(s->req_data));
317 s->req_offset = 0;
318 }
319
320 static void reset_rsp(XenXenstoreState *s)
321 {
322 s->rsp_pending = false;
323
324 memset(s->rsp_data, 0, sizeof(s->rsp_data));
325 s->rsp_offset = 0;
326 }
327
328 static void xs_error(XenXenstoreState *s, unsigned int id,
329 xs_transaction_t tx_id, int errnum)
330 {
331 struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
332 const char *errstr = NULL;
333
334 for (unsigned int i = 0; i < ARRAY_SIZE(xsd_errors); i++) {
335 const struct xsd_errors *xsd_error = &xsd_errors[i];
336
337 if (xsd_error->errnum == errnum) {
338 errstr = xsd_error->errstring;
339 break;
340 }
341 }
342 assert(errstr);
343
344 trace_xenstore_error(id, tx_id, errstr);
345
346 rsp->type = XS_ERROR;
347 rsp->req_id = id;
348 rsp->tx_id = tx_id;
349 rsp->len = (uint32_t)strlen(errstr) + 1;
350
351 memcpy(&rsp[1], errstr, rsp->len);
352 }
353
354 static void xs_ok(XenXenstoreState *s, unsigned int type, unsigned int req_id,
355 xs_transaction_t tx_id)
356 {
357 struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
358 const char *okstr = "OK";
359
360 rsp->type = type;
361 rsp->req_id = req_id;
362 rsp->tx_id = tx_id;
363 rsp->len = (uint32_t)strlen(okstr) + 1;
364
365 memcpy(&rsp[1], okstr, rsp->len);
366 }
367
368 /*
369 * The correct request and response formats are documented in xen.git:
370 * docs/misc/xenstore.txt. A summary is given below for convenience.
371 * The '|' symbol represents a NUL character.
372 *
373 * ---------- Database read, write and permissions operations ----------
374 *
375 * READ <path>| <value|>
376 * WRITE <path>|<value|>
377 * Store and read the octet string <value> at <path>.
378 * WRITE creates any missing parent paths, with empty values.
379 *
380 * MKDIR <path>|
381  *      Ensures that the <path> exists, if necessary by creating
382 * it and any missing parents with empty values. If <path>
383 * or any parent already exists, its value is left unchanged.
384 *
385 * RM <path>|
386 * Ensures that the <path> does not exist, by deleting
387 * it and all of its children. It is not an error if <path> does
388 * not exist, but it _is_ an error if <path>'s immediate parent
389 * does not exist either.
390 *
391 * DIRECTORY <path>| <child-leaf-name>|*
392 * Gives a list of the immediate children of <path>, as only the
393 * leafnames. The resulting children are each named
394 * <path>/<child-leaf-name>.
395 *
396 * DIRECTORY_PART <path>|<offset> <gencnt>|<child-leaf-name>|*
397 * Same as DIRECTORY, but to be used for children lists longer than
398  *      XENSTORE_PAYLOAD_MAX. Inputs are <path> and the byte offset into
399 * the list of children to return. Return values are the generation
400 * count <gencnt> of the node (to be used to ensure the node hasn't
401 * changed between two reads: <gencnt> being the same for multiple
402 * reads guarantees the node hasn't changed) and the list of children
403 * starting at the specified <offset> of the complete list.
404 *
405 * GET_PERMS <path>| <perm-as-string>|+
406 * SET_PERMS <path>|<perm-as-string>|+?
407 * <perm-as-string> is one of the following
408 * w<domid> write only
409 * r<domid> read only
410 * b<domid> both read and write
411 * n<domid> no access
412 * See https://wiki.xen.org/wiki/XenBus section
413 * `Permissions' for details of the permissions system.
414 * It is possible to set permissions for the special watch paths
415 * "@introduceDomain" and "@releaseDomain" to enable receiving those
416 * watches in unprivileged domains.
417 *
418 * ---------- Watches ----------
419 *
420 * WATCH <wpath>|<token>|?
421 * Adds a watch.
422 *
423 * When a <path> is modified (including path creation, removal,
424 * contents change or permissions change) this generates an event
425 * on the changed <path>. Changes made in transactions cause an
426 * event only if and when committed. Each occurring event is
427 * matched against all the watches currently set up, and each
428 * matching watch results in a WATCH_EVENT message (see below).
429 *
430  *      The event's path matches the watch's <wpath> if it is a child
431 * of <wpath>.
432 *
433 * <wpath> can be a <path> to watch or @<wspecial>. In the
434 * latter case <wspecial> may have any syntax but it matches
435 * (according to the rules above) only the following special
436 * events which are invented by xenstored:
437 * @introduceDomain occurs on INTRODUCE
438 * @releaseDomain occurs on any domain crash or
439 * shutdown, and also on RELEASE
440 * and domain destruction
441  *      <wspecial> events are sent only to privileged callers, or to
442  *      domains explicitly enabled via SET_PERMS.
443 *
444 * When a watch is first set up it is triggered once straight
445 * away, with <path> equal to <wpath>. Watches may be triggered
446 * spuriously. The tx_id in a WATCH request is ignored.
447 *
448 * Watches are supposed to be restricted by the permissions
449 * system but in practice the implementation is imperfect.
450 * Applications should not rely on being sent a notification for
451 * paths that they cannot read; however, an application may rely
452 * on being sent a watch when a path which it _is_ able to read
453 * is deleted even if that leaves only a nonexistent unreadable
454  *      parent. A notification may be omitted if a node's permissions
455 * are changed so as to make it unreadable, in which case future
456 * notifications may be suppressed (and if the node is later made
457 * readable, some notifications may have been lost).
458 *
459 * WATCH_EVENT <epath>|<token>|
460 * Unsolicited `reply' generated for matching modification events
461 * as described above. req_id and tx_id are both 0.
462 *
463 * <epath> is the event's path, ie the actual path that was
464  *      modified; however if the event was the recursive removal of a
465 * parent of <wpath>, <epath> is just
466 * <wpath> (rather than the actual path which was removed). So
467 * <epath> is a child of <wpath>, regardless.
468 *
469 * Iff <wpath> for the watch was specified as a relative pathname,
470 * the <epath> path will also be relative (with the same base,
471 * obviously).
472 *
473 * UNWATCH <wpath>|<token>|?
474 *
475 * RESET_WATCHES |
476 * Reset all watches and transactions of the caller.
477 *
478 * ---------- Transactions ----------
479 *
480 * TRANSACTION_START | <transid>|
481 * <transid> is an opaque uint32_t allocated by xenstored
482  *      represented as unsigned decimal. After this, the transaction may
483  *      be referenced by using <transid> (as 32-bit binary) in the
484  *      tx_id request header field. When a transaction is started, the
485  *      whole db is copied; reads and writes happen on the copy.
486 * It is not legal to send non-0 tx_id in TRANSACTION_START.
487 *
488 * TRANSACTION_END T|
489 * TRANSACTION_END F|
490  *      tx_id must refer to an existing transaction. After this
491 * request the tx_id is no longer valid and may be reused by
492 * xenstore. If F, the transaction is discarded. If T,
493 * it is committed: if there were any other intervening writes
494  *      then our END gets EAGAIN.
495 *
496 * The plan is that in the future only intervening `conflicting'
497 * writes cause EAGAIN, meaning only writes or other commits
498 * which changed paths which were read or written in the
499 * transaction at hand.
500 *
501 */
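
/*
 * Worked example (illustrative, using the message format described above):
 * a guest WRITE of the value "1" to the hypothetical node "data/foo",
 * outside any transaction, is a struct xsd_sockmsg header followed by the
 * payload:
 *
 *   type   = XS_WRITE
 *   req_id = <echoed back unchanged in the reply>
 *   tx_id  = 0
 *   len    = 10    (8 bytes of path, a NUL, and 1 byte of value)
 *   payload: "data/foo\0" "1"
 *
 * The success reply carries the same type and req_id with the 3-byte
 * payload "OK\0", as built by xs_ok() above.
 */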
502
503 static void xs_read(XenXenstoreState *s, unsigned int req_id,
504 xs_transaction_t tx_id, uint8_t *req_data, unsigned int len)
505 {
506 const char *path = (const char *)req_data;
507 struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
508 uint8_t *rsp_data = (uint8_t *)&rsp[1];
509 g_autoptr(GByteArray) data = g_byte_array_new();
510 int err;
511
512 if (len == 0 || req_data[len - 1] != '\0') {
513 xs_error(s, req_id, tx_id, EINVAL);
514 return;
515 }
516
517 trace_xenstore_read(tx_id, path);
518 err = xs_impl_read(s->impl, xen_domid, tx_id, path, data);
519 if (err) {
520 xs_error(s, req_id, tx_id, err);
521 return;
522 }
523
524 rsp->type = XS_READ;
525 rsp->req_id = req_id;
526 rsp->tx_id = tx_id;
527 rsp->len = 0;
528
529 len = data->len;
530 if (len > XENSTORE_PAYLOAD_MAX) {
531 xs_error(s, req_id, tx_id, E2BIG);
532 return;
533 }
534
535 memcpy(&rsp_data[rsp->len], data->data, len);
536 rsp->len += len;
537 }
538
539 static void xs_write(XenXenstoreState *s, unsigned int req_id,
540 xs_transaction_t tx_id, uint8_t *req_data,
541 unsigned int len)
542 {
543 g_autoptr(GByteArray) data = g_byte_array_new();
544 const char *path;
545 int err;
546
547 if (len == 0) {
548 xs_error(s, req_id, tx_id, EINVAL);
549 return;
550 }
551
552 path = (const char *)req_data;
553
554 while (len--) {
555 if (*req_data++ == '\0') {
556 break;
557 }
558 if (len == 0) {
559 xs_error(s, req_id, tx_id, EINVAL);
560 return;
561 }
562 }
563
564 g_byte_array_append(data, req_data, len);
565
566 trace_xenstore_write(tx_id, path);
567 err = xs_impl_write(s->impl, xen_domid, tx_id, path, data);
568 if (err) {
569 xs_error(s, req_id, tx_id, err);
570 return;
571 }
572
573 xs_ok(s, XS_WRITE, req_id, tx_id);
574 }
575
576 static void xs_mkdir(XenXenstoreState *s, unsigned int req_id,
577 xs_transaction_t tx_id, uint8_t *req_data,
578 unsigned int len)
579 {
580 g_autoptr(GByteArray) data = g_byte_array_new();
581 const char *path;
582 int err;
583
584 if (len == 0 || req_data[len - 1] != '\0') {
585 xs_error(s, req_id, tx_id, EINVAL);
586 return;
587 }
588
589 path = (const char *)req_data;
590
591 trace_xenstore_mkdir(tx_id, path);
592 err = xs_impl_read(s->impl, xen_domid, tx_id, path, data);
593 if (err == ENOENT) {
594 err = xs_impl_write(s->impl, xen_domid, tx_id, path, data);
595 }
596
597     if (err) {
598 xs_error(s, req_id, tx_id, err);
599 return;
600 }
601
602 xs_ok(s, XS_MKDIR, req_id, tx_id);
603 }
604
605 static void xs_append_strings(XenXenstoreState *s, struct xsd_sockmsg *rsp,
606 GList *strings, unsigned int start, bool truncate)
607 {
608 uint8_t *rsp_data = (uint8_t *)&rsp[1];
609 GList *l;
610
611 for (l = strings; l; l = l->next) {
612 size_t len = strlen(l->data) + 1; /* Including the NUL termination */
613 char *str = l->data;
614
615 if (rsp->len + len > XENSTORE_PAYLOAD_MAX) {
616 if (truncate) {
617 len = XENSTORE_PAYLOAD_MAX - rsp->len;
618 if (!len) {
619 return;
620 }
621 } else {
622 xs_error(s, rsp->req_id, rsp->tx_id, E2BIG);
623 return;
624 }
625 }
626
627 if (start) {
628 if (start >= len) {
629 start -= len;
630 continue;
631 }
632
633 str += start;
634 len -= start;
635 start = 0;
636 }
637
638 memcpy(&rsp_data[rsp->len], str, len);
639 rsp->len += len;
640 }
641 /* XS_DIRECTORY_PART wants an extra NUL to indicate the end */
642 if (truncate && rsp->len < XENSTORE_PAYLOAD_MAX) {
643 rsp_data[rsp->len++] = '\0';
644 }
645 }
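
/*
 * Example (illustrative): for a node with children "backend", "device" and
 * "control", XS_DIRECTORY returns the payload
 *
 *   "backend\0device\0control\0"
 *
 * while XS_DIRECTORY_PART prefixes the generation count and, via the
 * truncate path above, appends one extra NUL, e.g.
 *
 *   "42\0backend\0device\0control\0\0"
 */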
646
647 static void xs_directory(XenXenstoreState *s, unsigned int req_id,
648 xs_transaction_t tx_id, uint8_t *req_data,
649 unsigned int len)
650 {
651 struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
652 GList *items = NULL;
653 const char *path;
654 int err;
655
656 if (len == 0 || req_data[len - 1] != '\0') {
657 xs_error(s, req_id, tx_id, EINVAL);
658 return;
659 }
660
661 path = (const char *)req_data;
662
663 trace_xenstore_directory(tx_id, path);
664 err = xs_impl_directory(s->impl, xen_domid, tx_id, path, NULL, &items);
665 if (err != 0) {
666 xs_error(s, req_id, tx_id, err);
667 return;
668 }
669
670 rsp->type = XS_DIRECTORY;
671 rsp->req_id = req_id;
672 rsp->tx_id = tx_id;
673 rsp->len = 0;
674
675 xs_append_strings(s, rsp, items, 0, false);
676
677 g_list_free_full(items, g_free);
678 }
679
680 static void xs_directory_part(XenXenstoreState *s, unsigned int req_id,
681 xs_transaction_t tx_id, uint8_t *req_data,
682 unsigned int len)
683 {
684 const char *offset_str, *path = (const char *)req_data;
685 struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
686 char *rsp_data = (char *)&rsp[1];
687 uint64_t gencnt = 0;
688 unsigned int offset;
689 GList *items = NULL;
690 int err;
691
692 if (len == 0) {
693 xs_error(s, req_id, tx_id, EINVAL);
694 return;
695 }
696
697 while (len--) {
698 if (*req_data++ == '\0') {
699 break;
700 }
701 if (len == 0) {
702 xs_error(s, req_id, tx_id, EINVAL);
703 return;
704 }
705 }
706
707 offset_str = (const char *)req_data;
708 while (len--) {
709 if (*req_data++ == '\0') {
710 break;
711 }
712 if (len == 0) {
713 xs_error(s, req_id, tx_id, EINVAL);
714 return;
715 }
716 }
717
718 if (len) {
719 xs_error(s, req_id, tx_id, EINVAL);
720 return;
721 }
722
723 if (qemu_strtoui(offset_str, NULL, 10, &offset) < 0) {
724 xs_error(s, req_id, tx_id, EINVAL);
725 return;
726 }
727
728 trace_xenstore_directory_part(tx_id, path, offset);
729 err = xs_impl_directory(s->impl, xen_domid, tx_id, path, &gencnt, &items);
730 if (err != 0) {
731 xs_error(s, req_id, tx_id, err);
732 return;
733 }
734
735 rsp->type = XS_DIRECTORY_PART;
736 rsp->req_id = req_id;
737 rsp->tx_id = tx_id;
738 rsp->len = snprintf(rsp_data, XENSTORE_PAYLOAD_MAX, "%" PRIu64, gencnt) + 1;
739
740 xs_append_strings(s, rsp, items, offset, true);
741
742 g_list_free_full(items, g_free);
743 }
744
745 static void xs_transaction_start(XenXenstoreState *s, unsigned int req_id,
746 xs_transaction_t tx_id, uint8_t *req_data,
747 unsigned int len)
748 {
749 struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
750 char *rsp_data = (char *)&rsp[1];
751 int err;
752
753 if (len != 1 || req_data[0] != '\0') {
754 xs_error(s, req_id, tx_id, EINVAL);
755 return;
756 }
757
758 rsp->type = XS_TRANSACTION_START;
759 rsp->req_id = req_id;
760 rsp->tx_id = tx_id;
761 rsp->len = 0;
762
763 err = xs_impl_transaction_start(s->impl, xen_domid, &tx_id);
764 if (err) {
765 xs_error(s, req_id, tx_id, err);
766 return;
767 }
768
769 trace_xenstore_transaction_start(tx_id);
770
771 rsp->len = snprintf(rsp_data, XENSTORE_PAYLOAD_MAX, "%u", tx_id);
772 assert(rsp->len < XENSTORE_PAYLOAD_MAX);
773 rsp->len++;
774 }
775
776 static void xs_transaction_end(XenXenstoreState *s, unsigned int req_id,
777 xs_transaction_t tx_id, uint8_t *req_data,
778 unsigned int len)
779 {
780 bool commit;
781 int err;
782
783 if (len != 2 || req_data[1] != '\0') {
784 xs_error(s, req_id, tx_id, EINVAL);
785 return;
786 }
787
788 switch (req_data[0]) {
789 case 'T':
790 commit = true;
791 break;
792 case 'F':
793 commit = false;
794 break;
795 default:
796 xs_error(s, req_id, tx_id, EINVAL);
797 return;
798 }
799
800 trace_xenstore_transaction_end(tx_id, commit);
801 err = xs_impl_transaction_end(s->impl, xen_domid, tx_id, commit);
802 if (err) {
803 xs_error(s, req_id, tx_id, err);
804 return;
805 }
806
807 xs_ok(s, XS_TRANSACTION_END, req_id, tx_id);
808 }
809
810 static void xs_rm(XenXenstoreState *s, unsigned int req_id,
811 xs_transaction_t tx_id, uint8_t *req_data, unsigned int len)
812 {
813 const char *path = (const char *)req_data;
814 int err;
815
816 if (len == 0 || req_data[len - 1] != '\0') {
817 xs_error(s, req_id, tx_id, EINVAL);
818 return;
819 }
820
821 trace_xenstore_rm(tx_id, path);
822 err = xs_impl_rm(s->impl, xen_domid, tx_id, path);
823 if (err) {
824 xs_error(s, req_id, tx_id, err);
825 return;
826 }
827
828 xs_ok(s, XS_RM, req_id, tx_id);
829 }
830
831 static void xs_get_perms(XenXenstoreState *s, unsigned int req_id,
832 xs_transaction_t tx_id, uint8_t *req_data,
833 unsigned int len)
834 {
835 const char *path = (const char *)req_data;
836 struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
837 GList *perms = NULL;
838 int err;
839
840 if (len == 0 || req_data[len - 1] != '\0') {
841 xs_error(s, req_id, tx_id, EINVAL);
842 return;
843 }
844
845 trace_xenstore_get_perms(tx_id, path);
846 err = xs_impl_get_perms(s->impl, xen_domid, tx_id, path, &perms);
847 if (err) {
848 xs_error(s, req_id, tx_id, err);
849 return;
850 }
851
852 rsp->type = XS_GET_PERMS;
853 rsp->req_id = req_id;
854 rsp->tx_id = tx_id;
855 rsp->len = 0;
856
857 xs_append_strings(s, rsp, perms, 0, false);
858
859 g_list_free_full(perms, g_free);
860 }
861
862 static void xs_set_perms(XenXenstoreState *s, unsigned int req_id,
863 xs_transaction_t tx_id, uint8_t *req_data,
864 unsigned int len)
865 {
866 const char *path = (const char *)req_data;
867 uint8_t *perm;
868 GList *perms = NULL;
869 int err;
870
871 if (len == 0) {
872 xs_error(s, req_id, tx_id, EINVAL);
873 return;
874 }
875
876 while (len--) {
877 if (*req_data++ == '\0') {
878 break;
879 }
880 if (len == 0) {
881 xs_error(s, req_id, tx_id, EINVAL);
882 return;
883 }
884 }
885
886 perm = req_data;
887 while (len--) {
888 if (*req_data++ == '\0') {
889 perms = g_list_append(perms, perm);
890 perm = req_data;
891 }
892 }
893
894 /*
895 * Note that there may be trailing garbage at the end of the buffer.
896 * This is explicitly permitted by the '?' at the end of the definition:
897 *
898 * SET_PERMS <path>|<perm-as-string>|+?
899 */
900
901 trace_xenstore_set_perms(tx_id, path);
902 err = xs_impl_set_perms(s->impl, xen_domid, tx_id, path, perms);
903 g_list_free(perms);
904 if (err) {
905 xs_error(s, req_id, tx_id, err);
906 return;
907 }
908
909 xs_ok(s, XS_SET_PERMS, req_id, tx_id);
910 }
911
912 static void xs_watch(XenXenstoreState *s, unsigned int req_id,
913 xs_transaction_t tx_id, uint8_t *req_data,
914 unsigned int len)
915 {
916 const char *token, *path = (const char *)req_data;
917 int err;
918
919 if (len == 0) {
920 xs_error(s, req_id, tx_id, EINVAL);
921 return;
922 }
923
924 while (len--) {
925 if (*req_data++ == '\0') {
926 break;
927 }
928 if (len == 0) {
929 xs_error(s, req_id, tx_id, EINVAL);
930 return;
931 }
932 }
933
934 token = (const char *)req_data;
935 while (len--) {
936 if (*req_data++ == '\0') {
937 break;
938 }
939 if (len == 0) {
940 xs_error(s, req_id, tx_id, EINVAL);
941 return;
942 }
943 }
944
945 /*
946 * Note that there may be trailing garbage at the end of the buffer.
947 * This is explicitly permitted by the '?' at the end of the definition:
948 *
949 * WATCH <wpath>|<token>|?
950 */
951
952 trace_xenstore_watch(path, token);
953 err = xs_impl_watch(s->impl, xen_domid, path, token, fire_watch_cb, s);
954 if (err) {
955 xs_error(s, req_id, tx_id, err);
956 return;
957 }
958
959 xs_ok(s, XS_WATCH, req_id, tx_id);
960 }
961
962 static void xs_unwatch(XenXenstoreState *s, unsigned int req_id,
963 xs_transaction_t tx_id, uint8_t *req_data,
964 unsigned int len)
965 {
966 const char *token, *path = (const char *)req_data;
967 int err;
968
969 if (len == 0) {
970 xs_error(s, req_id, tx_id, EINVAL);
971 return;
972 }
973
974 while (len--) {
975 if (*req_data++ == '\0') {
976 break;
977 }
978 if (len == 0) {
979 xs_error(s, req_id, tx_id, EINVAL);
980 return;
981 }
982 }
983
984 token = (const char *)req_data;
985 while (len--) {
986 if (*req_data++ == '\0') {
987 break;
988 }
989 if (len == 0) {
990 xs_error(s, req_id, tx_id, EINVAL);
991 return;
992 }
993 }
994
995 trace_xenstore_unwatch(path, token);
996 err = xs_impl_unwatch(s->impl, xen_domid, path, token, fire_watch_cb, s);
997 if (err) {
998 xs_error(s, req_id, tx_id, err);
999 return;
1000 }
1001
1002 xs_ok(s, XS_UNWATCH, req_id, tx_id);
1003 }
1004
1005 static void xs_reset_watches(XenXenstoreState *s, unsigned int req_id,
1006 xs_transaction_t tx_id, uint8_t *req_data,
1007 unsigned int len)
1008 {
1009 if (len == 0 || req_data[len - 1] != '\0') {
1010 xs_error(s, req_id, tx_id, EINVAL);
1011 return;
1012 }
1013
1014 trace_xenstore_reset_watches();
1015 xs_impl_reset_watches(s->impl, xen_domid);
1016
1017 xs_ok(s, XS_RESET_WATCHES, req_id, tx_id);
1018 }
1019
1020 static void xs_priv(XenXenstoreState *s, unsigned int req_id,
1021 xs_transaction_t tx_id, uint8_t *data,
1022 unsigned int len)
1023 {
1024 xs_error(s, req_id, tx_id, EACCES);
1025 }
1026
1027 static void xs_unimpl(XenXenstoreState *s, unsigned int req_id,
1028 xs_transaction_t tx_id, uint8_t *data,
1029 unsigned int len)
1030 {
1031 xs_error(s, req_id, tx_id, ENOSYS);
1032 }
1033
1034 typedef void (*xs_impl)(XenXenstoreState *s, unsigned int req_id,
1035 xs_transaction_t tx_id, uint8_t *data,
1036 unsigned int len);
1037
1038 struct xsd_req {
1039 const char *name;
1040 xs_impl fn;
1041 };
1042 #define XSD_REQ(_type, _fn) \
1043 [_type] = { .name = #_type, .fn = _fn }
1044
1045 struct xsd_req xsd_reqs[] = {
1046 XSD_REQ(XS_READ, xs_read),
1047 XSD_REQ(XS_WRITE, xs_write),
1048 XSD_REQ(XS_MKDIR, xs_mkdir),
1049 XSD_REQ(XS_DIRECTORY, xs_directory),
1050 XSD_REQ(XS_DIRECTORY_PART, xs_directory_part),
1051 XSD_REQ(XS_TRANSACTION_START, xs_transaction_start),
1052 XSD_REQ(XS_TRANSACTION_END, xs_transaction_end),
1053 XSD_REQ(XS_RM, xs_rm),
1054 XSD_REQ(XS_GET_PERMS, xs_get_perms),
1055 XSD_REQ(XS_SET_PERMS, xs_set_perms),
1056 XSD_REQ(XS_WATCH, xs_watch),
1057 XSD_REQ(XS_UNWATCH, xs_unwatch),
1058 XSD_REQ(XS_CONTROL, xs_priv),
1059 XSD_REQ(XS_INTRODUCE, xs_priv),
1060 XSD_REQ(XS_RELEASE, xs_priv),
1061 XSD_REQ(XS_IS_DOMAIN_INTRODUCED, xs_priv),
1062 XSD_REQ(XS_RESUME, xs_priv),
1063 XSD_REQ(XS_SET_TARGET, xs_priv),
1064 XSD_REQ(XS_RESET_WATCHES, xs_reset_watches),
1065 };
1066
1067 static void process_req(XenXenstoreState *s)
1068 {
1069 struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;
1070 xs_impl handler = NULL;
1071
1072 assert(req_pending(s));
1073 assert(!s->rsp_pending);
1074
1075 if (req->type < ARRAY_SIZE(xsd_reqs)) {
1076 handler = xsd_reqs[req->type].fn;
1077 }
1078 if (!handler) {
1079 handler = &xs_unimpl;
1080 }
1081
1082 handler(s, req->req_id, req->tx_id, (uint8_t *)&req[1], req->len);
1083
1084 s->rsp_pending = true;
1085 reset_req(s);
1086 }
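
/*
 * Example flow (illustrative): once get_req() below has accumulated a
 * complete XS_READ message in req_data, process_req() dispatches it via
 * xsd_reqs[XS_READ] to xs_read(), which stages the reply in rsp_data for
 * put_rsp() to copy onto the ring. A request type outside the table, or
 * one without a handler, is answered with ENOSYS by xs_unimpl().
 */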
1087
1088 static unsigned int copy_from_ring(XenXenstoreState *s, uint8_t *ptr,
1089 unsigned int len)
1090 {
1091 if (!len) {
1092 return 0;
1093 }
1094
1095 XENSTORE_RING_IDX prod = qatomic_read(&s->xs->req_prod);
1096 XENSTORE_RING_IDX cons = qatomic_read(&s->xs->req_cons);
1097 unsigned int copied = 0;
1098
1099 /* Ensure the ring contents don't cross the req_prod access. */
1100 smp_rmb();
1101
1102 while (len) {
1103 unsigned int avail = prod - cons;
1104 unsigned int offset = MASK_XENSTORE_IDX(cons);
1105 unsigned int copylen = avail;
1106
1107 if (avail > XENSTORE_RING_SIZE) {
1108 error_report("XenStore ring handling error");
1109 s->fatal_error = true;
1110 break;
1111 } else if (avail == 0) {
1112 break;
1113 }
1114
1115 if (copylen > len) {
1116 copylen = len;
1117 }
1118 if (copylen > XENSTORE_RING_SIZE - offset) {
1119 copylen = XENSTORE_RING_SIZE - offset;
1120 }
1121
1122 memcpy(ptr, &s->xs->req[offset], copylen);
1123 copied += copylen;
1124
1125 ptr += copylen;
1126 len -= copylen;
1127
1128 cons += copylen;
1129 }
1130
1131 /*
1132 * Not sure this ever mattered except on Alpha, but this barrier
1133 * is to ensure that the update to req_cons is globally visible
1134 * only after we have consumed all the data from the ring, and we
1135 * don't end up seeing data written to the ring *after* the other
1136 * end sees the update and writes more to the ring. Xen's own
1137 * xenstored has the same barrier here (although with no comment
1138 * at all, obviously, because it's Xen code).
1139 */
1140 smp_mb();
1141
1142 qatomic_set(&s->xs->req_cons, cons);
1143
1144 return copied;
1145 }
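
/*
 * Index arithmetic example (illustrative): XENSTORE_RING_SIZE is 1024 and
 * the indices are free-running counters. With req_prod == 0x412 and
 * req_cons == 0x3fe there are 0x14 bytes available; MASK_XENSTORE_IDX(0x3fe)
 * is 0x3fe, so the first memcpy takes the 2 bytes up to the end of the ring
 * and the next loop iteration wraps around to offset 0 for the rest.
 */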
1146
1147 static unsigned int copy_to_ring(XenXenstoreState *s, uint8_t *ptr,
1148 unsigned int len)
1149 {
1150 if (!len) {
1151 return 0;
1152 }
1153
1154 XENSTORE_RING_IDX cons = qatomic_read(&s->xs->rsp_cons);
1155 XENSTORE_RING_IDX prod = qatomic_read(&s->xs->rsp_prod);
1156 unsigned int copied = 0;
1157
1158 /*
1159      * This matches the barrier in copy_from_ring() (or the guest's
1160      * equivalent) between reading data from the ring and updating the
1161      * consumer index. It protects against the pathological case (which
1162 * again I think never happened except on Alpha) where our
1163 * subsequent writes to the ring could *cross* the read of
1164 * rsp_cons and the guest could see the new data when it was
1165 * intending to read the old.
1166 */
1167 smp_mb();
1168
1169 while (len) {
1170 unsigned int avail = cons + XENSTORE_RING_SIZE - prod;
1171 unsigned int offset = MASK_XENSTORE_IDX(prod);
1172 unsigned int copylen = len;
1173
1174 if (avail > XENSTORE_RING_SIZE) {
1175 error_report("XenStore ring handling error");
1176 s->fatal_error = true;
1177 break;
1178 } else if (avail == 0) {
1179 break;
1180 }
1181
1182 if (copylen > avail) {
1183 copylen = avail;
1184 }
1185 if (copylen > XENSTORE_RING_SIZE - offset) {
1186 copylen = XENSTORE_RING_SIZE - offset;
1187 }
1188
1189
1190 memcpy(&s->xs->rsp[offset], ptr, copylen);
1191 copied += copylen;
1192
1193 ptr += copylen;
1194 len -= copylen;
1195
1196 prod += copylen;
1197 }
1198
1199 /* Ensure the ring contents are seen before rsp_prod update. */
1200 smp_wmb();
1201
1202 qatomic_set(&s->xs->rsp_prod, prod);
1203
1204 return copied;
1205 }
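
/*
 * Free-space example (illustrative): with rsp_prod == 0x7f0 and
 * rsp_cons == 0x402 the guest still has 0x3ee bytes of response unread, so
 * avail == cons + XENSTORE_RING_SIZE - prod == 0x12 and at most 18 bytes
 * can be queued until the guest advances rsp_cons.
 */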
1206
1207 static unsigned int get_req(XenXenstoreState *s)
1208 {
1209 unsigned int copied = 0;
1210
1211 if (s->fatal_error) {
1212 return 0;
1213 }
1214
1215 assert(!req_pending(s));
1216
1217 if (s->req_offset < XENSTORE_HEADER_SIZE) {
1218 void *ptr = s->req_data + s->req_offset;
1219 unsigned int len = XENSTORE_HEADER_SIZE;
1220 unsigned int copylen = copy_from_ring(s, ptr, len);
1221
1222 copied += copylen;
1223 s->req_offset += copylen;
1224 }
1225
1226 if (s->req_offset >= XENSTORE_HEADER_SIZE) {
1227 struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;
1228
1229 if (req->len > (uint32_t)XENSTORE_PAYLOAD_MAX) {
1230 error_report("Illegal XenStore request");
1231 s->fatal_error = true;
1232 return 0;
1233 }
1234
1235 void *ptr = s->req_data + s->req_offset;
1236 unsigned int len = XENSTORE_HEADER_SIZE + req->len - s->req_offset;
1237 unsigned int copylen = copy_from_ring(s, ptr, len);
1238
1239 copied += copylen;
1240 s->req_offset += copylen;
1241 }
1242
1243 return copied;
1244 }
1245
1246 static unsigned int put_rsp(XenXenstoreState *s)
1247 {
1248 if (s->fatal_error) {
1249 return 0;
1250 }
1251
1252 assert(s->rsp_pending);
1253
1254 struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
1255 assert(s->rsp_offset < XENSTORE_HEADER_SIZE + rsp->len);
1256
1257 void *ptr = s->rsp_data + s->rsp_offset;
1258 unsigned int len = XENSTORE_HEADER_SIZE + rsp->len - s->rsp_offset;
1259 unsigned int copylen = copy_to_ring(s, ptr, len);
1260
1261 s->rsp_offset += copylen;
1262
1263 /* Have we produced a complete response? */
1264 if (s->rsp_offset == XENSTORE_HEADER_SIZE + rsp->len) {
1265 reset_rsp(s);
1266 }
1267
1268 return copylen;
1269 }
1270
1271 static void deliver_watch(XenXenstoreState *s, const char *path,
1272 const char *token)
1273 {
1274 struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
1275 uint8_t *rsp_data = (uint8_t *)&rsp[1];
1276 unsigned int len;
1277
1278 assert(!s->rsp_pending);
1279
1280 trace_xenstore_watch_event(path, token);
1281
1282 rsp->type = XS_WATCH_EVENT;
1283 rsp->req_id = 0;
1284 rsp->tx_id = 0;
1285 rsp->len = 0;
1286
1287 len = strlen(path);
1288
1289 /* XENSTORE_ABS/REL_PATH_MAX should ensure there can be no overflow */
1290 assert(rsp->len + len < XENSTORE_PAYLOAD_MAX);
1291
1292 memcpy(&rsp_data[rsp->len], path, len);
1293 rsp->len += len;
1294 rsp_data[rsp->len] = '\0';
1295 rsp->len++;
1296
1297 len = strlen(token);
1298 /*
1299 * It is possible for the guest to have chosen a token that will
1300      * not fit (along with the path) into a watch event. We have no
1301 * choice but to drop the event if this is the case.
1302 */
1303 if (rsp->len + len >= XENSTORE_PAYLOAD_MAX) {
1304 return;
1305 }
1306
1307 memcpy(&rsp_data[rsp->len], token, len);
1308 rsp->len += len;
1309 rsp_data[rsp->len] = '\0';
1310 rsp->len++;
1311
1312 s->rsp_pending = true;
1313 }
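
/*
 * Example (illustrative): a watch on "control/shutdown" registered with the
 * token "shutdown-token", firing for that exact path, produces an
 * XS_WATCH_EVENT message with req_id == 0, tx_id == 0 and the payload
 *
 *   "control/shutdown\0shutdown-token\0"
 *
 * i.e. len == 17 + 15 == 32.
 */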
1314
1315 struct watch_event {
1316 char *path;
1317 char *token;
1318 };
1319
1320 static void free_watch_event(struct watch_event *ev)
1321 {
1322 if (ev) {
1323 g_free(ev->path);
1324 g_free(ev->token);
1325 g_free(ev);
1326 }
1327 }
1328
1329 static void queue_watch(XenXenstoreState *s, const char *path,
1330 const char *token)
1331 {
1332 struct watch_event *ev = g_new0(struct watch_event, 1);
1333
1334 ev->path = g_strdup(path);
1335 ev->token = g_strdup(token);
1336
1337 s->watch_events = g_list_append(s->watch_events, ev);
1338 }
1339
1340 static void fire_watch_cb(void *opaque, const char *path, const char *token)
1341 {
1342 XenXenstoreState *s = opaque;
1343
1344 assert(bql_locked());
1345
1346 /*
1347 * If there's a response pending, we obviously can't scribble over
1348 * it. But if there's a request pending, it has dibs on the buffer
1349 * too.
1350 *
1351 * In the common case of a watch firing due to backend activity
1352 * when the ring was otherwise idle, we should be able to copy the
1353 * strings directly into the rsp_data and thence the actual ring,
1354 * without needing to perform any allocations and queue them.
1355 */
1356 if (s->rsp_pending || req_pending(s)) {
1357 queue_watch(s, path, token);
1358 } else {
1359 deliver_watch(s, path, token);
1360 /*
1361 * Attempt to queue the message into the actual ring, and send
1362 * the event channel notification if any bytes are copied.
1363 */
1364 if (s->rsp_pending && put_rsp(s) > 0) {
1365 xen_be_evtchn_notify(s->eh, s->be_port);
1366 }
1367 }
1368 }
1369
1370 static void process_watch_events(XenXenstoreState *s)
1371 {
1372 struct watch_event *ev = s->watch_events->data;
1373
1374 deliver_watch(s, ev->path, ev->token);
1375
1376 s->watch_events = g_list_remove(s->watch_events, ev);
1377 free_watch_event(ev);
1378 }
1379
1380 static void xen_xenstore_event(void *opaque)
1381 {
1382 XenXenstoreState *s = opaque;
1383 evtchn_port_t port = xen_be_evtchn_pending(s->eh);
1384 unsigned int copied_to, copied_from;
1385 bool processed, notify = false;
1386
1387 if (port != s->be_port) {
1388 return;
1389 }
1390
1391 /* We know this is a no-op. */
1392 xen_be_evtchn_unmask(s->eh, port);
1393
1394 do {
1395 copied_to = copied_from = 0;
1396 processed = false;
1397
1398 if (!s->rsp_pending && s->watch_events) {
1399 process_watch_events(s);
1400 }
1401
1402 if (s->rsp_pending) {
1403 copied_to = put_rsp(s);
1404 }
1405
1406 if (!req_pending(s)) {
1407 copied_from = get_req(s);
1408 }
1409
1410 if (req_pending(s) && !s->rsp_pending && !s->watch_events) {
1411 process_req(s);
1412 processed = true;
1413 }
1414
1415 notify |= copied_to || copied_from;
1416 } while (copied_to || copied_from || processed);
1417
1418 if (notify) {
1419 xen_be_evtchn_notify(s->eh, s->be_port);
1420 }
1421 }
1422
1423 static void alloc_guest_port(XenXenstoreState *s)
1424 {
1425 struct evtchn_alloc_unbound alloc = {
1426 .dom = DOMID_SELF,
1427 .remote_dom = DOMID_QEMU,
1428 };
1429
1430 if (!xen_evtchn_alloc_unbound_op(&alloc)) {
1431 s->guest_port = alloc.port;
1432 }
1433 }
1434
1435 int xen_xenstore_reset(void)
1436 {
1437 XenXenstoreState *s = xen_xenstore_singleton;
1438 int console_port;
1439 GList *perms;
1440 int err;
1441
1442 if (!s) {
1443 return -ENOTSUP;
1444 }
1445
1446 s->req_offset = s->rsp_offset = 0;
1447 s->rsp_pending = false;
1448
1449 if (!memory_region_is_mapped(&s->xenstore_page)) {
1450 uint64_t gpa = XEN_SPECIAL_PFN(XENSTORE) << TARGET_PAGE_BITS;
1451 xen_overlay_do_map_page(&s->xenstore_page, gpa);
1452 }
1453
1454 alloc_guest_port(s);
1455
1456 /*
1457 * As qemu/dom0, bind to the guest's port. For incoming migration, this
1458 * will be unbound as the guest's evtchn table is overwritten. We then
1459 * rebind to the correct guest port in xen_xenstore_post_load().
1460 */
1461 err = xen_be_evtchn_bind_interdomain(s->eh, xen_domid, s->guest_port);
1462 if (err < 0) {
1463 return err;
1464 }
1465 s->be_port = err;
1466
1467 /* Create frontend store nodes */
1468 perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, DOMID_QEMU));
1469 perms = g_list_append(perms, xs_perm_as_string(XS_PERM_READ, xen_domid));
1470
1471 relpath_printf(s, perms, "store/port", "%u", s->guest_port);
1472 relpath_printf(s, perms, "store/ring-ref", "%lu",
1473 XEN_SPECIAL_PFN(XENSTORE));
1474
1475 console_port = xen_primary_console_get_port();
1476 if (console_port) {
1477 relpath_printf(s, perms, "console/ring-ref", "%lu",
1478 XEN_SPECIAL_PFN(CONSOLE));
1479 relpath_printf(s, perms, "console/port", "%u", console_port);
1480 relpath_printf(s, perms, "console/state", "%u", XenbusStateInitialised);
1481 }
1482
1483 g_list_free_full(perms, g_free);
1484
1485 /*
1486 * We don't actually access the guest's page through the grant, because
1487 * this isn't real Xen, and we can just use the page we gave it in the
1488 * first place. Map the grant anyway, mostly for cosmetic purposes so
1489 * it *looks* like it's in use in the guest-visible grant table.
1490 */
1491 s->gt = qemu_xen_gnttab_open();
1492 uint32_t xs_gntref = GNTTAB_RESERVED_XENSTORE;
1493 s->granted_xs = qemu_xen_gnttab_map_refs(s->gt, 1, xen_domid, &xs_gntref,
1494 PROT_READ | PROT_WRITE);
1495
1496 return 0;
1497 }
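
/*
 * Illustrative sketch only: for xen_domid == 1 the frontend nodes written
 * above look roughly like
 *
 *   /local/domain/1/store/port     = "<guest event channel port>"
 *   /local/domain/1/store/ring-ref = "<XEN_SPECIAL_PFN(XENSTORE)>"
 *
 * owned by DOMID_QEMU and readable by the guest, much as a Xen toolstack
 * would write for a real guest.
 */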
1498
1499 struct qemu_xs_handle {
1500 XenstoreImplState *impl;
1501 GList *watches;
1502 QEMUBH *watch_bh;
1503 };
1504
1505 struct qemu_xs_watch {
1506 struct qemu_xs_handle *h;
1507 char *path;
1508 xs_watch_fn fn;
1509 void *opaque;
1510 GList *events;
1511 };
1512
1513 static char *xs_be_get_domain_path(struct qemu_xs_handle *h, unsigned int domid)
1514 {
1515 return g_strdup_printf("/local/domain/%u", domid);
1516 }
1517
1518 static char **xs_be_directory(struct qemu_xs_handle *h, xs_transaction_t t,
1519 const char *path, unsigned int *num)
1520 {
1521 GList *items = NULL, *l;
1522 unsigned int i = 0;
1523 char **items_ret;
1524 int err;
1525
1526 err = xs_impl_directory(h->impl, DOMID_QEMU, t, path, NULL, &items);
1527 if (err) {
1528 errno = err;
1529 return NULL;
1530 }
1531
1532 items_ret = g_new0(char *, g_list_length(items) + 1);
1533 *num = 0;
1534 for (l = items; l; l = l->next) {
1535 items_ret[i++] = l->data;
1536 (*num)++;
1537 }
1538 g_list_free(items);
1539 return items_ret;
1540 }
1541
1542 static void *xs_be_read(struct qemu_xs_handle *h, xs_transaction_t t,
1543 const char *path, unsigned int *len)
1544 {
1545 GByteArray *data = g_byte_array_new();
1546 bool free_segment = false;
1547 int err;
1548
1549 err = xs_impl_read(h->impl, DOMID_QEMU, t, path, data);
1550 if (err) {
1551 free_segment = true;
1552 errno = err;
1553 } else {
1554 if (len) {
1555 *len = data->len;
1556 }
1557         /* The xen-bus-helper code expects to get a NUL-terminated string! */
1558 g_byte_array_append(data, (void *)"", 1);
1559 }
1560
1561 return g_byte_array_free(data, free_segment);
1562 }
1563
1564 static bool xs_be_write(struct qemu_xs_handle *h, xs_transaction_t t,
1565 const char *path, const void *data, unsigned int len)
1566 {
1567 GByteArray *gdata = g_byte_array_new();
1568 int err;
1569
1570 g_byte_array_append(gdata, data, len);
1571 err = xs_impl_write(h->impl, DOMID_QEMU, t, path, gdata);
1572 g_byte_array_unref(gdata);
1573 if (err) {
1574 errno = err;
1575 return false;
1576 }
1577 return true;
1578 }
1579
1580 static bool xs_be_create(struct qemu_xs_handle *h, xs_transaction_t t,
1581 unsigned int owner, unsigned int domid,
1582 unsigned int perms, const char *path)
1583 {
1584 g_autoptr(GByteArray) data = g_byte_array_new();
1585 GList *perms_list = NULL;
1586 int err;
1587
1588 /* mkdir does this */
1589 err = xs_impl_read(h->impl, DOMID_QEMU, t, path, data);
1590 if (err == ENOENT) {
1591 err = xs_impl_write(h->impl, DOMID_QEMU, t, path, data);
1592 }
1593 if (err) {
1594 errno = err;
1595 return false;
1596 }
1597
1598 perms_list = g_list_append(perms_list,
1599 xs_perm_as_string(XS_PERM_NONE, owner));
1600 perms_list = g_list_append(perms_list,
1601 xs_perm_as_string(perms, domid));
1602
1603 err = xs_impl_set_perms(h->impl, DOMID_QEMU, t, path, perms_list);
1604 g_list_free_full(perms_list, g_free);
1605 if (err) {
1606 errno = err;
1607 return false;
1608 }
1609 return true;
1610 }
1611
1612 static bool xs_be_destroy(struct qemu_xs_handle *h, xs_transaction_t t,
1613 const char *path)
1614 {
1615 int err = xs_impl_rm(h->impl, DOMID_QEMU, t, path);
1616 if (err) {
1617 errno = err;
1618 return false;
1619 }
1620 return true;
1621 }
1622
1623 static void be_watch_bh(void *_h)
1624 {
1625 struct qemu_xs_handle *h = _h;
1626 GList *l;
1627
1628 for (l = h->watches; l; l = l->next) {
1629 struct qemu_xs_watch *w = l->data;
1630
1631 while (w->events) {
1632 struct watch_event *ev = w->events->data;
1633
1634 w->fn(w->opaque, ev->path);
1635
1636 w->events = g_list_remove(w->events, ev);
1637 free_watch_event(ev);
1638 }
1639 }
1640 }
1641
1642 static void xs_be_watch_cb(void *opaque, const char *path, const char *token)
1643 {
1644 struct watch_event *ev = g_new0(struct watch_event, 1);
1645 struct qemu_xs_watch *w = opaque;
1646
1647 /* We don't care about the token */
1648 ev->path = g_strdup(path);
1649 w->events = g_list_append(w->events, ev);
1650
1651 qemu_bh_schedule(w->h->watch_bh);
1652 }
1653
1654 static struct qemu_xs_watch *xs_be_watch(struct qemu_xs_handle *h,
1655 const char *path, xs_watch_fn fn,
1656 void *opaque)
1657 {
1658 struct qemu_xs_watch *w = g_new0(struct qemu_xs_watch, 1);
1659 int err;
1660
1661 w->h = h;
1662 w->fn = fn;
1663 w->opaque = opaque;
1664
1665 err = xs_impl_watch(h->impl, DOMID_QEMU, path, NULL, xs_be_watch_cb, w);
1666 if (err) {
1667 errno = err;
1668 g_free(w);
1669 return NULL;
1670 }
1671
1672 w->path = g_strdup(path);
1673 h->watches = g_list_append(h->watches, w);
1674 return w;
1675 }
1676
1677 static void xs_be_unwatch(struct qemu_xs_handle *h, struct qemu_xs_watch *w)
1678 {
1679 xs_impl_unwatch(h->impl, DOMID_QEMU, w->path, NULL, xs_be_watch_cb, w);
1680
1681 h->watches = g_list_remove(h->watches, w);
1682 g_list_free_full(w->events, (GDestroyNotify)free_watch_event);
1683 g_free(w->path);
1684 g_free(w);
1685 }
1686
1687 static xs_transaction_t xs_be_transaction_start(struct qemu_xs_handle *h)
1688 {
1689 unsigned int new_tx = XBT_NULL;
1690 int err = xs_impl_transaction_start(h->impl, DOMID_QEMU, &new_tx);
1691 if (err) {
1692 errno = err;
1693 return XBT_NULL;
1694 }
1695 return new_tx;
1696 }
1697
1698 static bool xs_be_transaction_end(struct qemu_xs_handle *h, xs_transaction_t t,
1699 bool abort)
1700 {
1701 int err = xs_impl_transaction_end(h->impl, DOMID_QEMU, t, !abort);
1702 if (err) {
1703 errno = err;
1704 return false;
1705 }
1706 return true;
1707 }
1708
1709 static struct qemu_xs_handle *xs_be_open(void)
1710 {
1711 XenXenstoreState *s = xen_xenstore_singleton;
1712 struct qemu_xs_handle *h;
1713
1714 if (!s || !s->impl) {
1715         errno = ENOSYS;
1716 return NULL;
1717 }
1718
1719 h = g_new0(struct qemu_xs_handle, 1);
1720 h->impl = s->impl;
1721
1722 h->watch_bh = aio_bh_new(qemu_get_aio_context(), be_watch_bh, h);
1723
1724 return h;
1725 }
1726
1727 static void xs_be_close(struct qemu_xs_handle *h)
1728 {
1729 while (h->watches) {
1730 struct qemu_xs_watch *w = h->watches->data;
1731 xs_be_unwatch(h, w);
1732 }
1733
1734 qemu_bh_delete(h->watch_bh);
1735 g_free(h);
1736 }
1737
1738 static struct xenstore_backend_ops emu_xenstore_backend_ops = {
1739 .open = xs_be_open,
1740 .close = xs_be_close,
1741 .get_domain_path = xs_be_get_domain_path,
1742 .directory = xs_be_directory,
1743 .read = xs_be_read,
1744 .write = xs_be_write,
1745 .create = xs_be_create,
1746 .destroy = xs_be_destroy,
1747 .watch = xs_be_watch,
1748 .unwatch = xs_be_unwatch,
1749 .transaction_start = xs_be_transaction_start,
1750 .transaction_end = xs_be_transaction_end,
1751 };
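
/*
 * Usage sketch (illustrative only; real callers reach these functions
 * through the xen_xenstore_ops pointer set in xen_xenstore_realize()
 * rather than calling the statics directly):
 *
 *   struct qemu_xs_handle *h = xs_be_open();
 *   char *dom = xs_be_get_domain_path(h, xen_domid);
 *   xs_be_write(h, XBT_NULL, "/local/domain/0/backend/ready", "1", 1);
 *   g_free(dom);
 *   xs_be_close(h);
 */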
1752