/*
 * QEMU Xen emulation: Shared/overlay pages support
 *
 * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"

#include "qemu/host-utils.h"
#include "qemu/module.h"
#include "qemu/main-loop.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include "qapi/error.h"
#include "qom/object.h"
#include "migration/vmstate.h"

#include "hw/sysbus.h"
#include "hw/xen/xen.h"
#include "hw/xen/xen_backend_ops.h"
#include "xen_overlay.h"
#include "xen_evtchn.h"
#include "xen_primary_console.h"
#include "xen_xenstore.h"

#include "system/kvm.h"
#include "system/kvm_xen.h"

#include "trace.h"

#include "xenstore_impl.h"

#include "hw/xen/interface/io/xs_wire.h"
#include "hw/xen/interface/event_channel.h"
#include "hw/xen/interface/grant_table.h"

#define TYPE_XEN_XENSTORE "xen-xenstore"
OBJECT_DECLARE_SIMPLE_TYPE(XenXenstoreState, XEN_XENSTORE)

#define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))
#define ENTRIES_PER_FRAME_V2 (XEN_PAGE_SIZE / sizeof(grant_entry_v2_t))

#define XENSTORE_HEADER_SIZE ((unsigned int)sizeof(struct xsd_sockmsg))

struct XenXenstoreState {
    /*< private >*/
    SysBusDevice busdev;
    /*< public >*/

    XenstoreImplState *impl;
    GList *watch_events; /* for the guest */

    MemoryRegion xenstore_page;
    struct xenstore_domain_interface *xs;
    uint8_t req_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX];
    uint8_t rsp_data[XENSTORE_HEADER_SIZE + XENSTORE_PAYLOAD_MAX];
    uint32_t req_offset;
    uint32_t rsp_offset;
    bool rsp_pending;
    bool fatal_error;

    evtchn_port_t guest_port;
    evtchn_port_t be_port;
    struct xenevtchn_handle *eh;

    uint8_t *impl_state;
    uint32_t impl_state_size;

    struct xengntdev_handle *gt;
    void *granted_xs;
};

struct XenXenstoreState *xen_xenstore_singleton;

static void xen_xenstore_event(void *opaque);
static void fire_watch_cb(void *opaque, const char *path, const char *token);

static struct xenstore_backend_ops emu_xenstore_backend_ops;

static void G_GNUC_PRINTF(4, 5) relpath_printf(XenXenstoreState *s,
                                               GList *perms,
                                               const char *relpath,
                                               const char *fmt, ...)
{
    gchar *abspath;
    gchar *value;
    va_list args;
    GByteArray *data;
    int err;

    abspath = g_strdup_printf("/local/domain/%u/%s", xen_domid, relpath);
    va_start(args, fmt);
    value = g_strdup_vprintf(fmt, args);
    va_end(args);

    data = g_byte_array_new_take((void *)value, strlen(value));

    err = xs_impl_write(s->impl, DOMID_QEMU, XBT_NULL, abspath, data);
    assert(!err);

    g_byte_array_unref(data);

    err = xs_impl_set_perms(s->impl, DOMID_QEMU, XBT_NULL, abspath, perms);
    assert(!err);

    g_free(abspath);
}

static void xen_xenstore_realize(DeviceState *dev, Error **errp)
{
    XenXenstoreState *s = XEN_XENSTORE(dev);
    GList *perms;

    if (xen_mode != XEN_EMULATE) {
        error_setg(errp, "Xen xenstore support is for Xen emulation");
        return;
    }
    memory_region_init_ram(&s->xenstore_page, OBJECT(dev), "xen:xenstore_page",
                           XEN_PAGE_SIZE, &error_abort);
    memory_region_set_enabled(&s->xenstore_page, true);
    s->xs = memory_region_get_ram_ptr(&s->xenstore_page);
    memset(s->xs, 0, XEN_PAGE_SIZE);

    /* We can't map it this early as KVM isn't ready */
    xen_xenstore_singleton = s;

    s->eh = xen_be_evtchn_open();
    if (!s->eh) {
        error_setg(errp, "Xenstore evtchn port init failed");
        return;
    }
    aio_set_fd_handler(qemu_get_aio_context(), xen_be_evtchn_fd(s->eh),
                       xen_xenstore_event, NULL, NULL, NULL, s);

    s->impl = xs_impl_create(xen_domid);

    /* Populate the default nodes */

    /* Nodes owned by 'dom0' but readable by the guest */
    perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, DOMID_QEMU));
    perms = g_list_append(perms, xs_perm_as_string(XS_PERM_READ, xen_domid));

    relpath_printf(s, perms, "", "%s", "");

    relpath_printf(s, perms, "domid", "%u", xen_domid);

    relpath_printf(s, perms, "control/platform-feature-xs_reset_watches", "%u", 1);
    relpath_printf(s, perms, "control/platform-feature-multiprocessor-suspend", "%u", 1);

    relpath_printf(s, perms, "platform/acpi", "%u", 1);
    relpath_printf(s, perms, "platform/acpi_s3", "%u", 1);
    relpath_printf(s, perms, "platform/acpi_s4", "%u", 1);
    relpath_printf(s, perms, "platform/acpi_laptop_slate", "%u", 0);

    g_list_free_full(perms, g_free);

    /* Nodes owned by the guest */
    perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, xen_domid));

    relpath_printf(s, perms, "attr", "%s", "");

    relpath_printf(s, perms, "control/shutdown", "%s", "");
    relpath_printf(s, perms, "control/feature-poweroff", "%u", 1);
    relpath_printf(s, perms, "control/feature-reboot", "%u", 1);
    relpath_printf(s, perms, "control/feature-suspend", "%u", 1);
    relpath_printf(s, perms, "control/feature-s3", "%u", 1);
    relpath_printf(s, perms, "control/feature-s4", "%u", 1);

    relpath_printf(s, perms, "data", "%s", "");
    relpath_printf(s, perms, "device", "%s", "");
    relpath_printf(s, perms, "drivers", "%s", "");
    relpath_printf(s, perms, "error", "%s", "");
    relpath_printf(s, perms, "feature", "%s", "");

    g_list_free_full(perms, g_free);

    xen_xenstore_ops = &emu_xenstore_backend_ops;
}

static bool xen_xenstore_is_needed(void *opaque)
{
    return xen_mode == XEN_EMULATE;
}

static int xen_xenstore_pre_save(void *opaque)
{
    XenXenstoreState *s = opaque;
    GByteArray *save;

    if (s->eh) {
        s->guest_port = xen_be_evtchn_get_guest_port(s->eh);
    }

    g_free(s->impl_state);
    save = xs_impl_serialize(s->impl);
    s->impl_state = save->data;
    s->impl_state_size = save->len;
    g_byte_array_free(save, false);

    return 0;
}

static int xen_xenstore_post_load(void *opaque, int ver)
{
    XenXenstoreState *s = opaque;
    GByteArray *save;
    int ret;

    /*
     * As qemu/dom0, rebind to the guest's port. The Windows drivers may
     * unbind the XenStore evtchn and rebind to it, having obtained the
     * "remote" port through EVTCHNOP_status. In the case that migration
     * occurs while it's unbound, the "remote" port needs to be the same
     * as before so that the guest can find it, but should remain unbound.
     */
    if (s->guest_port) {
        int be_port = xen_be_evtchn_bind_interdomain(s->eh, xen_domid,
                                                     s->guest_port);
        if (be_port < 0) {
            return be_port;
        }
        s->be_port = be_port;
    }

    save = g_byte_array_new_take(s->impl_state, s->impl_state_size);
    s->impl_state = NULL;
    s->impl_state_size = 0;

    ret = xs_impl_deserialize(s->impl, save, xen_domid, fire_watch_cb, s);
    return ret;
}

static const VMStateDescription xen_xenstore_vmstate = {
    .name = "xen_xenstore",
    .unmigratable = 1, /* The PV back ends don't migrate yet */
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = xen_xenstore_is_needed,
    .pre_save = xen_xenstore_pre_save,
    .post_load = xen_xenstore_post_load,
    .fields = (const VMStateField[]) {
        VMSTATE_UINT8_ARRAY(req_data, XenXenstoreState,
                            sizeof_field(XenXenstoreState, req_data)),
        VMSTATE_UINT8_ARRAY(rsp_data, XenXenstoreState,
                            sizeof_field(XenXenstoreState, rsp_data)),
        VMSTATE_UINT32(req_offset, XenXenstoreState),
        VMSTATE_UINT32(rsp_offset, XenXenstoreState),
        VMSTATE_BOOL(rsp_pending, XenXenstoreState),
        VMSTATE_UINT32(guest_port, XenXenstoreState),
        VMSTATE_BOOL(fatal_error, XenXenstoreState),
        VMSTATE_UINT32(impl_state_size, XenXenstoreState),
        VMSTATE_VARRAY_UINT32_ALLOC(impl_state, XenXenstoreState,
                                    impl_state_size, 0,
                                    vmstate_info_uint8, uint8_t),
        VMSTATE_END_OF_LIST()
    }
};

static void xen_xenstore_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->realize = xen_xenstore_realize;
    dc->vmsd = &xen_xenstore_vmstate;
}

static const TypeInfo xen_xenstore_info = {
    .name = TYPE_XEN_XENSTORE,
    .parent = TYPE_SYS_BUS_DEVICE,
    .instance_size = sizeof(XenXenstoreState),
    .class_init = xen_xenstore_class_init,
};

void xen_xenstore_create(void)
{
    DeviceState *dev = sysbus_create_simple(TYPE_XEN_XENSTORE, -1, NULL);

    xen_xenstore_singleton = XEN_XENSTORE(dev);

    /*
     * Defer the init (xen_xenstore_reset()) until KVM is set up and the
     * overlay page can be mapped.
     */
}

static void xen_xenstore_register_types(void)
{
    type_register_static(&xen_xenstore_info);
}

type_init(xen_xenstore_register_types)

uint16_t xen_xenstore_get_port(void)
{
    XenXenstoreState *s = xen_xenstore_singleton;
    if (!s) {
        return 0;
    }
    return s->guest_port;
}
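
/*
 * A request is complete once we hold the fixed-size xsd_sockmsg header
 * plus however many payload bytes the header's 'len' field declares.
 */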
static bool req_pending(XenXenstoreState *s)
{
    struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;

    return s->req_offset == XENSTORE_HEADER_SIZE + req->len;
}

static void reset_req(XenXenstoreState *s)
{
    memset(s->req_data, 0, sizeof(s->req_data));
    s->req_offset = 0;
}

static void reset_rsp(XenXenstoreState *s)
{
    s->rsp_pending = false;

    memset(s->rsp_data, 0, sizeof(s->rsp_data));
    s->rsp_offset = 0;
}

static void xs_error(XenXenstoreState *s, unsigned int id,
                     xs_transaction_t tx_id, int errnum)
{
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    const char *errstr = NULL;

    for (unsigned int i = 0; i < ARRAY_SIZE(xsd_errors); i++) {
        const struct xsd_errors *xsd_error = &xsd_errors[i];

        if (xsd_error->errnum == errnum) {
            errstr = xsd_error->errstring;
            break;
        }
    }
    assert(errstr);

    trace_xenstore_error(id, tx_id, errstr);

    rsp->type = XS_ERROR;
    rsp->req_id = id;
    rsp->tx_id = tx_id;
    rsp->len = (uint32_t)strlen(errstr) + 1;

    memcpy(&rsp[1], errstr, rsp->len);
}

static void xs_ok(XenXenstoreState *s, unsigned int type, unsigned int req_id,
                  xs_transaction_t tx_id)
{
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    const char *okstr = "OK";

    rsp->type = type;
    rsp->req_id = req_id;
    rsp->tx_id = tx_id;
    rsp->len = (uint32_t)strlen(okstr) + 1;

    memcpy(&rsp[1], okstr, rsp->len);
}

/*
 * The correct request and response formats are documented in xen.git:
 * docs/misc/xenstore.txt. A summary is given below for convenience.
 * The '|' symbol represents a NUL character.
 *
 * ---------- Database read, write and permissions operations ----------
 *
 * READ                    <path>|                 <value|>
 * WRITE                   <path>|<value|>
 *      Store and read the octet string <value> at <path>.
 *      WRITE creates any missing parent paths, with empty values.
 *
 * MKDIR                   <path>|
 *      Ensures that the <path> exists, if necessary by creating
 *      it and any missing parents with empty values. If <path>
 *      or any parent already exists, its value is left unchanged.
 *
 * RM                      <path>|
 *      Ensures that the <path> does not exist, by deleting
 *      it and all of its children. It is not an error if <path> does
 *      not exist, but it _is_ an error if <path>'s immediate parent
 *      does not exist either.
 *
 * DIRECTORY               <path>|                 <child-leaf-name>|*
 *      Gives a list of the immediate children of <path>, as only the
 *      leafnames. The resulting children are each named
 *      <path>/<child-leaf-name>.
 *
 * DIRECTORY_PART          <path>|<offset>         <gencnt>|<child-leaf-name>|*
 *      Same as DIRECTORY, but to be used for children lists longer than
 *      XENSTORE_PAYLOAD_MAX. Input are <path> and the byte offset into
 *      the list of children to return. Return values are the generation
 *      count <gencnt> of the node (to be used to ensure the node hasn't
 *      changed between two reads: <gencnt> being the same for multiple
 *      reads guarantees the node hasn't changed) and the list of children
 *      starting at the specified <offset> of the complete list.
 *
 * GET_PERMS               <path>|                 <perm-as-string>|+
 * SET_PERMS               <path>|<perm-as-string>|+?
 *      <perm-as-string> is one of the following
 *            w<domid>  write only
 *            r<domid>  read only
 *            b<domid>  both read and write
 *            n<domid>  no access
 *      See https://wiki.xen.org/wiki/XenBus section
 *      `Permissions' for details of the permissions system.
 *      It is possible to set permissions for the special watch paths
 *      "@introduceDomain" and "@releaseDomain" to enable receiving those
 *      watches in unprivileged domains.
 *
 * ---------- Watches ----------
 *
 * WATCH                   <wpath>|<token>|?
 *      Adds a watch.
 *
 *      When a <path> is modified (including path creation, removal,
 *      contents change or permissions change) this generates an event
 *      on the changed <path>. Changes made in transactions cause an
 *      event only if and when committed. Each occurring event is
 *      matched against all the watches currently set up, and each
 *      matching watch results in a WATCH_EVENT message (see below).
 *
 *      The event's path matches the watch's <wpath> if it is a child
 *      of <wpath>.
 *
 *      <wpath> can be a <path> to watch or @<wspecial>. In the
 *      latter case <wspecial> may have any syntax but it matches
 *      (according to the rules above) only the following special
 *      events which are invented by xenstored:
 *          @introduceDomain    occurs on INTRODUCE
 *          @releaseDomain      occurs on any domain crash or
 *                              shutdown, and also on RELEASE
 *                              and domain destruction
 *      <wspecial> events are sent to privileged callers or explicitly
 *      via SET_PERMS enabled domains only.
 *
 *      When a watch is first set up it is triggered once straight
 *      away, with <path> equal to <wpath>. Watches may be triggered
 *      spuriously. The tx_id in a WATCH request is ignored.
 *
 *      Watches are supposed to be restricted by the permissions
 *      system but in practice the implementation is imperfect.
 *      Applications should not rely on being sent a notification for
 *      paths that they cannot read; however, an application may rely
 *      on being sent a watch when a path which it _is_ able to read
 *      is deleted even if that leaves only a nonexistent unreadable
 *      parent. A notification may be omitted if a node's permissions
 *      are changed so as to make it unreadable, in which case future
 *      notifications may be suppressed (and if the node is later made
 *      readable, some notifications may have been lost).
 *
 * WATCH_EVENT                                     <epath>|<token>|
 *      Unsolicited `reply' generated for matching modification events
 *      as described above. req_id and tx_id are both 0.
 *
 *      <epath> is the event's path, ie the actual path that was
 *      modified; however if the event was the recursive removal of a
 *      parent of <wpath>, <epath> is just
 *      <wpath> (rather than the actual path which was removed). So
 *      <epath> is a child of <wpath>, regardless.
 *
 *      Iff <wpath> for the watch was specified as a relative pathname,
 *      the <epath> path will also be relative (with the same base,
 *      obviously).
 *
 * UNWATCH                 <wpath>|<token>|?
 *
 * RESET_WATCHES           |
 *      Reset all watches and transactions of the caller.
 *
 * ---------- Transactions ----------
 *
 * TRANSACTION_START       |                       <transid>|
 *      <transid> is an opaque uint32_t allocated by xenstored,
 *      represented as unsigned decimal. After this, the transaction may
 *      be referenced by using <transid> (as 32-bit binary) in the
 *      tx_id request header field. When a transaction is started the
 *      whole db is copied; reads and writes happen on the copy.
 *      It is not legal to send a non-0 tx_id in TRANSACTION_START.
 *
 * TRANSACTION_END         T|
 * TRANSACTION_END         F|
 *      tx_id must refer to an existing transaction. After this
 *      request the tx_id is no longer valid and may be reused by
 *      xenstore. If F, the transaction is discarded. If T,
 *      it is committed: if there were any other intervening writes
 *      then our END gets EAGAIN.
 *
 *      The plan is that in the future only intervening `conflicting'
 *      writes cause EAGAIN, meaning only writes or other commits
 *      which changed paths which were read or written in the
 *      transaction at hand.
 *
 */
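
/*
 * A concrete example of the wire format (ours, for illustration; not
 * from xenstore.txt): a READ of the "domid" node is a 16-byte
 * struct xsd_sockmsg header { .type = XS_READ, .req_id = 1, .tx_id = 0,
 * .len = 6 } followed by the 6-byte payload "domid\0". The response
 * echoes req_id and tx_id and carries the raw value as its payload,
 * e.g. .len = 1 with payload "1"; note that xs_read() below appends no
 * NUL terminator to the value.
 */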

static void xs_read(XenXenstoreState *s, unsigned int req_id,
                    xs_transaction_t tx_id, uint8_t *req_data, unsigned int len)
{
    const char *path = (const char *)req_data;
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    uint8_t *rsp_data = (uint8_t *)&rsp[1];
    g_autoptr(GByteArray) data = g_byte_array_new();
    int err;

    if (len == 0 || req_data[len - 1] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    trace_xenstore_read(tx_id, path);
    err = xs_impl_read(s->impl, xen_domid, tx_id, path, data);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    rsp->type = XS_READ;
    rsp->req_id = req_id;
    rsp->tx_id = tx_id;
    rsp->len = 0;

    len = data->len;
    if (len > XENSTORE_PAYLOAD_MAX) {
        xs_error(s, req_id, tx_id, E2BIG);
        return;
    }

    if (!len) {
        return;
    }

    memcpy(&rsp_data[rsp->len], data->data, len);
    rsp->len += len;
}

static void xs_write(XenXenstoreState *s, unsigned int req_id,
                     xs_transaction_t tx_id, uint8_t *req_data,
                     unsigned int len)
{
    g_autoptr(GByteArray) data = g_byte_array_new();
    const char *path;
    int err;

    if (len == 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    path = (const char *)req_data;

    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    g_byte_array_append(data, req_data, len);

    trace_xenstore_write(tx_id, path);
    err = xs_impl_write(s->impl, xen_domid, tx_id, path, data);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    xs_ok(s, XS_WRITE, req_id, tx_id);
}

static void xs_mkdir(XenXenstoreState *s, unsigned int req_id,
                     xs_transaction_t tx_id, uint8_t *req_data,
                     unsigned int len)
{
    g_autoptr(GByteArray) data = g_byte_array_new();
    const char *path;
    int err;

    if (len == 0 || req_data[len - 1] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    path = (const char *)req_data;

    trace_xenstore_mkdir(tx_id, path);
    err = xs_impl_read(s->impl, xen_domid, tx_id, path, data);
    if (err == ENOENT) {
        err = xs_impl_write(s->impl, xen_domid, tx_id, path, data);
    }

    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    xs_ok(s, XS_MKDIR, req_id, tx_id);
}

static void xs_append_strings(XenXenstoreState *s, struct xsd_sockmsg *rsp,
                              GList *strings, unsigned int start, bool truncate)
{
    uint8_t *rsp_data = (uint8_t *)&rsp[1];
    GList *l;

    for (l = strings; l; l = l->next) {
        size_t len = strlen(l->data) + 1; /* Including the NUL termination */
        char *str = l->data;

        if (rsp->len + len > XENSTORE_PAYLOAD_MAX) {
            if (truncate) {
                len = XENSTORE_PAYLOAD_MAX - rsp->len;
                if (!len) {
                    return;
                }
            } else {
                xs_error(s, rsp->req_id, rsp->tx_id, E2BIG);
                return;
            }
        }

        if (start) {
            if (start >= len) {
                start -= len;
                continue;
            }

            str += start;
            len -= start;
            start = 0;
        }

        memcpy(&rsp_data[rsp->len], str, len);
        rsp->len += len;
    }
    /* XS_DIRECTORY_PART wants an extra NUL to indicate the end */
    if (truncate && rsp->len < XENSTORE_PAYLOAD_MAX) {
        rsp_data[rsp->len++] = '\0';
    }
}

static void xs_directory(XenXenstoreState *s, unsigned int req_id,
                         xs_transaction_t tx_id, uint8_t *req_data,
                         unsigned int len)
{
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    GList *items = NULL;
    const char *path;
    int err;

    if (len == 0 || req_data[len - 1] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    path = (const char *)req_data;

    trace_xenstore_directory(tx_id, path);
    err = xs_impl_directory(s->impl, xen_domid, tx_id, path, NULL, &items);
    if (err != 0) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    rsp->type = XS_DIRECTORY;
    rsp->req_id = req_id;
    rsp->tx_id = tx_id;
    rsp->len = 0;

    xs_append_strings(s, rsp, items, 0, false);

    g_list_free_full(items, g_free);
}

static void xs_directory_part(XenXenstoreState *s, unsigned int req_id,
                              xs_transaction_t tx_id, uint8_t *req_data,
                              unsigned int len)
{
    const char *offset_str, *path = (const char *)req_data;
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    char *rsp_data = (char *)&rsp[1];
    uint64_t gencnt = 0;
    unsigned int offset;
    GList *items = NULL;
    int err;

    if (len == 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    offset_str = (const char *)req_data;
    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    if (len) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    if (qemu_strtoui(offset_str, NULL, 10, &offset) < 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    trace_xenstore_directory_part(tx_id, path, offset);
    err = xs_impl_directory(s->impl, xen_domid, tx_id, path, &gencnt, &items);
    if (err != 0) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    rsp->type = XS_DIRECTORY_PART;
    rsp->req_id = req_id;
    rsp->tx_id = tx_id;
    rsp->len = snprintf(rsp_data, XENSTORE_PAYLOAD_MAX, "%" PRIu64, gencnt) + 1;

    xs_append_strings(s, rsp, items, offset, true);

    g_list_free_full(items, g_free);
}

static void xs_transaction_start(XenXenstoreState *s, unsigned int req_id,
                                 xs_transaction_t tx_id, uint8_t *req_data,
                                 unsigned int len)
{
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    char *rsp_data = (char *)&rsp[1];
    int err;

    if (len != 1 || req_data[0] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    rsp->type = XS_TRANSACTION_START;
    rsp->req_id = req_id;
    rsp->tx_id = tx_id;
    rsp->len = 0;

    err = xs_impl_transaction_start(s->impl, xen_domid, &tx_id);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    trace_xenstore_transaction_start(tx_id);

    rsp->len = snprintf(rsp_data, XENSTORE_PAYLOAD_MAX, "%u", tx_id);
    assert(rsp->len < XENSTORE_PAYLOAD_MAX);
    rsp->len++;
}

static void xs_transaction_end(XenXenstoreState *s, unsigned int req_id,
                               xs_transaction_t tx_id, uint8_t *req_data,
                               unsigned int len)
{
    bool commit;
    int err;

    if (len != 2 || req_data[1] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    switch (req_data[0]) {
    case 'T':
        commit = true;
        break;
    case 'F':
        commit = false;
        break;
    default:
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    trace_xenstore_transaction_end(tx_id, commit);
    err = xs_impl_transaction_end(s->impl, xen_domid, tx_id, commit);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    xs_ok(s, XS_TRANSACTION_END, req_id, tx_id);
}

static void xs_rm(XenXenstoreState *s, unsigned int req_id,
                  xs_transaction_t tx_id, uint8_t *req_data, unsigned int len)
{
    const char *path = (const char *)req_data;
    int err;

    if (len == 0 || req_data[len - 1] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    trace_xenstore_rm(tx_id, path);
    err = xs_impl_rm(s->impl, xen_domid, tx_id, path);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    xs_ok(s, XS_RM, req_id, tx_id);
}

static void xs_get_perms(XenXenstoreState *s, unsigned int req_id,
                         xs_transaction_t tx_id, uint8_t *req_data,
                         unsigned int len)
{
    const char *path = (const char *)req_data;
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    GList *perms = NULL;
    int err;

    if (len == 0 || req_data[len - 1] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    trace_xenstore_get_perms(tx_id, path);
    err = xs_impl_get_perms(s->impl, xen_domid, tx_id, path, &perms);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    rsp->type = XS_GET_PERMS;
    rsp->req_id = req_id;
    rsp->tx_id = tx_id;
    rsp->len = 0;

    xs_append_strings(s, rsp, perms, 0, false);

    g_list_free_full(perms, g_free);
}

static void xs_set_perms(XenXenstoreState *s, unsigned int req_id,
                         xs_transaction_t tx_id, uint8_t *req_data,
                         unsigned int len)
{
    const char *path = (const char *)req_data;
    uint8_t *perm;
    GList *perms = NULL;
    int err;

    if (len == 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    perm = req_data;
    while (len--) {
        if (*req_data++ == '\0') {
            perms = g_list_append(perms, perm);
            perm = req_data;
        }
    }

    /*
     * Note that there may be trailing garbage at the end of the buffer.
     * This is explicitly permitted by the '?' at the end of the definition:
     *
     * SET_PERMS         <path>|<perm-as-string>|+?
     */

    trace_xenstore_set_perms(tx_id, path);
    err = xs_impl_set_perms(s->impl, xen_domid, tx_id, path, perms);
    g_list_free(perms);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    xs_ok(s, XS_SET_PERMS, req_id, tx_id);
}

static void xs_watch(XenXenstoreState *s, unsigned int req_id,
                     xs_transaction_t tx_id, uint8_t *req_data,
                     unsigned int len)
{
    const char *token, *path = (const char *)req_data;
    int err;

    if (len == 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    token = (const char *)req_data;
    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    /*
     * Note that there may be trailing garbage at the end of the buffer.
     * This is explicitly permitted by the '?' at the end of the definition:
     *
     * WATCH             <wpath>|<token>|?
     */

    trace_xenstore_watch(path, token);
    err = xs_impl_watch(s->impl, xen_domid, path, token, fire_watch_cb, s);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    xs_ok(s, XS_WATCH, req_id, tx_id);
}

static void xs_unwatch(XenXenstoreState *s, unsigned int req_id,
                       xs_transaction_t tx_id, uint8_t *req_data,
                       unsigned int len)
{
    const char *token, *path = (const char *)req_data;
    int err;

    if (len == 0) {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    token = (const char *)req_data;
    while (len--) {
        if (*req_data++ == '\0') {
            break;
        }
        if (len == 0) {
            xs_error(s, req_id, tx_id, EINVAL);
            return;
        }
    }

    trace_xenstore_unwatch(path, token);
    err = xs_impl_unwatch(s->impl, xen_domid, path, token, fire_watch_cb, s);
    if (err) {
        xs_error(s, req_id, tx_id, err);
        return;
    }

    xs_ok(s, XS_UNWATCH, req_id, tx_id);
}

static void xs_reset_watches(XenXenstoreState *s, unsigned int req_id,
                             xs_transaction_t tx_id, uint8_t *req_data,
                             unsigned int len)
{
    if (len == 0 || req_data[len - 1] != '\0') {
        xs_error(s, req_id, tx_id, EINVAL);
        return;
    }

    trace_xenstore_reset_watches();
    xs_impl_reset_watches(s->impl, xen_domid);

    xs_ok(s, XS_RESET_WATCHES, req_id, tx_id);
}

static void xs_priv(XenXenstoreState *s, unsigned int req_id,
                    xs_transaction_t tx_id, uint8_t *data,
                    unsigned int len)
{
    xs_error(s, req_id, tx_id, EACCES);
}

static void xs_unimpl(XenXenstoreState *s, unsigned int req_id,
                      xs_transaction_t tx_id, uint8_t *data,
                      unsigned int len)
{
    xs_error(s, req_id, tx_id, ENOSYS);
}

typedef void (*xs_impl)(XenXenstoreState *s, unsigned int req_id,
                        xs_transaction_t tx_id, uint8_t *data,
                        unsigned int len);

struct xsd_req {
    const char *name;
    xs_impl fn;
};
#define XSD_REQ(_type, _fn)                           \
    [_type] = { .name = #_type, .fn = _fn }

struct xsd_req xsd_reqs[] = {
    XSD_REQ(XS_READ, xs_read),
    XSD_REQ(XS_WRITE, xs_write),
    XSD_REQ(XS_MKDIR, xs_mkdir),
    XSD_REQ(XS_DIRECTORY, xs_directory),
    XSD_REQ(XS_DIRECTORY_PART, xs_directory_part),
    XSD_REQ(XS_TRANSACTION_START, xs_transaction_start),
    XSD_REQ(XS_TRANSACTION_END, xs_transaction_end),
    XSD_REQ(XS_RM, xs_rm),
    XSD_REQ(XS_GET_PERMS, xs_get_perms),
    XSD_REQ(XS_SET_PERMS, xs_set_perms),
    XSD_REQ(XS_WATCH, xs_watch),
    XSD_REQ(XS_UNWATCH, xs_unwatch),
    XSD_REQ(XS_CONTROL, xs_priv),
    XSD_REQ(XS_INTRODUCE, xs_priv),
    XSD_REQ(XS_RELEASE, xs_priv),
    XSD_REQ(XS_IS_DOMAIN_INTRODUCED, xs_priv),
    XSD_REQ(XS_RESUME, xs_priv),
    XSD_REQ(XS_SET_TARGET, xs_priv),
    XSD_REQ(XS_RESET_WATCHES, xs_reset_watches),
};

static void process_req(XenXenstoreState *s)
{
    struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;
    xs_impl handler = NULL;

    assert(req_pending(s));
    assert(!s->rsp_pending);

    if (req->type < ARRAY_SIZE(xsd_reqs)) {
        handler = xsd_reqs[req->type].fn;
    }
    if (!handler) {
        handler = &xs_unimpl;
    }

    handler(s, req->req_id, req->tx_id, (uint8_t *)&req[1], req->len);

    s->rsp_pending = true;
    reset_req(s);
}
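
/*
 * Copy up to 'len' bytes from the request ring into 'ptr', advancing
 * req_cons. Returns the number of bytes actually copied, which may be
 * less than 'len' if the ring does not yet hold a complete request.
 */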
static unsigned int copy_from_ring(XenXenstoreState *s, uint8_t *ptr,
                                   unsigned int len)
{
    if (!len) {
        return 0;
    }

    XENSTORE_RING_IDX prod = qatomic_read(&s->xs->req_prod);
    XENSTORE_RING_IDX cons = qatomic_read(&s->xs->req_cons);
    unsigned int copied = 0;

    /* Ensure the ring contents don't cross the req_prod access. */
    smp_rmb();

    while (len) {
        unsigned int avail = prod - cons;
        unsigned int offset = MASK_XENSTORE_IDX(cons);
        unsigned int copylen = avail;

        if (avail > XENSTORE_RING_SIZE) {
            error_report("XenStore ring handling error");
            s->fatal_error = true;
            break;
        } else if (avail == 0) {
            break;
        }

        if (copylen > len) {
            copylen = len;
        }
        if (copylen > XENSTORE_RING_SIZE - offset) {
            copylen = XENSTORE_RING_SIZE - offset;
        }

        memcpy(ptr, &s->xs->req[offset], copylen);
        copied += copylen;

        ptr += copylen;
        len -= copylen;

        cons += copylen;
    }

    /*
     * Not sure this ever mattered except on Alpha, but this barrier
     * is to ensure that the update to req_cons is globally visible
     * only after we have consumed all the data from the ring, and we
     * don't end up seeing data written to the ring *after* the other
     * end sees the update and writes more to the ring. Xen's own
     * xenstored has the same barrier here (although with no comment
     * at all, obviously, because it's Xen code).
     */
    smp_mb();

    qatomic_set(&s->xs->req_cons, cons);

    return copied;
}

static unsigned int copy_to_ring(XenXenstoreState *s, uint8_t *ptr,
                                 unsigned int len)
{
    if (!len) {
        return 0;
    }

    XENSTORE_RING_IDX cons = qatomic_read(&s->xs->rsp_cons);
    XENSTORE_RING_IDX prod = qatomic_read(&s->xs->rsp_prod);
    unsigned int copied = 0;
    /*
     * This matches the barrier in copy_from_ring() (or the guest's
     * equivalent) between consuming data from the ring and updating
     * the cons pointer. It protects against the pathological case
     * (which again I think never happened except on Alpha) where our
     * subsequent writes to the ring could *cross* the read of
     * rsp_cons and the guest could see the new data when it was
     * intending to read the old.
     */
    smp_mb();

    while (len) {
        unsigned int avail = cons + XENSTORE_RING_SIZE - prod;
        unsigned int offset = MASK_XENSTORE_IDX(prod);
        unsigned int copylen = len;

        if (avail > XENSTORE_RING_SIZE) {
            error_report("XenStore ring handling error");
            s->fatal_error = true;
            break;
        } else if (avail == 0) {
            break;
        }

        if (copylen > avail) {
            copylen = avail;
        }
        if (copylen > XENSTORE_RING_SIZE - offset) {
            copylen = XENSTORE_RING_SIZE - offset;
        }

        memcpy(&s->xs->rsp[offset], ptr, copylen);
        copied += copylen;

        ptr += copylen;
        len -= copylen;

        prod += copylen;
    }

    /* Ensure the ring contents are seen before rsp_prod update. */
    smp_wmb();

    qatomic_set(&s->xs->rsp_prod, prod);

    return copied;
}
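
/*
 * Pull request bytes off the ring: first the fixed-size header, then
 * however much of the payload the header's 'len' field declares.
 * Returns the number of bytes consumed on this pass.
 */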
static unsigned int get_req(XenXenstoreState *s)
{
    unsigned int copied = 0;

    if (s->fatal_error) {
        return 0;
    }

    assert(!req_pending(s));
    if (s->req_offset < XENSTORE_HEADER_SIZE) {
        void *ptr = s->req_data + s->req_offset;
        /* Only ask for the *remaining* header bytes, lest we overshoot. */
        unsigned int len = XENSTORE_HEADER_SIZE - s->req_offset;
        unsigned int copylen = copy_from_ring(s, ptr, len);

        copied += copylen;
        s->req_offset += copylen;
    }

    if (s->req_offset >= XENSTORE_HEADER_SIZE) {
        struct xsd_sockmsg *req = (struct xsd_sockmsg *)s->req_data;

        if (req->len > (uint32_t)XENSTORE_PAYLOAD_MAX) {
            error_report("Illegal XenStore request");
            s->fatal_error = true;
            return 0;
        }

        void *ptr = s->req_data + s->req_offset;
        unsigned int len = XENSTORE_HEADER_SIZE + req->len - s->req_offset;
        unsigned int copylen = copy_from_ring(s, ptr, len);

        copied += copylen;
        s->req_offset += copylen;
    }

    return copied;
}
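
/*
 * Push as much of the pending response as the ring will accept, and
 * reset the response buffer once the final byte has been written.
 */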
static unsigned int put_rsp(XenXenstoreState *s)
{
    if (s->fatal_error) {
        return 0;
    }

    assert(s->rsp_pending);

    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    assert(s->rsp_offset < XENSTORE_HEADER_SIZE + rsp->len);

    void *ptr = s->rsp_data + s->rsp_offset;
    unsigned int len = XENSTORE_HEADER_SIZE + rsp->len - s->rsp_offset;
    unsigned int copylen = copy_to_ring(s, ptr, len);

    s->rsp_offset += copylen;

    /* Have we produced a complete response? */
    if (s->rsp_offset == XENSTORE_HEADER_SIZE + rsp->len) {
        reset_rsp(s);
    }

    return copylen;
}

static void deliver_watch(XenXenstoreState *s, const char *path,
                          const char *token)
{
    struct xsd_sockmsg *rsp = (struct xsd_sockmsg *)s->rsp_data;
    uint8_t *rsp_data = (uint8_t *)&rsp[1];
    unsigned int len;

    assert(!s->rsp_pending);

    trace_xenstore_watch_event(path, token);

    rsp->type = XS_WATCH_EVENT;
    rsp->req_id = 0;
    rsp->tx_id = 0;
    rsp->len = 0;

    len = strlen(path);

    /* XENSTORE_ABS/REL_PATH_MAX should ensure there can be no overflow */
    assert(rsp->len + len < XENSTORE_PAYLOAD_MAX);

    memcpy(&rsp_data[rsp->len], path, len);
    rsp->len += len;
    rsp_data[rsp->len] = '\0';
    rsp->len++;

    len = strlen(token);
    /*
     * It is possible for the guest to have chosen a token that will
     * not fit (along with the path) into a watch event. We have no
     * choice but to drop the event if this is the case.
     */
    if (rsp->len + len >= XENSTORE_PAYLOAD_MAX) {
        return;
    }

    memcpy(&rsp_data[rsp->len], token, len);
    rsp->len += len;
    rsp_data[rsp->len] = '\0';
    rsp->len++;

    s->rsp_pending = true;
}

struct watch_event {
    char *path;
    char *token;
};

static void free_watch_event(struct watch_event *ev)
{
    if (ev) {
        g_free(ev->path);
        g_free(ev->token);
        g_free(ev);
    }
}

static void queue_watch(XenXenstoreState *s, const char *path,
                        const char *token)
{
    struct watch_event *ev = g_new0(struct watch_event, 1);

    ev->path = g_strdup(path);
    ev->token = g_strdup(token);

    s->watch_events = g_list_append(s->watch_events, ev);
}

static void fire_watch_cb(void *opaque, const char *path, const char *token)
{
    XenXenstoreState *s = opaque;

    assert(bql_locked());

    /*
     * If there's a response pending, we obviously can't scribble over
     * it. But if there's a request pending, it has dibs on the buffer
     * too.
     *
     * In the common case of a watch firing due to backend activity
     * when the ring was otherwise idle, we should be able to copy the
     * strings directly into the rsp_data and thence the actual ring,
     * without needing to perform any allocations and queue them.
     */
    if (s->rsp_pending || req_pending(s)) {
        queue_watch(s, path, token);
    } else {
        deliver_watch(s, path, token);
        /*
         * Attempt to queue the message into the actual ring, and send
         * the event channel notification if any bytes are copied.
         */
        if (s->rsp_pending && put_rsp(s) > 0) {
            xen_be_evtchn_notify(s->eh, s->be_port);
        }
    }
}

static void process_watch_events(XenXenstoreState *s)
{
    struct watch_event *ev = s->watch_events->data;

    deliver_watch(s, ev->path, ev->token);

    s->watch_events = g_list_remove(s->watch_events, ev);
    free_watch_event(ev);
}
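
/*
 * Main event channel handler: drain queued watch events and the pending
 * response into the ring, pull in newly written request bytes, and
 * process each complete request. Loop until no further progress is
 * made, and notify the guest if anything was transferred.
 */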
static void xen_xenstore_event(void *opaque)
{
    XenXenstoreState *s = opaque;
    evtchn_port_t port = xen_be_evtchn_pending(s->eh);
    unsigned int copied_to, copied_from;
    bool processed, notify = false;

    if (port != s->be_port) {
        return;
    }

    /* We know this is a no-op. */
    xen_be_evtchn_unmask(s->eh, port);

    do {
        copied_to = copied_from = 0;
        processed = false;

        if (!s->rsp_pending && s->watch_events) {
            process_watch_events(s);
        }

        if (s->rsp_pending) {
            copied_to = put_rsp(s);
        }

        if (!req_pending(s)) {
            copied_from = get_req(s);
        }

        if (req_pending(s) && !s->rsp_pending && !s->watch_events) {
            process_req(s);
            processed = true;
        }

        notify |= copied_to || copied_from;
    } while (copied_to || copied_from || processed);

    if (notify) {
        xen_be_evtchn_notify(s->eh, s->be_port);
    }
}

static void alloc_guest_port(XenXenstoreState *s)
{
    struct evtchn_alloc_unbound alloc = {
        .dom = DOMID_SELF,
        .remote_dom = DOMID_QEMU,
    };

    if (!xen_evtchn_alloc_unbound_op(&alloc)) {
        s->guest_port = alloc.port;
    }
}

int xen_xenstore_reset(void)
{
    XenXenstoreState *s = xen_xenstore_singleton;
    int console_port;
    GList *perms;
    int err;

    if (!s) {
        return -ENOTSUP;
    }

    s->req_offset = s->rsp_offset = 0;
    s->rsp_pending = false;

    if (!memory_region_is_mapped(&s->xenstore_page)) {
        uint64_t gpa = XEN_SPECIAL_PFN(XENSTORE) << TARGET_PAGE_BITS;
        xen_overlay_do_map_page(&s->xenstore_page, gpa);
    }

    alloc_guest_port(s);

    /*
     * As qemu/dom0, bind to the guest's port. For incoming migration, this
     * will be unbound as the guest's evtchn table is overwritten. We then
     * rebind to the correct guest port in xen_xenstore_post_load().
     */
    err = xen_be_evtchn_bind_interdomain(s->eh, xen_domid, s->guest_port);
    if (err < 0) {
        return err;
    }
    s->be_port = err;

    /* Create frontend store nodes */
    perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, DOMID_QEMU));
    perms = g_list_append(perms, xs_perm_as_string(XS_PERM_READ, xen_domid));

    relpath_printf(s, perms, "store/port", "%u", s->guest_port);
    relpath_printf(s, perms, "store/ring-ref", "%lu",
                   XEN_SPECIAL_PFN(XENSTORE));

    console_port = xen_primary_console_get_port();
    if (console_port) {
        relpath_printf(s, perms, "console/ring-ref", "%lu",
                       XEN_SPECIAL_PFN(CONSOLE));
        relpath_printf(s, perms, "console/port", "%u", console_port);
        relpath_printf(s, perms, "console/state", "%u", XenbusStateInitialised);
    }

    g_list_free_full(perms, g_free);

    /*
     * We don't actually access the guest's page through the grant, because
     * this isn't real Xen, and we can just use the page we gave it in the
     * first place. Map the grant anyway, mostly for cosmetic purposes so
     * it *looks* like it's in use in the guest-visible grant table.
     */
    s->gt = qemu_xen_gnttab_open();
    uint32_t xs_gntref = GNTTAB_RESERVED_XENSTORE;
    s->granted_xs = qemu_xen_gnttab_map_refs(s->gt, 1, xen_domid, &xs_gntref,
                                             PROT_READ | PROT_WRITE);

    return 0;
}
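
/*
 * Backend ("dom0") access to the emulated xenstore, exposed to QEMU's
 * own PV backends via the xenstore_backend_ops installed in
 * xen_xenstore_realize() above.
 */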
struct qemu_xs_handle {
    XenstoreImplState *impl;
    GList *watches;
    QEMUBH *watch_bh;
};

struct qemu_xs_watch {
    struct qemu_xs_handle *h;
    char *path;
    xs_watch_fn fn;
    void *opaque;
    GList *events;
};

static char *xs_be_get_domain_path(struct qemu_xs_handle *h, unsigned int domid)
{
    return g_strdup_printf("/local/domain/%u", domid);
}

static char **xs_be_directory(struct qemu_xs_handle *h, xs_transaction_t t,
                              const char *path, unsigned int *num)
{
    GList *items = NULL, *l;
    unsigned int i = 0;
    char **items_ret;
    int err;

    err = xs_impl_directory(h->impl, DOMID_QEMU, t, path, NULL, &items);
    if (err) {
        errno = err;
        return NULL;
    }

    items_ret = g_new0(char *, g_list_length(items) + 1);
    *num = 0;
    for (l = items; l; l = l->next) {
        items_ret[i++] = l->data;
        (*num)++;
    }
    g_list_free(items);
    return items_ret;
}

static void *xs_be_read(struct qemu_xs_handle *h, xs_transaction_t t,
                        const char *path, unsigned int *len)
{
    GByteArray *data = g_byte_array_new();
    bool free_segment = false;
    int err;

    err = xs_impl_read(h->impl, DOMID_QEMU, t, path, data);
    if (err) {
        free_segment = true;
        errno = err;
    } else {
        if (len) {
            *len = data->len;
        }
        /* The xen-bus-helper code expects to get NUL terminated string! */
        g_byte_array_append(data, (void *)"", 1);
    }

    return g_byte_array_free(data, free_segment);
}

static bool xs_be_write(struct qemu_xs_handle *h, xs_transaction_t t,
                        const char *path, const void *data, unsigned int len)
{
    GByteArray *gdata = g_byte_array_new();
    int err;

    g_byte_array_append(gdata, data, len);
    err = xs_impl_write(h->impl, DOMID_QEMU, t, path, gdata);
    g_byte_array_unref(gdata);
    if (err) {
        errno = err;
        return false;
    }
    return true;
}

static bool xs_be_create(struct qemu_xs_handle *h, xs_transaction_t t,
                         unsigned int owner, unsigned int domid,
                         unsigned int perms, const char *path)
{
    g_autoptr(GByteArray) data = g_byte_array_new();
    GList *perms_list = NULL;
    int err;

    /* mkdir does this */
    err = xs_impl_read(h->impl, DOMID_QEMU, t, path, data);
    if (err == ENOENT) {
        err = xs_impl_write(h->impl, DOMID_QEMU, t, path, data);
    }
    if (err) {
        errno = err;
        return false;
    }

    perms_list = g_list_append(perms_list,
                               xs_perm_as_string(XS_PERM_NONE, owner));
    perms_list = g_list_append(perms_list,
                               xs_perm_as_string(perms, domid));

    err = xs_impl_set_perms(h->impl, DOMID_QEMU, t, path, perms_list);
    g_list_free_full(perms_list, g_free);
    if (err) {
        errno = err;
        return false;
    }
    return true;
}

static bool xs_be_destroy(struct qemu_xs_handle *h, xs_transaction_t t,
                          const char *path)
{
    int err = xs_impl_rm(h->impl, DOMID_QEMU, t, path);
    if (err) {
        errno = err;
        return false;
    }
    return true;
}

static void be_watch_bh(void *_h)
{
    struct qemu_xs_handle *h = _h;
    GList *l;

    for (l = h->watches; l; l = l->next) {
        struct qemu_xs_watch *w = l->data;

        while (w->events) {
            struct watch_event *ev = w->events->data;

            w->fn(w->opaque, ev->path);

            w->events = g_list_remove(w->events, ev);
            free_watch_event(ev);
        }
    }
}

static void xs_be_watch_cb(void *opaque, const char *path, const char *token)
{
    struct watch_event *ev = g_new0(struct watch_event, 1);
    struct qemu_xs_watch *w = opaque;

    /* We don't care about the token */
    ev->path = g_strdup(path);
    w->events = g_list_append(w->events, ev);

    qemu_bh_schedule(w->h->watch_bh);
}

static struct qemu_xs_watch *xs_be_watch(struct qemu_xs_handle *h,
                                         const char *path, xs_watch_fn fn,
                                         void *opaque)
{
    struct qemu_xs_watch *w = g_new0(struct qemu_xs_watch, 1);
    int err;

    w->h = h;
    w->fn = fn;
    w->opaque = opaque;

    err = xs_impl_watch(h->impl, DOMID_QEMU, path, NULL, xs_be_watch_cb, w);
    if (err) {
        errno = err;
        g_free(w);
        return NULL;
    }

    w->path = g_strdup(path);
    h->watches = g_list_append(h->watches, w);
    return w;
}

static void xs_be_unwatch(struct qemu_xs_handle *h, struct qemu_xs_watch *w)
{
    xs_impl_unwatch(h->impl, DOMID_QEMU, w->path, NULL, xs_be_watch_cb, w);

    h->watches = g_list_remove(h->watches, w);
    g_list_free_full(w->events, (GDestroyNotify)free_watch_event);
    g_free(w->path);
    g_free(w);
}

static xs_transaction_t xs_be_transaction_start(struct qemu_xs_handle *h)
{
    unsigned int new_tx = XBT_NULL;
    int err = xs_impl_transaction_start(h->impl, DOMID_QEMU, &new_tx);
    if (err) {
        errno = err;
        return XBT_NULL;
    }
    return new_tx;
}

static bool xs_be_transaction_end(struct qemu_xs_handle *h, xs_transaction_t t,
                                  bool abort)
{
    int err = xs_impl_transaction_end(h->impl, DOMID_QEMU, t, !abort);
    if (err) {
        errno = err;
        return false;
    }
    return true;
}

static struct qemu_xs_handle *xs_be_open(void)
{
    XenXenstoreState *s = xen_xenstore_singleton;
    struct qemu_xs_handle *h;
    if (!s || !s->impl) {
        errno = ENOSYS;
        return NULL;
    }

    h = g_new0(struct qemu_xs_handle, 1);
    h->impl = s->impl;

    h->watch_bh = aio_bh_new(qemu_get_aio_context(), be_watch_bh, h);

    return h;
}

static void xs_be_close(struct qemu_xs_handle *h)
{
    while (h->watches) {
        struct qemu_xs_watch *w = h->watches->data;
        xs_be_unwatch(h, w);
    }

    qemu_bh_delete(h->watch_bh);
    g_free(h);
}

static struct xenstore_backend_ops emu_xenstore_backend_ops = {
    .open = xs_be_open,
    .close = xs_be_close,
    .get_domain_path = xs_be_get_domain_path,
    .directory = xs_be_directory,
    .read = xs_be_read,
    .write = xs_be_write,
    .create = xs_be_create,
    .destroy = xs_be_destroy,
    .watch = xs_be_watch,
    .unwatch = xs_be_unwatch,
    .transaction_start = xs_be_transaction_start,
    .transaction_end = xs_be_transaction_end,
};