1 /* 2 * QEMU Xen emulation: The actual implementation of XenStore 3 * 4 * Copyright © 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. 5 * 6 * Authors: David Woodhouse <dwmw2@infradead.org>, Paul Durrant <paul@xen.org> 7 * 8 * This work is licensed under the terms of the GNU GPL, version 2 or later. 9 * See the COPYING file in the top-level directory. 10 */ 11 12 #include "qemu/osdep.h" 13 #include "qom/object.h" 14 15 #include "hw/xen/xen.h" 16 17 #include "xen_xenstore.h" 18 #include "xenstore_impl.h" 19 20 #include "hw/xen/interface/io/xs_wire.h" 21 22 #define XS_MAX_WATCHES 128 23 #define XS_MAX_DOMAIN_NODES 1000 24 #define XS_MAX_NODE_SIZE 2048 25 #define XS_MAX_TRANSACTIONS 10 26 #define XS_MAX_PERMS_PER_NODE 5 27 28 #define XS_VALID_CHARS "abcdefghijklmnopqrstuvwxyz" \ 29 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \ 30 "0123456789-/_" 31 32 typedef struct XsNode { 33 uint32_t ref; 34 GByteArray *content; 35 GList *perms; 36 GHashTable *children; 37 uint64_t gencnt; 38 bool deleted_in_tx; 39 bool modified_in_tx; 40 unsigned int serialized_tx; 41 #ifdef XS_NODE_UNIT_TEST 42 gchar *name; /* debug only */ 43 #endif 44 } XsNode; 45 46 typedef struct XsWatch { 47 struct XsWatch *next; 48 xs_impl_watch_fn *cb; 49 void *cb_opaque; 50 char *token; 51 unsigned int dom_id; 52 int rel_prefix; 53 } XsWatch; 54 55 typedef struct XsTransaction { 56 XsNode *root; 57 unsigned int nr_nodes; 58 unsigned int base_tx; 59 unsigned int tx_id; 60 unsigned int dom_id; 61 } XsTransaction; 62 63 struct XenstoreImplState { 64 XsNode *root; 65 unsigned int nr_nodes; 66 GHashTable *watches; 67 unsigned int nr_domu_watches; 68 GHashTable *transactions; 69 unsigned int nr_domu_transactions; 70 unsigned int root_tx; 71 unsigned int last_tx; 72 bool serialized; 73 }; 74 75 76 static void nobble_tx(gpointer key, gpointer value, gpointer user_data) 77 { 78 unsigned int *new_tx_id = user_data; 79 XsTransaction *tx = value; 80 81 if (tx->base_tx == *new_tx_id) { 82 /* Transactions based on XBT_NULL will always fail */ 83 tx->base_tx = XBT_NULL; 84 } 85 } 86 87 static inline unsigned int next_tx(struct XenstoreImplState *s) 88 { 89 unsigned int tx_id; 90 91 /* Find the next TX id which isn't either XBT_NULL or in use. */ 92 do { 93 tx_id = ++s->last_tx; 94 } while (tx_id == XBT_NULL || tx_id == s->root_tx || 95 g_hash_table_lookup(s->transactions, GINT_TO_POINTER(tx_id))); 96 97 /* 98 * It is vanishingly unlikely, but ensure that no outstanding transaction 99 * is based on the (previous incarnation of the) newly-allocated TX id. 100 */ 101 g_hash_table_foreach(s->transactions, nobble_tx, &tx_id); 102 103 return tx_id; 104 } 105 106 static inline XsNode *xs_node_new(void) 107 { 108 XsNode *n = g_new0(XsNode, 1); 109 n->ref = 1; 110 111 #ifdef XS_NODE_UNIT_TEST 112 nr_xs_nodes++; 113 xs_node_list = g_list_prepend(xs_node_list, n); 114 #endif 115 return n; 116 } 117 118 static inline XsNode *xs_node_ref(XsNode *n) 119 { 120 /* With just 10 transactions, it can never get anywhere near this. */ 121 g_assert(n->ref < INT_MAX); 122 123 g_assert(n->ref); 124 n->ref++; 125 return n; 126 } 127 128 static inline void xs_node_unref(XsNode *n) 129 { 130 if (!n) { 131 return; 132 } 133 g_assert(n->ref); 134 if (--n->ref) { 135 return; 136 } 137 138 if (n->content) { 139 g_byte_array_unref(n->content); 140 } 141 if (n->perms) { 142 g_list_free_full(n->perms, g_free); 143 } 144 if (n->children) { 145 g_hash_table_unref(n->children); 146 } 147 #ifdef XS_NODE_UNIT_TEST 148 g_free(n->name); 149 nr_xs_nodes--; 150 xs_node_list = g_list_remove(xs_node_list, n); 151 #endif 152 g_free(n); 153 } 154 155 char *xs_perm_as_string(unsigned int perm, unsigned int domid) 156 { 157 char letter; 158 159 switch (perm) { 160 case XS_PERM_READ | XS_PERM_WRITE: 161 letter = 'b'; 162 break; 163 case XS_PERM_READ: 164 letter = 'r'; 165 break; 166 case XS_PERM_WRITE: 167 letter = 'w'; 168 break; 169 case XS_PERM_NONE: 170 default: 171 letter = 'n'; 172 break; 173 } 174 175 return g_strdup_printf("%c%u", letter, domid); 176 } 177 178 static gpointer do_perm_copy(gconstpointer src, gpointer user_data) 179 { 180 return g_strdup(src); 181 } 182 183 static XsNode *xs_node_create(const char *name, GList *perms) 184 { 185 XsNode *n = xs_node_new(); 186 187 #ifdef XS_NODE_UNIT_TEST 188 if (name) { 189 n->name = g_strdup(name); 190 } 191 #endif 192 193 n->perms = g_list_copy_deep(perms, do_perm_copy, NULL); 194 195 return n; 196 } 197 198 /* For copying from one hash table to another using g_hash_table_foreach() */ 199 static void do_child_insert(gpointer key, gpointer value, gpointer user_data) 200 { 201 g_hash_table_insert(user_data, g_strdup(key), xs_node_ref(value)); 202 } 203 204 static XsNode *xs_node_copy(XsNode *old) 205 { 206 XsNode *n = xs_node_new(); 207 208 n->gencnt = old->gencnt; 209 210 #ifdef XS_NODE_UNIT_TEST 211 if (n->name) { 212 n->name = g_strdup(old->name); 213 } 214 #endif 215 216 assert(old); 217 if (old->children) { 218 n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, 219 (GDestroyNotify)xs_node_unref); 220 g_hash_table_foreach(old->children, do_child_insert, n->children); 221 } 222 if (old->perms) { 223 n->perms = g_list_copy_deep(old->perms, do_perm_copy, NULL); 224 } 225 if (old->content) { 226 n->content = g_byte_array_ref(old->content); 227 } 228 return n; 229 } 230 231 /* Returns true if it made a change to the hash table */ 232 static bool xs_node_add_child(XsNode *n, const char *path_elem, XsNode *child) 233 { 234 assert(!strchr(path_elem, '/')); 235 236 if (!child) { 237 assert(n->children); 238 return g_hash_table_remove(n->children, path_elem); 239 } 240 241 #ifdef XS_NODE_UNIT_TEST 242 g_free(child->name); 243 child->name = g_strdup(path_elem); 244 #endif 245 if (!n->children) { 246 n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, 247 (GDestroyNotify)xs_node_unref); 248 } 249 250 /* 251 * The documentation for g_hash_table_insert() says that it "returns a 252 * boolean value to indicate whether the newly added value was already 253 * in the hash table or not." 254 * 255 * It could perhaps be clearer that returning TRUE means it wasn't, 256 */ 257 return g_hash_table_insert(n->children, g_strdup(path_elem), child); 258 } 259 260 struct walk_op { 261 struct XenstoreImplState *s; 262 char path[XENSTORE_ABS_PATH_MAX + 2]; /* Two NUL terminators */ 263 int (*op_fn)(XsNode **n, struct walk_op *op); 264 void *op_opaque; 265 void *op_opaque2; 266 267 GList *watches; 268 unsigned int dom_id; 269 unsigned int tx_id; 270 271 /* The number of nodes which will exist in the tree if this op succeeds. */ 272 unsigned int new_nr_nodes; 273 274 /* 275 * This is maintained on the way *down* the walk to indicate 276 * whether nodes can be modified in place or whether COW is 277 * required. It starts off being true, as we're always going to 278 * replace the root node. If we walk into a shared subtree it 279 * becomes false. If we start *creating* new nodes for a write, 280 * it becomes true again. 281 * 282 * Do not use it on the way back up. 283 */ 284 bool inplace; 285 bool mutating; 286 bool create_dirs; 287 bool in_transaction; 288 289 /* Tracking during recursion so we know which is first. */ 290 bool deleted_in_tx; 291 }; 292 293 static void fire_watches(struct walk_op *op, bool parents) 294 { 295 GList *l = NULL; 296 XsWatch *w; 297 298 if (!op->mutating || op->in_transaction) { 299 return; 300 } 301 302 if (parents) { 303 l = op->watches; 304 } 305 306 w = g_hash_table_lookup(op->s->watches, op->path); 307 while (w || l) { 308 if (!w) { 309 /* Fire the parent nodes from 'op' if asked to */ 310 w = l->data; 311 l = l->next; 312 continue; 313 } 314 315 assert(strlen(op->path) > w->rel_prefix); 316 w->cb(w->cb_opaque, op->path + w->rel_prefix, w->token); 317 318 w = w->next; 319 } 320 } 321 322 static int xs_node_add_content(XsNode **n, struct walk_op *op) 323 { 324 GByteArray *data = op->op_opaque; 325 326 if (op->dom_id) { 327 /* 328 * The real XenStored includes permissions and names of child nodes 329 * in the calculated datasize but life's too short. For a single 330 * tenant internal XenStore, we don't have to be quite as pedantic. 331 */ 332 if (data->len > XS_MAX_NODE_SIZE) { 333 return E2BIG; 334 } 335 } 336 /* We *are* the node to be written. Either this or a copy. */ 337 if (!op->inplace) { 338 XsNode *old = *n; 339 *n = xs_node_copy(old); 340 xs_node_unref(old); 341 } 342 343 if ((*n)->content) { 344 g_byte_array_unref((*n)->content); 345 } 346 (*n)->content = g_byte_array_ref(data); 347 if (op->tx_id != XBT_NULL) { 348 (*n)->modified_in_tx = true; 349 } 350 return 0; 351 } 352 353 static int xs_node_get_content(XsNode **n, struct walk_op *op) 354 { 355 GByteArray *data = op->op_opaque; 356 GByteArray *node_data; 357 358 assert(op->inplace); 359 assert(*n); 360 361 node_data = (*n)->content; 362 if (node_data) { 363 g_byte_array_append(data, node_data->data, node_data->len); 364 } 365 366 return 0; 367 } 368 369 static int node_rm_recurse(gpointer key, gpointer value, gpointer user_data) 370 { 371 struct walk_op *op = user_data; 372 int path_len = strlen(op->path); 373 int key_len = strlen(key); 374 XsNode *n = value; 375 bool this_inplace = op->inplace; 376 377 if (n->ref != 1) { 378 op->inplace = 0; 379 } 380 381 assert(key_len + path_len + 2 <= sizeof(op->path)); 382 op->path[path_len] = '/'; 383 memcpy(op->path + path_len + 1, key, key_len + 1); 384 385 if (n->children) { 386 g_hash_table_foreach_remove(n->children, node_rm_recurse, op); 387 } 388 op->new_nr_nodes--; 389 390 /* 391 * Fire watches on *this* node but not the parents because they are 392 * going to be deleted too, so the watch will fire for them anyway. 393 */ 394 fire_watches(op, false); 395 op->path[path_len] = '\0'; 396 397 /* 398 * Actually deleting the child here is just an optimisation; if we 399 * don't then the final unref on the topmost victim will just have 400 * to cascade down again repeating all the g_hash_table_foreach() 401 * calls. 402 */ 403 return this_inplace; 404 } 405 406 static XsNode *xs_node_copy_deleted(XsNode *old, struct walk_op *op); 407 static void copy_deleted_recurse(gpointer key, gpointer value, 408 gpointer user_data) 409 { 410 struct walk_op *op = user_data; 411 GHashTable *siblings = op->op_opaque2; 412 XsNode *n = xs_node_copy_deleted(value, op); 413 414 /* 415 * Reinsert the deleted_in_tx copy of the node into the parent's 416 * 'children' hash table. Having stashed it from op->op_opaque2 417 * before the recursive call to xs_node_copy_deleted() scribbled 418 * over it. 419 */ 420 g_hash_table_insert(siblings, g_strdup(key), n); 421 } 422 423 static XsNode *xs_node_copy_deleted(XsNode *old, struct walk_op *op) 424 { 425 XsNode *n = xs_node_new(); 426 427 n->gencnt = old->gencnt; 428 429 #ifdef XS_NODE_UNIT_TEST 430 if (old->name) { 431 n->name = g_strdup(old->name); 432 } 433 #endif 434 435 if (old->children) { 436 n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, 437 (GDestroyNotify)xs_node_unref); 438 op->op_opaque2 = n->children; 439 g_hash_table_foreach(old->children, copy_deleted_recurse, op); 440 } 441 if (old->perms) { 442 n->perms = g_list_copy_deep(old->perms, do_perm_copy, NULL); 443 } 444 n->deleted_in_tx = true; 445 /* If it gets resurrected we only fire a watch if it lost its content */ 446 if (old->content) { 447 n->modified_in_tx = true; 448 } 449 op->new_nr_nodes--; 450 return n; 451 } 452 453 static int xs_node_rm(XsNode **n, struct walk_op *op) 454 { 455 bool this_inplace = op->inplace; 456 457 if (op->tx_id != XBT_NULL) { 458 /* It's not trivial to do inplace handling for this one */ 459 XsNode *old = *n; 460 *n = xs_node_copy_deleted(old, op); 461 xs_node_unref(old); 462 return 0; 463 } 464 465 /* Fire watches for, and count, nodes in the subtree which get deleted */ 466 if ((*n)->children) { 467 g_hash_table_foreach_remove((*n)->children, node_rm_recurse, op); 468 } 469 op->new_nr_nodes--; 470 471 if (this_inplace) { 472 xs_node_unref(*n); 473 } 474 *n = NULL; 475 return 0; 476 } 477 478 static int xs_node_get_perms(XsNode **n, struct walk_op *op) 479 { 480 GList **perms = op->op_opaque; 481 482 assert(op->inplace); 483 assert(*n); 484 485 *perms = g_list_copy_deep((*n)->perms, do_perm_copy, NULL); 486 return 0; 487 } 488 489 static void parse_perm(const char *perm, char *letter, unsigned int *dom_id) 490 { 491 unsigned int n = sscanf(perm, "%c%u", letter, dom_id); 492 493 assert(n == 2); 494 } 495 496 static bool can_access(unsigned int dom_id, GList *perms, const char *letters) 497 { 498 unsigned int i, n; 499 char perm_letter; 500 unsigned int perm_dom_id; 501 bool access; 502 503 if (dom_id == 0) { 504 return true; 505 } 506 507 n = g_list_length(perms); 508 assert(n >= 1); 509 510 /* 511 * The dom_id of the first perm is the owner, and the owner always has 512 * read-write access. 513 */ 514 parse_perm(g_list_nth_data(perms, 0), &perm_letter, &perm_dom_id); 515 if (dom_id == perm_dom_id) { 516 return true; 517 } 518 519 /* 520 * The letter of the first perm specified the default access for all other 521 * domains. 522 */ 523 access = !!strchr(letters, perm_letter); 524 for (i = 1; i < n; i++) { 525 parse_perm(g_list_nth_data(perms, i), &perm_letter, &perm_dom_id); 526 if (dom_id != perm_dom_id) { 527 continue; 528 } 529 access = !!strchr(letters, perm_letter); 530 } 531 532 return access; 533 } 534 535 static int xs_node_set_perms(XsNode **n, struct walk_op *op) 536 { 537 GList *perms = op->op_opaque; 538 539 if (op->dom_id) { 540 unsigned int perm_dom_id; 541 char perm_letter; 542 543 /* A guest may not change permissions on nodes it does not own */ 544 if (!can_access(op->dom_id, (*n)->perms, "")) { 545 return EPERM; 546 } 547 548 /* A guest may not change the owner of a node it owns. */ 549 parse_perm(perms->data, &perm_letter, &perm_dom_id); 550 if (perm_dom_id != op->dom_id) { 551 return EPERM; 552 } 553 554 if (g_list_length(perms) > XS_MAX_PERMS_PER_NODE) { 555 return ENOSPC; 556 } 557 } 558 559 /* We *are* the node to be written. Either this or a copy. */ 560 if (!op->inplace) { 561 XsNode *old = *n; 562 *n = xs_node_copy(old); 563 xs_node_unref(old); 564 } 565 566 if ((*n)->perms) { 567 g_list_free_full((*n)->perms, g_free); 568 } 569 (*n)->perms = g_list_copy_deep(perms, do_perm_copy, NULL); 570 if (op->tx_id != XBT_NULL) { 571 (*n)->modified_in_tx = true; 572 } 573 return 0; 574 } 575 576 /* 577 * Passed a full reference in *n which it may free if it needs to COW. 578 * 579 * When changing the tree, the op->inplace flag indicates whether this 580 * node may be modified in place (i.e. it and all its parents had a 581 * refcount of one). If walking down the tree we find a node whose 582 * refcount is higher, we must clear op->inplace and COW from there 583 * down. Unless we are creating new nodes as scaffolding for a write 584 * (which works like 'mkdir -p' does). In which case those newly 585 * created nodes can (and must) be modified in place again. 586 */ 587 static int xs_node_walk(XsNode **n, struct walk_op *op) 588 { 589 char *child_name = NULL; 590 size_t namelen; 591 XsNode *old = *n, *child = NULL; 592 bool stole_child = false; 593 bool this_inplace; 594 XsWatch *watch; 595 int err; 596 597 namelen = strlen(op->path); 598 watch = g_hash_table_lookup(op->s->watches, op->path); 599 600 /* Is there a child, or do we hit the double-NUL termination? */ 601 if (op->path[namelen + 1]) { 602 char *slash; 603 child_name = op->path + namelen + 1; 604 slash = strchr(child_name, '/'); 605 if (slash) { 606 *slash = '\0'; 607 } 608 op->path[namelen] = '/'; 609 } 610 611 /* If we walk into a subtree which is shared, we must COW */ 612 if (op->mutating && old->ref != 1) { 613 op->inplace = false; 614 } 615 616 if (!child_name) { 617 const char *letters = op->mutating ? "wb" : "rb"; 618 619 if (!can_access(op->dom_id, old->perms, letters)) { 620 err = EACCES; 621 goto out; 622 } 623 624 /* This is the actual node on which the operation shall be performed */ 625 err = op->op_fn(n, op); 626 if (!err) { 627 fire_watches(op, true); 628 } 629 goto out; 630 } 631 632 /* op->inplace will be further modified during the recursion */ 633 this_inplace = op->inplace; 634 635 if (old && old->children) { 636 child = g_hash_table_lookup(old->children, child_name); 637 /* This is a *weak* reference to 'child', owned by the hash table */ 638 } 639 640 if (child) { 641 if (child->deleted_in_tx) { 642 assert(child->ref == 1); 643 /* Cannot actually set child->deleted_in_tx = false until later */ 644 } 645 xs_node_ref(child); 646 /* 647 * Now we own it too. But if we can modify inplace, that's going to 648 * foil the check and force it to COW. We want to be the *only* owner 649 * so that it can be modified in place, so remove it from the hash 650 * table in that case. We'll add it (or its replacement) back later. 651 */ 652 if (op->mutating && this_inplace) { 653 g_hash_table_remove(old->children, child_name); 654 stole_child = true; 655 } 656 } else if (op->create_dirs) { 657 assert(op->mutating); 658 659 if (!can_access(op->dom_id, old->perms, "wb")) { 660 err = EACCES; 661 goto out; 662 } 663 664 if (op->dom_id && op->new_nr_nodes >= XS_MAX_DOMAIN_NODES) { 665 err = ENOSPC; 666 goto out; 667 } 668 669 child = xs_node_create(child_name, old->perms); 670 op->new_nr_nodes++; 671 672 /* 673 * If we're creating a new child, we can clearly modify it (and its 674 * children) in place from here on down. 675 */ 676 op->inplace = true; 677 } else { 678 err = ENOENT; 679 goto out; 680 } 681 682 /* 683 * If there's a watch on this node, add it to the list to be fired 684 * (with the correct full pathname for the modified node) at the end. 685 */ 686 if (watch) { 687 op->watches = g_list_append(op->watches, watch); 688 } 689 690 /* 691 * Except for the temporary child-stealing as noted, our node has not 692 * changed yet. We don't yet know the overall operation will complete. 693 */ 694 err = xs_node_walk(&child, op); 695 696 if (watch) { 697 op->watches = g_list_remove(op->watches, watch); 698 } 699 700 if (err || !op->mutating) { 701 if (stole_child) { 702 /* Put it back as it was. */ 703 g_hash_table_replace(old->children, g_strdup(child_name), child); 704 } else { 705 xs_node_unref(child); 706 } 707 goto out; 708 } 709 710 /* 711 * Now we know the operation has completed successfully and we're on 712 * the way back up. Make the change, substituting 'child' in the 713 * node at our level. 714 */ 715 if (!this_inplace) { 716 *n = xs_node_copy(old); 717 xs_node_unref(old); 718 } 719 720 /* 721 * If we resurrected a deleted_in_tx node, we can mark it as no longer 722 * deleted now that we know the overall operation has succeeded. 723 */ 724 if (op->create_dirs && child && child->deleted_in_tx) { 725 op->new_nr_nodes++; 726 child->deleted_in_tx = false; 727 } 728 729 /* 730 * The child may be NULL here, for a remove operation. Either way, 731 * xs_node_add_child() will do the right thing and return a value 732 * indicating whether it changed the parent's hash table or not. 733 * 734 * We bump the parent gencnt if it adds a child that we *didn't* 735 * steal from it in the first place, or if child==NULL and was 736 * thus removed (whether we stole it earlier and didn't put it 737 * back, or xs_node_add_child() actually removed it now). 738 */ 739 if ((xs_node_add_child(*n, child_name, child) && !stole_child) || !child) { 740 (*n)->gencnt++; 741 } 742 743 out: 744 op->path[namelen] = '\0'; 745 if (!namelen) { 746 assert(!op->watches); 747 /* 748 * On completing the recursion back up the path walk and reaching the 749 * top, assign the new node count if the operation was successful. If 750 * the main tree was changed, bump its tx ID so that outstanding 751 * transactions correctly fail. But don't bump it every time; only 752 * if it makes a difference. 753 */ 754 if (!err && op->mutating) { 755 if (!op->in_transaction) { 756 if (op->s->root_tx != op->s->last_tx) { 757 op->s->root_tx = next_tx(op->s); 758 } 759 op->s->nr_nodes = op->new_nr_nodes; 760 } else { 761 XsTransaction *tx = g_hash_table_lookup(op->s->transactions, 762 GINT_TO_POINTER(op->tx_id)); 763 assert(tx); 764 tx->nr_nodes = op->new_nr_nodes; 765 } 766 } 767 } 768 return err; 769 } 770 771 static void append_directory_item(gpointer key, gpointer value, 772 gpointer user_data) 773 { 774 GList **items = user_data; 775 776 *items = g_list_insert_sorted(*items, g_strdup(key), (GCompareFunc)strcmp); 777 } 778 779 /* Populates items with char * names which caller must free. */ 780 static int xs_node_directory(XsNode **n, struct walk_op *op) 781 { 782 GList **items = op->op_opaque; 783 784 assert(op->inplace); 785 assert(*n); 786 787 if ((*n)->children) { 788 g_hash_table_foreach((*n)->children, append_directory_item, items); 789 } 790 791 if (op->op_opaque2) { 792 *(uint64_t *)op->op_opaque2 = (*n)->gencnt; 793 } 794 795 return 0; 796 } 797 798 static int validate_path(char *outpath, const char *userpath, 799 unsigned int dom_id) 800 { 801 size_t i, pathlen = strlen(userpath); 802 803 if (!pathlen || userpath[pathlen] == '/' || strstr(userpath, "//")) { 804 return EINVAL; 805 } 806 for (i = 0; i < pathlen; i++) { 807 if (!strchr(XS_VALID_CHARS, userpath[i])) { 808 return EINVAL; 809 } 810 } 811 if (userpath[0] == '/') { 812 if (pathlen > XENSTORE_ABS_PATH_MAX) { 813 return E2BIG; 814 } 815 memcpy(outpath, userpath, pathlen + 1); 816 } else { 817 if (pathlen > XENSTORE_REL_PATH_MAX) { 818 return E2BIG; 819 } 820 snprintf(outpath, XENSTORE_ABS_PATH_MAX, "/local/domain/%u/%s", dom_id, 821 userpath); 822 } 823 return 0; 824 } 825 826 827 static int init_walk_op(XenstoreImplState *s, struct walk_op *op, 828 xs_transaction_t tx_id, unsigned int dom_id, 829 const char *path, XsNode ***rootp) 830 { 831 int ret = validate_path(op->path, path, dom_id); 832 if (ret) { 833 return ret; 834 } 835 836 /* 837 * We use *two* NUL terminators at the end of the path, as during the walk 838 * we will temporarily turn each '/' into a NUL to allow us to use that 839 * path element for the lookup. 840 */ 841 op->path[strlen(op->path) + 1] = '\0'; 842 op->watches = NULL; 843 op->path[0] = '\0'; 844 op->inplace = true; 845 op->mutating = false; 846 op->create_dirs = false; 847 op->in_transaction = false; 848 op->dom_id = dom_id; 849 op->tx_id = tx_id; 850 op->s = s; 851 852 if (tx_id == XBT_NULL) { 853 *rootp = &s->root; 854 op->new_nr_nodes = s->nr_nodes; 855 } else { 856 XsTransaction *tx = g_hash_table_lookup(s->transactions, 857 GINT_TO_POINTER(tx_id)); 858 if (!tx) { 859 return ENOENT; 860 } 861 *rootp = &tx->root; 862 op->new_nr_nodes = tx->nr_nodes; 863 op->in_transaction = true; 864 } 865 866 return 0; 867 } 868 869 int xs_impl_read(XenstoreImplState *s, unsigned int dom_id, 870 xs_transaction_t tx_id, const char *path, GByteArray *data) 871 { 872 /* 873 * The data GByteArray shall exist, and will be freed by caller. 874 * Just g_byte_array_append() to it. 875 */ 876 struct walk_op op; 877 XsNode **n; 878 int ret; 879 880 ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); 881 if (ret) { 882 return ret; 883 } 884 op.op_fn = xs_node_get_content; 885 op.op_opaque = data; 886 return xs_node_walk(n, &op); 887 } 888 889 int xs_impl_write(XenstoreImplState *s, unsigned int dom_id, 890 xs_transaction_t tx_id, const char *path, GByteArray *data) 891 { 892 /* 893 * The data GByteArray shall exist, will be freed by caller. You are 894 * free to use g_byte_array_steal() and keep the data. Or just ref it. 895 */ 896 struct walk_op op; 897 XsNode **n; 898 int ret; 899 900 ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); 901 if (ret) { 902 return ret; 903 } 904 op.op_fn = xs_node_add_content; 905 op.op_opaque = data; 906 op.mutating = true; 907 op.create_dirs = true; 908 return xs_node_walk(n, &op); 909 } 910 911 int xs_impl_directory(XenstoreImplState *s, unsigned int dom_id, 912 xs_transaction_t tx_id, const char *path, 913 uint64_t *gencnt, GList **items) 914 { 915 /* 916 * The items are (char *) to be freed by caller. Although it's consumed 917 * immediately so if you want to change it to (const char *) and keep 918 * them, go ahead and change the caller. 919 */ 920 struct walk_op op; 921 XsNode **n; 922 int ret; 923 924 ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); 925 if (ret) { 926 return ret; 927 } 928 op.op_fn = xs_node_directory; 929 op.op_opaque = items; 930 op.op_opaque2 = gencnt; 931 return xs_node_walk(n, &op); 932 } 933 934 int xs_impl_transaction_start(XenstoreImplState *s, unsigned int dom_id, 935 xs_transaction_t *tx_id) 936 { 937 XsTransaction *tx; 938 939 if (*tx_id != XBT_NULL) { 940 return EINVAL; 941 } 942 943 if (dom_id && s->nr_domu_transactions >= XS_MAX_TRANSACTIONS) { 944 return ENOSPC; 945 } 946 947 tx = g_new0(XsTransaction, 1); 948 949 tx->nr_nodes = s->nr_nodes; 950 tx->tx_id = next_tx(s); 951 tx->base_tx = s->root_tx; 952 tx->root = xs_node_ref(s->root); 953 tx->dom_id = dom_id; 954 955 g_hash_table_insert(s->transactions, GINT_TO_POINTER(tx->tx_id), tx); 956 if (dom_id) { 957 s->nr_domu_transactions++; 958 } 959 *tx_id = tx->tx_id; 960 return 0; 961 } 962 963 static gboolean tx_commit_walk(gpointer key, gpointer value, 964 gpointer user_data) 965 { 966 struct walk_op *op = user_data; 967 int path_len = strlen(op->path); 968 int key_len = strlen(key); 969 bool fire_parents = true; 970 XsWatch *watch; 971 XsNode *n = value; 972 973 if (n->ref != 1) { 974 return false; 975 } 976 977 if (n->deleted_in_tx) { 978 /* 979 * We fire watches on our parents if we are the *first* node 980 * to be deleted (the topmost one). This matches the behaviour 981 * when deleting in the live tree. 982 */ 983 fire_parents = !op->deleted_in_tx; 984 985 /* Only used on the way down so no need to clear it later */ 986 op->deleted_in_tx = true; 987 } 988 989 assert(key_len + path_len + 2 <= sizeof(op->path)); 990 op->path[path_len] = '/'; 991 memcpy(op->path + path_len + 1, key, key_len + 1); 992 993 watch = g_hash_table_lookup(op->s->watches, op->path); 994 if (watch) { 995 op->watches = g_list_append(op->watches, watch); 996 } 997 998 if (n->children) { 999 g_hash_table_foreach_remove(n->children, tx_commit_walk, op); 1000 } 1001 1002 if (watch) { 1003 op->watches = g_list_remove(op->watches, watch); 1004 } 1005 1006 /* 1007 * Don't fire watches if this node was only copied because a 1008 * descendent was changed. The modified_in_tx flag indicates the 1009 * ones which were really changed. 1010 */ 1011 if (n->modified_in_tx || n->deleted_in_tx) { 1012 fire_watches(op, fire_parents); 1013 n->modified_in_tx = false; 1014 } 1015 op->path[path_len] = '\0'; 1016 1017 /* Deleted nodes really do get expunged when we commit */ 1018 return n->deleted_in_tx; 1019 } 1020 1021 static int transaction_commit(XenstoreImplState *s, XsTransaction *tx) 1022 { 1023 struct walk_op op; 1024 XsNode **n; 1025 1026 if (s->root_tx != tx->base_tx) { 1027 return EAGAIN; 1028 } 1029 xs_node_unref(s->root); 1030 s->root = tx->root; 1031 tx->root = NULL; 1032 s->root_tx = tx->tx_id; 1033 s->nr_nodes = tx->nr_nodes; 1034 1035 init_walk_op(s, &op, XBT_NULL, tx->dom_id, "/", &n); 1036 op.deleted_in_tx = false; 1037 op.mutating = true; 1038 1039 /* 1040 * Walk the new root and fire watches on any node which has a 1041 * refcount of one (which is therefore unique to this transaction). 1042 */ 1043 if (s->root->children) { 1044 g_hash_table_foreach_remove(s->root->children, tx_commit_walk, &op); 1045 } 1046 1047 return 0; 1048 } 1049 1050 int xs_impl_transaction_end(XenstoreImplState *s, unsigned int dom_id, 1051 xs_transaction_t tx_id, bool commit) 1052 { 1053 int ret = 0; 1054 XsTransaction *tx = g_hash_table_lookup(s->transactions, 1055 GINT_TO_POINTER(tx_id)); 1056 1057 if (!tx || tx->dom_id != dom_id) { 1058 return ENOENT; 1059 } 1060 1061 if (commit) { 1062 ret = transaction_commit(s, tx); 1063 } 1064 1065 g_hash_table_remove(s->transactions, GINT_TO_POINTER(tx_id)); 1066 if (dom_id) { 1067 assert(s->nr_domu_transactions); 1068 s->nr_domu_transactions--; 1069 } 1070 return ret; 1071 } 1072 1073 int xs_impl_rm(XenstoreImplState *s, unsigned int dom_id, 1074 xs_transaction_t tx_id, const char *path) 1075 { 1076 struct walk_op op; 1077 XsNode **n; 1078 int ret; 1079 1080 ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); 1081 if (ret) { 1082 return ret; 1083 } 1084 op.op_fn = xs_node_rm; 1085 op.mutating = true; 1086 return xs_node_walk(n, &op); 1087 } 1088 1089 int xs_impl_get_perms(XenstoreImplState *s, unsigned int dom_id, 1090 xs_transaction_t tx_id, const char *path, GList **perms) 1091 { 1092 struct walk_op op; 1093 XsNode **n; 1094 int ret; 1095 1096 ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); 1097 if (ret) { 1098 return ret; 1099 } 1100 op.op_fn = xs_node_get_perms; 1101 op.op_opaque = perms; 1102 return xs_node_walk(n, &op); 1103 } 1104 1105 static void is_valid_perm(gpointer data, gpointer user_data) 1106 { 1107 char *perm = data; 1108 bool *valid = user_data; 1109 char letter; 1110 unsigned int dom_id; 1111 1112 if (!*valid) { 1113 return; 1114 } 1115 1116 if (sscanf(perm, "%c%u", &letter, &dom_id) != 2) { 1117 *valid = false; 1118 return; 1119 } 1120 1121 switch (letter) { 1122 case 'n': 1123 case 'r': 1124 case 'w': 1125 case 'b': 1126 break; 1127 1128 default: 1129 *valid = false; 1130 break; 1131 } 1132 } 1133 1134 int xs_impl_set_perms(XenstoreImplState *s, unsigned int dom_id, 1135 xs_transaction_t tx_id, const char *path, GList *perms) 1136 { 1137 struct walk_op op; 1138 XsNode **n; 1139 bool valid = true; 1140 int ret; 1141 1142 if (!g_list_length(perms)) { 1143 return EINVAL; 1144 } 1145 1146 g_list_foreach(perms, is_valid_perm, &valid); 1147 if (!valid) { 1148 return EINVAL; 1149 } 1150 1151 ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); 1152 if (ret) { 1153 return ret; 1154 } 1155 op.op_fn = xs_node_set_perms; 1156 op.op_opaque = perms; 1157 op.mutating = true; 1158 return xs_node_walk(n, &op); 1159 } 1160 1161 static int do_xs_impl_watch(XenstoreImplState *s, unsigned int dom_id, 1162 const char *path, const char *token, 1163 xs_impl_watch_fn fn, void *opaque) 1164 1165 { 1166 char abspath[XENSTORE_ABS_PATH_MAX + 1]; 1167 XsWatch *w, *l; 1168 int ret; 1169 1170 ret = validate_path(abspath, path, dom_id); 1171 if (ret) { 1172 return ret; 1173 } 1174 1175 /* Check for duplicates */ 1176 l = w = g_hash_table_lookup(s->watches, abspath); 1177 while (w) { 1178 if (!g_strcmp0(token, w->token) && opaque == w->cb_opaque && 1179 fn == w->cb && dom_id == w->dom_id) { 1180 return EEXIST; 1181 } 1182 w = w->next; 1183 } 1184 1185 if (dom_id && s->nr_domu_watches >= XS_MAX_WATCHES) { 1186 return E2BIG; 1187 } 1188 1189 w = g_new0(XsWatch, 1); 1190 w->token = g_strdup(token); 1191 w->cb = fn; 1192 w->cb_opaque = opaque; 1193 w->dom_id = dom_id; 1194 w->rel_prefix = strlen(abspath) - strlen(path); 1195 1196 /* l was looked up above when checking for duplicates */ 1197 if (l) { 1198 w->next = l->next; 1199 l->next = w; 1200 } else { 1201 g_hash_table_insert(s->watches, g_strdup(abspath), w); 1202 } 1203 if (dom_id) { 1204 s->nr_domu_watches++; 1205 } 1206 1207 return 0; 1208 } 1209 1210 int xs_impl_watch(XenstoreImplState *s, unsigned int dom_id, const char *path, 1211 const char *token, xs_impl_watch_fn fn, void *opaque) 1212 { 1213 int ret = do_xs_impl_watch(s, dom_id, path, token, fn, opaque); 1214 1215 if (!ret) { 1216 /* A new watch should fire immediately */ 1217 fn(opaque, path, token); 1218 } 1219 1220 return ret; 1221 } 1222 1223 static XsWatch *free_watch(XenstoreImplState *s, XsWatch *w) 1224 { 1225 XsWatch *next = w->next; 1226 1227 if (w->dom_id) { 1228 assert(s->nr_domu_watches); 1229 s->nr_domu_watches--; 1230 } 1231 1232 g_free(w->token); 1233 g_free(w); 1234 1235 return next; 1236 } 1237 1238 int xs_impl_unwatch(XenstoreImplState *s, unsigned int dom_id, 1239 const char *path, const char *token, 1240 xs_impl_watch_fn fn, void *opaque) 1241 { 1242 char abspath[XENSTORE_ABS_PATH_MAX + 1]; 1243 XsWatch *w, **l; 1244 int ret; 1245 1246 ret = validate_path(abspath, path, dom_id); 1247 if (ret) { 1248 return ret; 1249 } 1250 1251 w = g_hash_table_lookup(s->watches, abspath); 1252 if (!w) { 1253 return ENOENT; 1254 } 1255 1256 /* 1257 * The hash table contains the first element of a list of 1258 * watches. Removing the first element in the list is a 1259 * special case because we have to update the hash table to 1260 * point to the next (or remove it if there's nothing left). 1261 */ 1262 if (!g_strcmp0(token, w->token) && fn == w->cb && opaque == w->cb_opaque && 1263 dom_id == w->dom_id) { 1264 if (w->next) { 1265 /* Insert the previous 'next' into the hash table */ 1266 g_hash_table_insert(s->watches, g_strdup(abspath), w->next); 1267 } else { 1268 /* Nothing left; remove from hash table */ 1269 g_hash_table_remove(s->watches, abspath); 1270 } 1271 free_watch(s, w); 1272 return 0; 1273 } 1274 1275 /* 1276 * We're all done messing with the hash table because the element 1277 * it points to has survived the cull. Now it's just a simple 1278 * linked list removal operation. 1279 */ 1280 for (l = &w->next; *l; l = &w->next) { 1281 w = *l; 1282 1283 if (!g_strcmp0(token, w->token) && fn == w->cb && 1284 opaque != w->cb_opaque && dom_id == w->dom_id) { 1285 *l = free_watch(s, w); 1286 return 0; 1287 } 1288 } 1289 1290 return ENOENT; 1291 } 1292 1293 int xs_impl_reset_watches(XenstoreImplState *s, unsigned int dom_id) 1294 { 1295 char **watch_paths; 1296 guint nr_watch_paths; 1297 guint i; 1298 1299 watch_paths = (char **)g_hash_table_get_keys_as_array(s->watches, 1300 &nr_watch_paths); 1301 1302 for (i = 0; i < nr_watch_paths; i++) { 1303 XsWatch *w1 = g_hash_table_lookup(s->watches, watch_paths[i]); 1304 XsWatch *w2, *w, **l; 1305 1306 /* 1307 * w1 is the original list. The hash table has this pointer. 1308 * w2 is the head of our newly-filtered list. 1309 * w and l are temporary for processing. w is somewhat redundant 1310 * with *l but makes my eyes bleed less. 1311 */ 1312 1313 w = w2 = w1; 1314 l = &w; 1315 while (w) { 1316 if (w->dom_id == dom_id) { 1317 /* If we're freeing the head of the list, bump w2 */ 1318 if (w2 == w) { 1319 w2 = w->next; 1320 } 1321 *l = free_watch(s, w); 1322 } else { 1323 l = &w->next; 1324 } 1325 w = *l; 1326 } 1327 /* 1328 * If the head of the list survived the cull, we don't need to 1329 * touch the hash table and we're done with this path. Else... 1330 */ 1331 if (w1 != w2) { 1332 g_hash_table_steal(s->watches, watch_paths[i]); 1333 1334 /* 1335 * It was already freed. (Don't worry, this whole thing is 1336 * single-threaded and nobody saw it in the meantime). And 1337 * having *stolen* it, we now own the watch_paths[i] string 1338 * so if we don't give it back to the hash table, we need 1339 * to free it. 1340 */ 1341 if (w2) { 1342 g_hash_table_insert(s->watches, watch_paths[i], w2); 1343 } else { 1344 g_free(watch_paths[i]); 1345 } 1346 } 1347 } 1348 g_free(watch_paths); 1349 return 0; 1350 } 1351 1352 static void xs_tx_free(void *_tx) 1353 { 1354 XsTransaction *tx = _tx; 1355 if (tx->root) { 1356 xs_node_unref(tx->root); 1357 } 1358 g_free(tx); 1359 } 1360 1361 XenstoreImplState *xs_impl_create(unsigned int dom_id) 1362 { 1363 XenstoreImplState *s = g_new0(XenstoreImplState, 1); 1364 GList *perms; 1365 1366 s->watches = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL); 1367 s->transactions = g_hash_table_new_full(g_direct_hash, g_direct_equal, 1368 NULL, xs_tx_free); 1369 1370 perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, 0)); 1371 s->root = xs_node_create("/", perms); 1372 g_list_free_full(perms, g_free); 1373 s->nr_nodes = 1; 1374 1375 s->root_tx = s->last_tx = 1; 1376 return s; 1377 } 1378 1379 1380 static void clear_serialized_tx(gpointer key, gpointer value, gpointer opaque) 1381 { 1382 XsNode *n = value; 1383 1384 n->serialized_tx = XBT_NULL; 1385 if (n->children) { 1386 g_hash_table_foreach(n->children, clear_serialized_tx, NULL); 1387 } 1388 } 1389 1390 static void clear_tx_serialized_tx(gpointer key, gpointer value, 1391 gpointer opaque) 1392 { 1393 XsTransaction *t = value; 1394 1395 clear_serialized_tx(NULL, t->root, NULL); 1396 } 1397 1398 static void write_be32(GByteArray *save, uint32_t val) 1399 { 1400 uint32_t be = htonl(val); 1401 g_byte_array_append(save, (void *)&be, sizeof(be)); 1402 } 1403 1404 1405 struct save_state { 1406 GByteArray *bytes; 1407 unsigned int tx_id; 1408 }; 1409 1410 #define MODIFIED_IN_TX (1U << 0) 1411 #define DELETED_IN_TX (1U << 1) 1412 #define NODE_REF (1U << 2) 1413 1414 static void save_node(gpointer key, gpointer value, gpointer opaque) 1415 { 1416 struct save_state *ss = opaque; 1417 XsNode *n = value; 1418 char *name = key; 1419 uint8_t flag = 0; 1420 1421 /* Child nodes (i.e. anything but the root) have a name */ 1422 if (name) { 1423 g_byte_array_append(ss->bytes, key, strlen(key) + 1); 1424 } 1425 1426 /* 1427 * If we already wrote this node, refer to the previous copy. 1428 * There's no rename/move in XenStore, so all we need to find 1429 * it is the tx_id of the transation in which it exists. Which 1430 * may be the root tx. 1431 */ 1432 if (n->serialized_tx != XBT_NULL) { 1433 flag = NODE_REF; 1434 g_byte_array_append(ss->bytes, &flag, 1); 1435 write_be32(ss->bytes, n->serialized_tx); 1436 } else { 1437 GList *l; 1438 n->serialized_tx = ss->tx_id; 1439 1440 if (n->modified_in_tx) { 1441 flag |= MODIFIED_IN_TX; 1442 } 1443 if (n->deleted_in_tx) { 1444 flag |= DELETED_IN_TX; 1445 } 1446 g_byte_array_append(ss->bytes, &flag, 1); 1447 1448 if (n->content) { 1449 write_be32(ss->bytes, n->content->len); 1450 g_byte_array_append(ss->bytes, n->content->data, n->content->len); 1451 } else { 1452 write_be32(ss->bytes, 0); 1453 } 1454 1455 for (l = n->perms; l; l = l->next) { 1456 g_byte_array_append(ss->bytes, l->data, strlen(l->data) + 1); 1457 } 1458 /* NUL termination after perms */ 1459 g_byte_array_append(ss->bytes, (void *)"", 1); 1460 1461 if (n->children) { 1462 g_hash_table_foreach(n->children, save_node, ss); 1463 } 1464 /* NUL termination after children (child name is NUL) */ 1465 g_byte_array_append(ss->bytes, (void *)"", 1); 1466 } 1467 } 1468 1469 static void save_tree(struct save_state *ss, uint32_t tx_id, XsNode *root) 1470 { 1471 write_be32(ss->bytes, tx_id); 1472 ss->tx_id = tx_id; 1473 save_node(NULL, root, ss); 1474 } 1475 1476 static void save_tx(gpointer key, gpointer value, gpointer opaque) 1477 { 1478 uint32_t tx_id = GPOINTER_TO_INT(key); 1479 struct save_state *ss = opaque; 1480 XsTransaction *n = value; 1481 1482 write_be32(ss->bytes, n->base_tx); 1483 write_be32(ss->bytes, n->dom_id); 1484 1485 save_tree(ss, tx_id, n->root); 1486 } 1487 1488 static void save_watch(gpointer key, gpointer value, gpointer opaque) 1489 { 1490 struct save_state *ss = opaque; 1491 XsWatch *w = value; 1492 1493 /* We only save the *guest* watches. */ 1494 if (w->dom_id) { 1495 gpointer relpath = key + w->rel_prefix; 1496 g_byte_array_append(ss->bytes, relpath, strlen(relpath) + 1); 1497 g_byte_array_append(ss->bytes, (void *)w->token, strlen(w->token) + 1); 1498 } 1499 } 1500 1501 GByteArray *xs_impl_serialize(XenstoreImplState *s) 1502 { 1503 struct save_state ss; 1504 1505 ss.bytes = g_byte_array_new(); 1506 1507 /* 1508 * node = flags [ real_node / node_ref ] 1509 * flags = uint8_t (MODIFIED_IN_TX | DELETED_IN_TX | NODE_REF) 1510 * node_ref = tx_id (in which the original version of this node exists) 1511 * real_node = content perms child* NUL 1512 * content = len data 1513 * len = uint32_t 1514 * data = uint8_t{len} 1515 * perms = perm* NUL 1516 * perm = asciiz 1517 * child = name node 1518 * name = asciiz 1519 * 1520 * tree = tx_id node 1521 * tx_id = uint32_t 1522 * 1523 * transaction = base_tx_id dom_id tree 1524 * base_tx_id = uint32_t 1525 * dom_id = uint32_t 1526 * 1527 * tx_list = tree transaction* XBT_NULL 1528 * 1529 * watch = path token 1530 * path = asciiz 1531 * token = asciiz 1532 * 1533 * watch_list = watch* NUL 1534 * 1535 * xs_serialize_stream = last_tx tx_list watch_list 1536 * last_tx = uint32_t 1537 */ 1538 1539 /* Clear serialized_tx in every node. */ 1540 if (s->serialized) { 1541 clear_serialized_tx(NULL, s->root, NULL); 1542 g_hash_table_foreach(s->transactions, clear_tx_serialized_tx, NULL); 1543 } 1544 1545 s->serialized = true; 1546 1547 write_be32(ss.bytes, s->last_tx); 1548 save_tree(&ss, s->root_tx, s->root); 1549 g_hash_table_foreach(s->transactions, save_tx, &ss); 1550 1551 write_be32(ss.bytes, XBT_NULL); 1552 1553 g_hash_table_foreach(s->watches, save_watch, &ss); 1554 g_byte_array_append(ss.bytes, (void *)"", 1); 1555 1556 return ss.bytes; 1557 } 1558 1559 struct unsave_state { 1560 char path[XENSTORE_ABS_PATH_MAX + 1]; 1561 XenstoreImplState *s; 1562 GByteArray *bytes; 1563 uint8_t *d; 1564 size_t l; 1565 bool root_walk; 1566 }; 1567 1568 static int consume_be32(struct unsave_state *us, unsigned int *val) 1569 { 1570 uint32_t d; 1571 1572 if (us->l < sizeof(d)) { 1573 return -EINVAL; 1574 } 1575 memcpy(&d, us->d, sizeof(d)); 1576 *val = ntohl(d); 1577 us->d += sizeof(d); 1578 us->l -= sizeof(d); 1579 return 0; 1580 } 1581 1582 static int consume_string(struct unsave_state *us, char **str, size_t *len) 1583 { 1584 size_t l; 1585 1586 if (!us->l) { 1587 return -EINVAL; 1588 } 1589 1590 l = strnlen((void *)us->d, us->l); 1591 if (l == us->l) { 1592 return -EINVAL; 1593 } 1594 1595 if (str) { 1596 *str = (void *)us->d; 1597 } 1598 if (len) { 1599 *len = l; 1600 } 1601 1602 us->d += l + 1; 1603 us->l -= l + 1; 1604 return 0; 1605 } 1606 1607 static XsNode *lookup_node(XsNode *n, char *path) 1608 { 1609 char *slash = strchr(path, '/'); 1610 XsNode *child; 1611 1612 if (path[0] == '\0') { 1613 return n; 1614 } 1615 1616 if (slash) { 1617 *slash = '\0'; 1618 } 1619 1620 if (!n->children) { 1621 return NULL; 1622 } 1623 child = g_hash_table_lookup(n->children, path); 1624 if (!slash) { 1625 return child; 1626 } 1627 1628 *slash = '/'; 1629 if (!child) { 1630 return NULL; 1631 } 1632 return lookup_node(child, slash + 1); 1633 } 1634 1635 static XsNode *lookup_tx_node(struct unsave_state *us, unsigned int tx_id) 1636 { 1637 XsTransaction *t; 1638 if (tx_id == us->s->root_tx) { 1639 return lookup_node(us->s->root, us->path + 1); 1640 } 1641 1642 t = g_hash_table_lookup(us->s->transactions, GINT_TO_POINTER(tx_id)); 1643 if (!t) { 1644 return NULL; 1645 } 1646 g_assert(t->root); 1647 return lookup_node(t->root, us->path + 1); 1648 } 1649 1650 static void count_child_nodes(gpointer key, gpointer value, gpointer user_data) 1651 { 1652 unsigned int *nr_nodes = user_data; 1653 XsNode *n = value; 1654 1655 (*nr_nodes)++; 1656 1657 if (n->children) { 1658 g_hash_table_foreach(n->children, count_child_nodes, nr_nodes); 1659 } 1660 } 1661 1662 static int consume_node(struct unsave_state *us, XsNode **nodep, 1663 unsigned int *nr_nodes) 1664 { 1665 XsNode *n = NULL; 1666 uint8_t flags; 1667 int ret; 1668 1669 if (us->l < 1) { 1670 return -EINVAL; 1671 } 1672 flags = us->d[0]; 1673 us->d++; 1674 us->l--; 1675 1676 if (flags == NODE_REF) { 1677 unsigned int tx; 1678 1679 ret = consume_be32(us, &tx); 1680 if (ret) { 1681 return ret; 1682 } 1683 1684 n = lookup_tx_node(us, tx); 1685 if (!n) { 1686 return -EINVAL; 1687 } 1688 n->ref++; 1689 if (n->children) { 1690 g_hash_table_foreach(n->children, count_child_nodes, nr_nodes); 1691 } 1692 } else { 1693 uint32_t datalen; 1694 1695 if (flags & ~(DELETED_IN_TX | MODIFIED_IN_TX)) { 1696 return -EINVAL; 1697 } 1698 n = xs_node_new(); 1699 1700 if (flags & DELETED_IN_TX) { 1701 n->deleted_in_tx = true; 1702 } 1703 if (flags & MODIFIED_IN_TX) { 1704 n->modified_in_tx = true; 1705 } 1706 ret = consume_be32(us, &datalen); 1707 if (ret) { 1708 xs_node_unref(n); 1709 return -EINVAL; 1710 } 1711 if (datalen) { 1712 if (datalen > us->l) { 1713 xs_node_unref(n); 1714 return -EINVAL; 1715 } 1716 1717 GByteArray *node_data = g_byte_array_new(); 1718 g_byte_array_append(node_data, us->d, datalen); 1719 us->d += datalen; 1720 us->l -= datalen; 1721 n->content = node_data; 1722 1723 if (us->root_walk) { 1724 n->modified_in_tx = true; 1725 } 1726 } 1727 while (1) { 1728 char *perm = NULL; 1729 size_t permlen = 0; 1730 1731 ret = consume_string(us, &perm, &permlen); 1732 if (ret) { 1733 xs_node_unref(n); 1734 return ret; 1735 } 1736 1737 if (!permlen) { 1738 break; 1739 } 1740 1741 n->perms = g_list_append(n->perms, g_strdup(perm)); 1742 } 1743 1744 /* Now children */ 1745 while (1) { 1746 size_t childlen; 1747 char *childname; 1748 char *pathend; 1749 XsNode *child = NULL; 1750 1751 ret = consume_string(us, &childname, &childlen); 1752 if (ret) { 1753 xs_node_unref(n); 1754 return ret; 1755 } 1756 1757 if (!childlen) { 1758 break; 1759 } 1760 1761 pathend = us->path + strlen(us->path); 1762 strncat(us->path, "/", sizeof(us->path) - 1); 1763 strncat(us->path, childname, sizeof(us->path) - 1); 1764 1765 ret = consume_node(us, &child, nr_nodes); 1766 *pathend = '\0'; 1767 if (ret) { 1768 xs_node_unref(n); 1769 return ret; 1770 } 1771 g_assert(child); 1772 xs_node_add_child(n, childname, child); 1773 } 1774 1775 /* 1776 * If the node has no data and no children we still want to fire 1777 * a watch on it. 1778 */ 1779 if (us->root_walk && !n->children) { 1780 n->modified_in_tx = true; 1781 } 1782 } 1783 1784 if (!n->deleted_in_tx) { 1785 (*nr_nodes)++; 1786 } 1787 1788 *nodep = n; 1789 return 0; 1790 } 1791 1792 static int consume_tree(struct unsave_state *us, XsTransaction *t) 1793 { 1794 int ret; 1795 1796 ret = consume_be32(us, &t->tx_id); 1797 if (ret) { 1798 return ret; 1799 } 1800 1801 if (t->tx_id > us->s->last_tx) { 1802 return -EINVAL; 1803 } 1804 1805 us->path[0] = '\0'; 1806 1807 return consume_node(us, &t->root, &t->nr_nodes); 1808 } 1809 1810 int xs_impl_deserialize(XenstoreImplState *s, GByteArray *bytes, 1811 unsigned int dom_id, xs_impl_watch_fn watch_fn, 1812 void *watch_opaque) 1813 { 1814 struct unsave_state us; 1815 XsTransaction base_t = { 0 }; 1816 int ret; 1817 1818 us.s = s; 1819 us.bytes = bytes; 1820 us.d = bytes->data; 1821 us.l = bytes->len; 1822 1823 xs_impl_reset_watches(s, dom_id); 1824 g_hash_table_remove_all(s->transactions); 1825 1826 xs_node_unref(s->root); 1827 s->root = NULL; 1828 s->root_tx = s->last_tx = XBT_NULL; 1829 1830 ret = consume_be32(&us, &s->last_tx); 1831 if (ret) { 1832 return ret; 1833 } 1834 1835 /* 1836 * Consume the base tree into a transaction so that watches can be 1837 * fired as we commit it. By setting us.root_walk we cause the nodes 1838 * to be marked as 'modified_in_tx' as they are created, so that the 1839 * watches are triggered on them. 1840 */ 1841 base_t.dom_id = dom_id; 1842 base_t.base_tx = XBT_NULL; 1843 us.root_walk = true; 1844 ret = consume_tree(&us, &base_t); 1845 if (ret) { 1846 return ret; 1847 } 1848 us.root_walk = false; 1849 1850 /* 1851 * Commit the transaction now while the refcount on all nodes is 1. 1852 * Note that we haven't yet reinstated the *guest* watches but that's 1853 * OK because we don't want the guest to see any changes. Even any 1854 * backend nodes which get recreated should be *precisely* as they 1855 * were before the migration. Back ends may have been instantiated 1856 * already, and will see the frontend magically blink into existence 1857 * now (well, from the aio_bh which fires the watches). It's their 1858 * responsibility to rebuild everything precisely as it was before. 1859 */ 1860 ret = transaction_commit(s, &base_t); 1861 if (ret) { 1862 return ret; 1863 } 1864 1865 while (1) { 1866 unsigned int base_tx; 1867 XsTransaction *t; 1868 1869 ret = consume_be32(&us, &base_tx); 1870 if (ret) { 1871 return ret; 1872 } 1873 if (base_tx == XBT_NULL) { 1874 break; 1875 } 1876 1877 t = g_new0(XsTransaction, 1); 1878 t->base_tx = base_tx; 1879 1880 ret = consume_be32(&us, &t->dom_id); 1881 if (!ret) { 1882 ret = consume_tree(&us, t); 1883 } 1884 if (ret) { 1885 g_free(t); 1886 return ret; 1887 } 1888 g_assert(t->root); 1889 if (t->dom_id) { 1890 s->nr_domu_transactions++; 1891 } 1892 g_hash_table_insert(s->transactions, GINT_TO_POINTER(t->tx_id), t); 1893 } 1894 1895 while (1) { 1896 char *path, *token; 1897 size_t pathlen, toklen; 1898 1899 ret = consume_string(&us, &path, &pathlen); 1900 if (ret) { 1901 return ret; 1902 } 1903 if (!pathlen) { 1904 break; 1905 } 1906 1907 ret = consume_string(&us, &token, &toklen); 1908 if (ret) { 1909 return ret; 1910 } 1911 1912 if (!watch_fn) { 1913 continue; 1914 } 1915 1916 ret = do_xs_impl_watch(s, dom_id, path, token, watch_fn, watch_opaque); 1917 if (ret) { 1918 return ret; 1919 } 1920 } 1921 1922 if (us.l) { 1923 return -EINVAL; 1924 } 1925 1926 return 0; 1927 } 1928