1 /* 2 * QEMU Xen emulation: The actual implementation of XenStore 3 * 4 * Copyright © 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. 5 * 6 * Authors: David Woodhouse <dwmw2@infradead.org>, Paul Durrant <paul@xen.org> 7 * 8 * This work is licensed under the terms of the GNU GPL, version 2 or later. 9 * See the COPYING file in the top-level directory. 10 */ 11 12 #include "qemu/osdep.h" 13 #include "qom/object.h" 14 15 #include "hw/xen/xen.h" 16 17 #include "xen_xenstore.h" 18 #include "xenstore_impl.h" 19 20 #include "hw/xen/interface/io/xs_wire.h" 21 22 #define XS_MAX_WATCHES 128 23 #define XS_MAX_DOMAIN_NODES 1000 24 #define XS_MAX_NODE_SIZE 2048 25 #define XS_MAX_TRANSACTIONS 10 26 #define XS_MAX_PERMS_PER_NODE 5 27 28 #define XS_VALID_CHARS "abcdefghijklmnopqrstuvwxyz" \ 29 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \ 30 "0123456789-/_" 31 32 typedef struct XsNode { 33 uint32_t ref; 34 GByteArray *content; 35 GList *perms; 36 GHashTable *children; 37 uint64_t gencnt; 38 bool deleted_in_tx; 39 bool modified_in_tx; 40 unsigned int serialized_tx; 41 #ifdef XS_NODE_UNIT_TEST 42 gchar *name; /* debug only */ 43 #endif 44 } XsNode; 45 46 typedef struct XsWatch { 47 struct XsWatch *next; 48 xs_impl_watch_fn *cb; 49 void *cb_opaque; 50 char *token; 51 unsigned int dom_id; 52 int rel_prefix; 53 } XsWatch; 54 55 typedef struct XsTransaction { 56 XsNode *root; 57 unsigned int nr_nodes; 58 unsigned int base_tx; 59 unsigned int tx_id; 60 unsigned int dom_id; 61 } XsTransaction; 62 63 struct XenstoreImplState { 64 XsNode *root; 65 unsigned int nr_nodes; 66 GHashTable *watches; 67 unsigned int nr_domu_watches; 68 GHashTable *transactions; 69 unsigned int nr_domu_transactions; 70 unsigned int root_tx; 71 unsigned int last_tx; 72 bool serialized; 73 }; 74 75 76 static void nobble_tx(gpointer key, gpointer value, gpointer user_data) 77 { 78 unsigned int *new_tx_id = user_data; 79 XsTransaction *tx = value; 80 81 if (tx->base_tx == *new_tx_id) { 82 /* Transactions based on XBT_NULL will always fail */ 83 tx->base_tx = XBT_NULL; 84 } 85 } 86 87 static inline unsigned int next_tx(struct XenstoreImplState *s) 88 { 89 unsigned int tx_id; 90 91 /* Find the next TX id which isn't either XBT_NULL or in use. */ 92 do { 93 tx_id = ++s->last_tx; 94 } while (tx_id == XBT_NULL || tx_id == s->root_tx || 95 g_hash_table_lookup(s->transactions, GINT_TO_POINTER(tx_id))); 96 97 /* 98 * It is vanishingly unlikely, but ensure that no outstanding transaction 99 * is based on the (previous incarnation of the) newly-allocated TX id. 100 */ 101 g_hash_table_foreach(s->transactions, nobble_tx, &tx_id); 102 103 return tx_id; 104 } 105 106 static inline XsNode *xs_node_new(void) 107 { 108 XsNode *n = g_new0(XsNode, 1); 109 n->ref = 1; 110 111 #ifdef XS_NODE_UNIT_TEST 112 nr_xs_nodes++; 113 xs_node_list = g_list_prepend(xs_node_list, n); 114 #endif 115 return n; 116 } 117 118 static inline XsNode *xs_node_ref(XsNode *n) 119 { 120 /* With just 10 transactions, it can never get anywhere near this. */ 121 g_assert(n->ref < INT_MAX); 122 123 g_assert(n->ref); 124 n->ref++; 125 return n; 126 } 127 128 static inline void xs_node_unref(XsNode *n) 129 { 130 if (!n) { 131 return; 132 } 133 g_assert(n->ref); 134 if (--n->ref) { 135 return; 136 } 137 138 if (n->content) { 139 g_byte_array_unref(n->content); 140 } 141 if (n->perms) { 142 g_list_free_full(n->perms, g_free); 143 } 144 if (n->children) { 145 g_hash_table_unref(n->children); 146 } 147 #ifdef XS_NODE_UNIT_TEST 148 g_free(n->name); 149 nr_xs_nodes--; 150 xs_node_list = g_list_remove(xs_node_list, n); 151 #endif 152 g_free(n); 153 } 154 155 char *xs_perm_as_string(unsigned int perm, unsigned int domid) 156 { 157 char letter; 158 159 switch (perm) { 160 case XS_PERM_READ | XS_PERM_WRITE: 161 letter = 'b'; 162 break; 163 case XS_PERM_READ: 164 letter = 'r'; 165 break; 166 case XS_PERM_WRITE: 167 letter = 'w'; 168 break; 169 case XS_PERM_NONE: 170 default: 171 letter = 'n'; 172 break; 173 } 174 175 return g_strdup_printf("%c%u", letter, domid); 176 } 177 178 static gpointer do_perm_copy(gconstpointer src, gpointer user_data) 179 { 180 return g_strdup(src); 181 } 182 183 static XsNode *xs_node_create(const char *name, GList *perms) 184 { 185 XsNode *n = xs_node_new(); 186 187 #ifdef XS_NODE_UNIT_TEST 188 if (name) { 189 n->name = g_strdup(name); 190 } 191 #endif 192 193 n->perms = g_list_copy_deep(perms, do_perm_copy, NULL); 194 195 return n; 196 } 197 198 /* For copying from one hash table to another using g_hash_table_foreach() */ 199 static void do_child_insert(gpointer key, gpointer value, gpointer user_data) 200 { 201 g_hash_table_insert(user_data, g_strdup(key), xs_node_ref(value)); 202 } 203 204 static XsNode *xs_node_copy(XsNode *old) 205 { 206 XsNode *n = xs_node_new(); 207 208 n->gencnt = old->gencnt; 209 210 #ifdef XS_NODE_UNIT_TEST 211 if (n->name) { 212 n->name = g_strdup(old->name); 213 } 214 #endif 215 216 assert(old); 217 if (old->children) { 218 n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, 219 (GDestroyNotify)xs_node_unref); 220 g_hash_table_foreach(old->children, do_child_insert, n->children); 221 } 222 if (old->perms) { 223 n->perms = g_list_copy_deep(old->perms, do_perm_copy, NULL); 224 } 225 if (old->content) { 226 n->content = g_byte_array_ref(old->content); 227 } 228 return n; 229 } 230 231 /* Returns true if it made a change to the hash table */ 232 static bool xs_node_add_child(XsNode *n, const char *path_elem, XsNode *child) 233 { 234 assert(!strchr(path_elem, '/')); 235 236 if (!child) { 237 assert(n->children); 238 return g_hash_table_remove(n->children, path_elem); 239 } 240 241 #ifdef XS_NODE_UNIT_TEST 242 g_free(child->name); 243 child->name = g_strdup(path_elem); 244 #endif 245 if (!n->children) { 246 n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, 247 (GDestroyNotify)xs_node_unref); 248 } 249 250 /* 251 * The documentation for g_hash_table_insert() says that it "returns a 252 * boolean value to indicate whether the newly added value was already 253 * in the hash table or not." 254 * 255 * It could perhaps be clearer that returning TRUE means it wasn't, 256 */ 257 return g_hash_table_insert(n->children, g_strdup(path_elem), child); 258 } 259 260 struct walk_op { 261 struct XenstoreImplState *s; 262 char path[XENSTORE_ABS_PATH_MAX + 2]; /* Two NUL terminators */ 263 int (*op_fn)(XsNode **n, struct walk_op *op); 264 void *op_opaque; 265 void *op_opaque2; 266 267 GList *watches; 268 unsigned int dom_id; 269 unsigned int tx_id; 270 271 /* The number of nodes which will exist in the tree if this op succeeds. */ 272 unsigned int new_nr_nodes; 273 274 /* 275 * This is maintained on the way *down* the walk to indicate 276 * whether nodes can be modified in place or whether COW is 277 * required. It starts off being true, as we're always going to 278 * replace the root node. If we walk into a shared subtree it 279 * becomes false. If we start *creating* new nodes for a write, 280 * it becomes true again. 281 * 282 * Do not use it on the way back up. 283 */ 284 bool inplace; 285 bool mutating; 286 bool create_dirs; 287 bool in_transaction; 288 289 /* Tracking during recursion so we know which is first. */ 290 bool deleted_in_tx; 291 }; 292 293 static void fire_watches(struct walk_op *op, bool parents) 294 { 295 GList *l = NULL; 296 XsWatch *w; 297 298 if (!op->mutating || op->in_transaction) { 299 return; 300 } 301 302 if (parents) { 303 l = op->watches; 304 } 305 306 w = g_hash_table_lookup(op->s->watches, op->path); 307 while (w || l) { 308 if (!w) { 309 /* Fire the parent nodes from 'op' if asked to */ 310 w = l->data; 311 l = l->next; 312 continue; 313 } 314 315 assert(strlen(op->path) > w->rel_prefix); 316 w->cb(w->cb_opaque, op->path + w->rel_prefix, w->token); 317 318 w = w->next; 319 } 320 } 321 322 static int xs_node_add_content(XsNode **n, struct walk_op *op) 323 { 324 GByteArray *data = op->op_opaque; 325 326 if (op->dom_id) { 327 /* 328 * The real XenStored includes permissions and names of child nodes 329 * in the calculated datasize but life's too short. For a single 330 * tenant internal XenStore, we don't have to be quite as pedantic. 331 */ 332 if (data->len > XS_MAX_NODE_SIZE) { 333 return E2BIG; 334 } 335 } 336 /* We *are* the node to be written. Either this or a copy. */ 337 if (!op->inplace) { 338 XsNode *old = *n; 339 *n = xs_node_copy(old); 340 xs_node_unref(old); 341 } 342 343 if ((*n)->content) { 344 g_byte_array_unref((*n)->content); 345 } 346 (*n)->content = g_byte_array_ref(data); 347 if (op->tx_id != XBT_NULL) { 348 (*n)->modified_in_tx = true; 349 } 350 return 0; 351 } 352 353 static int xs_node_get_content(XsNode **n, struct walk_op *op) 354 { 355 GByteArray *data = op->op_opaque; 356 GByteArray *node_data; 357 358 assert(op->inplace); 359 assert(*n); 360 361 node_data = (*n)->content; 362 if (node_data) { 363 g_byte_array_append(data, node_data->data, node_data->len); 364 } 365 366 return 0; 367 } 368 369 static int node_rm_recurse(gpointer key, gpointer value, gpointer user_data) 370 { 371 struct walk_op *op = user_data; 372 int path_len = strlen(op->path); 373 int key_len = strlen(key); 374 XsNode *n = value; 375 bool this_inplace = op->inplace; 376 377 if (n->ref != 1) { 378 op->inplace = 0; 379 } 380 381 assert(key_len + path_len + 2 <= sizeof(op->path)); 382 op->path[path_len] = '/'; 383 memcpy(op->path + path_len + 1, key, key_len + 1); 384 385 if (n->children) { 386 g_hash_table_foreach_remove(n->children, node_rm_recurse, op); 387 } 388 op->new_nr_nodes--; 389 390 /* 391 * Fire watches on *this* node but not the parents because they are 392 * going to be deleted too, so the watch will fire for them anyway. 393 */ 394 fire_watches(op, false); 395 op->path[path_len] = '\0'; 396 397 /* 398 * Actually deleting the child here is just an optimisation; if we 399 * don't then the final unref on the topmost victim will just have 400 * to cascade down again repeating all the g_hash_table_foreach() 401 * calls. 402 */ 403 return this_inplace; 404 } 405 406 static XsNode *xs_node_copy_deleted(XsNode *old, struct walk_op *op); 407 static void copy_deleted_recurse(gpointer key, gpointer value, 408 gpointer user_data) 409 { 410 struct walk_op *op = user_data; 411 GHashTable *siblings = op->op_opaque2; 412 XsNode *n = xs_node_copy_deleted(value, op); 413 414 /* 415 * Reinsert the deleted_in_tx copy of the node into the parent's 416 * 'children' hash table. Having stashed it from op->op_opaque2 417 * before the recursive call to xs_node_copy_deleted() scribbled 418 * over it. 419 */ 420 g_hash_table_insert(siblings, g_strdup(key), n); 421 } 422 423 static XsNode *xs_node_copy_deleted(XsNode *old, struct walk_op *op) 424 { 425 XsNode *n = xs_node_new(); 426 427 n->gencnt = old->gencnt; 428 429 #ifdef XS_NODE_UNIT_TEST 430 if (old->name) { 431 n->name = g_strdup(old->name); 432 } 433 #endif 434 435 if (old->children) { 436 n->children = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, 437 (GDestroyNotify)xs_node_unref); 438 op->op_opaque2 = n->children; 439 g_hash_table_foreach(old->children, copy_deleted_recurse, op); 440 } 441 if (old->perms) { 442 n->perms = g_list_copy_deep(old->perms, do_perm_copy, NULL); 443 } 444 n->deleted_in_tx = true; 445 /* If it gets resurrected we only fire a watch if it lost its content */ 446 if (old->content) { 447 n->modified_in_tx = true; 448 } 449 op->new_nr_nodes--; 450 return n; 451 } 452 453 static int xs_node_rm(XsNode **n, struct walk_op *op) 454 { 455 bool this_inplace = op->inplace; 456 457 if (op->tx_id != XBT_NULL) { 458 /* It's not trivial to do inplace handling for this one */ 459 XsNode *old = *n; 460 *n = xs_node_copy_deleted(old, op); 461 xs_node_unref(old); 462 return 0; 463 } 464 465 /* Fire watches for, and count, nodes in the subtree which get deleted */ 466 if ((*n)->children) { 467 g_hash_table_foreach_remove((*n)->children, node_rm_recurse, op); 468 } 469 op->new_nr_nodes--; 470 471 if (this_inplace) { 472 xs_node_unref(*n); 473 } 474 *n = NULL; 475 return 0; 476 } 477 478 static int xs_node_get_perms(XsNode **n, struct walk_op *op) 479 { 480 GList **perms = op->op_opaque; 481 482 assert(op->inplace); 483 assert(*n); 484 485 *perms = g_list_copy_deep((*n)->perms, do_perm_copy, NULL); 486 return 0; 487 } 488 489 static void parse_perm(const char *perm, char *letter, unsigned int *dom_id) 490 { 491 unsigned int n = sscanf(perm, "%c%u", letter, dom_id); 492 493 assert(n == 2); 494 } 495 496 static bool can_access(unsigned int dom_id, GList *perms, const char *letters) 497 { 498 unsigned int i, n; 499 char perm_letter; 500 unsigned int perm_dom_id; 501 bool access; 502 503 if (dom_id == 0) { 504 return true; 505 } 506 507 n = g_list_length(perms); 508 assert(n >= 1); 509 510 /* 511 * The dom_id of the first perm is the owner, and the owner always has 512 * read-write access. 513 */ 514 parse_perm(g_list_nth_data(perms, 0), &perm_letter, &perm_dom_id); 515 if (dom_id == perm_dom_id) { 516 return true; 517 } 518 519 /* 520 * The letter of the first perm specified the default access for all other 521 * domains. 522 */ 523 access = !!strchr(letters, perm_letter); 524 for (i = 1; i < n; i++) { 525 parse_perm(g_list_nth_data(perms, i), &perm_letter, &perm_dom_id); 526 if (dom_id != perm_dom_id) { 527 continue; 528 } 529 access = !!strchr(letters, perm_letter); 530 } 531 532 return access; 533 } 534 535 static int xs_node_set_perms(XsNode **n, struct walk_op *op) 536 { 537 GList *perms = op->op_opaque; 538 539 if (op->dom_id) { 540 unsigned int perm_dom_id; 541 char perm_letter; 542 543 /* A guest may not change permissions on nodes it does not own */ 544 if (!can_access(op->dom_id, (*n)->perms, "")) { 545 return EPERM; 546 } 547 548 /* A guest may not change the owner of a node it owns. */ 549 parse_perm(perms->data, &perm_letter, &perm_dom_id); 550 if (perm_dom_id != op->dom_id) { 551 return EPERM; 552 } 553 554 if (g_list_length(perms) > XS_MAX_PERMS_PER_NODE) { 555 return ENOSPC; 556 } 557 } 558 559 /* We *are* the node to be written. Either this or a copy. */ 560 if (!op->inplace) { 561 XsNode *old = *n; 562 *n = xs_node_copy(old); 563 xs_node_unref(old); 564 } 565 566 if ((*n)->perms) { 567 g_list_free_full((*n)->perms, g_free); 568 } 569 (*n)->perms = g_list_copy_deep(perms, do_perm_copy, NULL); 570 if (op->tx_id != XBT_NULL) { 571 (*n)->modified_in_tx = true; 572 } 573 return 0; 574 } 575 576 /* 577 * Passed a full reference in *n which it may free if it needs to COW. 578 * 579 * When changing the tree, the op->inplace flag indicates whether this 580 * node may be modified in place (i.e. it and all its parents had a 581 * refcount of one). If walking down the tree we find a node whose 582 * refcount is higher, we must clear op->inplace and COW from there 583 * down. Unless we are creating new nodes as scaffolding for a write 584 * (which works like 'mkdir -p' does). In which case those newly 585 * created nodes can (and must) be modified in place again. 586 */ 587 static int xs_node_walk(XsNode **n, struct walk_op *op) 588 { 589 char *child_name = NULL; 590 size_t namelen; 591 XsNode *old = *n, *child = NULL; 592 bool stole_child = false; 593 bool this_inplace; 594 XsWatch *watch; 595 int err; 596 597 namelen = strlen(op->path); 598 watch = g_hash_table_lookup(op->s->watches, op->path); 599 600 /* Is there a child, or do we hit the double-NUL termination? */ 601 if (op->path[namelen + 1]) { 602 char *slash; 603 child_name = op->path + namelen + 1; 604 slash = strchr(child_name, '/'); 605 if (slash) { 606 *slash = '\0'; 607 } 608 op->path[namelen] = '/'; 609 } 610 611 /* If we walk into a subtree which is shared, we must COW */ 612 if (op->mutating && old->ref != 1) { 613 op->inplace = false; 614 } 615 616 if (!child_name) { 617 const char *letters = op->mutating ? "wb" : "rb"; 618 619 if (!can_access(op->dom_id, old->perms, letters)) { 620 err = EACCES; 621 goto out; 622 } 623 624 /* This is the actual node on which the operation shall be performed */ 625 err = op->op_fn(n, op); 626 if (!err) { 627 fire_watches(op, true); 628 } 629 goto out; 630 } 631 632 /* op->inplace will be further modified during the recursion */ 633 this_inplace = op->inplace; 634 635 if (old && old->children) { 636 child = g_hash_table_lookup(old->children, child_name); 637 /* This is a *weak* reference to 'child', owned by the hash table */ 638 } 639 640 if (child) { 641 if (child->deleted_in_tx) { 642 assert(child->ref == 1); 643 /* Cannot actually set child->deleted_in_tx = false until later */ 644 } 645 xs_node_ref(child); 646 /* 647 * Now we own it too. But if we can modify inplace, that's going to 648 * foil the check and force it to COW. We want to be the *only* owner 649 * so that it can be modified in place, so remove it from the hash 650 * table in that case. We'll add it (or its replacement) back later. 651 */ 652 if (op->mutating && this_inplace) { 653 g_hash_table_remove(old->children, child_name); 654 stole_child = true; 655 } 656 } else if (op->create_dirs) { 657 assert(op->mutating); 658 659 if (!can_access(op->dom_id, old->perms, "wb")) { 660 err = EACCES; 661 goto out; 662 } 663 664 if (op->dom_id && op->new_nr_nodes >= XS_MAX_DOMAIN_NODES) { 665 err = ENOSPC; 666 goto out; 667 } 668 669 child = xs_node_create(child_name, old->perms); 670 op->new_nr_nodes++; 671 672 /* 673 * If we're creating a new child, we can clearly modify it (and its 674 * children) in place from here on down. 675 */ 676 op->inplace = true; 677 } else { 678 err = ENOENT; 679 goto out; 680 } 681 682 /* 683 * If there's a watch on this node, add it to the list to be fired 684 * (with the correct full pathname for the modified node) at the end. 685 */ 686 if (watch) { 687 op->watches = g_list_append(op->watches, watch); 688 } 689 690 /* 691 * Except for the temporary child-stealing as noted, our node has not 692 * changed yet. We don't yet know the overall operation will complete. 693 */ 694 err = xs_node_walk(&child, op); 695 696 if (watch) { 697 op->watches = g_list_remove(op->watches, watch); 698 } 699 700 if (err || !op->mutating) { 701 if (stole_child) { 702 /* Put it back as it was. */ 703 g_hash_table_replace(old->children, g_strdup(child_name), child); 704 } else { 705 xs_node_unref(child); 706 } 707 goto out; 708 } 709 710 /* 711 * Now we know the operation has completed successfully and we're on 712 * the way back up. Make the change, substituting 'child' in the 713 * node at our level. 714 */ 715 if (!this_inplace) { 716 *n = xs_node_copy(old); 717 xs_node_unref(old); 718 } 719 720 /* 721 * If we resurrected a deleted_in_tx node, we can mark it as no longer 722 * deleted now that we know the overall operation has succeeded. 723 */ 724 if (op->create_dirs && child && child->deleted_in_tx) { 725 op->new_nr_nodes++; 726 child->deleted_in_tx = false; 727 } 728 729 /* 730 * The child may be NULL here, for a remove operation. Either way, 731 * xs_node_add_child() will do the right thing and return a value 732 * indicating whether it changed the parent's hash table or not. 733 * 734 * We bump the parent gencnt if it adds a child that we *didn't* 735 * steal from it in the first place, or if child==NULL and was 736 * thus removed (whether we stole it earlier and didn't put it 737 * back, or xs_node_add_child() actually removed it now). 738 */ 739 if ((xs_node_add_child(*n, child_name, child) && !stole_child) || !child) { 740 (*n)->gencnt++; 741 } 742 743 out: 744 op->path[namelen] = '\0'; 745 if (!namelen) { 746 assert(!op->watches); 747 /* 748 * On completing the recursion back up the path walk and reaching the 749 * top, assign the new node count if the operation was successful. If 750 * the main tree was changed, bump its tx ID so that outstanding 751 * transactions correctly fail. But don't bump it every time; only 752 * if it makes a difference. 753 */ 754 if (!err && op->mutating) { 755 if (!op->in_transaction) { 756 if (op->s->root_tx != op->s->last_tx) { 757 op->s->root_tx = next_tx(op->s); 758 } 759 op->s->nr_nodes = op->new_nr_nodes; 760 } else { 761 XsTransaction *tx = g_hash_table_lookup(op->s->transactions, 762 GINT_TO_POINTER(op->tx_id)); 763 assert(tx); 764 tx->nr_nodes = op->new_nr_nodes; 765 } 766 } 767 } 768 return err; 769 } 770 771 static void append_directory_item(gpointer key, gpointer value, 772 gpointer user_data) 773 { 774 GList **items = user_data; 775 776 *items = g_list_insert_sorted(*items, g_strdup(key), (GCompareFunc)strcmp); 777 } 778 779 /* Populates items with char * names which caller must free. */ 780 static int xs_node_directory(XsNode **n, struct walk_op *op) 781 { 782 GList **items = op->op_opaque; 783 784 assert(op->inplace); 785 assert(*n); 786 787 if ((*n)->children) { 788 g_hash_table_foreach((*n)->children, append_directory_item, items); 789 } 790 791 if (op->op_opaque2) { 792 *(uint64_t *)op->op_opaque2 = (*n)->gencnt; 793 } 794 795 return 0; 796 } 797 798 static int validate_path(char *outpath, const char *userpath, 799 unsigned int dom_id) 800 { 801 size_t i, pathlen = strlen(userpath); 802 803 if (!pathlen || userpath[pathlen] == '/' || strstr(userpath, "//")) { 804 return EINVAL; 805 } 806 for (i = 0; i < pathlen; i++) { 807 if (!strchr(XS_VALID_CHARS, userpath[i])) { 808 return EINVAL; 809 } 810 } 811 if (userpath[0] == '/') { 812 if (pathlen > XENSTORE_ABS_PATH_MAX) { 813 return E2BIG; 814 } 815 memcpy(outpath, userpath, pathlen + 1); 816 } else { 817 if (pathlen > XENSTORE_REL_PATH_MAX) { 818 return E2BIG; 819 } 820 snprintf(outpath, XENSTORE_ABS_PATH_MAX, "/local/domain/%u/%s", dom_id, 821 userpath); 822 } 823 return 0; 824 } 825 826 827 static int init_walk_op(XenstoreImplState *s, struct walk_op *op, 828 xs_transaction_t tx_id, unsigned int dom_id, 829 const char *path, XsNode ***rootp) 830 { 831 int ret = validate_path(op->path, path, dom_id); 832 if (ret) { 833 return ret; 834 } 835 836 /* 837 * We use *two* NUL terminators at the end of the path, as during the walk 838 * we will temporarily turn each '/' into a NUL to allow us to use that 839 * path element for the lookup. 840 */ 841 op->path[strlen(op->path) + 1] = '\0'; 842 op->watches = NULL; 843 op->path[0] = '\0'; 844 op->inplace = true; 845 op->mutating = false; 846 op->create_dirs = false; 847 op->in_transaction = false; 848 op->dom_id = dom_id; 849 op->tx_id = tx_id; 850 op->s = s; 851 852 if (tx_id == XBT_NULL) { 853 *rootp = &s->root; 854 op->new_nr_nodes = s->nr_nodes; 855 } else { 856 XsTransaction *tx = g_hash_table_lookup(s->transactions, 857 GINT_TO_POINTER(tx_id)); 858 if (!tx) { 859 return ENOENT; 860 } 861 *rootp = &tx->root; 862 op->new_nr_nodes = tx->nr_nodes; 863 op->in_transaction = true; 864 } 865 866 return 0; 867 } 868 869 int xs_impl_read(XenstoreImplState *s, unsigned int dom_id, 870 xs_transaction_t tx_id, const char *path, GByteArray *data) 871 { 872 /* 873 * The data GByteArray shall exist, and will be freed by caller. 874 * Just g_byte_array_append() to it. 875 */ 876 struct walk_op op; 877 XsNode **n; 878 int ret; 879 880 ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); 881 if (ret) { 882 return ret; 883 } 884 op.op_fn = xs_node_get_content; 885 op.op_opaque = data; 886 return xs_node_walk(n, &op); 887 } 888 889 int xs_impl_write(XenstoreImplState *s, unsigned int dom_id, 890 xs_transaction_t tx_id, const char *path, GByteArray *data) 891 { 892 /* 893 * The data GByteArray shall exist, will be freed by caller. You are 894 * free to use g_byte_array_steal() and keep the data. Or just ref it. 895 */ 896 struct walk_op op; 897 XsNode **n; 898 int ret; 899 900 ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); 901 if (ret) { 902 return ret; 903 } 904 op.op_fn = xs_node_add_content; 905 op.op_opaque = data; 906 op.mutating = true; 907 op.create_dirs = true; 908 return xs_node_walk(n, &op); 909 } 910 911 int xs_impl_directory(XenstoreImplState *s, unsigned int dom_id, 912 xs_transaction_t tx_id, const char *path, 913 uint64_t *gencnt, GList **items) 914 { 915 /* 916 * The items are (char *) to be freed by caller. Although it's consumed 917 * immediately so if you want to change it to (const char *) and keep 918 * them, go ahead and change the caller. 919 */ 920 struct walk_op op; 921 XsNode **n; 922 int ret; 923 924 ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); 925 if (ret) { 926 return ret; 927 } 928 op.op_fn = xs_node_directory; 929 op.op_opaque = items; 930 op.op_opaque2 = gencnt; 931 return xs_node_walk(n, &op); 932 } 933 934 int xs_impl_transaction_start(XenstoreImplState *s, unsigned int dom_id, 935 xs_transaction_t *tx_id) 936 { 937 XsTransaction *tx; 938 939 if (*tx_id != XBT_NULL) { 940 return EINVAL; 941 } 942 943 if (dom_id && s->nr_domu_transactions >= XS_MAX_TRANSACTIONS) { 944 return ENOSPC; 945 } 946 947 tx = g_new0(XsTransaction, 1); 948 949 tx->nr_nodes = s->nr_nodes; 950 tx->tx_id = next_tx(s); 951 tx->base_tx = s->root_tx; 952 tx->root = xs_node_ref(s->root); 953 tx->dom_id = dom_id; 954 955 g_hash_table_insert(s->transactions, GINT_TO_POINTER(tx->tx_id), tx); 956 if (dom_id) { 957 s->nr_domu_transactions++; 958 } 959 *tx_id = tx->tx_id; 960 return 0; 961 } 962 963 static gboolean tx_commit_walk(gpointer key, gpointer value, 964 gpointer user_data) 965 { 966 struct walk_op *op = user_data; 967 int path_len = strlen(op->path); 968 int key_len = strlen(key); 969 bool fire_parents = true; 970 XsWatch *watch; 971 XsNode *n = value; 972 973 if (n->ref != 1) { 974 return false; 975 } 976 977 if (n->deleted_in_tx) { 978 /* 979 * We fire watches on our parents if we are the *first* node 980 * to be deleted (the topmost one). This matches the behaviour 981 * when deleting in the live tree. 982 */ 983 fire_parents = !op->deleted_in_tx; 984 985 /* Only used on the way down so no need to clear it later */ 986 op->deleted_in_tx = true; 987 } 988 989 assert(key_len + path_len + 2 <= sizeof(op->path)); 990 op->path[path_len] = '/'; 991 memcpy(op->path + path_len + 1, key, key_len + 1); 992 993 watch = g_hash_table_lookup(op->s->watches, op->path); 994 if (watch) { 995 op->watches = g_list_append(op->watches, watch); 996 } 997 998 if (n->children) { 999 g_hash_table_foreach_remove(n->children, tx_commit_walk, op); 1000 } 1001 1002 if (watch) { 1003 op->watches = g_list_remove(op->watches, watch); 1004 } 1005 1006 /* 1007 * Don't fire watches if this node was only copied because a 1008 * descendent was changed. The modified_in_tx flag indicates the 1009 * ones which were really changed. 1010 */ 1011 if (n->modified_in_tx || n->deleted_in_tx) { 1012 fire_watches(op, fire_parents); 1013 n->modified_in_tx = false; 1014 } 1015 op->path[path_len] = '\0'; 1016 1017 /* Deleted nodes really do get expunged when we commit */ 1018 return n->deleted_in_tx; 1019 } 1020 1021 static int transaction_commit(XenstoreImplState *s, XsTransaction *tx) 1022 { 1023 struct walk_op op; 1024 XsNode **n; 1025 int ret; 1026 1027 if (s->root_tx != tx->base_tx) { 1028 return EAGAIN; 1029 } 1030 xs_node_unref(s->root); 1031 s->root = tx->root; 1032 tx->root = NULL; 1033 s->root_tx = tx->tx_id; 1034 s->nr_nodes = tx->nr_nodes; 1035 1036 ret = init_walk_op(s, &op, XBT_NULL, tx->dom_id, "/", &n); 1037 /* 1038 * There are two reasons why init_walk_op() may fail: an invalid tx_id, 1039 * or an invalid path. We pass XBT_NULL and "/", and it cannot fail. 1040 * If it does, the world is broken. And returning 'ret' would be weird 1041 * because the transaction *was* committed, and all this tree walk is 1042 * trying to do is fire the resulting watches on newly-committed nodes. 1043 */ 1044 g_assert(!ret); 1045 1046 op.deleted_in_tx = false; 1047 op.mutating = true; 1048 1049 /* 1050 * Walk the new root and fire watches on any node which has a 1051 * refcount of one (which is therefore unique to this transaction). 1052 */ 1053 if (s->root->children) { 1054 g_hash_table_foreach_remove(s->root->children, tx_commit_walk, &op); 1055 } 1056 1057 return 0; 1058 } 1059 1060 int xs_impl_transaction_end(XenstoreImplState *s, unsigned int dom_id, 1061 xs_transaction_t tx_id, bool commit) 1062 { 1063 int ret = 0; 1064 XsTransaction *tx = g_hash_table_lookup(s->transactions, 1065 GINT_TO_POINTER(tx_id)); 1066 1067 if (!tx || tx->dom_id != dom_id) { 1068 return ENOENT; 1069 } 1070 1071 if (commit) { 1072 ret = transaction_commit(s, tx); 1073 } 1074 1075 g_hash_table_remove(s->transactions, GINT_TO_POINTER(tx_id)); 1076 if (dom_id) { 1077 assert(s->nr_domu_transactions); 1078 s->nr_domu_transactions--; 1079 } 1080 return ret; 1081 } 1082 1083 int xs_impl_rm(XenstoreImplState *s, unsigned int dom_id, 1084 xs_transaction_t tx_id, const char *path) 1085 { 1086 struct walk_op op; 1087 XsNode **n; 1088 int ret; 1089 1090 ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); 1091 if (ret) { 1092 return ret; 1093 } 1094 op.op_fn = xs_node_rm; 1095 op.mutating = true; 1096 return xs_node_walk(n, &op); 1097 } 1098 1099 int xs_impl_get_perms(XenstoreImplState *s, unsigned int dom_id, 1100 xs_transaction_t tx_id, const char *path, GList **perms) 1101 { 1102 struct walk_op op; 1103 XsNode **n; 1104 int ret; 1105 1106 ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); 1107 if (ret) { 1108 return ret; 1109 } 1110 op.op_fn = xs_node_get_perms; 1111 op.op_opaque = perms; 1112 return xs_node_walk(n, &op); 1113 } 1114 1115 static void is_valid_perm(gpointer data, gpointer user_data) 1116 { 1117 char *perm = data; 1118 bool *valid = user_data; 1119 char letter; 1120 unsigned int dom_id; 1121 1122 if (!*valid) { 1123 return; 1124 } 1125 1126 if (sscanf(perm, "%c%u", &letter, &dom_id) != 2) { 1127 *valid = false; 1128 return; 1129 } 1130 1131 switch (letter) { 1132 case 'n': 1133 case 'r': 1134 case 'w': 1135 case 'b': 1136 break; 1137 1138 default: 1139 *valid = false; 1140 break; 1141 } 1142 } 1143 1144 int xs_impl_set_perms(XenstoreImplState *s, unsigned int dom_id, 1145 xs_transaction_t tx_id, const char *path, GList *perms) 1146 { 1147 struct walk_op op; 1148 XsNode **n; 1149 bool valid = true; 1150 int ret; 1151 1152 if (!g_list_length(perms)) { 1153 return EINVAL; 1154 } 1155 1156 g_list_foreach(perms, is_valid_perm, &valid); 1157 if (!valid) { 1158 return EINVAL; 1159 } 1160 1161 ret = init_walk_op(s, &op, tx_id, dom_id, path, &n); 1162 if (ret) { 1163 return ret; 1164 } 1165 op.op_fn = xs_node_set_perms; 1166 op.op_opaque = perms; 1167 op.mutating = true; 1168 return xs_node_walk(n, &op); 1169 } 1170 1171 static int do_xs_impl_watch(XenstoreImplState *s, unsigned int dom_id, 1172 const char *path, const char *token, 1173 xs_impl_watch_fn fn, void *opaque) 1174 1175 { 1176 char abspath[XENSTORE_ABS_PATH_MAX + 1]; 1177 XsWatch *w, *l; 1178 int ret; 1179 1180 ret = validate_path(abspath, path, dom_id); 1181 if (ret) { 1182 return ret; 1183 } 1184 1185 /* Check for duplicates */ 1186 l = w = g_hash_table_lookup(s->watches, abspath); 1187 while (w) { 1188 if (!g_strcmp0(token, w->token) && opaque == w->cb_opaque && 1189 fn == w->cb && dom_id == w->dom_id) { 1190 return EEXIST; 1191 } 1192 w = w->next; 1193 } 1194 1195 if (dom_id && s->nr_domu_watches >= XS_MAX_WATCHES) { 1196 return E2BIG; 1197 } 1198 1199 w = g_new0(XsWatch, 1); 1200 w->token = g_strdup(token); 1201 w->cb = fn; 1202 w->cb_opaque = opaque; 1203 w->dom_id = dom_id; 1204 w->rel_prefix = strlen(abspath) - strlen(path); 1205 1206 /* l was looked up above when checking for duplicates */ 1207 if (l) { 1208 w->next = l->next; 1209 l->next = w; 1210 } else { 1211 g_hash_table_insert(s->watches, g_strdup(abspath), w); 1212 } 1213 if (dom_id) { 1214 s->nr_domu_watches++; 1215 } 1216 1217 return 0; 1218 } 1219 1220 int xs_impl_watch(XenstoreImplState *s, unsigned int dom_id, const char *path, 1221 const char *token, xs_impl_watch_fn fn, void *opaque) 1222 { 1223 int ret = do_xs_impl_watch(s, dom_id, path, token, fn, opaque); 1224 1225 if (!ret) { 1226 /* A new watch should fire immediately */ 1227 fn(opaque, path, token); 1228 } 1229 1230 return ret; 1231 } 1232 1233 static XsWatch *free_watch(XenstoreImplState *s, XsWatch *w) 1234 { 1235 XsWatch *next = w->next; 1236 1237 if (w->dom_id) { 1238 assert(s->nr_domu_watches); 1239 s->nr_domu_watches--; 1240 } 1241 1242 g_free(w->token); 1243 g_free(w); 1244 1245 return next; 1246 } 1247 1248 int xs_impl_unwatch(XenstoreImplState *s, unsigned int dom_id, 1249 const char *path, const char *token, 1250 xs_impl_watch_fn fn, void *opaque) 1251 { 1252 char abspath[XENSTORE_ABS_PATH_MAX + 1]; 1253 XsWatch *w, **l; 1254 int ret; 1255 1256 ret = validate_path(abspath, path, dom_id); 1257 if (ret) { 1258 return ret; 1259 } 1260 1261 w = g_hash_table_lookup(s->watches, abspath); 1262 if (!w) { 1263 return ENOENT; 1264 } 1265 1266 /* 1267 * The hash table contains the first element of a list of 1268 * watches. Removing the first element in the list is a 1269 * special case because we have to update the hash table to 1270 * point to the next (or remove it if there's nothing left). 1271 */ 1272 if (!g_strcmp0(token, w->token) && fn == w->cb && opaque == w->cb_opaque && 1273 dom_id == w->dom_id) { 1274 if (w->next) { 1275 /* Insert the previous 'next' into the hash table */ 1276 g_hash_table_insert(s->watches, g_strdup(abspath), w->next); 1277 } else { 1278 /* Nothing left; remove from hash table */ 1279 g_hash_table_remove(s->watches, abspath); 1280 } 1281 free_watch(s, w); 1282 return 0; 1283 } 1284 1285 /* 1286 * We're all done messing with the hash table because the element 1287 * it points to has survived the cull. Now it's just a simple 1288 * linked list removal operation. 1289 */ 1290 for (l = &w->next; *l; l = &w->next) { 1291 w = *l; 1292 1293 if (!g_strcmp0(token, w->token) && fn == w->cb && 1294 opaque != w->cb_opaque && dom_id == w->dom_id) { 1295 *l = free_watch(s, w); 1296 return 0; 1297 } 1298 } 1299 1300 return ENOENT; 1301 } 1302 1303 int xs_impl_reset_watches(XenstoreImplState *s, unsigned int dom_id) 1304 { 1305 char **watch_paths; 1306 guint nr_watch_paths; 1307 guint i; 1308 1309 watch_paths = (char **)g_hash_table_get_keys_as_array(s->watches, 1310 &nr_watch_paths); 1311 1312 for (i = 0; i < nr_watch_paths; i++) { 1313 XsWatch *w1 = g_hash_table_lookup(s->watches, watch_paths[i]); 1314 XsWatch *w2, *w, **l; 1315 1316 /* 1317 * w1 is the original list. The hash table has this pointer. 1318 * w2 is the head of our newly-filtered list. 1319 * w and l are temporary for processing. w is somewhat redundant 1320 * with *l but makes my eyes bleed less. 1321 */ 1322 1323 w = w2 = w1; 1324 l = &w; 1325 while (w) { 1326 if (w->dom_id == dom_id) { 1327 /* If we're freeing the head of the list, bump w2 */ 1328 if (w2 == w) { 1329 w2 = w->next; 1330 } 1331 *l = free_watch(s, w); 1332 } else { 1333 l = &w->next; 1334 } 1335 w = *l; 1336 } 1337 /* 1338 * If the head of the list survived the cull, we don't need to 1339 * touch the hash table and we're done with this path. Else... 1340 */ 1341 if (w1 != w2) { 1342 g_hash_table_steal(s->watches, watch_paths[i]); 1343 1344 /* 1345 * It was already freed. (Don't worry, this whole thing is 1346 * single-threaded and nobody saw it in the meantime). And 1347 * having *stolen* it, we now own the watch_paths[i] string 1348 * so if we don't give it back to the hash table, we need 1349 * to free it. 1350 */ 1351 if (w2) { 1352 g_hash_table_insert(s->watches, watch_paths[i], w2); 1353 } else { 1354 g_free(watch_paths[i]); 1355 } 1356 } 1357 } 1358 g_free(watch_paths); 1359 return 0; 1360 } 1361 1362 static void xs_tx_free(void *_tx) 1363 { 1364 XsTransaction *tx = _tx; 1365 if (tx->root) { 1366 xs_node_unref(tx->root); 1367 } 1368 g_free(tx); 1369 } 1370 1371 XenstoreImplState *xs_impl_create(unsigned int dom_id) 1372 { 1373 XenstoreImplState *s = g_new0(XenstoreImplState, 1); 1374 GList *perms; 1375 1376 s->watches = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL); 1377 s->transactions = g_hash_table_new_full(g_direct_hash, g_direct_equal, 1378 NULL, xs_tx_free); 1379 1380 perms = g_list_append(NULL, xs_perm_as_string(XS_PERM_NONE, 0)); 1381 s->root = xs_node_create("/", perms); 1382 g_list_free_full(perms, g_free); 1383 s->nr_nodes = 1; 1384 1385 s->root_tx = s->last_tx = 1; 1386 return s; 1387 } 1388 1389 1390 static void clear_serialized_tx(gpointer key, gpointer value, gpointer opaque) 1391 { 1392 XsNode *n = value; 1393 1394 n->serialized_tx = XBT_NULL; 1395 if (n->children) { 1396 g_hash_table_foreach(n->children, clear_serialized_tx, NULL); 1397 } 1398 } 1399 1400 static void clear_tx_serialized_tx(gpointer key, gpointer value, 1401 gpointer opaque) 1402 { 1403 XsTransaction *t = value; 1404 1405 clear_serialized_tx(NULL, t->root, NULL); 1406 } 1407 1408 static void write_be32(GByteArray *save, uint32_t val) 1409 { 1410 uint32_t be = htonl(val); 1411 g_byte_array_append(save, (void *)&be, sizeof(be)); 1412 } 1413 1414 1415 struct save_state { 1416 GByteArray *bytes; 1417 unsigned int tx_id; 1418 }; 1419 1420 #define MODIFIED_IN_TX (1U << 0) 1421 #define DELETED_IN_TX (1U << 1) 1422 #define NODE_REF (1U << 2) 1423 1424 static void save_node(gpointer key, gpointer value, gpointer opaque) 1425 { 1426 struct save_state *ss = opaque; 1427 XsNode *n = value; 1428 char *name = key; 1429 uint8_t flag = 0; 1430 1431 /* Child nodes (i.e. anything but the root) have a name */ 1432 if (name) { 1433 g_byte_array_append(ss->bytes, key, strlen(key) + 1); 1434 } 1435 1436 /* 1437 * If we already wrote this node, refer to the previous copy. 1438 * There's no rename/move in XenStore, so all we need to find 1439 * it is the tx_id of the transaction in which it exists. Which 1440 * may be the root tx. 1441 */ 1442 if (n->serialized_tx != XBT_NULL) { 1443 flag = NODE_REF; 1444 g_byte_array_append(ss->bytes, &flag, 1); 1445 write_be32(ss->bytes, n->serialized_tx); 1446 } else { 1447 GList *l; 1448 n->serialized_tx = ss->tx_id; 1449 1450 if (n->modified_in_tx) { 1451 flag |= MODIFIED_IN_TX; 1452 } 1453 if (n->deleted_in_tx) { 1454 flag |= DELETED_IN_TX; 1455 } 1456 g_byte_array_append(ss->bytes, &flag, 1); 1457 1458 if (n->content) { 1459 write_be32(ss->bytes, n->content->len); 1460 g_byte_array_append(ss->bytes, n->content->data, n->content->len); 1461 } else { 1462 write_be32(ss->bytes, 0); 1463 } 1464 1465 for (l = n->perms; l; l = l->next) { 1466 g_byte_array_append(ss->bytes, l->data, strlen(l->data) + 1); 1467 } 1468 /* NUL termination after perms */ 1469 g_byte_array_append(ss->bytes, (void *)"", 1); 1470 1471 if (n->children) { 1472 g_hash_table_foreach(n->children, save_node, ss); 1473 } 1474 /* NUL termination after children (child name is NUL) */ 1475 g_byte_array_append(ss->bytes, (void *)"", 1); 1476 } 1477 } 1478 1479 static void save_tree(struct save_state *ss, uint32_t tx_id, XsNode *root) 1480 { 1481 write_be32(ss->bytes, tx_id); 1482 ss->tx_id = tx_id; 1483 save_node(NULL, root, ss); 1484 } 1485 1486 static void save_tx(gpointer key, gpointer value, gpointer opaque) 1487 { 1488 uint32_t tx_id = GPOINTER_TO_INT(key); 1489 struct save_state *ss = opaque; 1490 XsTransaction *n = value; 1491 1492 write_be32(ss->bytes, n->base_tx); 1493 write_be32(ss->bytes, n->dom_id); 1494 1495 save_tree(ss, tx_id, n->root); 1496 } 1497 1498 static void save_watch(gpointer key, gpointer value, gpointer opaque) 1499 { 1500 struct save_state *ss = opaque; 1501 XsWatch *w = value; 1502 1503 /* We only save the *guest* watches. */ 1504 if (w->dom_id) { 1505 gpointer relpath = key + w->rel_prefix; 1506 g_byte_array_append(ss->bytes, relpath, strlen(relpath) + 1); 1507 g_byte_array_append(ss->bytes, (void *)w->token, strlen(w->token) + 1); 1508 } 1509 } 1510 1511 GByteArray *xs_impl_serialize(XenstoreImplState *s) 1512 { 1513 struct save_state ss; 1514 1515 ss.bytes = g_byte_array_new(); 1516 1517 /* 1518 * node = flags [ real_node / node_ref ] 1519 * flags = uint8_t (MODIFIED_IN_TX | DELETED_IN_TX | NODE_REF) 1520 * node_ref = tx_id (in which the original version of this node exists) 1521 * real_node = content perms child* NUL 1522 * content = len data 1523 * len = uint32_t 1524 * data = uint8_t{len} 1525 * perms = perm* NUL 1526 * perm = asciiz 1527 * child = name node 1528 * name = asciiz 1529 * 1530 * tree = tx_id node 1531 * tx_id = uint32_t 1532 * 1533 * transaction = base_tx_id dom_id tree 1534 * base_tx_id = uint32_t 1535 * dom_id = uint32_t 1536 * 1537 * tx_list = tree transaction* XBT_NULL 1538 * 1539 * watch = path token 1540 * path = asciiz 1541 * token = asciiz 1542 * 1543 * watch_list = watch* NUL 1544 * 1545 * xs_serialize_stream = last_tx tx_list watch_list 1546 * last_tx = uint32_t 1547 */ 1548 1549 /* Clear serialized_tx in every node. */ 1550 if (s->serialized) { 1551 clear_serialized_tx(NULL, s->root, NULL); 1552 g_hash_table_foreach(s->transactions, clear_tx_serialized_tx, NULL); 1553 } 1554 1555 s->serialized = true; 1556 1557 write_be32(ss.bytes, s->last_tx); 1558 save_tree(&ss, s->root_tx, s->root); 1559 g_hash_table_foreach(s->transactions, save_tx, &ss); 1560 1561 write_be32(ss.bytes, XBT_NULL); 1562 1563 g_hash_table_foreach(s->watches, save_watch, &ss); 1564 g_byte_array_append(ss.bytes, (void *)"", 1); 1565 1566 return ss.bytes; 1567 } 1568 1569 struct unsave_state { 1570 char path[XENSTORE_ABS_PATH_MAX + 1]; 1571 XenstoreImplState *s; 1572 GByteArray *bytes; 1573 uint8_t *d; 1574 size_t l; 1575 bool root_walk; 1576 }; 1577 1578 static int consume_be32(struct unsave_state *us, unsigned int *val) 1579 { 1580 uint32_t d; 1581 1582 if (us->l < sizeof(d)) { 1583 return -EINVAL; 1584 } 1585 memcpy(&d, us->d, sizeof(d)); 1586 *val = ntohl(d); 1587 us->d += sizeof(d); 1588 us->l -= sizeof(d); 1589 return 0; 1590 } 1591 1592 static int consume_string(struct unsave_state *us, char **str, size_t *len) 1593 { 1594 size_t l; 1595 1596 if (!us->l) { 1597 return -EINVAL; 1598 } 1599 1600 l = strnlen((void *)us->d, us->l); 1601 if (l == us->l) { 1602 return -EINVAL; 1603 } 1604 1605 if (str) { 1606 *str = (void *)us->d; 1607 } 1608 if (len) { 1609 *len = l; 1610 } 1611 1612 us->d += l + 1; 1613 us->l -= l + 1; 1614 return 0; 1615 } 1616 1617 static XsNode *lookup_node(XsNode *n, char *path) 1618 { 1619 char *slash = strchr(path, '/'); 1620 XsNode *child; 1621 1622 if (path[0] == '\0') { 1623 return n; 1624 } 1625 1626 if (slash) { 1627 *slash = '\0'; 1628 } 1629 1630 if (!n->children) { 1631 return NULL; 1632 } 1633 child = g_hash_table_lookup(n->children, path); 1634 if (!slash) { 1635 return child; 1636 } 1637 1638 *slash = '/'; 1639 if (!child) { 1640 return NULL; 1641 } 1642 return lookup_node(child, slash + 1); 1643 } 1644 1645 static XsNode *lookup_tx_node(struct unsave_state *us, unsigned int tx_id) 1646 { 1647 XsTransaction *t; 1648 if (tx_id == us->s->root_tx) { 1649 return lookup_node(us->s->root, us->path + 1); 1650 } 1651 1652 t = g_hash_table_lookup(us->s->transactions, GINT_TO_POINTER(tx_id)); 1653 if (!t) { 1654 return NULL; 1655 } 1656 g_assert(t->root); 1657 return lookup_node(t->root, us->path + 1); 1658 } 1659 1660 static void count_child_nodes(gpointer key, gpointer value, gpointer user_data) 1661 { 1662 unsigned int *nr_nodes = user_data; 1663 XsNode *n = value; 1664 1665 (*nr_nodes)++; 1666 1667 if (n->children) { 1668 g_hash_table_foreach(n->children, count_child_nodes, nr_nodes); 1669 } 1670 } 1671 1672 static int consume_node(struct unsave_state *us, XsNode **nodep, 1673 unsigned int *nr_nodes) 1674 { 1675 XsNode *n = NULL; 1676 uint8_t flags; 1677 int ret; 1678 1679 if (us->l < 1) { 1680 return -EINVAL; 1681 } 1682 flags = us->d[0]; 1683 us->d++; 1684 us->l--; 1685 1686 if (flags == NODE_REF) { 1687 unsigned int tx; 1688 1689 ret = consume_be32(us, &tx); 1690 if (ret) { 1691 return ret; 1692 } 1693 1694 n = lookup_tx_node(us, tx); 1695 if (!n) { 1696 return -EINVAL; 1697 } 1698 n->ref++; 1699 if (n->children) { 1700 g_hash_table_foreach(n->children, count_child_nodes, nr_nodes); 1701 } 1702 } else { 1703 uint32_t datalen; 1704 1705 if (flags & ~(DELETED_IN_TX | MODIFIED_IN_TX)) { 1706 return -EINVAL; 1707 } 1708 n = xs_node_new(); 1709 1710 if (flags & DELETED_IN_TX) { 1711 n->deleted_in_tx = true; 1712 } 1713 if (flags & MODIFIED_IN_TX) { 1714 n->modified_in_tx = true; 1715 } 1716 ret = consume_be32(us, &datalen); 1717 if (ret) { 1718 xs_node_unref(n); 1719 return -EINVAL; 1720 } 1721 if (datalen) { 1722 if (datalen > us->l) { 1723 xs_node_unref(n); 1724 return -EINVAL; 1725 } 1726 1727 GByteArray *node_data = g_byte_array_new(); 1728 g_byte_array_append(node_data, us->d, datalen); 1729 us->d += datalen; 1730 us->l -= datalen; 1731 n->content = node_data; 1732 1733 if (us->root_walk) { 1734 n->modified_in_tx = true; 1735 } 1736 } 1737 while (1) { 1738 char *perm = NULL; 1739 size_t permlen = 0; 1740 1741 ret = consume_string(us, &perm, &permlen); 1742 if (ret) { 1743 xs_node_unref(n); 1744 return ret; 1745 } 1746 1747 if (!permlen) { 1748 break; 1749 } 1750 1751 n->perms = g_list_append(n->perms, g_strdup(perm)); 1752 } 1753 1754 /* Now children */ 1755 while (1) { 1756 size_t childlen; 1757 char *childname; 1758 char *pathend; 1759 XsNode *child = NULL; 1760 1761 ret = consume_string(us, &childname, &childlen); 1762 if (ret) { 1763 xs_node_unref(n); 1764 return ret; 1765 } 1766 1767 if (!childlen) { 1768 break; 1769 } 1770 1771 pathend = us->path + strlen(us->path); 1772 strncat(us->path, "/", sizeof(us->path) - 1); 1773 strncat(us->path, childname, sizeof(us->path) - 1); 1774 1775 ret = consume_node(us, &child, nr_nodes); 1776 *pathend = '\0'; 1777 if (ret) { 1778 xs_node_unref(n); 1779 return ret; 1780 } 1781 g_assert(child); 1782 xs_node_add_child(n, childname, child); 1783 } 1784 1785 /* 1786 * If the node has no data and no children we still want to fire 1787 * a watch on it. 1788 */ 1789 if (us->root_walk && !n->children) { 1790 n->modified_in_tx = true; 1791 } 1792 } 1793 1794 if (!n->deleted_in_tx) { 1795 (*nr_nodes)++; 1796 } 1797 1798 *nodep = n; 1799 return 0; 1800 } 1801 1802 static int consume_tree(struct unsave_state *us, XsTransaction *t) 1803 { 1804 int ret; 1805 1806 ret = consume_be32(us, &t->tx_id); 1807 if (ret) { 1808 return ret; 1809 } 1810 1811 if (t->tx_id > us->s->last_tx) { 1812 return -EINVAL; 1813 } 1814 1815 us->path[0] = '\0'; 1816 1817 return consume_node(us, &t->root, &t->nr_nodes); 1818 } 1819 1820 int xs_impl_deserialize(XenstoreImplState *s, GByteArray *bytes, 1821 unsigned int dom_id, xs_impl_watch_fn watch_fn, 1822 void *watch_opaque) 1823 { 1824 struct unsave_state us; 1825 XsTransaction base_t = { 0 }; 1826 int ret; 1827 1828 us.s = s; 1829 us.bytes = bytes; 1830 us.d = bytes->data; 1831 us.l = bytes->len; 1832 1833 xs_impl_reset_watches(s, dom_id); 1834 g_hash_table_remove_all(s->transactions); 1835 1836 xs_node_unref(s->root); 1837 s->root = NULL; 1838 s->root_tx = s->last_tx = XBT_NULL; 1839 1840 ret = consume_be32(&us, &s->last_tx); 1841 if (ret) { 1842 return ret; 1843 } 1844 1845 /* 1846 * Consume the base tree into a transaction so that watches can be 1847 * fired as we commit it. By setting us.root_walk we cause the nodes 1848 * to be marked as 'modified_in_tx' as they are created, so that the 1849 * watches are triggered on them. 1850 */ 1851 base_t.dom_id = dom_id; 1852 base_t.base_tx = XBT_NULL; 1853 us.root_walk = true; 1854 ret = consume_tree(&us, &base_t); 1855 if (ret) { 1856 return ret; 1857 } 1858 us.root_walk = false; 1859 1860 /* 1861 * Commit the transaction now while the refcount on all nodes is 1. 1862 * Note that we haven't yet reinstated the *guest* watches but that's 1863 * OK because we don't want the guest to see any changes. Even any 1864 * backend nodes which get recreated should be *precisely* as they 1865 * were before the migration. Back ends may have been instantiated 1866 * already, and will see the frontend magically blink into existence 1867 * now (well, from the aio_bh which fires the watches). It's their 1868 * responsibility to rebuild everything precisely as it was before. 1869 */ 1870 ret = transaction_commit(s, &base_t); 1871 if (ret) { 1872 return ret; 1873 } 1874 1875 while (1) { 1876 unsigned int base_tx; 1877 XsTransaction *t; 1878 1879 ret = consume_be32(&us, &base_tx); 1880 if (ret) { 1881 return ret; 1882 } 1883 if (base_tx == XBT_NULL) { 1884 break; 1885 } 1886 1887 t = g_new0(XsTransaction, 1); 1888 t->base_tx = base_tx; 1889 1890 ret = consume_be32(&us, &t->dom_id); 1891 if (!ret) { 1892 ret = consume_tree(&us, t); 1893 } 1894 if (ret) { 1895 g_free(t); 1896 return ret; 1897 } 1898 g_assert(t->root); 1899 if (t->dom_id) { 1900 s->nr_domu_transactions++; 1901 } 1902 g_hash_table_insert(s->transactions, GINT_TO_POINTER(t->tx_id), t); 1903 } 1904 1905 while (1) { 1906 char *path, *token; 1907 size_t pathlen, toklen; 1908 1909 ret = consume_string(&us, &path, &pathlen); 1910 if (ret) { 1911 return ret; 1912 } 1913 if (!pathlen) { 1914 break; 1915 } 1916 1917 ret = consume_string(&us, &token, &toklen); 1918 if (ret) { 1919 return ret; 1920 } 1921 1922 if (!watch_fn) { 1923 continue; 1924 } 1925 1926 ret = do_xs_impl_watch(s, dom_id, path, token, watch_fn, watch_opaque); 1927 if (ret) { 1928 return ret; 1929 } 1930 } 1931 1932 if (us.l) { 1933 return -EINVAL; 1934 } 1935 1936 return 0; 1937 } 1938