/*
 * Virtio 9p backend
 *
 * Copyright IBM, Corp. 2010
 *
 * Authors:
 *  Anthony Liguori <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 */

/*
 * Not so fast! You might want to read the 9p developer docs first:
 * https://wiki.qemu.org/Documentation/9p
 */

#include "qemu/osdep.h"
#ifdef CONFIG_LINUX
#include <linux/limits.h>
#else
#include <limits.h>
#endif
#include <glib/gprintf.h>
#include "hw/virtio/virtio.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/iov.h"
#include "qemu/main-loop.h"
#include "qemu/sockets.h"
#include "virtio-9p.h"
#include "fsdev/qemu-fsdev.h"
#include "9p-xattr.h"
#include "9p-util.h"
#include "coth.h"
#include "trace.h"
#include "migration/blocker.h"
#include "qemu/xxhash.h"
#include <math.h>

/* Global open file descriptor accounting (shared with the fsdev layer). */
int open_fd_hw;
int total_open_fd;
static int open_fd_rc;

/* Open mode bits used by legacy 9p2000.u Topen/Tcreate requests. */
enum {
    Oread = 0x00,
    Owrite = 0x01,
    Ordwr = 0x02,
    Oexec = 0x03,
    Oexcl = 0x04,
    Otrunc = 0x10,
    Orexec = 0x20,
    Orclose = 0x40,
    Oappend = 0x80,
};

P9ARRAY_DEFINE_TYPE(V9fsPath, v9fs_path_free);

/*
 * Serialize the arguments described by @fmt into the reply buffer of
 * @pdu, starting at byte @offset.
 *
 * Returns the number of bytes written, or a negative value on error.
 */
static ssize_t pdu_marshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...)
{
    ssize_t ret;
    va_list ap;

    va_start(ap, fmt);
    ret = pdu->s->transport->pdu_vmarshal(pdu, offset, fmt, ap);
    va_end(ap);

    return ret;
}

/*
 * Deserialize the request buffer of @pdu, starting at byte @offset, into
 * the arguments described by @fmt.
 *
 * Returns the number of bytes consumed, or a negative value on error.
 */
static ssize_t pdu_unmarshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...)
{
    ssize_t ret;
    va_list ap;

    va_start(ap, fmt);
    ret = pdu->s->transport->pdu_vunmarshal(pdu, offset, fmt, ap);
    va_end(ap);

    return ret;
}

/* Map a 9p2000.u open mode byte to host open(2) flags. */
static int omode_to_uflags(int8_t mode)
{
    int ret = 0;

    /* The low two bits select the access mode; Oexec maps to read-only. */
    switch (mode & 3) {
    case Oread:
        ret = O_RDONLY;
        break;
    case Ordwr:
        ret = O_RDWR;
        break;
    case Owrite:
        ret = O_WRONLY;
        break;
    case Oexec:
        ret = O_RDONLY;
        break;
    }

    if (mode & Otrunc) {
        ret |= O_TRUNC;
    }

    if (mode & Oappend) {
        ret |= O_APPEND;
    }

    if (mode & Oexcl) {
        ret |= O_EXCL;
    }

    return ret;
}

/* One entry of the 9p2000.L -> host open(2) flag translation table. */
typedef struct DotlOpenflagMap {
    int dotl_flag;
    int open_flag;
} DotlOpenflagMap;

/* Translate 9p2000.L protocol open flags to host open(2) flags. */
static int dotl_to_open_flags(int flags)
{
    int i;
    /*
     * We have same bits for P9_DOTL_READONLY, P9_DOTL_WRONLY
     * and P9_DOTL_NOACCESS
     */
    int oflags = flags & O_ACCMODE;

    DotlOpenflagMap dotl_oflag_map[] = {
        { P9_DOTL_CREATE, O_CREAT },
        { P9_DOTL_EXCL, O_EXCL },
        { P9_DOTL_NOCTTY , O_NOCTTY },
        { P9_DOTL_TRUNC, O_TRUNC },
        { P9_DOTL_APPEND, O_APPEND },
        { P9_DOTL_NONBLOCK, O_NONBLOCK } ,
        { P9_DOTL_DSYNC, O_DSYNC },
        { P9_DOTL_FASYNC, FASYNC },
#ifndef CONFIG_DARWIN
        { P9_DOTL_NOATIME, O_NOATIME },
        /*
         * On Darwin, we could map to F_NOCACHE, which is
         * similar, but doesn't quite have the same
         * semantics. However, we don't support O_DIRECT
         * even on linux at the moment, so we just ignore
         * it here.
         */
        { P9_DOTL_DIRECT, O_DIRECT },
#endif
        { P9_DOTL_LARGEFILE, O_LARGEFILE },
        { P9_DOTL_DIRECTORY, O_DIRECTORY },
        { P9_DOTL_NOFOLLOW, O_NOFOLLOW },
        { P9_DOTL_SYNC, O_SYNC },
    };

    for (i = 0; i < ARRAY_SIZE(dotl_oflag_map); i++) {
        if (flags & dotl_oflag_map[i].dotl_flag) {
            oflags |= dotl_oflag_map[i].open_flag;
        }
    }

    return oflags;
}

/* Reset all fields of @credp to -1, i.e. "not specified". */
void cred_init(FsCred *credp)
{
    credp->fc_uid = -1;
    credp->fc_gid = -1;
    credp->fc_mode = -1;
    credp->fc_rdev = -1;
}

/* Translate and sanitize client-supplied 9p2000.L open flags. */
static int get_dotl_openflags(V9fsState *s, int oflags)
{
    int flags;
    /*
     * Filter the client open flags
     */
    flags = dotl_to_open_flags(oflags);
    flags &= ~(O_NOCTTY | O_ASYNC | O_CREAT);
#ifndef CONFIG_DARWIN
    /*
     * Ignore direct disk access hint until the server supports it.
     */
    flags &= ~O_DIRECT;
#endif
    return flags;
}

void v9fs_path_init(V9fsPath *path)
{
    path->data = NULL;
    path->size = 0;
}

void v9fs_path_free(V9fsPath *path)
{
    g_free(path->data);
    path->data = NULL;
    path->size = 0;
}


/* printf-style (re)initialization of @path; frees any previous value. */
void G_GNUC_PRINTF(2, 3)
v9fs_path_sprintf(V9fsPath *path, const char *fmt, ...)
{
    va_list ap;

    v9fs_path_free(path);

    va_start(ap, fmt);
    /* Bump the size for including terminating NULL */
    path->size = g_vasprintf(&path->data, fmt, ap) + 1;
    va_end(ap);
}

/* Deep-copy @src into @dst, releasing whatever @dst held before. */
void v9fs_path_copy(V9fsPath *dst, const V9fsPath *src)
{
    v9fs_path_free(dst);
    dst->size = src->size;
    dst->data = g_memdup(src->data, src->size);
}

/*
 * Ask the fsdev backend to resolve @name relative to @dirpath into @path.
 * Returns 0 on success, -errno on failure.
 */
int v9fs_name_to_path(V9fsState *s, V9fsPath *dirpath,
                      const char *name, V9fsPath *path)
{
    int err;
    err = s->ops->name_to_path(&s->ctx, dirpath, name, path);
    if (err < 0) {
        err = -errno;
    }
    return err;
}

/*
 * Return TRUE if s1 is an ancestor of s2.
 *
 * E.g.
 "a/b" is an ancestor of "a/b/c" but not of "a/bc/d".
 * As a special case, We treat s1 as ancestor of s2 if they are same!
 */
static int v9fs_path_is_ancestor(V9fsPath *s1, V9fsPath *s2)
{
    /* s1->size includes the terminating NUL, hence the -1 below. */
    if (!strncmp(s1->data, s2->data, s1->size - 1)) {
        if (s2->data[s1->size - 1] == '\0' || s2->data[s1->size - 1] == '/') {
            return 1;
        }
    }
    return 0;
}

static size_t v9fs_string_size(V9fsString *str)
{
    return str->size;
}

/*
 * returns 0 if fid got re-opened, 1 if not, < 0 on error
 */
static int coroutine_fn v9fs_reopen_fid(V9fsPDU *pdu, V9fsFidState *f)
{
    int err = 1;
    if (f->fid_type == P9_FID_FILE) {
        if (f->fs.fd == -1) {
            /* Retry on EINTR unless the request itself was cancelled. */
            do {
                err = v9fs_co_open(pdu, f, f->open_flags);
            } while (err == -EINTR && !pdu->cancelled);
        }
    } else if (f->fid_type == P9_FID_DIR) {
        if (f->fs.dir.stream == NULL) {
            do {
                err = v9fs_co_opendir(pdu, f);
            } while (err == -EINTR && !pdu->cancelled);
        }
    }
    return err;
}

/*
 * Look up @fid in the fid table and take a reference on it, reopening
 * its file/directory if it was closed by the fd reclaim logic.
 * Returns NULL if the fid does not exist or reopening failed.
 */
static V9fsFidState *coroutine_fn get_fid(V9fsPDU *pdu, int32_t fid)
{
    int err;
    V9fsFidState *f;
    V9fsState *s = pdu->s;

    f = g_hash_table_lookup(s->fids, GINT_TO_POINTER(fid));
    if (f) {
        BUG_ON(f->clunked);
        /*
         * Update the fid ref upfront so that
         * we don't get reclaimed when we yield
         * in open later.
         */
        f->ref++;
        /*
         * check whether we need to reopen the
         * file. We might have closed the fd
         * while trying to free up some file
         * descriptors.
         */
        err = v9fs_reopen_fid(pdu, f);
        if (err < 0) {
            f->ref--;
            return NULL;
        }
        /*
         * Mark the fid as referenced so that the LRU
         * reclaim won't close the file descriptor
         */
        f->flags |= FID_REFERENCED;
        return f;
    }
    return NULL;
}

/*
 * Allocate a new fid state for @fid and insert it into the fid table.
 * Returns NULL if @fid is already in use.
 */
static V9fsFidState *alloc_fid(V9fsState *s, int32_t fid)
{
    V9fsFidState *f;

    f = g_hash_table_lookup(s->fids, GINT_TO_POINTER(fid));
    if (f) {
        /* If fid is already there return NULL */
        BUG_ON(f->clunked);
        return NULL;
    }
    f = g_new0(V9fsFidState, 1);
    f->fid = fid;
    f->fid_type = P9_FID_NONE;
    f->ref = 1;
    /*
     * Mark the fid as referenced so that the LRU
     * reclaim won't close the file descriptor
     */
    f->flags |= FID_REFERENCED;
    g_hash_table_insert(s->fids, GINT_TO_POINTER(fid), f);

    v9fs_readdir_init(s->proto_version, &f->fs.dir);
    v9fs_readdir_init(s->proto_version, &f->fs_reclaim.dir);

    return f;
}

/*
 * Finalize an xattr fid on clunk: for a setxattr fid, commit (or remove)
 * the buffered xattr value; for a getxattr/listxattr walk fid just free
 * the cached value.
 */
static int coroutine_fn v9fs_xattr_fid_clunk(V9fsPDU *pdu, V9fsFidState *fidp)
{
    int retval = 0;

    if (fidp->fs.xattr.xattrwalk_fid) {
        /* getxattr/listxattr fid */
        goto free_value;
    }
    /*
     * if this is fid for setxattr. clunk should
     * result in setxattr localcall
     */
    if (fidp->fs.xattr.len != fidp->fs.xattr.copied_len) {
        /* clunk after partial write */
        retval = -EINVAL;
        goto free_out;
    }
    if (fidp->fs.xattr.len) {
        retval = v9fs_co_lsetxattr(pdu, &fidp->path, &fidp->fs.xattr.name,
                                   fidp->fs.xattr.value,
                                   fidp->fs.xattr.len,
                                   fidp->fs.xattr.flags);
    } else {
        /* Zero-length value means the attribute is to be removed. */
        retval = v9fs_co_lremovexattr(pdu, &fidp->path, &fidp->fs.xattr.name);
    }
free_out:
    v9fs_string_free(&fidp->fs.xattr.name);
free_value:
    g_free(fidp->fs.xattr.value);
    return retval;
}

/*
 * Release all resources held by @fidp (fd, directory stream or xattr
 * buffer) and free the structure itself.
 */
static int coroutine_fn free_fid(V9fsPDU *pdu, V9fsFidState *fidp)
{
    int retval = 0;

    if (fidp->fid_type == P9_FID_FILE) {
        /* If we reclaimed the fd no need to close */
        if (fidp->fs.fd != -1) {
            retval = v9fs_co_close(pdu, &fidp->fs);
        }
    } else if (fidp->fid_type == P9_FID_DIR) {
        if (fidp->fs.dir.stream != NULL) {
            retval = v9fs_co_closedir(pdu, &fidp->fs);
        }
    } else if (fidp->fid_type == P9_FID_XATTR) {
        retval = v9fs_xattr_fid_clunk(pdu, fidp);
    }
    v9fs_path_free(&fidp->path);
    g_free(fidp);
    return retval;
}

/*
 * Drop one reference on @fidp; free it when this was the last reference
 * and the fid has already been clunked.
 */
static int coroutine_fn put_fid(V9fsPDU *pdu, V9fsFidState *fidp)
{
    BUG_ON(!fidp->ref);
    fidp->ref--;
    /*
     * Don't free the fid if it is in reclaim list
     */
    if (!fidp->ref && fidp->clunked) {
        if (fidp->fid == pdu->s->root_fid) {
            /*
             * if the clunked fid is root fid then we
             * have unmounted the fs on the client side.
             * delete the migration blocker. Ideally, this
             * should be hooked to transport close notification
             */
            if (pdu->s->migration_blocker) {
                migrate_del_blocker(pdu->s->migration_blocker);
                error_free(pdu->s->migration_blocker);
                pdu->s->migration_blocker = NULL;
            }
        }
        return free_fid(pdu, fidp);
    }
    return 0;
}

/*
 * Remove @fid from the fid table and mark it clunked. The caller still
 * owns the returned fid state (it is freed via put_fid() once the last
 * reference is dropped). Returns NULL if @fid is unknown.
 */
static V9fsFidState *clunk_fid(V9fsState *s, int32_t fid)
{
    V9fsFidState *fidp;

    /* TODO: Use g_hash_table_steal_extended() instead? */
    fidp = g_hash_table_lookup(s->fids, GINT_TO_POINTER(fid));
    if (fidp) {
        g_hash_table_remove(s->fids, GINT_TO_POINTER(fid));
        fidp->clunked = true;
        return fidp;
    }
    return NULL;
}

/*
 * Close host file descriptors of idle fids (a simple second-chance LRU)
 * so the process stays below its fd limit. The fd/stream is stashed in
 * fs_reclaim and closed after the scan; get_fid() transparently reopens
 * it if the client touches the fid again.
 */
void coroutine_fn v9fs_reclaim_fd(V9fsPDU *pdu)
{
    int reclaim_count = 0;
    V9fsState *s = pdu->s;
    V9fsFidState *f;
    GHashTableIter iter;
    gpointer fid;

    g_hash_table_iter_init(&iter, s->fids);

    QSLIST_HEAD(, V9fsFidState) reclaim_list =
        QSLIST_HEAD_INITIALIZER(reclaim_list);

    while (g_hash_table_iter_next(&iter, &fid, (gpointer *) &f)) {
        /*
         * Unlink fids cannot be reclaimed. Check
         * for them and skip them. Also skip fids
         * currently being operated on.
         */
        if (f->ref || f->flags & FID_NON_RECLAIMABLE) {
            continue;
        }
        /*
         * if it is a recently referenced fid
         * we leave the fid untouched and clear the
         * reference bit. We come back to it later
         * in the next iteration. (a simple LRU without
         * moving list elements around)
         */
        if (f->flags & FID_REFERENCED) {
            f->flags &= ~FID_REFERENCED;
            continue;
        }
        /*
         * Add fids to reclaim list.
         */
        if (f->fid_type == P9_FID_FILE) {
            if (f->fs.fd != -1) {
                /*
                 * Up the reference count so that
                 * a clunk request won't free this fid
                 */
                f->ref++;
                QSLIST_INSERT_HEAD(&reclaim_list, f, reclaim_next);
                f->fs_reclaim.fd = f->fs.fd;
                f->fs.fd = -1;
                reclaim_count++;
            }
        } else if (f->fid_type == P9_FID_DIR) {
            if (f->fs.dir.stream != NULL) {
                /*
                 * Up the reference count so that
                 * a clunk request won't free this fid
                 */
                f->ref++;
                QSLIST_INSERT_HEAD(&reclaim_list, f, reclaim_next);
                f->fs_reclaim.dir.stream = f->fs.dir.stream;
                f->fs.dir.stream = NULL;
                reclaim_count++;
            }
        }
        if (reclaim_count >= open_fd_rc) {
            break;
        }
    }
    /*
     * Now close the fid in reclaim list. Free them if they
     * are already clunked.
     */
    while (!QSLIST_EMPTY(&reclaim_list)) {
        f = QSLIST_FIRST(&reclaim_list);
        QSLIST_REMOVE(&reclaim_list, f, V9fsFidState, reclaim_next);
        if (f->fid_type == P9_FID_FILE) {
            v9fs_co_close(pdu, &f->fs_reclaim);
        } else if (f->fid_type == P9_FID_DIR) {
            v9fs_co_closedir(pdu, &f->fs_reclaim);
        }
        /*
         * Now drop the fid reference, free it
         * if clunked.
         */
        put_fid(pdu, f);
    }
}

/*
 * This is used when a path is removed from the directory tree. Any
 * fids that still reference it must not be closed from then on, since
 * they cannot be reopened.
 */
static int coroutine_fn v9fs_mark_fids_unreclaim(V9fsPDU *pdu, V9fsPath *path)
{
    int err = 0;
    V9fsState *s = pdu->s;
    V9fsFidState *fidp;
    gpointer fid;
    GHashTableIter iter;
    /*
     * The most common case is probably that we have exactly one
     * fid for the given path, so preallocate exactly one.
     */
    g_autoptr(GArray) to_reopen = g_array_sized_new(FALSE, FALSE,
            sizeof(V9fsFidState *), 1);
    gint i;

    g_hash_table_iter_init(&iter, s->fids);

    /*
     * We iterate over the fid table looking for the entries we need
     * to reopen, and store them in to_reopen. This is because
     * v9fs_reopen_fid() and put_fid() yield. This allows the fid table
     * to be modified in the meantime, invalidating our iterator.
     */
    while (g_hash_table_iter_next(&iter, &fid, (gpointer *) &fidp)) {
        if (fidp->path.size == path->size &&
            !memcmp(fidp->path.data, path->data, path->size)) {
            /*
             * Ensure the fid survives a potential clunk request during
             * v9fs_reopen_fid or put_fid.
             */
            fidp->ref++;
            fidp->flags |= FID_NON_RECLAIMABLE;
            g_array_append_val(to_reopen, fidp);
        }
    }

    for (i = 0; i < to_reopen->len; i++) {
        fidp = g_array_index(to_reopen, V9fsFidState*, i);
        /* reopen the file/dir if already closed */
        err = v9fs_reopen_fid(pdu, fidp);
        if (err < 0) {
            break;
        }
    }

    for (i = 0; i < to_reopen->len; i++) {
        put_fid(pdu, g_array_index(to_reopen, V9fsFidState*, i));
    }
    return err;
}

/*
 * Clunk and release every fid; used on TVERSION, i.e. when the client
 * (re)starts a 9p session.
 */
static void coroutine_fn virtfs_reset(V9fsPDU *pdu)
{
    V9fsState *s = pdu->s;
    V9fsFidState *fidp;
    GList *freeing;
    /*
     * Get a list of all the values (fid states) in the table, which
     * we then...
     */
    g_autoptr(GList) fids = g_hash_table_get_values(s->fids);

    /* ... remove from the table, taking over ownership. */
    g_hash_table_steal_all(s->fids);

    /*
     * This allows us to release our references to them asynchronously without
     * iterating over the hash table and risking iterator invalidation
     * through concurrent modifications.
     */
    for (freeing = fids; freeing; freeing = freeing->next) {
        fidp = freeing->data;
        fidp->ref++;
        fidp->clunked = true;
        put_fid(pdu, fidp);
    }
}

/* QID type bits (plan 9 protocol). */
#define P9_QID_TYPE_DIR         0x80
#define P9_QID_TYPE_SYMLINK     0x02

/* Mode bits of the legacy 9p2000.u stat structure. */
#define P9_STAT_MODE_DIR        0x80000000
#define P9_STAT_MODE_APPEND     0x40000000
#define P9_STAT_MODE_EXCL       0x20000000
#define P9_STAT_MODE_MOUNT      0x10000000
#define P9_STAT_MODE_AUTH       0x08000000
#define P9_STAT_MODE_TMP        0x04000000
#define P9_STAT_MODE_SYMLINK    0x02000000
#define P9_STAT_MODE_LINK       0x01000000
#define P9_STAT_MODE_DEVICE     0x00800000
#define P9_STAT_MODE_NAMED_PIPE 0x00200000
#define P9_STAT_MODE_SOCKET     0x00100000
#define P9_STAT_MODE_SETUID     0x00080000
#define P9_STAT_MODE_SETGID     0x00040000
#define P9_STAT_MODE_SETVTX     0x00010000

#define P9_STAT_MODE_TYPE_BITS (P9_STAT_MODE_DIR |          \
                                P9_STAT_MODE_SYMLINK |      \
                                P9_STAT_MODE_LINK |         \
                                P9_STAT_MODE_DEVICE |       \
                                P9_STAT_MODE_NAMED_PIPE |   \
                                P9_STAT_MODE_SOCKET)

/* Mirrors all bits of a byte. So e.g. binary 10100000 would become 00000101. */
static inline uint8_t mirror8bit(uint8_t byte)
{
    return (byte * 0x0202020202ULL & 0x010884422010ULL) % 1023;
}

/* Same as mirror8bit() just for a 64 bit data type instead for a byte. */
static inline uint64_t mirror64bit(uint64_t value)
{
    return ((uint64_t)mirror8bit(value & 0xff) << 56) |
           ((uint64_t)mirror8bit((value >> 8) & 0xff) << 48) |
           ((uint64_t)mirror8bit((value >> 16) & 0xff) << 40) |
           ((uint64_t)mirror8bit((value >> 24) & 0xff) << 32) |
           ((uint64_t)mirror8bit((value >> 32) & 0xff) << 24) |
           ((uint64_t)mirror8bit((value >> 40) & 0xff) << 16) |
           ((uint64_t)mirror8bit((value >> 48) & 0xff) << 8) |
           ((uint64_t)mirror8bit((value >> 56) & 0xff));
}

/*
 * Parameter k for the Exponential Golomb algorithm to be used.
 *
 * The smaller this value, the smaller the minimum bit count for the Exp.
 * Golomb generated affixes will be (at lowest index) however for the
 * price of having higher maximum bit count of generated affixes (at highest
 * index). Likewise increasing this parameter yields in smaller maximum bit
 * count for the price of having higher minimum bit count.
 *
 * In practice that means: a good value for k depends on the expected amount
 * of devices to be exposed by one export. For a small amount of devices k
 * should be small, for a large amount of devices k might be increased
 * instead. The default of k=0 should be fine for most users though.
 *
 * IMPORTANT: In case this ever becomes a runtime parameter; the value of
 * k should not change as long as guest is still running! Because that would
 * cause completely different inode numbers to be generated on guest.
 */
#define EXP_GOLOMB_K    0

/**
 * expGolombEncode() - Exponential Golomb algorithm for arbitrary k
 *                     (including k=0).
 *
 * @n: natural number (or index) of the prefix to be generated
 *     (1, 2, 3, ...)
 * @k: parameter k of Exp. Golomb algorithm to be used
 *     (see comment on EXP_GOLOMB_K macro for details about k)
 * Return: prefix for given @n and @k
 *
 * The Exponential Golomb algorithm generates prefixes (NOT suffixes!)
 * with growing length and with the mathematical property of being
 * "prefix-free". The latter means the generated prefixes can be prepended
 * in front of arbitrary numbers and the resulting concatenated numbers are
 * guaranteed to be always unique.
 *
 * This is a minor adjustment to the original Exp. Golomb algorithm in the
 * sense that lowest allowed index (@n) starts with 1, not with zero.
 */
static VariLenAffix expGolombEncode(uint64_t n, int k)
{
    const uint64_t value = n + (1 << k) - 1;
    const int bits = (int) log2(value) + 1;
    return (VariLenAffix) {
        .type = AffixType_Prefix,
        .value = value,
        .bits = bits + MAX((bits - 1 - k), 0)
    };
}

/**
 * invertAffix() - Converts a suffix into a prefix, or a prefix into a suffix.
 * @affix: either suffix or prefix to be inverted
 * Return: inversion of passed @affix
 *
 * Simply mirror all bits of the affix value, for the purpose to preserve
 * respectively the mathematical "prefix-free" or "suffix-free" property
 * after the conversion.
 *
 * If a passed prefix is suitable to create unique numbers, then the
 * returned suffix is suitable to create unique numbers as well (and vice
 * versa).
 */
static VariLenAffix invertAffix(const VariLenAffix *affix)
{
    return (VariLenAffix) {
        .type =
            (affix->type == AffixType_Suffix) ?
                AffixType_Prefix : AffixType_Suffix,
        .value =
            mirror64bit(affix->value) >>
            ((sizeof(affix->value) * 8) - affix->bits),
        .bits = affix->bits
    };
}

/**
 * affixForIndex() - Generates suffix numbers with "suffix-free" property.
 * @index: natural number (or index) of the suffix to be generated
 *         (1, 2, 3, ...)
 * Return: Suffix suitable to assemble unique number.
 *
 * This is just a wrapper function on top of the Exp. Golomb algorithm.
 *
 * Since the Exp. Golomb algorithm generates prefixes, but we need suffixes,
 * this function converts the Exp. Golomb prefixes into appropriate suffixes
 * which are still suitable for generating unique numbers.
 */
static VariLenAffix affixForIndex(uint64_t index)
{
    VariLenAffix prefix;
    prefix = expGolombEncode(index, EXP_GOLOMB_K);
    return invertAffix(&prefix); /* convert prefix to suffix */
}

/* creative abuse of tb_hash_func7, which is based on xxhash */
static uint32_t qpp_hash(QppEntry e)
{
    return qemu_xxhash7(e.ino_prefix, e.dev, 0, 0, 0);
}

static uint32_t qpf_hash(QpfEntry e)
{
    return qemu_xxhash7(e.ino, e.dev, 0, 0, 0);
}

/* qht comparison callback: device-only match (qpd_table). */
static bool qpd_cmp_func(const void *obj, const void *userp)
{
    const QpdEntry *e1 = obj, *e2 = userp;
    return e1->dev == e2->dev;
}

/* qht comparison callback: device + inode prefix match (qpp_table). */
static bool qpp_cmp_func(const void *obj, const void *userp)
{
    const QppEntry *e1 = obj, *e2 = userp;
    return e1->dev == e2->dev && e1->ino_prefix == e2->ino_prefix;
}

/* qht comparison callback: device + full inode match (qpf_table). */
static bool qpf_cmp_func(const void *obj, const void *userp)
{
    const QpfEntry *e1 = obj, *e2 = userp;
    return e1->dev == e2->dev && e1->ino == e2->ino;
}

/* qht_iter() callback used by qp_table_destroy() to free each entry. */
static void qp_table_remove(void *p, uint32_t h, void *up)
{
    g_free(p);
}

/* Free all entries of @ht, then the table itself (NULL-safe). */
static void qp_table_destroy(struct qht *ht)
{
    if (!ht || !ht->map) {
        return;
    }
    qht_iter(ht, qp_table_remove, NULL);
    qht_destroy(ht);
}

static void qpd_table_init(struct qht *ht)
{
    qht_init(ht, qpd_cmp_func, 1, QHT_MODE_AUTO_RESIZE);
}

static void qpp_table_init(struct qht *ht)
{
    qht_init(ht, qpp_cmp_func, 1, QHT_MODE_AUTO_RESIZE);
}

static void qpf_table_init(struct qht *ht)
{
    qht_init(ht, qpf_cmp_func, 1 << 16, QHT_MODE_AUTO_RESIZE);
}

/*
 * Returns how many (high end) bits of inode numbers of the passed fs
 * device shall be used (in combination with the device number) to
 * generate hash values for qpp_table entries.
 *
 * This function is required if variable length suffixes are used for inode
 * number mapping on guest level. Since a device may end up having multiple
 * entries in qpp_table, each entry most probably with a different suffix
 * length, we thus need this function in conjunction with qpd_table to
 * "agree" about a fixed amount of bits (per device) to be always used for
 * generating hash values for the purpose of accessing qpp_table in order
 * to get consistent behaviour when accessing qpp_table.
 */
static int qid_inode_prefix_hash_bits(V9fsPDU *pdu, dev_t dev)
{
    QpdEntry lookup = {
        .dev = dev
    }, *val;
    uint32_t hash = dev;
    VariLenAffix affix;

    val = qht_lookup(&pdu->s->qpd_table, &lookup, hash);
    if (!val) {
        /* First time we see this device: record the prefix width now. */
        val = g_new0(QpdEntry, 1);
        *val = lookup;
        affix = affixForIndex(pdu->s->qp_affix_next);
        val->prefix_bits = affix.bits;
        qht_insert(&pdu->s->qpd_table, val, hash, NULL);
        pdu->s->qp_ndevices++;
    }
    return val->prefix_bits;
}

/*
 * Slow / full mapping host inode nr -> guest inode nr.
 *
 * This function performs a slower and much more costly remapping of an
 * original file inode number on host to an appropriate different inode
 * number on guest. For every (dev, inode) combination on host a new
 * sequential number is generated, cached and exposed as inode number on
 * guest.
 *
 * This is just a "last resort" fallback solution if the much faster/cheaper
 * qid_path_suffixmap() failed. In practice this slow / full mapping is not
 * expected ever to be used at all though.
 *
 * See qid_path_suffixmap() for details
 *
 */
static int qid_path_fullmap(V9fsPDU *pdu, const struct stat *stbuf,
                            uint64_t *path)
{
    QpfEntry lookup = {
        .dev = stbuf->st_dev,
        .ino = stbuf->st_ino
    }, *val;
    uint32_t hash = qpf_hash(lookup);
    VariLenAffix affix;

    val = qht_lookup(&pdu->s->qpf_table, &lookup, hash);

    if (!val) {
        if (pdu->s->qp_fullpath_next == 0) {
            /* no more files can be mapped :'( */
            error_report_once(
                "9p: No more prefixes available for remapping inodes from "
                "host to guest."
            );
            return -ENFILE;
        }

        val = g_new0(QpfEntry, 1);
        *val = lookup;

        /* new unique inode and device combo */
        affix = affixForIndex(
            1ULL << (sizeof(pdu->s->qp_affix_next) * 8)
        );
        val->path = (pdu->s->qp_fullpath_next++ << affix.bits) | affix.value;
        pdu->s->qp_fullpath_next &= ((1ULL << (64 - affix.bits)) - 1);
        qht_insert(&pdu->s->qpf_table, val, hash, NULL);
    }

    *path = val->path;
    return 0;
}

/*
 * Quick mapping host inode nr -> guest inode nr.
 *
 * This function performs quick remapping of an original file inode number
 * on host to an appropriate different inode number on guest. This remapping
 * of inodes is required to avoid inode nr collisions on guest which would
 * happen if the 9p export contains more than 1 exported file system (or
 * more than 1 file system data set), because unlike on host level where the
 * files would have different device nrs, all files exported by 9p would
 * share the same device nr on guest (the device nr of the virtual 9p device
 * that is).
 *
 * Inode remapping is performed by chopping off high end bits of the original
 * inode number from host, shifting the result upwards and then assigning a
 * generated suffix number for the low end bits, where the same suffix number
 * will be shared by all inodes with the same device id AND the same high end
 * bits that have been chopped off. That approach utilizes the fact that inode
 * numbers very likely share the same high end bits (i.e. due to their common
 * sequential generation by file systems) and hence we only have to generate
 * and track a very limited amount of suffixes in practice due to that.
 *
 * We generate variable size suffixes for that purpose. The 1st generated
 * suffix will only have 1 bit and hence we only need to chop off 1 bit from
 * the original inode number. The subsequent suffixes being generated will
 * grow in (bit) size subsequently, i.e. the 2nd and 3rd suffix being
 * generated will have 3 bits and hence we have to chop off 3 bits from their
 * original inodes, and so on. That approach of using variable length suffixes
 * (i.e. over fixed size ones) utilizes the fact that in practice only a very
 * limited amount of devices are shared by the same export (e.g. typically
 * less than 2 dozen devices per 9p export), so in practice we need to chop
 * off less bits than with fixed size prefixes and yet are flexible to add
 * new devices at runtime below host's export directory at any time without
 * having to reboot guest nor requiring to reconfigure guest for that. And due
 * to the very limited amount of original high end bits that we chop off that
 * way, the total amount of suffixes we need to generate is less than by using
 * fixed size prefixes and hence it also improves performance of the inode
 * remapping algorithm, and finally has the nice side effect that the inode
 * numbers on guest will be much smaller & human friendly.
 ;-)
 */
static int qid_path_suffixmap(V9fsPDU *pdu, const struct stat *stbuf,
                              uint64_t *path)
{
    const int ino_hash_bits = qid_inode_prefix_hash_bits(pdu, stbuf->st_dev);
    QppEntry lookup = {
        .dev = stbuf->st_dev,
        .ino_prefix = (uint16_t) (stbuf->st_ino >> (64 - ino_hash_bits))
    }, *val;
    uint32_t hash = qpp_hash(lookup);

    val = qht_lookup(&pdu->s->qpp_table, &lookup, hash);

    if (!val) {
        if (pdu->s->qp_affix_next == 0) {
            /* we ran out of affixes */
            warn_report_once(
                "9p: Potential degraded performance of inode remapping"
            );
            return -ENFILE;
        }

        val = g_new0(QppEntry, 1);
        *val = lookup;

        /* new unique inode affix and device combo */
        val->qp_affix_index = pdu->s->qp_affix_next++;
        val->qp_affix = affixForIndex(val->qp_affix_index);
        qht_insert(&pdu->s->qpp_table, val, hash, NULL);
    }
    /* assuming generated affix to be suffix type, not prefix */
    *path = (stbuf->st_ino << val->qp_affix.bits) | val->qp_affix.value;
    return 0;
}

/*
 * Fill @qidp (path/version/type) from host stat data. Depending on the
 * export's multidevs setting, the QID path is the remapped inode number
 * (suffixmap with fullmap fallback), or the raw host inode number with
 * foreign devices either rejected (forbid) or merely warned about.
 */
static int stat_to_qid(V9fsPDU *pdu, const struct stat *stbuf, V9fsQID *qidp)
{
    int err;
    size_t size;

    if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) {
        /* map inode+device to qid path (fast path) */
        err = qid_path_suffixmap(pdu, stbuf, &qidp->path);
        if (err == -ENFILE) {
            /* fast path didn't work, fall back to full map */
            err = qid_path_fullmap(pdu, stbuf, &qidp->path);
        }
        if (err) {
            return err;
        }
    } else {
        if (pdu->s->dev_id != stbuf->st_dev) {
            if (pdu->s->ctx.export_flags & V9FS_FORBID_MULTIDEVS) {
                error_report_once(
                    "9p: Multiple devices detected in same VirtFS export. "
                    "Access of guest to additional devices is (partly) "
                    "denied due to virtfs option 'multidevs=forbid' being "
                    "effective."
                );
                return -ENODEV;
            } else {
                warn_report_once(
                    "9p: Multiple devices detected in same VirtFS export, "
                    "which might lead to file ID collisions and severe "
                    "misbehaviours on guest! You should either use a "
                    "separate export for each device shared from host or "
                    "use virtfs option 'multidevs=remap'!"
                );
            }
        }
        memset(&qidp->path, 0, sizeof(qidp->path));
        size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path));
        memcpy(&qidp->path, &stbuf->st_ino, size);
    }

    qidp->version = stbuf->st_mtime ^ (stbuf->st_size << 8);
    qidp->type = 0;
    if (S_ISDIR(stbuf->st_mode)) {
        qidp->type |= P9_QID_TYPE_DIR;
    }
    if (S_ISLNK(stbuf->st_mode)) {
        qidp->type |= P9_QID_TYPE_SYMLINK;
    }

    return 0;
}

/*
 * Take a PDU off the free list and move it to the active list.
 * Returns NULL if no PDU is available.
 */
V9fsPDU *pdu_alloc(V9fsState *s)
{
    V9fsPDU *pdu = NULL;

    if (!QLIST_EMPTY(&s->free_list)) {
        pdu = QLIST_FIRST(&s->free_list);
        QLIST_REMOVE(pdu, next);
        QLIST_INSERT_HEAD(&s->active_list, pdu, next);
    }
    return pdu;
}

/* Return a finished PDU from the active list back to the free list. */
void pdu_free(V9fsPDU *pdu)
{
    V9fsState *s = pdu->s;

    g_assert(!pdu->cancelled);
    QLIST_REMOVE(pdu, next);
    QLIST_INSERT_HEAD(&s->free_list, pdu, next);
}

/*
 * Finalize @pdu and push the reply to the client. @len is the reply size,
 * or a negative errno which is converted into a Rerror/Rlerror reply.
 */
static void coroutine_fn pdu_complete(V9fsPDU *pdu, ssize_t len)
{
    int8_t id = pdu->id + 1; /* Response */
    V9fsState *s = pdu->s;
    int ret;

    /*
     * The 9p spec requires that successfully cancelled pdus receive no reply.
     * Sending a reply would confuse clients because they would
     * assume that any EINTR is the actual result of the operation,
     * rather than a consequence of the cancellation. However, if
     * the operation completed (successfully or with an error other
     * than caused by cancellation), we do send out that reply, both
     * for efficiency and to avoid confusing the rest of the state machine
     * that assumes passing a non-error here will mean a successful
     * transmission of the reply.
     */
    bool discard = pdu->cancelled && len == -EINTR;
    if (discard) {
        trace_v9fs_rcancel(pdu->tag, pdu->id);
        pdu->size = 0;
        goto out_notify;
    }

    if (len < 0) {
        int err = -len;
        len = 7;

        if (s->proto_version != V9FS_PROTO_2000L) {
            V9fsString str;

            str.data = strerror(err);
            str.size = strlen(str.data);

            ret = pdu_marshal(pdu, len, "s", &str);
            if (ret < 0) {
                goto out_notify;
            }
            len += ret;
            id = P9_RERROR;
        } else {
            err = errno_to_dotl(err);
        }

        ret = pdu_marshal(pdu, len, "d", err);
        if (ret < 0) {
            goto out_notify;
        }
        len += ret;

        if (s->proto_version == V9FS_PROTO_2000L) {
            id = P9_RLERROR;
        }
        trace_v9fs_rerror(pdu->tag, pdu->id, err); /* Trace ERROR */
    }

    /* fill out the header */
    if (pdu_marshal(pdu, 0, "dbw", (int32_t)len, id, pdu->tag) < 0) {
        goto out_notify;
    }

    /* keep these in sync */
    pdu->size = len;
    pdu->id = id;

out_notify:
    pdu->s->transport->push_and_notify(pdu);

    /* Now wakeup anybody waiting in flush for this request */
    if (!qemu_co_queue_next(&pdu->complete)) {
        pdu_free(pdu);
    }
}

/* Map a 9p2000.u wire mode (plus extension string) to a host mode_t. */
static mode_t v9mode_to_mode(uint32_t mode, V9fsString *extension)
{
    mode_t ret;

    ret = mode & 0777;
    if (mode & P9_STAT_MODE_DIR) {
        ret |= S_IFDIR;
    }

    if (mode & P9_STAT_MODE_SYMLINK) {
        ret |= S_IFLNK;
    }
    if (mode & P9_STAT_MODE_SOCKET) {
        ret |= S_IFSOCK;
    }
    if (mode & P9_STAT_MODE_NAMED_PIPE) {
        ret |= S_IFIFO;
    }
    if (mode & P9_STAT_MODE_DEVICE) {
        /* extension "c ..." means character device, otherwise block */
        if (extension->size && extension->data[0] == 'c') {
            ret |= S_IFCHR;
        } else {
            ret |= S_IFBLK;
        }
    }

    if (!(ret & ~0777)) {
        /* no type bit was set: plain regular file */
        ret |= S_IFREG;
    }

    if (mode & P9_STAT_MODE_SETUID) {
        ret |= S_ISUID;
    }
    if (mode & P9_STAT_MODE_SETGID) {
        ret |= S_ISGID;
    }
    if (mode & P9_STAT_MODE_SETVTX) {
        ret |= S_ISVTX;
    }

    return ret;
}

/*
 * Return 1 if every field of @stat carries the "don't touch" sentinel
 * value of a Twstat request (i.e. nothing is to be changed), else 0.
 */
static int donttouch_stat(V9fsStat *stat)
{
    if (stat->type == -1 &&
        stat->dev == -1 &&
        stat->qid.type == 0xff &&
        stat->qid.version == (uint32_t) -1 &&
        stat->qid.path == (uint64_t) -1 &&
        stat->mode == -1 &&
        stat->atime == -1 &&
        stat->mtime == -1 &&
        stat->length == -1 &&
        !stat->name.size &&
        !stat->uid.size &&
        !stat->gid.size &&
        !stat->muid.size &&
        stat->n_uid == -1 &&
        stat->n_gid == -1 &&
        stat->n_muid == -1) {
        return 1;
    }

    return 0;
}

/* Initialize all string members of @stat to empty. */
static void v9fs_stat_init(V9fsStat *stat)
{
    v9fs_string_init(&stat->name);
    v9fs_string_init(&stat->uid);
    v9fs_string_init(&stat->gid);
    v9fs_string_init(&stat->muid);
    v9fs_string_init(&stat->extension);
}

/* Release all string members of @stat. */
static void v9fs_stat_free(V9fsStat *stat)
{
    v9fs_string_free(&stat->name);
    v9fs_string_free(&stat->uid);
    v9fs_string_free(&stat->gid);
    v9fs_string_free(&stat->muid);
    v9fs_string_free(&stat->extension);
}

/* Map a host st_mode to the 9p2000.u wire mode representation. */
static uint32_t stat_to_v9mode(const struct stat *stbuf)
{
    uint32_t mode;

    mode = stbuf->st_mode & 0777;
    if (S_ISDIR(stbuf->st_mode)) {
        mode |= P9_STAT_MODE_DIR;
    }

    if (S_ISLNK(stbuf->st_mode)) {
        mode |= P9_STAT_MODE_SYMLINK;
    }

    if (S_ISSOCK(stbuf->st_mode)) {
        mode |= P9_STAT_MODE_SOCKET;
    }

    if (S_ISFIFO(stbuf->st_mode)) {
        mode |= P9_STAT_MODE_NAMED_PIPE;
    }

    if (S_ISBLK(stbuf->st_mode) || S_ISCHR(stbuf->st_mode)) {
        mode |= P9_STAT_MODE_DEVICE;
    }

    if (stbuf->st_mode & S_ISUID) {
        mode |= P9_STAT_MODE_SETUID;
    }

    if (stbuf->st_mode & S_ISGID) {
        mode |= P9_STAT_MODE_SETGID;
    }

    if (stbuf->st_mode & S_ISVTX) {
        mode |= P9_STAT_MODE_SETVTX;
    }

    return mode;
}

/*
 * Build a 9p2000.u V9fsStat reply structure from host stat data; may
 * yield (readlink for symlinks).
 */
static int coroutine_fn stat_to_v9stat(V9fsPDU *pdu, V9fsPath *path,
                                       const char *basename,
                                       const struct stat *stbuf,
                                       V9fsStat *v9stat)
{
    int err;

    memset(v9stat, 0, sizeof(*v9stat));

    err = stat_to_qid(pdu, stbuf, &v9stat->qid);
    if (err < 0) {
        return err;
    }
    v9stat->mode = stat_to_v9mode(stbuf);
    v9stat->atime = stbuf->st_atime;
    v9stat->mtime = stbuf->st_mtime;
    v9stat->length = stbuf->st_size;

    v9fs_string_free(&v9stat->uid);
    v9fs_string_free(&v9stat->gid);
    v9fs_string_free(&v9stat->muid);

    v9stat->n_uid = stbuf->st_uid;
    v9stat->n_gid = stbuf->st_gid;
    v9stat->n_muid = 0;

    v9fs_string_free(&v9stat->extension);

    if (v9stat->mode & P9_STAT_MODE_SYMLINK) {
        err = v9fs_co_readlink(pdu, path, &v9stat->extension);
        if (err < 0) {
            return err;
        }
    } else if (v9stat->mode & P9_STAT_MODE_DEVICE) {
        v9fs_string_sprintf(&v9stat->extension, "%c %u %u",
                            S_ISCHR(stbuf->st_mode) ?
'c' : 'b', 1269 major(stbuf->st_rdev), minor(stbuf->st_rdev)); 1270 } else if (S_ISDIR(stbuf->st_mode) || S_ISREG(stbuf->st_mode)) { 1271 v9fs_string_sprintf(&v9stat->extension, "%s %lu", 1272 "HARDLINKCOUNT", (unsigned long)stbuf->st_nlink); 1273 } 1274 1275 v9fs_string_sprintf(&v9stat->name, "%s", basename); 1276 1277 v9stat->size = 61 + 1278 v9fs_string_size(&v9stat->name) + 1279 v9fs_string_size(&v9stat->uid) + 1280 v9fs_string_size(&v9stat->gid) + 1281 v9fs_string_size(&v9stat->muid) + 1282 v9fs_string_size(&v9stat->extension); 1283 return 0; 1284 } 1285 1286 #define P9_STATS_MODE 0x00000001ULL 1287 #define P9_STATS_NLINK 0x00000002ULL 1288 #define P9_STATS_UID 0x00000004ULL 1289 #define P9_STATS_GID 0x00000008ULL 1290 #define P9_STATS_RDEV 0x00000010ULL 1291 #define P9_STATS_ATIME 0x00000020ULL 1292 #define P9_STATS_MTIME 0x00000040ULL 1293 #define P9_STATS_CTIME 0x00000080ULL 1294 #define P9_STATS_INO 0x00000100ULL 1295 #define P9_STATS_SIZE 0x00000200ULL 1296 #define P9_STATS_BLOCKS 0x00000400ULL 1297 1298 #define P9_STATS_BTIME 0x00000800ULL 1299 #define P9_STATS_GEN 0x00001000ULL 1300 #define P9_STATS_DATA_VERSION 0x00002000ULL 1301 1302 #define P9_STATS_BASIC 0x000007ffULL /* Mask for fields up to BLOCKS */ 1303 #define P9_STATS_ALL 0x00003fffULL /* Mask for All fields above */ 1304 1305 1306 /** 1307 * blksize_to_iounit() - Block size exposed to 9p client. 1308 * Return: block size 1309 * 1310 * @pdu: 9p client request 1311 * @blksize: host filesystem's block size 1312 * 1313 * Convert host filesystem's block size into an appropriate block size for 1314 * 9p client (guest OS side). The value returned suggests an "optimum" block 1315 * size for 9p I/O, i.e. to maximize performance. 
1316 */ 1317 static int32_t blksize_to_iounit(const V9fsPDU *pdu, int32_t blksize) 1318 { 1319 int32_t iounit = 0; 1320 V9fsState *s = pdu->s; 1321 1322 /* 1323 * iounit should be multiples of blksize (host filesystem block size) 1324 * as well as less than (client msize - P9_IOHDRSZ) 1325 */ 1326 if (blksize) { 1327 iounit = QEMU_ALIGN_DOWN(s->msize - P9_IOHDRSZ, blksize); 1328 } 1329 if (!iounit) { 1330 iounit = s->msize - P9_IOHDRSZ; 1331 } 1332 return iounit; 1333 } 1334 1335 static int32_t stat_to_iounit(const V9fsPDU *pdu, const struct stat *stbuf) 1336 { 1337 return blksize_to_iounit(pdu, stbuf->st_blksize); 1338 } 1339 1340 static int stat_to_v9stat_dotl(V9fsPDU *pdu, const struct stat *stbuf, 1341 V9fsStatDotl *v9lstat) 1342 { 1343 memset(v9lstat, 0, sizeof(*v9lstat)); 1344 1345 v9lstat->st_mode = stbuf->st_mode; 1346 v9lstat->st_nlink = stbuf->st_nlink; 1347 v9lstat->st_uid = stbuf->st_uid; 1348 v9lstat->st_gid = stbuf->st_gid; 1349 v9lstat->st_rdev = host_dev_to_dotl_dev(stbuf->st_rdev); 1350 v9lstat->st_size = stbuf->st_size; 1351 v9lstat->st_blksize = stat_to_iounit(pdu, stbuf); 1352 v9lstat->st_blocks = stbuf->st_blocks; 1353 v9lstat->st_atime_sec = stbuf->st_atime; 1354 v9lstat->st_mtime_sec = stbuf->st_mtime; 1355 v9lstat->st_ctime_sec = stbuf->st_ctime; 1356 #ifdef CONFIG_DARWIN 1357 v9lstat->st_atime_nsec = stbuf->st_atimespec.tv_nsec; 1358 v9lstat->st_mtime_nsec = stbuf->st_mtimespec.tv_nsec; 1359 v9lstat->st_ctime_nsec = stbuf->st_ctimespec.tv_nsec; 1360 #else 1361 v9lstat->st_atime_nsec = stbuf->st_atim.tv_nsec; 1362 v9lstat->st_mtime_nsec = stbuf->st_mtim.tv_nsec; 1363 v9lstat->st_ctime_nsec = stbuf->st_ctim.tv_nsec; 1364 #endif 1365 /* Currently we only support BASIC fields in stat */ 1366 v9lstat->st_result_mask = P9_STATS_BASIC; 1367 1368 return stat_to_qid(pdu, stbuf, &v9lstat->qid); 1369 } 1370 1371 static void print_sg(struct iovec *sg, int cnt) 1372 { 1373 int i; 1374 1375 printf("sg[%d]: {", cnt); 1376 for (i = 0; i < cnt; i++) { 1377 
if (i) { 1378 printf(", "); 1379 } 1380 printf("(%p, %zd)", sg[i].iov_base, sg[i].iov_len); 1381 } 1382 printf("}\n"); 1383 } 1384 1385 /* Will call this only for path name based fid */ 1386 static void v9fs_fix_path(V9fsPath *dst, V9fsPath *src, int len) 1387 { 1388 V9fsPath str; 1389 v9fs_path_init(&str); 1390 v9fs_path_copy(&str, dst); 1391 v9fs_path_sprintf(dst, "%s%s", src->data, str.data + len); 1392 v9fs_path_free(&str); 1393 } 1394 1395 static inline bool is_ro_export(FsContext *ctx) 1396 { 1397 return ctx->export_flags & V9FS_RDONLY; 1398 } 1399 1400 static void coroutine_fn v9fs_version(void *opaque) 1401 { 1402 ssize_t err; 1403 V9fsPDU *pdu = opaque; 1404 V9fsState *s = pdu->s; 1405 V9fsString version; 1406 size_t offset = 7; 1407 1408 v9fs_string_init(&version); 1409 err = pdu_unmarshal(pdu, offset, "ds", &s->msize, &version); 1410 if (err < 0) { 1411 goto out; 1412 } 1413 trace_v9fs_version(pdu->tag, pdu->id, s->msize, version.data); 1414 1415 virtfs_reset(pdu); 1416 1417 if (!strcmp(version.data, "9P2000.u")) { 1418 s->proto_version = V9FS_PROTO_2000U; 1419 } else if (!strcmp(version.data, "9P2000.L")) { 1420 s->proto_version = V9FS_PROTO_2000L; 1421 } else { 1422 v9fs_string_sprintf(&version, "unknown"); 1423 /* skip min. msize check, reporting invalid version has priority */ 1424 goto marshal; 1425 } 1426 1427 if (s->msize < P9_MIN_MSIZE) { 1428 err = -EMSGSIZE; 1429 error_report( 1430 "9pfs: Client requested msize < minimum msize (" 1431 stringify(P9_MIN_MSIZE) ") supported by this server." 1432 ); 1433 goto out; 1434 } 1435 1436 /* 8192 is the default msize of Linux clients */ 1437 if (s->msize <= 8192 && !(s->ctx.export_flags & V9FS_NO_PERF_WARN)) { 1438 warn_report_once( 1439 "9p: degraded performance: a reasonable high msize should be " 1440 "chosen on client/guest side (chosen msize is <= 8192). See " 1441 "https://wiki.qemu.org/Documentation/9psetup#msize for details." 
1442 ); 1443 } 1444 1445 marshal: 1446 err = pdu_marshal(pdu, offset, "ds", s->msize, &version); 1447 if (err < 0) { 1448 goto out; 1449 } 1450 err += offset; 1451 trace_v9fs_version_return(pdu->tag, pdu->id, s->msize, version.data); 1452 out: 1453 pdu_complete(pdu, err); 1454 v9fs_string_free(&version); 1455 } 1456 1457 static void coroutine_fn v9fs_attach(void *opaque) 1458 { 1459 V9fsPDU *pdu = opaque; 1460 V9fsState *s = pdu->s; 1461 int32_t fid, afid, n_uname; 1462 V9fsString uname, aname; 1463 V9fsFidState *fidp; 1464 size_t offset = 7; 1465 V9fsQID qid; 1466 ssize_t err; 1467 struct stat stbuf; 1468 1469 v9fs_string_init(&uname); 1470 v9fs_string_init(&aname); 1471 err = pdu_unmarshal(pdu, offset, "ddssd", &fid, 1472 &afid, &uname, &aname, &n_uname); 1473 if (err < 0) { 1474 goto out_nofid; 1475 } 1476 trace_v9fs_attach(pdu->tag, pdu->id, fid, afid, uname.data, aname.data); 1477 1478 fidp = alloc_fid(s, fid); 1479 if (fidp == NULL) { 1480 err = -EINVAL; 1481 goto out_nofid; 1482 } 1483 fidp->uid = n_uname; 1484 err = v9fs_co_name_to_path(pdu, NULL, "/", &fidp->path); 1485 if (err < 0) { 1486 err = -EINVAL; 1487 clunk_fid(s, fid); 1488 goto out; 1489 } 1490 err = v9fs_co_lstat(pdu, &fidp->path, &stbuf); 1491 if (err < 0) { 1492 err = -EINVAL; 1493 clunk_fid(s, fid); 1494 goto out; 1495 } 1496 err = stat_to_qid(pdu, &stbuf, &qid); 1497 if (err < 0) { 1498 err = -EINVAL; 1499 clunk_fid(s, fid); 1500 goto out; 1501 } 1502 1503 /* 1504 * disable migration if we haven't done already. 1505 * attach could get called multiple times for the same export. 1506 */ 1507 if (!s->migration_blocker) { 1508 error_setg(&s->migration_blocker, 1509 "Migration is disabled when VirtFS export path '%s' is mounted in the guest using mount_tag '%s'", 1510 s->ctx.fs_root ? 
s->ctx.fs_root : "NULL", s->tag); 1511 err = migrate_add_blocker(s->migration_blocker, NULL); 1512 if (err < 0) { 1513 error_free(s->migration_blocker); 1514 s->migration_blocker = NULL; 1515 clunk_fid(s, fid); 1516 goto out; 1517 } 1518 s->root_fid = fid; 1519 } 1520 1521 err = pdu_marshal(pdu, offset, "Q", &qid); 1522 if (err < 0) { 1523 clunk_fid(s, fid); 1524 goto out; 1525 } 1526 err += offset; 1527 1528 memcpy(&s->root_st, &stbuf, sizeof(stbuf)); 1529 trace_v9fs_attach_return(pdu->tag, pdu->id, 1530 qid.type, qid.version, qid.path); 1531 out: 1532 put_fid(pdu, fidp); 1533 out_nofid: 1534 pdu_complete(pdu, err); 1535 v9fs_string_free(&uname); 1536 v9fs_string_free(&aname); 1537 } 1538 1539 static void coroutine_fn v9fs_stat(void *opaque) 1540 { 1541 int32_t fid; 1542 V9fsStat v9stat; 1543 ssize_t err = 0; 1544 size_t offset = 7; 1545 struct stat stbuf; 1546 V9fsFidState *fidp; 1547 V9fsPDU *pdu = opaque; 1548 char *basename; 1549 1550 err = pdu_unmarshal(pdu, offset, "d", &fid); 1551 if (err < 0) { 1552 goto out_nofid; 1553 } 1554 trace_v9fs_stat(pdu->tag, pdu->id, fid); 1555 1556 fidp = get_fid(pdu, fid); 1557 if (fidp == NULL) { 1558 err = -ENOENT; 1559 goto out_nofid; 1560 } 1561 err = v9fs_co_lstat(pdu, &fidp->path, &stbuf); 1562 if (err < 0) { 1563 goto out; 1564 } 1565 basename = g_path_get_basename(fidp->path.data); 1566 err = stat_to_v9stat(pdu, &fidp->path, basename, &stbuf, &v9stat); 1567 g_free(basename); 1568 if (err < 0) { 1569 goto out; 1570 } 1571 err = pdu_marshal(pdu, offset, "wS", 0, &v9stat); 1572 if (err < 0) { 1573 v9fs_stat_free(&v9stat); 1574 goto out; 1575 } 1576 trace_v9fs_stat_return(pdu->tag, pdu->id, v9stat.mode, 1577 v9stat.atime, v9stat.mtime, v9stat.length); 1578 err += offset; 1579 v9fs_stat_free(&v9stat); 1580 out: 1581 put_fid(pdu, fidp); 1582 out_nofid: 1583 pdu_complete(pdu, err); 1584 } 1585 1586 static void coroutine_fn v9fs_getattr(void *opaque) 1587 { 1588 int32_t fid; 1589 size_t offset = 7; 1590 ssize_t retval = 0; 
1591 struct stat stbuf; 1592 V9fsFidState *fidp; 1593 uint64_t request_mask; 1594 V9fsStatDotl v9stat_dotl; 1595 V9fsPDU *pdu = opaque; 1596 1597 retval = pdu_unmarshal(pdu, offset, "dq", &fid, &request_mask); 1598 if (retval < 0) { 1599 goto out_nofid; 1600 } 1601 trace_v9fs_getattr(pdu->tag, pdu->id, fid, request_mask); 1602 1603 fidp = get_fid(pdu, fid); 1604 if (fidp == NULL) { 1605 retval = -ENOENT; 1606 goto out_nofid; 1607 } 1608 /* 1609 * Currently we only support BASIC fields in stat, so there is no 1610 * need to look at request_mask. 1611 */ 1612 retval = v9fs_co_lstat(pdu, &fidp->path, &stbuf); 1613 if (retval < 0) { 1614 goto out; 1615 } 1616 retval = stat_to_v9stat_dotl(pdu, &stbuf, &v9stat_dotl); 1617 if (retval < 0) { 1618 goto out; 1619 } 1620 1621 /* fill st_gen if requested and supported by underlying fs */ 1622 if (request_mask & P9_STATS_GEN) { 1623 retval = v9fs_co_st_gen(pdu, &fidp->path, stbuf.st_mode, &v9stat_dotl); 1624 switch (retval) { 1625 case 0: 1626 /* we have valid st_gen: update result mask */ 1627 v9stat_dotl.st_result_mask |= P9_STATS_GEN; 1628 break; 1629 case -EINTR: 1630 /* request cancelled, e.g. 
by Tflush */ 1631 goto out; 1632 default: 1633 /* failed to get st_gen: not fatal, ignore */ 1634 break; 1635 } 1636 } 1637 retval = pdu_marshal(pdu, offset, "A", &v9stat_dotl); 1638 if (retval < 0) { 1639 goto out; 1640 } 1641 retval += offset; 1642 trace_v9fs_getattr_return(pdu->tag, pdu->id, v9stat_dotl.st_result_mask, 1643 v9stat_dotl.st_mode, v9stat_dotl.st_uid, 1644 v9stat_dotl.st_gid); 1645 out: 1646 put_fid(pdu, fidp); 1647 out_nofid: 1648 pdu_complete(pdu, retval); 1649 } 1650 1651 /* Attribute flags */ 1652 #define P9_ATTR_MODE (1 << 0) 1653 #define P9_ATTR_UID (1 << 1) 1654 #define P9_ATTR_GID (1 << 2) 1655 #define P9_ATTR_SIZE (1 << 3) 1656 #define P9_ATTR_ATIME (1 << 4) 1657 #define P9_ATTR_MTIME (1 << 5) 1658 #define P9_ATTR_CTIME (1 << 6) 1659 #define P9_ATTR_ATIME_SET (1 << 7) 1660 #define P9_ATTR_MTIME_SET (1 << 8) 1661 1662 #define P9_ATTR_MASK 127 1663 1664 static void coroutine_fn v9fs_setattr(void *opaque) 1665 { 1666 int err = 0; 1667 int32_t fid; 1668 V9fsFidState *fidp; 1669 size_t offset = 7; 1670 V9fsIattr v9iattr; 1671 V9fsPDU *pdu = opaque; 1672 1673 err = pdu_unmarshal(pdu, offset, "dI", &fid, &v9iattr); 1674 if (err < 0) { 1675 goto out_nofid; 1676 } 1677 1678 trace_v9fs_setattr(pdu->tag, pdu->id, fid, 1679 v9iattr.valid, v9iattr.mode, v9iattr.uid, v9iattr.gid, 1680 v9iattr.size, v9iattr.atime_sec, v9iattr.mtime_sec); 1681 1682 fidp = get_fid(pdu, fid); 1683 if (fidp == NULL) { 1684 err = -EINVAL; 1685 goto out_nofid; 1686 } 1687 if (v9iattr.valid & P9_ATTR_MODE) { 1688 err = v9fs_co_chmod(pdu, &fidp->path, v9iattr.mode); 1689 if (err < 0) { 1690 goto out; 1691 } 1692 } 1693 if (v9iattr.valid & (P9_ATTR_ATIME | P9_ATTR_MTIME)) { 1694 struct timespec times[2]; 1695 if (v9iattr.valid & P9_ATTR_ATIME) { 1696 if (v9iattr.valid & P9_ATTR_ATIME_SET) { 1697 times[0].tv_sec = v9iattr.atime_sec; 1698 times[0].tv_nsec = v9iattr.atime_nsec; 1699 } else { 1700 times[0].tv_nsec = UTIME_NOW; 1701 } 1702 } else { 1703 times[0].tv_nsec = UTIME_OMIT; 
1704 } 1705 if (v9iattr.valid & P9_ATTR_MTIME) { 1706 if (v9iattr.valid & P9_ATTR_MTIME_SET) { 1707 times[1].tv_sec = v9iattr.mtime_sec; 1708 times[1].tv_nsec = v9iattr.mtime_nsec; 1709 } else { 1710 times[1].tv_nsec = UTIME_NOW; 1711 } 1712 } else { 1713 times[1].tv_nsec = UTIME_OMIT; 1714 } 1715 err = v9fs_co_utimensat(pdu, &fidp->path, times); 1716 if (err < 0) { 1717 goto out; 1718 } 1719 } 1720 /* 1721 * If the only valid entry in iattr is ctime we can call 1722 * chown(-1,-1) to update the ctime of the file 1723 */ 1724 if ((v9iattr.valid & (P9_ATTR_UID | P9_ATTR_GID)) || 1725 ((v9iattr.valid & P9_ATTR_CTIME) 1726 && !((v9iattr.valid & P9_ATTR_MASK) & ~P9_ATTR_CTIME))) { 1727 if (!(v9iattr.valid & P9_ATTR_UID)) { 1728 v9iattr.uid = -1; 1729 } 1730 if (!(v9iattr.valid & P9_ATTR_GID)) { 1731 v9iattr.gid = -1; 1732 } 1733 err = v9fs_co_chown(pdu, &fidp->path, v9iattr.uid, 1734 v9iattr.gid); 1735 if (err < 0) { 1736 goto out; 1737 } 1738 } 1739 if (v9iattr.valid & (P9_ATTR_SIZE)) { 1740 err = v9fs_co_truncate(pdu, &fidp->path, v9iattr.size); 1741 if (err < 0) { 1742 goto out; 1743 } 1744 } 1745 err = offset; 1746 trace_v9fs_setattr_return(pdu->tag, pdu->id); 1747 out: 1748 put_fid(pdu, fidp); 1749 out_nofid: 1750 pdu_complete(pdu, err); 1751 } 1752 1753 static int v9fs_walk_marshal(V9fsPDU *pdu, uint16_t nwnames, V9fsQID *qids) 1754 { 1755 int i; 1756 ssize_t err; 1757 size_t offset = 7; 1758 1759 err = pdu_marshal(pdu, offset, "w", nwnames); 1760 if (err < 0) { 1761 return err; 1762 } 1763 offset += err; 1764 for (i = 0; i < nwnames; i++) { 1765 err = pdu_marshal(pdu, offset, "Q", &qids[i]); 1766 if (err < 0) { 1767 return err; 1768 } 1769 offset += err; 1770 } 1771 return offset; 1772 } 1773 1774 static bool name_is_illegal(const char *name) 1775 { 1776 return !*name || strchr(name, '/') != NULL; 1777 } 1778 1779 static bool same_stat_id(const struct stat *a, const struct stat *b) 1780 { 1781 return a->st_dev == b->st_dev && a->st_ino == b->st_ino; 1782 } 1783 
/*
 * Twalk handler: walk from fid through up to P9_MAXWELEM path elements,
 * producing newfid. All fs driver calls (lstat / name_to_path) are batched
 * into a single worker-thread block to minimize latency; qid conversion
 * and fid bookkeeping then happen back on the main thread.
 *
 * A walk of ".." at the export root is deliberately treated as a no-op
 * (stays at the root) — see the same_stat_id(&root_st, ...) checks.
 * Partial success returns the qids walked so far (Rwalk), leaving the
 * fids untouched, per the 9p protocol; -EINTR short-circuits to Rlerror.
 */
static void coroutine_fn v9fs_walk(void *opaque)
{
    int name_idx, nwalked;
    g_autofree V9fsQID *qids = NULL;
    int i, err = 0, any_err = 0;
    V9fsPath dpath, path;
    P9ARRAY_REF(V9fsPath) pathes = NULL;
    uint16_t nwnames;
    struct stat stbuf, fidst;
    g_autofree struct stat *stbufs = NULL;
    size_t offset = 7;
    int32_t fid, newfid;
    P9ARRAY_REF(V9fsString) wnames = NULL;
    V9fsFidState *fidp;
    V9fsFidState *newfidp = NULL;
    V9fsPDU *pdu = opaque;
    V9fsState *s = pdu->s;
    V9fsQID qid;

    err = pdu_unmarshal(pdu, offset, "ddw", &fid, &newfid, &nwnames);
    if (err < 0) {
        pdu_complete(pdu, err);
        return ;
    }
    offset += err;

    trace_v9fs_walk(pdu->tag, pdu->id, fid, newfid, nwnames);

    if (nwnames > P9_MAXWELEM) {
        err = -EINVAL;
        goto out_nofid;
    }
    if (nwnames) {
        P9ARRAY_NEW(V9fsString, wnames, nwnames);
        qids = g_new0(V9fsQID, nwnames);
        stbufs = g_new0(struct stat, nwnames);
        P9ARRAY_NEW(V9fsPath, pathes, nwnames);
        for (i = 0; i < nwnames; i++) {
            err = pdu_unmarshal(pdu, offset, "s", &wnames[i]);
            if (err < 0) {
                goto out_nofid;
            }
            if (name_is_illegal(wnames[i].data)) {
                err = -ENOENT;
                goto out_nofid;
            }
            offset += err;
        }
    }
    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -ENOENT;
        goto out_nofid;
    }

    v9fs_path_init(&dpath);
    v9fs_path_init(&path);
    /*
     * Both dpath and path initially point to fidp.
     * Needed to handle request with nwnames == 0
     */
    v9fs_path_copy(&dpath, &fidp->path);
    v9fs_path_copy(&path, &fidp->path);

    /*
     * To keep latency (i.e. overall execution time for processing this
     * Twalk client request) as small as possible, run all the required fs
     * driver code altogether inside the following block.
     */
    v9fs_co_run_in_worker({
        nwalked = 0;
        if (v9fs_request_cancelled(pdu)) {
            any_err |= err = -EINTR;
            break;
        }
        err = s->ops->lstat(&s->ctx, &dpath, &fidst);
        if (err < 0) {
            any_err |= err = -errno;
            break;
        }
        stbuf = fidst;
        for (; nwalked < nwnames; nwalked++) {
            if (v9fs_request_cancelled(pdu)) {
                any_err |= err = -EINTR;
                break;
            }
            if (!same_stat_id(&pdu->s->root_st, &stbuf) ||
                strcmp("..", wnames[nwalked].data))
            {
                err = s->ops->name_to_path(&s->ctx, &dpath,
                                           wnames[nwalked].data,
                                           &pathes[nwalked]);
                if (err < 0) {
                    any_err |= err = -errno;
                    break;
                }
                if (v9fs_request_cancelled(pdu)) {
                    any_err |= err = -EINTR;
                    break;
                }
                err = s->ops->lstat(&s->ctx, &pathes[nwalked], &stbuf);
                if (err < 0) {
                    any_err |= err = -errno;
                    break;
                }
                stbufs[nwalked] = stbuf;
                v9fs_path_copy(&dpath, &pathes[nwalked]);
            }
        }
    });
    /*
     * Handle all the rest of this Twalk request on main thread ...
     *
     * NOTE: -EINTR is an exception where we deviate from the protocol spec
     * and simply send a (R)Lerror response instead of bothering to assemble
     * a (deducted) Rwalk response; because -EINTR is always the result of a
     * Tflush request, so client would no longer wait for a response in this
     * case anyway.
     */
    if ((err < 0 && !nwalked) || err == -EINTR) {
        goto out;
    }

    any_err |= err = stat_to_qid(pdu, &fidst, &qid);
    if (err < 0 && !nwalked) {
        goto out;
    }
    stbuf = fidst;

    /* reset dpath and path */
    v9fs_path_copy(&dpath, &fidp->path);
    v9fs_path_copy(&path, &fidp->path);

    for (name_idx = 0; name_idx < nwalked; name_idx++) {
        if (!same_stat_id(&pdu->s->root_st, &stbuf) ||
            strcmp("..", wnames[name_idx].data))
        {
            stbuf = stbufs[name_idx];
            any_err |= err = stat_to_qid(pdu, &stbuf, &qid);
            if (err < 0) {
                break;
            }
            v9fs_path_copy(&path, &pathes[name_idx]);
            v9fs_path_copy(&dpath, &path);
        }
        memcpy(&qids[name_idx], &qid, sizeof(qid));
    }
    if (any_err < 0) {
        if (!name_idx) {
            /* don't send any QIDs, send Rlerror instead */
            goto out;
        } else {
            /* send QIDs (not Rlerror), but fid MUST remain unaffected */
            goto send_qids;
        }
    }
    if (fid == newfid) {
        if (fidp->fid_type != P9_FID_NONE) {
            err = -EINVAL;
            goto out;
        }
        v9fs_path_write_lock(s);
        v9fs_path_copy(&fidp->path, &path);
        v9fs_path_unlock(s);
    } else {
        newfidp = alloc_fid(s, newfid);
        if (newfidp == NULL) {
            err = -EINVAL;
            goto out;
        }
        newfidp->uid = fidp->uid;
        v9fs_path_copy(&newfidp->path, &path);
    }
send_qids:
    err = v9fs_walk_marshal(pdu, name_idx, qids);
    trace_v9fs_walk_return(pdu->tag, pdu->id, name_idx, qids);
out:
    put_fid(pdu, fidp);
    if (newfidp) {
        put_fid(pdu, newfidp);
    }
    v9fs_path_free(&dpath);
    v9fs_path_free(&path);
out_nofid:
    pdu_complete(pdu, err);
}

/*
 * iounit for @path, derived from the host filesystem's statfs block size;
 * falls back to blksize 0 handling (msize - P9_IOHDRSZ) if statfs fails.
 */
static int32_t coroutine_fn get_iounit(V9fsPDU *pdu, V9fsPath *path)
{
    struct statfs stbuf;
    int err = v9fs_co_statfs(pdu, path, &stbuf);

    return blksize_to_iounit(pdu, (err >= 0) ? stbuf.f_bsize : 0);
}

/*
 * Topen / Tlopen handler: open the file or directory behind an existing
 * fid. Directories get an opendir; regular files are opened with flags
 * mapped from either dotl or 9P2000.u mode bits.
 */
static void coroutine_fn v9fs_open(void *opaque)
{
    int flags;
    int32_t fid;
    int32_t mode;
    V9fsQID qid;
    int iounit = 0;
    ssize_t err = 0;
    size_t offset = 7;
    struct stat stbuf;
    V9fsFidState *fidp;
    V9fsPDU *pdu = opaque;
    V9fsState *s = pdu->s;

    if (s->proto_version == V9FS_PROTO_2000L) {
        err = pdu_unmarshal(pdu, offset, "dd", &fid, &mode);
    } else {
        uint8_t modebyte;
        err = pdu_unmarshal(pdu, offset, "db", &fid, &modebyte);
        mode = modebyte;
    }
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_open(pdu->tag, pdu->id, fid, mode);

    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -ENOENT;
        goto out_nofid;
    }
    /* fid must not already be open */
    if (fidp->fid_type != P9_FID_NONE) {
        err = -EINVAL;
        goto out;
    }

    err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
    if (err < 0) {
        goto out;
    }
    err = stat_to_qid(pdu, &stbuf, &qid);
    if (err < 0) {
        goto out;
    }
    if (S_ISDIR(stbuf.st_mode)) {
        err = v9fs_co_opendir(pdu, fidp);
        if (err < 0) {
            goto out;
        }
        fidp->fid_type = P9_FID_DIR;
        err = pdu_marshal(pdu, offset, "Qd", &qid, 0);
        if (err < 0) {
            goto out;
        }
        err += offset;
    } else {
        if (s->proto_version == V9FS_PROTO_2000L) {
            flags = get_dotl_openflags(s, mode);
        } else {
            flags = omode_to_uflags(mode);
        }
        if (is_ro_export(&s->ctx)) {
            /*
             * NOTE(review): this tests the raw 9p 'mode' against host O_*
             * bits rather than the converted 'flags' — matches long-standing
             * behavior here, but worth confirming intentional.
             */
            if (mode & O_WRONLY || mode & O_RDWR ||
                mode & O_APPEND || mode & O_TRUNC) {
                err = -EROFS;
                goto out;
            }
        }
        err = v9fs_co_open(pdu, fidp, flags);
        if (err < 0) {
            goto out;
        }
        fidp->fid_type = P9_FID_FILE;
        fidp->open_flags = flags;
        if (flags & O_EXCL) {
            /*
             * We let the host file system do O_EXCL check
             * We should not reclaim such fd
             */
            fidp->flags |= FID_NON_RECLAIMABLE;
        }
        iounit = get_iounit(pdu, &fidp->path);
        err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
        if (err < 0) {
            goto out;
        }
        err += offset;
    }
    trace_v9fs_open_return(pdu->tag, pdu->id,
                           qid.type, qid.version, qid.path, iounit);
out:
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, err);
}

/*
 * Tlcreate handler (9P2000.L): create and open a new file under the
 * directory referenced by dfid; on success the fid is switched to refer
 * to the newly created (and opened) file.
 */
static void coroutine_fn v9fs_lcreate(void *opaque)
{
    int32_t dfid, flags, mode;
    gid_t gid;
    ssize_t err = 0;
    ssize_t offset = 7;
    V9fsString name;
    V9fsFidState *fidp;
    struct stat stbuf;
    V9fsQID qid;
    int32_t iounit;
    V9fsPDU *pdu = opaque;

    v9fs_string_init(&name);
    err = pdu_unmarshal(pdu, offset, "dsddd", &dfid,
                        &name, &flags, &mode, &gid);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_lcreate(pdu->tag, pdu->id, dfid, flags, mode, gid);

    if (name_is_illegal(name.data)) {
        err = -ENOENT;
        goto out_nofid;
    }

    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
        err = -EEXIST;
        goto out_nofid;
    }

    fidp = get_fid(pdu, dfid);
    if (fidp == NULL) {
        err = -ENOENT;
        goto out_nofid;
    }
    if (fidp->fid_type != P9_FID_NONE) {
        err = -EINVAL;
        goto out;
    }

    flags = get_dotl_openflags(pdu->s, flags);
    err = v9fs_co_open2(pdu, fidp, &name, gid,
                        flags | O_CREAT, mode, &stbuf);
    if (err < 0) {
        goto out;
    }
    fidp->fid_type = P9_FID_FILE;
    fidp->open_flags = flags;
    if (flags & O_EXCL) {
        /*
         * We let the host file system do O_EXCL check
         * We should not reclaim such fd
         */
        fidp->flags |= FID_NON_RECLAIMABLE;
    }
    iounit = get_iounit(pdu, &fidp->path);
    err = stat_to_qid(pdu, &stbuf, &qid);
    if (err < 0) {
        goto out;
    }
    err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
    if (err < 0) {
        goto out;
    }
    err += offset;
    trace_v9fs_lcreate_return(pdu->tag, pdu->id,
                              qid.type, qid.version, qid.path, iounit);
out:
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, err);
    v9fs_string_free(&name);
}

/* Tfsync handler: flush the open file behind fid (fdatasync if datasync). */
static void coroutine_fn v9fs_fsync(void *opaque)
{
    int err;
    int32_t fid;
    int datasync;
    size_t offset = 7;
    V9fsFidState *fidp;
    V9fsPDU *pdu = opaque;

    err = pdu_unmarshal(pdu, offset, "dd", &fid, &datasync);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_fsync(pdu->tag, pdu->id, fid, datasync);

    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -ENOENT;
        goto out_nofid;
    }
    err = v9fs_co_fsync(pdu, fidp, datasync);
    if (!err) {
        err = offset;
    }
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, err);
}

/* Tclunk handler: release a fid (and its underlying resources). */
static void coroutine_fn v9fs_clunk(void *opaque)
{
    int err;
    int32_t fid;
    size_t offset = 7;
    V9fsFidState *fidp;
    V9fsPDU *pdu = opaque;
    V9fsState *s = pdu->s;

    err = pdu_unmarshal(pdu, offset, "d", &fid);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_clunk(pdu->tag, pdu->id, fid);

    fidp = clunk_fid(s, fid);
    if (fidp == NULL) {
        err = -ENOENT;
        goto out_nofid;
    }
    /*
     * Bump the ref so that put_fid will
     * free the fid.
     */
    fidp->ref++;
    err = put_fid(pdu, fidp);
    if (!err) {
        err = offset;
    }
out_nofid:
    pdu_complete(pdu, err);
}

/*
 * Create a QEMUIOVector for a sub-region of PDU iovecs
 *
 * @qiov: uninitialized QEMUIOVector
 * @skip: number of bytes to skip from beginning of PDU
 * @size: number of bytes to include
 * @is_write: true - write, false - read
 *
 * The resulting QEMUIOVector has heap-allocated iovecs and must be cleaned up
 * with qemu_iovec_destroy().
 */
static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu,
                                    size_t skip, size_t size,
                                    bool is_write)
{
    QEMUIOVector elem;
    struct iovec *iov;
    unsigned int niov;

    if (is_write) {
        pdu->s->transport->init_out_iov_from_pdu(pdu, &iov, &niov, size + skip);
    } else {
        pdu->s->transport->init_in_iov_from_pdu(pdu, &iov, &niov, size + skip);
    }

    qemu_iovec_init_external(&elem, iov, niov);
    qemu_iovec_init(qiov, niov);
    qemu_iovec_concat(qiov, &elem, skip, size);
}

/*
 * Serve a Tread on an xattr fid: copy up to @max_count bytes of the
 * cached xattr value starting at @off into the reply, preceded by the
 * byte count. Returns the reply offset or a negative error.
 */
static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
                           uint64_t off, uint32_t max_count)
{
    ssize_t err;
    size_t offset = 7;
    uint64_t read_count;
    QEMUIOVector qiov_full;

    if (fidp->fs.xattr.len < off) {
        read_count = 0;
    } else {
        read_count = fidp->fs.xattr.len - off;
    }
    if (read_count > max_count) {
        read_count = max_count;
    }
    err = pdu_marshal(pdu, offset, "d", read_count);
    if (err < 0) {
        return err;
    }
    offset += err;

    v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, read_count, false);
    err = v9fs_pack(qiov_full.iov, qiov_full.niov, 0,
                    ((char *)fidp->fs.xattr.value) + off,
                    read_count);
    qemu_iovec_destroy(&qiov_full);
    if (err < 0) {
        return err;
    }
    offset += err;
    return offset;
}

/*
 * Serve a 9P2000.u Tread on a directory fid: emit as many marshalled
 * stat entries as fit in @max_count bytes. If an entry would overflow
 * the buffer, the directory position is rewound to before that entry so
 * a subsequent Tread continues there. Returns bytes produced or error.
 */
static int coroutine_fn v9fs_do_readdir_with_stat(V9fsPDU *pdu,
                                                  V9fsFidState *fidp,
                                                  uint32_t max_count)
{
    V9fsPath path;
    V9fsStat v9stat;
    int len, err = 0;
    int32_t count = 0;
    struct stat stbuf;
    off_t saved_dir_pos;
    struct dirent *dent;

    /* save the directory position */
    saved_dir_pos = v9fs_co_telldir(pdu, fidp);
    if (saved_dir_pos < 0) {
        return saved_dir_pos;
    }

    while (1) {
        v9fs_path_init(&path);

        /* serialize readdir/seekdir on this directory stream */
        v9fs_readdir_lock(&fidp->fs.dir);

        err = v9fs_co_readdir(pdu, fidp, &dent);
        if (err || !dent) {
            break;
        }
        err = v9fs_co_name_to_path(pdu, &fidp->path, dent->d_name, &path);
        if (err < 0) {
            break;
        }
        err = v9fs_co_lstat(pdu, &path, &stbuf);
        if (err < 0) {
            break;
        }
        err = stat_to_v9stat(pdu, &path, dent->d_name, &stbuf, &v9stat);
        if (err < 0) {
            break;
        }
        if ((count + v9stat.size + 2) > max_count) {
            v9fs_readdir_unlock(&fidp->fs.dir);

            /* Ran out of buffer. Set dir back to old position and return */
            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
            v9fs_stat_free(&v9stat);
            v9fs_path_free(&path);
            return count;
        }

        /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
        len = pdu_marshal(pdu, 11 + count, "S", &v9stat);

        v9fs_readdir_unlock(&fidp->fs.dir);

        if (len < 0) {
            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
            v9fs_stat_free(&v9stat);
            v9fs_path_free(&path);
            return len;
        }
        count += len;
        v9fs_stat_free(&v9stat);
        v9fs_path_free(&path);
        saved_dir_pos = qemu_dirent_off(dent);
    }

    v9fs_readdir_unlock(&fidp->fs.dir);

    v9fs_path_free(&path);
    if (err < 0) {
        return err;
    }
    return count;
}

/*
 * Tread handler: dispatch on fid type — directory (9P2000.u only),
 * regular file (zero-copy into the PDU iovecs, retrying on EINTR),
 * or xattr fid.
 */
static void coroutine_fn v9fs_read(void *opaque)
{
    int32_t fid;
    uint64_t off;
    ssize_t err = 0;
    int32_t count = 0;
    size_t offset = 7;
    uint32_t max_count;
    V9fsFidState *fidp;
    V9fsPDU *pdu = opaque;
    V9fsState *s = pdu->s;

    err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &max_count);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_read(pdu->tag, pdu->id, fid, off, max_count);

    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -EINVAL;
        goto out_nofid;
    }
    if (fidp->fid_type == P9_FID_DIR) {
        if (s->proto_version != V9FS_PROTO_2000U) {
            warn_report_once(
                "9p: bad client: T_read request on directory only expected "
                "with 9P2000.u protocol version"
            );
            err = -EOPNOTSUPP;
            goto out;
        }
        if (off == 0) {
            v9fs_co_rewinddir(pdu, fidp);
        }
        count = v9fs_do_readdir_with_stat(pdu, fidp, max_count);
        if (count < 0) {
            err = count;
            goto out;
        }
        err = pdu_marshal(pdu, offset, "d", count);
        if (err < 0) {
            goto out;
        }
        err += offset + count;
    } else if (fidp->fid_type == P9_FID_FILE) {
        QEMUIOVector qiov_full;
        QEMUIOVector qiov;
        int32_t len;

        v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset + 4, max_count, false);
        qemu_iovec_init(&qiov, qiov_full.niov);
        do {
            qemu_iovec_reset(&qiov);
            qemu_iovec_concat(&qiov, &qiov_full, count, qiov_full.size - count);
            if (0) {
                print_sg(qiov.iov, qiov.niov);
            }
            /* Loop in case of EINTR */
            do {
                len = v9fs_co_preadv(pdu, fidp, qiov.iov, qiov.niov, off);
                if (len >= 0) {
                    off += len;
                    count += len;
                }
            } while (len == -EINTR && !pdu->cancelled);
            if (len < 0) {
                /* IO error return the error */
                err = len;
                goto out_free_iovec;
            }
        } while (count < max_count && len > 0);
        err = pdu_marshal(pdu, offset, "d", count);
        if (err < 0) {
            goto out_free_iovec;
        }
        err += offset + count;
out_free_iovec:
        qemu_iovec_destroy(&qiov);
        qemu_iovec_destroy(&qiov_full);
    } else if (fidp->fid_type == P9_FID_XATTR) {
        err = v9fs_xattr_read(s, pdu, fidp, off, max_count);
    } else {
        err = -EINVAL;
    }
    trace_v9fs_read_return(pdu->tag, pdu->id, count, err);
out:
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, err);
}

/**
 * v9fs_readdir_response_size() - Returns size required in Rreaddir response
 * for the passed dirent @name.
 *
 * @name: directory entry's name (i.e.
file name, directory name) 2448 * Return: required size in bytes 2449 */ 2450 size_t v9fs_readdir_response_size(V9fsString *name) 2451 { 2452 /* 2453 * Size of each dirent on the wire: size of qid (13) + size of offset (8) 2454 * size of type (1) + size of name.size (2) + strlen(name.data) 2455 */ 2456 return 24 + v9fs_string_size(name); 2457 } 2458 2459 static void v9fs_free_dirents(struct V9fsDirEnt *e) 2460 { 2461 struct V9fsDirEnt *next = NULL; 2462 2463 for (; e; e = next) { 2464 next = e->next; 2465 g_free(e->dent); 2466 g_free(e->st); 2467 g_free(e); 2468 } 2469 } 2470 2471 static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp, 2472 off_t offset, int32_t max_count) 2473 { 2474 size_t size; 2475 V9fsQID qid; 2476 V9fsString name; 2477 int len, err = 0; 2478 int32_t count = 0; 2479 off_t off; 2480 struct dirent *dent; 2481 struct stat *st; 2482 struct V9fsDirEnt *entries = NULL; 2483 2484 /* 2485 * inode remapping requires the device id, which in turn might be 2486 * different for different directory entries, so if inode remapping is 2487 * enabled we have to make a full stat for each directory entry 2488 */ 2489 const bool dostat = pdu->s->ctx.export_flags & V9FS_REMAP_INODES; 2490 2491 /* 2492 * Fetch all required directory entries altogether on a background IO 2493 * thread from fs driver. We don't want to do that for each entry 2494 * individually, because hopping between threads (this main IO thread 2495 * and background IO driver thread) would sum up to huge latencies. 
2496 */ 2497 count = v9fs_co_readdir_many(pdu, fidp, &entries, offset, max_count, 2498 dostat); 2499 if (count < 0) { 2500 err = count; 2501 count = 0; 2502 goto out; 2503 } 2504 count = 0; 2505 2506 for (struct V9fsDirEnt *e = entries; e; e = e->next) { 2507 dent = e->dent; 2508 2509 if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) { 2510 st = e->st; 2511 /* e->st should never be NULL, but just to be sure */ 2512 if (!st) { 2513 err = -1; 2514 break; 2515 } 2516 2517 /* remap inode */ 2518 err = stat_to_qid(pdu, st, &qid); 2519 if (err < 0) { 2520 break; 2521 } 2522 } else { 2523 /* 2524 * Fill up just the path field of qid because the client uses 2525 * only that. To fill the entire qid structure we will have 2526 * to stat each dirent found, which is expensive. For the 2527 * latter reason we don't call stat_to_qid() here. Only drawback 2528 * is that no multi-device export detection of stat_to_qid() 2529 * would be done and provided as error to the user here. But 2530 * user would get that error anyway when accessing those 2531 * files/dirs through other ways. 
2532 */ 2533 size = MIN(sizeof(dent->d_ino), sizeof(qid.path)); 2534 memcpy(&qid.path, &dent->d_ino, size); 2535 /* Fill the other fields with dummy values */ 2536 qid.type = 0; 2537 qid.version = 0; 2538 } 2539 2540 off = qemu_dirent_off(dent); 2541 v9fs_string_init(&name); 2542 v9fs_string_sprintf(&name, "%s", dent->d_name); 2543 2544 /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */ 2545 len = pdu_marshal(pdu, 11 + count, "Qqbs", 2546 &qid, off, 2547 dent->d_type, &name); 2548 2549 v9fs_string_free(&name); 2550 2551 if (len < 0) { 2552 err = len; 2553 break; 2554 } 2555 2556 count += len; 2557 } 2558 2559 out: 2560 v9fs_free_dirents(entries); 2561 if (err < 0) { 2562 return err; 2563 } 2564 return count; 2565 } 2566 2567 static void coroutine_fn v9fs_readdir(void *opaque) 2568 { 2569 int32_t fid; 2570 V9fsFidState *fidp; 2571 ssize_t retval = 0; 2572 size_t offset = 7; 2573 uint64_t initial_offset; 2574 int32_t count; 2575 uint32_t max_count; 2576 V9fsPDU *pdu = opaque; 2577 V9fsState *s = pdu->s; 2578 2579 retval = pdu_unmarshal(pdu, offset, "dqd", &fid, 2580 &initial_offset, &max_count); 2581 if (retval < 0) { 2582 goto out_nofid; 2583 } 2584 trace_v9fs_readdir(pdu->tag, pdu->id, fid, initial_offset, max_count); 2585 2586 /* Enough space for a R_readdir header: size[4] Rreaddir tag[2] count[4] */ 2587 if (max_count > s->msize - 11) { 2588 max_count = s->msize - 11; 2589 warn_report_once( 2590 "9p: bad client: T_readdir with count > msize - 11" 2591 ); 2592 } 2593 2594 fidp = get_fid(pdu, fid); 2595 if (fidp == NULL) { 2596 retval = -EINVAL; 2597 goto out_nofid; 2598 } 2599 if (!fidp->fs.dir.stream) { 2600 retval = -EINVAL; 2601 goto out; 2602 } 2603 if (s->proto_version != V9FS_PROTO_2000L) { 2604 warn_report_once( 2605 "9p: bad client: T_readdir request only expected with 9P2000.L " 2606 "protocol version" 2607 ); 2608 retval = -EOPNOTSUPP; 2609 goto out; 2610 } 2611 count = v9fs_do_readdir(pdu, fidp, (off_t) initial_offset, max_count); 2612 if 
(count < 0) { 2613 retval = count; 2614 goto out; 2615 } 2616 retval = pdu_marshal(pdu, offset, "d", count); 2617 if (retval < 0) { 2618 goto out; 2619 } 2620 retval += count + offset; 2621 trace_v9fs_readdir_return(pdu->tag, pdu->id, count, retval); 2622 out: 2623 put_fid(pdu, fidp); 2624 out_nofid: 2625 pdu_complete(pdu, retval); 2626 } 2627 2628 static int v9fs_xattr_write(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp, 2629 uint64_t off, uint32_t count, 2630 struct iovec *sg, int cnt) 2631 { 2632 int i, to_copy; 2633 ssize_t err = 0; 2634 uint64_t write_count; 2635 size_t offset = 7; 2636 2637 2638 if (fidp->fs.xattr.len < off) { 2639 return -ENOSPC; 2640 } 2641 write_count = fidp->fs.xattr.len - off; 2642 if (write_count > count) { 2643 write_count = count; 2644 } 2645 err = pdu_marshal(pdu, offset, "d", write_count); 2646 if (err < 0) { 2647 return err; 2648 } 2649 err += offset; 2650 fidp->fs.xattr.copied_len += write_count; 2651 /* 2652 * Now copy the content from sg list 2653 */ 2654 for (i = 0; i < cnt; i++) { 2655 if (write_count > sg[i].iov_len) { 2656 to_copy = sg[i].iov_len; 2657 } else { 2658 to_copy = write_count; 2659 } 2660 memcpy((char *)fidp->fs.xattr.value + off, sg[i].iov_base, to_copy); 2661 /* updating vs->off since we are not using below */ 2662 off += to_copy; 2663 write_count -= to_copy; 2664 } 2665 2666 return err; 2667 } 2668 2669 static void coroutine_fn v9fs_write(void *opaque) 2670 { 2671 ssize_t err; 2672 int32_t fid; 2673 uint64_t off; 2674 uint32_t count; 2675 int32_t len = 0; 2676 int32_t total = 0; 2677 size_t offset = 7; 2678 V9fsFidState *fidp; 2679 V9fsPDU *pdu = opaque; 2680 V9fsState *s = pdu->s; 2681 QEMUIOVector qiov_full; 2682 QEMUIOVector qiov; 2683 2684 err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &count); 2685 if (err < 0) { 2686 pdu_complete(pdu, err); 2687 return; 2688 } 2689 offset += err; 2690 v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, count, true); 2691 trace_v9fs_write(pdu->tag, pdu->id, fid, off, 
count, qiov_full.niov); 2692 2693 fidp = get_fid(pdu, fid); 2694 if (fidp == NULL) { 2695 err = -EINVAL; 2696 goto out_nofid; 2697 } 2698 if (fidp->fid_type == P9_FID_FILE) { 2699 if (fidp->fs.fd == -1) { 2700 err = -EINVAL; 2701 goto out; 2702 } 2703 } else if (fidp->fid_type == P9_FID_XATTR) { 2704 /* 2705 * setxattr operation 2706 */ 2707 err = v9fs_xattr_write(s, pdu, fidp, off, count, 2708 qiov_full.iov, qiov_full.niov); 2709 goto out; 2710 } else { 2711 err = -EINVAL; 2712 goto out; 2713 } 2714 qemu_iovec_init(&qiov, qiov_full.niov); 2715 do { 2716 qemu_iovec_reset(&qiov); 2717 qemu_iovec_concat(&qiov, &qiov_full, total, qiov_full.size - total); 2718 if (0) { 2719 print_sg(qiov.iov, qiov.niov); 2720 } 2721 /* Loop in case of EINTR */ 2722 do { 2723 len = v9fs_co_pwritev(pdu, fidp, qiov.iov, qiov.niov, off); 2724 if (len >= 0) { 2725 off += len; 2726 total += len; 2727 } 2728 } while (len == -EINTR && !pdu->cancelled); 2729 if (len < 0) { 2730 /* IO error return the error */ 2731 err = len; 2732 goto out_qiov; 2733 } 2734 } while (total < count && len > 0); 2735 2736 offset = 7; 2737 err = pdu_marshal(pdu, offset, "d", total); 2738 if (err < 0) { 2739 goto out_qiov; 2740 } 2741 err += offset; 2742 trace_v9fs_write_return(pdu->tag, pdu->id, total, err); 2743 out_qiov: 2744 qemu_iovec_destroy(&qiov); 2745 out: 2746 put_fid(pdu, fidp); 2747 out_nofid: 2748 qemu_iovec_destroy(&qiov_full); 2749 pdu_complete(pdu, err); 2750 } 2751 2752 static void coroutine_fn v9fs_create(void *opaque) 2753 { 2754 int32_t fid; 2755 int err = 0; 2756 size_t offset = 7; 2757 V9fsFidState *fidp; 2758 V9fsQID qid; 2759 int32_t perm; 2760 int8_t mode; 2761 V9fsPath path; 2762 struct stat stbuf; 2763 V9fsString name; 2764 V9fsString extension; 2765 int iounit; 2766 V9fsPDU *pdu = opaque; 2767 V9fsState *s = pdu->s; 2768 2769 v9fs_path_init(&path); 2770 v9fs_string_init(&name); 2771 v9fs_string_init(&extension); 2772 err = pdu_unmarshal(pdu, offset, "dsdbs", &fid, &name, 2773 &perm, &mode, 
&extension); 2774 if (err < 0) { 2775 goto out_nofid; 2776 } 2777 trace_v9fs_create(pdu->tag, pdu->id, fid, name.data, perm, mode); 2778 2779 if (name_is_illegal(name.data)) { 2780 err = -ENOENT; 2781 goto out_nofid; 2782 } 2783 2784 if (!strcmp(".", name.data) || !strcmp("..", name.data)) { 2785 err = -EEXIST; 2786 goto out_nofid; 2787 } 2788 2789 fidp = get_fid(pdu, fid); 2790 if (fidp == NULL) { 2791 err = -EINVAL; 2792 goto out_nofid; 2793 } 2794 if (fidp->fid_type != P9_FID_NONE) { 2795 err = -EINVAL; 2796 goto out; 2797 } 2798 if (perm & P9_STAT_MODE_DIR) { 2799 err = v9fs_co_mkdir(pdu, fidp, &name, perm & 0777, 2800 fidp->uid, -1, &stbuf); 2801 if (err < 0) { 2802 goto out; 2803 } 2804 err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path); 2805 if (err < 0) { 2806 goto out; 2807 } 2808 v9fs_path_write_lock(s); 2809 v9fs_path_copy(&fidp->path, &path); 2810 v9fs_path_unlock(s); 2811 err = v9fs_co_opendir(pdu, fidp); 2812 if (err < 0) { 2813 goto out; 2814 } 2815 fidp->fid_type = P9_FID_DIR; 2816 } else if (perm & P9_STAT_MODE_SYMLINK) { 2817 err = v9fs_co_symlink(pdu, fidp, &name, 2818 extension.data, -1 , &stbuf); 2819 if (err < 0) { 2820 goto out; 2821 } 2822 err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path); 2823 if (err < 0) { 2824 goto out; 2825 } 2826 v9fs_path_write_lock(s); 2827 v9fs_path_copy(&fidp->path, &path); 2828 v9fs_path_unlock(s); 2829 } else if (perm & P9_STAT_MODE_LINK) { 2830 int32_t ofid = atoi(extension.data); 2831 V9fsFidState *ofidp = get_fid(pdu, ofid); 2832 if (ofidp == NULL) { 2833 err = -EINVAL; 2834 goto out; 2835 } 2836 err = v9fs_co_link(pdu, ofidp, fidp, &name); 2837 put_fid(pdu, ofidp); 2838 if (err < 0) { 2839 goto out; 2840 } 2841 err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path); 2842 if (err < 0) { 2843 fidp->fid_type = P9_FID_NONE; 2844 goto out; 2845 } 2846 v9fs_path_write_lock(s); 2847 v9fs_path_copy(&fidp->path, &path); 2848 v9fs_path_unlock(s); 2849 err = v9fs_co_lstat(pdu, 
&fidp->path, &stbuf); 2850 if (err < 0) { 2851 fidp->fid_type = P9_FID_NONE; 2852 goto out; 2853 } 2854 } else if (perm & P9_STAT_MODE_DEVICE) { 2855 char ctype; 2856 uint32_t major, minor; 2857 mode_t nmode = 0; 2858 2859 if (sscanf(extension.data, "%c %u %u", &ctype, &major, &minor) != 3) { 2860 err = -errno; 2861 goto out; 2862 } 2863 2864 switch (ctype) { 2865 case 'c': 2866 nmode = S_IFCHR; 2867 break; 2868 case 'b': 2869 nmode = S_IFBLK; 2870 break; 2871 default: 2872 err = -EIO; 2873 goto out; 2874 } 2875 2876 nmode |= perm & 0777; 2877 err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1, 2878 makedev(major, minor), nmode, &stbuf); 2879 if (err < 0) { 2880 goto out; 2881 } 2882 err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path); 2883 if (err < 0) { 2884 goto out; 2885 } 2886 v9fs_path_write_lock(s); 2887 v9fs_path_copy(&fidp->path, &path); 2888 v9fs_path_unlock(s); 2889 } else if (perm & P9_STAT_MODE_NAMED_PIPE) { 2890 err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1, 2891 0, S_IFIFO | (perm & 0777), &stbuf); 2892 if (err < 0) { 2893 goto out; 2894 } 2895 err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path); 2896 if (err < 0) { 2897 goto out; 2898 } 2899 v9fs_path_write_lock(s); 2900 v9fs_path_copy(&fidp->path, &path); 2901 v9fs_path_unlock(s); 2902 } else if (perm & P9_STAT_MODE_SOCKET) { 2903 err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1, 2904 0, S_IFSOCK | (perm & 0777), &stbuf); 2905 if (err < 0) { 2906 goto out; 2907 } 2908 err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path); 2909 if (err < 0) { 2910 goto out; 2911 } 2912 v9fs_path_write_lock(s); 2913 v9fs_path_copy(&fidp->path, &path); 2914 v9fs_path_unlock(s); 2915 } else { 2916 err = v9fs_co_open2(pdu, fidp, &name, -1, 2917 omode_to_uflags(mode) | O_CREAT, perm, &stbuf); 2918 if (err < 0) { 2919 goto out; 2920 } 2921 fidp->fid_type = P9_FID_FILE; 2922 fidp->open_flags = omode_to_uflags(mode); 2923 if (fidp->open_flags & O_EXCL) { 2924 /* 2925 * We let the 
host file system do O_EXCL check 2926 * We should not reclaim such fd 2927 */ 2928 fidp->flags |= FID_NON_RECLAIMABLE; 2929 } 2930 } 2931 iounit = get_iounit(pdu, &fidp->path); 2932 err = stat_to_qid(pdu, &stbuf, &qid); 2933 if (err < 0) { 2934 goto out; 2935 } 2936 err = pdu_marshal(pdu, offset, "Qd", &qid, iounit); 2937 if (err < 0) { 2938 goto out; 2939 } 2940 err += offset; 2941 trace_v9fs_create_return(pdu->tag, pdu->id, 2942 qid.type, qid.version, qid.path, iounit); 2943 out: 2944 put_fid(pdu, fidp); 2945 out_nofid: 2946 pdu_complete(pdu, err); 2947 v9fs_string_free(&name); 2948 v9fs_string_free(&extension); 2949 v9fs_path_free(&path); 2950 } 2951 2952 static void coroutine_fn v9fs_symlink(void *opaque) 2953 { 2954 V9fsPDU *pdu = opaque; 2955 V9fsString name; 2956 V9fsString symname; 2957 V9fsFidState *dfidp; 2958 V9fsQID qid; 2959 struct stat stbuf; 2960 int32_t dfid; 2961 int err = 0; 2962 gid_t gid; 2963 size_t offset = 7; 2964 2965 v9fs_string_init(&name); 2966 v9fs_string_init(&symname); 2967 err = pdu_unmarshal(pdu, offset, "dssd", &dfid, &name, &symname, &gid); 2968 if (err < 0) { 2969 goto out_nofid; 2970 } 2971 trace_v9fs_symlink(pdu->tag, pdu->id, dfid, name.data, symname.data, gid); 2972 2973 if (name_is_illegal(name.data)) { 2974 err = -ENOENT; 2975 goto out_nofid; 2976 } 2977 2978 if (!strcmp(".", name.data) || !strcmp("..", name.data)) { 2979 err = -EEXIST; 2980 goto out_nofid; 2981 } 2982 2983 dfidp = get_fid(pdu, dfid); 2984 if (dfidp == NULL) { 2985 err = -EINVAL; 2986 goto out_nofid; 2987 } 2988 err = v9fs_co_symlink(pdu, dfidp, &name, symname.data, gid, &stbuf); 2989 if (err < 0) { 2990 goto out; 2991 } 2992 err = stat_to_qid(pdu, &stbuf, &qid); 2993 if (err < 0) { 2994 goto out; 2995 } 2996 err = pdu_marshal(pdu, offset, "Q", &qid); 2997 if (err < 0) { 2998 goto out; 2999 } 3000 err += offset; 3001 trace_v9fs_symlink_return(pdu->tag, pdu->id, 3002 qid.type, qid.version, qid.path); 3003 out: 3004 put_fid(pdu, dfidp); 3005 out_nofid: 3006 
pdu_complete(pdu, err); 3007 v9fs_string_free(&name); 3008 v9fs_string_free(&symname); 3009 } 3010 3011 static void coroutine_fn v9fs_flush(void *opaque) 3012 { 3013 ssize_t err; 3014 int16_t tag; 3015 size_t offset = 7; 3016 V9fsPDU *cancel_pdu = NULL; 3017 V9fsPDU *pdu = opaque; 3018 V9fsState *s = pdu->s; 3019 3020 err = pdu_unmarshal(pdu, offset, "w", &tag); 3021 if (err < 0) { 3022 pdu_complete(pdu, err); 3023 return; 3024 } 3025 trace_v9fs_flush(pdu->tag, pdu->id, tag); 3026 3027 if (pdu->tag == tag) { 3028 warn_report("the guest sent a self-referencing 9P flush request"); 3029 } else { 3030 QLIST_FOREACH(cancel_pdu, &s->active_list, next) { 3031 if (cancel_pdu->tag == tag) { 3032 break; 3033 } 3034 } 3035 } 3036 if (cancel_pdu) { 3037 cancel_pdu->cancelled = 1; 3038 /* 3039 * Wait for pdu to complete. 3040 */ 3041 qemu_co_queue_wait(&cancel_pdu->complete, NULL); 3042 if (!qemu_co_queue_next(&cancel_pdu->complete)) { 3043 cancel_pdu->cancelled = 0; 3044 pdu_free(cancel_pdu); 3045 } 3046 } 3047 pdu_complete(pdu, 7); 3048 } 3049 3050 static void coroutine_fn v9fs_link(void *opaque) 3051 { 3052 V9fsPDU *pdu = opaque; 3053 int32_t dfid, oldfid; 3054 V9fsFidState *dfidp, *oldfidp; 3055 V9fsString name; 3056 size_t offset = 7; 3057 int err = 0; 3058 3059 v9fs_string_init(&name); 3060 err = pdu_unmarshal(pdu, offset, "dds", &dfid, &oldfid, &name); 3061 if (err < 0) { 3062 goto out_nofid; 3063 } 3064 trace_v9fs_link(pdu->tag, pdu->id, dfid, oldfid, name.data); 3065 3066 if (name_is_illegal(name.data)) { 3067 err = -ENOENT; 3068 goto out_nofid; 3069 } 3070 3071 if (!strcmp(".", name.data) || !strcmp("..", name.data)) { 3072 err = -EEXIST; 3073 goto out_nofid; 3074 } 3075 3076 dfidp = get_fid(pdu, dfid); 3077 if (dfidp == NULL) { 3078 err = -ENOENT; 3079 goto out_nofid; 3080 } 3081 3082 oldfidp = get_fid(pdu, oldfid); 3083 if (oldfidp == NULL) { 3084 err = -ENOENT; 3085 goto out; 3086 } 3087 err = v9fs_co_link(pdu, oldfidp, dfidp, &name); 3088 if (!err) { 3089 err = 
offset; 3090 } 3091 put_fid(pdu, oldfidp); 3092 out: 3093 put_fid(pdu, dfidp); 3094 out_nofid: 3095 v9fs_string_free(&name); 3096 pdu_complete(pdu, err); 3097 } 3098 3099 /* Only works with path name based fid */ 3100 static void coroutine_fn v9fs_remove(void *opaque) 3101 { 3102 int32_t fid; 3103 int err = 0; 3104 size_t offset = 7; 3105 V9fsFidState *fidp; 3106 V9fsPDU *pdu = opaque; 3107 3108 err = pdu_unmarshal(pdu, offset, "d", &fid); 3109 if (err < 0) { 3110 goto out_nofid; 3111 } 3112 trace_v9fs_remove(pdu->tag, pdu->id, fid); 3113 3114 fidp = get_fid(pdu, fid); 3115 if (fidp == NULL) { 3116 err = -EINVAL; 3117 goto out_nofid; 3118 } 3119 /* if fs driver is not path based, return EOPNOTSUPP */ 3120 if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) { 3121 err = -EOPNOTSUPP; 3122 goto out_err; 3123 } 3124 /* 3125 * IF the file is unlinked, we cannot reopen 3126 * the file later. So don't reclaim fd 3127 */ 3128 err = v9fs_mark_fids_unreclaim(pdu, &fidp->path); 3129 if (err < 0) { 3130 goto out_err; 3131 } 3132 err = v9fs_co_remove(pdu, &fidp->path); 3133 if (!err) { 3134 err = offset; 3135 } 3136 out_err: 3137 /* For TREMOVE we need to clunk the fid even on failed remove */ 3138 clunk_fid(pdu->s, fidp->fid); 3139 put_fid(pdu, fidp); 3140 out_nofid: 3141 pdu_complete(pdu, err); 3142 } 3143 3144 static void coroutine_fn v9fs_unlinkat(void *opaque) 3145 { 3146 int err = 0; 3147 V9fsString name; 3148 int32_t dfid, flags, rflags = 0; 3149 size_t offset = 7; 3150 V9fsPath path; 3151 V9fsFidState *dfidp; 3152 V9fsPDU *pdu = opaque; 3153 3154 v9fs_string_init(&name); 3155 err = pdu_unmarshal(pdu, offset, "dsd", &dfid, &name, &flags); 3156 if (err < 0) { 3157 goto out_nofid; 3158 } 3159 3160 if (name_is_illegal(name.data)) { 3161 err = -ENOENT; 3162 goto out_nofid; 3163 } 3164 3165 if (!strcmp(".", name.data)) { 3166 err = -EINVAL; 3167 goto out_nofid; 3168 } 3169 3170 if (!strcmp("..", name.data)) { 3171 err = -ENOTEMPTY; 3172 goto out_nofid; 3173 } 3174 3175 
if (flags & ~P9_DOTL_AT_REMOVEDIR) { 3176 err = -EINVAL; 3177 goto out_nofid; 3178 } 3179 3180 if (flags & P9_DOTL_AT_REMOVEDIR) { 3181 rflags |= AT_REMOVEDIR; 3182 } 3183 3184 dfidp = get_fid(pdu, dfid); 3185 if (dfidp == NULL) { 3186 err = -EINVAL; 3187 goto out_nofid; 3188 } 3189 /* 3190 * IF the file is unlinked, we cannot reopen 3191 * the file later. So don't reclaim fd 3192 */ 3193 v9fs_path_init(&path); 3194 err = v9fs_co_name_to_path(pdu, &dfidp->path, name.data, &path); 3195 if (err < 0) { 3196 goto out_err; 3197 } 3198 err = v9fs_mark_fids_unreclaim(pdu, &path); 3199 if (err < 0) { 3200 goto out_err; 3201 } 3202 err = v9fs_co_unlinkat(pdu, &dfidp->path, &name, rflags); 3203 if (!err) { 3204 err = offset; 3205 } 3206 out_err: 3207 put_fid(pdu, dfidp); 3208 v9fs_path_free(&path); 3209 out_nofid: 3210 pdu_complete(pdu, err); 3211 v9fs_string_free(&name); 3212 } 3213 3214 3215 /* Only works with path name based fid */ 3216 static int coroutine_fn v9fs_complete_rename(V9fsPDU *pdu, V9fsFidState *fidp, 3217 int32_t newdirfid, 3218 V9fsString *name) 3219 { 3220 int err = 0; 3221 V9fsPath new_path; 3222 V9fsFidState *tfidp; 3223 V9fsState *s = pdu->s; 3224 V9fsFidState *dirfidp = NULL; 3225 GHashTableIter iter; 3226 gpointer fid; 3227 3228 v9fs_path_init(&new_path); 3229 if (newdirfid != -1) { 3230 dirfidp = get_fid(pdu, newdirfid); 3231 if (dirfidp == NULL) { 3232 return -ENOENT; 3233 } 3234 if (fidp->fid_type != P9_FID_NONE) { 3235 err = -EINVAL; 3236 goto out; 3237 } 3238 err = v9fs_co_name_to_path(pdu, &dirfidp->path, name->data, &new_path); 3239 if (err < 0) { 3240 goto out; 3241 } 3242 } else { 3243 char *dir_name = g_path_get_dirname(fidp->path.data); 3244 V9fsPath dir_path; 3245 3246 v9fs_path_init(&dir_path); 3247 v9fs_path_sprintf(&dir_path, "%s", dir_name); 3248 g_free(dir_name); 3249 3250 err = v9fs_co_name_to_path(pdu, &dir_path, name->data, &new_path); 3251 v9fs_path_free(&dir_path); 3252 if (err < 0) { 3253 goto out; 3254 } 3255 } 3256 err = 
v9fs_co_rename(pdu, &fidp->path, &new_path); 3257 if (err < 0) { 3258 goto out; 3259 } 3260 3261 /* 3262 * Fixup fid's pointing to the old name to 3263 * start pointing to the new name 3264 */ 3265 g_hash_table_iter_init(&iter, s->fids); 3266 while (g_hash_table_iter_next(&iter, &fid, (gpointer *) &tfidp)) { 3267 if (v9fs_path_is_ancestor(&fidp->path, &tfidp->path)) { 3268 /* replace the name */ 3269 v9fs_fix_path(&tfidp->path, &new_path, strlen(fidp->path.data)); 3270 } 3271 } 3272 out: 3273 if (dirfidp) { 3274 put_fid(pdu, dirfidp); 3275 } 3276 v9fs_path_free(&new_path); 3277 return err; 3278 } 3279 3280 /* Only works with path name based fid */ 3281 static void coroutine_fn v9fs_rename(void *opaque) 3282 { 3283 int32_t fid; 3284 ssize_t err = 0; 3285 size_t offset = 7; 3286 V9fsString name; 3287 int32_t newdirfid; 3288 V9fsFidState *fidp; 3289 V9fsPDU *pdu = opaque; 3290 V9fsState *s = pdu->s; 3291 3292 v9fs_string_init(&name); 3293 err = pdu_unmarshal(pdu, offset, "dds", &fid, &newdirfid, &name); 3294 if (err < 0) { 3295 goto out_nofid; 3296 } 3297 3298 if (name_is_illegal(name.data)) { 3299 err = -ENOENT; 3300 goto out_nofid; 3301 } 3302 3303 if (!strcmp(".", name.data) || !strcmp("..", name.data)) { 3304 err = -EISDIR; 3305 goto out_nofid; 3306 } 3307 3308 fidp = get_fid(pdu, fid); 3309 if (fidp == NULL) { 3310 err = -ENOENT; 3311 goto out_nofid; 3312 } 3313 if (fidp->fid_type != P9_FID_NONE) { 3314 err = -EINVAL; 3315 goto out; 3316 } 3317 /* if fs driver is not path based, return EOPNOTSUPP */ 3318 if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) { 3319 err = -EOPNOTSUPP; 3320 goto out; 3321 } 3322 v9fs_path_write_lock(s); 3323 err = v9fs_complete_rename(pdu, fidp, newdirfid, &name); 3324 v9fs_path_unlock(s); 3325 if (!err) { 3326 err = offset; 3327 } 3328 out: 3329 put_fid(pdu, fidp); 3330 out_nofid: 3331 pdu_complete(pdu, err); 3332 v9fs_string_free(&name); 3333 } 3334 3335 static int coroutine_fn v9fs_fix_fid_paths(V9fsPDU *pdu, V9fsPath 
*olddir, 3336 V9fsString *old_name, 3337 V9fsPath *newdir, 3338 V9fsString *new_name) 3339 { 3340 V9fsFidState *tfidp; 3341 V9fsPath oldpath, newpath; 3342 V9fsState *s = pdu->s; 3343 int err; 3344 GHashTableIter iter; 3345 gpointer fid; 3346 3347 v9fs_path_init(&oldpath); 3348 v9fs_path_init(&newpath); 3349 err = v9fs_co_name_to_path(pdu, olddir, old_name->data, &oldpath); 3350 if (err < 0) { 3351 goto out; 3352 } 3353 err = v9fs_co_name_to_path(pdu, newdir, new_name->data, &newpath); 3354 if (err < 0) { 3355 goto out; 3356 } 3357 3358 /* 3359 * Fixup fid's pointing to the old name to 3360 * start pointing to the new name 3361 */ 3362 g_hash_table_iter_init(&iter, s->fids); 3363 while (g_hash_table_iter_next(&iter, &fid, (gpointer *) &tfidp)) { 3364 if (v9fs_path_is_ancestor(&oldpath, &tfidp->path)) { 3365 /* replace the name */ 3366 v9fs_fix_path(&tfidp->path, &newpath, strlen(oldpath.data)); 3367 } 3368 } 3369 out: 3370 v9fs_path_free(&oldpath); 3371 v9fs_path_free(&newpath); 3372 return err; 3373 } 3374 3375 static int coroutine_fn v9fs_complete_renameat(V9fsPDU *pdu, int32_t olddirfid, 3376 V9fsString *old_name, 3377 int32_t newdirfid, 3378 V9fsString *new_name) 3379 { 3380 int err = 0; 3381 V9fsState *s = pdu->s; 3382 V9fsFidState *newdirfidp = NULL, *olddirfidp = NULL; 3383 3384 olddirfidp = get_fid(pdu, olddirfid); 3385 if (olddirfidp == NULL) { 3386 err = -ENOENT; 3387 goto out; 3388 } 3389 if (newdirfid != -1) { 3390 newdirfidp = get_fid(pdu, newdirfid); 3391 if (newdirfidp == NULL) { 3392 err = -ENOENT; 3393 goto out; 3394 } 3395 } else { 3396 newdirfidp = get_fid(pdu, olddirfid); 3397 } 3398 3399 err = v9fs_co_renameat(pdu, &olddirfidp->path, old_name, 3400 &newdirfidp->path, new_name); 3401 if (err < 0) { 3402 goto out; 3403 } 3404 if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) { 3405 /* Only for path based fid we need to do the below fixup */ 3406 err = v9fs_fix_fid_paths(pdu, &olddirfidp->path, old_name, 3407 &newdirfidp->path, new_name); 3408 } 
3409 out: 3410 if (olddirfidp) { 3411 put_fid(pdu, olddirfidp); 3412 } 3413 if (newdirfidp) { 3414 put_fid(pdu, newdirfidp); 3415 } 3416 return err; 3417 } 3418 3419 static void coroutine_fn v9fs_renameat(void *opaque) 3420 { 3421 ssize_t err = 0; 3422 size_t offset = 7; 3423 V9fsPDU *pdu = opaque; 3424 V9fsState *s = pdu->s; 3425 int32_t olddirfid, newdirfid; 3426 V9fsString old_name, new_name; 3427 3428 v9fs_string_init(&old_name); 3429 v9fs_string_init(&new_name); 3430 err = pdu_unmarshal(pdu, offset, "dsds", &olddirfid, 3431 &old_name, &newdirfid, &new_name); 3432 if (err < 0) { 3433 goto out_err; 3434 } 3435 3436 if (name_is_illegal(old_name.data) || name_is_illegal(new_name.data)) { 3437 err = -ENOENT; 3438 goto out_err; 3439 } 3440 3441 if (!strcmp(".", old_name.data) || !strcmp("..", old_name.data) || 3442 !strcmp(".", new_name.data) || !strcmp("..", new_name.data)) { 3443 err = -EISDIR; 3444 goto out_err; 3445 } 3446 3447 v9fs_path_write_lock(s); 3448 err = v9fs_complete_renameat(pdu, olddirfid, 3449 &old_name, newdirfid, &new_name); 3450 v9fs_path_unlock(s); 3451 if (!err) { 3452 err = offset; 3453 } 3454 3455 out_err: 3456 pdu_complete(pdu, err); 3457 v9fs_string_free(&old_name); 3458 v9fs_string_free(&new_name); 3459 } 3460 3461 static void coroutine_fn v9fs_wstat(void *opaque) 3462 { 3463 int32_t fid; 3464 int err = 0; 3465 int16_t unused; 3466 V9fsStat v9stat; 3467 size_t offset = 7; 3468 struct stat stbuf; 3469 V9fsFidState *fidp; 3470 V9fsPDU *pdu = opaque; 3471 V9fsState *s = pdu->s; 3472 3473 v9fs_stat_init(&v9stat); 3474 err = pdu_unmarshal(pdu, offset, "dwS", &fid, &unused, &v9stat); 3475 if (err < 0) { 3476 goto out_nofid; 3477 } 3478 trace_v9fs_wstat(pdu->tag, pdu->id, fid, 3479 v9stat.mode, v9stat.atime, v9stat.mtime); 3480 3481 fidp = get_fid(pdu, fid); 3482 if (fidp == NULL) { 3483 err = -EINVAL; 3484 goto out_nofid; 3485 } 3486 /* do we need to sync the file? 
*/ 3487 if (donttouch_stat(&v9stat)) { 3488 err = v9fs_co_fsync(pdu, fidp, 0); 3489 goto out; 3490 } 3491 if (v9stat.mode != -1) { 3492 uint32_t v9_mode; 3493 err = v9fs_co_lstat(pdu, &fidp->path, &stbuf); 3494 if (err < 0) { 3495 goto out; 3496 } 3497 v9_mode = stat_to_v9mode(&stbuf); 3498 if ((v9stat.mode & P9_STAT_MODE_TYPE_BITS) != 3499 (v9_mode & P9_STAT_MODE_TYPE_BITS)) { 3500 /* Attempting to change the type */ 3501 err = -EIO; 3502 goto out; 3503 } 3504 err = v9fs_co_chmod(pdu, &fidp->path, 3505 v9mode_to_mode(v9stat.mode, 3506 &v9stat.extension)); 3507 if (err < 0) { 3508 goto out; 3509 } 3510 } 3511 if (v9stat.mtime != -1 || v9stat.atime != -1) { 3512 struct timespec times[2]; 3513 if (v9stat.atime != -1) { 3514 times[0].tv_sec = v9stat.atime; 3515 times[0].tv_nsec = 0; 3516 } else { 3517 times[0].tv_nsec = UTIME_OMIT; 3518 } 3519 if (v9stat.mtime != -1) { 3520 times[1].tv_sec = v9stat.mtime; 3521 times[1].tv_nsec = 0; 3522 } else { 3523 times[1].tv_nsec = UTIME_OMIT; 3524 } 3525 err = v9fs_co_utimensat(pdu, &fidp->path, times); 3526 if (err < 0) { 3527 goto out; 3528 } 3529 } 3530 if (v9stat.n_gid != -1 || v9stat.n_uid != -1) { 3531 err = v9fs_co_chown(pdu, &fidp->path, v9stat.n_uid, v9stat.n_gid); 3532 if (err < 0) { 3533 goto out; 3534 } 3535 } 3536 if (v9stat.name.size != 0) { 3537 v9fs_path_write_lock(s); 3538 err = v9fs_complete_rename(pdu, fidp, -1, &v9stat.name); 3539 v9fs_path_unlock(s); 3540 if (err < 0) { 3541 goto out; 3542 } 3543 } 3544 if (v9stat.length != -1) { 3545 err = v9fs_co_truncate(pdu, &fidp->path, v9stat.length); 3546 if (err < 0) { 3547 goto out; 3548 } 3549 } 3550 err = offset; 3551 out: 3552 put_fid(pdu, fidp); 3553 out_nofid: 3554 v9fs_stat_free(&v9stat); 3555 pdu_complete(pdu, err); 3556 } 3557 3558 static int v9fs_fill_statfs(V9fsState *s, V9fsPDU *pdu, struct statfs *stbuf) 3559 { 3560 uint32_t f_type; 3561 uint32_t f_bsize; 3562 uint64_t f_blocks; 3563 uint64_t f_bfree; 3564 uint64_t f_bavail; 3565 uint64_t f_files; 3566 
uint64_t f_ffree; 3567 uint64_t fsid_val; 3568 uint32_t f_namelen; 3569 size_t offset = 7; 3570 int32_t bsize_factor; 3571 3572 /* 3573 * compute bsize factor based on host file system block size 3574 * and client msize 3575 */ 3576 bsize_factor = (s->msize - P9_IOHDRSZ) / stbuf->f_bsize; 3577 if (!bsize_factor) { 3578 bsize_factor = 1; 3579 } 3580 f_type = stbuf->f_type; 3581 f_bsize = stbuf->f_bsize; 3582 f_bsize *= bsize_factor; 3583 /* 3584 * f_bsize is adjusted(multiplied) by bsize factor, so we need to 3585 * adjust(divide) the number of blocks, free blocks and available 3586 * blocks by bsize factor 3587 */ 3588 f_blocks = stbuf->f_blocks / bsize_factor; 3589 f_bfree = stbuf->f_bfree / bsize_factor; 3590 f_bavail = stbuf->f_bavail / bsize_factor; 3591 f_files = stbuf->f_files; 3592 f_ffree = stbuf->f_ffree; 3593 #ifdef CONFIG_DARWIN 3594 fsid_val = (unsigned int)stbuf->f_fsid.val[0] | 3595 (unsigned long long)stbuf->f_fsid.val[1] << 32; 3596 f_namelen = NAME_MAX; 3597 #else 3598 fsid_val = (unsigned int) stbuf->f_fsid.__val[0] | 3599 (unsigned long long)stbuf->f_fsid.__val[1] << 32; 3600 f_namelen = stbuf->f_namelen; 3601 #endif 3602 3603 return pdu_marshal(pdu, offset, "ddqqqqqqd", 3604 f_type, f_bsize, f_blocks, f_bfree, 3605 f_bavail, f_files, f_ffree, 3606 fsid_val, f_namelen); 3607 } 3608 3609 static void coroutine_fn v9fs_statfs(void *opaque) 3610 { 3611 int32_t fid; 3612 ssize_t retval = 0; 3613 size_t offset = 7; 3614 V9fsFidState *fidp; 3615 struct statfs stbuf; 3616 V9fsPDU *pdu = opaque; 3617 V9fsState *s = pdu->s; 3618 3619 retval = pdu_unmarshal(pdu, offset, "d", &fid); 3620 if (retval < 0) { 3621 goto out_nofid; 3622 } 3623 fidp = get_fid(pdu, fid); 3624 if (fidp == NULL) { 3625 retval = -ENOENT; 3626 goto out_nofid; 3627 } 3628 retval = v9fs_co_statfs(pdu, &fidp->path, &stbuf); 3629 if (retval < 0) { 3630 goto out; 3631 } 3632 retval = v9fs_fill_statfs(s, pdu, &stbuf); 3633 if (retval < 0) { 3634 goto out; 3635 } 3636 retval += offset; 3637 
out: 3638 put_fid(pdu, fidp); 3639 out_nofid: 3640 pdu_complete(pdu, retval); 3641 } 3642 3643 static void coroutine_fn v9fs_mknod(void *opaque) 3644 { 3645 3646 int mode; 3647 gid_t gid; 3648 int32_t fid; 3649 V9fsQID qid; 3650 int err = 0; 3651 int major, minor; 3652 size_t offset = 7; 3653 V9fsString name; 3654 struct stat stbuf; 3655 V9fsFidState *fidp; 3656 V9fsPDU *pdu = opaque; 3657 3658 v9fs_string_init(&name); 3659 err = pdu_unmarshal(pdu, offset, "dsdddd", &fid, &name, &mode, 3660 &major, &minor, &gid); 3661 if (err < 0) { 3662 goto out_nofid; 3663 } 3664 trace_v9fs_mknod(pdu->tag, pdu->id, fid, mode, major, minor); 3665 3666 if (name_is_illegal(name.data)) { 3667 err = -ENOENT; 3668 goto out_nofid; 3669 } 3670 3671 if (!strcmp(".", name.data) || !strcmp("..", name.data)) { 3672 err = -EEXIST; 3673 goto out_nofid; 3674 } 3675 3676 fidp = get_fid(pdu, fid); 3677 if (fidp == NULL) { 3678 err = -ENOENT; 3679 goto out_nofid; 3680 } 3681 err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, gid, 3682 makedev(major, minor), mode, &stbuf); 3683 if (err < 0) { 3684 goto out; 3685 } 3686 err = stat_to_qid(pdu, &stbuf, &qid); 3687 if (err < 0) { 3688 goto out; 3689 } 3690 err = pdu_marshal(pdu, offset, "Q", &qid); 3691 if (err < 0) { 3692 goto out; 3693 } 3694 err += offset; 3695 trace_v9fs_mknod_return(pdu->tag, pdu->id, 3696 qid.type, qid.version, qid.path); 3697 out: 3698 put_fid(pdu, fidp); 3699 out_nofid: 3700 pdu_complete(pdu, err); 3701 v9fs_string_free(&name); 3702 } 3703 3704 /* 3705 * Implement posix byte range locking code 3706 * Server side handling of locking code is very simple, because 9p server in 3707 * QEMU can handle only one client. And most of the lock handling 3708 * (like conflict, merging) etc is done by the VFS layer itself, so no need to 3709 * do any thing in * qemu 9p server side lock code path. 
 * So when a TLOCK request comes, always return success
 */
static void coroutine_fn v9fs_lock(void *opaque)
{
    V9fsFlock flock;
    size_t offset = 7;
    struct stat stbuf;
    V9fsFidState *fidp;
    int32_t fid, err = 0;
    V9fsPDU *pdu = opaque;

    v9fs_string_init(&flock.client_id);
    err = pdu_unmarshal(pdu, offset, "dbdqqds", &fid, &flock.type,
                        &flock.flags, &flock.start, &flock.length,
                        &flock.proc_id, &flock.client_id);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_lock(pdu->tag, pdu->id, fid,
                    flock.type, flock.start, flock.length);


    /* We support only block flag now (that too ignored currently) */
    if (flock.flags & ~P9_LOCK_FLAGS_BLOCK) {
        err = -EINVAL;
        goto out_nofid;
    }
    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -ENOENT;
        goto out_nofid;
    }
    /*
     * fstat result is unused; presumably this only validates that the fid
     * refers to an open file before claiming lock success — TODO confirm
     */
    err = v9fs_co_fstat(pdu, fidp, &stbuf);
    if (err < 0) {
        goto out;
    }
    err = pdu_marshal(pdu, offset, "b", P9_LOCK_SUCCESS);
    if (err < 0) {
        goto out;
    }
    err += offset;
    trace_v9fs_lock_return(pdu->tag, pdu->id, P9_LOCK_SUCCESS);
out:
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, err);
    v9fs_string_free(&flock.client_id);
}

/*
 * When a TGETLOCK request comes, always return success because all lock
 * handling is done by client's VFS layer.
 */
static void coroutine_fn v9fs_getlock(void *opaque)
{
    size_t offset = 7;
    struct stat stbuf;
    V9fsFidState *fidp;
    V9fsGetlock glock;
    int32_t fid, err = 0;
    V9fsPDU *pdu = opaque;

    v9fs_string_init(&glock.client_id);
    err = pdu_unmarshal(pdu, offset, "dbqqds", &fid, &glock.type,
                        &glock.start, &glock.length, &glock.proc_id,
                        &glock.client_id);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_getlock(pdu->tag, pdu->id, fid,
                       glock.type, glock.start, glock.length);

    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -ENOENT;
        goto out_nofid;
    }
    err = v9fs_co_fstat(pdu, fidp, &stbuf);
    if (err < 0) {
        goto out;
    }
    /* Always report "no conflicting lock" (see comment above) */
    glock.type = P9_LOCK_TYPE_UNLCK;
    err = pdu_marshal(pdu, offset, "bqqds", glock.type,
                      glock.start, glock.length, glock.proc_id,
                      &glock.client_id);
    if (err < 0) {
        goto out;
    }
    err += offset;
    trace_v9fs_getlock_return(pdu->tag, pdu->id, glock.type, glock.start,
                              glock.length, glock.proc_id);
out:
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, err);
    v9fs_string_free(&glock.client_id);
}

/*
 * Tmkdir handler: create directory 'name' under the directory referred to
 * by 'fid' and reply with the new directory's QID.
 */
static void coroutine_fn v9fs_mkdir(void *opaque)
{
    V9fsPDU *pdu = opaque;
    size_t offset = 7;
    int32_t fid;
    struct stat stbuf;
    V9fsQID qid;
    V9fsString name;
    V9fsFidState *fidp;
    gid_t gid;
    int mode;
    int err = 0;

    v9fs_string_init(&name);
    err = pdu_unmarshal(pdu, offset, "dsdd", &fid, &name, &mode, &gid);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_mkdir(pdu->tag, pdu->id, fid, name.data, mode, gid);

    /* Reject names containing '/' (and similar) coming from the guest */
    if (name_is_illegal(name.data)) {
        err = -ENOENT;
        goto out_nofid;
    }

    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
        err = -EEXIST;
        goto out_nofid;
    }

    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -ENOENT;
        goto out_nofid;
    }
    err = v9fs_co_mkdir(pdu, fidp, &name, mode, fidp->uid, gid, &stbuf);
    if (err < 0) {
        goto out;
    }
    err = stat_to_qid(pdu, &stbuf, &qid);
    if (err < 0) {
        goto out;
    }
    err = pdu_marshal(pdu, offset, "Q", &qid);
    if (err < 0) {
        goto out;
    }
    err += offset;
    trace_v9fs_mkdir_return(pdu->tag, pdu->id,
                            qid.type, qid.version, qid.path, err);
out:
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, err);
    v9fs_string_free(&name);
}

/*
 * Txattrwalk handler: clone 'fid' into 'newfid' and attach it to an
 * extended attribute. An empty 'name' means a listxattr request; a
 * non-empty 'name' selects one specific xattr. In both cases the xattr
 * value is read into 'newfid''s buffer so subsequent Treads can consume
 * it, and the value's size is returned to the client.
 */
static void coroutine_fn v9fs_xattrwalk(void *opaque)
{
    int64_t size;
    V9fsString name;
    ssize_t err = 0;
    size_t offset = 7;
    int32_t fid, newfid;
    V9fsFidState *file_fidp;
    V9fsFidState *xattr_fidp = NULL;
    V9fsPDU *pdu = opaque;
    V9fsState *s = pdu->s;

    v9fs_string_init(&name);
    err = pdu_unmarshal(pdu, offset, "dds", &fid, &newfid, &name);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_xattrwalk(pdu->tag, pdu->id, fid, newfid, name.data);

    file_fidp = get_fid(pdu, fid);
    if (file_fidp == NULL) {
        err = -ENOENT;
        goto out_nofid;
    }
    xattr_fidp = alloc_fid(s, newfid);
    if (xattr_fidp == NULL) {
        err = -EINVAL;
        goto out;
    }
    v9fs_path_copy(&xattr_fidp->path, &file_fidp->path);
    if (!v9fs_string_size(&name)) {
        /*
         * listxattr request.
         * Get the size first
         */
        size = v9fs_co_llistxattr(pdu, &xattr_fidp->path, NULL, 0);
        if (size < 0) {
            err = size;
            clunk_fid(s, xattr_fidp->fid);
            goto out;
        }
        /*
         * Read the xattr value
         */
        xattr_fidp->fs.xattr.len = size;
        xattr_fidp->fid_type = P9_FID_XATTR;
        xattr_fidp->fs.xattr.xattrwalk_fid = true;
        /*
         * NOTE(review): 'size' comes straight from the host's listxattr
         * and is not bounded by P9_XATTR_SIZE_MAX before g_malloc0(),
         * unlike the write path in v9fs_xattrcreate — confirm intended
         */
        xattr_fidp->fs.xattr.value = g_malloc0(size);
        if (size) {
            err = v9fs_co_llistxattr(pdu, &xattr_fidp->path,
                                     xattr_fidp->fs.xattr.value,
                                     xattr_fidp->fs.xattr.len);
            if (err < 0) {
                clunk_fid(s, xattr_fidp->fid);
                goto out;
            }
        }
        err = pdu_marshal(pdu, offset, "q", size);
        if (err < 0) {
            goto out;
        }
        err += offset;
    } else {
        /*
         * specific xattr fid. We check for xattr
         * presence also collect the xattr size
         */
        size = v9fs_co_lgetxattr(pdu, &xattr_fidp->path,
                                 &name, NULL, 0);
        if (size < 0) {
            err = size;
            clunk_fid(s, xattr_fidp->fid);
            goto out;
        }
        /*
         * Read the xattr value
         */
        xattr_fidp->fs.xattr.len = size;
        xattr_fidp->fid_type = P9_FID_XATTR;
        xattr_fidp->fs.xattr.xattrwalk_fid = true;
        xattr_fidp->fs.xattr.value = g_malloc0(size);
        if (size) {
            err = v9fs_co_lgetxattr(pdu, &xattr_fidp->path,
                                    &name, xattr_fidp->fs.xattr.value,
                                    xattr_fidp->fs.xattr.len);
            if (err < 0) {
                clunk_fid(s, xattr_fidp->fid);
                goto out;
            }
        }
        err = pdu_marshal(pdu, offset, "q", size);
        if (err < 0) {
            goto out;
        }
        err += offset;
    }
    trace_v9fs_xattrwalk_return(pdu->tag, pdu->id, size);
out:
    put_fid(pdu, file_fidp);
    if (xattr_fidp) {
        put_fid(pdu, xattr_fidp);
    }
out_nofid:
    pdu_complete(pdu, err);
    v9fs_string_free(&name);
}

#if defined(CONFIG_LINUX)
/* Currently, only Linux has XATTR_SIZE_MAX */
#define P9_XATTR_SIZE_MAX XATTR_SIZE_MAX
#elif defined(CONFIG_DARWIN)
/*
 * Darwin doesn't seem to define a maximum xattr size in its user
 * space header, so manually configure it across platforms as 64k.
 *
 * Having no limit at all can lead to QEMU crashing during large g_malloc()
 * calls. Because QEMU does not currently support macOS guests, the below
 * preliminary solution only works due to its being a reflection of the limit of
 * Linux guests.
 */
#define P9_XATTR_SIZE_MAX 65536
#else
#error Missing definition for P9_XATTR_SIZE_MAX for this host system
#endif

/*
 * Txattrcreate handler: repurpose 'fid' (which must not already have an
 * open file, dir or xattr attached) as a write buffer for the named
 * xattr. The actual setxattr happens later, when the fid is clunked
 * after the client has written 'size' bytes of value into it.
 */
static void coroutine_fn v9fs_xattrcreate(void *opaque)
{
    int flags, rflags = 0;
    int32_t fid;
    uint64_t size;
    ssize_t err = 0;
    V9fsString name;
    size_t offset = 7;
    V9fsFidState *file_fidp;
    V9fsFidState *xattr_fidp;
    V9fsPDU *pdu = opaque;

    v9fs_string_init(&name);
    err = pdu_unmarshal(pdu, offset, "dsqd", &fid, &name, &size, &flags);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_xattrcreate(pdu->tag, pdu->id, fid, name.data, size, flags);

    /* Only the CREATE/REPLACE bits are understood; reject anything else */
    if (flags & ~(P9_XATTR_CREATE | P9_XATTR_REPLACE)) {
        err = -EINVAL;
        goto out_nofid;
    }

    /* Translate protocol flag bits to the host setxattr() flag values */
    if (flags & P9_XATTR_CREATE) {
        rflags |= XATTR_CREATE;
    }

    if (flags & P9_XATTR_REPLACE) {
        rflags |= XATTR_REPLACE;
    }

    /* Bound the upcoming g_malloc0() against a hostile 'size' */
    if (size > P9_XATTR_SIZE_MAX) {
        err = -E2BIG;
        goto out_nofid;
    }

    file_fidp = get_fid(pdu, fid);
    if (file_fidp == NULL) {
        err = -EINVAL;
        goto out_nofid;
    }
    if (file_fidp->fid_type != P9_FID_NONE) {
        err = -EINVAL;
        goto out_put_fid;
    }

    /* Make the file fid point to xattr */
    xattr_fidp = file_fidp;
    xattr_fidp->fid_type = P9_FID_XATTR;
    xattr_fidp->fs.xattr.copied_len = 0;
    xattr_fidp->fs.xattr.xattrwalk_fid = false;
    xattr_fidp->fs.xattr.len = size;
    xattr_fidp->fs.xattr.flags = rflags;
    v9fs_string_init(&xattr_fidp->fs.xattr.name);
    v9fs_string_copy(&xattr_fidp->fs.xattr.name, &name);
    xattr_fidp->fs.xattr.value = g_malloc0(size);
    err = offset;
out_put_fid:
    put_fid(pdu, file_fidp);
out_nofid:
    pdu_complete(pdu, err);
    v9fs_string_free(&name);
}

/*
 * Treadlink handler: read the target of the symlink referred to by 'fid'
 * and reply with it as a string.
 */
static void coroutine_fn v9fs_readlink(void *opaque)
{
    V9fsPDU *pdu = opaque;
    size_t offset = 7;
    V9fsString target;
    int32_t fid;
    int err = 0;
    V9fsFidState *fidp;

    err = pdu_unmarshal(pdu, offset, "d", &fid);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_readlink(pdu->tag, pdu->id, fid);
    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -ENOENT;
        goto out_nofid;
    }

    v9fs_string_init(&target);
    err = v9fs_co_readlink(pdu, &fidp->path, &target);
    if (err < 0) {
        goto out;
    }
    err = pdu_marshal(pdu, offset, "s", &target);
    if (err < 0) {
        v9fs_string_free(&target);
        goto out;
    }
    err += offset;
    trace_v9fs_readlink_return(pdu->tag, pdu->id, target.data);
    v9fs_string_free(&target);
out:
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, err);
}

/* Dispatch table: 9P request opcode -> coroutine handler */
static CoroutineEntry *pdu_co_handlers[] = {
    [P9_TREADDIR] = v9fs_readdir,
    [P9_TSTATFS] = v9fs_statfs,
    [P9_TGETATTR] = v9fs_getattr,
    [P9_TSETATTR] = v9fs_setattr,
    [P9_TXATTRWALK] = v9fs_xattrwalk,
    [P9_TXATTRCREATE] = v9fs_xattrcreate,
    [P9_TMKNOD] = v9fs_mknod,
    [P9_TRENAME] = v9fs_rename,
    [P9_TLOCK] = v9fs_lock,
    [P9_TGETLOCK] = v9fs_getlock,
    [P9_TRENAMEAT] = v9fs_renameat,
    [P9_TREADLINK] = v9fs_readlink,
    [P9_TUNLINKAT] = v9fs_unlinkat,
    [P9_TMKDIR] = v9fs_mkdir,
    [P9_TVERSION] = v9fs_version,
    [P9_TLOPEN] = v9fs_open,
    [P9_TATTACH] = v9fs_attach,
    [P9_TSTAT] = v9fs_stat,
    [P9_TWALK] = v9fs_walk,
    [P9_TCLUNK] = v9fs_clunk,
    [P9_TFSYNC] = v9fs_fsync,
[P9_TOPEN] = v9fs_open, 4116 [P9_TREAD] = v9fs_read, 4117 #if 0 4118 [P9_TAUTH] = v9fs_auth, 4119 #endif 4120 [P9_TFLUSH] = v9fs_flush, 4121 [P9_TLINK] = v9fs_link, 4122 [P9_TSYMLINK] = v9fs_symlink, 4123 [P9_TCREATE] = v9fs_create, 4124 [P9_TLCREATE] = v9fs_lcreate, 4125 [P9_TWRITE] = v9fs_write, 4126 [P9_TWSTAT] = v9fs_wstat, 4127 [P9_TREMOVE] = v9fs_remove, 4128 }; 4129 4130 static void coroutine_fn v9fs_op_not_supp(void *opaque) 4131 { 4132 V9fsPDU *pdu = opaque; 4133 pdu_complete(pdu, -EOPNOTSUPP); 4134 } 4135 4136 static void coroutine_fn v9fs_fs_ro(void *opaque) 4137 { 4138 V9fsPDU *pdu = opaque; 4139 pdu_complete(pdu, -EROFS); 4140 } 4141 4142 static inline bool is_read_only_op(V9fsPDU *pdu) 4143 { 4144 switch (pdu->id) { 4145 case P9_TREADDIR: 4146 case P9_TSTATFS: 4147 case P9_TGETATTR: 4148 case P9_TXATTRWALK: 4149 case P9_TLOCK: 4150 case P9_TGETLOCK: 4151 case P9_TREADLINK: 4152 case P9_TVERSION: 4153 case P9_TLOPEN: 4154 case P9_TATTACH: 4155 case P9_TSTAT: 4156 case P9_TWALK: 4157 case P9_TCLUNK: 4158 case P9_TFSYNC: 4159 case P9_TOPEN: 4160 case P9_TREAD: 4161 case P9_TAUTH: 4162 case P9_TFLUSH: 4163 return 1; 4164 default: 4165 return 0; 4166 } 4167 } 4168 4169 void pdu_submit(V9fsPDU *pdu, P9MsgHeader *hdr) 4170 { 4171 Coroutine *co; 4172 CoroutineEntry *handler; 4173 V9fsState *s = pdu->s; 4174 4175 pdu->size = le32_to_cpu(hdr->size_le); 4176 pdu->id = hdr->id; 4177 pdu->tag = le16_to_cpu(hdr->tag_le); 4178 4179 if (pdu->id >= ARRAY_SIZE(pdu_co_handlers) || 4180 (pdu_co_handlers[pdu->id] == NULL)) { 4181 handler = v9fs_op_not_supp; 4182 } else if (is_ro_export(&s->ctx) && !is_read_only_op(pdu)) { 4183 handler = v9fs_fs_ro; 4184 } else { 4185 handler = pdu_co_handlers[pdu->id]; 4186 } 4187 4188 qemu_co_queue_init(&pdu->complete); 4189 co = qemu_coroutine_create(handler, pdu); 4190 qemu_coroutine_enter(co); 4191 } 4192 4193 /* Returns 0 on success, 1 on failure. 
*/ 4194 int v9fs_device_realize_common(V9fsState *s, const V9fsTransport *t, 4195 Error **errp) 4196 { 4197 ERRP_GUARD(); 4198 int i, len; 4199 struct stat stat; 4200 FsDriverEntry *fse; 4201 V9fsPath path; 4202 int rc = 1; 4203 4204 assert(!s->transport); 4205 s->transport = t; 4206 4207 /* initialize pdu allocator */ 4208 QLIST_INIT(&s->free_list); 4209 QLIST_INIT(&s->active_list); 4210 for (i = 0; i < MAX_REQ; i++) { 4211 QLIST_INSERT_HEAD(&s->free_list, &s->pdus[i], next); 4212 s->pdus[i].s = s; 4213 s->pdus[i].idx = i; 4214 } 4215 4216 v9fs_path_init(&path); 4217 4218 fse = get_fsdev_fsentry(s->fsconf.fsdev_id); 4219 4220 if (!fse) { 4221 /* We don't have a fsdev identified by fsdev_id */ 4222 error_setg(errp, "9pfs device couldn't find fsdev with the " 4223 "id = %s", 4224 s->fsconf.fsdev_id ? s->fsconf.fsdev_id : "NULL"); 4225 goto out; 4226 } 4227 4228 if (!s->fsconf.tag) { 4229 /* we haven't specified a mount_tag */ 4230 error_setg(errp, "fsdev with id %s needs mount_tag arguments", 4231 s->fsconf.fsdev_id); 4232 goto out; 4233 } 4234 4235 s->ctx.export_flags = fse->export_flags; 4236 s->ctx.fs_root = g_strdup(fse->path); 4237 s->ctx.exops.get_st_gen = NULL; 4238 len = strlen(s->fsconf.tag); 4239 if (len > MAX_TAG_LEN - 1) { 4240 error_setg(errp, "mount tag '%s' (%d bytes) is longer than " 4241 "maximum (%d bytes)", s->fsconf.tag, len, MAX_TAG_LEN - 1); 4242 goto out; 4243 } 4244 4245 s->tag = g_strdup(s->fsconf.tag); 4246 s->ctx.uid = -1; 4247 4248 s->ops = fse->ops; 4249 4250 s->ctx.fmode = fse->fmode; 4251 s->ctx.dmode = fse->dmode; 4252 4253 s->fids = g_hash_table_new(NULL, NULL); 4254 qemu_co_rwlock_init(&s->rename_lock); 4255 4256 if (s->ops->init(&s->ctx, errp) < 0) { 4257 error_prepend(errp, "cannot initialize fsdev '%s': ", 4258 s->fsconf.fsdev_id); 4259 goto out; 4260 } 4261 4262 /* 4263 * Check details of export path, We need to use fs driver 4264 * call back to do that. Since we are in the init path, we don't 4265 * use co-routines here. 
4266 */ 4267 if (s->ops->name_to_path(&s->ctx, NULL, "/", &path) < 0) { 4268 error_setg(errp, 4269 "error in converting name to path %s", strerror(errno)); 4270 goto out; 4271 } 4272 if (s->ops->lstat(&s->ctx, &path, &stat)) { 4273 error_setg(errp, "share path %s does not exist", fse->path); 4274 goto out; 4275 } else if (!S_ISDIR(stat.st_mode)) { 4276 error_setg(errp, "share path %s is not a directory", fse->path); 4277 goto out; 4278 } 4279 4280 s->dev_id = stat.st_dev; 4281 4282 /* init inode remapping : */ 4283 /* hash table for variable length inode suffixes */ 4284 qpd_table_init(&s->qpd_table); 4285 /* hash table for slow/full inode remapping (most users won't need it) */ 4286 qpf_table_init(&s->qpf_table); 4287 /* hash table for quick inode remapping */ 4288 qpp_table_init(&s->qpp_table); 4289 s->qp_ndevices = 0; 4290 s->qp_affix_next = 1; /* reserve 0 to detect overflow */ 4291 s->qp_fullpath_next = 1; 4292 4293 s->ctx.fst = &fse->fst; 4294 fsdev_throttle_init(s->ctx.fst); 4295 4296 rc = 0; 4297 out: 4298 if (rc) { 4299 v9fs_device_unrealize_common(s); 4300 } 4301 v9fs_path_free(&path); 4302 return rc; 4303 } 4304 4305 void v9fs_device_unrealize_common(V9fsState *s) 4306 { 4307 if (s->ops && s->ops->cleanup) { 4308 s->ops->cleanup(&s->ctx); 4309 } 4310 if (s->ctx.fst) { 4311 fsdev_throttle_cleanup(s->ctx.fst); 4312 } 4313 if (s->fids) { 4314 g_hash_table_destroy(s->fids); 4315 s->fids = NULL; 4316 } 4317 g_free(s->tag); 4318 qp_table_destroy(&s->qpd_table); 4319 qp_table_destroy(&s->qpp_table); 4320 qp_table_destroy(&s->qpf_table); 4321 g_free(s->ctx.fs_root); 4322 } 4323 4324 typedef struct VirtfsCoResetData { 4325 V9fsPDU pdu; 4326 bool done; 4327 } VirtfsCoResetData; 4328 4329 static void coroutine_fn virtfs_co_reset(void *opaque) 4330 { 4331 VirtfsCoResetData *data = opaque; 4332 4333 virtfs_reset(&data->pdu); 4334 data->done = true; 4335 } 4336 4337 void v9fs_reset(V9fsState *s) 4338 { 4339 VirtfsCoResetData data = { .pdu = { .s = s }, .done = false 
}; 4340 Coroutine *co; 4341 4342 while (!QLIST_EMPTY(&s->active_list)) { 4343 aio_poll(qemu_get_aio_context(), true); 4344 } 4345 4346 co = qemu_coroutine_create(virtfs_co_reset, &data); 4347 qemu_coroutine_enter(co); 4348 4349 while (!data.done) { 4350 aio_poll(qemu_get_aio_context(), true); 4351 } 4352 } 4353 4354 static void __attribute__((__constructor__)) v9fs_set_fd_limit(void) 4355 { 4356 struct rlimit rlim; 4357 if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) { 4358 error_report("Failed to get the resource limit"); 4359 exit(1); 4360 } 4361 open_fd_hw = rlim.rlim_cur - MIN(400, rlim.rlim_cur / 3); 4362 open_fd_rc = rlim.rlim_cur / 2; 4363 } 4364