1 /* 2 * Virtio 9p backend 3 * 4 * Copyright IBM, Corp. 2010 5 * 6 * Authors: 7 * Anthony Liguori <aliguori@us.ibm.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2. See 10 * the COPYING file in the top-level directory. 11 * 12 */ 13 14 #include "qemu/osdep.h" 15 #include <glib/gprintf.h> 16 #include "hw/virtio/virtio.h" 17 #include "qapi/error.h" 18 #include "qemu/error-report.h" 19 #include "qemu/iov.h" 20 #include "qemu/main-loop.h" 21 #include "qemu/sockets.h" 22 #include "virtio-9p.h" 23 #include "fsdev/qemu-fsdev.h" 24 #include "9p-xattr.h" 25 #include "coth.h" 26 #include "trace.h" 27 #include "migration/blocker.h" 28 #include "qemu/xxhash.h" 29 #include <math.h> 30 #include <linux/limits.h> 31 32 int open_fd_hw; 33 int total_open_fd; 34 static int open_fd_rc; 35 36 enum { 37 Oread = 0x00, 38 Owrite = 0x01, 39 Ordwr = 0x02, 40 Oexec = 0x03, 41 Oexcl = 0x04, 42 Otrunc = 0x10, 43 Orexec = 0x20, 44 Orclose = 0x40, 45 Oappend = 0x80, 46 }; 47 48 static ssize_t pdu_marshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...) 49 { 50 ssize_t ret; 51 va_list ap; 52 53 va_start(ap, fmt); 54 ret = pdu->s->transport->pdu_vmarshal(pdu, offset, fmt, ap); 55 va_end(ap); 56 57 return ret; 58 } 59 60 static ssize_t pdu_unmarshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...) 
61 { 62 ssize_t ret; 63 va_list ap; 64 65 va_start(ap, fmt); 66 ret = pdu->s->transport->pdu_vunmarshal(pdu, offset, fmt, ap); 67 va_end(ap); 68 69 return ret; 70 } 71 72 static int omode_to_uflags(int8_t mode) 73 { 74 int ret = 0; 75 76 switch (mode & 3) { 77 case Oread: 78 ret = O_RDONLY; 79 break; 80 case Ordwr: 81 ret = O_RDWR; 82 break; 83 case Owrite: 84 ret = O_WRONLY; 85 break; 86 case Oexec: 87 ret = O_RDONLY; 88 break; 89 } 90 91 if (mode & Otrunc) { 92 ret |= O_TRUNC; 93 } 94 95 if (mode & Oappend) { 96 ret |= O_APPEND; 97 } 98 99 if (mode & Oexcl) { 100 ret |= O_EXCL; 101 } 102 103 return ret; 104 } 105 106 typedef struct DotlOpenflagMap { 107 int dotl_flag; 108 int open_flag; 109 } DotlOpenflagMap; 110 111 static int dotl_to_open_flags(int flags) 112 { 113 int i; 114 /* 115 * We have same bits for P9_DOTL_READONLY, P9_DOTL_WRONLY 116 * and P9_DOTL_NOACCESS 117 */ 118 int oflags = flags & O_ACCMODE; 119 120 DotlOpenflagMap dotl_oflag_map[] = { 121 { P9_DOTL_CREATE, O_CREAT }, 122 { P9_DOTL_EXCL, O_EXCL }, 123 { P9_DOTL_NOCTTY , O_NOCTTY }, 124 { P9_DOTL_TRUNC, O_TRUNC }, 125 { P9_DOTL_APPEND, O_APPEND }, 126 { P9_DOTL_NONBLOCK, O_NONBLOCK } , 127 { P9_DOTL_DSYNC, O_DSYNC }, 128 { P9_DOTL_FASYNC, FASYNC }, 129 { P9_DOTL_DIRECT, O_DIRECT }, 130 { P9_DOTL_LARGEFILE, O_LARGEFILE }, 131 { P9_DOTL_DIRECTORY, O_DIRECTORY }, 132 { P9_DOTL_NOFOLLOW, O_NOFOLLOW }, 133 { P9_DOTL_NOATIME, O_NOATIME }, 134 { P9_DOTL_SYNC, O_SYNC }, 135 }; 136 137 for (i = 0; i < ARRAY_SIZE(dotl_oflag_map); i++) { 138 if (flags & dotl_oflag_map[i].dotl_flag) { 139 oflags |= dotl_oflag_map[i].open_flag; 140 } 141 } 142 143 return oflags; 144 } 145 146 void cred_init(FsCred *credp) 147 { 148 credp->fc_uid = -1; 149 credp->fc_gid = -1; 150 credp->fc_mode = -1; 151 credp->fc_rdev = -1; 152 } 153 154 static int get_dotl_openflags(V9fsState *s, int oflags) 155 { 156 int flags; 157 /* 158 * Filter the client open flags 159 */ 160 flags = dotl_to_open_flags(oflags); 161 flags &= ~(O_NOCTTY | 
O_ASYNC | O_CREAT); 162 /* 163 * Ignore direct disk access hint until the server supports it. 164 */ 165 flags &= ~O_DIRECT; 166 return flags; 167 } 168 169 void v9fs_path_init(V9fsPath *path) 170 { 171 path->data = NULL; 172 path->size = 0; 173 } 174 175 void v9fs_path_free(V9fsPath *path) 176 { 177 g_free(path->data); 178 path->data = NULL; 179 path->size = 0; 180 } 181 182 183 void GCC_FMT_ATTR(2, 3) 184 v9fs_path_sprintf(V9fsPath *path, const char *fmt, ...) 185 { 186 va_list ap; 187 188 v9fs_path_free(path); 189 190 va_start(ap, fmt); 191 /* Bump the size for including terminating NULL */ 192 path->size = g_vasprintf(&path->data, fmt, ap) + 1; 193 va_end(ap); 194 } 195 196 void v9fs_path_copy(V9fsPath *dst, const V9fsPath *src) 197 { 198 v9fs_path_free(dst); 199 dst->size = src->size; 200 dst->data = g_memdup(src->data, src->size); 201 } 202 203 int v9fs_name_to_path(V9fsState *s, V9fsPath *dirpath, 204 const char *name, V9fsPath *path) 205 { 206 int err; 207 err = s->ops->name_to_path(&s->ctx, dirpath, name, path); 208 if (err < 0) { 209 err = -errno; 210 } 211 return err; 212 } 213 214 /* 215 * Return TRUE if s1 is an ancestor of s2. 216 * 217 * E.g. "a/b" is an ancestor of "a/b/c" but not of "a/bc/d". 218 * As a special case, We treat s1 as ancestor of s2 if they are same! 
219 */ 220 static int v9fs_path_is_ancestor(V9fsPath *s1, V9fsPath *s2) 221 { 222 if (!strncmp(s1->data, s2->data, s1->size - 1)) { 223 if (s2->data[s1->size - 1] == '\0' || s2->data[s1->size - 1] == '/') { 224 return 1; 225 } 226 } 227 return 0; 228 } 229 230 static size_t v9fs_string_size(V9fsString *str) 231 { 232 return str->size; 233 } 234 235 /* 236 * returns 0 if fid got re-opened, 1 if not, < 0 on error */ 237 static int coroutine_fn v9fs_reopen_fid(V9fsPDU *pdu, V9fsFidState *f) 238 { 239 int err = 1; 240 if (f->fid_type == P9_FID_FILE) { 241 if (f->fs.fd == -1) { 242 do { 243 err = v9fs_co_open(pdu, f, f->open_flags); 244 } while (err == -EINTR && !pdu->cancelled); 245 } 246 } else if (f->fid_type == P9_FID_DIR) { 247 if (f->fs.dir.stream == NULL) { 248 do { 249 err = v9fs_co_opendir(pdu, f); 250 } while (err == -EINTR && !pdu->cancelled); 251 } 252 } 253 return err; 254 } 255 256 static V9fsFidState *coroutine_fn get_fid(V9fsPDU *pdu, int32_t fid) 257 { 258 int err; 259 V9fsFidState *f; 260 V9fsState *s = pdu->s; 261 262 QSIMPLEQ_FOREACH(f, &s->fid_list, next) { 263 BUG_ON(f->clunked); 264 if (f->fid == fid) { 265 /* 266 * Update the fid ref upfront so that 267 * we don't get reclaimed when we yield 268 * in open later. 269 */ 270 f->ref++; 271 /* 272 * check whether we need to reopen the 273 * file. We might have closed the fd 274 * while trying to free up some file 275 * descriptors. 
276 */ 277 err = v9fs_reopen_fid(pdu, f); 278 if (err < 0) { 279 f->ref--; 280 return NULL; 281 } 282 /* 283 * Mark the fid as referenced so that the LRU 284 * reclaim won't close the file descriptor 285 */ 286 f->flags |= FID_REFERENCED; 287 return f; 288 } 289 } 290 return NULL; 291 } 292 293 static V9fsFidState *alloc_fid(V9fsState *s, int32_t fid) 294 { 295 V9fsFidState *f; 296 297 QSIMPLEQ_FOREACH(f, &s->fid_list, next) { 298 /* If fid is already there return NULL */ 299 BUG_ON(f->clunked); 300 if (f->fid == fid) { 301 return NULL; 302 } 303 } 304 f = g_malloc0(sizeof(V9fsFidState)); 305 f->fid = fid; 306 f->fid_type = P9_FID_NONE; 307 f->ref = 1; 308 /* 309 * Mark the fid as referenced so that the LRU 310 * reclaim won't close the file descriptor 311 */ 312 f->flags |= FID_REFERENCED; 313 QSIMPLEQ_INSERT_TAIL(&s->fid_list, f, next); 314 315 v9fs_readdir_init(s->proto_version, &f->fs.dir); 316 v9fs_readdir_init(s->proto_version, &f->fs_reclaim.dir); 317 318 return f; 319 } 320 321 static int coroutine_fn v9fs_xattr_fid_clunk(V9fsPDU *pdu, V9fsFidState *fidp) 322 { 323 int retval = 0; 324 325 if (fidp->fs.xattr.xattrwalk_fid) { 326 /* getxattr/listxattr fid */ 327 goto free_value; 328 } 329 /* 330 * if this is fid for setxattr. 
clunk should 331 * result in setxattr localcall 332 */ 333 if (fidp->fs.xattr.len != fidp->fs.xattr.copied_len) { 334 /* clunk after partial write */ 335 retval = -EINVAL; 336 goto free_out; 337 } 338 if (fidp->fs.xattr.len) { 339 retval = v9fs_co_lsetxattr(pdu, &fidp->path, &fidp->fs.xattr.name, 340 fidp->fs.xattr.value, 341 fidp->fs.xattr.len, 342 fidp->fs.xattr.flags); 343 } else { 344 retval = v9fs_co_lremovexattr(pdu, &fidp->path, &fidp->fs.xattr.name); 345 } 346 free_out: 347 v9fs_string_free(&fidp->fs.xattr.name); 348 free_value: 349 g_free(fidp->fs.xattr.value); 350 return retval; 351 } 352 353 static int coroutine_fn free_fid(V9fsPDU *pdu, V9fsFidState *fidp) 354 { 355 int retval = 0; 356 357 if (fidp->fid_type == P9_FID_FILE) { 358 /* If we reclaimed the fd no need to close */ 359 if (fidp->fs.fd != -1) { 360 retval = v9fs_co_close(pdu, &fidp->fs); 361 } 362 } else if (fidp->fid_type == P9_FID_DIR) { 363 if (fidp->fs.dir.stream != NULL) { 364 retval = v9fs_co_closedir(pdu, &fidp->fs); 365 } 366 } else if (fidp->fid_type == P9_FID_XATTR) { 367 retval = v9fs_xattr_fid_clunk(pdu, fidp); 368 } 369 v9fs_path_free(&fidp->path); 370 g_free(fidp); 371 return retval; 372 } 373 374 static int coroutine_fn put_fid(V9fsPDU *pdu, V9fsFidState *fidp) 375 { 376 BUG_ON(!fidp->ref); 377 fidp->ref--; 378 /* 379 * Don't free the fid if it is in reclaim list 380 */ 381 if (!fidp->ref && fidp->clunked) { 382 if (fidp->fid == pdu->s->root_fid) { 383 /* 384 * if the clunked fid is root fid then we 385 * have unmounted the fs on the client side. 386 * delete the migration blocker. 
Ideally, this 387 * should be hooked to transport close notification 388 */ 389 if (pdu->s->migration_blocker) { 390 migrate_del_blocker(pdu->s->migration_blocker); 391 error_free(pdu->s->migration_blocker); 392 pdu->s->migration_blocker = NULL; 393 } 394 } 395 return free_fid(pdu, fidp); 396 } 397 return 0; 398 } 399 400 static V9fsFidState *clunk_fid(V9fsState *s, int32_t fid) 401 { 402 V9fsFidState *fidp; 403 404 QSIMPLEQ_FOREACH(fidp, &s->fid_list, next) { 405 if (fidp->fid == fid) { 406 QSIMPLEQ_REMOVE(&s->fid_list, fidp, V9fsFidState, next); 407 fidp->clunked = true; 408 return fidp; 409 } 410 } 411 return NULL; 412 } 413 414 void coroutine_fn v9fs_reclaim_fd(V9fsPDU *pdu) 415 { 416 int reclaim_count = 0; 417 V9fsState *s = pdu->s; 418 V9fsFidState *f; 419 QSLIST_HEAD(, V9fsFidState) reclaim_list = 420 QSLIST_HEAD_INITIALIZER(reclaim_list); 421 422 QSIMPLEQ_FOREACH(f, &s->fid_list, next) { 423 /* 424 * Unlink fids cannot be reclaimed. Check 425 * for them and skip them. Also skip fids 426 * currently being operated on. 427 */ 428 if (f->ref || f->flags & FID_NON_RECLAIMABLE) { 429 continue; 430 } 431 /* 432 * if it is a recently referenced fid 433 * we leave the fid untouched and clear the 434 * reference bit. We come back to it later 435 * in the next iteration. (a simple LRU without 436 * moving list elements around) 437 */ 438 if (f->flags & FID_REFERENCED) { 439 f->flags &= ~FID_REFERENCED; 440 continue; 441 } 442 /* 443 * Add fids to reclaim list. 
444 */ 445 if (f->fid_type == P9_FID_FILE) { 446 if (f->fs.fd != -1) { 447 /* 448 * Up the reference count so that 449 * a clunk request won't free this fid 450 */ 451 f->ref++; 452 QSLIST_INSERT_HEAD(&reclaim_list, f, reclaim_next); 453 f->fs_reclaim.fd = f->fs.fd; 454 f->fs.fd = -1; 455 reclaim_count++; 456 } 457 } else if (f->fid_type == P9_FID_DIR) { 458 if (f->fs.dir.stream != NULL) { 459 /* 460 * Up the reference count so that 461 * a clunk request won't free this fid 462 */ 463 f->ref++; 464 QSLIST_INSERT_HEAD(&reclaim_list, f, reclaim_next); 465 f->fs_reclaim.dir.stream = f->fs.dir.stream; 466 f->fs.dir.stream = NULL; 467 reclaim_count++; 468 } 469 } 470 if (reclaim_count >= open_fd_rc) { 471 break; 472 } 473 } 474 /* 475 * Now close the fid in reclaim list. Free them if they 476 * are already clunked. 477 */ 478 while (!QSLIST_EMPTY(&reclaim_list)) { 479 f = QSLIST_FIRST(&reclaim_list); 480 QSLIST_REMOVE(&reclaim_list, f, V9fsFidState, reclaim_next); 481 if (f->fid_type == P9_FID_FILE) { 482 v9fs_co_close(pdu, &f->fs_reclaim); 483 } else if (f->fid_type == P9_FID_DIR) { 484 v9fs_co_closedir(pdu, &f->fs_reclaim); 485 } 486 /* 487 * Now drop the fid reference, free it 488 * if clunked. 489 */ 490 put_fid(pdu, f); 491 } 492 } 493 494 static int coroutine_fn v9fs_mark_fids_unreclaim(V9fsPDU *pdu, V9fsPath *path) 495 { 496 int err; 497 V9fsState *s = pdu->s; 498 V9fsFidState *fidp, *fidp_next; 499 500 fidp = QSIMPLEQ_FIRST(&s->fid_list); 501 if (!fidp) { 502 return 0; 503 } 504 505 /* 506 * v9fs_reopen_fid() can yield : a reference on the fid must be held 507 * to ensure its pointer remains valid and we can safely pass it to 508 * QSIMPLEQ_NEXT(). The corresponding put_fid() can also yield so 509 * we must keep a reference on the next fid as well. So the logic here 510 * is to get a reference on a fid and only put it back during the next 511 * iteration after we could get a reference on the next fid. Start with 512 * the first one. 
513 */ 514 for (fidp->ref++; fidp; fidp = fidp_next) { 515 if (fidp->path.size == path->size && 516 !memcmp(fidp->path.data, path->data, path->size)) { 517 /* Mark the fid non reclaimable. */ 518 fidp->flags |= FID_NON_RECLAIMABLE; 519 520 /* reopen the file/dir if already closed */ 521 err = v9fs_reopen_fid(pdu, fidp); 522 if (err < 0) { 523 put_fid(pdu, fidp); 524 return err; 525 } 526 } 527 528 fidp_next = QSIMPLEQ_NEXT(fidp, next); 529 530 if (fidp_next) { 531 /* 532 * Ensure the next fid survives a potential clunk request during 533 * put_fid() below and v9fs_reopen_fid() in the next iteration. 534 */ 535 fidp_next->ref++; 536 } 537 538 /* We're done with this fid */ 539 put_fid(pdu, fidp); 540 } 541 542 return 0; 543 } 544 545 static void coroutine_fn virtfs_reset(V9fsPDU *pdu) 546 { 547 V9fsState *s = pdu->s; 548 V9fsFidState *fidp; 549 550 /* Free all fids */ 551 while (!QSIMPLEQ_EMPTY(&s->fid_list)) { 552 /* Get fid */ 553 fidp = QSIMPLEQ_FIRST(&s->fid_list); 554 fidp->ref++; 555 556 /* Clunk fid */ 557 QSIMPLEQ_REMOVE(&s->fid_list, fidp, V9fsFidState, next); 558 fidp->clunked = true; 559 560 put_fid(pdu, fidp); 561 } 562 } 563 564 #define P9_QID_TYPE_DIR 0x80 565 #define P9_QID_TYPE_SYMLINK 0x02 566 567 #define P9_STAT_MODE_DIR 0x80000000 568 #define P9_STAT_MODE_APPEND 0x40000000 569 #define P9_STAT_MODE_EXCL 0x20000000 570 #define P9_STAT_MODE_MOUNT 0x10000000 571 #define P9_STAT_MODE_AUTH 0x08000000 572 #define P9_STAT_MODE_TMP 0x04000000 573 #define P9_STAT_MODE_SYMLINK 0x02000000 574 #define P9_STAT_MODE_LINK 0x01000000 575 #define P9_STAT_MODE_DEVICE 0x00800000 576 #define P9_STAT_MODE_NAMED_PIPE 0x00200000 577 #define P9_STAT_MODE_SOCKET 0x00100000 578 #define P9_STAT_MODE_SETUID 0x00080000 579 #define P9_STAT_MODE_SETGID 0x00040000 580 #define P9_STAT_MODE_SETVTX 0x00010000 581 582 #define P9_STAT_MODE_TYPE_BITS (P9_STAT_MODE_DIR | \ 583 P9_STAT_MODE_SYMLINK | \ 584 P9_STAT_MODE_LINK | \ 585 P9_STAT_MODE_DEVICE | \ 586 P9_STAT_MODE_NAMED_PIPE | \ 
587 P9_STAT_MODE_SOCKET) 588 589 /* Mirrors all bits of a byte. So e.g. binary 10100000 would become 00000101. */ 590 static inline uint8_t mirror8bit(uint8_t byte) 591 { 592 return (byte * 0x0202020202ULL & 0x010884422010ULL) % 1023; 593 } 594 595 /* Same as mirror8bit() just for a 64 bit data type instead for a byte. */ 596 static inline uint64_t mirror64bit(uint64_t value) 597 { 598 return ((uint64_t)mirror8bit(value & 0xff) << 56) | 599 ((uint64_t)mirror8bit((value >> 8) & 0xff) << 48) | 600 ((uint64_t)mirror8bit((value >> 16) & 0xff) << 40) | 601 ((uint64_t)mirror8bit((value >> 24) & 0xff) << 32) | 602 ((uint64_t)mirror8bit((value >> 32) & 0xff) << 24) | 603 ((uint64_t)mirror8bit((value >> 40) & 0xff) << 16) | 604 ((uint64_t)mirror8bit((value >> 48) & 0xff) << 8) | 605 ((uint64_t)mirror8bit((value >> 56) & 0xff)); 606 } 607 608 /** 609 * @brief Parameter k for the Exponential Golomb algorihm to be used. 610 * 611 * The smaller this value, the smaller the minimum bit count for the Exp. 612 * Golomb generated affixes will be (at lowest index) however for the 613 * price of having higher maximum bit count of generated affixes (at highest 614 * index). Likewise increasing this parameter yields in smaller maximum bit 615 * count for the price of having higher minimum bit count. 616 * 617 * In practice that means: a good value for k depends on the expected amount 618 * of devices to be exposed by one export. For a small amount of devices k 619 * should be small, for a large amount of devices k might be increased 620 * instead. The default of k=0 should be fine for most users though. 621 * 622 * @b IMPORTANT: In case this ever becomes a runtime parameter; the value of 623 * k should not change as long as guest is still running! Because that would 624 * cause completely different inode numbers to be generated on guest. 625 */ 626 #define EXP_GOLOMB_K 0 627 628 /** 629 * @brief Exponential Golomb algorithm for arbitrary k (including k=0). 
630 * 631 * The Exponential Golomb algorithm generates @b prefixes (@b not suffixes!) 632 * with growing length and with the mathematical property of being 633 * "prefix-free". The latter means the generated prefixes can be prepended 634 * in front of arbitrary numbers and the resulting concatenated numbers are 635 * guaranteed to be always unique. 636 * 637 * This is a minor adjustment to the original Exp. Golomb algorithm in the 638 * sense that lowest allowed index (@param n) starts with 1, not with zero. 639 * 640 * @param n - natural number (or index) of the prefix to be generated 641 * (1, 2, 3, ...) 642 * @param k - parameter k of Exp. Golomb algorithm to be used 643 * (see comment on EXP_GOLOMB_K macro for details about k) 644 */ 645 static VariLenAffix expGolombEncode(uint64_t n, int k) 646 { 647 const uint64_t value = n + (1 << k) - 1; 648 const int bits = (int) log2(value) + 1; 649 return (VariLenAffix) { 650 .type = AffixType_Prefix, 651 .value = value, 652 .bits = bits + MAX((bits - 1 - k), 0) 653 }; 654 } 655 656 /** 657 * @brief Converts a suffix into a prefix, or a prefix into a suffix. 658 * 659 * Simply mirror all bits of the affix value, for the purpose to preserve 660 * respectively the mathematical "prefix-free" or "suffix-free" property 661 * after the conversion. 662 * 663 * If a passed prefix is suitable to create unique numbers, then the 664 * returned suffix is suitable to create unique numbers as well (and vice 665 * versa). 666 */ 667 static VariLenAffix invertAffix(const VariLenAffix *affix) 668 { 669 return (VariLenAffix) { 670 .type = 671 (affix->type == AffixType_Suffix) ? 672 AffixType_Prefix : AffixType_Suffix, 673 .value = 674 mirror64bit(affix->value) >> 675 ((sizeof(affix->value) * 8) - affix->bits), 676 .bits = affix->bits 677 }; 678 } 679 680 /** 681 * @brief Generates suffix numbers with "suffix-free" property. 682 * 683 * This is just a wrapper function on top of the Exp. Golomb algorithm. 684 * 685 * Since the Exp. 
Golomb algorithm generates prefixes, but we need suffixes, 686 * this function converts the Exp. Golomb prefixes into appropriate suffixes 687 * which are still suitable for generating unique numbers. 688 * 689 * @param n - natural number (or index) of the suffix to be generated 690 * (1, 2, 3, ...) 691 */ 692 static VariLenAffix affixForIndex(uint64_t index) 693 { 694 VariLenAffix prefix; 695 prefix = expGolombEncode(index, EXP_GOLOMB_K); 696 return invertAffix(&prefix); /* convert prefix to suffix */ 697 } 698 699 /* creative abuse of tb_hash_func7, which is based on xxhash */ 700 static uint32_t qpp_hash(QppEntry e) 701 { 702 return qemu_xxhash7(e.ino_prefix, e.dev, 0, 0, 0); 703 } 704 705 static uint32_t qpf_hash(QpfEntry e) 706 { 707 return qemu_xxhash7(e.ino, e.dev, 0, 0, 0); 708 } 709 710 static bool qpd_cmp_func(const void *obj, const void *userp) 711 { 712 const QpdEntry *e1 = obj, *e2 = userp; 713 return e1->dev == e2->dev; 714 } 715 716 static bool qpp_cmp_func(const void *obj, const void *userp) 717 { 718 const QppEntry *e1 = obj, *e2 = userp; 719 return e1->dev == e2->dev && e1->ino_prefix == e2->ino_prefix; 720 } 721 722 static bool qpf_cmp_func(const void *obj, const void *userp) 723 { 724 const QpfEntry *e1 = obj, *e2 = userp; 725 return e1->dev == e2->dev && e1->ino == e2->ino; 726 } 727 728 static void qp_table_remove(void *p, uint32_t h, void *up) 729 { 730 g_free(p); 731 } 732 733 static void qp_table_destroy(struct qht *ht) 734 { 735 if (!ht || !ht->map) { 736 return; 737 } 738 qht_iter(ht, qp_table_remove, NULL); 739 qht_destroy(ht); 740 } 741 742 static void qpd_table_init(struct qht *ht) 743 { 744 qht_init(ht, qpd_cmp_func, 1, QHT_MODE_AUTO_RESIZE); 745 } 746 747 static void qpp_table_init(struct qht *ht) 748 { 749 qht_init(ht, qpp_cmp_func, 1, QHT_MODE_AUTO_RESIZE); 750 } 751 752 static void qpf_table_init(struct qht *ht) 753 { 754 qht_init(ht, qpf_cmp_func, 1 << 16, QHT_MODE_AUTO_RESIZE); 755 } 756 757 /* 758 * Returns how many (high end) 
bits of inode numbers of the passed fs 759 * device shall be used (in combination with the device number) to 760 * generate hash values for qpp_table entries. 761 * 762 * This function is required if variable length suffixes are used for inode 763 * number mapping on guest level. Since a device may end up having multiple 764 * entries in qpp_table, each entry most probably with a different suffix 765 * length, we thus need this function in conjunction with qpd_table to 766 * "agree" about a fix amount of bits (per device) to be always used for 767 * generating hash values for the purpose of accessing qpp_table in order 768 * get consistent behaviour when accessing qpp_table. 769 */ 770 static int qid_inode_prefix_hash_bits(V9fsPDU *pdu, dev_t dev) 771 { 772 QpdEntry lookup = { 773 .dev = dev 774 }, *val; 775 uint32_t hash = dev; 776 VariLenAffix affix; 777 778 val = qht_lookup(&pdu->s->qpd_table, &lookup, hash); 779 if (!val) { 780 val = g_malloc0(sizeof(QpdEntry)); 781 *val = lookup; 782 affix = affixForIndex(pdu->s->qp_affix_next); 783 val->prefix_bits = affix.bits; 784 qht_insert(&pdu->s->qpd_table, val, hash, NULL); 785 pdu->s->qp_ndevices++; 786 } 787 return val->prefix_bits; 788 } 789 790 /** 791 * @brief Slow / full mapping host inode nr -> guest inode nr. 792 * 793 * This function performs a slower and much more costly remapping of an 794 * original file inode number on host to an appropriate different inode 795 * number on guest. For every (dev, inode) combination on host a new 796 * sequential number is generated, cached and exposed as inode number on 797 * guest. 798 * 799 * This is just a "last resort" fallback solution if the much faster/cheaper 800 * qid_path_suffixmap() failed. In practice this slow / full mapping is not 801 * expected ever to be used at all though. 
802 * 803 * @see qid_path_suffixmap() for details 804 * 805 */ 806 static int qid_path_fullmap(V9fsPDU *pdu, const struct stat *stbuf, 807 uint64_t *path) 808 { 809 QpfEntry lookup = { 810 .dev = stbuf->st_dev, 811 .ino = stbuf->st_ino 812 }, *val; 813 uint32_t hash = qpf_hash(lookup); 814 VariLenAffix affix; 815 816 val = qht_lookup(&pdu->s->qpf_table, &lookup, hash); 817 818 if (!val) { 819 if (pdu->s->qp_fullpath_next == 0) { 820 /* no more files can be mapped :'( */ 821 error_report_once( 822 "9p: No more prefixes available for remapping inodes from " 823 "host to guest." 824 ); 825 return -ENFILE; 826 } 827 828 val = g_malloc0(sizeof(QppEntry)); 829 *val = lookup; 830 831 /* new unique inode and device combo */ 832 affix = affixForIndex( 833 1ULL << (sizeof(pdu->s->qp_affix_next) * 8) 834 ); 835 val->path = (pdu->s->qp_fullpath_next++ << affix.bits) | affix.value; 836 pdu->s->qp_fullpath_next &= ((1ULL << (64 - affix.bits)) - 1); 837 qht_insert(&pdu->s->qpf_table, val, hash, NULL); 838 } 839 840 *path = val->path; 841 return 0; 842 } 843 844 /** 845 * @brief Quick mapping host inode nr -> guest inode nr. 846 * 847 * This function performs quick remapping of an original file inode number 848 * on host to an appropriate different inode number on guest. This remapping 849 * of inodes is required to avoid inode nr collisions on guest which would 850 * happen if the 9p export contains more than 1 exported file system (or 851 * more than 1 file system data set), because unlike on host level where the 852 * files would have different device nrs, all files exported by 9p would 853 * share the same device nr on guest (the device nr of the virtual 9p device 854 * that is). 
855 * 856 * Inode remapping is performed by chopping off high end bits of the original 857 * inode number from host, shifting the result upwards and then assigning a 858 * generated suffix number for the low end bits, where the same suffix number 859 * will be shared by all inodes with the same device id AND the same high end 860 * bits that have been chopped off. That approach utilizes the fact that inode 861 * numbers very likely share the same high end bits (i.e. due to their common 862 * sequential generation by file systems) and hence we only have to generate 863 * and track a very limited amount of suffixes in practice due to that. 864 * 865 * We generate variable size suffixes for that purpose. The 1st generated 866 * suffix will only have 1 bit and hence we only need to chop off 1 bit from 867 * the original inode number. The subsequent suffixes being generated will 868 * grow in (bit) size subsequently, i.e. the 2nd and 3rd suffix being 869 * generated will have 3 bits and hence we have to chop off 3 bits from their 870 * original inodes, and so on. That approach of using variable length suffixes 871 * (i.e. over fixed size ones) utilizes the fact that in practice only a very 872 * limited amount of devices are shared by the same export (e.g. typically 873 * less than 2 dozen devices per 9p export), so in practice we need to chop 874 * off less bits than with fixed size prefixes and yet are flexible to add 875 * new devices at runtime below host's export directory at any time without 876 * having to reboot guest nor requiring to reconfigure guest for that. And due 877 * to the very limited amount of original high end bits that we chop off that 878 * way, the total amount of suffixes we need to generate is less than by using 879 * fixed size prefixes and hence it also improves performance of the inode 880 * remapping algorithm, and finally has the nice side effect that the inode 881 * numbers on guest will be much smaller & human friendly. 
;-) 882 */ 883 static int qid_path_suffixmap(V9fsPDU *pdu, const struct stat *stbuf, 884 uint64_t *path) 885 { 886 const int ino_hash_bits = qid_inode_prefix_hash_bits(pdu, stbuf->st_dev); 887 QppEntry lookup = { 888 .dev = stbuf->st_dev, 889 .ino_prefix = (uint16_t) (stbuf->st_ino >> (64 - ino_hash_bits)) 890 }, *val; 891 uint32_t hash = qpp_hash(lookup); 892 893 val = qht_lookup(&pdu->s->qpp_table, &lookup, hash); 894 895 if (!val) { 896 if (pdu->s->qp_affix_next == 0) { 897 /* we ran out of affixes */ 898 warn_report_once( 899 "9p: Potential degraded performance of inode remapping" 900 ); 901 return -ENFILE; 902 } 903 904 val = g_malloc0(sizeof(QppEntry)); 905 *val = lookup; 906 907 /* new unique inode affix and device combo */ 908 val->qp_affix_index = pdu->s->qp_affix_next++; 909 val->qp_affix = affixForIndex(val->qp_affix_index); 910 qht_insert(&pdu->s->qpp_table, val, hash, NULL); 911 } 912 /* assuming generated affix to be suffix type, not prefix */ 913 *path = (stbuf->st_ino << val->qp_affix.bits) | val->qp_affix.value; 914 return 0; 915 } 916 917 static int stat_to_qid(V9fsPDU *pdu, const struct stat *stbuf, V9fsQID *qidp) 918 { 919 int err; 920 size_t size; 921 922 if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) { 923 /* map inode+device to qid path (fast path) */ 924 err = qid_path_suffixmap(pdu, stbuf, &qidp->path); 925 if (err == -ENFILE) { 926 /* fast path didn't work, fall back to full map */ 927 err = qid_path_fullmap(pdu, stbuf, &qidp->path); 928 } 929 if (err) { 930 return err; 931 } 932 } else { 933 if (pdu->s->dev_id != stbuf->st_dev) { 934 if (pdu->s->ctx.export_flags & V9FS_FORBID_MULTIDEVS) { 935 error_report_once( 936 "9p: Multiple devices detected in same VirtFS export. " 937 "Access of guest to additional devices is (partly) " 938 "denied due to virtfs option 'multidevs=forbid' being " 939 "effective." 
940 ); 941 return -ENODEV; 942 } else { 943 warn_report_once( 944 "9p: Multiple devices detected in same VirtFS export, " 945 "which might lead to file ID collisions and severe " 946 "misbehaviours on guest! You should either use a " 947 "separate export for each device shared from host or " 948 "use virtfs option 'multidevs=remap'!" 949 ); 950 } 951 } 952 memset(&qidp->path, 0, sizeof(qidp->path)); 953 size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path)); 954 memcpy(&qidp->path, &stbuf->st_ino, size); 955 } 956 957 qidp->version = stbuf->st_mtime ^ (stbuf->st_size << 8); 958 qidp->type = 0; 959 if (S_ISDIR(stbuf->st_mode)) { 960 qidp->type |= P9_QID_TYPE_DIR; 961 } 962 if (S_ISLNK(stbuf->st_mode)) { 963 qidp->type |= P9_QID_TYPE_SYMLINK; 964 } 965 966 return 0; 967 } 968 969 static int coroutine_fn fid_to_qid(V9fsPDU *pdu, V9fsFidState *fidp, 970 V9fsQID *qidp) 971 { 972 struct stat stbuf; 973 int err; 974 975 err = v9fs_co_lstat(pdu, &fidp->path, &stbuf); 976 if (err < 0) { 977 return err; 978 } 979 err = stat_to_qid(pdu, &stbuf, qidp); 980 if (err < 0) { 981 return err; 982 } 983 return 0; 984 } 985 986 V9fsPDU *pdu_alloc(V9fsState *s) 987 { 988 V9fsPDU *pdu = NULL; 989 990 if (!QLIST_EMPTY(&s->free_list)) { 991 pdu = QLIST_FIRST(&s->free_list); 992 QLIST_REMOVE(pdu, next); 993 QLIST_INSERT_HEAD(&s->active_list, pdu, next); 994 } 995 return pdu; 996 } 997 998 void pdu_free(V9fsPDU *pdu) 999 { 1000 V9fsState *s = pdu->s; 1001 1002 g_assert(!pdu->cancelled); 1003 QLIST_REMOVE(pdu, next); 1004 QLIST_INSERT_HEAD(&s->free_list, pdu, next); 1005 } 1006 1007 static void coroutine_fn pdu_complete(V9fsPDU *pdu, ssize_t len) 1008 { 1009 int8_t id = pdu->id + 1; /* Response */ 1010 V9fsState *s = pdu->s; 1011 int ret; 1012 1013 /* 1014 * The 9p spec requires that successfully cancelled pdus receive no reply. 
1015 * Sending a reply would confuse clients because they would 1016 * assume that any EINTR is the actual result of the operation, 1017 * rather than a consequence of the cancellation. However, if 1018 * the operation completed (succesfully or with an error other 1019 * than caused be cancellation), we do send out that reply, both 1020 * for efficiency and to avoid confusing the rest of the state machine 1021 * that assumes passing a non-error here will mean a successful 1022 * transmission of the reply. 1023 */ 1024 bool discard = pdu->cancelled && len == -EINTR; 1025 if (discard) { 1026 trace_v9fs_rcancel(pdu->tag, pdu->id); 1027 pdu->size = 0; 1028 goto out_notify; 1029 } 1030 1031 if (len < 0) { 1032 int err = -len; 1033 len = 7; 1034 1035 if (s->proto_version != V9FS_PROTO_2000L) { 1036 V9fsString str; 1037 1038 str.data = strerror(err); 1039 str.size = strlen(str.data); 1040 1041 ret = pdu_marshal(pdu, len, "s", &str); 1042 if (ret < 0) { 1043 goto out_notify; 1044 } 1045 len += ret; 1046 id = P9_RERROR; 1047 } 1048 1049 ret = pdu_marshal(pdu, len, "d", err); 1050 if (ret < 0) { 1051 goto out_notify; 1052 } 1053 len += ret; 1054 1055 if (s->proto_version == V9FS_PROTO_2000L) { 1056 id = P9_RLERROR; 1057 } 1058 trace_v9fs_rerror(pdu->tag, pdu->id, err); /* Trace ERROR */ 1059 } 1060 1061 /* fill out the header */ 1062 if (pdu_marshal(pdu, 0, "dbw", (int32_t)len, id, pdu->tag) < 0) { 1063 goto out_notify; 1064 } 1065 1066 /* keep these in sync */ 1067 pdu->size = len; 1068 pdu->id = id; 1069 1070 out_notify: 1071 pdu->s->transport->push_and_notify(pdu); 1072 1073 /* Now wakeup anybody waiting in flush for this request */ 1074 if (!qemu_co_queue_next(&pdu->complete)) { 1075 pdu_free(pdu); 1076 } 1077 } 1078 1079 static mode_t v9mode_to_mode(uint32_t mode, V9fsString *extension) 1080 { 1081 mode_t ret; 1082 1083 ret = mode & 0777; 1084 if (mode & P9_STAT_MODE_DIR) { 1085 ret |= S_IFDIR; 1086 } 1087 1088 if (mode & P9_STAT_MODE_SYMLINK) { 1089 ret |= S_IFLNK; 
1090 } 1091 if (mode & P9_STAT_MODE_SOCKET) { 1092 ret |= S_IFSOCK; 1093 } 1094 if (mode & P9_STAT_MODE_NAMED_PIPE) { 1095 ret |= S_IFIFO; 1096 } 1097 if (mode & P9_STAT_MODE_DEVICE) { 1098 if (extension->size && extension->data[0] == 'c') { 1099 ret |= S_IFCHR; 1100 } else { 1101 ret |= S_IFBLK; 1102 } 1103 } 1104 1105 if (!(ret & ~0777)) { 1106 ret |= S_IFREG; 1107 } 1108 1109 if (mode & P9_STAT_MODE_SETUID) { 1110 ret |= S_ISUID; 1111 } 1112 if (mode & P9_STAT_MODE_SETGID) { 1113 ret |= S_ISGID; 1114 } 1115 if (mode & P9_STAT_MODE_SETVTX) { 1116 ret |= S_ISVTX; 1117 } 1118 1119 return ret; 1120 } 1121 1122 static int donttouch_stat(V9fsStat *stat) 1123 { 1124 if (stat->type == -1 && 1125 stat->dev == -1 && 1126 stat->qid.type == 0xff && 1127 stat->qid.version == (uint32_t) -1 && 1128 stat->qid.path == (uint64_t) -1 && 1129 stat->mode == -1 && 1130 stat->atime == -1 && 1131 stat->mtime == -1 && 1132 stat->length == -1 && 1133 !stat->name.size && 1134 !stat->uid.size && 1135 !stat->gid.size && 1136 !stat->muid.size && 1137 stat->n_uid == -1 && 1138 stat->n_gid == -1 && 1139 stat->n_muid == -1) { 1140 return 1; 1141 } 1142 1143 return 0; 1144 } 1145 1146 static void v9fs_stat_init(V9fsStat *stat) 1147 { 1148 v9fs_string_init(&stat->name); 1149 v9fs_string_init(&stat->uid); 1150 v9fs_string_init(&stat->gid); 1151 v9fs_string_init(&stat->muid); 1152 v9fs_string_init(&stat->extension); 1153 } 1154 1155 static void v9fs_stat_free(V9fsStat *stat) 1156 { 1157 v9fs_string_free(&stat->name); 1158 v9fs_string_free(&stat->uid); 1159 v9fs_string_free(&stat->gid); 1160 v9fs_string_free(&stat->muid); 1161 v9fs_string_free(&stat->extension); 1162 } 1163 1164 static uint32_t stat_to_v9mode(const struct stat *stbuf) 1165 { 1166 uint32_t mode; 1167 1168 mode = stbuf->st_mode & 0777; 1169 if (S_ISDIR(stbuf->st_mode)) { 1170 mode |= P9_STAT_MODE_DIR; 1171 } 1172 1173 if (S_ISLNK(stbuf->st_mode)) { 1174 mode |= P9_STAT_MODE_SYMLINK; 1175 } 1176 1177 if (S_ISSOCK(stbuf->st_mode)) { 
1178 mode |= P9_STAT_MODE_SOCKET; 1179 } 1180 1181 if (S_ISFIFO(stbuf->st_mode)) { 1182 mode |= P9_STAT_MODE_NAMED_PIPE; 1183 } 1184 1185 if (S_ISBLK(stbuf->st_mode) || S_ISCHR(stbuf->st_mode)) { 1186 mode |= P9_STAT_MODE_DEVICE; 1187 } 1188 1189 if (stbuf->st_mode & S_ISUID) { 1190 mode |= P9_STAT_MODE_SETUID; 1191 } 1192 1193 if (stbuf->st_mode & S_ISGID) { 1194 mode |= P9_STAT_MODE_SETGID; 1195 } 1196 1197 if (stbuf->st_mode & S_ISVTX) { 1198 mode |= P9_STAT_MODE_SETVTX; 1199 } 1200 1201 return mode; 1202 } 1203 1204 static int coroutine_fn stat_to_v9stat(V9fsPDU *pdu, V9fsPath *path, 1205 const char *basename, 1206 const struct stat *stbuf, 1207 V9fsStat *v9stat) 1208 { 1209 int err; 1210 1211 memset(v9stat, 0, sizeof(*v9stat)); 1212 1213 err = stat_to_qid(pdu, stbuf, &v9stat->qid); 1214 if (err < 0) { 1215 return err; 1216 } 1217 v9stat->mode = stat_to_v9mode(stbuf); 1218 v9stat->atime = stbuf->st_atime; 1219 v9stat->mtime = stbuf->st_mtime; 1220 v9stat->length = stbuf->st_size; 1221 1222 v9fs_string_free(&v9stat->uid); 1223 v9fs_string_free(&v9stat->gid); 1224 v9fs_string_free(&v9stat->muid); 1225 1226 v9stat->n_uid = stbuf->st_uid; 1227 v9stat->n_gid = stbuf->st_gid; 1228 v9stat->n_muid = 0; 1229 1230 v9fs_string_free(&v9stat->extension); 1231 1232 if (v9stat->mode & P9_STAT_MODE_SYMLINK) { 1233 err = v9fs_co_readlink(pdu, path, &v9stat->extension); 1234 if (err < 0) { 1235 return err; 1236 } 1237 } else if (v9stat->mode & P9_STAT_MODE_DEVICE) { 1238 v9fs_string_sprintf(&v9stat->extension, "%c %u %u", 1239 S_ISCHR(stbuf->st_mode) ? 
'c' : 'b', 1240 major(stbuf->st_rdev), minor(stbuf->st_rdev)); 1241 } else if (S_ISDIR(stbuf->st_mode) || S_ISREG(stbuf->st_mode)) { 1242 v9fs_string_sprintf(&v9stat->extension, "%s %lu", 1243 "HARDLINKCOUNT", (unsigned long)stbuf->st_nlink); 1244 } 1245 1246 v9fs_string_sprintf(&v9stat->name, "%s", basename); 1247 1248 v9stat->size = 61 + 1249 v9fs_string_size(&v9stat->name) + 1250 v9fs_string_size(&v9stat->uid) + 1251 v9fs_string_size(&v9stat->gid) + 1252 v9fs_string_size(&v9stat->muid) + 1253 v9fs_string_size(&v9stat->extension); 1254 return 0; 1255 } 1256 1257 #define P9_STATS_MODE 0x00000001ULL 1258 #define P9_STATS_NLINK 0x00000002ULL 1259 #define P9_STATS_UID 0x00000004ULL 1260 #define P9_STATS_GID 0x00000008ULL 1261 #define P9_STATS_RDEV 0x00000010ULL 1262 #define P9_STATS_ATIME 0x00000020ULL 1263 #define P9_STATS_MTIME 0x00000040ULL 1264 #define P9_STATS_CTIME 0x00000080ULL 1265 #define P9_STATS_INO 0x00000100ULL 1266 #define P9_STATS_SIZE 0x00000200ULL 1267 #define P9_STATS_BLOCKS 0x00000400ULL 1268 1269 #define P9_STATS_BTIME 0x00000800ULL 1270 #define P9_STATS_GEN 0x00001000ULL 1271 #define P9_STATS_DATA_VERSION 0x00002000ULL 1272 1273 #define P9_STATS_BASIC 0x000007ffULL /* Mask for fields up to BLOCKS */ 1274 #define P9_STATS_ALL 0x00003fffULL /* Mask for All fields above */ 1275 1276 1277 static int stat_to_v9stat_dotl(V9fsPDU *pdu, const struct stat *stbuf, 1278 V9fsStatDotl *v9lstat) 1279 { 1280 memset(v9lstat, 0, sizeof(*v9lstat)); 1281 1282 v9lstat->st_mode = stbuf->st_mode; 1283 v9lstat->st_nlink = stbuf->st_nlink; 1284 v9lstat->st_uid = stbuf->st_uid; 1285 v9lstat->st_gid = stbuf->st_gid; 1286 v9lstat->st_rdev = stbuf->st_rdev; 1287 v9lstat->st_size = stbuf->st_size; 1288 v9lstat->st_blksize = stbuf->st_blksize; 1289 v9lstat->st_blocks = stbuf->st_blocks; 1290 v9lstat->st_atime_sec = stbuf->st_atime; 1291 v9lstat->st_atime_nsec = stbuf->st_atim.tv_nsec; 1292 v9lstat->st_mtime_sec = stbuf->st_mtime; 1293 v9lstat->st_mtime_nsec = 
stbuf->st_mtim.tv_nsec; 1294 v9lstat->st_ctime_sec = stbuf->st_ctime; 1295 v9lstat->st_ctime_nsec = stbuf->st_ctim.tv_nsec; 1296 /* Currently we only support BASIC fields in stat */ 1297 v9lstat->st_result_mask = P9_STATS_BASIC; 1298 1299 return stat_to_qid(pdu, stbuf, &v9lstat->qid); 1300 } 1301 1302 static void print_sg(struct iovec *sg, int cnt) 1303 { 1304 int i; 1305 1306 printf("sg[%d]: {", cnt); 1307 for (i = 0; i < cnt; i++) { 1308 if (i) { 1309 printf(", "); 1310 } 1311 printf("(%p, %zd)", sg[i].iov_base, sg[i].iov_len); 1312 } 1313 printf("}\n"); 1314 } 1315 1316 /* Will call this only for path name based fid */ 1317 static void v9fs_fix_path(V9fsPath *dst, V9fsPath *src, int len) 1318 { 1319 V9fsPath str; 1320 v9fs_path_init(&str); 1321 v9fs_path_copy(&str, dst); 1322 v9fs_path_sprintf(dst, "%s%s", src->data, str.data + len); 1323 v9fs_path_free(&str); 1324 } 1325 1326 static inline bool is_ro_export(FsContext *ctx) 1327 { 1328 return ctx->export_flags & V9FS_RDONLY; 1329 } 1330 1331 static void coroutine_fn v9fs_version(void *opaque) 1332 { 1333 ssize_t err; 1334 V9fsPDU *pdu = opaque; 1335 V9fsState *s = pdu->s; 1336 V9fsString version; 1337 size_t offset = 7; 1338 1339 v9fs_string_init(&version); 1340 err = pdu_unmarshal(pdu, offset, "ds", &s->msize, &version); 1341 if (err < 0) { 1342 goto out; 1343 } 1344 trace_v9fs_version(pdu->tag, pdu->id, s->msize, version.data); 1345 1346 virtfs_reset(pdu); 1347 1348 if (!strcmp(version.data, "9P2000.u")) { 1349 s->proto_version = V9FS_PROTO_2000U; 1350 } else if (!strcmp(version.data, "9P2000.L")) { 1351 s->proto_version = V9FS_PROTO_2000L; 1352 } else { 1353 v9fs_string_sprintf(&version, "unknown"); 1354 /* skip min. msize check, reporting invalid version has priority */ 1355 goto marshal; 1356 } 1357 1358 if (s->msize < P9_MIN_MSIZE) { 1359 err = -EMSGSIZE; 1360 error_report( 1361 "9pfs: Client requested msize < minimum msize (" 1362 stringify(P9_MIN_MSIZE) ") supported by this server." 
);
        goto out;
    }

    /* 8192 is the default msize of Linux clients */
    if (s->msize <= 8192 && !(s->ctx.export_flags & V9FS_NO_PERF_WARN)) {
        warn_report_once(
            "9p: degraded performance: a reasonable high msize should be "
            "chosen on client/guest side (chosen msize is <= 8192). See "
            "https://wiki.qemu.org/Documentation/9psetup#msize for details."
        );
    }

marshal:
    err = pdu_marshal(pdu, offset, "ds", s->msize, &version);
    if (err < 0) {
        goto out;
    }
    err += offset;
    trace_v9fs_version_return(pdu->tag, pdu->id, s->msize, version.data);
out:
    pdu_complete(pdu, err);
    v9fs_string_free(&version);
}

/*
 * Tattach handler.
 *
 * Allocates the requested fid, points it at "/" and replies with that
 * path's qid. The first successful attach also installs a migration
 * blocker and records the fid as root fid; the qid is stored as root qid
 * on every successful attach. On any failure after the fid was allocated
 * the fid is clunked again.
 */
static void coroutine_fn v9fs_attach(void *opaque)
{
    V9fsPDU *pdu = opaque;
    V9fsState *s = pdu->s;
    int32_t fid, afid, n_uname;
    V9fsString uname, aname;
    V9fsFidState *fidp;
    size_t offset = 7;
    V9fsQID qid;
    ssize_t err;

    v9fs_string_init(&uname);
    v9fs_string_init(&aname);
    err = pdu_unmarshal(pdu, offset, "ddssd", &fid,
                        &afid, &uname, &aname, &n_uname);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_attach(pdu->tag, pdu->id, fid, afid, uname.data, aname.data);

    fidp = alloc_fid(s, fid);
    if (fidp == NULL) {
        err = -EINVAL;
        goto out_nofid;
    }
    fidp->uid = n_uname;
    err = v9fs_co_name_to_path(pdu, NULL, "/", &fidp->path);
    if (err < 0) {
        /* the specific error is replaced by EINVAL for the client */
        err = -EINVAL;
        clunk_fid(s, fid);
        goto out;
    }
    err = fid_to_qid(pdu, fidp, &qid);
    if (err < 0) {
        err = -EINVAL;
        clunk_fid(s, fid);
        goto out;
    }

    /*
     * disable migration if we haven't done already.
     * attach could get called multiple times for the same export.
     */
    if (!s->migration_blocker) {
        error_setg(&s->migration_blocker,
                   "Migration is disabled when VirtFS export path '%s' is mounted in the guest using mount_tag '%s'",
                   s->ctx.fs_root ? s->ctx.fs_root : "NULL", s->tag);
        err = migrate_add_blocker(s->migration_blocker, NULL);
        if (err < 0) {
            error_free(s->migration_blocker);
            s->migration_blocker = NULL;
            clunk_fid(s, fid);
            goto out;
        }
        s->root_fid = fid;
    }

    err = pdu_marshal(pdu, offset, "Q", &qid);
    if (err < 0) {
        clunk_fid(s, fid);
        goto out;
    }
    err += offset;

    memcpy(&s->root_qid, &qid, sizeof(qid));
    trace_v9fs_attach_return(pdu->tag, pdu->id,
                             qid.type, qid.version, qid.path);
out:
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, err);
    v9fs_string_free(&uname);
    v9fs_string_free(&aname);
}

/*
 * Tstat handler.
 *
 * Runs lstat on the fid's path, converts the result with stat_to_v9stat()
 * (using the path's basename as name) and replies with the stat structure.
 * An unknown fid yields -ENOENT.
 */
static void coroutine_fn v9fs_stat(void *opaque)
{
    int32_t fid;
    V9fsStat v9stat;
    ssize_t err = 0;
    size_t offset = 7;
    struct stat stbuf;
    V9fsFidState *fidp;
    V9fsPDU *pdu = opaque;
    char *basename;

    err = pdu_unmarshal(pdu, offset, "d", &fid);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_stat(pdu->tag, pdu->id, fid);

    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -ENOENT;
        goto out_nofid;
    }
    err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
    if (err < 0) {
        goto out;
    }
    basename = g_path_get_basename(fidp->path.data);
    err = stat_to_v9stat(pdu, &fidp->path, basename, &stbuf, &v9stat);
    g_free(basename);
    if (err < 0) {
        goto out;
    }
    err = pdu_marshal(pdu, offset, "wS", 0, &v9stat);
    if (err < 0) {
        v9fs_stat_free(&v9stat);
        goto out;
    }
    trace_v9fs_stat_return(pdu->tag, pdu->id, v9stat.mode,
                           v9stat.atime, v9stat.mtime, v9stat.length);
    err += offset;
    v9fs_stat_free(&v9stat);
out:
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, err);
}

/*
 * Tgetattr handler.
 *
 * Runs lstat on the fid's path and replies with the converted attributes.
 * Only the BASIC fields are filled unconditionally; st_gen is added when
 * the client asked for P9_STATS_GEN and v9fs_co_st_gen() succeeds.
 */
static void coroutine_fn v9fs_getattr(void *opaque)
{
    int32_t fid;
    size_t offset = 7;
    ssize_t retval = 0;
    struct stat stbuf;
    V9fsFidState *fidp;
    uint64_t request_mask;
    V9fsStatDotl v9stat_dotl;
    V9fsPDU *pdu = opaque;

    retval = pdu_unmarshal(pdu, offset, "dq", &fid, &request_mask);
    if (retval < 0) {
        goto out_nofid;
    }
    trace_v9fs_getattr(pdu->tag, pdu->id, fid, request_mask);

    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        retval = -ENOENT;
        goto out_nofid;
    }
    /*
     * Currently we only support BASIC fields in stat, so there is no
     * need to look at request_mask.
     */
    retval = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
    if (retval < 0) {
        goto out;
    }
    retval = stat_to_v9stat_dotl(pdu, &stbuf, &v9stat_dotl);
    if (retval < 0) {
        goto out;
    }

    /* fill st_gen if requested and supported by underlying fs */
    if (request_mask & P9_STATS_GEN) {
        retval = v9fs_co_st_gen(pdu, &fidp->path, stbuf.st_mode, &v9stat_dotl);
        switch (retval) {
        case 0:
            /* we have valid st_gen: update result mask */
            v9stat_dotl.st_result_mask |= P9_STATS_GEN;
            break;
        case -EINTR:
            /* request cancelled, e.g. by Tflush */
            goto out;
        default:
            /* failed to get st_gen: not fatal, ignore */
            break;
        }
    }
    retval = pdu_marshal(pdu, offset, "A", &v9stat_dotl);
    if (retval < 0) {
        goto out;
    }
    retval += offset;
    trace_v9fs_getattr_return(pdu->tag, pdu->id, v9stat_dotl.st_result_mask,
                              v9stat_dotl.st_mode, v9stat_dotl.st_uid,
                              v9stat_dotl.st_gid);
out:
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, retval);
}

/* Attribute flags */
#define P9_ATTR_MODE       (1 << 0)
#define P9_ATTR_UID        (1 << 1)
#define P9_ATTR_GID        (1 << 2)
#define P9_ATTR_SIZE       (1 << 3)
#define P9_ATTR_ATIME      (1 << 4)
#define P9_ATTR_MTIME      (1 << 5)
#define P9_ATTR_CTIME      (1 << 6)
#define P9_ATTR_ATIME_SET  (1 << 7)
#define P9_ATTR_MTIME_SET  (1 << 8)

/* covers bits 0-6, i.e. everything above except the two *_SET flags */
#define P9_ATTR_MASK    127

/*
 * Tsetattr handler.
 *
 * Applies the attributes flagged in v9iattr.valid to the fid's path, in
 * this order: mode (chmod), atime/mtime (utimensat), uid/gid (chown) and
 * size (truncate). The first failing step aborts the request with its
 * error. An unknown fid yields -EINVAL.
 */
static void coroutine_fn v9fs_setattr(void *opaque)
{
    int err = 0;
    int32_t fid;
    V9fsFidState *fidp;
    size_t offset = 7;
    V9fsIattr v9iattr;
    V9fsPDU *pdu = opaque;

    err = pdu_unmarshal(pdu, offset, "dI", &fid, &v9iattr);
    if (err < 0) {
        goto out_nofid;
    }

    trace_v9fs_setattr(pdu->tag, pdu->id, fid,
                       v9iattr.valid, v9iattr.mode, v9iattr.uid, v9iattr.gid,
                       v9iattr.size, v9iattr.atime_sec, v9iattr.mtime_sec);

    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -EINVAL;
        goto out_nofid;
    }
    if (v9iattr.valid & P9_ATTR_MODE) {
        err = v9fs_co_chmod(pdu, &fidp->path, v9iattr.mode);
        if (err < 0) {
            goto out;
        }
    }
    if (v9iattr.valid & (P9_ATTR_ATIME | P9_ATTR_MTIME)) {
        /*
         * times[0] is atime, times[1] is mtime. In the UTIME_NOW and
         * UTIME_OMIT cases only tv_nsec is assigned; tv_sec stays unset.
         */
        struct timespec times[2];
        if (v9iattr.valid & P9_ATTR_ATIME) {
            if (v9iattr.valid & P9_ATTR_ATIME_SET) {
                times[0].tv_sec = v9iattr.atime_sec;
                times[0].tv_nsec = v9iattr.atime_nsec;
            } else {
                times[0].tv_nsec = UTIME_NOW;
            }
        } else {
            times[0].tv_nsec = UTIME_OMIT;
        }
        if (v9iattr.valid & P9_ATTR_MTIME) {
            if (v9iattr.valid & P9_ATTR_MTIME_SET) {
                times[1].tv_sec = v9iattr.mtime_sec;
                times[1].tv_nsec = v9iattr.mtime_nsec;
            } else {
                times[1].tv_nsec = UTIME_NOW;
            }
        } else {
            times[1].tv_nsec = UTIME_OMIT;
        }
        err = v9fs_co_utimensat(pdu, &fidp->path, times);
        if (err < 0) {
            goto out;
        }
    }
    /*
     * If the only valid entry in iattr is ctime we can call
     * chown(-1,-1) to update the ctime of the file
     */
    if ((v9iattr.valid & (P9_ATTR_UID | P9_ATTR_GID)) ||
        ((v9iattr.valid & P9_ATTR_CTIME)
         && !((v9iattr.valid & P9_ATTR_MASK) & ~P9_ATTR_CTIME))) {
        if (!(v9iattr.valid & P9_ATTR_UID)) {
            v9iattr.uid = -1;
        }
        if (!(v9iattr.valid & P9_ATTR_GID)) {
            v9iattr.gid = -1;
        }
        err = v9fs_co_chown(pdu, &fidp->path, v9iattr.uid,
                            v9iattr.gid);
        if (err < 0) {
            goto out;
        }
    }
    if (v9iattr.valid & (P9_ATTR_SIZE)) {
        err = v9fs_co_truncate(pdu, &fidp->path, v9iattr.size);
        if (err < 0) {
            goto out;
        }
    }
    /* the reply carries no payload beyond the 7 byte header */
    err = offset;
    trace_v9fs_setattr_return(pdu->tag, pdu->id);
out:
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, err);
}

/*
 * Marshal the Rwalk payload: the element count followed by @nwnames qids.
 * Returns the offset just past the last marshalled byte, or a negative
 * error from pdu_marshal().
 */
static int v9fs_walk_marshal(V9fsPDU *pdu, uint16_t nwnames, V9fsQID *qids)
{
    int i;
    ssize_t err;
    size_t offset = 7;

    err = pdu_marshal(pdu, offset, "w", nwnames);
    if (err < 0) {
        return err;
    }
    offset += err;
    for (i = 0; i < nwnames; i++) {
        err = pdu_marshal(pdu, offset, "Q", &qids[i]);
        if (err < 0) {
            return err;
        }
        offset += err;
    }
    return offset;
}

/* A name is illegal when it is empty or contains a '/'. */
static bool name_is_illegal(const char *name)
{
    return !*name || strchr(name, '/') != NULL;
}

/* True when the two qids differ in type, version or path. */
static bool not_same_qid(const V9fsQID *qid1, const V9fsQID *qid2)
{
    return
        qid1->type != qid2->type ||
        qid1->version != qid2->version ||
        qid1->path != qid2->path;
}

/*
 * Twalk handler.
 *
 * Walks up to P9_MAXWELEM path elements starting at @fid, collecting one
 * qid per element. On success the resulting path is stored either in the
 * source fid itself (fid == newfid, only allowed while the fid is not
 * open) or in a freshly allocated newfid that inherits the uid. A ".."
 * element is not followed when the current qid equals the root qid.
 */
static void coroutine_fn v9fs_walk(void *opaque)
{
    int name_idx;
    V9fsQID *qids = NULL;
    int i, err = 0;
    V9fsPath dpath, path;
    uint16_t nwnames;
    struct stat stbuf;
    size_t offset = 7;
    int32_t fid, newfid;
    V9fsString *wnames = NULL;
    V9fsFidState *fidp;
    V9fsFidState *newfidp = NULL;
    V9fsPDU *pdu = opaque;
    V9fsState *s = pdu->s;
    V9fsQID qid;

    err = pdu_unmarshal(pdu, offset, "ddw", &fid, &newfid, &nwnames);
    if (err < 0) {
        pdu_complete(pdu, err);
        return ;
    }
    offset += err;

    trace_v9fs_walk(pdu->tag, pdu->id, fid, newfid, nwnames);

    if (nwnames && nwnames <= P9_MAXWELEM) {
        wnames = g_new0(V9fsString, nwnames);
        qids = g_new0(V9fsQID, nwnames);
        for (i = 0; i < nwnames; i++) {
            err = pdu_unmarshal(pdu, offset, "s", &wnames[i]);
            if (err < 0) {
                goto out_nofid;
            }
            if (name_is_illegal(wnames[i].data)) {
                err = -ENOENT;
                goto out_nofid;
            }
            offset += err;
        }
    } else if (nwnames > P9_MAXWELEM) {
        err = -EINVAL;
        goto out_nofid;
    }
    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -ENOENT;
        goto out_nofid;
    }

    v9fs_path_init(&dpath);
    v9fs_path_init(&path);

    err = fid_to_qid(pdu, fidp, &qid);
    if (err < 0) {
        goto out;
    }

    /*
     * Both dpath and path initially point to fidp.
     * Needed to handle request with nwnames == 0
     */
    v9fs_path_copy(&dpath, &fidp->path);
    v9fs_path_copy(&path, &fidp->path);
    for (name_idx = 0; name_idx < nwnames; name_idx++) {
        /* skip the step only for ".." while standing on the root qid */
        if (not_same_qid(&pdu->s->root_qid, &qid) ||
            strcmp("..", wnames[name_idx].data)) {
            err = v9fs_co_name_to_path(pdu, &dpath, wnames[name_idx].data,
                                       &path);
            if (err < 0) {
                goto out;
            }

            err = v9fs_co_lstat(pdu, &path, &stbuf);
            if (err < 0) {
                goto out;
            }
            err = stat_to_qid(pdu, &stbuf, &qid);
            if (err < 0) {
                goto out;
            }
            v9fs_path_copy(&dpath, &path);
        }
        memcpy(&qids[name_idx], &qid, sizeof(qid));
    }
    if (fid == newfid) {
        if (fidp->fid_type != P9_FID_NONE) {
            err = -EINVAL;
            goto out;
        }
        v9fs_path_write_lock(s);
        v9fs_path_copy(&fidp->path, &path);
        v9fs_path_unlock(s);
    } else {
        newfidp = alloc_fid(s, newfid);
        if (newfidp == NULL) {
            err = -EINVAL;
            goto out;
        }
        newfidp->uid = fidp->uid;
        v9fs_path_copy(&newfidp->path, &path);
    }
    err = v9fs_walk_marshal(pdu, nwnames, qids);
    trace_v9fs_walk_return(pdu->tag, pdu->id, nwnames, qids);
out:
    put_fid(pdu, fidp);
    if (newfidp) {
        put_fid(pdu, newfidp);
    }
    v9fs_path_free(&dpath);
    v9fs_path_free(&path);
out_nofid:
    pdu_complete(pdu, err);
    /* same condition as the one guarding the allocation above */
    if (nwnames && nwnames <= P9_MAXWELEM) {
        for (name_idx = 0; name_idx < nwnames; name_idx++) {
            v9fs_string_free(&wnames[name_idx]);
        }
        g_free(wnames);
        g_free(qids);
    }
}

static int32_t coroutine_fn get_iounit(V9fsPDU *pdu, V9fsPath *path)
{
    struct statfs stbuf;
    int32_t iounit = 0;
    V9fsState *s = pdu->s;

    /*
     * iounit should be multiples of f_bsize (host filesystem block size
     * and as well as less than (client msize - P9_IOHDRSZ))
     */
    if (!v9fs_co_statfs(pdu, path, &stbuf)) {
        if
(stbuf.f_bsize) { 1845 iounit = stbuf.f_bsize; 1846 iounit *= (s->msize - P9_IOHDRSZ) / stbuf.f_bsize; 1847 } 1848 } 1849 if (!iounit) { 1850 iounit = s->msize - P9_IOHDRSZ; 1851 } 1852 return iounit; 1853 } 1854 1855 static void coroutine_fn v9fs_open(void *opaque) 1856 { 1857 int flags; 1858 int32_t fid; 1859 int32_t mode; 1860 V9fsQID qid; 1861 int iounit = 0; 1862 ssize_t err = 0; 1863 size_t offset = 7; 1864 struct stat stbuf; 1865 V9fsFidState *fidp; 1866 V9fsPDU *pdu = opaque; 1867 V9fsState *s = pdu->s; 1868 1869 if (s->proto_version == V9FS_PROTO_2000L) { 1870 err = pdu_unmarshal(pdu, offset, "dd", &fid, &mode); 1871 } else { 1872 uint8_t modebyte; 1873 err = pdu_unmarshal(pdu, offset, "db", &fid, &modebyte); 1874 mode = modebyte; 1875 } 1876 if (err < 0) { 1877 goto out_nofid; 1878 } 1879 trace_v9fs_open(pdu->tag, pdu->id, fid, mode); 1880 1881 fidp = get_fid(pdu, fid); 1882 if (fidp == NULL) { 1883 err = -ENOENT; 1884 goto out_nofid; 1885 } 1886 if (fidp->fid_type != P9_FID_NONE) { 1887 err = -EINVAL; 1888 goto out; 1889 } 1890 1891 err = v9fs_co_lstat(pdu, &fidp->path, &stbuf); 1892 if (err < 0) { 1893 goto out; 1894 } 1895 err = stat_to_qid(pdu, &stbuf, &qid); 1896 if (err < 0) { 1897 goto out; 1898 } 1899 if (S_ISDIR(stbuf.st_mode)) { 1900 err = v9fs_co_opendir(pdu, fidp); 1901 if (err < 0) { 1902 goto out; 1903 } 1904 fidp->fid_type = P9_FID_DIR; 1905 err = pdu_marshal(pdu, offset, "Qd", &qid, 0); 1906 if (err < 0) { 1907 goto out; 1908 } 1909 err += offset; 1910 } else { 1911 if (s->proto_version == V9FS_PROTO_2000L) { 1912 flags = get_dotl_openflags(s, mode); 1913 } else { 1914 flags = omode_to_uflags(mode); 1915 } 1916 if (is_ro_export(&s->ctx)) { 1917 if (mode & O_WRONLY || mode & O_RDWR || 1918 mode & O_APPEND || mode & O_TRUNC) { 1919 err = -EROFS; 1920 goto out; 1921 } 1922 } 1923 err = v9fs_co_open(pdu, fidp, flags); 1924 if (err < 0) { 1925 goto out; 1926 } 1927 fidp->fid_type = P9_FID_FILE; 1928 fidp->open_flags = flags; 1929 if (flags & 
O_EXCL) { 1930 /* 1931 * We let the host file system do O_EXCL check 1932 * We should not reclaim such fd 1933 */ 1934 fidp->flags |= FID_NON_RECLAIMABLE; 1935 } 1936 iounit = get_iounit(pdu, &fidp->path); 1937 err = pdu_marshal(pdu, offset, "Qd", &qid, iounit); 1938 if (err < 0) { 1939 goto out; 1940 } 1941 err += offset; 1942 } 1943 trace_v9fs_open_return(pdu->tag, pdu->id, 1944 qid.type, qid.version, qid.path, iounit); 1945 out: 1946 put_fid(pdu, fidp); 1947 out_nofid: 1948 pdu_complete(pdu, err); 1949 } 1950 1951 static void coroutine_fn v9fs_lcreate(void *opaque) 1952 { 1953 int32_t dfid, flags, mode; 1954 gid_t gid; 1955 ssize_t err = 0; 1956 ssize_t offset = 7; 1957 V9fsString name; 1958 V9fsFidState *fidp; 1959 struct stat stbuf; 1960 V9fsQID qid; 1961 int32_t iounit; 1962 V9fsPDU *pdu = opaque; 1963 1964 v9fs_string_init(&name); 1965 err = pdu_unmarshal(pdu, offset, "dsddd", &dfid, 1966 &name, &flags, &mode, &gid); 1967 if (err < 0) { 1968 goto out_nofid; 1969 } 1970 trace_v9fs_lcreate(pdu->tag, pdu->id, dfid, flags, mode, gid); 1971 1972 if (name_is_illegal(name.data)) { 1973 err = -ENOENT; 1974 goto out_nofid; 1975 } 1976 1977 if (!strcmp(".", name.data) || !strcmp("..", name.data)) { 1978 err = -EEXIST; 1979 goto out_nofid; 1980 } 1981 1982 fidp = get_fid(pdu, dfid); 1983 if (fidp == NULL) { 1984 err = -ENOENT; 1985 goto out_nofid; 1986 } 1987 if (fidp->fid_type != P9_FID_NONE) { 1988 err = -EINVAL; 1989 goto out; 1990 } 1991 1992 flags = get_dotl_openflags(pdu->s, flags); 1993 err = v9fs_co_open2(pdu, fidp, &name, gid, 1994 flags | O_CREAT, mode, &stbuf); 1995 if (err < 0) { 1996 goto out; 1997 } 1998 fidp->fid_type = P9_FID_FILE; 1999 fidp->open_flags = flags; 2000 if (flags & O_EXCL) { 2001 /* 2002 * We let the host file system do O_EXCL check 2003 * We should not reclaim such fd 2004 */ 2005 fidp->flags |= FID_NON_RECLAIMABLE; 2006 } 2007 iounit = get_iounit(pdu, &fidp->path); 2008 err = stat_to_qid(pdu, &stbuf, &qid); 2009 if (err < 0) { 2010 goto 
out; 2011 } 2012 err = pdu_marshal(pdu, offset, "Qd", &qid, iounit); 2013 if (err < 0) { 2014 goto out; 2015 } 2016 err += offset; 2017 trace_v9fs_lcreate_return(pdu->tag, pdu->id, 2018 qid.type, qid.version, qid.path, iounit); 2019 out: 2020 put_fid(pdu, fidp); 2021 out_nofid: 2022 pdu_complete(pdu, err); 2023 v9fs_string_free(&name); 2024 } 2025 2026 static void coroutine_fn v9fs_fsync(void *opaque) 2027 { 2028 int err; 2029 int32_t fid; 2030 int datasync; 2031 size_t offset = 7; 2032 V9fsFidState *fidp; 2033 V9fsPDU *pdu = opaque; 2034 2035 err = pdu_unmarshal(pdu, offset, "dd", &fid, &datasync); 2036 if (err < 0) { 2037 goto out_nofid; 2038 } 2039 trace_v9fs_fsync(pdu->tag, pdu->id, fid, datasync); 2040 2041 fidp = get_fid(pdu, fid); 2042 if (fidp == NULL) { 2043 err = -ENOENT; 2044 goto out_nofid; 2045 } 2046 err = v9fs_co_fsync(pdu, fidp, datasync); 2047 if (!err) { 2048 err = offset; 2049 } 2050 put_fid(pdu, fidp); 2051 out_nofid: 2052 pdu_complete(pdu, err); 2053 } 2054 2055 static void coroutine_fn v9fs_clunk(void *opaque) 2056 { 2057 int err; 2058 int32_t fid; 2059 size_t offset = 7; 2060 V9fsFidState *fidp; 2061 V9fsPDU *pdu = opaque; 2062 V9fsState *s = pdu->s; 2063 2064 err = pdu_unmarshal(pdu, offset, "d", &fid); 2065 if (err < 0) { 2066 goto out_nofid; 2067 } 2068 trace_v9fs_clunk(pdu->tag, pdu->id, fid); 2069 2070 fidp = clunk_fid(s, fid); 2071 if (fidp == NULL) { 2072 err = -ENOENT; 2073 goto out_nofid; 2074 } 2075 /* 2076 * Bump the ref so that put_fid will 2077 * free the fid. 
2078 */ 2079 fidp->ref++; 2080 err = put_fid(pdu, fidp); 2081 if (!err) { 2082 err = offset; 2083 } 2084 out_nofid: 2085 pdu_complete(pdu, err); 2086 } 2087 2088 /* 2089 * Create a QEMUIOVector for a sub-region of PDU iovecs 2090 * 2091 * @qiov: uninitialized QEMUIOVector 2092 * @skip: number of bytes to skip from beginning of PDU 2093 * @size: number of bytes to include 2094 * @is_write: true - write, false - read 2095 * 2096 * The resulting QEMUIOVector has heap-allocated iovecs and must be cleaned up 2097 * with qemu_iovec_destroy(). 2098 */ 2099 static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu, 2100 size_t skip, size_t size, 2101 bool is_write) 2102 { 2103 QEMUIOVector elem; 2104 struct iovec *iov; 2105 unsigned int niov; 2106 2107 if (is_write) { 2108 pdu->s->transport->init_out_iov_from_pdu(pdu, &iov, &niov, size + skip); 2109 } else { 2110 pdu->s->transport->init_in_iov_from_pdu(pdu, &iov, &niov, size + skip); 2111 } 2112 2113 qemu_iovec_init_external(&elem, iov, niov); 2114 qemu_iovec_init(qiov, niov); 2115 qemu_iovec_concat(qiov, &elem, skip, size); 2116 } 2117 2118 static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp, 2119 uint64_t off, uint32_t max_count) 2120 { 2121 ssize_t err; 2122 size_t offset = 7; 2123 uint64_t read_count; 2124 QEMUIOVector qiov_full; 2125 2126 if (fidp->fs.xattr.len < off) { 2127 read_count = 0; 2128 } else { 2129 read_count = fidp->fs.xattr.len - off; 2130 } 2131 if (read_count > max_count) { 2132 read_count = max_count; 2133 } 2134 err = pdu_marshal(pdu, offset, "d", read_count); 2135 if (err < 0) { 2136 return err; 2137 } 2138 offset += err; 2139 2140 v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, read_count, false); 2141 err = v9fs_pack(qiov_full.iov, qiov_full.niov, 0, 2142 ((char *)fidp->fs.xattr.value) + off, 2143 read_count); 2144 qemu_iovec_destroy(&qiov_full); 2145 if (err < 0) { 2146 return err; 2147 } 2148 offset += err; 2149 return offset; 2150 } 2151 2152 static int 
coroutine_fn v9fs_do_readdir_with_stat(V9fsPDU *pdu, 2153 V9fsFidState *fidp, 2154 uint32_t max_count) 2155 { 2156 V9fsPath path; 2157 V9fsStat v9stat; 2158 int len, err = 0; 2159 int32_t count = 0; 2160 struct stat stbuf; 2161 off_t saved_dir_pos; 2162 struct dirent *dent; 2163 2164 /* save the directory position */ 2165 saved_dir_pos = v9fs_co_telldir(pdu, fidp); 2166 if (saved_dir_pos < 0) { 2167 return saved_dir_pos; 2168 } 2169 2170 while (1) { 2171 v9fs_path_init(&path); 2172 2173 v9fs_readdir_lock(&fidp->fs.dir); 2174 2175 err = v9fs_co_readdir(pdu, fidp, &dent); 2176 if (err || !dent) { 2177 break; 2178 } 2179 err = v9fs_co_name_to_path(pdu, &fidp->path, dent->d_name, &path); 2180 if (err < 0) { 2181 break; 2182 } 2183 err = v9fs_co_lstat(pdu, &path, &stbuf); 2184 if (err < 0) { 2185 break; 2186 } 2187 err = stat_to_v9stat(pdu, &path, dent->d_name, &stbuf, &v9stat); 2188 if (err < 0) { 2189 break; 2190 } 2191 if ((count + v9stat.size + 2) > max_count) { 2192 v9fs_readdir_unlock(&fidp->fs.dir); 2193 2194 /* Ran out of buffer. 
Set dir back to old position and return */ 2195 v9fs_co_seekdir(pdu, fidp, saved_dir_pos); 2196 v9fs_stat_free(&v9stat); 2197 v9fs_path_free(&path); 2198 return count; 2199 } 2200 2201 /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */ 2202 len = pdu_marshal(pdu, 11 + count, "S", &v9stat); 2203 2204 v9fs_readdir_unlock(&fidp->fs.dir); 2205 2206 if (len < 0) { 2207 v9fs_co_seekdir(pdu, fidp, saved_dir_pos); 2208 v9fs_stat_free(&v9stat); 2209 v9fs_path_free(&path); 2210 return len; 2211 } 2212 count += len; 2213 v9fs_stat_free(&v9stat); 2214 v9fs_path_free(&path); 2215 saved_dir_pos = dent->d_off; 2216 } 2217 2218 v9fs_readdir_unlock(&fidp->fs.dir); 2219 2220 v9fs_path_free(&path); 2221 if (err < 0) { 2222 return err; 2223 } 2224 return count; 2225 } 2226 2227 static void coroutine_fn v9fs_read(void *opaque) 2228 { 2229 int32_t fid; 2230 uint64_t off; 2231 ssize_t err = 0; 2232 int32_t count = 0; 2233 size_t offset = 7; 2234 uint32_t max_count; 2235 V9fsFidState *fidp; 2236 V9fsPDU *pdu = opaque; 2237 V9fsState *s = pdu->s; 2238 2239 err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &max_count); 2240 if (err < 0) { 2241 goto out_nofid; 2242 } 2243 trace_v9fs_read(pdu->tag, pdu->id, fid, off, max_count); 2244 2245 fidp = get_fid(pdu, fid); 2246 if (fidp == NULL) { 2247 err = -EINVAL; 2248 goto out_nofid; 2249 } 2250 if (fidp->fid_type == P9_FID_DIR) { 2251 if (s->proto_version != V9FS_PROTO_2000U) { 2252 warn_report_once( 2253 "9p: bad client: T_read request on directory only expected " 2254 "with 9P2000.u protocol version" 2255 ); 2256 err = -EOPNOTSUPP; 2257 goto out; 2258 } 2259 if (off == 0) { 2260 v9fs_co_rewinddir(pdu, fidp); 2261 } 2262 count = v9fs_do_readdir_with_stat(pdu, fidp, max_count); 2263 if (count < 0) { 2264 err = count; 2265 goto out; 2266 } 2267 err = pdu_marshal(pdu, offset, "d", count); 2268 if (err < 0) { 2269 goto out; 2270 } 2271 err += offset + count; 2272 } else if (fidp->fid_type == P9_FID_FILE) { 2273 QEMUIOVector 
qiov_full; 2274 QEMUIOVector qiov; 2275 int32_t len; 2276 2277 v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset + 4, max_count, false); 2278 qemu_iovec_init(&qiov, qiov_full.niov); 2279 do { 2280 qemu_iovec_reset(&qiov); 2281 qemu_iovec_concat(&qiov, &qiov_full, count, qiov_full.size - count); 2282 if (0) { 2283 print_sg(qiov.iov, qiov.niov); 2284 } 2285 /* Loop in case of EINTR */ 2286 do { 2287 len = v9fs_co_preadv(pdu, fidp, qiov.iov, qiov.niov, off); 2288 if (len >= 0) { 2289 off += len; 2290 count += len; 2291 } 2292 } while (len == -EINTR && !pdu->cancelled); 2293 if (len < 0) { 2294 /* IO error return the error */ 2295 err = len; 2296 goto out_free_iovec; 2297 } 2298 } while (count < max_count && len > 0); 2299 err = pdu_marshal(pdu, offset, "d", count); 2300 if (err < 0) { 2301 goto out_free_iovec; 2302 } 2303 err += offset + count; 2304 out_free_iovec: 2305 qemu_iovec_destroy(&qiov); 2306 qemu_iovec_destroy(&qiov_full); 2307 } else if (fidp->fid_type == P9_FID_XATTR) { 2308 err = v9fs_xattr_read(s, pdu, fidp, off, max_count); 2309 } else { 2310 err = -EINVAL; 2311 } 2312 trace_v9fs_read_return(pdu->tag, pdu->id, count, err); 2313 out: 2314 put_fid(pdu, fidp); 2315 out_nofid: 2316 pdu_complete(pdu, err); 2317 } 2318 2319 /** 2320 * Returns size required in Rreaddir response for the passed dirent @p name. 2321 * 2322 * @param name - directory entry's name (i.e. 
file name, directory name) 2323 * @returns required size in bytes 2324 */ 2325 size_t v9fs_readdir_response_size(V9fsString *name) 2326 { 2327 /* 2328 * Size of each dirent on the wire: size of qid (13) + size of offset (8) 2329 * size of type (1) + size of name.size (2) + strlen(name.data) 2330 */ 2331 return 24 + v9fs_string_size(name); 2332 } 2333 2334 static void v9fs_free_dirents(struct V9fsDirEnt *e) 2335 { 2336 struct V9fsDirEnt *next = NULL; 2337 2338 for (; e; e = next) { 2339 next = e->next; 2340 g_free(e->dent); 2341 g_free(e->st); 2342 g_free(e); 2343 } 2344 } 2345 2346 static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp, 2347 off_t offset, int32_t max_count) 2348 { 2349 size_t size; 2350 V9fsQID qid; 2351 V9fsString name; 2352 int len, err = 0; 2353 int32_t count = 0; 2354 struct dirent *dent; 2355 struct stat *st; 2356 struct V9fsDirEnt *entries = NULL; 2357 2358 /* 2359 * inode remapping requires the device id, which in turn might be 2360 * different for different directory entries, so if inode remapping is 2361 * enabled we have to make a full stat for each directory entry 2362 */ 2363 const bool dostat = pdu->s->ctx.export_flags & V9FS_REMAP_INODES; 2364 2365 /* 2366 * Fetch all required directory entries altogether on a background IO 2367 * thread from fs driver. We don't want to do that for each entry 2368 * individually, because hopping between threads (this main IO thread 2369 * and background IO driver thread) would sum up to huge latencies. 
2370 */ 2371 count = v9fs_co_readdir_many(pdu, fidp, &entries, offset, max_count, 2372 dostat); 2373 if (count < 0) { 2374 err = count; 2375 count = 0; 2376 goto out; 2377 } 2378 count = 0; 2379 2380 for (struct V9fsDirEnt *e = entries; e; e = e->next) { 2381 dent = e->dent; 2382 2383 if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) { 2384 st = e->st; 2385 /* e->st should never be NULL, but just to be sure */ 2386 if (!st) { 2387 err = -1; 2388 break; 2389 } 2390 2391 /* remap inode */ 2392 err = stat_to_qid(pdu, st, &qid); 2393 if (err < 0) { 2394 break; 2395 } 2396 } else { 2397 /* 2398 * Fill up just the path field of qid because the client uses 2399 * only that. To fill the entire qid structure we will have 2400 * to stat each dirent found, which is expensive. For the 2401 * latter reason we don't call stat_to_qid() here. Only drawback 2402 * is that no multi-device export detection of stat_to_qid() 2403 * would be done and provided as error to the user here. But 2404 * user would get that error anyway when accessing those 2405 * files/dirs through other ways. 
2406 */ 2407 size = MIN(sizeof(dent->d_ino), sizeof(qid.path)); 2408 memcpy(&qid.path, &dent->d_ino, size); 2409 /* Fill the other fields with dummy values */ 2410 qid.type = 0; 2411 qid.version = 0; 2412 } 2413 2414 v9fs_string_init(&name); 2415 v9fs_string_sprintf(&name, "%s", dent->d_name); 2416 2417 /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */ 2418 len = pdu_marshal(pdu, 11 + count, "Qqbs", 2419 &qid, dent->d_off, 2420 dent->d_type, &name); 2421 2422 v9fs_string_free(&name); 2423 2424 if (len < 0) { 2425 err = len; 2426 break; 2427 } 2428 2429 count += len; 2430 } 2431 2432 out: 2433 v9fs_free_dirents(entries); 2434 if (err < 0) { 2435 return err; 2436 } 2437 return count; 2438 } 2439 2440 static void coroutine_fn v9fs_readdir(void *opaque) 2441 { 2442 int32_t fid; 2443 V9fsFidState *fidp; 2444 ssize_t retval = 0; 2445 size_t offset = 7; 2446 uint64_t initial_offset; 2447 int32_t count; 2448 uint32_t max_count; 2449 V9fsPDU *pdu = opaque; 2450 V9fsState *s = pdu->s; 2451 2452 retval = pdu_unmarshal(pdu, offset, "dqd", &fid, 2453 &initial_offset, &max_count); 2454 if (retval < 0) { 2455 goto out_nofid; 2456 } 2457 trace_v9fs_readdir(pdu->tag, pdu->id, fid, initial_offset, max_count); 2458 2459 /* Enough space for a R_readdir header: size[4] Rreaddir tag[2] count[4] */ 2460 if (max_count > s->msize - 11) { 2461 max_count = s->msize - 11; 2462 warn_report_once( 2463 "9p: bad client: T_readdir with count > msize - 11" 2464 ); 2465 } 2466 2467 fidp = get_fid(pdu, fid); 2468 if (fidp == NULL) { 2469 retval = -EINVAL; 2470 goto out_nofid; 2471 } 2472 if (!fidp->fs.dir.stream) { 2473 retval = -EINVAL; 2474 goto out; 2475 } 2476 if (s->proto_version != V9FS_PROTO_2000L) { 2477 warn_report_once( 2478 "9p: bad client: T_readdir request only expected with 9P2000.L " 2479 "protocol version" 2480 ); 2481 retval = -EOPNOTSUPP; 2482 goto out; 2483 } 2484 count = v9fs_do_readdir(pdu, fidp, (off_t) initial_offset, max_count); 2485 if (count < 0) { 2486 retval 
= count; 2487 goto out; 2488 } 2489 retval = pdu_marshal(pdu, offset, "d", count); 2490 if (retval < 0) { 2491 goto out; 2492 } 2493 retval += count + offset; 2494 trace_v9fs_readdir_return(pdu->tag, pdu->id, count, retval); 2495 out: 2496 put_fid(pdu, fidp); 2497 out_nofid: 2498 pdu_complete(pdu, retval); 2499 } 2500 2501 static int v9fs_xattr_write(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp, 2502 uint64_t off, uint32_t count, 2503 struct iovec *sg, int cnt) 2504 { 2505 int i, to_copy; 2506 ssize_t err = 0; 2507 uint64_t write_count; 2508 size_t offset = 7; 2509 2510 2511 if (fidp->fs.xattr.len < off) { 2512 return -ENOSPC; 2513 } 2514 write_count = fidp->fs.xattr.len - off; 2515 if (write_count > count) { 2516 write_count = count; 2517 } 2518 err = pdu_marshal(pdu, offset, "d", write_count); 2519 if (err < 0) { 2520 return err; 2521 } 2522 err += offset; 2523 fidp->fs.xattr.copied_len += write_count; 2524 /* 2525 * Now copy the content from sg list 2526 */ 2527 for (i = 0; i < cnt; i++) { 2528 if (write_count > sg[i].iov_len) { 2529 to_copy = sg[i].iov_len; 2530 } else { 2531 to_copy = write_count; 2532 } 2533 memcpy((char *)fidp->fs.xattr.value + off, sg[i].iov_base, to_copy); 2534 /* updating vs->off since we are not using below */ 2535 off += to_copy; 2536 write_count -= to_copy; 2537 } 2538 2539 return err; 2540 } 2541 2542 static void coroutine_fn v9fs_write(void *opaque) 2543 { 2544 ssize_t err; 2545 int32_t fid; 2546 uint64_t off; 2547 uint32_t count; 2548 int32_t len = 0; 2549 int32_t total = 0; 2550 size_t offset = 7; 2551 V9fsFidState *fidp; 2552 V9fsPDU *pdu = opaque; 2553 V9fsState *s = pdu->s; 2554 QEMUIOVector qiov_full; 2555 QEMUIOVector qiov; 2556 2557 err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &count); 2558 if (err < 0) { 2559 pdu_complete(pdu, err); 2560 return; 2561 } 2562 offset += err; 2563 v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, count, true); 2564 trace_v9fs_write(pdu->tag, pdu->id, fid, off, count, qiov_full.niov); 
2565 2566 fidp = get_fid(pdu, fid); 2567 if (fidp == NULL) { 2568 err = -EINVAL; 2569 goto out_nofid; 2570 } 2571 if (fidp->fid_type == P9_FID_FILE) { 2572 if (fidp->fs.fd == -1) { 2573 err = -EINVAL; 2574 goto out; 2575 } 2576 } else if (fidp->fid_type == P9_FID_XATTR) { 2577 /* 2578 * setxattr operation 2579 */ 2580 err = v9fs_xattr_write(s, pdu, fidp, off, count, 2581 qiov_full.iov, qiov_full.niov); 2582 goto out; 2583 } else { 2584 err = -EINVAL; 2585 goto out; 2586 } 2587 qemu_iovec_init(&qiov, qiov_full.niov); 2588 do { 2589 qemu_iovec_reset(&qiov); 2590 qemu_iovec_concat(&qiov, &qiov_full, total, qiov_full.size - total); 2591 if (0) { 2592 print_sg(qiov.iov, qiov.niov); 2593 } 2594 /* Loop in case of EINTR */ 2595 do { 2596 len = v9fs_co_pwritev(pdu, fidp, qiov.iov, qiov.niov, off); 2597 if (len >= 0) { 2598 off += len; 2599 total += len; 2600 } 2601 } while (len == -EINTR && !pdu->cancelled); 2602 if (len < 0) { 2603 /* IO error return the error */ 2604 err = len; 2605 goto out_qiov; 2606 } 2607 } while (total < count && len > 0); 2608 2609 offset = 7; 2610 err = pdu_marshal(pdu, offset, "d", total); 2611 if (err < 0) { 2612 goto out_qiov; 2613 } 2614 err += offset; 2615 trace_v9fs_write_return(pdu->tag, pdu->id, total, err); 2616 out_qiov: 2617 qemu_iovec_destroy(&qiov); 2618 out: 2619 put_fid(pdu, fidp); 2620 out_nofid: 2621 qemu_iovec_destroy(&qiov_full); 2622 pdu_complete(pdu, err); 2623 } 2624 2625 static void coroutine_fn v9fs_create(void *opaque) 2626 { 2627 int32_t fid; 2628 int err = 0; 2629 size_t offset = 7; 2630 V9fsFidState *fidp; 2631 V9fsQID qid; 2632 int32_t perm; 2633 int8_t mode; 2634 V9fsPath path; 2635 struct stat stbuf; 2636 V9fsString name; 2637 V9fsString extension; 2638 int iounit; 2639 V9fsPDU *pdu = opaque; 2640 V9fsState *s = pdu->s; 2641 2642 v9fs_path_init(&path); 2643 v9fs_string_init(&name); 2644 v9fs_string_init(&extension); 2645 err = pdu_unmarshal(pdu, offset, "dsdbs", &fid, &name, 2646 &perm, &mode, &extension); 2647 if 
(err < 0) { 2648 goto out_nofid; 2649 } 2650 trace_v9fs_create(pdu->tag, pdu->id, fid, name.data, perm, mode); 2651 2652 if (name_is_illegal(name.data)) { 2653 err = -ENOENT; 2654 goto out_nofid; 2655 } 2656 2657 if (!strcmp(".", name.data) || !strcmp("..", name.data)) { 2658 err = -EEXIST; 2659 goto out_nofid; 2660 } 2661 2662 fidp = get_fid(pdu, fid); 2663 if (fidp == NULL) { 2664 err = -EINVAL; 2665 goto out_nofid; 2666 } 2667 if (fidp->fid_type != P9_FID_NONE) { 2668 err = -EINVAL; 2669 goto out; 2670 } 2671 if (perm & P9_STAT_MODE_DIR) { 2672 err = v9fs_co_mkdir(pdu, fidp, &name, perm & 0777, 2673 fidp->uid, -1, &stbuf); 2674 if (err < 0) { 2675 goto out; 2676 } 2677 err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path); 2678 if (err < 0) { 2679 goto out; 2680 } 2681 v9fs_path_write_lock(s); 2682 v9fs_path_copy(&fidp->path, &path); 2683 v9fs_path_unlock(s); 2684 err = v9fs_co_opendir(pdu, fidp); 2685 if (err < 0) { 2686 goto out; 2687 } 2688 fidp->fid_type = P9_FID_DIR; 2689 } else if (perm & P9_STAT_MODE_SYMLINK) { 2690 err = v9fs_co_symlink(pdu, fidp, &name, 2691 extension.data, -1 , &stbuf); 2692 if (err < 0) { 2693 goto out; 2694 } 2695 err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path); 2696 if (err < 0) { 2697 goto out; 2698 } 2699 v9fs_path_write_lock(s); 2700 v9fs_path_copy(&fidp->path, &path); 2701 v9fs_path_unlock(s); 2702 } else if (perm & P9_STAT_MODE_LINK) { 2703 int32_t ofid = atoi(extension.data); 2704 V9fsFidState *ofidp = get_fid(pdu, ofid); 2705 if (ofidp == NULL) { 2706 err = -EINVAL; 2707 goto out; 2708 } 2709 err = v9fs_co_link(pdu, ofidp, fidp, &name); 2710 put_fid(pdu, ofidp); 2711 if (err < 0) { 2712 goto out; 2713 } 2714 err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path); 2715 if (err < 0) { 2716 fidp->fid_type = P9_FID_NONE; 2717 goto out; 2718 } 2719 v9fs_path_write_lock(s); 2720 v9fs_path_copy(&fidp->path, &path); 2721 v9fs_path_unlock(s); 2722 err = v9fs_co_lstat(pdu, &fidp->path, &stbuf); 2723 if 
(err < 0) { 2724 fidp->fid_type = P9_FID_NONE; 2725 goto out; 2726 } 2727 } else if (perm & P9_STAT_MODE_DEVICE) { 2728 char ctype; 2729 uint32_t major, minor; 2730 mode_t nmode = 0; 2731 2732 if (sscanf(extension.data, "%c %u %u", &ctype, &major, &minor) != 3) { 2733 err = -errno; 2734 goto out; 2735 } 2736 2737 switch (ctype) { 2738 case 'c': 2739 nmode = S_IFCHR; 2740 break; 2741 case 'b': 2742 nmode = S_IFBLK; 2743 break; 2744 default: 2745 err = -EIO; 2746 goto out; 2747 } 2748 2749 nmode |= perm & 0777; 2750 err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1, 2751 makedev(major, minor), nmode, &stbuf); 2752 if (err < 0) { 2753 goto out; 2754 } 2755 err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path); 2756 if (err < 0) { 2757 goto out; 2758 } 2759 v9fs_path_write_lock(s); 2760 v9fs_path_copy(&fidp->path, &path); 2761 v9fs_path_unlock(s); 2762 } else if (perm & P9_STAT_MODE_NAMED_PIPE) { 2763 err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1, 2764 0, S_IFIFO | (perm & 0777), &stbuf); 2765 if (err < 0) { 2766 goto out; 2767 } 2768 err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path); 2769 if (err < 0) { 2770 goto out; 2771 } 2772 v9fs_path_write_lock(s); 2773 v9fs_path_copy(&fidp->path, &path); 2774 v9fs_path_unlock(s); 2775 } else if (perm & P9_STAT_MODE_SOCKET) { 2776 err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1, 2777 0, S_IFSOCK | (perm & 0777), &stbuf); 2778 if (err < 0) { 2779 goto out; 2780 } 2781 err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path); 2782 if (err < 0) { 2783 goto out; 2784 } 2785 v9fs_path_write_lock(s); 2786 v9fs_path_copy(&fidp->path, &path); 2787 v9fs_path_unlock(s); 2788 } else { 2789 err = v9fs_co_open2(pdu, fidp, &name, -1, 2790 omode_to_uflags(mode) | O_CREAT, perm, &stbuf); 2791 if (err < 0) { 2792 goto out; 2793 } 2794 fidp->fid_type = P9_FID_FILE; 2795 fidp->open_flags = omode_to_uflags(mode); 2796 if (fidp->open_flags & O_EXCL) { 2797 /* 2798 * We let the host file system do O_EXCL check 
2799 * We should not reclaim such fd 2800 */ 2801 fidp->flags |= FID_NON_RECLAIMABLE; 2802 } 2803 } 2804 iounit = get_iounit(pdu, &fidp->path); 2805 err = stat_to_qid(pdu, &stbuf, &qid); 2806 if (err < 0) { 2807 goto out; 2808 } 2809 err = pdu_marshal(pdu, offset, "Qd", &qid, iounit); 2810 if (err < 0) { 2811 goto out; 2812 } 2813 err += offset; 2814 trace_v9fs_create_return(pdu->tag, pdu->id, 2815 qid.type, qid.version, qid.path, iounit); 2816 out: 2817 put_fid(pdu, fidp); 2818 out_nofid: 2819 pdu_complete(pdu, err); 2820 v9fs_string_free(&name); 2821 v9fs_string_free(&extension); 2822 v9fs_path_free(&path); 2823 } 2824 2825 static void coroutine_fn v9fs_symlink(void *opaque) 2826 { 2827 V9fsPDU *pdu = opaque; 2828 V9fsString name; 2829 V9fsString symname; 2830 V9fsFidState *dfidp; 2831 V9fsQID qid; 2832 struct stat stbuf; 2833 int32_t dfid; 2834 int err = 0; 2835 gid_t gid; 2836 size_t offset = 7; 2837 2838 v9fs_string_init(&name); 2839 v9fs_string_init(&symname); 2840 err = pdu_unmarshal(pdu, offset, "dssd", &dfid, &name, &symname, &gid); 2841 if (err < 0) { 2842 goto out_nofid; 2843 } 2844 trace_v9fs_symlink(pdu->tag, pdu->id, dfid, name.data, symname.data, gid); 2845 2846 if (name_is_illegal(name.data)) { 2847 err = -ENOENT; 2848 goto out_nofid; 2849 } 2850 2851 if (!strcmp(".", name.data) || !strcmp("..", name.data)) { 2852 err = -EEXIST; 2853 goto out_nofid; 2854 } 2855 2856 dfidp = get_fid(pdu, dfid); 2857 if (dfidp == NULL) { 2858 err = -EINVAL; 2859 goto out_nofid; 2860 } 2861 err = v9fs_co_symlink(pdu, dfidp, &name, symname.data, gid, &stbuf); 2862 if (err < 0) { 2863 goto out; 2864 } 2865 err = stat_to_qid(pdu, &stbuf, &qid); 2866 if (err < 0) { 2867 goto out; 2868 } 2869 err = pdu_marshal(pdu, offset, "Q", &qid); 2870 if (err < 0) { 2871 goto out; 2872 } 2873 err += offset; 2874 trace_v9fs_symlink_return(pdu->tag, pdu->id, 2875 qid.type, qid.version, qid.path); 2876 out: 2877 put_fid(pdu, dfidp); 2878 out_nofid: 2879 pdu_complete(pdu, err); 2880 
v9fs_string_free(&name); 2881 v9fs_string_free(&symname); 2882 } 2883 2884 static void coroutine_fn v9fs_flush(void *opaque) 2885 { 2886 ssize_t err; 2887 int16_t tag; 2888 size_t offset = 7; 2889 V9fsPDU *cancel_pdu = NULL; 2890 V9fsPDU *pdu = opaque; 2891 V9fsState *s = pdu->s; 2892 2893 err = pdu_unmarshal(pdu, offset, "w", &tag); 2894 if (err < 0) { 2895 pdu_complete(pdu, err); 2896 return; 2897 } 2898 trace_v9fs_flush(pdu->tag, pdu->id, tag); 2899 2900 if (pdu->tag == tag) { 2901 warn_report("the guest sent a self-referencing 9P flush request"); 2902 } else { 2903 QLIST_FOREACH(cancel_pdu, &s->active_list, next) { 2904 if (cancel_pdu->tag == tag) { 2905 break; 2906 } 2907 } 2908 } 2909 if (cancel_pdu) { 2910 cancel_pdu->cancelled = 1; 2911 /* 2912 * Wait for pdu to complete. 2913 */ 2914 qemu_co_queue_wait(&cancel_pdu->complete, NULL); 2915 if (!qemu_co_queue_next(&cancel_pdu->complete)) { 2916 cancel_pdu->cancelled = 0; 2917 pdu_free(cancel_pdu); 2918 } 2919 } 2920 pdu_complete(pdu, 7); 2921 } 2922 2923 static void coroutine_fn v9fs_link(void *opaque) 2924 { 2925 V9fsPDU *pdu = opaque; 2926 int32_t dfid, oldfid; 2927 V9fsFidState *dfidp, *oldfidp; 2928 V9fsString name; 2929 size_t offset = 7; 2930 int err = 0; 2931 2932 v9fs_string_init(&name); 2933 err = pdu_unmarshal(pdu, offset, "dds", &dfid, &oldfid, &name); 2934 if (err < 0) { 2935 goto out_nofid; 2936 } 2937 trace_v9fs_link(pdu->tag, pdu->id, dfid, oldfid, name.data); 2938 2939 if (name_is_illegal(name.data)) { 2940 err = -ENOENT; 2941 goto out_nofid; 2942 } 2943 2944 if (!strcmp(".", name.data) || !strcmp("..", name.data)) { 2945 err = -EEXIST; 2946 goto out_nofid; 2947 } 2948 2949 dfidp = get_fid(pdu, dfid); 2950 if (dfidp == NULL) { 2951 err = -ENOENT; 2952 goto out_nofid; 2953 } 2954 2955 oldfidp = get_fid(pdu, oldfid); 2956 if (oldfidp == NULL) { 2957 err = -ENOENT; 2958 goto out; 2959 } 2960 err = v9fs_co_link(pdu, oldfidp, dfidp, &name); 2961 if (!err) { 2962 err = offset; 2963 } 2964 
put_fid(pdu, oldfidp); 2965 out: 2966 put_fid(pdu, dfidp); 2967 out_nofid: 2968 v9fs_string_free(&name); 2969 pdu_complete(pdu, err); 2970 } 2971 2972 /* Only works with path name based fid */ 2973 static void coroutine_fn v9fs_remove(void *opaque) 2974 { 2975 int32_t fid; 2976 int err = 0; 2977 size_t offset = 7; 2978 V9fsFidState *fidp; 2979 V9fsPDU *pdu = opaque; 2980 2981 err = pdu_unmarshal(pdu, offset, "d", &fid); 2982 if (err < 0) { 2983 goto out_nofid; 2984 } 2985 trace_v9fs_remove(pdu->tag, pdu->id, fid); 2986 2987 fidp = get_fid(pdu, fid); 2988 if (fidp == NULL) { 2989 err = -EINVAL; 2990 goto out_nofid; 2991 } 2992 /* if fs driver is not path based, return EOPNOTSUPP */ 2993 if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) { 2994 err = -EOPNOTSUPP; 2995 goto out_err; 2996 } 2997 /* 2998 * IF the file is unlinked, we cannot reopen 2999 * the file later. So don't reclaim fd 3000 */ 3001 err = v9fs_mark_fids_unreclaim(pdu, &fidp->path); 3002 if (err < 0) { 3003 goto out_err; 3004 } 3005 err = v9fs_co_remove(pdu, &fidp->path); 3006 if (!err) { 3007 err = offset; 3008 } 3009 out_err: 3010 /* For TREMOVE we need to clunk the fid even on failed remove */ 3011 clunk_fid(pdu->s, fidp->fid); 3012 put_fid(pdu, fidp); 3013 out_nofid: 3014 pdu_complete(pdu, err); 3015 } 3016 3017 static void coroutine_fn v9fs_unlinkat(void *opaque) 3018 { 3019 int err = 0; 3020 V9fsString name; 3021 int32_t dfid, flags, rflags = 0; 3022 size_t offset = 7; 3023 V9fsPath path; 3024 V9fsFidState *dfidp; 3025 V9fsPDU *pdu = opaque; 3026 3027 v9fs_string_init(&name); 3028 err = pdu_unmarshal(pdu, offset, "dsd", &dfid, &name, &flags); 3029 if (err < 0) { 3030 goto out_nofid; 3031 } 3032 3033 if (name_is_illegal(name.data)) { 3034 err = -ENOENT; 3035 goto out_nofid; 3036 } 3037 3038 if (!strcmp(".", name.data)) { 3039 err = -EINVAL; 3040 goto out_nofid; 3041 } 3042 3043 if (!strcmp("..", name.data)) { 3044 err = -ENOTEMPTY; 3045 goto out_nofid; 3046 } 3047 3048 if (flags & 
~P9_DOTL_AT_REMOVEDIR) { 3049 err = -EINVAL; 3050 goto out_nofid; 3051 } 3052 3053 if (flags & P9_DOTL_AT_REMOVEDIR) { 3054 rflags |= AT_REMOVEDIR; 3055 } 3056 3057 dfidp = get_fid(pdu, dfid); 3058 if (dfidp == NULL) { 3059 err = -EINVAL; 3060 goto out_nofid; 3061 } 3062 /* 3063 * IF the file is unlinked, we cannot reopen 3064 * the file later. So don't reclaim fd 3065 */ 3066 v9fs_path_init(&path); 3067 err = v9fs_co_name_to_path(pdu, &dfidp->path, name.data, &path); 3068 if (err < 0) { 3069 goto out_err; 3070 } 3071 err = v9fs_mark_fids_unreclaim(pdu, &path); 3072 if (err < 0) { 3073 goto out_err; 3074 } 3075 err = v9fs_co_unlinkat(pdu, &dfidp->path, &name, rflags); 3076 if (!err) { 3077 err = offset; 3078 } 3079 out_err: 3080 put_fid(pdu, dfidp); 3081 v9fs_path_free(&path); 3082 out_nofid: 3083 pdu_complete(pdu, err); 3084 v9fs_string_free(&name); 3085 } 3086 3087 3088 /* Only works with path name based fid */ 3089 static int coroutine_fn v9fs_complete_rename(V9fsPDU *pdu, V9fsFidState *fidp, 3090 int32_t newdirfid, 3091 V9fsString *name) 3092 { 3093 int err = 0; 3094 V9fsPath new_path; 3095 V9fsFidState *tfidp; 3096 V9fsState *s = pdu->s; 3097 V9fsFidState *dirfidp = NULL; 3098 3099 v9fs_path_init(&new_path); 3100 if (newdirfid != -1) { 3101 dirfidp = get_fid(pdu, newdirfid); 3102 if (dirfidp == NULL) { 3103 return -ENOENT; 3104 } 3105 if (fidp->fid_type != P9_FID_NONE) { 3106 err = -EINVAL; 3107 goto out; 3108 } 3109 err = v9fs_co_name_to_path(pdu, &dirfidp->path, name->data, &new_path); 3110 if (err < 0) { 3111 goto out; 3112 } 3113 } else { 3114 char *dir_name = g_path_get_dirname(fidp->path.data); 3115 V9fsPath dir_path; 3116 3117 v9fs_path_init(&dir_path); 3118 v9fs_path_sprintf(&dir_path, "%s", dir_name); 3119 g_free(dir_name); 3120 3121 err = v9fs_co_name_to_path(pdu, &dir_path, name->data, &new_path); 3122 v9fs_path_free(&dir_path); 3123 if (err < 0) { 3124 goto out; 3125 } 3126 } 3127 err = v9fs_co_rename(pdu, &fidp->path, &new_path); 3128 if (err < 0) 
{ 3129 goto out; 3130 } 3131 /* 3132 * Fixup fid's pointing to the old name to 3133 * start pointing to the new name 3134 */ 3135 QSIMPLEQ_FOREACH(tfidp, &s->fid_list, next) { 3136 if (v9fs_path_is_ancestor(&fidp->path, &tfidp->path)) { 3137 /* replace the name */ 3138 v9fs_fix_path(&tfidp->path, &new_path, strlen(fidp->path.data)); 3139 } 3140 } 3141 out: 3142 if (dirfidp) { 3143 put_fid(pdu, dirfidp); 3144 } 3145 v9fs_path_free(&new_path); 3146 return err; 3147 } 3148 3149 /* Only works with path name based fid */ 3150 static void coroutine_fn v9fs_rename(void *opaque) 3151 { 3152 int32_t fid; 3153 ssize_t err = 0; 3154 size_t offset = 7; 3155 V9fsString name; 3156 int32_t newdirfid; 3157 V9fsFidState *fidp; 3158 V9fsPDU *pdu = opaque; 3159 V9fsState *s = pdu->s; 3160 3161 v9fs_string_init(&name); 3162 err = pdu_unmarshal(pdu, offset, "dds", &fid, &newdirfid, &name); 3163 if (err < 0) { 3164 goto out_nofid; 3165 } 3166 3167 if (name_is_illegal(name.data)) { 3168 err = -ENOENT; 3169 goto out_nofid; 3170 } 3171 3172 if (!strcmp(".", name.data) || !strcmp("..", name.data)) { 3173 err = -EISDIR; 3174 goto out_nofid; 3175 } 3176 3177 fidp = get_fid(pdu, fid); 3178 if (fidp == NULL) { 3179 err = -ENOENT; 3180 goto out_nofid; 3181 } 3182 if (fidp->fid_type != P9_FID_NONE) { 3183 err = -EINVAL; 3184 goto out; 3185 } 3186 /* if fs driver is not path based, return EOPNOTSUPP */ 3187 if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) { 3188 err = -EOPNOTSUPP; 3189 goto out; 3190 } 3191 v9fs_path_write_lock(s); 3192 err = v9fs_complete_rename(pdu, fidp, newdirfid, &name); 3193 v9fs_path_unlock(s); 3194 if (!err) { 3195 err = offset; 3196 } 3197 out: 3198 put_fid(pdu, fidp); 3199 out_nofid: 3200 pdu_complete(pdu, err); 3201 v9fs_string_free(&name); 3202 } 3203 3204 static int coroutine_fn v9fs_fix_fid_paths(V9fsPDU *pdu, V9fsPath *olddir, 3205 V9fsString *old_name, 3206 V9fsPath *newdir, 3207 V9fsString *new_name) 3208 { 3209 V9fsFidState *tfidp; 3210 V9fsPath 
oldpath, newpath; 3211 V9fsState *s = pdu->s; 3212 int err; 3213 3214 v9fs_path_init(&oldpath); 3215 v9fs_path_init(&newpath); 3216 err = v9fs_co_name_to_path(pdu, olddir, old_name->data, &oldpath); 3217 if (err < 0) { 3218 goto out; 3219 } 3220 err = v9fs_co_name_to_path(pdu, newdir, new_name->data, &newpath); 3221 if (err < 0) { 3222 goto out; 3223 } 3224 3225 /* 3226 * Fixup fid's pointing to the old name to 3227 * start pointing to the new name 3228 */ 3229 QSIMPLEQ_FOREACH(tfidp, &s->fid_list, next) { 3230 if (v9fs_path_is_ancestor(&oldpath, &tfidp->path)) { 3231 /* replace the name */ 3232 v9fs_fix_path(&tfidp->path, &newpath, strlen(oldpath.data)); 3233 } 3234 } 3235 out: 3236 v9fs_path_free(&oldpath); 3237 v9fs_path_free(&newpath); 3238 return err; 3239 } 3240 3241 static int coroutine_fn v9fs_complete_renameat(V9fsPDU *pdu, int32_t olddirfid, 3242 V9fsString *old_name, 3243 int32_t newdirfid, 3244 V9fsString *new_name) 3245 { 3246 int err = 0; 3247 V9fsState *s = pdu->s; 3248 V9fsFidState *newdirfidp = NULL, *olddirfidp = NULL; 3249 3250 olddirfidp = get_fid(pdu, olddirfid); 3251 if (olddirfidp == NULL) { 3252 err = -ENOENT; 3253 goto out; 3254 } 3255 if (newdirfid != -1) { 3256 newdirfidp = get_fid(pdu, newdirfid); 3257 if (newdirfidp == NULL) { 3258 err = -ENOENT; 3259 goto out; 3260 } 3261 } else { 3262 newdirfidp = get_fid(pdu, olddirfid); 3263 } 3264 3265 err = v9fs_co_renameat(pdu, &olddirfidp->path, old_name, 3266 &newdirfidp->path, new_name); 3267 if (err < 0) { 3268 goto out; 3269 } 3270 if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) { 3271 /* Only for path based fid we need to do the below fixup */ 3272 err = v9fs_fix_fid_paths(pdu, &olddirfidp->path, old_name, 3273 &newdirfidp->path, new_name); 3274 } 3275 out: 3276 if (olddirfidp) { 3277 put_fid(pdu, olddirfidp); 3278 } 3279 if (newdirfidp) { 3280 put_fid(pdu, newdirfidp); 3281 } 3282 return err; 3283 } 3284 3285 static void coroutine_fn v9fs_renameat(void *opaque) 3286 { 3287 ssize_t err = 
0; 3288 size_t offset = 7; 3289 V9fsPDU *pdu = opaque; 3290 V9fsState *s = pdu->s; 3291 int32_t olddirfid, newdirfid; 3292 V9fsString old_name, new_name; 3293 3294 v9fs_string_init(&old_name); 3295 v9fs_string_init(&new_name); 3296 err = pdu_unmarshal(pdu, offset, "dsds", &olddirfid, 3297 &old_name, &newdirfid, &new_name); 3298 if (err < 0) { 3299 goto out_err; 3300 } 3301 3302 if (name_is_illegal(old_name.data) || name_is_illegal(new_name.data)) { 3303 err = -ENOENT; 3304 goto out_err; 3305 } 3306 3307 if (!strcmp(".", old_name.data) || !strcmp("..", old_name.data) || 3308 !strcmp(".", new_name.data) || !strcmp("..", new_name.data)) { 3309 err = -EISDIR; 3310 goto out_err; 3311 } 3312 3313 v9fs_path_write_lock(s); 3314 err = v9fs_complete_renameat(pdu, olddirfid, 3315 &old_name, newdirfid, &new_name); 3316 v9fs_path_unlock(s); 3317 if (!err) { 3318 err = offset; 3319 } 3320 3321 out_err: 3322 pdu_complete(pdu, err); 3323 v9fs_string_free(&old_name); 3324 v9fs_string_free(&new_name); 3325 } 3326 3327 static void coroutine_fn v9fs_wstat(void *opaque) 3328 { 3329 int32_t fid; 3330 int err = 0; 3331 int16_t unused; 3332 V9fsStat v9stat; 3333 size_t offset = 7; 3334 struct stat stbuf; 3335 V9fsFidState *fidp; 3336 V9fsPDU *pdu = opaque; 3337 V9fsState *s = pdu->s; 3338 3339 v9fs_stat_init(&v9stat); 3340 err = pdu_unmarshal(pdu, offset, "dwS", &fid, &unused, &v9stat); 3341 if (err < 0) { 3342 goto out_nofid; 3343 } 3344 trace_v9fs_wstat(pdu->tag, pdu->id, fid, 3345 v9stat.mode, v9stat.atime, v9stat.mtime); 3346 3347 fidp = get_fid(pdu, fid); 3348 if (fidp == NULL) { 3349 err = -EINVAL; 3350 goto out_nofid; 3351 } 3352 /* do we need to sync the file? 
*/ 3353 if (donttouch_stat(&v9stat)) { 3354 err = v9fs_co_fsync(pdu, fidp, 0); 3355 goto out; 3356 } 3357 if (v9stat.mode != -1) { 3358 uint32_t v9_mode; 3359 err = v9fs_co_lstat(pdu, &fidp->path, &stbuf); 3360 if (err < 0) { 3361 goto out; 3362 } 3363 v9_mode = stat_to_v9mode(&stbuf); 3364 if ((v9stat.mode & P9_STAT_MODE_TYPE_BITS) != 3365 (v9_mode & P9_STAT_MODE_TYPE_BITS)) { 3366 /* Attempting to change the type */ 3367 err = -EIO; 3368 goto out; 3369 } 3370 err = v9fs_co_chmod(pdu, &fidp->path, 3371 v9mode_to_mode(v9stat.mode, 3372 &v9stat.extension)); 3373 if (err < 0) { 3374 goto out; 3375 } 3376 } 3377 if (v9stat.mtime != -1 || v9stat.atime != -1) { 3378 struct timespec times[2]; 3379 if (v9stat.atime != -1) { 3380 times[0].tv_sec = v9stat.atime; 3381 times[0].tv_nsec = 0; 3382 } else { 3383 times[0].tv_nsec = UTIME_OMIT; 3384 } 3385 if (v9stat.mtime != -1) { 3386 times[1].tv_sec = v9stat.mtime; 3387 times[1].tv_nsec = 0; 3388 } else { 3389 times[1].tv_nsec = UTIME_OMIT; 3390 } 3391 err = v9fs_co_utimensat(pdu, &fidp->path, times); 3392 if (err < 0) { 3393 goto out; 3394 } 3395 } 3396 if (v9stat.n_gid != -1 || v9stat.n_uid != -1) { 3397 err = v9fs_co_chown(pdu, &fidp->path, v9stat.n_uid, v9stat.n_gid); 3398 if (err < 0) { 3399 goto out; 3400 } 3401 } 3402 if (v9stat.name.size != 0) { 3403 v9fs_path_write_lock(s); 3404 err = v9fs_complete_rename(pdu, fidp, -1, &v9stat.name); 3405 v9fs_path_unlock(s); 3406 if (err < 0) { 3407 goto out; 3408 } 3409 } 3410 if (v9stat.length != -1) { 3411 err = v9fs_co_truncate(pdu, &fidp->path, v9stat.length); 3412 if (err < 0) { 3413 goto out; 3414 } 3415 } 3416 err = offset; 3417 out: 3418 put_fid(pdu, fidp); 3419 out_nofid: 3420 v9fs_stat_free(&v9stat); 3421 pdu_complete(pdu, err); 3422 } 3423 3424 static int v9fs_fill_statfs(V9fsState *s, V9fsPDU *pdu, struct statfs *stbuf) 3425 { 3426 uint32_t f_type; 3427 uint32_t f_bsize; 3428 uint64_t f_blocks; 3429 uint64_t f_bfree; 3430 uint64_t f_bavail; 3431 uint64_t f_files; 3432 
uint64_t f_ffree; 3433 uint64_t fsid_val; 3434 uint32_t f_namelen; 3435 size_t offset = 7; 3436 int32_t bsize_factor; 3437 3438 /* 3439 * compute bsize factor based on host file system block size 3440 * and client msize 3441 */ 3442 bsize_factor = (s->msize - P9_IOHDRSZ) / stbuf->f_bsize; 3443 if (!bsize_factor) { 3444 bsize_factor = 1; 3445 } 3446 f_type = stbuf->f_type; 3447 f_bsize = stbuf->f_bsize; 3448 f_bsize *= bsize_factor; 3449 /* 3450 * f_bsize is adjusted(multiplied) by bsize factor, so we need to 3451 * adjust(divide) the number of blocks, free blocks and available 3452 * blocks by bsize factor 3453 */ 3454 f_blocks = stbuf->f_blocks / bsize_factor; 3455 f_bfree = stbuf->f_bfree / bsize_factor; 3456 f_bavail = stbuf->f_bavail / bsize_factor; 3457 f_files = stbuf->f_files; 3458 f_ffree = stbuf->f_ffree; 3459 fsid_val = (unsigned int) stbuf->f_fsid.__val[0] | 3460 (unsigned long long)stbuf->f_fsid.__val[1] << 32; 3461 f_namelen = stbuf->f_namelen; 3462 3463 return pdu_marshal(pdu, offset, "ddqqqqqqd", 3464 f_type, f_bsize, f_blocks, f_bfree, 3465 f_bavail, f_files, f_ffree, 3466 fsid_val, f_namelen); 3467 } 3468 3469 static void coroutine_fn v9fs_statfs(void *opaque) 3470 { 3471 int32_t fid; 3472 ssize_t retval = 0; 3473 size_t offset = 7; 3474 V9fsFidState *fidp; 3475 struct statfs stbuf; 3476 V9fsPDU *pdu = opaque; 3477 V9fsState *s = pdu->s; 3478 3479 retval = pdu_unmarshal(pdu, offset, "d", &fid); 3480 if (retval < 0) { 3481 goto out_nofid; 3482 } 3483 fidp = get_fid(pdu, fid); 3484 if (fidp == NULL) { 3485 retval = -ENOENT; 3486 goto out_nofid; 3487 } 3488 retval = v9fs_co_statfs(pdu, &fidp->path, &stbuf); 3489 if (retval < 0) { 3490 goto out; 3491 } 3492 retval = v9fs_fill_statfs(s, pdu, &stbuf); 3493 if (retval < 0) { 3494 goto out; 3495 } 3496 retval += offset; 3497 out: 3498 put_fid(pdu, fidp); 3499 out_nofid: 3500 pdu_complete(pdu, retval); 3501 } 3502 3503 static void coroutine_fn v9fs_mknod(void *opaque) 3504 { 3505 3506 int mode; 3507 gid_t 
gid; 3508 int32_t fid; 3509 V9fsQID qid; 3510 int err = 0; 3511 int major, minor; 3512 size_t offset = 7; 3513 V9fsString name; 3514 struct stat stbuf; 3515 V9fsFidState *fidp; 3516 V9fsPDU *pdu = opaque; 3517 3518 v9fs_string_init(&name); 3519 err = pdu_unmarshal(pdu, offset, "dsdddd", &fid, &name, &mode, 3520 &major, &minor, &gid); 3521 if (err < 0) { 3522 goto out_nofid; 3523 } 3524 trace_v9fs_mknod(pdu->tag, pdu->id, fid, mode, major, minor); 3525 3526 if (name_is_illegal(name.data)) { 3527 err = -ENOENT; 3528 goto out_nofid; 3529 } 3530 3531 if (!strcmp(".", name.data) || !strcmp("..", name.data)) { 3532 err = -EEXIST; 3533 goto out_nofid; 3534 } 3535 3536 fidp = get_fid(pdu, fid); 3537 if (fidp == NULL) { 3538 err = -ENOENT; 3539 goto out_nofid; 3540 } 3541 err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, gid, 3542 makedev(major, minor), mode, &stbuf); 3543 if (err < 0) { 3544 goto out; 3545 } 3546 err = stat_to_qid(pdu, &stbuf, &qid); 3547 if (err < 0) { 3548 goto out; 3549 } 3550 err = pdu_marshal(pdu, offset, "Q", &qid); 3551 if (err < 0) { 3552 goto out; 3553 } 3554 err += offset; 3555 trace_v9fs_mknod_return(pdu->tag, pdu->id, 3556 qid.type, qid.version, qid.path); 3557 out: 3558 put_fid(pdu, fidp); 3559 out_nofid: 3560 pdu_complete(pdu, err); 3561 v9fs_string_free(&name); 3562 } 3563 3564 /* 3565 * Implement posix byte range locking code 3566 * Server side handling of locking code is very simple, because 9p server in 3567 * QEMU can handle only one client. And most of the lock handling 3568 * (like conflict, merging) etc is done by the VFS layer itself, so no need to 3569 * do any thing in * qemu 9p server side lock code path. 
3570 * So when a TLOCK request comes, always return success 3571 */ 3572 static void coroutine_fn v9fs_lock(void *opaque) 3573 { 3574 V9fsFlock flock; 3575 size_t offset = 7; 3576 struct stat stbuf; 3577 V9fsFidState *fidp; 3578 int32_t fid, err = 0; 3579 V9fsPDU *pdu = opaque; 3580 3581 v9fs_string_init(&flock.client_id); 3582 err = pdu_unmarshal(pdu, offset, "dbdqqds", &fid, &flock.type, 3583 &flock.flags, &flock.start, &flock.length, 3584 &flock.proc_id, &flock.client_id); 3585 if (err < 0) { 3586 goto out_nofid; 3587 } 3588 trace_v9fs_lock(pdu->tag, pdu->id, fid, 3589 flock.type, flock.start, flock.length); 3590 3591 3592 /* We support only block flag now (that too ignored currently) */ 3593 if (flock.flags & ~P9_LOCK_FLAGS_BLOCK) { 3594 err = -EINVAL; 3595 goto out_nofid; 3596 } 3597 fidp = get_fid(pdu, fid); 3598 if (fidp == NULL) { 3599 err = -ENOENT; 3600 goto out_nofid; 3601 } 3602 err = v9fs_co_fstat(pdu, fidp, &stbuf); 3603 if (err < 0) { 3604 goto out; 3605 } 3606 err = pdu_marshal(pdu, offset, "b", P9_LOCK_SUCCESS); 3607 if (err < 0) { 3608 goto out; 3609 } 3610 err += offset; 3611 trace_v9fs_lock_return(pdu->tag, pdu->id, P9_LOCK_SUCCESS); 3612 out: 3613 put_fid(pdu, fidp); 3614 out_nofid: 3615 pdu_complete(pdu, err); 3616 v9fs_string_free(&flock.client_id); 3617 } 3618 3619 /* 3620 * When a TGETLOCK request comes, always return success because all lock 3621 * handling is done by client's VFS layer. 
3622 */ 3623 static void coroutine_fn v9fs_getlock(void *opaque) 3624 { 3625 size_t offset = 7; 3626 struct stat stbuf; 3627 V9fsFidState *fidp; 3628 V9fsGetlock glock; 3629 int32_t fid, err = 0; 3630 V9fsPDU *pdu = opaque; 3631 3632 v9fs_string_init(&glock.client_id); 3633 err = pdu_unmarshal(pdu, offset, "dbqqds", &fid, &glock.type, 3634 &glock.start, &glock.length, &glock.proc_id, 3635 &glock.client_id); 3636 if (err < 0) { 3637 goto out_nofid; 3638 } 3639 trace_v9fs_getlock(pdu->tag, pdu->id, fid, 3640 glock.type, glock.start, glock.length); 3641 3642 fidp = get_fid(pdu, fid); 3643 if (fidp == NULL) { 3644 err = -ENOENT; 3645 goto out_nofid; 3646 } 3647 err = v9fs_co_fstat(pdu, fidp, &stbuf); 3648 if (err < 0) { 3649 goto out; 3650 } 3651 glock.type = P9_LOCK_TYPE_UNLCK; 3652 err = pdu_marshal(pdu, offset, "bqqds", glock.type, 3653 glock.start, glock.length, glock.proc_id, 3654 &glock.client_id); 3655 if (err < 0) { 3656 goto out; 3657 } 3658 err += offset; 3659 trace_v9fs_getlock_return(pdu->tag, pdu->id, glock.type, glock.start, 3660 glock.length, glock.proc_id); 3661 out: 3662 put_fid(pdu, fidp); 3663 out_nofid: 3664 pdu_complete(pdu, err); 3665 v9fs_string_free(&glock.client_id); 3666 } 3667 3668 static void coroutine_fn v9fs_mkdir(void *opaque) 3669 { 3670 V9fsPDU *pdu = opaque; 3671 size_t offset = 7; 3672 int32_t fid; 3673 struct stat stbuf; 3674 V9fsQID qid; 3675 V9fsString name; 3676 V9fsFidState *fidp; 3677 gid_t gid; 3678 int mode; 3679 int err = 0; 3680 3681 v9fs_string_init(&name); 3682 err = pdu_unmarshal(pdu, offset, "dsdd", &fid, &name, &mode, &gid); 3683 if (err < 0) { 3684 goto out_nofid; 3685 } 3686 trace_v9fs_mkdir(pdu->tag, pdu->id, fid, name.data, mode, gid); 3687 3688 if (name_is_illegal(name.data)) { 3689 err = -ENOENT; 3690 goto out_nofid; 3691 } 3692 3693 if (!strcmp(".", name.data) || !strcmp("..", name.data)) { 3694 err = -EEXIST; 3695 goto out_nofid; 3696 } 3697 3698 fidp = get_fid(pdu, fid); 3699 if (fidp == NULL) { 3700 err = 
-ENOENT; 3701 goto out_nofid; 3702 } 3703 err = v9fs_co_mkdir(pdu, fidp, &name, mode, fidp->uid, gid, &stbuf); 3704 if (err < 0) { 3705 goto out; 3706 } 3707 err = stat_to_qid(pdu, &stbuf, &qid); 3708 if (err < 0) { 3709 goto out; 3710 } 3711 err = pdu_marshal(pdu, offset, "Q", &qid); 3712 if (err < 0) { 3713 goto out; 3714 } 3715 err += offset; 3716 trace_v9fs_mkdir_return(pdu->tag, pdu->id, 3717 qid.type, qid.version, qid.path, err); 3718 out: 3719 put_fid(pdu, fidp); 3720 out_nofid: 3721 pdu_complete(pdu, err); 3722 v9fs_string_free(&name); 3723 } 3724 3725 static void coroutine_fn v9fs_xattrwalk(void *opaque) 3726 { 3727 int64_t size; 3728 V9fsString name; 3729 ssize_t err = 0; 3730 size_t offset = 7; 3731 int32_t fid, newfid; 3732 V9fsFidState *file_fidp; 3733 V9fsFidState *xattr_fidp = NULL; 3734 V9fsPDU *pdu = opaque; 3735 V9fsState *s = pdu->s; 3736 3737 v9fs_string_init(&name); 3738 err = pdu_unmarshal(pdu, offset, "dds", &fid, &newfid, &name); 3739 if (err < 0) { 3740 goto out_nofid; 3741 } 3742 trace_v9fs_xattrwalk(pdu->tag, pdu->id, fid, newfid, name.data); 3743 3744 file_fidp = get_fid(pdu, fid); 3745 if (file_fidp == NULL) { 3746 err = -ENOENT; 3747 goto out_nofid; 3748 } 3749 xattr_fidp = alloc_fid(s, newfid); 3750 if (xattr_fidp == NULL) { 3751 err = -EINVAL; 3752 goto out; 3753 } 3754 v9fs_path_copy(&xattr_fidp->path, &file_fidp->path); 3755 if (!v9fs_string_size(&name)) { 3756 /* 3757 * listxattr request. 
Get the size first 3758 */ 3759 size = v9fs_co_llistxattr(pdu, &xattr_fidp->path, NULL, 0); 3760 if (size < 0) { 3761 err = size; 3762 clunk_fid(s, xattr_fidp->fid); 3763 goto out; 3764 } 3765 /* 3766 * Read the xattr value 3767 */ 3768 xattr_fidp->fs.xattr.len = size; 3769 xattr_fidp->fid_type = P9_FID_XATTR; 3770 xattr_fidp->fs.xattr.xattrwalk_fid = true; 3771 xattr_fidp->fs.xattr.value = g_malloc0(size); 3772 if (size) { 3773 err = v9fs_co_llistxattr(pdu, &xattr_fidp->path, 3774 xattr_fidp->fs.xattr.value, 3775 xattr_fidp->fs.xattr.len); 3776 if (err < 0) { 3777 clunk_fid(s, xattr_fidp->fid); 3778 goto out; 3779 } 3780 } 3781 err = pdu_marshal(pdu, offset, "q", size); 3782 if (err < 0) { 3783 goto out; 3784 } 3785 err += offset; 3786 } else { 3787 /* 3788 * specific xattr fid. We check for xattr 3789 * presence also collect the xattr size 3790 */ 3791 size = v9fs_co_lgetxattr(pdu, &xattr_fidp->path, 3792 &name, NULL, 0); 3793 if (size < 0) { 3794 err = size; 3795 clunk_fid(s, xattr_fidp->fid); 3796 goto out; 3797 } 3798 /* 3799 * Read the xattr value 3800 */ 3801 xattr_fidp->fs.xattr.len = size; 3802 xattr_fidp->fid_type = P9_FID_XATTR; 3803 xattr_fidp->fs.xattr.xattrwalk_fid = true; 3804 xattr_fidp->fs.xattr.value = g_malloc0(size); 3805 if (size) { 3806 err = v9fs_co_lgetxattr(pdu, &xattr_fidp->path, 3807 &name, xattr_fidp->fs.xattr.value, 3808 xattr_fidp->fs.xattr.len); 3809 if (err < 0) { 3810 clunk_fid(s, xattr_fidp->fid); 3811 goto out; 3812 } 3813 } 3814 err = pdu_marshal(pdu, offset, "q", size); 3815 if (err < 0) { 3816 goto out; 3817 } 3818 err += offset; 3819 } 3820 trace_v9fs_xattrwalk_return(pdu->tag, pdu->id, size); 3821 out: 3822 put_fid(pdu, file_fidp); 3823 if (xattr_fidp) { 3824 put_fid(pdu, xattr_fidp); 3825 } 3826 out_nofid: 3827 pdu_complete(pdu, err); 3828 v9fs_string_free(&name); 3829 } 3830 3831 static void coroutine_fn v9fs_xattrcreate(void *opaque) 3832 { 3833 int flags, rflags = 0; 3834 int32_t fid; 3835 uint64_t size; 3836 ssize_t err 
= 0; 3837 V9fsString name; 3838 size_t offset = 7; 3839 V9fsFidState *file_fidp; 3840 V9fsFidState *xattr_fidp; 3841 V9fsPDU *pdu = opaque; 3842 3843 v9fs_string_init(&name); 3844 err = pdu_unmarshal(pdu, offset, "dsqd", &fid, &name, &size, &flags); 3845 if (err < 0) { 3846 goto out_nofid; 3847 } 3848 trace_v9fs_xattrcreate(pdu->tag, pdu->id, fid, name.data, size, flags); 3849 3850 if (flags & ~(P9_XATTR_CREATE | P9_XATTR_REPLACE)) { 3851 err = -EINVAL; 3852 goto out_nofid; 3853 } 3854 3855 if (flags & P9_XATTR_CREATE) { 3856 rflags |= XATTR_CREATE; 3857 } 3858 3859 if (flags & P9_XATTR_REPLACE) { 3860 rflags |= XATTR_REPLACE; 3861 } 3862 3863 if (size > XATTR_SIZE_MAX) { 3864 err = -E2BIG; 3865 goto out_nofid; 3866 } 3867 3868 file_fidp = get_fid(pdu, fid); 3869 if (file_fidp == NULL) { 3870 err = -EINVAL; 3871 goto out_nofid; 3872 } 3873 if (file_fidp->fid_type != P9_FID_NONE) { 3874 err = -EINVAL; 3875 goto out_put_fid; 3876 } 3877 3878 /* Make the file fid point to xattr */ 3879 xattr_fidp = file_fidp; 3880 xattr_fidp->fid_type = P9_FID_XATTR; 3881 xattr_fidp->fs.xattr.copied_len = 0; 3882 xattr_fidp->fs.xattr.xattrwalk_fid = false; 3883 xattr_fidp->fs.xattr.len = size; 3884 xattr_fidp->fs.xattr.flags = rflags; 3885 v9fs_string_init(&xattr_fidp->fs.xattr.name); 3886 v9fs_string_copy(&xattr_fidp->fs.xattr.name, &name); 3887 xattr_fidp->fs.xattr.value = g_malloc0(size); 3888 err = offset; 3889 out_put_fid: 3890 put_fid(pdu, file_fidp); 3891 out_nofid: 3892 pdu_complete(pdu, err); 3893 v9fs_string_free(&name); 3894 } 3895 3896 static void coroutine_fn v9fs_readlink(void *opaque) 3897 { 3898 V9fsPDU *pdu = opaque; 3899 size_t offset = 7; 3900 V9fsString target; 3901 int32_t fid; 3902 int err = 0; 3903 V9fsFidState *fidp; 3904 3905 err = pdu_unmarshal(pdu, offset, "d", &fid); 3906 if (err < 0) { 3907 goto out_nofid; 3908 } 3909 trace_v9fs_readlink(pdu->tag, pdu->id, fid); 3910 fidp = get_fid(pdu, fid); 3911 if (fidp == NULL) { 3912 err = -ENOENT; 3913 goto 
out_nofid; 3914 } 3915 3916 v9fs_string_init(&target); 3917 err = v9fs_co_readlink(pdu, &fidp->path, &target); 3918 if (err < 0) { 3919 goto out; 3920 } 3921 err = pdu_marshal(pdu, offset, "s", &target); 3922 if (err < 0) { 3923 v9fs_string_free(&target); 3924 goto out; 3925 } 3926 err += offset; 3927 trace_v9fs_readlink_return(pdu->tag, pdu->id, target.data); 3928 v9fs_string_free(&target); 3929 out: 3930 put_fid(pdu, fidp); 3931 out_nofid: 3932 pdu_complete(pdu, err); 3933 } 3934 3935 static CoroutineEntry *pdu_co_handlers[] = { 3936 [P9_TREADDIR] = v9fs_readdir, 3937 [P9_TSTATFS] = v9fs_statfs, 3938 [P9_TGETATTR] = v9fs_getattr, 3939 [P9_TSETATTR] = v9fs_setattr, 3940 [P9_TXATTRWALK] = v9fs_xattrwalk, 3941 [P9_TXATTRCREATE] = v9fs_xattrcreate, 3942 [P9_TMKNOD] = v9fs_mknod, 3943 [P9_TRENAME] = v9fs_rename, 3944 [P9_TLOCK] = v9fs_lock, 3945 [P9_TGETLOCK] = v9fs_getlock, 3946 [P9_TRENAMEAT] = v9fs_renameat, 3947 [P9_TREADLINK] = v9fs_readlink, 3948 [P9_TUNLINKAT] = v9fs_unlinkat, 3949 [P9_TMKDIR] = v9fs_mkdir, 3950 [P9_TVERSION] = v9fs_version, 3951 [P9_TLOPEN] = v9fs_open, 3952 [P9_TATTACH] = v9fs_attach, 3953 [P9_TSTAT] = v9fs_stat, 3954 [P9_TWALK] = v9fs_walk, 3955 [P9_TCLUNK] = v9fs_clunk, 3956 [P9_TFSYNC] = v9fs_fsync, 3957 [P9_TOPEN] = v9fs_open, 3958 [P9_TREAD] = v9fs_read, 3959 #if 0 3960 [P9_TAUTH] = v9fs_auth, 3961 #endif 3962 [P9_TFLUSH] = v9fs_flush, 3963 [P9_TLINK] = v9fs_link, 3964 [P9_TSYMLINK] = v9fs_symlink, 3965 [P9_TCREATE] = v9fs_create, 3966 [P9_TLCREATE] = v9fs_lcreate, 3967 [P9_TWRITE] = v9fs_write, 3968 [P9_TWSTAT] = v9fs_wstat, 3969 [P9_TREMOVE] = v9fs_remove, 3970 }; 3971 3972 static void coroutine_fn v9fs_op_not_supp(void *opaque) 3973 { 3974 V9fsPDU *pdu = opaque; 3975 pdu_complete(pdu, -EOPNOTSUPP); 3976 } 3977 3978 static void coroutine_fn v9fs_fs_ro(void *opaque) 3979 { 3980 V9fsPDU *pdu = opaque; 3981 pdu_complete(pdu, -EROFS); 3982 } 3983 3984 static inline bool is_read_only_op(V9fsPDU *pdu) 3985 { 3986 switch (pdu->id) { 3987 case 
P9_TREADDIR: 3988 case P9_TSTATFS: 3989 case P9_TGETATTR: 3990 case P9_TXATTRWALK: 3991 case P9_TLOCK: 3992 case P9_TGETLOCK: 3993 case P9_TREADLINK: 3994 case P9_TVERSION: 3995 case P9_TLOPEN: 3996 case P9_TATTACH: 3997 case P9_TSTAT: 3998 case P9_TWALK: 3999 case P9_TCLUNK: 4000 case P9_TFSYNC: 4001 case P9_TOPEN: 4002 case P9_TREAD: 4003 case P9_TAUTH: 4004 case P9_TFLUSH: 4005 return 1; 4006 default: 4007 return 0; 4008 } 4009 } 4010 4011 void pdu_submit(V9fsPDU *pdu, P9MsgHeader *hdr) 4012 { 4013 Coroutine *co; 4014 CoroutineEntry *handler; 4015 V9fsState *s = pdu->s; 4016 4017 pdu->size = le32_to_cpu(hdr->size_le); 4018 pdu->id = hdr->id; 4019 pdu->tag = le16_to_cpu(hdr->tag_le); 4020 4021 if (pdu->id >= ARRAY_SIZE(pdu_co_handlers) || 4022 (pdu_co_handlers[pdu->id] == NULL)) { 4023 handler = v9fs_op_not_supp; 4024 } else if (is_ro_export(&s->ctx) && !is_read_only_op(pdu)) { 4025 handler = v9fs_fs_ro; 4026 } else { 4027 handler = pdu_co_handlers[pdu->id]; 4028 } 4029 4030 qemu_co_queue_init(&pdu->complete); 4031 co = qemu_coroutine_create(handler, pdu); 4032 qemu_coroutine_enter(co); 4033 } 4034 4035 /* Returns 0 on success, 1 on failure. */ 4036 int v9fs_device_realize_common(V9fsState *s, const V9fsTransport *t, 4037 Error **errp) 4038 { 4039 ERRP_GUARD(); 4040 int i, len; 4041 struct stat stat; 4042 FsDriverEntry *fse; 4043 V9fsPath path; 4044 int rc = 1; 4045 4046 assert(!s->transport); 4047 s->transport = t; 4048 4049 /* initialize pdu allocator */ 4050 QLIST_INIT(&s->free_list); 4051 QLIST_INIT(&s->active_list); 4052 for (i = 0; i < MAX_REQ; i++) { 4053 QLIST_INSERT_HEAD(&s->free_list, &s->pdus[i], next); 4054 s->pdus[i].s = s; 4055 s->pdus[i].idx = i; 4056 } 4057 4058 v9fs_path_init(&path); 4059 4060 fse = get_fsdev_fsentry(s->fsconf.fsdev_id); 4061 4062 if (!fse) { 4063 /* We don't have a fsdev identified by fsdev_id */ 4064 error_setg(errp, "9pfs device couldn't find fsdev with the " 4065 "id = %s", 4066 s->fsconf.fsdev_id ? 
s->fsconf.fsdev_id : "NULL"); 4067 goto out; 4068 } 4069 4070 if (!s->fsconf.tag) { 4071 /* we haven't specified a mount_tag */ 4072 error_setg(errp, "fsdev with id %s needs mount_tag arguments", 4073 s->fsconf.fsdev_id); 4074 goto out; 4075 } 4076 4077 s->ctx.export_flags = fse->export_flags; 4078 s->ctx.fs_root = g_strdup(fse->path); 4079 s->ctx.exops.get_st_gen = NULL; 4080 len = strlen(s->fsconf.tag); 4081 if (len > MAX_TAG_LEN - 1) { 4082 error_setg(errp, "mount tag '%s' (%d bytes) is longer than " 4083 "maximum (%d bytes)", s->fsconf.tag, len, MAX_TAG_LEN - 1); 4084 goto out; 4085 } 4086 4087 s->tag = g_strdup(s->fsconf.tag); 4088 s->ctx.uid = -1; 4089 4090 s->ops = fse->ops; 4091 4092 s->ctx.fmode = fse->fmode; 4093 s->ctx.dmode = fse->dmode; 4094 4095 QSIMPLEQ_INIT(&s->fid_list); 4096 qemu_co_rwlock_init(&s->rename_lock); 4097 4098 if (s->ops->init(&s->ctx, errp) < 0) { 4099 error_prepend(errp, "cannot initialize fsdev '%s': ", 4100 s->fsconf.fsdev_id); 4101 goto out; 4102 } 4103 4104 /* 4105 * Check details of export path, We need to use fs driver 4106 * call back to do that. Since we are in the init path, we don't 4107 * use co-routines here. 
4108 */ 4109 if (s->ops->name_to_path(&s->ctx, NULL, "/", &path) < 0) { 4110 error_setg(errp, 4111 "error in converting name to path %s", strerror(errno)); 4112 goto out; 4113 } 4114 if (s->ops->lstat(&s->ctx, &path, &stat)) { 4115 error_setg(errp, "share path %s does not exist", fse->path); 4116 goto out; 4117 } else if (!S_ISDIR(stat.st_mode)) { 4118 error_setg(errp, "share path %s is not a directory", fse->path); 4119 goto out; 4120 } 4121 4122 s->dev_id = stat.st_dev; 4123 4124 /* init inode remapping : */ 4125 /* hash table for variable length inode suffixes */ 4126 qpd_table_init(&s->qpd_table); 4127 /* hash table for slow/full inode remapping (most users won't need it) */ 4128 qpf_table_init(&s->qpf_table); 4129 /* hash table for quick inode remapping */ 4130 qpp_table_init(&s->qpp_table); 4131 s->qp_ndevices = 0; 4132 s->qp_affix_next = 1; /* reserve 0 to detect overflow */ 4133 s->qp_fullpath_next = 1; 4134 4135 s->ctx.fst = &fse->fst; 4136 fsdev_throttle_init(s->ctx.fst); 4137 4138 rc = 0; 4139 out: 4140 if (rc) { 4141 v9fs_device_unrealize_common(s); 4142 } 4143 v9fs_path_free(&path); 4144 return rc; 4145 } 4146 4147 void v9fs_device_unrealize_common(V9fsState *s) 4148 { 4149 if (s->ops && s->ops->cleanup) { 4150 s->ops->cleanup(&s->ctx); 4151 } 4152 if (s->ctx.fst) { 4153 fsdev_throttle_cleanup(s->ctx.fst); 4154 } 4155 g_free(s->tag); 4156 qp_table_destroy(&s->qpd_table); 4157 qp_table_destroy(&s->qpp_table); 4158 qp_table_destroy(&s->qpf_table); 4159 g_free(s->ctx.fs_root); 4160 } 4161 4162 typedef struct VirtfsCoResetData { 4163 V9fsPDU pdu; 4164 bool done; 4165 } VirtfsCoResetData; 4166 4167 static void coroutine_fn virtfs_co_reset(void *opaque) 4168 { 4169 VirtfsCoResetData *data = opaque; 4170 4171 virtfs_reset(&data->pdu); 4172 data->done = true; 4173 } 4174 4175 void v9fs_reset(V9fsState *s) 4176 { 4177 VirtfsCoResetData data = { .pdu = { .s = s }, .done = false }; 4178 Coroutine *co; 4179 4180 while (!QLIST_EMPTY(&s->active_list)) { 4181 
aio_poll(qemu_get_aio_context(), true); 4182 } 4183 4184 co = qemu_coroutine_create(virtfs_co_reset, &data); 4185 qemu_coroutine_enter(co); 4186 4187 while (!data.done) { 4188 aio_poll(qemu_get_aio_context(), true); 4189 } 4190 } 4191 4192 static void __attribute__((__constructor__)) v9fs_set_fd_limit(void) 4193 { 4194 struct rlimit rlim; 4195 if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) { 4196 error_report("Failed to get the resource limit"); 4197 exit(1); 4198 } 4199 open_fd_hw = rlim.rlim_cur - MIN(400, rlim.rlim_cur / 3); 4200 open_fd_rc = rlim.rlim_cur / 2; 4201 } 4202