1 /* 2 * Virtio 9p backend 3 * 4 * Copyright IBM, Corp. 2010 5 * 6 * Authors: 7 * Anthony Liguori <aliguori@us.ibm.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2. See 10 * the COPYING file in the top-level directory. 11 * 12 */ 13 14 /* 15 * Not so fast! You might want to read the 9p developer docs first: 16 * https://wiki.qemu.org/Documentation/9p 17 */ 18 19 #include "qemu/osdep.h" 20 #ifdef CONFIG_LINUX 21 #include <linux/limits.h> 22 #endif 23 #include <glib/gprintf.h> 24 #include "hw/virtio/virtio.h" 25 #include "qapi/error.h" 26 #include "qemu/error-report.h" 27 #include "qemu/iov.h" 28 #include "qemu/main-loop.h" 29 #include "qemu/sockets.h" 30 #include "virtio-9p.h" 31 #include "fsdev/qemu-fsdev.h" 32 #include "9p-xattr.h" 33 #include "9p-util.h" 34 #include "coth.h" 35 #include "trace.h" 36 #include "migration/blocker.h" 37 #include "qemu/xxhash.h" 38 #include <math.h> 39 40 int open_fd_hw; 41 int total_open_fd; 42 static int open_fd_rc; 43 44 enum { 45 Oread = 0x00, 46 Owrite = 0x01, 47 Ordwr = 0x02, 48 Oexec = 0x03, 49 Oexcl = 0x04, 50 Otrunc = 0x10, 51 Orexec = 0x20, 52 Orclose = 0x40, 53 Oappend = 0x80, 54 }; 55 56 P9ARRAY_DEFINE_TYPE(V9fsPath, v9fs_path_free); 57 58 static ssize_t pdu_marshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...) 59 { 60 ssize_t ret; 61 va_list ap; 62 63 va_start(ap, fmt); 64 ret = pdu->s->transport->pdu_vmarshal(pdu, offset, fmt, ap); 65 va_end(ap); 66 67 return ret; 68 } 69 70 static ssize_t pdu_unmarshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...) 
71 { 72 ssize_t ret; 73 va_list ap; 74 75 va_start(ap, fmt); 76 ret = pdu->s->transport->pdu_vunmarshal(pdu, offset, fmt, ap); 77 va_end(ap); 78 79 return ret; 80 } 81 82 static int omode_to_uflags(int8_t mode) 83 { 84 int ret = 0; 85 86 switch (mode & 3) { 87 case Oread: 88 ret = O_RDONLY; 89 break; 90 case Ordwr: 91 ret = O_RDWR; 92 break; 93 case Owrite: 94 ret = O_WRONLY; 95 break; 96 case Oexec: 97 ret = O_RDONLY; 98 break; 99 } 100 101 if (mode & Otrunc) { 102 ret |= O_TRUNC; 103 } 104 105 if (mode & Oappend) { 106 ret |= O_APPEND; 107 } 108 109 if (mode & Oexcl) { 110 ret |= O_EXCL; 111 } 112 113 return ret; 114 } 115 116 typedef struct DotlOpenflagMap { 117 int dotl_flag; 118 int open_flag; 119 } DotlOpenflagMap; 120 121 static int dotl_to_open_flags(int flags) 122 { 123 int i; 124 /* 125 * We have same bits for P9_DOTL_READONLY, P9_DOTL_WRONLY 126 * and P9_DOTL_NOACCESS 127 */ 128 int oflags = flags & O_ACCMODE; 129 130 DotlOpenflagMap dotl_oflag_map[] = { 131 { P9_DOTL_CREATE, O_CREAT }, 132 { P9_DOTL_EXCL, O_EXCL }, 133 { P9_DOTL_NOCTTY , O_NOCTTY }, 134 { P9_DOTL_TRUNC, O_TRUNC }, 135 { P9_DOTL_APPEND, O_APPEND }, 136 { P9_DOTL_NONBLOCK, O_NONBLOCK } , 137 { P9_DOTL_DSYNC, O_DSYNC }, 138 { P9_DOTL_FASYNC, FASYNC }, 139 #ifndef CONFIG_DARWIN 140 { P9_DOTL_NOATIME, O_NOATIME }, 141 /* 142 * On Darwin, we could map to F_NOCACHE, which is 143 * similar, but doesn't quite have the same 144 * semantics. However, we don't support O_DIRECT 145 * even on linux at the moment, so we just ignore 146 * it here. 
147 */ 148 { P9_DOTL_DIRECT, O_DIRECT }, 149 #endif 150 { P9_DOTL_LARGEFILE, O_LARGEFILE }, 151 { P9_DOTL_DIRECTORY, O_DIRECTORY }, 152 { P9_DOTL_NOFOLLOW, O_NOFOLLOW }, 153 { P9_DOTL_SYNC, O_SYNC }, 154 }; 155 156 for (i = 0; i < ARRAY_SIZE(dotl_oflag_map); i++) { 157 if (flags & dotl_oflag_map[i].dotl_flag) { 158 oflags |= dotl_oflag_map[i].open_flag; 159 } 160 } 161 162 return oflags; 163 } 164 165 void cred_init(FsCred *credp) 166 { 167 credp->fc_uid = -1; 168 credp->fc_gid = -1; 169 credp->fc_mode = -1; 170 credp->fc_rdev = -1; 171 } 172 173 static int get_dotl_openflags(V9fsState *s, int oflags) 174 { 175 int flags; 176 /* 177 * Filter the client open flags 178 */ 179 flags = dotl_to_open_flags(oflags); 180 flags &= ~(O_NOCTTY | O_ASYNC | O_CREAT); 181 #ifndef CONFIG_DARWIN 182 /* 183 * Ignore direct disk access hint until the server supports it. 184 */ 185 flags &= ~O_DIRECT; 186 #endif 187 return flags; 188 } 189 190 void v9fs_path_init(V9fsPath *path) 191 { 192 path->data = NULL; 193 path->size = 0; 194 } 195 196 void v9fs_path_free(V9fsPath *path) 197 { 198 g_free(path->data); 199 path->data = NULL; 200 path->size = 0; 201 } 202 203 204 void v9fs_path_sprintf(V9fsPath *path, const char *fmt, ...) 205 { 206 va_list ap; 207 208 v9fs_path_free(path); 209 210 va_start(ap, fmt); 211 /* Bump the size for including terminating NULL */ 212 path->size = g_vasprintf(&path->data, fmt, ap) + 1; 213 va_end(ap); 214 } 215 216 void v9fs_path_copy(V9fsPath *dst, const V9fsPath *src) 217 { 218 v9fs_path_free(dst); 219 dst->size = src->size; 220 dst->data = g_memdup(src->data, src->size); 221 } 222 223 int v9fs_name_to_path(V9fsState *s, V9fsPath *dirpath, 224 const char *name, V9fsPath *path) 225 { 226 int err; 227 err = s->ops->name_to_path(&s->ctx, dirpath, name, path); 228 if (err < 0) { 229 err = -errno; 230 } 231 return err; 232 } 233 234 /* 235 * Return TRUE if s1 is an ancestor of s2. 236 * 237 * E.g. "a/b" is an ancestor of "a/b/c" but not of "a/bc/d". 
238 * As a special case, We treat s1 as ancestor of s2 if they are same! 239 */ 240 static int v9fs_path_is_ancestor(V9fsPath *s1, V9fsPath *s2) 241 { 242 if (!strncmp(s1->data, s2->data, s1->size - 1)) { 243 if (s2->data[s1->size - 1] == '\0' || s2->data[s1->size - 1] == '/') { 244 return 1; 245 } 246 } 247 return 0; 248 } 249 250 static size_t v9fs_string_size(V9fsString *str) 251 { 252 return str->size; 253 } 254 255 /* 256 * returns 0 if fid got re-opened, 1 if not, < 0 on error 257 */ 258 static int coroutine_fn v9fs_reopen_fid(V9fsPDU *pdu, V9fsFidState *f) 259 { 260 int err = 1; 261 if (f->fid_type == P9_FID_FILE) { 262 if (f->fs.fd == -1) { 263 do { 264 err = v9fs_co_open(pdu, f, f->open_flags); 265 } while (err == -EINTR && !pdu->cancelled); 266 } 267 } else if (f->fid_type == P9_FID_DIR) { 268 if (f->fs.dir.stream == NULL) { 269 do { 270 err = v9fs_co_opendir(pdu, f); 271 } while (err == -EINTR && !pdu->cancelled); 272 } 273 } 274 return err; 275 } 276 277 static V9fsFidState *coroutine_fn get_fid(V9fsPDU *pdu, int32_t fid) 278 { 279 int err; 280 V9fsFidState *f; 281 V9fsState *s = pdu->s; 282 283 f = g_hash_table_lookup(s->fids, GINT_TO_POINTER(fid)); 284 if (f) { 285 BUG_ON(f->clunked); 286 /* 287 * Update the fid ref upfront so that 288 * we don't get reclaimed when we yield 289 * in open later. 290 */ 291 f->ref++; 292 /* 293 * check whether we need to reopen the 294 * file. We might have closed the fd 295 * while trying to free up some file 296 * descriptors. 
297 */ 298 err = v9fs_reopen_fid(pdu, f); 299 if (err < 0) { 300 f->ref--; 301 return NULL; 302 } 303 /* 304 * Mark the fid as referenced so that the LRU 305 * reclaim won't close the file descriptor 306 */ 307 f->flags |= FID_REFERENCED; 308 return f; 309 } 310 return NULL; 311 } 312 313 static V9fsFidState *alloc_fid(V9fsState *s, int32_t fid) 314 { 315 V9fsFidState *f; 316 317 f = g_hash_table_lookup(s->fids, GINT_TO_POINTER(fid)); 318 if (f) { 319 /* If fid is already there return NULL */ 320 BUG_ON(f->clunked); 321 return NULL; 322 } 323 f = g_new0(V9fsFidState, 1); 324 f->fid = fid; 325 f->fid_type = P9_FID_NONE; 326 f->ref = 1; 327 /* 328 * Mark the fid as referenced so that the LRU 329 * reclaim won't close the file descriptor 330 */ 331 f->flags |= FID_REFERENCED; 332 g_hash_table_insert(s->fids, GINT_TO_POINTER(fid), f); 333 334 v9fs_readdir_init(s->proto_version, &f->fs.dir); 335 v9fs_readdir_init(s->proto_version, &f->fs_reclaim.dir); 336 337 return f; 338 } 339 340 static int coroutine_fn v9fs_xattr_fid_clunk(V9fsPDU *pdu, V9fsFidState *fidp) 341 { 342 int retval = 0; 343 344 if (fidp->fs.xattr.xattrwalk_fid) { 345 /* getxattr/listxattr fid */ 346 goto free_value; 347 } 348 /* 349 * if this is fid for setxattr. 
clunk should 350 * result in setxattr localcall 351 */ 352 if (fidp->fs.xattr.len != fidp->fs.xattr.copied_len) { 353 /* clunk after partial write */ 354 retval = -EINVAL; 355 goto free_out; 356 } 357 if (fidp->fs.xattr.len) { 358 retval = v9fs_co_lsetxattr(pdu, &fidp->path, &fidp->fs.xattr.name, 359 fidp->fs.xattr.value, 360 fidp->fs.xattr.len, 361 fidp->fs.xattr.flags); 362 } else { 363 retval = v9fs_co_lremovexattr(pdu, &fidp->path, &fidp->fs.xattr.name); 364 } 365 free_out: 366 v9fs_string_free(&fidp->fs.xattr.name); 367 free_value: 368 g_free(fidp->fs.xattr.value); 369 return retval; 370 } 371 372 static int coroutine_fn free_fid(V9fsPDU *pdu, V9fsFidState *fidp) 373 { 374 int retval = 0; 375 376 if (fidp->fid_type == P9_FID_FILE) { 377 /* If we reclaimed the fd no need to close */ 378 if (fidp->fs.fd != -1) { 379 retval = v9fs_co_close(pdu, &fidp->fs); 380 } 381 } else if (fidp->fid_type == P9_FID_DIR) { 382 if (fidp->fs.dir.stream != NULL) { 383 retval = v9fs_co_closedir(pdu, &fidp->fs); 384 } 385 } else if (fidp->fid_type == P9_FID_XATTR) { 386 retval = v9fs_xattr_fid_clunk(pdu, fidp); 387 } 388 v9fs_path_free(&fidp->path); 389 g_free(fidp); 390 return retval; 391 } 392 393 static int coroutine_fn put_fid(V9fsPDU *pdu, V9fsFidState *fidp) 394 { 395 BUG_ON(!fidp->ref); 396 fidp->ref--; 397 /* 398 * Don't free the fid if it is in reclaim list 399 */ 400 if (!fidp->ref && fidp->clunked) { 401 if (fidp->fid == pdu->s->root_fid) { 402 /* 403 * if the clunked fid is root fid then we 404 * have unmounted the fs on the client side. 405 * delete the migration blocker. Ideally, this 406 * should be hooked to transport close notification 407 */ 408 migrate_del_blocker(&pdu->s->migration_blocker); 409 } 410 return free_fid(pdu, fidp); 411 } 412 return 0; 413 } 414 415 static V9fsFidState *clunk_fid(V9fsState *s, int32_t fid) 416 { 417 V9fsFidState *fidp; 418 419 /* TODO: Use g_hash_table_steal_extended() instead? 
*/ 420 fidp = g_hash_table_lookup(s->fids, GINT_TO_POINTER(fid)); 421 if (fidp) { 422 g_hash_table_remove(s->fids, GINT_TO_POINTER(fid)); 423 fidp->clunked = true; 424 return fidp; 425 } 426 return NULL; 427 } 428 429 void coroutine_fn v9fs_reclaim_fd(V9fsPDU *pdu) 430 { 431 int reclaim_count = 0; 432 V9fsState *s = pdu->s; 433 V9fsFidState *f; 434 GHashTableIter iter; 435 gpointer fid; 436 int err; 437 int nclosed = 0; 438 439 /* prevent multiple coroutines running this function simultaniously */ 440 if (s->reclaiming) { 441 return; 442 } 443 s->reclaiming = true; 444 445 g_hash_table_iter_init(&iter, s->fids); 446 447 QSLIST_HEAD(, V9fsFidState) reclaim_list = 448 QSLIST_HEAD_INITIALIZER(reclaim_list); 449 450 /* Pick FIDs to be closed, collect them on reclaim_list. */ 451 while (g_hash_table_iter_next(&iter, &fid, (gpointer *) &f)) { 452 /* 453 * Unlinked fids cannot be reclaimed, skip those, and also skip fids 454 * currently being operated on. 455 */ 456 if (f->ref || f->flags & FID_NON_RECLAIMABLE) { 457 continue; 458 } 459 /* 460 * if it is a recently referenced fid 461 * we leave the fid untouched and clear the 462 * reference bit. We come back to it later 463 * in the next iteration. (a simple LRU without 464 * moving list elements around) 465 */ 466 if (f->flags & FID_REFERENCED) { 467 f->flags &= ~FID_REFERENCED; 468 continue; 469 } 470 /* 471 * Add fids to reclaim list. 
472 */ 473 if (f->fid_type == P9_FID_FILE) { 474 if (f->fs.fd != -1) { 475 /* 476 * Up the reference count so that 477 * a clunk request won't free this fid 478 */ 479 f->ref++; 480 QSLIST_INSERT_HEAD(&reclaim_list, f, reclaim_next); 481 f->fs_reclaim.fd = f->fs.fd; 482 f->fs.fd = -1; 483 reclaim_count++; 484 } 485 } else if (f->fid_type == P9_FID_DIR) { 486 if (f->fs.dir.stream != NULL) { 487 /* 488 * Up the reference count so that 489 * a clunk request won't free this fid 490 */ 491 f->ref++; 492 QSLIST_INSERT_HEAD(&reclaim_list, f, reclaim_next); 493 f->fs_reclaim.dir.stream = f->fs.dir.stream; 494 f->fs.dir.stream = NULL; 495 reclaim_count++; 496 } 497 } 498 if (reclaim_count >= open_fd_rc) { 499 break; 500 } 501 } 502 /* 503 * Close the picked FIDs altogether on a background I/O driver thread. Do 504 * this all at once to keep latency (i.e. amount of thread hops between main 505 * thread <-> fs driver background thread) as low as possible. 506 */ 507 v9fs_co_run_in_worker({ 508 QSLIST_FOREACH(f, &reclaim_list, reclaim_next) { 509 err = (f->fid_type == P9_FID_DIR) ? 510 s->ops->closedir(&s->ctx, &f->fs_reclaim) : 511 s->ops->close(&s->ctx, &f->fs_reclaim); 512 513 /* 'man 2 close' suggests to ignore close() errors except of EBADF */ 514 if (unlikely(err && errno == EBADF)) { 515 /* 516 * unexpected case as FIDs were picked above by having a valid 517 * file descriptor 518 */ 519 error_report("9pfs: v9fs_reclaim_fd() WARNING: close() failed with EBADF"); 520 } else { 521 /* total_open_fd must only be mutated on main thread */ 522 nclosed++; 523 } 524 } 525 }); 526 total_open_fd -= nclosed; 527 /* Free the closed FIDs. */ 528 while (!QSLIST_EMPTY(&reclaim_list)) { 529 f = QSLIST_FIRST(&reclaim_list); 530 QSLIST_REMOVE(&reclaim_list, f, V9fsFidState, reclaim_next); 531 /* 532 * Now drop the fid reference, free it 533 * if clunked. 
534 */ 535 put_fid(pdu, f); 536 } 537 538 s->reclaiming = false; 539 } 540 541 /* 542 * This is used when a path is removed from the directory tree. Any 543 * fids that still reference it must not be closed from then on, since 544 * they cannot be reopened. 545 */ 546 static int coroutine_fn v9fs_mark_fids_unreclaim(V9fsPDU *pdu, V9fsPath *path) 547 { 548 int err = 0; 549 V9fsState *s = pdu->s; 550 V9fsFidState *fidp; 551 gpointer fid; 552 GHashTableIter iter; 553 /* 554 * The most common case is probably that we have exactly one 555 * fid for the given path, so preallocate exactly one. 556 */ 557 g_autoptr(GArray) to_reopen = g_array_sized_new(FALSE, FALSE, 558 sizeof(V9fsFidState *), 1); 559 gint i; 560 561 g_hash_table_iter_init(&iter, s->fids); 562 563 /* 564 * We iterate over the fid table looking for the entries we need 565 * to reopen, and store them in to_reopen. This is because 566 * v9fs_reopen_fid() and put_fid() yield. This allows the fid table 567 * to be modified in the meantime, invalidating our iterator. 568 */ 569 while (g_hash_table_iter_next(&iter, &fid, (gpointer *) &fidp)) { 570 if (fidp->path.size == path->size && 571 !memcmp(fidp->path.data, path->data, path->size)) { 572 /* 573 * Ensure the fid survives a potential clunk request during 574 * v9fs_reopen_fid or put_fid. 
575 */ 576 fidp->ref++; 577 fidp->flags |= FID_NON_RECLAIMABLE; 578 g_array_append_val(to_reopen, fidp); 579 } 580 } 581 582 for (i = 0; i < to_reopen->len; i++) { 583 fidp = g_array_index(to_reopen, V9fsFidState*, i); 584 /* reopen the file/dir if already closed */ 585 err = v9fs_reopen_fid(pdu, fidp); 586 if (err < 0) { 587 break; 588 } 589 } 590 591 for (i = 0; i < to_reopen->len; i++) { 592 put_fid(pdu, g_array_index(to_reopen, V9fsFidState*, i)); 593 } 594 return err; 595 } 596 597 static void coroutine_fn virtfs_reset(V9fsPDU *pdu) 598 { 599 V9fsState *s = pdu->s; 600 V9fsFidState *fidp; 601 GList *freeing; 602 /* 603 * Get a list of all the values (fid states) in the table, which 604 * we then... 605 */ 606 g_autoptr(GList) fids = g_hash_table_get_values(s->fids); 607 608 /* ... remove from the table, taking over ownership. */ 609 g_hash_table_steal_all(s->fids); 610 611 /* 612 * This allows us to release our references to them asynchronously without 613 * iterating over the hash table and risking iterator invalidation 614 * through concurrent modifications. 
615 */ 616 for (freeing = fids; freeing; freeing = freeing->next) { 617 fidp = freeing->data; 618 fidp->ref++; 619 fidp->clunked = true; 620 put_fid(pdu, fidp); 621 } 622 } 623 624 #define P9_QID_TYPE_DIR 0x80 625 #define P9_QID_TYPE_SYMLINK 0x02 626 627 #define P9_STAT_MODE_DIR 0x80000000 628 #define P9_STAT_MODE_APPEND 0x40000000 629 #define P9_STAT_MODE_EXCL 0x20000000 630 #define P9_STAT_MODE_MOUNT 0x10000000 631 #define P9_STAT_MODE_AUTH 0x08000000 632 #define P9_STAT_MODE_TMP 0x04000000 633 #define P9_STAT_MODE_SYMLINK 0x02000000 634 #define P9_STAT_MODE_LINK 0x01000000 635 #define P9_STAT_MODE_DEVICE 0x00800000 636 #define P9_STAT_MODE_NAMED_PIPE 0x00200000 637 #define P9_STAT_MODE_SOCKET 0x00100000 638 #define P9_STAT_MODE_SETUID 0x00080000 639 #define P9_STAT_MODE_SETGID 0x00040000 640 #define P9_STAT_MODE_SETVTX 0x00010000 641 642 #define P9_STAT_MODE_TYPE_BITS (P9_STAT_MODE_DIR | \ 643 P9_STAT_MODE_SYMLINK | \ 644 P9_STAT_MODE_LINK | \ 645 P9_STAT_MODE_DEVICE | \ 646 P9_STAT_MODE_NAMED_PIPE | \ 647 P9_STAT_MODE_SOCKET) 648 649 /* Mirrors all bits of a byte. So e.g. binary 10100000 would become 00000101. */ 650 static inline uint8_t mirror8bit(uint8_t byte) 651 { 652 return (byte * 0x0202020202ULL & 0x010884422010ULL) % 1023; 653 } 654 655 /* Same as mirror8bit() just for a 64 bit data type instead for a byte. */ 656 static inline uint64_t mirror64bit(uint64_t value) 657 { 658 return ((uint64_t)mirror8bit(value & 0xff) << 56) | 659 ((uint64_t)mirror8bit((value >> 8) & 0xff) << 48) | 660 ((uint64_t)mirror8bit((value >> 16) & 0xff) << 40) | 661 ((uint64_t)mirror8bit((value >> 24) & 0xff) << 32) | 662 ((uint64_t)mirror8bit((value >> 32) & 0xff) << 24) | 663 ((uint64_t)mirror8bit((value >> 40) & 0xff) << 16) | 664 ((uint64_t)mirror8bit((value >> 48) & 0xff) << 8) | 665 ((uint64_t)mirror8bit((value >> 56) & 0xff)); 666 } 667 668 /* 669 * Parameter k for the Exponential Golomb algorithm to be used. 
 *
 * The smaller this value, the smaller the minimum bit count of the affixes
 * generated by the Exp. Golomb algorithm will be (at lowest index), however
 * at the price of a higher maximum bit count of the generated affixes (at
 * highest index). Likewise, increasing this parameter yields a smaller
 * maximum bit count at the price of a higher minimum bit count.
 *
 * In practice that means: a good value for k depends on the expected number
 * of devices to be exposed by one export. For a small number of devices k
 * should be small, for a large number of devices k might be increased
 * instead. The default of k=0 should be fine for most users though.
 *
 * IMPORTANT: In case this ever becomes a runtime parameter, the value of
 * k must not change as long as the guest is still running! Otherwise
 * completely different inode numbers would be generated on guest.
 */
#define EXP_GOLOMB_K 0

/**
 * expGolombEncode() - Exponential Golomb algorithm for arbitrary k
 *                     (including k=0).
 *
 * @n: natural number (or index) of the prefix to be generated
 *     (1, 2, 3, ...)
 * @k: parameter k of Exp. Golomb algorithm to be used
 *     (see comment on EXP_GOLOMB_K macro for details about k)
 * Return: prefix for given @n and @k
 *
 * The Exponential Golomb algorithm generates prefixes (NOT suffixes!)
 * with growing length and with the mathematical property of being
 * "prefix-free". The latter means the generated prefixes can be prepended
 * in front of arbitrary numbers and the resulting concatenated numbers are
 * guaranteed to be always unique.
 *
 * This is a minor adjustment to the original Exp. Golomb algorithm in the
 * sense that the lowest allowed index (@n) starts with 1, not with zero.
706 */ 707 static VariLenAffix expGolombEncode(uint64_t n, int k) 708 { 709 const uint64_t value = n + (1 << k) - 1; 710 const int bits = (int) log2(value) + 1; 711 return (VariLenAffix) { 712 .type = AffixType_Prefix, 713 .value = value, 714 .bits = bits + MAX((bits - 1 - k), 0) 715 }; 716 } 717 718 /** 719 * invertAffix() - Converts a suffix into a prefix, or a prefix into a suffix. 720 * @affix: either suffix or prefix to be inverted 721 * Return: inversion of passed @affix 722 * 723 * Simply mirror all bits of the affix value, for the purpose to preserve 724 * respectively the mathematical "prefix-free" or "suffix-free" property 725 * after the conversion. 726 * 727 * If a passed prefix is suitable to create unique numbers, then the 728 * returned suffix is suitable to create unique numbers as well (and vice 729 * versa). 730 */ 731 static VariLenAffix invertAffix(const VariLenAffix *affix) 732 { 733 return (VariLenAffix) { 734 .type = 735 (affix->type == AffixType_Suffix) ? 736 AffixType_Prefix : AffixType_Suffix, 737 .value = 738 mirror64bit(affix->value) >> 739 ((sizeof(affix->value) * 8) - affix->bits), 740 .bits = affix->bits 741 }; 742 } 743 744 /** 745 * affixForIndex() - Generates suffix numbers with "suffix-free" property. 746 * @index: natural number (or index) of the suffix to be generated 747 * (1, 2, 3, ...) 748 * Return: Suffix suitable to assemble unique number. 749 * 750 * This is just a wrapper function on top of the Exp. Golomb algorithm. 751 * 752 * Since the Exp. Golomb algorithm generates prefixes, but we need suffixes, 753 * this function converts the Exp. Golomb prefixes into appropriate suffixes 754 * which are still suitable for generating unique numbers. 
755 */ 756 static VariLenAffix affixForIndex(uint64_t index) 757 { 758 VariLenAffix prefix; 759 prefix = expGolombEncode(index, EXP_GOLOMB_K); 760 return invertAffix(&prefix); /* convert prefix to suffix */ 761 } 762 763 static uint32_t qpp_hash(QppEntry e) 764 { 765 return qemu_xxhash4(e.ino_prefix, e.dev); 766 } 767 768 static uint32_t qpf_hash(QpfEntry e) 769 { 770 return qemu_xxhash4(e.ino, e.dev); 771 } 772 773 static bool qpd_cmp_func(const void *obj, const void *userp) 774 { 775 const QpdEntry *e1 = obj, *e2 = userp; 776 return e1->dev == e2->dev; 777 } 778 779 static bool qpp_cmp_func(const void *obj, const void *userp) 780 { 781 const QppEntry *e1 = obj, *e2 = userp; 782 return e1->dev == e2->dev && e1->ino_prefix == e2->ino_prefix; 783 } 784 785 static bool qpf_cmp_func(const void *obj, const void *userp) 786 { 787 const QpfEntry *e1 = obj, *e2 = userp; 788 return e1->dev == e2->dev && e1->ino == e2->ino; 789 } 790 791 static void qp_table_remove(void *p, uint32_t h, void *up) 792 { 793 g_free(p); 794 } 795 796 static void qp_table_destroy(struct qht *ht) 797 { 798 if (!ht || !ht->map) { 799 return; 800 } 801 qht_iter(ht, qp_table_remove, NULL); 802 qht_destroy(ht); 803 } 804 805 static void qpd_table_init(struct qht *ht) 806 { 807 qht_init(ht, qpd_cmp_func, 1, QHT_MODE_AUTO_RESIZE); 808 } 809 810 static void qpp_table_init(struct qht *ht) 811 { 812 qht_init(ht, qpp_cmp_func, 1, QHT_MODE_AUTO_RESIZE); 813 } 814 815 static void qpf_table_init(struct qht *ht) 816 { 817 qht_init(ht, qpf_cmp_func, 1 << 16, QHT_MODE_AUTO_RESIZE); 818 } 819 820 /* 821 * Returns how many (high end) bits of inode numbers of the passed fs 822 * device shall be used (in combination with the device number) to 823 * generate hash values for qpp_table entries. 824 * 825 * This function is required if variable length suffixes are used for inode 826 * number mapping on guest level. 
Since a device may end up having multiple 827 * entries in qpp_table, each entry most probably with a different suffix 828 * length, we thus need this function in conjunction with qpd_table to 829 * "agree" about a fix amount of bits (per device) to be always used for 830 * generating hash values for the purpose of accessing qpp_table in order 831 * get consistent behaviour when accessing qpp_table. 832 */ 833 static int qid_inode_prefix_hash_bits(V9fsPDU *pdu, dev_t dev) 834 { 835 QpdEntry lookup = { 836 .dev = dev 837 }, *val; 838 uint32_t hash = dev; 839 VariLenAffix affix; 840 841 val = qht_lookup(&pdu->s->qpd_table, &lookup, hash); 842 if (!val) { 843 val = g_new0(QpdEntry, 1); 844 *val = lookup; 845 affix = affixForIndex(pdu->s->qp_affix_next); 846 val->prefix_bits = affix.bits; 847 qht_insert(&pdu->s->qpd_table, val, hash, NULL); 848 pdu->s->qp_ndevices++; 849 } 850 return val->prefix_bits; 851 } 852 853 /* 854 * Slow / full mapping host inode nr -> guest inode nr. 855 * 856 * This function performs a slower and much more costly remapping of an 857 * original file inode number on host to an appropriate different inode 858 * number on guest. For every (dev, inode) combination on host a new 859 * sequential number is generated, cached and exposed as inode number on 860 * guest. 861 * 862 * This is just a "last resort" fallback solution if the much faster/cheaper 863 * qid_path_suffixmap() failed. In practice this slow / full mapping is not 864 * expected ever to be used at all though. 
865 * 866 * See qid_path_suffixmap() for details 867 * 868 */ 869 static int qid_path_fullmap(V9fsPDU *pdu, const struct stat *stbuf, 870 uint64_t *path) 871 { 872 QpfEntry lookup = { 873 .dev = stbuf->st_dev, 874 .ino = stbuf->st_ino 875 }, *val; 876 uint32_t hash = qpf_hash(lookup); 877 VariLenAffix affix; 878 879 val = qht_lookup(&pdu->s->qpf_table, &lookup, hash); 880 881 if (!val) { 882 if (pdu->s->qp_fullpath_next == 0) { 883 /* no more files can be mapped :'( */ 884 error_report_once( 885 "9p: No more prefixes available for remapping inodes from " 886 "host to guest." 887 ); 888 return -ENFILE; 889 } 890 891 val = g_new0(QpfEntry, 1); 892 *val = lookup; 893 894 /* new unique inode and device combo */ 895 affix = affixForIndex( 896 1ULL << (sizeof(pdu->s->qp_affix_next) * 8) 897 ); 898 val->path = (pdu->s->qp_fullpath_next++ << affix.bits) | affix.value; 899 pdu->s->qp_fullpath_next &= ((1ULL << (64 - affix.bits)) - 1); 900 qht_insert(&pdu->s->qpf_table, val, hash, NULL); 901 } 902 903 *path = val->path; 904 return 0; 905 } 906 907 /* 908 * Quick mapping host inode nr -> guest inode nr. 909 * 910 * This function performs quick remapping of an original file inode number 911 * on host to an appropriate different inode number on guest. This remapping 912 * of inodes is required to avoid inode nr collisions on guest which would 913 * happen if the 9p export contains more than 1 exported file system (or 914 * more than 1 file system data set), because unlike on host level where the 915 * files would have different device nrs, all files exported by 9p would 916 * share the same device nr on guest (the device nr of the virtual 9p device 917 * that is). 
 *
 * Inode remapping is performed by chopping off high end bits of the original
 * inode number from host, shifting the result upwards and then assigning a
 * generated suffix number for the low end bits, where the same suffix number
 * will be shared by all inodes with the same device id AND the same high end
 * bits that have been chopped off. That approach utilizes the fact that inode
 * numbers very likely share the same high end bits (e.g. due to their common
 * sequential generation by file systems) and hence we only have to generate
 * and track a very limited number of suffixes in practice.
 *
 * We generate variable size suffixes for that purpose. The 1st generated
 * suffix will only have 1 bit and hence we only need to chop off 1 bit from
 * the original inode number. The subsequent suffixes being generated will
 * grow in (bit) size subsequently, i.e. the 2nd and 3rd suffix being
 * generated will have 3 bits and hence we have to chop off 3 bits from their
 * original inodes, and so on. That approach of using variable length suffixes
 * (i.e. over fixed size ones) utilizes the fact that in practice only a very
 * limited number of devices are shared by the same export (e.g. typically
 * less than 2 dozen devices per 9p export), so in practice we need to chop
 * off fewer bits than with fixed size prefixes and yet are flexible to add
 * new devices at runtime below host's export directory at any time without
 * having to reboot guest nor requiring to reconfigure guest for that. And due
 * to the very limited number of original high end bits that we chop off that
 * way, the total number of suffixes we need to generate is less than by using
 * fixed size prefixes and hence it also improves performance of the inode
 * remapping algorithm, and finally has the nice side effect that the inode
 * numbers on guest will be much smaller & human friendly.
;-) 945 */ 946 static int qid_path_suffixmap(V9fsPDU *pdu, const struct stat *stbuf, 947 uint64_t *path) 948 { 949 const int ino_hash_bits = qid_inode_prefix_hash_bits(pdu, stbuf->st_dev); 950 QppEntry lookup = { 951 .dev = stbuf->st_dev, 952 .ino_prefix = (uint16_t) (stbuf->st_ino >> (64 - ino_hash_bits)) 953 }, *val; 954 uint32_t hash = qpp_hash(lookup); 955 956 val = qht_lookup(&pdu->s->qpp_table, &lookup, hash); 957 958 if (!val) { 959 if (pdu->s->qp_affix_next == 0) { 960 /* we ran out of affixes */ 961 warn_report_once( 962 "9p: Potential degraded performance of inode remapping" 963 ); 964 return -ENFILE; 965 } 966 967 val = g_new0(QppEntry, 1); 968 *val = lookup; 969 970 /* new unique inode affix and device combo */ 971 val->qp_affix_index = pdu->s->qp_affix_next++; 972 val->qp_affix = affixForIndex(val->qp_affix_index); 973 qht_insert(&pdu->s->qpp_table, val, hash, NULL); 974 } 975 /* assuming generated affix to be suffix type, not prefix */ 976 *path = (stbuf->st_ino << val->qp_affix.bits) | val->qp_affix.value; 977 return 0; 978 } 979 980 static int stat_to_qid(V9fsPDU *pdu, const struct stat *stbuf, V9fsQID *qidp) 981 { 982 int err; 983 size_t size; 984 985 if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) { 986 /* map inode+device to qid path (fast path) */ 987 err = qid_path_suffixmap(pdu, stbuf, &qidp->path); 988 if (err == -ENFILE) { 989 /* fast path didn't work, fall back to full map */ 990 err = qid_path_fullmap(pdu, stbuf, &qidp->path); 991 } 992 if (err) { 993 return err; 994 } 995 } else { 996 if (pdu->s->dev_id != stbuf->st_dev) { 997 if (pdu->s->ctx.export_flags & V9FS_FORBID_MULTIDEVS) { 998 error_report_once( 999 "9p: Multiple devices detected in same VirtFS export. " 1000 "Access of guest to additional devices is (partly) " 1001 "denied due to virtfs option 'multidevs=forbid' being " 1002 "effective." 
1003 ); 1004 return -ENODEV; 1005 } else { 1006 warn_report_once( 1007 "9p: Multiple devices detected in same VirtFS export, " 1008 "which might lead to file ID collisions and severe " 1009 "misbehaviours on guest! You should either use a " 1010 "separate export for each device shared from host or " 1011 "use virtfs option 'multidevs=remap'!" 1012 ); 1013 } 1014 } 1015 memset(&qidp->path, 0, sizeof(qidp->path)); 1016 size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path)); 1017 memcpy(&qidp->path, &stbuf->st_ino, size); 1018 } 1019 1020 qidp->version = stbuf->st_mtime ^ (stbuf->st_size << 8); 1021 qidp->type = 0; 1022 if (S_ISDIR(stbuf->st_mode)) { 1023 qidp->type |= P9_QID_TYPE_DIR; 1024 } 1025 if (S_ISLNK(stbuf->st_mode)) { 1026 qidp->type |= P9_QID_TYPE_SYMLINK; 1027 } 1028 1029 return 0; 1030 } 1031 1032 V9fsPDU *pdu_alloc(V9fsState *s) 1033 { 1034 V9fsPDU *pdu = NULL; 1035 1036 if (!QLIST_EMPTY(&s->free_list)) { 1037 pdu = QLIST_FIRST(&s->free_list); 1038 QLIST_REMOVE(pdu, next); 1039 QLIST_INSERT_HEAD(&s->active_list, pdu, next); 1040 } 1041 return pdu; 1042 } 1043 1044 void pdu_free(V9fsPDU *pdu) 1045 { 1046 V9fsState *s = pdu->s; 1047 1048 g_assert(!pdu->cancelled); 1049 QLIST_REMOVE(pdu, next); 1050 QLIST_INSERT_HEAD(&s->free_list, pdu, next); 1051 } 1052 1053 static void coroutine_fn pdu_complete(V9fsPDU *pdu, ssize_t len) 1054 { 1055 int8_t id = pdu->id + 1; /* Response */ 1056 V9fsState *s = pdu->s; 1057 int ret; 1058 1059 /* 1060 * The 9p spec requires that successfully cancelled pdus receive no reply. 1061 * Sending a reply would confuse clients because they would 1062 * assume that any EINTR is the actual result of the operation, 1063 * rather than a consequence of the cancellation. 
However, if 1064 * the operation completed (successfully or with an error other 1065 * than caused be cancellation), we do send out that reply, both 1066 * for efficiency and to avoid confusing the rest of the state machine 1067 * that assumes passing a non-error here will mean a successful 1068 * transmission of the reply. 1069 */ 1070 bool discard = pdu->cancelled && len == -EINTR; 1071 if (discard) { 1072 trace_v9fs_rcancel(pdu->tag, pdu->id); 1073 pdu->size = 0; 1074 goto out_notify; 1075 } 1076 1077 if (len < 0) { 1078 int err = -len; 1079 len = 7; 1080 1081 if (s->proto_version != V9FS_PROTO_2000L) { 1082 V9fsString str; 1083 1084 str.data = strerror(err); 1085 str.size = strlen(str.data); 1086 1087 ret = pdu_marshal(pdu, len, "s", &str); 1088 if (ret < 0) { 1089 goto out_notify; 1090 } 1091 len += ret; 1092 id = P9_RERROR; 1093 } else { 1094 err = errno_to_dotl(err); 1095 } 1096 1097 ret = pdu_marshal(pdu, len, "d", err); 1098 if (ret < 0) { 1099 goto out_notify; 1100 } 1101 len += ret; 1102 1103 if (s->proto_version == V9FS_PROTO_2000L) { 1104 id = P9_RLERROR; 1105 } 1106 trace_v9fs_rerror(pdu->tag, pdu->id, err); /* Trace ERROR */ 1107 } 1108 1109 /* fill out the header */ 1110 if (pdu_marshal(pdu, 0, "dbw", (int32_t)len, id, pdu->tag) < 0) { 1111 goto out_notify; 1112 } 1113 1114 /* keep these in sync */ 1115 pdu->size = len; 1116 pdu->id = id; 1117 1118 out_notify: 1119 pdu->s->transport->push_and_notify(pdu); 1120 1121 /* Now wakeup anybody waiting in flush for this request */ 1122 if (!qemu_co_queue_next(&pdu->complete)) { 1123 pdu_free(pdu); 1124 } 1125 } 1126 1127 static mode_t v9mode_to_mode(uint32_t mode, V9fsString *extension) 1128 { 1129 mode_t ret; 1130 1131 ret = mode & 0777; 1132 if (mode & P9_STAT_MODE_DIR) { 1133 ret |= S_IFDIR; 1134 } 1135 1136 if (mode & P9_STAT_MODE_SYMLINK) { 1137 ret |= S_IFLNK; 1138 } 1139 if (mode & P9_STAT_MODE_SOCKET) { 1140 ret |= S_IFSOCK; 1141 } 1142 if (mode & P9_STAT_MODE_NAMED_PIPE) { 1143 ret |= S_IFIFO; 1144 
} 1145 if (mode & P9_STAT_MODE_DEVICE) { 1146 if (extension->size && extension->data[0] == 'c') { 1147 ret |= S_IFCHR; 1148 } else { 1149 ret |= S_IFBLK; 1150 } 1151 } 1152 1153 if (!(ret & ~0777)) { 1154 ret |= S_IFREG; 1155 } 1156 1157 if (mode & P9_STAT_MODE_SETUID) { 1158 ret |= S_ISUID; 1159 } 1160 if (mode & P9_STAT_MODE_SETGID) { 1161 ret |= S_ISGID; 1162 } 1163 if (mode & P9_STAT_MODE_SETVTX) { 1164 ret |= S_ISVTX; 1165 } 1166 1167 return ret; 1168 } 1169 1170 static int donttouch_stat(V9fsStat *stat) 1171 { 1172 if (stat->type == -1 && 1173 stat->dev == -1 && 1174 stat->qid.type == 0xff && 1175 stat->qid.version == (uint32_t) -1 && 1176 stat->qid.path == (uint64_t) -1 && 1177 stat->mode == -1 && 1178 stat->atime == -1 && 1179 stat->mtime == -1 && 1180 stat->length == -1 && 1181 !stat->name.size && 1182 !stat->uid.size && 1183 !stat->gid.size && 1184 !stat->muid.size && 1185 stat->n_uid == -1 && 1186 stat->n_gid == -1 && 1187 stat->n_muid == -1) { 1188 return 1; 1189 } 1190 1191 return 0; 1192 } 1193 1194 static void v9fs_stat_init(V9fsStat *stat) 1195 { 1196 v9fs_string_init(&stat->name); 1197 v9fs_string_init(&stat->uid); 1198 v9fs_string_init(&stat->gid); 1199 v9fs_string_init(&stat->muid); 1200 v9fs_string_init(&stat->extension); 1201 } 1202 1203 static void v9fs_stat_free(V9fsStat *stat) 1204 { 1205 v9fs_string_free(&stat->name); 1206 v9fs_string_free(&stat->uid); 1207 v9fs_string_free(&stat->gid); 1208 v9fs_string_free(&stat->muid); 1209 v9fs_string_free(&stat->extension); 1210 } 1211 1212 static uint32_t stat_to_v9mode(const struct stat *stbuf) 1213 { 1214 uint32_t mode; 1215 1216 mode = stbuf->st_mode & 0777; 1217 if (S_ISDIR(stbuf->st_mode)) { 1218 mode |= P9_STAT_MODE_DIR; 1219 } 1220 1221 if (S_ISLNK(stbuf->st_mode)) { 1222 mode |= P9_STAT_MODE_SYMLINK; 1223 } 1224 1225 if (S_ISSOCK(stbuf->st_mode)) { 1226 mode |= P9_STAT_MODE_SOCKET; 1227 } 1228 1229 if (S_ISFIFO(stbuf->st_mode)) { 1230 mode |= P9_STAT_MODE_NAMED_PIPE; 1231 } 1232 1233 if 
(S_ISBLK(stbuf->st_mode) || S_ISCHR(stbuf->st_mode)) { 1234 mode |= P9_STAT_MODE_DEVICE; 1235 } 1236 1237 if (stbuf->st_mode & S_ISUID) { 1238 mode |= P9_STAT_MODE_SETUID; 1239 } 1240 1241 if (stbuf->st_mode & S_ISGID) { 1242 mode |= P9_STAT_MODE_SETGID; 1243 } 1244 1245 if (stbuf->st_mode & S_ISVTX) { 1246 mode |= P9_STAT_MODE_SETVTX; 1247 } 1248 1249 return mode; 1250 } 1251 1252 static int coroutine_fn stat_to_v9stat(V9fsPDU *pdu, V9fsPath *path, 1253 const char *basename, 1254 const struct stat *stbuf, 1255 V9fsStat *v9stat) 1256 { 1257 int err; 1258 1259 memset(v9stat, 0, sizeof(*v9stat)); 1260 1261 err = stat_to_qid(pdu, stbuf, &v9stat->qid); 1262 if (err < 0) { 1263 return err; 1264 } 1265 v9stat->mode = stat_to_v9mode(stbuf); 1266 v9stat->atime = stbuf->st_atime; 1267 v9stat->mtime = stbuf->st_mtime; 1268 v9stat->length = stbuf->st_size; 1269 1270 v9fs_string_free(&v9stat->uid); 1271 v9fs_string_free(&v9stat->gid); 1272 v9fs_string_free(&v9stat->muid); 1273 1274 v9stat->n_uid = stbuf->st_uid; 1275 v9stat->n_gid = stbuf->st_gid; 1276 v9stat->n_muid = 0; 1277 1278 v9fs_string_free(&v9stat->extension); 1279 1280 if (v9stat->mode & P9_STAT_MODE_SYMLINK) { 1281 err = v9fs_co_readlink(pdu, path, &v9stat->extension); 1282 if (err < 0) { 1283 return err; 1284 } 1285 } else if (v9stat->mode & P9_STAT_MODE_DEVICE) { 1286 v9fs_string_sprintf(&v9stat->extension, "%c %u %u", 1287 S_ISCHR(stbuf->st_mode) ? 
'c' : 'b', 1288 major(stbuf->st_rdev), minor(stbuf->st_rdev)); 1289 } else if (S_ISDIR(stbuf->st_mode) || S_ISREG(stbuf->st_mode)) { 1290 v9fs_string_sprintf(&v9stat->extension, "%s %lu", 1291 "HARDLINKCOUNT", (unsigned long)stbuf->st_nlink); 1292 } 1293 1294 v9fs_string_sprintf(&v9stat->name, "%s", basename); 1295 1296 v9stat->size = 61 + 1297 v9fs_string_size(&v9stat->name) + 1298 v9fs_string_size(&v9stat->uid) + 1299 v9fs_string_size(&v9stat->gid) + 1300 v9fs_string_size(&v9stat->muid) + 1301 v9fs_string_size(&v9stat->extension); 1302 return 0; 1303 } 1304 1305 #define P9_STATS_MODE 0x00000001ULL 1306 #define P9_STATS_NLINK 0x00000002ULL 1307 #define P9_STATS_UID 0x00000004ULL 1308 #define P9_STATS_GID 0x00000008ULL 1309 #define P9_STATS_RDEV 0x00000010ULL 1310 #define P9_STATS_ATIME 0x00000020ULL 1311 #define P9_STATS_MTIME 0x00000040ULL 1312 #define P9_STATS_CTIME 0x00000080ULL 1313 #define P9_STATS_INO 0x00000100ULL 1314 #define P9_STATS_SIZE 0x00000200ULL 1315 #define P9_STATS_BLOCKS 0x00000400ULL 1316 1317 #define P9_STATS_BTIME 0x00000800ULL 1318 #define P9_STATS_GEN 0x00001000ULL 1319 #define P9_STATS_DATA_VERSION 0x00002000ULL 1320 1321 #define P9_STATS_BASIC 0x000007ffULL /* Mask for fields up to BLOCKS */ 1322 #define P9_STATS_ALL 0x00003fffULL /* Mask for All fields above */ 1323 1324 1325 /** 1326 * blksize_to_iounit() - Block size exposed to 9p client. 1327 * Return: block size 1328 * 1329 * @pdu: 9p client request 1330 * @blksize: host filesystem's block size 1331 * 1332 * Convert host filesystem's block size into an appropriate block size for 1333 * 9p client (guest OS side). The value returned suggests an "optimum" block 1334 * size for 9p I/O, i.e. to maximize performance. 
1335 */ 1336 static int32_t blksize_to_iounit(const V9fsPDU *pdu, int32_t blksize) 1337 { 1338 int32_t iounit = 0; 1339 V9fsState *s = pdu->s; 1340 1341 /* 1342 * iounit should be multiples of blksize (host filesystem block size) 1343 * as well as less than (client msize - P9_IOHDRSZ) 1344 */ 1345 if (blksize) { 1346 iounit = QEMU_ALIGN_DOWN(s->msize - P9_IOHDRSZ, blksize); 1347 } 1348 if (!iounit) { 1349 iounit = s->msize - P9_IOHDRSZ; 1350 } 1351 return iounit; 1352 } 1353 1354 static int32_t stat_to_iounit(const V9fsPDU *pdu, const struct stat *stbuf) 1355 { 1356 return blksize_to_iounit(pdu, stbuf->st_blksize); 1357 } 1358 1359 static int stat_to_v9stat_dotl(V9fsPDU *pdu, const struct stat *stbuf, 1360 V9fsStatDotl *v9lstat) 1361 { 1362 memset(v9lstat, 0, sizeof(*v9lstat)); 1363 1364 v9lstat->st_mode = stbuf->st_mode; 1365 v9lstat->st_nlink = stbuf->st_nlink; 1366 v9lstat->st_uid = stbuf->st_uid; 1367 v9lstat->st_gid = stbuf->st_gid; 1368 v9lstat->st_rdev = host_dev_to_dotl_dev(stbuf->st_rdev); 1369 v9lstat->st_size = stbuf->st_size; 1370 v9lstat->st_blksize = stat_to_iounit(pdu, stbuf); 1371 v9lstat->st_blocks = stbuf->st_blocks; 1372 v9lstat->st_atime_sec = stbuf->st_atime; 1373 v9lstat->st_mtime_sec = stbuf->st_mtime; 1374 v9lstat->st_ctime_sec = stbuf->st_ctime; 1375 #ifdef CONFIG_DARWIN 1376 v9lstat->st_atime_nsec = stbuf->st_atimespec.tv_nsec; 1377 v9lstat->st_mtime_nsec = stbuf->st_mtimespec.tv_nsec; 1378 v9lstat->st_ctime_nsec = stbuf->st_ctimespec.tv_nsec; 1379 #else 1380 v9lstat->st_atime_nsec = stbuf->st_atim.tv_nsec; 1381 v9lstat->st_mtime_nsec = stbuf->st_mtim.tv_nsec; 1382 v9lstat->st_ctime_nsec = stbuf->st_ctim.tv_nsec; 1383 #endif 1384 /* Currently we only support BASIC fields in stat */ 1385 v9lstat->st_result_mask = P9_STATS_BASIC; 1386 1387 return stat_to_qid(pdu, stbuf, &v9lstat->qid); 1388 } 1389 1390 static void print_sg(struct iovec *sg, int cnt) 1391 { 1392 int i; 1393 1394 printf("sg[%d]: {", cnt); 1395 for (i = 0; i < cnt; i++) { 1396 
if (i) { 1397 printf(", "); 1398 } 1399 printf("(%p, %zd)", sg[i].iov_base, sg[i].iov_len); 1400 } 1401 printf("}\n"); 1402 } 1403 1404 /* Will call this only for path name based fid */ 1405 static void v9fs_fix_path(V9fsPath *dst, V9fsPath *src, int len) 1406 { 1407 V9fsPath str; 1408 v9fs_path_init(&str); 1409 v9fs_path_copy(&str, dst); 1410 v9fs_path_sprintf(dst, "%s%s", src->data, str.data + len); 1411 v9fs_path_free(&str); 1412 } 1413 1414 static inline bool is_ro_export(FsContext *ctx) 1415 { 1416 return ctx->export_flags & V9FS_RDONLY; 1417 } 1418 1419 static void coroutine_fn v9fs_version(void *opaque) 1420 { 1421 ssize_t err; 1422 V9fsPDU *pdu = opaque; 1423 V9fsState *s = pdu->s; 1424 V9fsString version; 1425 size_t offset = 7; 1426 1427 v9fs_string_init(&version); 1428 err = pdu_unmarshal(pdu, offset, "ds", &s->msize, &version); 1429 if (err < 0) { 1430 goto out; 1431 } 1432 trace_v9fs_version(pdu->tag, pdu->id, s->msize, version.data); 1433 1434 virtfs_reset(pdu); 1435 1436 if (!strcmp(version.data, "9P2000.u")) { 1437 s->proto_version = V9FS_PROTO_2000U; 1438 } else if (!strcmp(version.data, "9P2000.L")) { 1439 s->proto_version = V9FS_PROTO_2000L; 1440 } else { 1441 v9fs_string_sprintf(&version, "unknown"); 1442 /* skip min. msize check, reporting invalid version has priority */ 1443 goto marshal; 1444 } 1445 1446 if (s->msize < P9_MIN_MSIZE) { 1447 err = -EMSGSIZE; 1448 error_report( 1449 "9pfs: Client requested msize < minimum msize (" 1450 stringify(P9_MIN_MSIZE) ") supported by this server." 1451 ); 1452 goto out; 1453 } 1454 1455 /* 8192 is the default msize of Linux clients */ 1456 if (s->msize <= 8192 && !(s->ctx.export_flags & V9FS_NO_PERF_WARN)) { 1457 warn_report_once( 1458 "9p: degraded performance: a reasonable high msize should be " 1459 "chosen on client/guest side (chosen msize is <= 8192). See " 1460 "https://wiki.qemu.org/Documentation/9psetup#msize for details." 
1461 ); 1462 } 1463 1464 marshal: 1465 err = pdu_marshal(pdu, offset, "ds", s->msize, &version); 1466 if (err < 0) { 1467 goto out; 1468 } 1469 err += offset; 1470 trace_v9fs_version_return(pdu->tag, pdu->id, s->msize, version.data); 1471 out: 1472 pdu_complete(pdu, err); 1473 v9fs_string_free(&version); 1474 } 1475 1476 static void coroutine_fn v9fs_attach(void *opaque) 1477 { 1478 V9fsPDU *pdu = opaque; 1479 V9fsState *s = pdu->s; 1480 int32_t fid, afid, n_uname; 1481 V9fsString uname, aname; 1482 V9fsFidState *fidp; 1483 size_t offset = 7; 1484 V9fsQID qid; 1485 ssize_t err; 1486 struct stat stbuf; 1487 1488 v9fs_string_init(&uname); 1489 v9fs_string_init(&aname); 1490 err = pdu_unmarshal(pdu, offset, "ddssd", &fid, 1491 &afid, &uname, &aname, &n_uname); 1492 if (err < 0) { 1493 goto out_nofid; 1494 } 1495 trace_v9fs_attach(pdu->tag, pdu->id, fid, afid, uname.data, aname.data); 1496 1497 fidp = alloc_fid(s, fid); 1498 if (fidp == NULL) { 1499 err = -EINVAL; 1500 goto out_nofid; 1501 } 1502 fidp->uid = n_uname; 1503 err = v9fs_co_name_to_path(pdu, NULL, "/", &fidp->path); 1504 if (err < 0) { 1505 err = -EINVAL; 1506 clunk_fid(s, fid); 1507 goto out; 1508 } 1509 err = v9fs_co_lstat(pdu, &fidp->path, &stbuf); 1510 if (err < 0) { 1511 err = -EINVAL; 1512 clunk_fid(s, fid); 1513 goto out; 1514 } 1515 err = stat_to_qid(pdu, &stbuf, &qid); 1516 if (err < 0) { 1517 err = -EINVAL; 1518 clunk_fid(s, fid); 1519 goto out; 1520 } 1521 1522 /* 1523 * disable migration if we haven't done already. 1524 * attach could get called multiple times for the same export. 1525 */ 1526 if (!s->migration_blocker) { 1527 error_setg(&s->migration_blocker, 1528 "Migration is disabled when VirtFS export path '%s' is mounted in the guest using mount_tag '%s'", 1529 s->ctx.fs_root ? 
s->ctx.fs_root : "NULL", s->tag); 1530 err = migrate_add_blocker(&s->migration_blocker, NULL); 1531 if (err < 0) { 1532 clunk_fid(s, fid); 1533 goto out; 1534 } 1535 s->root_fid = fid; 1536 } 1537 1538 err = pdu_marshal(pdu, offset, "Q", &qid); 1539 if (err < 0) { 1540 clunk_fid(s, fid); 1541 goto out; 1542 } 1543 err += offset; 1544 1545 memcpy(&s->root_st, &stbuf, sizeof(stbuf)); 1546 trace_v9fs_attach_return(pdu->tag, pdu->id, 1547 qid.type, qid.version, qid.path); 1548 out: 1549 put_fid(pdu, fidp); 1550 out_nofid: 1551 pdu_complete(pdu, err); 1552 v9fs_string_free(&uname); 1553 v9fs_string_free(&aname); 1554 } 1555 1556 static void coroutine_fn v9fs_stat(void *opaque) 1557 { 1558 int32_t fid; 1559 V9fsStat v9stat; 1560 ssize_t err = 0; 1561 size_t offset = 7; 1562 struct stat stbuf; 1563 V9fsFidState *fidp; 1564 V9fsPDU *pdu = opaque; 1565 char *basename; 1566 1567 err = pdu_unmarshal(pdu, offset, "d", &fid); 1568 if (err < 0) { 1569 goto out_nofid; 1570 } 1571 trace_v9fs_stat(pdu->tag, pdu->id, fid); 1572 1573 fidp = get_fid(pdu, fid); 1574 if (fidp == NULL) { 1575 err = -ENOENT; 1576 goto out_nofid; 1577 } 1578 err = v9fs_co_lstat(pdu, &fidp->path, &stbuf); 1579 if (err < 0) { 1580 goto out; 1581 } 1582 basename = g_path_get_basename(fidp->path.data); 1583 err = stat_to_v9stat(pdu, &fidp->path, basename, &stbuf, &v9stat); 1584 g_free(basename); 1585 if (err < 0) { 1586 goto out; 1587 } 1588 err = pdu_marshal(pdu, offset, "wS", 0, &v9stat); 1589 if (err < 0) { 1590 v9fs_stat_free(&v9stat); 1591 goto out; 1592 } 1593 trace_v9fs_stat_return(pdu->tag, pdu->id, v9stat.mode, 1594 v9stat.atime, v9stat.mtime, v9stat.length); 1595 err += offset; 1596 v9fs_stat_free(&v9stat); 1597 out: 1598 put_fid(pdu, fidp); 1599 out_nofid: 1600 pdu_complete(pdu, err); 1601 } 1602 1603 static bool fid_has_valid_file_handle(V9fsState *s, V9fsFidState *fidp) 1604 { 1605 return s->ops->has_valid_file_handle(fidp->fid_type, &fidp->fs); 1606 } 1607 1608 static void coroutine_fn 
v9fs_getattr(void *opaque) 1609 { 1610 int32_t fid; 1611 size_t offset = 7; 1612 ssize_t retval = 0; 1613 struct stat stbuf; 1614 V9fsFidState *fidp; 1615 uint64_t request_mask; 1616 V9fsStatDotl v9stat_dotl; 1617 V9fsPDU *pdu = opaque; 1618 1619 retval = pdu_unmarshal(pdu, offset, "dq", &fid, &request_mask); 1620 if (retval < 0) { 1621 goto out_nofid; 1622 } 1623 trace_v9fs_getattr(pdu->tag, pdu->id, fid, request_mask); 1624 1625 fidp = get_fid(pdu, fid); 1626 if (fidp == NULL) { 1627 retval = -ENOENT; 1628 goto out_nofid; 1629 } 1630 if (fid_has_valid_file_handle(pdu->s, fidp)) { 1631 retval = v9fs_co_fstat(pdu, fidp, &stbuf); 1632 } else { 1633 retval = v9fs_co_lstat(pdu, &fidp->path, &stbuf); 1634 } 1635 if (retval < 0) { 1636 goto out; 1637 } 1638 retval = stat_to_v9stat_dotl(pdu, &stbuf, &v9stat_dotl); 1639 if (retval < 0) { 1640 goto out; 1641 } 1642 1643 /* fill st_gen if requested and supported by underlying fs */ 1644 if (request_mask & P9_STATS_GEN) { 1645 retval = v9fs_co_st_gen(pdu, &fidp->path, stbuf.st_mode, &v9stat_dotl); 1646 switch (retval) { 1647 case 0: 1648 /* we have valid st_gen: update result mask */ 1649 v9stat_dotl.st_result_mask |= P9_STATS_GEN; 1650 break; 1651 case -EINTR: 1652 /* request cancelled, e.g. 
by Tflush */ 1653 goto out; 1654 default: 1655 /* failed to get st_gen: not fatal, ignore */ 1656 break; 1657 } 1658 } 1659 retval = pdu_marshal(pdu, offset, "A", &v9stat_dotl); 1660 if (retval < 0) { 1661 goto out; 1662 } 1663 retval += offset; 1664 trace_v9fs_getattr_return(pdu->tag, pdu->id, v9stat_dotl.st_result_mask, 1665 v9stat_dotl.st_mode, v9stat_dotl.st_uid, 1666 v9stat_dotl.st_gid); 1667 out: 1668 put_fid(pdu, fidp); 1669 out_nofid: 1670 pdu_complete(pdu, retval); 1671 } 1672 1673 /* Attribute flags */ 1674 #define P9_ATTR_MODE (1 << 0) 1675 #define P9_ATTR_UID (1 << 1) 1676 #define P9_ATTR_GID (1 << 2) 1677 #define P9_ATTR_SIZE (1 << 3) 1678 #define P9_ATTR_ATIME (1 << 4) 1679 #define P9_ATTR_MTIME (1 << 5) 1680 #define P9_ATTR_CTIME (1 << 6) 1681 #define P9_ATTR_ATIME_SET (1 << 7) 1682 #define P9_ATTR_MTIME_SET (1 << 8) 1683 1684 #define P9_ATTR_MASK 127 1685 1686 static void coroutine_fn v9fs_setattr(void *opaque) 1687 { 1688 int err = 0; 1689 int32_t fid; 1690 V9fsFidState *fidp; 1691 size_t offset = 7; 1692 V9fsIattr v9iattr; 1693 V9fsPDU *pdu = opaque; 1694 1695 err = pdu_unmarshal(pdu, offset, "dI", &fid, &v9iattr); 1696 if (err < 0) { 1697 goto out_nofid; 1698 } 1699 1700 trace_v9fs_setattr(pdu->tag, pdu->id, fid, 1701 v9iattr.valid, v9iattr.mode, v9iattr.uid, v9iattr.gid, 1702 v9iattr.size, v9iattr.atime_sec, v9iattr.mtime_sec); 1703 1704 fidp = get_fid(pdu, fid); 1705 if (fidp == NULL) { 1706 err = -EINVAL; 1707 goto out_nofid; 1708 } 1709 if (v9iattr.valid & P9_ATTR_MODE) { 1710 err = v9fs_co_chmod(pdu, &fidp->path, v9iattr.mode); 1711 if (err < 0) { 1712 goto out; 1713 } 1714 } 1715 if (v9iattr.valid & (P9_ATTR_ATIME | P9_ATTR_MTIME)) { 1716 struct timespec times[2]; 1717 if (v9iattr.valid & P9_ATTR_ATIME) { 1718 if (v9iattr.valid & P9_ATTR_ATIME_SET) { 1719 times[0].tv_sec = v9iattr.atime_sec; 1720 times[0].tv_nsec = v9iattr.atime_nsec; 1721 } else { 1722 times[0].tv_nsec = UTIME_NOW; 1723 } 1724 } else { 1725 times[0].tv_nsec = UTIME_OMIT; 
1726 } 1727 if (v9iattr.valid & P9_ATTR_MTIME) { 1728 if (v9iattr.valid & P9_ATTR_MTIME_SET) { 1729 times[1].tv_sec = v9iattr.mtime_sec; 1730 times[1].tv_nsec = v9iattr.mtime_nsec; 1731 } else { 1732 times[1].tv_nsec = UTIME_NOW; 1733 } 1734 } else { 1735 times[1].tv_nsec = UTIME_OMIT; 1736 } 1737 if (fid_has_valid_file_handle(pdu->s, fidp)) { 1738 err = v9fs_co_futimens(pdu, fidp, times); 1739 } else { 1740 err = v9fs_co_utimensat(pdu, &fidp->path, times); 1741 } 1742 if (err < 0) { 1743 goto out; 1744 } 1745 } 1746 /* 1747 * If the only valid entry in iattr is ctime we can call 1748 * chown(-1,-1) to update the ctime of the file 1749 */ 1750 if ((v9iattr.valid & (P9_ATTR_UID | P9_ATTR_GID)) || 1751 ((v9iattr.valid & P9_ATTR_CTIME) 1752 && !((v9iattr.valid & P9_ATTR_MASK) & ~P9_ATTR_CTIME))) { 1753 if (!(v9iattr.valid & P9_ATTR_UID)) { 1754 v9iattr.uid = -1; 1755 } 1756 if (!(v9iattr.valid & P9_ATTR_GID)) { 1757 v9iattr.gid = -1; 1758 } 1759 err = v9fs_co_chown(pdu, &fidp->path, v9iattr.uid, 1760 v9iattr.gid); 1761 if (err < 0) { 1762 goto out; 1763 } 1764 } 1765 if (v9iattr.valid & (P9_ATTR_SIZE)) { 1766 if (fid_has_valid_file_handle(pdu->s, fidp)) { 1767 err = v9fs_co_ftruncate(pdu, fidp, v9iattr.size); 1768 } else { 1769 err = v9fs_co_truncate(pdu, &fidp->path, v9iattr.size); 1770 } 1771 if (err < 0) { 1772 goto out; 1773 } 1774 } 1775 err = offset; 1776 trace_v9fs_setattr_return(pdu->tag, pdu->id); 1777 out: 1778 put_fid(pdu, fidp); 1779 out_nofid: 1780 pdu_complete(pdu, err); 1781 } 1782 1783 static int v9fs_walk_marshal(V9fsPDU *pdu, uint16_t nwnames, V9fsQID *qids) 1784 { 1785 int i; 1786 ssize_t err; 1787 size_t offset = 7; 1788 1789 err = pdu_marshal(pdu, offset, "w", nwnames); 1790 if (err < 0) { 1791 return err; 1792 } 1793 offset += err; 1794 for (i = 0; i < nwnames; i++) { 1795 err = pdu_marshal(pdu, offset, "Q", &qids[i]); 1796 if (err < 0) { 1797 return err; 1798 } 1799 offset += err; 1800 } 1801 return offset; 1802 } 1803 1804 static bool 
name_is_illegal(const char *name) 1805 { 1806 return !*name || strchr(name, '/') != NULL; 1807 } 1808 1809 static bool same_stat_id(const struct stat *a, const struct stat *b) 1810 { 1811 return a->st_dev == b->st_dev && a->st_ino == b->st_ino; 1812 } 1813 1814 /* 1815 * Returns a (newly allocated) comma-separated string presentation of the 1816 * passed array for logging (tracing) purpose for trace event "v9fs_walk". 1817 * 1818 * It is caller's responsibility to free the returned string. 1819 */ 1820 static char *trace_v9fs_walk_wnames(V9fsString *wnames, size_t nwnames) 1821 { 1822 g_autofree char **arr = g_malloc0_n(nwnames + 1, sizeof(char *)); 1823 for (size_t i = 0; i < nwnames; ++i) { 1824 arr[i] = wnames[i].data; 1825 } 1826 return g_strjoinv(", ", arr); 1827 } 1828 1829 static void coroutine_fn v9fs_walk(void *opaque) 1830 { 1831 int name_idx, nwalked; 1832 g_autofree V9fsQID *qids = NULL; 1833 int i, err = 0, any_err = 0; 1834 V9fsPath dpath, path; 1835 P9ARRAY_REF(V9fsPath) pathes = NULL; 1836 uint16_t nwnames; 1837 struct stat stbuf, fidst; 1838 g_autofree struct stat *stbufs = NULL; 1839 size_t offset = 7; 1840 int32_t fid, newfid; 1841 P9ARRAY_REF(V9fsString) wnames = NULL; 1842 g_autofree char *trace_wnames = NULL; 1843 V9fsFidState *fidp; 1844 V9fsFidState *newfidp = NULL; 1845 V9fsPDU *pdu = opaque; 1846 V9fsState *s = pdu->s; 1847 V9fsQID qid; 1848 1849 err = pdu_unmarshal(pdu, offset, "ddw", &fid, &newfid, &nwnames); 1850 if (err < 0) { 1851 pdu_complete(pdu, err); 1852 return; 1853 } 1854 offset += err; 1855 1856 if (nwnames > P9_MAXWELEM) { 1857 err = -EINVAL; 1858 goto out_nofid_nownames; 1859 } 1860 if (nwnames) { 1861 P9ARRAY_NEW(V9fsString, wnames, nwnames); 1862 qids = g_new0(V9fsQID, nwnames); 1863 stbufs = g_new0(struct stat, nwnames); 1864 P9ARRAY_NEW(V9fsPath, pathes, nwnames); 1865 for (i = 0; i < nwnames; i++) { 1866 err = pdu_unmarshal(pdu, offset, "s", &wnames[i]); 1867 if (err < 0) { 1868 goto out_nofid_nownames; 1869 } 1870 if 
(name_is_illegal(wnames[i].data)) { 1871 err = -ENOENT; 1872 goto out_nofid_nownames; 1873 } 1874 offset += err; 1875 } 1876 if (trace_event_get_state_backends(TRACE_V9FS_WALK)) { 1877 trace_wnames = trace_v9fs_walk_wnames(wnames, nwnames); 1878 trace_v9fs_walk(pdu->tag, pdu->id, fid, newfid, nwnames, 1879 trace_wnames); 1880 } 1881 } else { 1882 trace_v9fs_walk(pdu->tag, pdu->id, fid, newfid, nwnames, ""); 1883 } 1884 1885 fidp = get_fid(pdu, fid); 1886 if (fidp == NULL) { 1887 err = -ENOENT; 1888 goto out_nofid; 1889 } 1890 1891 v9fs_path_init(&dpath); 1892 v9fs_path_init(&path); 1893 /* 1894 * Both dpath and path initially point to fidp. 1895 * Needed to handle request with nwnames == 0 1896 */ 1897 v9fs_path_copy(&dpath, &fidp->path); 1898 v9fs_path_copy(&path, &fidp->path); 1899 1900 /* 1901 * To keep latency (i.e. overall execution time for processing this 1902 * Twalk client request) as small as possible, run all the required fs 1903 * driver code altogether inside the following block. 
1904 */ 1905 v9fs_co_run_in_worker({ 1906 nwalked = 0; 1907 if (v9fs_request_cancelled(pdu)) { 1908 any_err |= err = -EINTR; 1909 break; 1910 } 1911 err = s->ops->lstat(&s->ctx, &dpath, &fidst); 1912 if (err < 0) { 1913 any_err |= err = -errno; 1914 break; 1915 } 1916 stbuf = fidst; 1917 for (; nwalked < nwnames; nwalked++) { 1918 if (v9fs_request_cancelled(pdu)) { 1919 any_err |= err = -EINTR; 1920 break; 1921 } 1922 if (!same_stat_id(&pdu->s->root_st, &stbuf) || 1923 strcmp("..", wnames[nwalked].data)) 1924 { 1925 err = s->ops->name_to_path(&s->ctx, &dpath, 1926 wnames[nwalked].data, 1927 &pathes[nwalked]); 1928 if (err < 0) { 1929 any_err |= err = -errno; 1930 break; 1931 } 1932 if (v9fs_request_cancelled(pdu)) { 1933 any_err |= err = -EINTR; 1934 break; 1935 } 1936 err = s->ops->lstat(&s->ctx, &pathes[nwalked], &stbuf); 1937 if (err < 0) { 1938 any_err |= err = -errno; 1939 break; 1940 } 1941 stbufs[nwalked] = stbuf; 1942 v9fs_path_copy(&dpath, &pathes[nwalked]); 1943 } 1944 } 1945 }); 1946 /* 1947 * Handle all the rest of this Twalk request on main thread ... 1948 * 1949 * NOTE: -EINTR is an exception where we deviate from the protocol spec 1950 * and simply send a (R)Lerror response instead of bothering to assemble 1951 * a (deducted) Rwalk response; because -EINTR is always the result of a 1952 * Tflush request, so client would no longer wait for a response in this 1953 * case anyway. 
1954 */ 1955 if ((err < 0 && !nwalked) || err == -EINTR) { 1956 goto out; 1957 } 1958 1959 any_err |= err = stat_to_qid(pdu, &fidst, &qid); 1960 if (err < 0 && !nwalked) { 1961 goto out; 1962 } 1963 stbuf = fidst; 1964 1965 /* reset dpath and path */ 1966 v9fs_path_copy(&dpath, &fidp->path); 1967 v9fs_path_copy(&path, &fidp->path); 1968 1969 for (name_idx = 0; name_idx < nwalked; name_idx++) { 1970 if (!same_stat_id(&pdu->s->root_st, &stbuf) || 1971 strcmp("..", wnames[name_idx].data)) 1972 { 1973 stbuf = stbufs[name_idx]; 1974 any_err |= err = stat_to_qid(pdu, &stbuf, &qid); 1975 if (err < 0) { 1976 break; 1977 } 1978 v9fs_path_copy(&path, &pathes[name_idx]); 1979 v9fs_path_copy(&dpath, &path); 1980 } 1981 memcpy(&qids[name_idx], &qid, sizeof(qid)); 1982 } 1983 if (any_err < 0) { 1984 if (!name_idx) { 1985 /* don't send any QIDs, send Rlerror instead */ 1986 goto out; 1987 } else { 1988 /* send QIDs (not Rlerror), but fid MUST remain unaffected */ 1989 goto send_qids; 1990 } 1991 } 1992 if (fid == newfid) { 1993 if (fidp->fid_type != P9_FID_NONE) { 1994 err = -EINVAL; 1995 goto out; 1996 } 1997 v9fs_path_write_lock(s); 1998 v9fs_path_copy(&fidp->path, &path); 1999 v9fs_path_unlock(s); 2000 } else { 2001 newfidp = alloc_fid(s, newfid); 2002 if (newfidp == NULL) { 2003 err = -EINVAL; 2004 goto out; 2005 } 2006 newfidp->uid = fidp->uid; 2007 v9fs_path_copy(&newfidp->path, &path); 2008 } 2009 send_qids: 2010 err = v9fs_walk_marshal(pdu, name_idx, qids); 2011 trace_v9fs_walk_return(pdu->tag, pdu->id, name_idx, qids); 2012 out: 2013 put_fid(pdu, fidp); 2014 if (newfidp) { 2015 put_fid(pdu, newfidp); 2016 } 2017 v9fs_path_free(&dpath); 2018 v9fs_path_free(&path); 2019 goto out_pdu_complete; 2020 out_nofid_nownames: 2021 trace_v9fs_walk(pdu->tag, pdu->id, fid, newfid, nwnames, "<?>"); 2022 out_nofid: 2023 out_pdu_complete: 2024 pdu_complete(pdu, err); 2025 } 2026 2027 static int32_t coroutine_fn get_iounit(V9fsPDU *pdu, V9fsPath *path) 2028 { 2029 struct statfs stbuf; 
2030 int err = v9fs_co_statfs(pdu, path, &stbuf); 2031 2032 return blksize_to_iounit(pdu, (err >= 0) ? stbuf.f_bsize : 0); 2033 } 2034 2035 static void coroutine_fn v9fs_open(void *opaque) 2036 { 2037 int flags; 2038 int32_t fid; 2039 int32_t mode; 2040 V9fsQID qid; 2041 int iounit = 0; 2042 ssize_t err = 0; 2043 size_t offset = 7; 2044 struct stat stbuf; 2045 V9fsFidState *fidp; 2046 V9fsPDU *pdu = opaque; 2047 V9fsState *s = pdu->s; 2048 g_autofree char *trace_oflags = NULL; 2049 2050 if (s->proto_version == V9FS_PROTO_2000L) { 2051 err = pdu_unmarshal(pdu, offset, "dd", &fid, &mode); 2052 } else { 2053 uint8_t modebyte; 2054 err = pdu_unmarshal(pdu, offset, "db", &fid, &modebyte); 2055 mode = modebyte; 2056 } 2057 if (err < 0) { 2058 goto out_nofid; 2059 } 2060 if (trace_event_get_state_backends(TRACE_V9FS_OPEN)) { 2061 trace_oflags = qemu_open_flags_tostr( 2062 (s->proto_version == V9FS_PROTO_2000L) ? 2063 dotl_to_open_flags(mode) : omode_to_uflags(mode) 2064 ); 2065 trace_v9fs_open(pdu->tag, pdu->id, fid, mode, trace_oflags); 2066 } 2067 2068 fidp = get_fid(pdu, fid); 2069 if (fidp == NULL) { 2070 err = -ENOENT; 2071 goto out_nofid; 2072 } 2073 if (fidp->fid_type != P9_FID_NONE) { 2074 err = -EINVAL; 2075 goto out; 2076 } 2077 2078 err = v9fs_co_lstat(pdu, &fidp->path, &stbuf); 2079 if (err < 0) { 2080 goto out; 2081 } 2082 err = stat_to_qid(pdu, &stbuf, &qid); 2083 if (err < 0) { 2084 goto out; 2085 } 2086 if (S_ISDIR(stbuf.st_mode)) { 2087 err = v9fs_co_opendir(pdu, fidp); 2088 if (err < 0) { 2089 goto out; 2090 } 2091 fidp->fid_type = P9_FID_DIR; 2092 err = pdu_marshal(pdu, offset, "Qd", &qid, 0); 2093 if (err < 0) { 2094 goto out; 2095 } 2096 err += offset; 2097 } else { 2098 if (s->proto_version == V9FS_PROTO_2000L) { 2099 flags = get_dotl_openflags(s, mode); 2100 } else { 2101 flags = omode_to_uflags(mode); 2102 } 2103 if (is_ro_export(&s->ctx)) { 2104 if (mode & O_WRONLY || mode & O_RDWR || 2105 mode & O_APPEND || mode & O_TRUNC) { 2106 err = -EROFS; 
2107 goto out; 2108 } 2109 } 2110 err = v9fs_co_open(pdu, fidp, flags); 2111 if (err < 0) { 2112 goto out; 2113 } 2114 fidp->fid_type = P9_FID_FILE; 2115 fidp->open_flags = flags; 2116 if (flags & O_EXCL) { 2117 /* 2118 * We let the host file system do O_EXCL check 2119 * We should not reclaim such fd 2120 */ 2121 fidp->flags |= FID_NON_RECLAIMABLE; 2122 } 2123 iounit = get_iounit(pdu, &fidp->path); 2124 err = pdu_marshal(pdu, offset, "Qd", &qid, iounit); 2125 if (err < 0) { 2126 goto out; 2127 } 2128 err += offset; 2129 } 2130 trace_v9fs_open_return(pdu->tag, pdu->id, 2131 qid.type, qid.version, qid.path, iounit); 2132 out: 2133 put_fid(pdu, fidp); 2134 out_nofid: 2135 pdu_complete(pdu, err); 2136 } 2137 2138 static void coroutine_fn v9fs_lcreate(void *opaque) 2139 { 2140 int32_t dfid, flags, mode; 2141 gid_t gid; 2142 ssize_t err = 0; 2143 ssize_t offset = 7; 2144 V9fsString name; 2145 V9fsFidState *fidp; 2146 struct stat stbuf; 2147 V9fsQID qid; 2148 int32_t iounit; 2149 V9fsPDU *pdu = opaque; 2150 2151 v9fs_string_init(&name); 2152 err = pdu_unmarshal(pdu, offset, "dsddd", &dfid, 2153 &name, &flags, &mode, &gid); 2154 if (err < 0) { 2155 goto out_nofid; 2156 } 2157 trace_v9fs_lcreate(pdu->tag, pdu->id, dfid, flags, mode, gid); 2158 2159 if (name_is_illegal(name.data)) { 2160 err = -ENOENT; 2161 goto out_nofid; 2162 } 2163 2164 if (!strcmp(".", name.data) || !strcmp("..", name.data)) { 2165 err = -EEXIST; 2166 goto out_nofid; 2167 } 2168 2169 fidp = get_fid(pdu, dfid); 2170 if (fidp == NULL) { 2171 err = -ENOENT; 2172 goto out_nofid; 2173 } 2174 if (fidp->fid_type != P9_FID_NONE) { 2175 err = -EINVAL; 2176 goto out; 2177 } 2178 2179 flags = get_dotl_openflags(pdu->s, flags); 2180 err = v9fs_co_open2(pdu, fidp, &name, gid, 2181 flags | O_CREAT, mode, &stbuf); 2182 if (err < 0) { 2183 goto out; 2184 } 2185 fidp->fid_type = P9_FID_FILE; 2186 fidp->open_flags = flags; 2187 if (flags & O_EXCL) { 2188 /* 2189 * We let the host file system do O_EXCL check 2190 * We 
should not reclaim such fd 2191 */ 2192 fidp->flags |= FID_NON_RECLAIMABLE; 2193 } 2194 iounit = get_iounit(pdu, &fidp->path); 2195 err = stat_to_qid(pdu, &stbuf, &qid); 2196 if (err < 0) { 2197 goto out; 2198 } 2199 err = pdu_marshal(pdu, offset, "Qd", &qid, iounit); 2200 if (err < 0) { 2201 goto out; 2202 } 2203 err += offset; 2204 trace_v9fs_lcreate_return(pdu->tag, pdu->id, 2205 qid.type, qid.version, qid.path, iounit); 2206 out: 2207 put_fid(pdu, fidp); 2208 out_nofid: 2209 pdu_complete(pdu, err); 2210 v9fs_string_free(&name); 2211 } 2212 2213 static void coroutine_fn v9fs_fsync(void *opaque) 2214 { 2215 int err; 2216 int32_t fid; 2217 int datasync; 2218 size_t offset = 7; 2219 V9fsFidState *fidp; 2220 V9fsPDU *pdu = opaque; 2221 2222 err = pdu_unmarshal(pdu, offset, "dd", &fid, &datasync); 2223 if (err < 0) { 2224 goto out_nofid; 2225 } 2226 trace_v9fs_fsync(pdu->tag, pdu->id, fid, datasync); 2227 2228 fidp = get_fid(pdu, fid); 2229 if (fidp == NULL) { 2230 err = -ENOENT; 2231 goto out_nofid; 2232 } 2233 err = v9fs_co_fsync(pdu, fidp, datasync); 2234 if (!err) { 2235 err = offset; 2236 } 2237 put_fid(pdu, fidp); 2238 out_nofid: 2239 pdu_complete(pdu, err); 2240 } 2241 2242 static void coroutine_fn v9fs_clunk(void *opaque) 2243 { 2244 int err; 2245 int32_t fid; 2246 size_t offset = 7; 2247 V9fsFidState *fidp; 2248 V9fsPDU *pdu = opaque; 2249 V9fsState *s = pdu->s; 2250 2251 err = pdu_unmarshal(pdu, offset, "d", &fid); 2252 if (err < 0) { 2253 goto out_nofid; 2254 } 2255 trace_v9fs_clunk(pdu->tag, pdu->id, fid); 2256 2257 fidp = clunk_fid(s, fid); 2258 if (fidp == NULL) { 2259 err = -ENOENT; 2260 goto out_nofid; 2261 } 2262 /* 2263 * Bump the ref so that put_fid will 2264 * free the fid. 
2265 */ 2266 fidp->ref++; 2267 err = put_fid(pdu, fidp); 2268 if (!err) { 2269 err = offset; 2270 } 2271 out_nofid: 2272 pdu_complete(pdu, err); 2273 } 2274 2275 /* 2276 * Create a QEMUIOVector for a sub-region of PDU iovecs 2277 * 2278 * @qiov: uninitialized QEMUIOVector 2279 * @skip: number of bytes to skip from beginning of PDU 2280 * @size: number of bytes to include 2281 * @is_write: true - write, false - read 2282 * 2283 * The resulting QEMUIOVector has heap-allocated iovecs and must be cleaned up 2284 * with qemu_iovec_destroy(). 2285 */ 2286 static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu, 2287 size_t skip, size_t size, 2288 bool is_write) 2289 { 2290 QEMUIOVector elem; 2291 struct iovec *iov; 2292 unsigned int niov; 2293 2294 if (is_write) { 2295 pdu->s->transport->init_out_iov_from_pdu(pdu, &iov, &niov, size + skip); 2296 } else { 2297 pdu->s->transport->init_in_iov_from_pdu(pdu, &iov, &niov, size + skip); 2298 } 2299 2300 qemu_iovec_init_external(&elem, iov, niov); 2301 qemu_iovec_init(qiov, niov); 2302 qemu_iovec_concat(qiov, &elem, skip, size); 2303 } 2304 2305 static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp, 2306 uint64_t off, uint32_t max_count) 2307 { 2308 ssize_t err; 2309 size_t offset = 7; 2310 uint64_t read_count; 2311 QEMUIOVector qiov_full; 2312 2313 if (fidp->fs.xattr.len < off) { 2314 read_count = 0; 2315 } else { 2316 read_count = fidp->fs.xattr.len - off; 2317 } 2318 if (read_count > max_count) { 2319 read_count = max_count; 2320 } 2321 err = pdu_marshal(pdu, offset, "d", read_count); 2322 if (err < 0) { 2323 return err; 2324 } 2325 offset += err; 2326 2327 v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, read_count, false); 2328 err = v9fs_pack(qiov_full.iov, qiov_full.niov, 0, 2329 ((char *)fidp->fs.xattr.value) + off, 2330 read_count); 2331 qemu_iovec_destroy(&qiov_full); 2332 if (err < 0) { 2333 return err; 2334 } 2335 offset += err; 2336 return offset; 2337 } 2338 2339 static int 
coroutine_fn v9fs_do_readdir_with_stat(V9fsPDU *pdu,
                                       V9fsFidState *fidp,
                                       uint32_t max_count)
{
    /*
     * Reads directory entries one at a time, stats each of them and
     * marshals the resulting V9fsStat records into the reply until
     * max_count bytes would be exceeded.
     */
    V9fsPath path;
    V9fsStat v9stat;
    int len, err = 0;
    int32_t count = 0;
    struct stat stbuf;
    off_t saved_dir_pos;
    struct dirent *dent;

    /* save the directory position */
    saved_dir_pos = v9fs_co_telldir(pdu, fidp);
    if (saved_dir_pos < 0) {
        return saved_dir_pos;
    }

    while (1) {
        v9fs_path_init(&path);

        v9fs_readdir_lock(&fidp->fs.dir);

        /* a NULL dent with err == 0 ends the loop without an error */
        err = v9fs_co_readdir(pdu, fidp, &dent);
        if (err || !dent) {
            break;
        }
        err = v9fs_co_name_to_path(pdu, &fidp->path, dent->d_name, &path);
        if (err < 0) {
            break;
        }
        err = v9fs_co_lstat(pdu, &path, &stbuf);
        if (err < 0) {
            break;
        }
        err = stat_to_v9stat(pdu, &path, dent->d_name, &stbuf, &v9stat);
        if (err < 0) {
            break;
        }
        if ((count + v9stat.size + 2) > max_count) {
            v9fs_readdir_unlock(&fidp->fs.dir);

            /* Ran out of buffer.
Set dir back to old position and return */
            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
            v9fs_stat_free(&v9stat);
            v9fs_path_free(&path);
            return count;
        }

        /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
        len = pdu_marshal(pdu, 11 + count, "S", &v9stat);

        v9fs_readdir_unlock(&fidp->fs.dir);

        if (len < 0) {
            /* marshalling failed: rewind to the last fully sent entry */
            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
            v9fs_stat_free(&v9stat);
            v9fs_path_free(&path);
            return len;
        }
        count += len;
        v9fs_stat_free(&v9stat);
        v9fs_path_free(&path);
        /* remember where the next entry starts */
        saved_dir_pos = qemu_dirent_off(dent);
    }

    /* every break above leaves the loop with the readdir lock held */
    v9fs_readdir_unlock(&fidp->fs.dir);

    v9fs_path_free(&path);
    if (err < 0) {
        return err;
    }
    return count;
}

/*
 * Handle a read request. Depending on the fid type this reads directory
 * entries (9P2000.u only), file data or an xattr value.
 */
static void coroutine_fn v9fs_read(void *opaque)
{
    int32_t fid;
    uint64_t off;
    ssize_t err = 0;
    int32_t count = 0;
    size_t offset = 7;
    uint32_t max_count;
    V9fsFidState *fidp;
    V9fsPDU *pdu = opaque;
    V9fsState *s = pdu->s;

    err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &max_count);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_read(pdu->tag, pdu->id, fid, off, max_count);

    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -EINVAL;
        goto out_nofid;
    }
    if (fidp->fid_type == P9_FID_DIR) {
        if (s->proto_version != V9FS_PROTO_2000U) {
            warn_report_once(
                "9p: bad client: T_read request on directory only expected "
                "with 9P2000.u protocol version"
            );
            err = -EOPNOTSUPP;
            goto out;
        }
        /* offset 0 restarts the directory listing */
        if (off == 0) {
            v9fs_co_rewinddir(pdu, fidp);
        }
        count = v9fs_do_readdir_with_stat(pdu, fidp, max_count);
        if (count < 0) {
            err = count;
            goto out;
        }
        err = pdu_marshal(pdu, offset, "d", count);
        if (err < 0) {
            goto out;
        }
        err += offset + count;
    } else if (fidp->fid_type == P9_FID_FILE) {
        QEMUIOVector
qiov_full;
        QEMUIOVector qiov;
        int32_t len;

        /* payload starts after the header and the 4-byte count field */
        v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset + 4, max_count, false);
        qemu_iovec_init(&qiov, qiov_full.niov);
        do {
            /* qiov covers the part of the buffer that is not filled yet */
            qemu_iovec_reset(&qiov);
            qemu_iovec_concat(&qiov, &qiov_full, count, qiov_full.size - count);
            if (0) {
                print_sg(qiov.iov, qiov.niov);
            }
            /* Loop in case of EINTR */
            do {
                len = v9fs_co_preadv(pdu, fidp, qiov.iov, qiov.niov, off);
                if (len >= 0) {
                    off += len;
                    count += len;
                }
            } while (len == -EINTR && !pdu->cancelled);
            if (len < 0) {
                /* IO error return the error */
                err = len;
                goto out_free_iovec;
            }
        } while (count < max_count && len > 0);
        err = pdu_marshal(pdu, offset, "d", count);
        if (err < 0) {
            goto out_free_iovec;
        }
        err += offset + count;
out_free_iovec:
        qemu_iovec_destroy(&qiov);
        qemu_iovec_destroy(&qiov_full);
    } else if (fidp->fid_type == P9_FID_XATTR) {
        err = v9fs_xattr_read(s, pdu, fidp, off, max_count);
    } else {
        err = -EINVAL;
    }
    trace_v9fs_read_return(pdu->tag, pdu->id, count, err);
out:
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, err);
}

/**
 * v9fs_readdir_response_size() - Returns size required in Rreaddir response
 * for the passed dirent @name.
 *
 * @name: directory entry's name (i.e.
file name, directory name) 2511 * Return: required size in bytes 2512 */ 2513 size_t v9fs_readdir_response_size(V9fsString *name) 2514 { 2515 /* 2516 * Size of each dirent on the wire: size of qid (13) + size of offset (8) 2517 * size of type (1) + size of name.size (2) + strlen(name.data) 2518 */ 2519 return 24 + v9fs_string_size(name); 2520 } 2521 2522 static void v9fs_free_dirents(struct V9fsDirEnt *e) 2523 { 2524 struct V9fsDirEnt *next = NULL; 2525 2526 for (; e; e = next) { 2527 next = e->next; 2528 g_free(e->dent); 2529 g_free(e->st); 2530 g_free(e); 2531 } 2532 } 2533 2534 static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp, 2535 off_t offset, int32_t max_count) 2536 { 2537 size_t size; 2538 V9fsQID qid; 2539 V9fsString name; 2540 int len, err = 0; 2541 int32_t count = 0; 2542 off_t off; 2543 struct dirent *dent; 2544 struct stat *st; 2545 struct V9fsDirEnt *entries = NULL; 2546 2547 /* 2548 * inode remapping requires the device id, which in turn might be 2549 * different for different directory entries, so if inode remapping is 2550 * enabled we have to make a full stat for each directory entry 2551 */ 2552 const bool dostat = pdu->s->ctx.export_flags & V9FS_REMAP_INODES; 2553 2554 /* 2555 * Fetch all required directory entries altogether on a background IO 2556 * thread from fs driver. We don't want to do that for each entry 2557 * individually, because hopping between threads (this main IO thread 2558 * and background IO driver thread) would sum up to huge latencies. 
     */
    count = v9fs_co_readdir_many(pdu, fidp, &entries, offset, max_count,
                                 dostat);
    if (count < 0) {
        err = count;
        count = 0;
        goto out;
    }
    /* from here on count tracks the bytes marshalled into the reply */
    count = 0;

    for (struct V9fsDirEnt *e = entries; e; e = e->next) {
        dent = e->dent;

        if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) {
            st = e->st;
            /* e->st should never be NULL, but just to be sure */
            if (!st) {
                err = -1;
                break;
            }

            /* remap inode */
            err = stat_to_qid(pdu, st, &qid);
            if (err < 0) {
                break;
            }
        } else {
            /*
             * Fill up just the path field of qid because the client uses
             * only that. To fill the entire qid structure we will have
             * to stat each dirent found, which is expensive. For the
             * latter reason we don't call stat_to_qid() here. Only drawback
             * is that no multi-device export detection of stat_to_qid()
             * would be done and provided as error to the user here. But
             * user would get that error anyway when accessing those
             * files/dirs through other ways.
             */
            /* copy no more bytes than the smaller of the two fields holds */
            size = MIN(sizeof(dent->d_ino), sizeof(qid.path));
            memcpy(&qid.path, &dent->d_ino, size);
            /* Fill the other fields with dummy values */
            qid.type = 0;
            qid.version = 0;
        }

        off = qemu_dirent_off(dent);
        v9fs_string_init(&name);
        v9fs_string_sprintf(&name, "%s", dent->d_name);

        /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
        len = pdu_marshal(pdu, 11 + count, "Qqbs",
                          &qid, off,
                          dent->d_type, &name);

        v9fs_string_free(&name);

        if (len < 0) {
            err = len;
            break;
        }

        count += len;
    }

out:
    v9fs_free_dirents(entries);
    if (err < 0) {
        return err;
    }
    return count;
}

/*
 * Handle a readdir request: validate the fid and protocol version, then let
 * v9fs_do_readdir() marshal the entries and prepend the byte count.
 */
static void coroutine_fn v9fs_readdir(void *opaque)
{
    int32_t fid;
    V9fsFidState *fidp;
    ssize_t retval = 0;
    size_t offset = 7;
    uint64_t initial_offset;
    int32_t count;
    uint32_t max_count;
    V9fsPDU *pdu = opaque;
    V9fsState *s = pdu->s;

    retval = pdu_unmarshal(pdu, offset, "dqd", &fid,
                           &initial_offset, &max_count);
    if (retval < 0) {
        goto out_nofid;
    }
    trace_v9fs_readdir(pdu->tag, pdu->id, fid, initial_offset, max_count);

    /* Enough space for a R_readdir header: size[4] Rreaddir tag[2] count[4] */
    if (max_count > s->msize - 11) {
        max_count = s->msize - 11;
        warn_report_once(
            "9p: bad client: T_readdir with count > msize - 11"
        );
    }

    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        retval = -EINVAL;
        goto out_nofid;
    }
    if (fidp->fid_type != P9_FID_DIR) {
        warn_report_once("9p: bad client: T_readdir on non-directory stream");
        retval = -ENOTDIR;
        goto out;
    }
    if (!fidp->fs.dir.stream) {
        retval = -EINVAL;
        goto out;
    }
    if (s->proto_version != V9FS_PROTO_2000L) {
        warn_report_once(
            "9p: bad client: T_readdir request only expected with 9P2000.L "
2674 "protocol version" 2675 ); 2676 retval = -EOPNOTSUPP; 2677 goto out; 2678 } 2679 count = v9fs_do_readdir(pdu, fidp, (off_t) initial_offset, max_count); 2680 if (count < 0) { 2681 retval = count; 2682 goto out; 2683 } 2684 retval = pdu_marshal(pdu, offset, "d", count); 2685 if (retval < 0) { 2686 goto out; 2687 } 2688 retval += count + offset; 2689 trace_v9fs_readdir_return(pdu->tag, pdu->id, count, retval); 2690 out: 2691 put_fid(pdu, fidp); 2692 out_nofid: 2693 pdu_complete(pdu, retval); 2694 } 2695 2696 static int v9fs_xattr_write(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp, 2697 uint64_t off, uint32_t count, 2698 struct iovec *sg, int cnt) 2699 { 2700 int i, to_copy; 2701 ssize_t err = 0; 2702 uint64_t write_count; 2703 size_t offset = 7; 2704 2705 2706 if (fidp->fs.xattr.len < off) { 2707 return -ENOSPC; 2708 } 2709 write_count = fidp->fs.xattr.len - off; 2710 if (write_count > count) { 2711 write_count = count; 2712 } 2713 err = pdu_marshal(pdu, offset, "d", write_count); 2714 if (err < 0) { 2715 return err; 2716 } 2717 err += offset; 2718 fidp->fs.xattr.copied_len += write_count; 2719 /* 2720 * Now copy the content from sg list 2721 */ 2722 for (i = 0; i < cnt; i++) { 2723 if (write_count > sg[i].iov_len) { 2724 to_copy = sg[i].iov_len; 2725 } else { 2726 to_copy = write_count; 2727 } 2728 memcpy((char *)fidp->fs.xattr.value + off, sg[i].iov_base, to_copy); 2729 /* updating vs->off since we are not using below */ 2730 off += to_copy; 2731 write_count -= to_copy; 2732 } 2733 2734 return err; 2735 } 2736 2737 static void coroutine_fn v9fs_write(void *opaque) 2738 { 2739 ssize_t err; 2740 int32_t fid; 2741 uint64_t off; 2742 uint32_t count; 2743 int32_t len = 0; 2744 int32_t total = 0; 2745 size_t offset = 7; 2746 V9fsFidState *fidp; 2747 V9fsPDU *pdu = opaque; 2748 V9fsState *s = pdu->s; 2749 QEMUIOVector qiov_full; 2750 QEMUIOVector qiov; 2751 2752 err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &count); 2753 if (err < 0) { 2754 pdu_complete(pdu, 
err);
        return;
    }
    /* payload starts right behind the unmarshalled request fields */
    offset += err;
    v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, count, true);
    trace_v9fs_write(pdu->tag, pdu->id, fid, off, count, qiov_full.niov);

    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -EINVAL;
        goto out_nofid;
    }
    if (fidp->fid_type == P9_FID_FILE) {
        if (fidp->fs.fd == -1) {
            err = -EINVAL;
            goto out;
        }
    } else if (fidp->fid_type == P9_FID_XATTR) {
        /*
         * setxattr operation
         */
        err = v9fs_xattr_write(s, pdu, fidp, off, count,
                               qiov_full.iov, qiov_full.niov);
        goto out;
    } else {
        err = -EINVAL;
        goto out;
    }
    qemu_iovec_init(&qiov, qiov_full.niov);
    do {
        /* qiov covers the part of the payload that is not written yet */
        qemu_iovec_reset(&qiov);
        qemu_iovec_concat(&qiov, &qiov_full, total, qiov_full.size - total);
        if (0) {
            print_sg(qiov.iov, qiov.niov);
        }
        /* Loop in case of EINTR */
        do {
            len = v9fs_co_pwritev(pdu, fidp, qiov.iov, qiov.niov, off);
            if (len >= 0) {
                off += len;
                total += len;
            }
        } while (len == -EINTR && !pdu->cancelled);
        if (len < 0) {
            /* IO error return the error */
            err = len;
            goto out_qiov;
        }
    } while (total < count && len > 0);

    /* the reply is marshalled from the start of the PDU payload again */
    offset = 7;
    err = pdu_marshal(pdu, offset, "d", total);
    if (err < 0) {
        goto out_qiov;
    }
    err += offset;
    trace_v9fs_write_return(pdu->tag, pdu->id, total, err);
out_qiov:
    qemu_iovec_destroy(&qiov);
out:
    put_fid(pdu, fidp);
out_nofid:
    qemu_iovec_destroy(&qiov_full);
    pdu_complete(pdu, err);
}

/*
 * Handle a create request. The object type to create is selected by the
 * P9_STAT_MODE_* bits in perm.
 */
static void coroutine_fn v9fs_create(void *opaque)
{
    int32_t fid;
    int err = 0;
    size_t offset = 7;
    V9fsFidState *fidp;
    V9fsQID qid;
    int32_t perm;
    int8_t mode;
    V9fsPath path;
    struct stat stbuf;
    V9fsString name;
    V9fsString extension;
    int iounit;
    V9fsPDU *pdu = opaque;
    V9fsState *s = pdu->s;

v9fs_path_init(&path);
    v9fs_string_init(&name);
    v9fs_string_init(&extension);
    err = pdu_unmarshal(pdu, offset, "dsdbs", &fid, &name,
                        &perm, &mode, &extension);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_create(pdu->tag, pdu->id, fid, name.data, perm, mode);

    if (name_is_illegal(name.data)) {
        err = -ENOENT;
        goto out_nofid;
    }

    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
        err = -EEXIST;
        goto out_nofid;
    }

    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -EINVAL;
        goto out_nofid;
    }
    if (fidp->fid_type != P9_FID_NONE) {
        err = -EINVAL;
        goto out;
    }
    if (perm & P9_STAT_MODE_DIR) {
        /* directory: create it, repoint the fid at it and open it */
        err = v9fs_co_mkdir(pdu, fidp, &name, perm & 0777,
                            fidp->uid, -1, &stbuf);
        if (err < 0) {
            goto out;
        }
        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
        if (err < 0) {
            goto out;
        }
        v9fs_path_write_lock(s);
        v9fs_path_copy(&fidp->path, &path);
        v9fs_path_unlock(s);
        err = v9fs_co_opendir(pdu, fidp);
        if (err < 0) {
            goto out;
        }
        fidp->fid_type = P9_FID_DIR;
    } else if (perm & P9_STAT_MODE_SYMLINK) {
        /* symlink: the extension string is passed as the link target */
        err = v9fs_co_symlink(pdu, fidp, &name,
                              extension.data, -1 , &stbuf);
        if (err < 0) {
            goto out;
        }
        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
        if (err < 0) {
            goto out;
        }
        v9fs_path_write_lock(s);
        v9fs_path_copy(&fidp->path, &path);
        v9fs_path_unlock(s);
    } else if (perm & P9_STAT_MODE_LINK) {
        /* hard link: the extension string holds the fid to link to */
        int32_t ofid = atoi(extension.data);
        V9fsFidState *ofidp = get_fid(pdu, ofid);
        if (ofidp == NULL) {
            err = -EINVAL;
            goto out;
        }
        err = v9fs_co_link(pdu, ofidp, fidp, &name);
        put_fid(pdu, ofidp);
        if (err < 0) {
            goto out;
        }
        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
        if (err < 0) {
            fidp->fid_type =
P9_FID_NONE; 2912 goto out; 2913 } 2914 v9fs_path_write_lock(s); 2915 v9fs_path_copy(&fidp->path, &path); 2916 v9fs_path_unlock(s); 2917 err = v9fs_co_lstat(pdu, &fidp->path, &stbuf); 2918 if (err < 0) { 2919 fidp->fid_type = P9_FID_NONE; 2920 goto out; 2921 } 2922 } else if (perm & P9_STAT_MODE_DEVICE) { 2923 char ctype; 2924 uint32_t major, minor; 2925 mode_t nmode = 0; 2926 2927 if (sscanf(extension.data, "%c %u %u", &ctype, &major, &minor) != 3) { 2928 err = -errno; 2929 goto out; 2930 } 2931 2932 switch (ctype) { 2933 case 'c': 2934 nmode = S_IFCHR; 2935 break; 2936 case 'b': 2937 nmode = S_IFBLK; 2938 break; 2939 default: 2940 err = -EIO; 2941 goto out; 2942 } 2943 2944 nmode |= perm & 0777; 2945 err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1, 2946 makedev(major, minor), nmode, &stbuf); 2947 if (err < 0) { 2948 goto out; 2949 } 2950 err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path); 2951 if (err < 0) { 2952 goto out; 2953 } 2954 v9fs_path_write_lock(s); 2955 v9fs_path_copy(&fidp->path, &path); 2956 v9fs_path_unlock(s); 2957 } else if (perm & P9_STAT_MODE_NAMED_PIPE) { 2958 err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1, 2959 0, S_IFIFO | (perm & 0777), &stbuf); 2960 if (err < 0) { 2961 goto out; 2962 } 2963 err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path); 2964 if (err < 0) { 2965 goto out; 2966 } 2967 v9fs_path_write_lock(s); 2968 v9fs_path_copy(&fidp->path, &path); 2969 v9fs_path_unlock(s); 2970 } else if (perm & P9_STAT_MODE_SOCKET) { 2971 err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1, 2972 0, S_IFSOCK | (perm & 0777), &stbuf); 2973 if (err < 0) { 2974 goto out; 2975 } 2976 err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path); 2977 if (err < 0) { 2978 goto out; 2979 } 2980 v9fs_path_write_lock(s); 2981 v9fs_path_copy(&fidp->path, &path); 2982 v9fs_path_unlock(s); 2983 } else { 2984 err = v9fs_co_open2(pdu, fidp, &name, -1, 2985 omode_to_uflags(mode) | O_CREAT, perm, &stbuf); 2986 if (err < 0) { 2987 goto 
out;
        }
        fidp->fid_type = P9_FID_FILE;
        fidp->open_flags = omode_to_uflags(mode);
        if (fidp->open_flags & O_EXCL) {
            /*
             * We let the host file system do O_EXCL check
             * We should not reclaim such fd
             */
            fidp->flags |= FID_NON_RECLAIMABLE;
        }
    }
    iounit = get_iounit(pdu, &fidp->path);
    err = stat_to_qid(pdu, &stbuf, &qid);
    if (err < 0) {
        goto out;
    }
    err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
    if (err < 0) {
        goto out;
    }
    err += offset;
    trace_v9fs_create_return(pdu->tag, pdu->id,
                             qid.type, qid.version, qid.path, iounit);
out:
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, err);
    v9fs_string_free(&name);
    v9fs_string_free(&extension);
    v9fs_path_free(&path);
}

/*
 * Handle a symlink request: create symlink @name with target @symname
 * relative to the directory fid and reply with the new object's qid.
 */
static void coroutine_fn v9fs_symlink(void *opaque)
{
    V9fsPDU *pdu = opaque;
    V9fsString name;
    V9fsString symname;
    V9fsFidState *dfidp;
    V9fsQID qid;
    struct stat stbuf;
    int32_t dfid;
    int err = 0;
    gid_t gid;
    size_t offset = 7;

    v9fs_string_init(&name);
    v9fs_string_init(&symname);
    err = pdu_unmarshal(pdu, offset, "dssd", &dfid, &name, &symname, &gid);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_symlink(pdu->tag, pdu->id, dfid, name.data, symname.data, gid);

    if (name_is_illegal(name.data)) {
        err = -ENOENT;
        goto out_nofid;
    }

    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
        err = -EEXIST;
        goto out_nofid;
    }

    dfidp = get_fid(pdu, dfid);
    if (dfidp == NULL) {
        err = -EINVAL;
        goto out_nofid;
    }
    err = v9fs_co_symlink(pdu, dfidp, &name, symname.data, gid, &stbuf);
    if (err < 0) {
        goto out;
    }
    err = stat_to_qid(pdu, &stbuf, &qid);
    if (err < 0) {
        goto out;
    }
    err = pdu_marshal(pdu, offset, "Q", &qid);
    if (err < 0) {
        goto out;
    }
    err
+= offset; 3069 trace_v9fs_symlink_return(pdu->tag, pdu->id, 3070 qid.type, qid.version, qid.path); 3071 out: 3072 put_fid(pdu, dfidp); 3073 out_nofid: 3074 pdu_complete(pdu, err); 3075 v9fs_string_free(&name); 3076 v9fs_string_free(&symname); 3077 } 3078 3079 static void coroutine_fn v9fs_flush(void *opaque) 3080 { 3081 ssize_t err; 3082 int16_t tag; 3083 size_t offset = 7; 3084 V9fsPDU *cancel_pdu = NULL; 3085 V9fsPDU *pdu = opaque; 3086 V9fsState *s = pdu->s; 3087 3088 err = pdu_unmarshal(pdu, offset, "w", &tag); 3089 if (err < 0) { 3090 pdu_complete(pdu, err); 3091 return; 3092 } 3093 trace_v9fs_flush(pdu->tag, pdu->id, tag); 3094 3095 if (pdu->tag == tag) { 3096 warn_report("the guest sent a self-referencing 9P flush request"); 3097 } else { 3098 QLIST_FOREACH(cancel_pdu, &s->active_list, next) { 3099 if (cancel_pdu->tag == tag) { 3100 break; 3101 } 3102 } 3103 } 3104 if (cancel_pdu) { 3105 cancel_pdu->cancelled = 1; 3106 /* 3107 * Wait for pdu to complete. 3108 */ 3109 qemu_co_queue_wait(&cancel_pdu->complete, NULL); 3110 if (!qemu_co_queue_next(&cancel_pdu->complete)) { 3111 cancel_pdu->cancelled = 0; 3112 pdu_free(cancel_pdu); 3113 } 3114 } 3115 pdu_complete(pdu, 7); 3116 } 3117 3118 static void coroutine_fn v9fs_link(void *opaque) 3119 { 3120 V9fsPDU *pdu = opaque; 3121 int32_t dfid, oldfid; 3122 V9fsFidState *dfidp, *oldfidp; 3123 V9fsString name; 3124 size_t offset = 7; 3125 int err = 0; 3126 3127 v9fs_string_init(&name); 3128 err = pdu_unmarshal(pdu, offset, "dds", &dfid, &oldfid, &name); 3129 if (err < 0) { 3130 goto out_nofid; 3131 } 3132 trace_v9fs_link(pdu->tag, pdu->id, dfid, oldfid, name.data); 3133 3134 if (name_is_illegal(name.data)) { 3135 err = -ENOENT; 3136 goto out_nofid; 3137 } 3138 3139 if (!strcmp(".", name.data) || !strcmp("..", name.data)) { 3140 err = -EEXIST; 3141 goto out_nofid; 3142 } 3143 3144 dfidp = get_fid(pdu, dfid); 3145 if (dfidp == NULL) { 3146 err = -ENOENT; 3147 goto out_nofid; 3148 } 3149 3150 oldfidp = get_fid(pdu, 
oldfid);
    if (oldfidp == NULL) {
        err = -ENOENT;
        goto out;
    }
    err = v9fs_co_link(pdu, oldfidp, dfidp, &name);
    if (!err) {
        err = offset;
    }
    put_fid(pdu, oldfidp);
out:
    put_fid(pdu, dfidp);
out_nofid:
    v9fs_string_free(&name);
    pdu_complete(pdu, err);
}

/* Only works with path name based fid */
static void coroutine_fn v9fs_remove(void *opaque)
{
    int32_t fid;
    int err = 0;
    size_t offset = 7;
    V9fsFidState *fidp;
    V9fsPDU *pdu = opaque;

    err = pdu_unmarshal(pdu, offset, "d", &fid);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_remove(pdu->tag, pdu->id, fid);

    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -EINVAL;
        goto out_nofid;
    }
    /* if fs driver is not path based, return EOPNOTSUPP */
    if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) {
        err = -EOPNOTSUPP;
        goto out_err;
    }
    /*
     * If the file is unlinked, we cannot reopen
     * the file later.
So don't reclaim fd
     */
    err = v9fs_mark_fids_unreclaim(pdu, &fidp->path);
    if (err < 0) {
        goto out_err;
    }
    err = v9fs_co_remove(pdu, &fidp->path);
    if (!err) {
        err = offset;
    }
out_err:
    /* For TREMOVE we need to clunk the fid even on failed remove */
    clunk_fid(pdu->s, fidp->fid);
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, err);
}

/*
 * Handle an unlinkat request: remove @name from the directory referenced
 * by dfid. Only the P9_DOTL_AT_REMOVEDIR flag is accepted.
 */
static void coroutine_fn v9fs_unlinkat(void *opaque)
{
    int err = 0;
    V9fsString name;
    int32_t dfid, flags, rflags = 0;
    size_t offset = 7;
    V9fsPath path;
    V9fsFidState *dfidp;
    V9fsPDU *pdu = opaque;

    v9fs_string_init(&name);
    err = pdu_unmarshal(pdu, offset, "dsd", &dfid, &name, &flags);
    if (err < 0) {
        goto out_nofid;
    }

    if (name_is_illegal(name.data)) {
        err = -ENOENT;
        goto out_nofid;
    }

    if (!strcmp(".", name.data)) {
        err = -EINVAL;
        goto out_nofid;
    }

    if (!strcmp("..", name.data)) {
        err = -ENOTEMPTY;
        goto out_nofid;
    }

    /* reject any flag other than P9_DOTL_AT_REMOVEDIR */
    if (flags & ~P9_DOTL_AT_REMOVEDIR) {
        err = -EINVAL;
        goto out_nofid;
    }

    /* translate the protocol flag into the host flag */
    if (flags & P9_DOTL_AT_REMOVEDIR) {
        rflags |= AT_REMOVEDIR;
    }

    dfidp = get_fid(pdu, dfid);
    if (dfidp == NULL) {
        err = -EINVAL;
        goto out_nofid;
    }
    /*
     * If the file is unlinked, we cannot reopen
     * the file later.
So don't reclaim fd
     */
    v9fs_path_init(&path);
    err = v9fs_co_name_to_path(pdu, &dfidp->path, name.data, &path);
    if (err < 0) {
        goto out_err;
    }
    err = v9fs_mark_fids_unreclaim(pdu, &path);
    if (err < 0) {
        goto out_err;
    }
    err = v9fs_co_unlinkat(pdu, &dfidp->path, &name, rflags);
    if (!err) {
        err = offset;
    }
out_err:
    put_fid(pdu, dfidp);
    v9fs_path_free(&path);
out_nofid:
    pdu_complete(pdu, err);
    v9fs_string_free(&name);
}


/* Only works with path name based fid */
static int coroutine_fn v9fs_complete_rename(V9fsPDU *pdu, V9fsFidState *fidp,
                                             int32_t newdirfid,
                                             V9fsString *name)
{
    int err = 0;
    V9fsPath new_path;
    V9fsFidState *tfidp;
    V9fsState *s = pdu->s;
    V9fsFidState *dirfidp = NULL;
    GHashTableIter iter;
    gpointer fid;

    v9fs_path_init(&new_path);
    if (newdirfid != -1) {
        /* target directory given explicitly by fid */
        dirfidp = get_fid(pdu, newdirfid);
        if (dirfidp == NULL) {
            return -ENOENT;
        }
        if (fidp->fid_type != P9_FID_NONE) {
            err = -EINVAL;
            goto out;
        }
        err = v9fs_co_name_to_path(pdu, &dirfidp->path, name->data, &new_path);
        if (err < 0) {
            goto out;
        }
    } else {
        /* no directory fid: rename within the current parent directory */
        char *dir_name = g_path_get_dirname(fidp->path.data);
        V9fsPath dir_path;

        v9fs_path_init(&dir_path);
        v9fs_path_sprintf(&dir_path, "%s", dir_name);
        g_free(dir_name);

        err = v9fs_co_name_to_path(pdu, &dir_path, name->data, &new_path);
        v9fs_path_free(&dir_path);
        if (err < 0) {
            goto out;
        }
    }
    err = v9fs_co_rename(pdu, &fidp->path, &new_path);
    if (err < 0) {
        goto out;
    }

    /*
     * Fixup fid's pointing to the old name to
     * start pointing to the new name
     */
    g_hash_table_iter_init(&iter, s->fids);
    while (g_hash_table_iter_next(&iter, &fid, (gpointer *) &tfidp)) {
        if (v9fs_path_is_ancestor(&fidp->path,
&tfidp->path)) { 3336 /* replace the name */ 3337 v9fs_fix_path(&tfidp->path, &new_path, strlen(fidp->path.data)); 3338 } 3339 } 3340 out: 3341 if (dirfidp) { 3342 put_fid(pdu, dirfidp); 3343 } 3344 v9fs_path_free(&new_path); 3345 return err; 3346 } 3347 3348 /* Only works with path name based fid */ 3349 static void coroutine_fn v9fs_rename(void *opaque) 3350 { 3351 int32_t fid; 3352 ssize_t err = 0; 3353 size_t offset = 7; 3354 V9fsString name; 3355 int32_t newdirfid; 3356 V9fsFidState *fidp; 3357 V9fsPDU *pdu = opaque; 3358 V9fsState *s = pdu->s; 3359 3360 v9fs_string_init(&name); 3361 err = pdu_unmarshal(pdu, offset, "dds", &fid, &newdirfid, &name); 3362 if (err < 0) { 3363 goto out_nofid; 3364 } 3365 3366 if (name_is_illegal(name.data)) { 3367 err = -ENOENT; 3368 goto out_nofid; 3369 } 3370 3371 if (!strcmp(".", name.data) || !strcmp("..", name.data)) { 3372 err = -EISDIR; 3373 goto out_nofid; 3374 } 3375 3376 fidp = get_fid(pdu, fid); 3377 if (fidp == NULL) { 3378 err = -ENOENT; 3379 goto out_nofid; 3380 } 3381 if (fidp->fid_type != P9_FID_NONE) { 3382 err = -EINVAL; 3383 goto out; 3384 } 3385 /* if fs driver is not path based, return EOPNOTSUPP */ 3386 if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) { 3387 err = -EOPNOTSUPP; 3388 goto out; 3389 } 3390 v9fs_path_write_lock(s); 3391 err = v9fs_complete_rename(pdu, fidp, newdirfid, &name); 3392 v9fs_path_unlock(s); 3393 if (!err) { 3394 err = offset; 3395 } 3396 out: 3397 put_fid(pdu, fidp); 3398 out_nofid: 3399 pdu_complete(pdu, err); 3400 v9fs_string_free(&name); 3401 } 3402 3403 static int coroutine_fn v9fs_fix_fid_paths(V9fsPDU *pdu, V9fsPath *olddir, 3404 V9fsString *old_name, 3405 V9fsPath *newdir, 3406 V9fsString *new_name) 3407 { 3408 V9fsFidState *tfidp; 3409 V9fsPath oldpath, newpath; 3410 V9fsState *s = pdu->s; 3411 int err; 3412 GHashTableIter iter; 3413 gpointer fid; 3414 3415 v9fs_path_init(&oldpath); 3416 v9fs_path_init(&newpath); 3417 err = v9fs_co_name_to_path(pdu, olddir, 
old_name->data, &oldpath); 3418 if (err < 0) { 3419 goto out; 3420 } 3421 err = v9fs_co_name_to_path(pdu, newdir, new_name->data, &newpath); 3422 if (err < 0) { 3423 goto out; 3424 } 3425 3426 /* 3427 * Fixup fid's pointing to the old name to 3428 * start pointing to the new name 3429 */ 3430 g_hash_table_iter_init(&iter, s->fids); 3431 while (g_hash_table_iter_next(&iter, &fid, (gpointer *) &tfidp)) { 3432 if (v9fs_path_is_ancestor(&oldpath, &tfidp->path)) { 3433 /* replace the name */ 3434 v9fs_fix_path(&tfidp->path, &newpath, strlen(oldpath.data)); 3435 } 3436 } 3437 out: 3438 v9fs_path_free(&oldpath); 3439 v9fs_path_free(&newpath); 3440 return err; 3441 } 3442 3443 static int coroutine_fn v9fs_complete_renameat(V9fsPDU *pdu, int32_t olddirfid, 3444 V9fsString *old_name, 3445 int32_t newdirfid, 3446 V9fsString *new_name) 3447 { 3448 int err = 0; 3449 V9fsState *s = pdu->s; 3450 V9fsFidState *newdirfidp = NULL, *olddirfidp = NULL; 3451 3452 olddirfidp = get_fid(pdu, olddirfid); 3453 if (olddirfidp == NULL) { 3454 err = -ENOENT; 3455 goto out; 3456 } 3457 if (newdirfid != -1) { 3458 newdirfidp = get_fid(pdu, newdirfid); 3459 if (newdirfidp == NULL) { 3460 err = -ENOENT; 3461 goto out; 3462 } 3463 } else { 3464 newdirfidp = get_fid(pdu, olddirfid); 3465 } 3466 3467 err = v9fs_co_renameat(pdu, &olddirfidp->path, old_name, 3468 &newdirfidp->path, new_name); 3469 if (err < 0) { 3470 goto out; 3471 } 3472 if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) { 3473 /* Only for path based fid we need to do the below fixup */ 3474 err = v9fs_fix_fid_paths(pdu, &olddirfidp->path, old_name, 3475 &newdirfidp->path, new_name); 3476 } 3477 out: 3478 if (olddirfidp) { 3479 put_fid(pdu, olddirfidp); 3480 } 3481 if (newdirfidp) { 3482 put_fid(pdu, newdirfidp); 3483 } 3484 return err; 3485 } 3486 3487 static void coroutine_fn v9fs_renameat(void *opaque) 3488 { 3489 ssize_t err = 0; 3490 size_t offset = 7; 3491 V9fsPDU *pdu = opaque; 3492 V9fsState *s = pdu->s; 3493 int32_t 
olddirfid, newdirfid;
    V9fsString old_name, new_name;

    v9fs_string_init(&old_name);
    v9fs_string_init(&new_name);
    err = pdu_unmarshal(pdu, offset, "dsds", &olddirfid,
                        &old_name, &newdirfid, &new_name);
    if (err < 0) {
        goto out_err;
    }

    if (name_is_illegal(old_name.data) || name_is_illegal(new_name.data)) {
        err = -ENOENT;
        goto out_err;
    }

    if (!strcmp(".", old_name.data) || !strcmp("..", old_name.data) ||
        !strcmp(".", new_name.data) || !strcmp("..", new_name.data)) {
        err = -EISDIR;
        goto out_err;
    }

    /* the rename and the fid path fixup run under the path write lock */
    v9fs_path_write_lock(s);
    err = v9fs_complete_renameat(pdu, olddirfid,
                                 &old_name, newdirfid, &new_name);
    v9fs_path_unlock(s);
    if (!err) {
        err = offset;
    }

out_err:
    pdu_complete(pdu, err);
    v9fs_string_free(&old_name);
    v9fs_string_free(&new_name);
}

/*
 * Handle a wstat request: apply those attributes of the transmitted stat
 * structure that are not set to the "don't touch" value (-1 / empty name).
 */
static void coroutine_fn v9fs_wstat(void *opaque)
{
    int32_t fid;
    int err = 0;
    int16_t unused;
    V9fsStat v9stat;
    size_t offset = 7;
    struct stat stbuf;
    V9fsFidState *fidp;
    V9fsPDU *pdu = opaque;
    V9fsState *s = pdu->s;

    v9fs_stat_init(&v9stat);
    err = pdu_unmarshal(pdu, offset, "dwS", &fid, &unused, &v9stat);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_wstat(pdu->tag, pdu->id, fid,
                     v9stat.mode, v9stat.atime, v9stat.mtime);

    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -EINVAL;
        goto out_nofid;
    }
    /* do we need to sync the file?
     */
    if (donttouch_stat(&v9stat)) {
        err = v9fs_co_fsync(pdu, fidp, 0);
        goto out;
    }
    if (v9stat.mode != -1) {
        uint32_t v9_mode;
        err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
        if (err < 0) {
            goto out;
        }
        v9_mode = stat_to_v9mode(&stbuf);
        if ((v9stat.mode & P9_STAT_MODE_TYPE_BITS) !=
            (v9_mode & P9_STAT_MODE_TYPE_BITS)) {
            /* Attempting to change the type */
            err = -EIO;
            goto out;
        }
        err = v9fs_co_chmod(pdu, &fidp->path,
                            v9mode_to_mode(v9stat.mode,
                                           &v9stat.extension));
        if (err < 0) {
            goto out;
        }
    }
    if (v9stat.mtime != -1 || v9stat.atime != -1) {
        /* times[0] is the access time, times[1] the modification time */
        struct timespec times[2];
        if (v9stat.atime != -1) {
            times[0].tv_sec = v9stat.atime;
            times[0].tv_nsec = 0;
        } else {
            times[0].tv_nsec = UTIME_OMIT;
        }
        if (v9stat.mtime != -1) {
            times[1].tv_sec = v9stat.mtime;
            times[1].tv_nsec = 0;
        } else {
            times[1].tv_nsec = UTIME_OMIT;
        }
        err = v9fs_co_utimensat(pdu, &fidp->path, times);
        if (err < 0) {
            goto out;
        }
    }
    if (v9stat.n_gid != -1 || v9stat.n_uid != -1) {
        err = v9fs_co_chown(pdu, &fidp->path, v9stat.n_uid, v9stat.n_gid);
        if (err < 0) {
            goto out;
        }
    }
    if (v9stat.name.size != 0) {
        /* a non-empty name requests a rename within the same directory */
        v9fs_path_write_lock(s);
        err = v9fs_complete_rename(pdu, fidp, -1, &v9stat.name);
        v9fs_path_unlock(s);
        if (err < 0) {
            goto out;
        }
    }
    if (v9stat.length != -1) {
        err = v9fs_co_truncate(pdu, &fidp->path, v9stat.length);
        if (err < 0) {
            goto out;
        }
    }
    err = offset;
out:
    put_fid(pdu, fidp);
out_nofid:
    v9fs_stat_free(&v9stat);
    pdu_complete(pdu, err);
}

/*
 * Marshal the contents of a host statfs result into the reply PDU.
 * Returns the result of pdu_marshal().
 */
static int v9fs_fill_statfs(V9fsState *s, V9fsPDU *pdu, struct statfs *stbuf)
{
    uint32_t f_type;
    uint32_t f_bsize;
    uint64_t f_blocks;
    uint64_t f_bfree;
    uint64_t f_bavail;
    uint64_t f_files;
uint64_t f_ffree; 3635 uint64_t fsid_val; 3636 uint32_t f_namelen; 3637 size_t offset = 7; 3638 int32_t bsize_factor; 3639 3640 /* 3641 * compute bsize factor based on host file system block size 3642 * and client msize 3643 */ 3644 bsize_factor = (s->msize - P9_IOHDRSZ) / stbuf->f_bsize; 3645 if (!bsize_factor) { 3646 bsize_factor = 1; 3647 } 3648 f_type = stbuf->f_type; 3649 f_bsize = stbuf->f_bsize; 3650 f_bsize *= bsize_factor; 3651 /* 3652 * f_bsize is adjusted(multiplied) by bsize factor, so we need to 3653 * adjust(divide) the number of blocks, free blocks and available 3654 * blocks by bsize factor 3655 */ 3656 f_blocks = stbuf->f_blocks / bsize_factor; 3657 f_bfree = stbuf->f_bfree / bsize_factor; 3658 f_bavail = stbuf->f_bavail / bsize_factor; 3659 f_files = stbuf->f_files; 3660 f_ffree = stbuf->f_ffree; 3661 #ifdef CONFIG_DARWIN 3662 fsid_val = (unsigned int)stbuf->f_fsid.val[0] | 3663 (unsigned long long)stbuf->f_fsid.val[1] << 32; 3664 f_namelen = NAME_MAX; 3665 #else 3666 fsid_val = (unsigned int) stbuf->f_fsid.__val[0] | 3667 (unsigned long long)stbuf->f_fsid.__val[1] << 32; 3668 f_namelen = stbuf->f_namelen; 3669 #endif 3670 3671 return pdu_marshal(pdu, offset, "ddqqqqqqd", 3672 f_type, f_bsize, f_blocks, f_bfree, 3673 f_bavail, f_files, f_ffree, 3674 fsid_val, f_namelen); 3675 } 3676 3677 static void coroutine_fn v9fs_statfs(void *opaque) 3678 { 3679 int32_t fid; 3680 ssize_t retval = 0; 3681 size_t offset = 7; 3682 V9fsFidState *fidp; 3683 struct statfs stbuf; 3684 V9fsPDU *pdu = opaque; 3685 V9fsState *s = pdu->s; 3686 3687 retval = pdu_unmarshal(pdu, offset, "d", &fid); 3688 if (retval < 0) { 3689 goto out_nofid; 3690 } 3691 fidp = get_fid(pdu, fid); 3692 if (fidp == NULL) { 3693 retval = -ENOENT; 3694 goto out_nofid; 3695 } 3696 retval = v9fs_co_statfs(pdu, &fidp->path, &stbuf); 3697 if (retval < 0) { 3698 goto out; 3699 } 3700 retval = v9fs_fill_statfs(s, pdu, &stbuf); 3701 if (retval < 0) { 3702 goto out; 3703 } 3704 retval += offset; 3705 
out: 3706 put_fid(pdu, fidp); 3707 out_nofid: 3708 pdu_complete(pdu, retval); 3709 } 3710 3711 static void coroutine_fn v9fs_mknod(void *opaque) 3712 { 3713 3714 int mode; 3715 gid_t gid; 3716 int32_t fid; 3717 V9fsQID qid; 3718 int err = 0; 3719 int major, minor; 3720 size_t offset = 7; 3721 V9fsString name; 3722 struct stat stbuf; 3723 V9fsFidState *fidp; 3724 V9fsPDU *pdu = opaque; 3725 3726 v9fs_string_init(&name); 3727 err = pdu_unmarshal(pdu, offset, "dsdddd", &fid, &name, &mode, 3728 &major, &minor, &gid); 3729 if (err < 0) { 3730 goto out_nofid; 3731 } 3732 trace_v9fs_mknod(pdu->tag, pdu->id, fid, mode, major, minor); 3733 3734 if (name_is_illegal(name.data)) { 3735 err = -ENOENT; 3736 goto out_nofid; 3737 } 3738 3739 if (!strcmp(".", name.data) || !strcmp("..", name.data)) { 3740 err = -EEXIST; 3741 goto out_nofid; 3742 } 3743 3744 fidp = get_fid(pdu, fid); 3745 if (fidp == NULL) { 3746 err = -ENOENT; 3747 goto out_nofid; 3748 } 3749 err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, gid, 3750 makedev(major, minor), mode, &stbuf); 3751 if (err < 0) { 3752 goto out; 3753 } 3754 err = stat_to_qid(pdu, &stbuf, &qid); 3755 if (err < 0) { 3756 goto out; 3757 } 3758 err = pdu_marshal(pdu, offset, "Q", &qid); 3759 if (err < 0) { 3760 goto out; 3761 } 3762 err += offset; 3763 trace_v9fs_mknod_return(pdu->tag, pdu->id, 3764 qid.type, qid.version, qid.path); 3765 out: 3766 put_fid(pdu, fidp); 3767 out_nofid: 3768 pdu_complete(pdu, err); 3769 v9fs_string_free(&name); 3770 } 3771 3772 /* 3773 * Implement posix byte range locking code 3774 * Server side handling of locking code is very simple, because 9p server in 3775 * QEMU can handle only one client. And most of the lock handling 3776 * (like conflict, merging) etc is done by the VFS layer itself, so no need to 3777 * do any thing in * qemu 9p server side lock code path. 
3778 * So when a TLOCK request comes, always return success 3779 */ 3780 static void coroutine_fn v9fs_lock(void *opaque) 3781 { 3782 V9fsFlock flock; 3783 size_t offset = 7; 3784 struct stat stbuf; 3785 V9fsFidState *fidp; 3786 int32_t fid, err = 0; 3787 V9fsPDU *pdu = opaque; 3788 3789 v9fs_string_init(&flock.client_id); 3790 err = pdu_unmarshal(pdu, offset, "dbdqqds", &fid, &flock.type, 3791 &flock.flags, &flock.start, &flock.length, 3792 &flock.proc_id, &flock.client_id); 3793 if (err < 0) { 3794 goto out_nofid; 3795 } 3796 trace_v9fs_lock(pdu->tag, pdu->id, fid, 3797 flock.type, flock.start, flock.length); 3798 3799 3800 /* We support only block flag now (that too ignored currently) */ 3801 if (flock.flags & ~P9_LOCK_FLAGS_BLOCK) { 3802 err = -EINVAL; 3803 goto out_nofid; 3804 } 3805 fidp = get_fid(pdu, fid); 3806 if (fidp == NULL) { 3807 err = -ENOENT; 3808 goto out_nofid; 3809 } 3810 err = v9fs_co_fstat(pdu, fidp, &stbuf); 3811 if (err < 0) { 3812 goto out; 3813 } 3814 err = pdu_marshal(pdu, offset, "b", P9_LOCK_SUCCESS); 3815 if (err < 0) { 3816 goto out; 3817 } 3818 err += offset; 3819 trace_v9fs_lock_return(pdu->tag, pdu->id, P9_LOCK_SUCCESS); 3820 out: 3821 put_fid(pdu, fidp); 3822 out_nofid: 3823 pdu_complete(pdu, err); 3824 v9fs_string_free(&flock.client_id); 3825 } 3826 3827 /* 3828 * When a TGETLOCK request comes, always return success because all lock 3829 * handling is done by client's VFS layer. 
3830 */ 3831 static void coroutine_fn v9fs_getlock(void *opaque) 3832 { 3833 size_t offset = 7; 3834 struct stat stbuf; 3835 V9fsFidState *fidp; 3836 V9fsGetlock glock; 3837 int32_t fid, err = 0; 3838 V9fsPDU *pdu = opaque; 3839 3840 v9fs_string_init(&glock.client_id); 3841 err = pdu_unmarshal(pdu, offset, "dbqqds", &fid, &glock.type, 3842 &glock.start, &glock.length, &glock.proc_id, 3843 &glock.client_id); 3844 if (err < 0) { 3845 goto out_nofid; 3846 } 3847 trace_v9fs_getlock(pdu->tag, pdu->id, fid, 3848 glock.type, glock.start, glock.length); 3849 3850 fidp = get_fid(pdu, fid); 3851 if (fidp == NULL) { 3852 err = -ENOENT; 3853 goto out_nofid; 3854 } 3855 err = v9fs_co_fstat(pdu, fidp, &stbuf); 3856 if (err < 0) { 3857 goto out; 3858 } 3859 glock.type = P9_LOCK_TYPE_UNLCK; 3860 err = pdu_marshal(pdu, offset, "bqqds", glock.type, 3861 glock.start, glock.length, glock.proc_id, 3862 &glock.client_id); 3863 if (err < 0) { 3864 goto out; 3865 } 3866 err += offset; 3867 trace_v9fs_getlock_return(pdu->tag, pdu->id, glock.type, glock.start, 3868 glock.length, glock.proc_id); 3869 out: 3870 put_fid(pdu, fidp); 3871 out_nofid: 3872 pdu_complete(pdu, err); 3873 v9fs_string_free(&glock.client_id); 3874 } 3875 3876 static void coroutine_fn v9fs_mkdir(void *opaque) 3877 { 3878 V9fsPDU *pdu = opaque; 3879 size_t offset = 7; 3880 int32_t fid; 3881 struct stat stbuf; 3882 V9fsQID qid; 3883 V9fsString name; 3884 V9fsFidState *fidp; 3885 gid_t gid; 3886 int mode; 3887 int err = 0; 3888 3889 v9fs_string_init(&name); 3890 err = pdu_unmarshal(pdu, offset, "dsdd", &fid, &name, &mode, &gid); 3891 if (err < 0) { 3892 goto out_nofid; 3893 } 3894 trace_v9fs_mkdir(pdu->tag, pdu->id, fid, name.data, mode, gid); 3895 3896 if (name_is_illegal(name.data)) { 3897 err = -ENOENT; 3898 goto out_nofid; 3899 } 3900 3901 if (!strcmp(".", name.data) || !strcmp("..", name.data)) { 3902 err = -EEXIST; 3903 goto out_nofid; 3904 } 3905 3906 fidp = get_fid(pdu, fid); 3907 if (fidp == NULL) { 3908 err = 
-ENOENT; 3909 goto out_nofid; 3910 } 3911 err = v9fs_co_mkdir(pdu, fidp, &name, mode, fidp->uid, gid, &stbuf); 3912 if (err < 0) { 3913 goto out; 3914 } 3915 err = stat_to_qid(pdu, &stbuf, &qid); 3916 if (err < 0) { 3917 goto out; 3918 } 3919 err = pdu_marshal(pdu, offset, "Q", &qid); 3920 if (err < 0) { 3921 goto out; 3922 } 3923 err += offset; 3924 trace_v9fs_mkdir_return(pdu->tag, pdu->id, 3925 qid.type, qid.version, qid.path, err); 3926 out: 3927 put_fid(pdu, fidp); 3928 out_nofid: 3929 pdu_complete(pdu, err); 3930 v9fs_string_free(&name); 3931 } 3932 3933 static void coroutine_fn v9fs_xattrwalk(void *opaque) 3934 { 3935 int64_t size; 3936 V9fsString name; 3937 ssize_t err = 0; 3938 size_t offset = 7; 3939 int32_t fid, newfid; 3940 V9fsFidState *file_fidp; 3941 V9fsFidState *xattr_fidp = NULL; 3942 V9fsPDU *pdu = opaque; 3943 V9fsState *s = pdu->s; 3944 3945 v9fs_string_init(&name); 3946 err = pdu_unmarshal(pdu, offset, "dds", &fid, &newfid, &name); 3947 if (err < 0) { 3948 goto out_nofid; 3949 } 3950 trace_v9fs_xattrwalk(pdu->tag, pdu->id, fid, newfid, name.data); 3951 3952 file_fidp = get_fid(pdu, fid); 3953 if (file_fidp == NULL) { 3954 err = -ENOENT; 3955 goto out_nofid; 3956 } 3957 xattr_fidp = alloc_fid(s, newfid); 3958 if (xattr_fidp == NULL) { 3959 err = -EINVAL; 3960 goto out; 3961 } 3962 v9fs_path_copy(&xattr_fidp->path, &file_fidp->path); 3963 if (!v9fs_string_size(&name)) { 3964 /* 3965 * listxattr request. 
Get the size first 3966 */ 3967 size = v9fs_co_llistxattr(pdu, &xattr_fidp->path, NULL, 0); 3968 if (size < 0) { 3969 err = size; 3970 clunk_fid(s, xattr_fidp->fid); 3971 goto out; 3972 } 3973 /* 3974 * Read the xattr value 3975 */ 3976 xattr_fidp->fs.xattr.len = size; 3977 xattr_fidp->fid_type = P9_FID_XATTR; 3978 xattr_fidp->fs.xattr.xattrwalk_fid = true; 3979 xattr_fidp->fs.xattr.value = g_malloc0(size); 3980 if (size) { 3981 err = v9fs_co_llistxattr(pdu, &xattr_fidp->path, 3982 xattr_fidp->fs.xattr.value, 3983 xattr_fidp->fs.xattr.len); 3984 if (err < 0) { 3985 clunk_fid(s, xattr_fidp->fid); 3986 goto out; 3987 } 3988 } 3989 err = pdu_marshal(pdu, offset, "q", size); 3990 if (err < 0) { 3991 goto out; 3992 } 3993 err += offset; 3994 } else { 3995 /* 3996 * specific xattr fid. We check for xattr 3997 * presence also collect the xattr size 3998 */ 3999 size = v9fs_co_lgetxattr(pdu, &xattr_fidp->path, 4000 &name, NULL, 0); 4001 if (size < 0) { 4002 err = size; 4003 clunk_fid(s, xattr_fidp->fid); 4004 goto out; 4005 } 4006 /* 4007 * Read the xattr value 4008 */ 4009 xattr_fidp->fs.xattr.len = size; 4010 xattr_fidp->fid_type = P9_FID_XATTR; 4011 xattr_fidp->fs.xattr.xattrwalk_fid = true; 4012 xattr_fidp->fs.xattr.value = g_malloc0(size); 4013 if (size) { 4014 err = v9fs_co_lgetxattr(pdu, &xattr_fidp->path, 4015 &name, xattr_fidp->fs.xattr.value, 4016 xattr_fidp->fs.xattr.len); 4017 if (err < 0) { 4018 clunk_fid(s, xattr_fidp->fid); 4019 goto out; 4020 } 4021 } 4022 err = pdu_marshal(pdu, offset, "q", size); 4023 if (err < 0) { 4024 goto out; 4025 } 4026 err += offset; 4027 } 4028 trace_v9fs_xattrwalk_return(pdu->tag, pdu->id, size); 4029 out: 4030 put_fid(pdu, file_fidp); 4031 if (xattr_fidp) { 4032 put_fid(pdu, xattr_fidp); 4033 } 4034 out_nofid: 4035 pdu_complete(pdu, err); 4036 v9fs_string_free(&name); 4037 } 4038 4039 #if defined(CONFIG_LINUX) 4040 /* Currently, only Linux has XATTR_SIZE_MAX */ 4041 #define P9_XATTR_SIZE_MAX XATTR_SIZE_MAX 4042 #elif 
defined(CONFIG_DARWIN) 4043 /* 4044 * Darwin doesn't seem to define a maximum xattr size in its user 4045 * space header, so manually configure it across platforms as 64k. 4046 * 4047 * Having no limit at all can lead to QEMU crashing during large g_malloc() 4048 * calls. Because QEMU does not currently support macOS guests, the below 4049 * preliminary solution only works due to its being a reflection of the limit of 4050 * Linux guests. 4051 */ 4052 #define P9_XATTR_SIZE_MAX 65536 4053 #else 4054 #error Missing definition for P9_XATTR_SIZE_MAX for this host system 4055 #endif 4056 4057 static void coroutine_fn v9fs_xattrcreate(void *opaque) 4058 { 4059 int flags, rflags = 0; 4060 int32_t fid; 4061 uint64_t size; 4062 ssize_t err = 0; 4063 V9fsString name; 4064 size_t offset = 7; 4065 V9fsFidState *file_fidp; 4066 V9fsFidState *xattr_fidp; 4067 V9fsPDU *pdu = opaque; 4068 4069 v9fs_string_init(&name); 4070 err = pdu_unmarshal(pdu, offset, "dsqd", &fid, &name, &size, &flags); 4071 if (err < 0) { 4072 goto out_nofid; 4073 } 4074 trace_v9fs_xattrcreate(pdu->tag, pdu->id, fid, name.data, size, flags); 4075 4076 if (flags & ~(P9_XATTR_CREATE | P9_XATTR_REPLACE)) { 4077 err = -EINVAL; 4078 goto out_nofid; 4079 } 4080 4081 if (flags & P9_XATTR_CREATE) { 4082 rflags |= XATTR_CREATE; 4083 } 4084 4085 if (flags & P9_XATTR_REPLACE) { 4086 rflags |= XATTR_REPLACE; 4087 } 4088 4089 if (size > P9_XATTR_SIZE_MAX) { 4090 err = -E2BIG; 4091 goto out_nofid; 4092 } 4093 4094 file_fidp = get_fid(pdu, fid); 4095 if (file_fidp == NULL) { 4096 err = -EINVAL; 4097 goto out_nofid; 4098 } 4099 if (file_fidp->fid_type != P9_FID_NONE) { 4100 err = -EINVAL; 4101 goto out_put_fid; 4102 } 4103 4104 /* Make the file fid point to xattr */ 4105 xattr_fidp = file_fidp; 4106 xattr_fidp->fid_type = P9_FID_XATTR; 4107 xattr_fidp->fs.xattr.copied_len = 0; 4108 xattr_fidp->fs.xattr.xattrwalk_fid = false; 4109 xattr_fidp->fs.xattr.len = size; 4110 xattr_fidp->fs.xattr.flags = rflags; 4111 
v9fs_string_init(&xattr_fidp->fs.xattr.name); 4112 v9fs_string_copy(&xattr_fidp->fs.xattr.name, &name); 4113 xattr_fidp->fs.xattr.value = g_malloc0(size); 4114 err = offset; 4115 out_put_fid: 4116 put_fid(pdu, file_fidp); 4117 out_nofid: 4118 pdu_complete(pdu, err); 4119 v9fs_string_free(&name); 4120 } 4121 4122 static void coroutine_fn v9fs_readlink(void *opaque) 4123 { 4124 V9fsPDU *pdu = opaque; 4125 size_t offset = 7; 4126 V9fsString target; 4127 int32_t fid; 4128 int err = 0; 4129 V9fsFidState *fidp; 4130 4131 err = pdu_unmarshal(pdu, offset, "d", &fid); 4132 if (err < 0) { 4133 goto out_nofid; 4134 } 4135 trace_v9fs_readlink(pdu->tag, pdu->id, fid); 4136 fidp = get_fid(pdu, fid); 4137 if (fidp == NULL) { 4138 err = -ENOENT; 4139 goto out_nofid; 4140 } 4141 4142 v9fs_string_init(&target); 4143 err = v9fs_co_readlink(pdu, &fidp->path, &target); 4144 if (err < 0) { 4145 goto out; 4146 } 4147 err = pdu_marshal(pdu, offset, "s", &target); 4148 if (err < 0) { 4149 v9fs_string_free(&target); 4150 goto out; 4151 } 4152 err += offset; 4153 trace_v9fs_readlink_return(pdu->tag, pdu->id, target.data); 4154 v9fs_string_free(&target); 4155 out: 4156 put_fid(pdu, fidp); 4157 out_nofid: 4158 pdu_complete(pdu, err); 4159 } 4160 4161 static CoroutineEntry *pdu_co_handlers[] = { 4162 [P9_TREADDIR] = v9fs_readdir, 4163 [P9_TSTATFS] = v9fs_statfs, 4164 [P9_TGETATTR] = v9fs_getattr, 4165 [P9_TSETATTR] = v9fs_setattr, 4166 [P9_TXATTRWALK] = v9fs_xattrwalk, 4167 [P9_TXATTRCREATE] = v9fs_xattrcreate, 4168 [P9_TMKNOD] = v9fs_mknod, 4169 [P9_TRENAME] = v9fs_rename, 4170 [P9_TLOCK] = v9fs_lock, 4171 [P9_TGETLOCK] = v9fs_getlock, 4172 [P9_TRENAMEAT] = v9fs_renameat, 4173 [P9_TREADLINK] = v9fs_readlink, 4174 [P9_TUNLINKAT] = v9fs_unlinkat, 4175 [P9_TMKDIR] = v9fs_mkdir, 4176 [P9_TVERSION] = v9fs_version, 4177 [P9_TLOPEN] = v9fs_open, 4178 [P9_TATTACH] = v9fs_attach, 4179 [P9_TSTAT] = v9fs_stat, 4180 [P9_TWALK] = v9fs_walk, 4181 [P9_TCLUNK] = v9fs_clunk, 4182 [P9_TFSYNC] = v9fs_fsync, 4183 
[P9_TOPEN] = v9fs_open, 4184 [P9_TREAD] = v9fs_read, 4185 #if 0 4186 [P9_TAUTH] = v9fs_auth, 4187 #endif 4188 [P9_TFLUSH] = v9fs_flush, 4189 [P9_TLINK] = v9fs_link, 4190 [P9_TSYMLINK] = v9fs_symlink, 4191 [P9_TCREATE] = v9fs_create, 4192 [P9_TLCREATE] = v9fs_lcreate, 4193 [P9_TWRITE] = v9fs_write, 4194 [P9_TWSTAT] = v9fs_wstat, 4195 [P9_TREMOVE] = v9fs_remove, 4196 }; 4197 4198 static void coroutine_fn v9fs_op_not_supp(void *opaque) 4199 { 4200 V9fsPDU *pdu = opaque; 4201 pdu_complete(pdu, -EOPNOTSUPP); 4202 } 4203 4204 static void coroutine_fn v9fs_fs_ro(void *opaque) 4205 { 4206 V9fsPDU *pdu = opaque; 4207 pdu_complete(pdu, -EROFS); 4208 } 4209 4210 static inline bool is_read_only_op(V9fsPDU *pdu) 4211 { 4212 switch (pdu->id) { 4213 case P9_TREADDIR: 4214 case P9_TSTATFS: 4215 case P9_TGETATTR: 4216 case P9_TXATTRWALK: 4217 case P9_TLOCK: 4218 case P9_TGETLOCK: 4219 case P9_TREADLINK: 4220 case P9_TVERSION: 4221 case P9_TLOPEN: 4222 case P9_TATTACH: 4223 case P9_TSTAT: 4224 case P9_TWALK: 4225 case P9_TCLUNK: 4226 case P9_TFSYNC: 4227 case P9_TOPEN: 4228 case P9_TREAD: 4229 case P9_TAUTH: 4230 case P9_TFLUSH: 4231 return 1; 4232 default: 4233 return 0; 4234 } 4235 } 4236 4237 void pdu_submit(V9fsPDU *pdu, P9MsgHeader *hdr) 4238 { 4239 Coroutine *co; 4240 CoroutineEntry *handler; 4241 V9fsState *s = pdu->s; 4242 4243 pdu->size = le32_to_cpu(hdr->size_le); 4244 pdu->id = hdr->id; 4245 pdu->tag = le16_to_cpu(hdr->tag_le); 4246 4247 if (pdu->id >= ARRAY_SIZE(pdu_co_handlers) || 4248 (pdu_co_handlers[pdu->id] == NULL)) { 4249 handler = v9fs_op_not_supp; 4250 } else if (is_ro_export(&s->ctx) && !is_read_only_op(pdu)) { 4251 handler = v9fs_fs_ro; 4252 } else { 4253 handler = pdu_co_handlers[pdu->id]; 4254 } 4255 4256 qemu_co_queue_init(&pdu->complete); 4257 co = qemu_coroutine_create(handler, pdu); 4258 qemu_coroutine_enter(co); 4259 } 4260 4261 /* Returns 0 on success, 1 on failure. 
*/ 4262 int v9fs_device_realize_common(V9fsState *s, const V9fsTransport *t, 4263 Error **errp) 4264 { 4265 ERRP_GUARD(); 4266 int i, len; 4267 struct stat stat; 4268 FsDriverEntry *fse; 4269 V9fsPath path; 4270 int rc = 1; 4271 4272 assert(!s->transport); 4273 s->transport = t; 4274 4275 /* initialize pdu allocator */ 4276 QLIST_INIT(&s->free_list); 4277 QLIST_INIT(&s->active_list); 4278 for (i = 0; i < MAX_REQ; i++) { 4279 QLIST_INSERT_HEAD(&s->free_list, &s->pdus[i], next); 4280 s->pdus[i].s = s; 4281 s->pdus[i].idx = i; 4282 } 4283 4284 v9fs_path_init(&path); 4285 4286 fse = get_fsdev_fsentry(s->fsconf.fsdev_id); 4287 4288 if (!fse) { 4289 /* We don't have a fsdev identified by fsdev_id */ 4290 error_setg(errp, "9pfs device couldn't find fsdev with the " 4291 "id = %s", 4292 s->fsconf.fsdev_id ? s->fsconf.fsdev_id : "NULL"); 4293 goto out; 4294 } 4295 4296 if (!s->fsconf.tag) { 4297 /* we haven't specified a mount_tag */ 4298 error_setg(errp, "fsdev with id %s needs mount_tag arguments", 4299 s->fsconf.fsdev_id); 4300 goto out; 4301 } 4302 4303 s->ctx.export_flags = fse->export_flags; 4304 s->ctx.fs_root = g_strdup(fse->path); 4305 s->ctx.exops.get_st_gen = NULL; 4306 len = strlen(s->fsconf.tag); 4307 if (len > MAX_TAG_LEN - 1) { 4308 error_setg(errp, "mount tag '%s' (%d bytes) is longer than " 4309 "maximum (%d bytes)", s->fsconf.tag, len, MAX_TAG_LEN - 1); 4310 goto out; 4311 } 4312 4313 s->tag = g_strdup(s->fsconf.tag); 4314 s->ctx.uid = -1; 4315 4316 s->ops = fse->ops; 4317 4318 s->ctx.fmode = fse->fmode; 4319 s->ctx.dmode = fse->dmode; 4320 4321 s->fids = g_hash_table_new(NULL, NULL); 4322 qemu_co_rwlock_init(&s->rename_lock); 4323 4324 if (s->ops->init(&s->ctx, errp) < 0) { 4325 error_prepend(errp, "cannot initialize fsdev '%s': ", 4326 s->fsconf.fsdev_id); 4327 goto out; 4328 } 4329 4330 /* 4331 * Check details of export path, We need to use fs driver 4332 * call back to do that. Since we are in the init path, we don't 4333 * use co-routines here. 
4334 */ 4335 if (s->ops->name_to_path(&s->ctx, NULL, "/", &path) < 0) { 4336 error_setg(errp, 4337 "error in converting name to path %s", strerror(errno)); 4338 goto out; 4339 } 4340 if (s->ops->lstat(&s->ctx, &path, &stat)) { 4341 error_setg(errp, "share path %s does not exist", fse->path); 4342 goto out; 4343 } else if (!S_ISDIR(stat.st_mode)) { 4344 error_setg(errp, "share path %s is not a directory", fse->path); 4345 goto out; 4346 } 4347 4348 s->dev_id = stat.st_dev; 4349 4350 /* init inode remapping : */ 4351 /* hash table for variable length inode suffixes */ 4352 qpd_table_init(&s->qpd_table); 4353 /* hash table for slow/full inode remapping (most users won't need it) */ 4354 qpf_table_init(&s->qpf_table); 4355 /* hash table for quick inode remapping */ 4356 qpp_table_init(&s->qpp_table); 4357 s->qp_ndevices = 0; 4358 s->qp_affix_next = 1; /* reserve 0 to detect overflow */ 4359 s->qp_fullpath_next = 1; 4360 4361 s->ctx.fst = &fse->fst; 4362 fsdev_throttle_init(s->ctx.fst); 4363 4364 s->reclaiming = false; 4365 4366 rc = 0; 4367 out: 4368 if (rc) { 4369 v9fs_device_unrealize_common(s); 4370 } 4371 v9fs_path_free(&path); 4372 return rc; 4373 } 4374 4375 void v9fs_device_unrealize_common(V9fsState *s) 4376 { 4377 if (s->ops && s->ops->cleanup) { 4378 s->ops->cleanup(&s->ctx); 4379 } 4380 if (s->ctx.fst) { 4381 fsdev_throttle_cleanup(s->ctx.fst); 4382 } 4383 if (s->fids) { 4384 g_hash_table_destroy(s->fids); 4385 s->fids = NULL; 4386 } 4387 g_free(s->tag); 4388 qp_table_destroy(&s->qpd_table); 4389 qp_table_destroy(&s->qpp_table); 4390 qp_table_destroy(&s->qpf_table); 4391 g_free(s->ctx.fs_root); 4392 } 4393 4394 typedef struct VirtfsCoResetData { 4395 V9fsPDU pdu; 4396 bool done; 4397 } VirtfsCoResetData; 4398 4399 static void coroutine_fn virtfs_co_reset(void *opaque) 4400 { 4401 VirtfsCoResetData *data = opaque; 4402 4403 virtfs_reset(&data->pdu); 4404 data->done = true; 4405 } 4406 4407 void v9fs_reset(V9fsState *s) 4408 { 4409 VirtfsCoResetData data = { 
.pdu = { .s = s }, .done = false }; 4410 Coroutine *co; 4411 4412 while (!QLIST_EMPTY(&s->active_list)) { 4413 aio_poll(qemu_get_aio_context(), true); 4414 } 4415 4416 co = qemu_coroutine_create(virtfs_co_reset, &data); 4417 qemu_coroutine_enter(co); 4418 4419 while (!data.done) { 4420 aio_poll(qemu_get_aio_context(), true); 4421 } 4422 } 4423 4424 static void __attribute__((__constructor__)) v9fs_set_fd_limit(void) 4425 { 4426 struct rlimit rlim; 4427 if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) { 4428 error_report("Failed to get the resource limit"); 4429 exit(1); 4430 } 4431 open_fd_hw = rlim.rlim_cur - MIN(400, rlim.rlim_cur / 3); 4432 open_fd_rc = rlim.rlim_cur / 2; 4433 } 4434