/*
 * Virtio 9p backend
 *
 * Copyright IBM, Corp. 2010
 *
 * Authors:
 *  Anthony Liguori <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 */

/*
 * Not so fast! You might want to read the 9p developer docs first:
 * https://wiki.qemu.org/Documentation/9p
 */

#include "qemu/osdep.h"
#ifdef CONFIG_LINUX
#include <linux/limits.h>
#endif
#include <glib/gprintf.h>
#include "hw/virtio/virtio.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/iov.h"
#include "qemu/main-loop.h"
#include "qemu/sockets.h"
#include "virtio-9p.h"
#include "fsdev/qemu-fsdev.h"
#include "9p-xattr.h"
#include "9p-util.h"
#include "coth.h"
#include "trace.h"
#include "migration/blocker.h"
#include "qemu/xxhash.h"
#include <math.h>

/* Counters of host file descriptors currently held open on behalf of fids. */
int open_fd_hw;
int total_open_fd;
/* Threshold of fds to collect per v9fs_reclaim_fd() run (see its loop). */
static int open_fd_rc;

/* 9p2000.u open/create mode bits as sent by the client on the wire. */
enum {
    Oread = 0x00,
    Owrite = 0x01,
    Ordwr = 0x02,
    Oexec = 0x03,
    Oexcl = 0x04,
    Otrunc = 0x10,
    Orexec = 0x20,
    Orclose = 0x40,
    Oappend = 0x80,
};

P9ARRAY_DEFINE_TYPE(V9fsPath, v9fs_path_free);

/*
 * Marshal reply fields into the PDU's transport buffer at @offset.
 * Returns number of bytes written or a negative error (from the transport).
 */
static ssize_t pdu_marshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...)
{
    ssize_t ret;
    va_list ap;

    va_start(ap, fmt);
    ret = pdu->s->transport->pdu_vmarshal(pdu, offset, fmt, ap);
    va_end(ap);

    return ret;
}

/*
 * Unmarshal request fields from the PDU's transport buffer at @offset.
 * Returns number of bytes consumed or a negative error (from the transport).
 */
static ssize_t pdu_unmarshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...)
{
    ssize_t ret;
    va_list ap;

    va_start(ap, fmt);
    ret = pdu->s->transport->pdu_vunmarshal(pdu, offset, fmt, ap);
    va_end(ap);

    return ret;
}

/* Translate a 9p2000.u open mode byte into host open(2) flags. */
static int omode_to_uflags(int8_t mode)
{
    int ret = 0;

    /* Low two bits select the access mode (Oexec maps to read-only). */
    switch (mode & 3) {
    case Oread:
        ret = O_RDONLY;
        break;
    case Ordwr:
        ret = O_RDWR;
        break;
    case Owrite:
        ret = O_WRONLY;
        break;
    case Oexec:
        ret = O_RDONLY;
        break;
    }

    if (mode & Otrunc) {
        ret |= O_TRUNC;
    }

    if (mode & Oappend) {
        ret |= O_APPEND;
    }

    if (mode & Oexcl) {
        ret |= O_EXCL;
    }

    return ret;
}

/* One (9p2000.L flag -> host open(2) flag) translation pair. */
typedef struct DotlOpenflagMap {
    int dotl_flag;
    int open_flag;
} DotlOpenflagMap;

/* Translate 9p2000.L (Linux-style) open flags into host open(2) flags. */
static int dotl_to_open_flags(int flags)
{
    int i;
    /*
     * We have same bits for P9_DOTL_READONLY, P9_DOTL_WRONLY
     * and P9_DOTL_NOACCESS
     */
    int oflags = flags & O_ACCMODE;

    DotlOpenflagMap dotl_oflag_map[] = {
        { P9_DOTL_CREATE, O_CREAT },
        { P9_DOTL_EXCL, O_EXCL },
        { P9_DOTL_NOCTTY , O_NOCTTY },
        { P9_DOTL_TRUNC, O_TRUNC },
        { P9_DOTL_APPEND, O_APPEND },
        { P9_DOTL_NONBLOCK, O_NONBLOCK } ,
        { P9_DOTL_DSYNC, O_DSYNC },
        { P9_DOTL_FASYNC, FASYNC },
#if !defined(CONFIG_DARWIN) && !defined(CONFIG_FREEBSD)
        { P9_DOTL_NOATIME, O_NOATIME },
#endif
#ifndef CONFIG_DARWIN
        /*
         * On Darwin, we could map to F_NOCACHE, which is
         * similar, but doesn't quite have the same
         * semantics. However, we don't support O_DIRECT
         * even on linux at the moment, so we just ignore
         * it here.
         */
        { P9_DOTL_DIRECT, O_DIRECT },
#endif
        { P9_DOTL_LARGEFILE, O_LARGEFILE },
        { P9_DOTL_DIRECTORY, O_DIRECTORY },
        { P9_DOTL_NOFOLLOW, O_NOFOLLOW },
        { P9_DOTL_SYNC, O_SYNC },
    };

    for (i = 0; i < ARRAY_SIZE(dotl_oflag_map); i++) {
        if (flags & dotl_oflag_map[i].dotl_flag) {
            oflags |= dotl_oflag_map[i].open_flag;
        }
    }

    return oflags;
}

/* Initialize credentials to "unspecified" (-1) values. */
void cred_init(FsCred *credp)
{
    credp->fc_uid = -1;
    credp->fc_gid = -1;
    credp->fc_mode = -1;
    credp->fc_rdev = -1;
}

/* Translate and sanitize client 9p2000.L open flags for host use. */
static int get_dotl_openflags(V9fsState *s, int oflags)
{
    int flags;
    /*
     * Filter the client open flags
     */
    flags = dotl_to_open_flags(oflags);
    flags &= ~(O_NOCTTY | O_ASYNC | O_CREAT);
#ifndef CONFIG_DARWIN
    /*
     * Ignore direct disk access hint until the server supports it.
     */
    flags &= ~O_DIRECT;
#endif
    return flags;
}

/* Initialize a path to the empty state (no allocation). */
void v9fs_path_init(V9fsPath *path)
{
    path->data = NULL;
    path->size = 0;
}

/* Free a path's buffer and reset it to the empty state (safe to re-free). */
void v9fs_path_free(V9fsPath *path)
{
    g_free(path->data);
    path->data = NULL;
    path->size = 0;
}


/* printf-style (re)build of a path; any previous content is freed first. */
void v9fs_path_sprintf(V9fsPath *path, const char *fmt, ...)
{
    va_list ap;

    v9fs_path_free(path);

    va_start(ap, fmt);
    /* Bump the size for including terminating NULL */
    path->size = g_vasprintf(&path->data, fmt, ap) + 1;
    va_end(ap);
}

/* Deep-copy @src into @dst, freeing whatever @dst previously held. */
void v9fs_path_copy(V9fsPath *dst, const V9fsPath *src)
{
    v9fs_path_free(dst);
    dst->size = src->size;
    /*
     * NOTE(review): g_memdup() is deprecated since GLib 2.68 in favour of
     * g_memdup2() (guint size overflow); confirm QEMU's minimum GLib
     * version before switching.
     */
    dst->data = g_memdup(src->data, src->size);
}

/*
 * Resolve @name relative to @dirpath via the fs driver.
 * Returns 0 on success, -errno on failure.
 */
int v9fs_name_to_path(V9fsState *s, V9fsPath *dirpath,
                      const char *name, V9fsPath *path)
{
    int err;
    err = s->ops->name_to_path(&s->ctx, dirpath, name, path);
    if (err < 0) {
        err = -errno;
    }
    return err;
}

/*
 * Return TRUE if s1 is an ancestor of s2.
 *
 * E.g. "a/b" is an ancestor of "a/b/c" but not of "a/bc/d".
 * As a special case, We treat s1 as ancestor of s2 if they are same!
 */
static int v9fs_path_is_ancestor(V9fsPath *s1, V9fsPath *s2)
{
    /* V9fsPath.size includes the terminating NUL (see v9fs_path_sprintf),
     * so size - 1 is the string length of s1. */
    if (!strncmp(s1->data, s2->data, s1->size - 1)) {
        if (s2->data[s1->size - 1] == '\0' || s2->data[s1->size - 1] == '/') {
            return 1;
        }
    }
    return 0;
}

/* Byte size of the string buffer, including the terminating NUL. */
static size_t v9fs_string_size(V9fsString *str)
{
    return str->size;
}

/*
 * returns 0 if fid got re-opened, 1 if not, < 0 on error
 */
static int coroutine_fn v9fs_reopen_fid(V9fsPDU *pdu, V9fsFidState *f)
{
    int err = 1;
    if (f->fid_type == P9_FID_FILE) {
        if (f->fs.fd == -1) {
            do {
                err = v9fs_co_open(pdu, f, f->open_flags);
            } while (err == -EINTR && !pdu->cancelled);
        }
    } else if (f->fid_type == P9_FID_DIR) {
        if (f->fs.dir.stream == NULL) {
            do {
                err = v9fs_co_opendir(pdu, f);
            } while (err == -EINTR && !pdu->cancelled);
        }
    }
    return err;
}

/*
 * Look up @fid, take a reference on it and reopen its file/directory if it
 * had been reclaimed. Returns NULL if the fid is unknown or reopen failed.
 */
static V9fsFidState *coroutine_fn get_fid(V9fsPDU *pdu, int32_t fid)
{
    int err;
    V9fsFidState *f;
    V9fsState *s = pdu->s;

    f = g_hash_table_lookup(s->fids, GINT_TO_POINTER(fid));
    if (f) {
        BUG_ON(f->clunked);
        /*
         * Update the fid ref upfront so that
         * we don't get reclaimed when we yield
         * in open later.
         */
        f->ref++;
        /*
         * check whether we need to reopen the
         * file. We might have closed the fd
         * while trying to free up some file
         * descriptors.
         */
        err = v9fs_reopen_fid(pdu, f);
        if (err < 0) {
            f->ref--;
            return NULL;
        }
        /*
         * Mark the fid as referenced so that the LRU
         * reclaim won't close the file descriptor
         */
        f->flags |= FID_REFERENCED;
        return f;
    }
    return NULL;
}

/*
 * Allocate a fresh fid state for @fid and insert it into the fid table.
 * Returns NULL if @fid is already in use.
 */
static V9fsFidState *alloc_fid(V9fsState *s, int32_t fid)
{
    V9fsFidState *f;

    f = g_hash_table_lookup(s->fids, GINT_TO_POINTER(fid));
    if (f) {
        /* If fid is already there return NULL */
        BUG_ON(f->clunked);
        return NULL;
    }
    f = g_new0(V9fsFidState, 1);
    f->fid = fid;
    f->fid_type = P9_FID_NONE;
    f->ref = 1;
    /*
     * Mark the fid as referenced so that the LRU
     * reclaim won't close the file descriptor
     */
    f->flags |= FID_REFERENCED;
    g_hash_table_insert(s->fids, GINT_TO_POINTER(fid), f);

    v9fs_readdir_init(s->proto_version, &f->fs.dir);
    v9fs_readdir_init(s->proto_version, &f->fs_reclaim.dir);

    return f;
}

/* Finalize an xattr fid: commit a pending setxattr and free its buffers. */
static int coroutine_fn v9fs_xattr_fid_clunk(V9fsPDU *pdu, V9fsFidState *fidp)
{
    int retval = 0;

    if (fidp->fs.xattr.xattrwalk_fid) {
        /* getxattr/listxattr fid */
        goto free_value;
    }
    /*
     * if this is fid for setxattr. clunk should
     * result in setxattr localcall
     */
    if (fidp->fs.xattr.len != fidp->fs.xattr.copied_len) {
        /* clunk after partial write */
        retval = -EINVAL;
        goto free_out;
    }
    if (fidp->fs.xattr.len) {
        retval = v9fs_co_lsetxattr(pdu, &fidp->path, &fidp->fs.xattr.name,
                                   fidp->fs.xattr.value,
                                   fidp->fs.xattr.len,
                                   fidp->fs.xattr.flags);
    } else {
        retval = v9fs_co_lremovexattr(pdu, &fidp->path, &fidp->fs.xattr.name);
    }
free_out:
    v9fs_string_free(&fidp->fs.xattr.name);
free_value:
    g_free(fidp->fs.xattr.value);
    return retval;
}

/*
 * Release all resources of a fid: close file/dir if still open, commit any
 * pending xattr operation, then free path and state. Returns the close /
 * xattr result.
 */
static int coroutine_fn free_fid(V9fsPDU *pdu, V9fsFidState *fidp)
{
    int retval = 0;

    if (fidp->fid_type == P9_FID_FILE) {
        /* If we reclaimed the fd no need to close */
        if (fidp->fs.fd != -1) {
            retval = v9fs_co_close(pdu, &fidp->fs);
        }
    } else if (fidp->fid_type == P9_FID_DIR) {
        if (fidp->fs.dir.stream != NULL) {
            retval = v9fs_co_closedir(pdu, &fidp->fs);
        }
    } else if (fidp->fid_type == P9_FID_XATTR) {
        retval = v9fs_xattr_fid_clunk(pdu, fidp);
    }
    v9fs_path_free(&fidp->path);
    g_free(fidp);
    return retval;
}

/* Drop one reference; frees the fid once it is both unreferenced and clunked. */
static int coroutine_fn put_fid(V9fsPDU *pdu, V9fsFidState *fidp)
{
    BUG_ON(!fidp->ref);
    fidp->ref--;
    /*
     * Don't free the fid if it is in reclaim list
     */
    if (!fidp->ref && fidp->clunked) {
        if (fidp->fid == pdu->s->root_fid) {
            /*
             * if the clunked fid is root fid then we
             * have unmounted the fs on the client side.
             * delete the migration blocker. Ideally, this
             * should be hooked to transport close notification
             */
            migrate_del_blocker(&pdu->s->migration_blocker);
        }
        return free_fid(pdu, fidp);
    }
    return 0;
}

/*
 * Remove @fid from the fid table and mark it clunked. Returns the fid state
 * (ownership of one reference passes to the caller) or NULL if unknown.
 */
static V9fsFidState *clunk_fid(V9fsState *s, int32_t fid)
{
    V9fsFidState *fidp;

    /* TODO: Use g_hash_table_steal_extended() instead? */
    fidp = g_hash_table_lookup(s->fids, GINT_TO_POINTER(fid));
    if (fidp) {
        g_hash_table_remove(s->fids, GINT_TO_POINTER(fid));
        fidp->clunked = true;
        return fidp;
    }
    return NULL;
}

/*
 * Reclaim host file descriptors / directory streams from idle fids (simple
 * second-chance LRU over the fid table), closing up to open_fd_rc of them
 * in one batch on a worker thread.
 */
void coroutine_fn v9fs_reclaim_fd(V9fsPDU *pdu)
{
    int reclaim_count = 0;
    V9fsState *s = pdu->s;
    V9fsFidState *f;
    GHashTableIter iter;
    gpointer fid;
    int err;
    int nclosed = 0;

    /* prevent multiple coroutines running this function simultaniously */
    if (s->reclaiming) {
        return;
    }
    s->reclaiming = true;

    g_hash_table_iter_init(&iter, s->fids);

    QSLIST_HEAD(, V9fsFidState) reclaim_list =
        QSLIST_HEAD_INITIALIZER(reclaim_list);

    /* Pick FIDs to be closed, collect them on reclaim_list. */
    while (g_hash_table_iter_next(&iter, &fid, (gpointer *) &f)) {
        /*
         * Unlinked fids cannot be reclaimed, skip those, and also skip fids
         * currently being operated on.
         */
        if (f->ref || f->flags & FID_NON_RECLAIMABLE) {
            continue;
        }
        /*
         * if it is a recently referenced fid
         * we leave the fid untouched and clear the
         * reference bit. We come back to it later
         * in the next iteration. (a simple LRU without
         * moving list elements around)
         */
        if (f->flags & FID_REFERENCED) {
            f->flags &= ~FID_REFERENCED;
            continue;
        }
        /*
         * Add fids to reclaim list.
         */
        if (f->fid_type == P9_FID_FILE) {
            if (f->fs.fd != -1) {
                /*
                 * Up the reference count so that
                 * a clunk request won't free this fid
                 */
                f->ref++;
                QSLIST_INSERT_HEAD(&reclaim_list, f, reclaim_next);
                f->fs_reclaim.fd = f->fs.fd;
                f->fs.fd = -1;
                reclaim_count++;
            }
        } else if (f->fid_type == P9_FID_DIR) {
            if (f->fs.dir.stream != NULL) {
                /*
                 * Up the reference count so that
                 * a clunk request won't free this fid
                 */
                f->ref++;
                QSLIST_INSERT_HEAD(&reclaim_list, f, reclaim_next);
                f->fs_reclaim.dir.stream = f->fs.dir.stream;
                f->fs.dir.stream = NULL;
                reclaim_count++;
            }
        }
        if (reclaim_count >= open_fd_rc) {
            break;
        }
    }
    /*
     * Close the picked FIDs altogether on a background I/O driver thread. Do
     * this all at once to keep latency (i.e. amount of thread hops between main
     * thread <-> fs driver background thread) as low as possible.
     */
    v9fs_co_run_in_worker({
        QSLIST_FOREACH(f, &reclaim_list, reclaim_next) {
            err = (f->fid_type == P9_FID_DIR) ?
                s->ops->closedir(&s->ctx, &f->fs_reclaim) :
                s->ops->close(&s->ctx, &f->fs_reclaim);

            /* 'man 2 close' suggests to ignore close() errors except of EBADF */
            if (unlikely(err && errno == EBADF)) {
                /*
                 * unexpected case as FIDs were picked above by having a valid
                 * file descriptor
                 */
                error_report("9pfs: v9fs_reclaim_fd() WARNING: close() failed with EBADF");
            } else {
                /* total_open_fd must only be mutated on main thread */
                nclosed++;
            }
        }
    });
    total_open_fd -= nclosed;
    /* Free the closed FIDs. */
    while (!QSLIST_EMPTY(&reclaim_list)) {
        f = QSLIST_FIRST(&reclaim_list);
        QSLIST_REMOVE(&reclaim_list, f, V9fsFidState, reclaim_next);
        /*
         * Now drop the fid reference, free it
         * if clunked.
         */
        put_fid(pdu, f);
    }

    s->reclaiming = false;
}

/*
 * This is used when a path is removed from the directory tree. Any
 * fids that still reference it must not be closed from then on, since
 * they cannot be reopened.
 */
static int coroutine_fn v9fs_mark_fids_unreclaim(V9fsPDU *pdu, V9fsPath *path)
{
    int err = 0;
    V9fsState *s = pdu->s;
    V9fsFidState *fidp;
    gpointer fid;
    GHashTableIter iter;
    /*
     * The most common case is probably that we have exactly one
     * fid for the given path, so preallocate exactly one.
     */
    g_autoptr(GArray) to_reopen = g_array_sized_new(FALSE, FALSE,
            sizeof(V9fsFidState *), 1);
    gint i;

    g_hash_table_iter_init(&iter, s->fids);

    /*
     * We iterate over the fid table looking for the entries we need
     * to reopen, and store them in to_reopen. This is because
     * v9fs_reopen_fid() and put_fid() yield. This allows the fid table
     * to be modified in the meantime, invalidating our iterator.
     */
    while (g_hash_table_iter_next(&iter, &fid, (gpointer *) &fidp)) {
        if (fidp->path.size == path->size &&
            !memcmp(fidp->path.data, path->data, path->size)) {
            /*
             * Ensure the fid survives a potential clunk request during
             * v9fs_reopen_fid or put_fid.
             */
            fidp->ref++;
            fidp->flags |= FID_NON_RECLAIMABLE;
            g_array_append_val(to_reopen, fidp);
        }
    }

    for (i = 0; i < to_reopen->len; i++) {
        fidp = g_array_index(to_reopen, V9fsFidState*, i);
        /* reopen the file/dir if already closed */
        err = v9fs_reopen_fid(pdu, fidp);
        if (err < 0) {
            break;
        }
    }

    for (i = 0; i < to_reopen->len; i++) {
        put_fid(pdu, g_array_index(to_reopen, V9fsFidState*, i));
    }
    return err;
}

/*
 * Clunk and release every fid of the session. NOTE(review): presumably
 * invoked when the client resets/aborts the 9p session — confirm at the
 * call sites (not visible in this chunk).
 */
static void coroutine_fn virtfs_reset(V9fsPDU *pdu)
{
    V9fsState *s = pdu->s;
    V9fsFidState *fidp;
    GList *freeing;
    /*
     * Get a list of all the values (fid states) in the table, which
     * we then...
     */
    g_autoptr(GList) fids = g_hash_table_get_values(s->fids);

    /* ... remove from the table, taking over ownership. */
    g_hash_table_steal_all(s->fids);

    /*
     * This allows us to release our references to them asynchronously without
     * iterating over the hash table and risking iterator invalidation
     * through concurrent modifications.
     */
    for (freeing = fids; freeing; freeing = freeing->next) {
        fidp = freeing->data;
        fidp->ref++;
        fidp->clunked = true;
        put_fid(pdu, fidp);
    }
}

#define P9_QID_TYPE_DIR         0x80
#define P9_QID_TYPE_SYMLINK     0x02

#define P9_STAT_MODE_DIR        0x80000000
#define P9_STAT_MODE_APPEND     0x40000000
#define P9_STAT_MODE_EXCL       0x20000000
#define P9_STAT_MODE_MOUNT      0x10000000
#define P9_STAT_MODE_AUTH       0x08000000
#define P9_STAT_MODE_TMP        0x04000000
#define P9_STAT_MODE_SYMLINK    0x02000000
#define P9_STAT_MODE_LINK       0x01000000
#define P9_STAT_MODE_DEVICE     0x00800000
#define P9_STAT_MODE_NAMED_PIPE 0x00200000
#define P9_STAT_MODE_SOCKET     0x00100000
#define P9_STAT_MODE_SETUID     0x00080000
#define P9_STAT_MODE_SETGID     0x00040000
#define P9_STAT_MODE_SETVTX     0x00010000

#define P9_STAT_MODE_TYPE_BITS (P9_STAT_MODE_DIR |          \
                                P9_STAT_MODE_SYMLINK |      \
                                P9_STAT_MODE_LINK |         \
                                P9_STAT_MODE_DEVICE |       \
                                P9_STAT_MODE_NAMED_PIPE |   \
                                P9_STAT_MODE_SOCKET)

/* Mirrors all bits of a byte. So e.g. binary 10100000 would become 00000101. */
static inline uint8_t mirror8bit(uint8_t byte)
{
    return (byte * 0x0202020202ULL & 0x010884422010ULL) % 1023;
}

/* Same as mirror8bit() just for a 64 bit data type instead for a byte. */
static inline uint64_t mirror64bit(uint64_t value)
{
    return ((uint64_t)mirror8bit(value & 0xff) << 56) |
           ((uint64_t)mirror8bit((value >> 8) & 0xff) << 48) |
           ((uint64_t)mirror8bit((value >> 16) & 0xff) << 40) |
           ((uint64_t)mirror8bit((value >> 24) & 0xff) << 32) |
           ((uint64_t)mirror8bit((value >> 32) & 0xff) << 24) |
           ((uint64_t)mirror8bit((value >> 40) & 0xff) << 16) |
           ((uint64_t)mirror8bit((value >> 48) & 0xff) << 8) |
           ((uint64_t)mirror8bit((value >> 56) & 0xff));
}

/*
 * Parameter k for the Exponential Golomb algorithm to be used.
 *
 * The smaller this value, the smaller the minimum bit count for the Exp.
 * Golomb generated affixes will be (at lowest index) however for the
 * price of having higher maximum bit count of generated affixes (at highest
 * index). Likewise increasing this parameter yields in smaller maximum bit
 * count for the price of having higher minimum bit count.
 *
 * In practice that means: a good value for k depends on the expected amount
 * of devices to be exposed by one export. For a small amount of devices k
 * should be small, for a large amount of devices k might be increased
 * instead. The default of k=0 should be fine for most users though.
 *
 * IMPORTANT: In case this ever becomes a runtime parameter; the value of
 * k should not change as long as guest is still running! Because that would
 * cause completely different inode numbers to be generated on guest.
 */
#define EXP_GOLOMB_K    0

/**
 * expGolombEncode() - Exponential Golomb algorithm for arbitrary k
 * (including k=0).
 *
 * @n: natural number (or index) of the prefix to be generated
 *     (1, 2, 3, ...)
 * @k: parameter k of Exp. Golomb algorithm to be used
 *     (see comment on EXP_GOLOMB_K macro for details about k)
 * Return: prefix for given @n and @k
 *
 * The Exponential Golomb algorithm generates prefixes (NOT suffixes!)
 * with growing length and with the mathematical property of being
 * "prefix-free". The latter means the generated prefixes can be prepended
 * in front of arbitrary numbers and the resulting concatenated numbers are
 * guaranteed to be always unique.
 *
 * This is a minor adjustment to the original Exp. Golomb algorithm in the
 * sense that lowest allowed index (@n) starts with 1, not with zero.
708 */ 709 static VariLenAffix expGolombEncode(uint64_t n, int k) 710 { 711 const uint64_t value = n + (1 << k) - 1; 712 const int bits = (int) log2(value) + 1; 713 return (VariLenAffix) { 714 .type = AffixType_Prefix, 715 .value = value, 716 .bits = bits + MAX((bits - 1 - k), 0) 717 }; 718 } 719 720 /** 721 * invertAffix() - Converts a suffix into a prefix, or a prefix into a suffix. 722 * @affix: either suffix or prefix to be inverted 723 * Return: inversion of passed @affix 724 * 725 * Simply mirror all bits of the affix value, for the purpose to preserve 726 * respectively the mathematical "prefix-free" or "suffix-free" property 727 * after the conversion. 728 * 729 * If a passed prefix is suitable to create unique numbers, then the 730 * returned suffix is suitable to create unique numbers as well (and vice 731 * versa). 732 */ 733 static VariLenAffix invertAffix(const VariLenAffix *affix) 734 { 735 return (VariLenAffix) { 736 .type = 737 (affix->type == AffixType_Suffix) ? 738 AffixType_Prefix : AffixType_Suffix, 739 .value = 740 mirror64bit(affix->value) >> 741 ((sizeof(affix->value) * 8) - affix->bits), 742 .bits = affix->bits 743 }; 744 } 745 746 /** 747 * affixForIndex() - Generates suffix numbers with "suffix-free" property. 748 * @index: natural number (or index) of the suffix to be generated 749 * (1, 2, 3, ...) 750 * Return: Suffix suitable to assemble unique number. 751 * 752 * This is just a wrapper function on top of the Exp. Golomb algorithm. 753 * 754 * Since the Exp. Golomb algorithm generates prefixes, but we need suffixes, 755 * this function converts the Exp. Golomb prefixes into appropriate suffixes 756 * which are still suitable for generating unique numbers. 
757 */ 758 static VariLenAffix affixForIndex(uint64_t index) 759 { 760 VariLenAffix prefix; 761 prefix = expGolombEncode(index, EXP_GOLOMB_K); 762 return invertAffix(&prefix); /* convert prefix to suffix */ 763 } 764 765 static uint32_t qpp_hash(QppEntry e) 766 { 767 return qemu_xxhash4(e.ino_prefix, e.dev); 768 } 769 770 static uint32_t qpf_hash(QpfEntry e) 771 { 772 return qemu_xxhash4(e.ino, e.dev); 773 } 774 775 static bool qpd_cmp_func(const void *obj, const void *userp) 776 { 777 const QpdEntry *e1 = obj, *e2 = userp; 778 return e1->dev == e2->dev; 779 } 780 781 static bool qpp_cmp_func(const void *obj, const void *userp) 782 { 783 const QppEntry *e1 = obj, *e2 = userp; 784 return e1->dev == e2->dev && e1->ino_prefix == e2->ino_prefix; 785 } 786 787 static bool qpf_cmp_func(const void *obj, const void *userp) 788 { 789 const QpfEntry *e1 = obj, *e2 = userp; 790 return e1->dev == e2->dev && e1->ino == e2->ino; 791 } 792 793 static void qp_table_remove(void *p, uint32_t h, void *up) 794 { 795 g_free(p); 796 } 797 798 static void qp_table_destroy(struct qht *ht) 799 { 800 if (!ht || !ht->map) { 801 return; 802 } 803 qht_iter(ht, qp_table_remove, NULL); 804 qht_destroy(ht); 805 } 806 807 static void qpd_table_init(struct qht *ht) 808 { 809 qht_init(ht, qpd_cmp_func, 1, QHT_MODE_AUTO_RESIZE); 810 } 811 812 static void qpp_table_init(struct qht *ht) 813 { 814 qht_init(ht, qpp_cmp_func, 1, QHT_MODE_AUTO_RESIZE); 815 } 816 817 static void qpf_table_init(struct qht *ht) 818 { 819 qht_init(ht, qpf_cmp_func, 1 << 16, QHT_MODE_AUTO_RESIZE); 820 } 821 822 /* 823 * Returns how many (high end) bits of inode numbers of the passed fs 824 * device shall be used (in combination with the device number) to 825 * generate hash values for qpp_table entries. 826 * 827 * This function is required if variable length suffixes are used for inode 828 * number mapping on guest level. 
 * Since a device may end up having multiple
 * entries in qpp_table, each entry most probably with a different suffix
 * length, we thus need this function in conjunction with qpd_table to
 * "agree" about a fix amount of bits (per device) to be always used for
 * generating hash values for the purpose of accessing qpp_table in order
 * get consistent behaviour when accessing qpp_table.
 */
static int qid_inode_prefix_hash_bits(V9fsPDU *pdu, dev_t dev)
{
    QpdEntry lookup = {
        .dev = dev
    }, *val;
    uint32_t hash = dev;
    VariLenAffix affix;

    val = qht_lookup(&pdu->s->qpd_table, &lookup, hash);
    if (!val) {
        /*
         * First time we see this device: pin the prefix (bit) length that
         * will be used for all qpp_table hashing of this device from now on.
         */
        val = g_new0(QpdEntry, 1);
        *val = lookup;
        affix = affixForIndex(pdu->s->qp_affix_next);
        val->prefix_bits = affix.bits;
        qht_insert(&pdu->s->qpd_table, val, hash, NULL);
        pdu->s->qp_ndevices++;
    }
    return val->prefix_bits;
}

/*
 * Slow / full mapping host inode nr -> guest inode nr.
 *
 * This function performs a slower and much more costly remapping of an
 * original file inode number on host to an appropriate different inode
 * number on guest. For every (dev, inode) combination on host a new
 * sequential number is generated, cached and exposed as inode number on
 * guest.
 *
 * This is just a "last resort" fallback solution if the much faster/cheaper
 * qid_path_suffixmap() failed. In practice this slow / full mapping is not
 * expected ever to be used at all though.
 *
 * See qid_path_suffixmap() for details
 *
 */
static int qid_path_fullmap(V9fsPDU *pdu, const struct stat *stbuf,
                            uint64_t *path)
{
    QpfEntry lookup = {
        .dev = stbuf->st_dev,
        .ino = stbuf->st_ino
    }, *val;
    uint32_t hash = qpf_hash(lookup);
    VariLenAffix affix;

    val = qht_lookup(&pdu->s->qpf_table, &lookup, hash);

    if (!val) {
        if (pdu->s->qp_fullpath_next == 0) {
            /* no more files can be mapped :'( */
            error_report_once(
                "9p: No more prefixes available for remapping inodes from "
                "host to guest."
            );
            return -ENFILE;
        }

        val = g_new0(QpfEntry, 1);
        *val = lookup;

        /* new unique inode and device combo */
        /*
         * NOTE(review): the affix index used here is one past the largest
         * value qp_affix_next can take, presumably so full-map numbers can
         * never collide with suffix-map numbers — confirm against the
         * QppEntry / qp_affix_next type widths.
         */
        affix = affixForIndex(
            1ULL << (sizeof(pdu->s->qp_affix_next) * 8)
        );
        val->path = (pdu->s->qp_fullpath_next++ << affix.bits) | affix.value;
        pdu->s->qp_fullpath_next &= ((1ULL << (64 - affix.bits)) - 1);
        qht_insert(&pdu->s->qpf_table, val, hash, NULL);
    }

    *path = val->path;
    return 0;
}

/*
 * Quick mapping host inode nr -> guest inode nr.
 *
 * This function performs quick remapping of an original file inode number
 * on host to an appropriate different inode number on guest. This remapping
 * of inodes is required to avoid inode nr collisions on guest which would
 * happen if the 9p export contains more than 1 exported file system (or
 * more than 1 file system data set), because unlike on host level where the
 * files would have different device nrs, all files exported by 9p would
 * share the same device nr on guest (the device nr of the virtual 9p device
 * that is).
 *
 * Inode remapping is performed by chopping off high end bits of the original
 * inode number from host, shifting the result upwards and then assigning a
 * generated suffix number for the low end bits, where the same suffix number
 * will be shared by all inodes with the same device id AND the same high end
 * bits that have been chopped off. That approach utilizes the fact that inode
 * numbers very likely share the same high end bits (i.e. due to their common
 * sequential generation by file systems) and hence we only have to generate
 * and track a very limited amount of suffixes in practice due to that.
 *
 * We generate variable size suffixes for that purpose. The 1st generated
 * suffix will only have 1 bit and hence we only need to chop off 1 bit from
 * the original inode number. The subsequent suffixes being generated will
 * grow in (bit) size subsequently, i.e. the 2nd and 3rd suffix being
 * generated will have 3 bits and hence we have to chop off 3 bits from their
 * original inodes, and so on. That approach of using variable length suffixes
 * (i.e. over fixed size ones) utilizes the fact that in practice only a very
 * limited amount of devices are shared by the same export (e.g. typically
 * less than 2 dozen devices per 9p export), so in practice we need to chop
 * off less bits than with fixed size prefixes and yet are flexible to add
 * new devices at runtime below host's export directory at any time without
 * having to reboot guest nor requiring to reconfigure guest for that. And due
 * to the very limited amount of original high end bits that we chop off that
 * way, the total amount of suffixes we need to generate is less than by using
 * fixed size prefixes and hence it also improves performance of the inode
 * remapping algorithm, and finally has the nice side effect that the inode
 * numbers on guest will be much smaller & human friendly. ;-)
 */
static int qid_path_suffixmap(V9fsPDU *pdu, const struct stat *stbuf,
                              uint64_t *path)
{
    const int ino_hash_bits = qid_inode_prefix_hash_bits(pdu, stbuf->st_dev);
    QppEntry lookup = {
        .dev = stbuf->st_dev,
        .ino_prefix = (uint16_t) (stbuf->st_ino >> (64 - ino_hash_bits))
    }, *val;
    uint32_t hash = qpp_hash(lookup);

    val = qht_lookup(&pdu->s->qpp_table, &lookup, hash);

    if (!val) {
        if (pdu->s->qp_affix_next == 0) {
            /* we ran out of affixes */
            warn_report_once(
                "9p: Potential degraded performance of inode remapping"
            );
            return -ENFILE;
        }

        val = g_new0(QppEntry, 1);
        *val = lookup;

        /* new unique inode affix and device combo */
        val->qp_affix_index = pdu->s->qp_affix_next++;
        val->qp_affix = affixForIndex(val->qp_affix_index);
        qht_insert(&pdu->s->qpp_table, val, hash, NULL);
    }
    /* assuming generated affix to be suffix type, not prefix */
    *path = (stbuf->st_ino << val->qp_affix.bits) | val->qp_affix.value;
    return 0;
}

/*
 * Fill in the 9p QID (type, version, path) for a host stat result,
 * applying inode remapping when enabled for this export.
 * Returns 0 on success, negative error otherwise.
 */
static int stat_to_qid(V9fsPDU *pdu, const struct stat *stbuf, V9fsQID *qidp)
{
    int err;
    size_t size;

    if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) {
        /* map inode+device to qid path (fast path) */
        err = qid_path_suffixmap(pdu, stbuf, &qidp->path);
        if (err == -ENFILE) {
            /* fast path didn't work, fall back to full map */
            err = qid_path_fullmap(pdu, stbuf, &qidp->path);
        }
        if (err) {
            return err;
        }
    } else {
        if (pdu->s->dev_id != stbuf->st_dev) {
            if (pdu->s->ctx.export_flags & V9FS_FORBID_MULTIDEVS) {
                error_report_once(
                    "9p: Multiple devices detected in same VirtFS export. "
                    "Access of guest to additional devices is (partly) "
                    "denied due to virtfs option 'multidevs=forbid' being "
                    "effective."
                );
                return -ENODEV;
            } else {
                warn_report_once(
                    "9p: Multiple devices detected in same VirtFS export, "
                    "which might lead to file ID collisions and severe "
                    "misbehaviours on guest! You should either use a "
                    "separate export for each device shared from host or "
                    "use virtfs option 'multidevs=remap'!"
                );
            }
        }
        /* No remapping: expose the host inode number directly. */
        memset(&qidp->path, 0, sizeof(qidp->path));
        size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path));
        memcpy(&qidp->path, &stbuf->st_ino, size);
    }

    qidp->version = stbuf->st_mtime ^ (stbuf->st_size << 8);
    qidp->type = 0;
    if (S_ISDIR(stbuf->st_mode)) {
        qidp->type |= P9_QID_TYPE_DIR;
    }
    if (S_ISLNK(stbuf->st_mode)) {
        qidp->type |= P9_QID_TYPE_SYMLINK;
    }

    return 0;
}

/* Take a PDU off the free list and activate it; NULL if none available. */
V9fsPDU *pdu_alloc(V9fsState *s)
{
    V9fsPDU *pdu = NULL;

    if (!QLIST_EMPTY(&s->free_list)) {
        pdu = QLIST_FIRST(&s->free_list);
        QLIST_REMOVE(pdu, next);
        QLIST_INSERT_HEAD(&s->active_list, pdu, next);
    }
    return pdu;
}

/* Return a fully processed PDU from the active list to the free list. */
void pdu_free(V9fsPDU *pdu)
{
    V9fsState *s = pdu->s;

    g_assert(!pdu->cancelled);
    QLIST_REMOVE(pdu, next);
    QLIST_INSERT_HEAD(&s->free_list, pdu, next);
}

/*
 * Finish a request: marshal the reply header (and error body if @len < 0),
 * push it to the transport, and wake anybody waiting in flush.
 */
static void coroutine_fn pdu_complete(V9fsPDU *pdu, ssize_t len)
{
    int8_t id = pdu->id + 1; /* Response */
    V9fsState *s = pdu->s;
    int ret;

    /*
     * The 9p spec requires that successfully cancelled pdus receive no reply.
     * Sending a reply would confuse clients because they would
     * assume that any EINTR is the actual result of the operation,
     * rather than a consequence of the cancellation. However, if
     * the operation completed (successfully or with an error other
     * than caused be cancellation), we do send out that reply, both
     * for efficiency and to avoid confusing the rest of the state machine
     * that assumes passing a non-error here will mean a successful
     * transmission of the reply.
     */
    bool discard = pdu->cancelled && len == -EINTR;
    if (discard) {
        trace_v9fs_rcancel(pdu->tag, pdu->id);
        pdu->size = 0;
        goto out_notify;
    }

    if (len < 0) {
        int err = -len;
        len = 7;

        if (s->proto_version != V9FS_PROTO_2000L) {
            V9fsString str;

            str.data = strerror(err);
            str.size = strlen(str.data);

            ret = pdu_marshal(pdu, len, "s", &str);
            if (ret < 0) {
                goto out_notify;
            }
            len += ret;
            id = P9_RERROR;
        } else {
            err = errno_to_dotl(err);
        }

        ret = pdu_marshal(pdu, len, "d", err);
        if (ret < 0) {
            goto out_notify;
        }
        len += ret;

        if (s->proto_version == V9FS_PROTO_2000L) {
            id = P9_RLERROR;
        }
        trace_v9fs_rerror(pdu->tag, pdu->id, err); /* Trace ERROR */
    }

    /* fill out the header */
    if (pdu_marshal(pdu, 0, "dbw", (int32_t)len, id, pdu->tag) < 0) {
        goto out_notify;
    }

    /* keep these in sync */
    pdu->size = len;
    pdu->id = id;

out_notify:
    pdu->s->transport->push_and_notify(pdu);

    /* Now wakeup anybody waiting in flush for this request */
    if (!qemu_co_queue_next(&pdu->complete)) {
        pdu_free(pdu);
    }
}

/*
 * Map a 9p2000.u wire mode (plus the 'extension' string for device files)
 * to a host mode_t.
 */
static mode_t v9mode_to_mode(uint32_t mode, V9fsString *extension)
{
    mode_t ret;

    ret = mode & 0777;
    if (mode & P9_STAT_MODE_DIR) {
        ret |= S_IFDIR;
    }

    if (mode & P9_STAT_MODE_SYMLINK) {
        ret |= S_IFLNK;
    }
    if (mode & P9_STAT_MODE_SOCKET) {
        ret |= S_IFSOCK;
    }
    if (mode & P9_STAT_MODE_NAMED_PIPE) {
        ret |= S_IFIFO;
    }
    if (mode & P9_STAT_MODE_DEVICE) {
        /* extension "c ..." denotes a char device, anything else block */
        if (extension->size && extension->data[0] == 'c') {
            ret |= S_IFCHR;
        } else {
            ret |= S_IFBLK;
        }
    }

    /* No type bit set at all: it is a regular file. */
    if (!(ret & ~0777)) {
        ret |= S_IFREG;
    }

    if (mode & P9_STAT_MODE_SETUID) {
        ret |= S_ISUID;
    }
    if (mode & P9_STAT_MODE_SETGID) {
        ret |= S_ISGID;
    }
    if (mode & P9_STAT_MODE_SETVTX) {
        ret |= S_ISVTX;
    }

    return ret;
}

/* Return 1 if the client's wstat carries only "don't touch" values. */
static int donttouch_stat(V9fsStat *stat)
{
    if (stat->type == -1 &&
        stat->dev == -1 &&
        stat->qid.type == 0xff &&
        stat->qid.version == (uint32_t) -1 &&
        stat->qid.path == (uint64_t) -1 &&
        stat->mode == -1 &&
        stat->atime == -1 &&
        stat->mtime == -1 &&
        stat->length == -1 &&
        !stat->name.size &&
        !stat->uid.size &&
        !stat->gid.size &&
        !stat->muid.size &&
        stat->n_uid == -1 &&
        stat->n_gid == -1 &&
        stat->n_muid == -1) {
        return 1;
    }

    return 0;
}

/* Initialize all string members of a V9fsStat. */
static void v9fs_stat_init(V9fsStat *stat)
{
    v9fs_string_init(&stat->name);
    v9fs_string_init(&stat->uid);
    v9fs_string_init(&stat->gid);
    v9fs_string_init(&stat->muid);
    v9fs_string_init(&stat->extension);
}

/* Free all string members of a V9fsStat. */
static void v9fs_stat_free(V9fsStat *stat)
{
    v9fs_string_free(&stat->name);
    v9fs_string_free(&stat->uid);
    v9fs_string_free(&stat->gid);
    v9fs_string_free(&stat->muid);
    v9fs_string_free(&stat->extension);
}

/* Map host st_mode bits to the 9p2000.u wire mode representation. */
static uint32_t stat_to_v9mode(const struct stat *stbuf)
{
    uint32_t mode;

    mode = stbuf->st_mode & 0777;
    if (S_ISDIR(stbuf->st_mode)) {
        mode |= P9_STAT_MODE_DIR;
    }

    if (S_ISLNK(stbuf->st_mode)) {
        mode |= P9_STAT_MODE_SYMLINK;
    }

    if (S_ISSOCK(stbuf->st_mode)) {
        mode |= P9_STAT_MODE_SOCKET;
    }

    if (S_ISFIFO(stbuf->st_mode)) {
        mode |= P9_STAT_MODE_NAMED_PIPE;
    }

    if
(S_ISBLK(stbuf->st_mode) || S_ISCHR(stbuf->st_mode)) {
        mode |= P9_STAT_MODE_DEVICE;
    }

    if (stbuf->st_mode & S_ISUID) {
        mode |= P9_STAT_MODE_SETUID;
    }

    if (stbuf->st_mode & S_ISGID) {
        mode |= P9_STAT_MODE_SETGID;
    }

    if (stbuf->st_mode & S_ISVTX) {
        mode |= P9_STAT_MODE_SETVTX;
    }

    return mode;
}

/*
 * Fill a 9P2000.u stat record @v9stat from host stat @stbuf.
 *
 * @path is used to resolve the symlink target for the extension field;
 * @basename becomes the wire "name" field. Returns 0 on success or a
 * negative error from stat_to_qid()/v9fs_co_readlink().
 */
static int coroutine_fn stat_to_v9stat(V9fsPDU *pdu, V9fsPath *path,
                                       const char *basename,
                                       const struct stat *stbuf,
                                       V9fsStat *v9stat)
{
    int err;

    memset(v9stat, 0, sizeof(*v9stat));

    err = stat_to_qid(pdu, stbuf, &v9stat->qid);
    if (err < 0) {
        return err;
    }
    v9stat->mode = stat_to_v9mode(stbuf);
    v9stat->atime = stbuf->st_atime;
    v9stat->mtime = stbuf->st_mtime;
    v9stat->length = stbuf->st_size;

    /* free any previous contents before overwriting the strings below */
    v9fs_string_free(&v9stat->uid);
    v9fs_string_free(&v9stat->gid);
    v9fs_string_free(&v9stat->muid);

    v9stat->n_uid = stbuf->st_uid;
    v9stat->n_gid = stbuf->st_gid;
    v9stat->n_muid = 0;

    v9fs_string_free(&v9stat->extension);

    if (v9stat->mode & P9_STAT_MODE_SYMLINK) {
        /* symlink: extension carries the link target */
        err = v9fs_co_readlink(pdu, path, &v9stat->extension);
        if (err < 0) {
            return err;
        }
    } else if (v9stat->mode & P9_STAT_MODE_DEVICE) {
        /* device node: extension is "<c|b> major minor" */
        v9fs_string_sprintf(&v9stat->extension, "%c %u %u",
                S_ISCHR(stbuf->st_mode) ? 'c' : 'b',
                major(stbuf->st_rdev), minor(stbuf->st_rdev));
    } else if (S_ISDIR(stbuf->st_mode) || S_ISREG(stbuf->st_mode)) {
        v9fs_string_sprintf(&v9stat->extension, "%s %lu",
                "HARDLINKCOUNT", (unsigned long)stbuf->st_nlink);
    }

    v9fs_string_sprintf(&v9stat->name, "%s", basename);

    /*
     * 61 = size of the fixed-width portion of the wire stat record;
     * the variable-length string fields are added on top.
     * NOTE(review): confirm against the 9P2000.u stat layout.
     */
    v9stat->size = 61 +
        v9fs_string_size(&v9stat->name) +
        v9fs_string_size(&v9stat->uid) +
        v9fs_string_size(&v9stat->gid) +
        v9fs_string_size(&v9stat->muid) +
        v9fs_string_size(&v9stat->extension);
    return 0;
}

/* Tgetattr request_mask / Rgetattr st_result_mask bits (9P2000.L) */
#define P9_STATS_MODE          0x00000001ULL
#define P9_STATS_NLINK         0x00000002ULL
#define P9_STATS_UID           0x00000004ULL
#define P9_STATS_GID           0x00000008ULL
#define P9_STATS_RDEV          0x00000010ULL
#define P9_STATS_ATIME         0x00000020ULL
#define P9_STATS_MTIME         0x00000040ULL
#define P9_STATS_CTIME         0x00000080ULL
#define P9_STATS_INO           0x00000100ULL
#define P9_STATS_SIZE          0x00000200ULL
#define P9_STATS_BLOCKS        0x00000400ULL

#define P9_STATS_BTIME         0x00000800ULL
#define P9_STATS_GEN           0x00001000ULL
#define P9_STATS_DATA_VERSION  0x00002000ULL

#define P9_STATS_BASIC         0x000007ffULL /* Mask for fields up to BLOCKS */
#define P9_STATS_ALL           0x00003fffULL /* Mask for All fields above */


/**
 * blksize_to_iounit() - Block size exposed to 9p client.
 * Return: block size
 *
 * @pdu: 9p client request
 * @blksize: host filesystem's block size
 *
 * Convert host filesystem's block size into an appropriate block size for
 * 9p client (guest OS side). The value returned suggests an "optimum" block
 * size for 9p I/O, i.e. to maximize performance.
1337 */ 1338 static int32_t blksize_to_iounit(const V9fsPDU *pdu, int32_t blksize) 1339 { 1340 int32_t iounit = 0; 1341 V9fsState *s = pdu->s; 1342 1343 /* 1344 * iounit should be multiples of blksize (host filesystem block size) 1345 * as well as less than (client msize - P9_IOHDRSZ) 1346 */ 1347 if (blksize) { 1348 iounit = QEMU_ALIGN_DOWN(s->msize - P9_IOHDRSZ, blksize); 1349 } 1350 if (!iounit) { 1351 iounit = s->msize - P9_IOHDRSZ; 1352 } 1353 return iounit; 1354 } 1355 1356 static int32_t stat_to_iounit(const V9fsPDU *pdu, const struct stat *stbuf) 1357 { 1358 return blksize_to_iounit(pdu, stbuf->st_blksize); 1359 } 1360 1361 static int stat_to_v9stat_dotl(V9fsPDU *pdu, const struct stat *stbuf, 1362 V9fsStatDotl *v9lstat) 1363 { 1364 memset(v9lstat, 0, sizeof(*v9lstat)); 1365 1366 v9lstat->st_mode = stbuf->st_mode; 1367 v9lstat->st_nlink = stbuf->st_nlink; 1368 v9lstat->st_uid = stbuf->st_uid; 1369 v9lstat->st_gid = stbuf->st_gid; 1370 v9lstat->st_rdev = host_dev_to_dotl_dev(stbuf->st_rdev); 1371 v9lstat->st_size = stbuf->st_size; 1372 v9lstat->st_blksize = stat_to_iounit(pdu, stbuf); 1373 v9lstat->st_blocks = stbuf->st_blocks; 1374 v9lstat->st_atime_sec = stbuf->st_atime; 1375 v9lstat->st_mtime_sec = stbuf->st_mtime; 1376 v9lstat->st_ctime_sec = stbuf->st_ctime; 1377 #ifdef CONFIG_DARWIN 1378 v9lstat->st_atime_nsec = stbuf->st_atimespec.tv_nsec; 1379 v9lstat->st_mtime_nsec = stbuf->st_mtimespec.tv_nsec; 1380 v9lstat->st_ctime_nsec = stbuf->st_ctimespec.tv_nsec; 1381 #else 1382 v9lstat->st_atime_nsec = stbuf->st_atim.tv_nsec; 1383 v9lstat->st_mtime_nsec = stbuf->st_mtim.tv_nsec; 1384 v9lstat->st_ctime_nsec = stbuf->st_ctim.tv_nsec; 1385 #endif 1386 /* Currently we only support BASIC fields in stat */ 1387 v9lstat->st_result_mask = P9_STATS_BASIC; 1388 1389 return stat_to_qid(pdu, stbuf, &v9lstat->qid); 1390 } 1391 1392 static void print_sg(struct iovec *sg, int cnt) 1393 { 1394 int i; 1395 1396 printf("sg[%d]: {", cnt); 1397 for (i = 0; i < cnt; i++) { 1398 
if (i) { 1399 printf(", "); 1400 } 1401 printf("(%p, %zd)", sg[i].iov_base, sg[i].iov_len); 1402 } 1403 printf("}\n"); 1404 } 1405 1406 /* Will call this only for path name based fid */ 1407 static void v9fs_fix_path(V9fsPath *dst, V9fsPath *src, int len) 1408 { 1409 V9fsPath str; 1410 v9fs_path_init(&str); 1411 v9fs_path_copy(&str, dst); 1412 v9fs_path_sprintf(dst, "%s%s", src->data, str.data + len); 1413 v9fs_path_free(&str); 1414 } 1415 1416 static inline bool is_ro_export(FsContext *ctx) 1417 { 1418 return ctx->export_flags & V9FS_RDONLY; 1419 } 1420 1421 static void coroutine_fn v9fs_version(void *opaque) 1422 { 1423 ssize_t err; 1424 V9fsPDU *pdu = opaque; 1425 V9fsState *s = pdu->s; 1426 V9fsString version; 1427 size_t offset = 7; 1428 1429 v9fs_string_init(&version); 1430 err = pdu_unmarshal(pdu, offset, "ds", &s->msize, &version); 1431 if (err < 0) { 1432 goto out; 1433 } 1434 trace_v9fs_version(pdu->tag, pdu->id, s->msize, version.data); 1435 1436 virtfs_reset(pdu); 1437 1438 if (!strcmp(version.data, "9P2000.u")) { 1439 s->proto_version = V9FS_PROTO_2000U; 1440 } else if (!strcmp(version.data, "9P2000.L")) { 1441 s->proto_version = V9FS_PROTO_2000L; 1442 } else { 1443 v9fs_string_sprintf(&version, "unknown"); 1444 /* skip min. msize check, reporting invalid version has priority */ 1445 goto marshal; 1446 } 1447 1448 if (s->msize < P9_MIN_MSIZE) { 1449 err = -EMSGSIZE; 1450 error_report( 1451 "9pfs: Client requested msize < minimum msize (" 1452 stringify(P9_MIN_MSIZE) ") supported by this server." 1453 ); 1454 goto out; 1455 } 1456 1457 /* 8192 is the default msize of Linux clients */ 1458 if (s->msize <= 8192 && !(s->ctx.export_flags & V9FS_NO_PERF_WARN)) { 1459 warn_report_once( 1460 "9p: degraded performance: a reasonable high msize should be " 1461 "chosen on client/guest side (chosen msize is <= 8192). See " 1462 "https://wiki.qemu.org/Documentation/9psetup#msize for details." 
1463 ); 1464 } 1465 1466 marshal: 1467 err = pdu_marshal(pdu, offset, "ds", s->msize, &version); 1468 if (err < 0) { 1469 goto out; 1470 } 1471 err += offset; 1472 trace_v9fs_version_return(pdu->tag, pdu->id, s->msize, version.data); 1473 out: 1474 pdu_complete(pdu, err); 1475 v9fs_string_free(&version); 1476 } 1477 1478 static void coroutine_fn v9fs_attach(void *opaque) 1479 { 1480 V9fsPDU *pdu = opaque; 1481 V9fsState *s = pdu->s; 1482 int32_t fid, afid, n_uname; 1483 V9fsString uname, aname; 1484 V9fsFidState *fidp; 1485 size_t offset = 7; 1486 V9fsQID qid; 1487 ssize_t err; 1488 struct stat stbuf; 1489 1490 v9fs_string_init(&uname); 1491 v9fs_string_init(&aname); 1492 err = pdu_unmarshal(pdu, offset, "ddssd", &fid, 1493 &afid, &uname, &aname, &n_uname); 1494 if (err < 0) { 1495 goto out_nofid; 1496 } 1497 trace_v9fs_attach(pdu->tag, pdu->id, fid, afid, uname.data, aname.data); 1498 1499 fidp = alloc_fid(s, fid); 1500 if (fidp == NULL) { 1501 err = -EINVAL; 1502 goto out_nofid; 1503 } 1504 fidp->uid = n_uname; 1505 err = v9fs_co_name_to_path(pdu, NULL, "/", &fidp->path); 1506 if (err < 0) { 1507 err = -EINVAL; 1508 clunk_fid(s, fid); 1509 goto out; 1510 } 1511 err = v9fs_co_lstat(pdu, &fidp->path, &stbuf); 1512 if (err < 0) { 1513 err = -EINVAL; 1514 clunk_fid(s, fid); 1515 goto out; 1516 } 1517 err = stat_to_qid(pdu, &stbuf, &qid); 1518 if (err < 0) { 1519 err = -EINVAL; 1520 clunk_fid(s, fid); 1521 goto out; 1522 } 1523 1524 /* 1525 * disable migration if we haven't done already. 1526 * attach could get called multiple times for the same export. 1527 */ 1528 if (!s->migration_blocker) { 1529 error_setg(&s->migration_blocker, 1530 "Migration is disabled when VirtFS export path '%s' is mounted in the guest using mount_tag '%s'", 1531 s->ctx.fs_root ? 
s->ctx.fs_root : "NULL", s->tag); 1532 err = migrate_add_blocker(&s->migration_blocker, NULL); 1533 if (err < 0) { 1534 clunk_fid(s, fid); 1535 goto out; 1536 } 1537 s->root_fid = fid; 1538 } 1539 1540 err = pdu_marshal(pdu, offset, "Q", &qid); 1541 if (err < 0) { 1542 clunk_fid(s, fid); 1543 goto out; 1544 } 1545 err += offset; 1546 1547 memcpy(&s->root_st, &stbuf, sizeof(stbuf)); 1548 trace_v9fs_attach_return(pdu->tag, pdu->id, 1549 qid.type, qid.version, qid.path); 1550 out: 1551 put_fid(pdu, fidp); 1552 out_nofid: 1553 pdu_complete(pdu, err); 1554 v9fs_string_free(&uname); 1555 v9fs_string_free(&aname); 1556 } 1557 1558 static void coroutine_fn v9fs_stat(void *opaque) 1559 { 1560 int32_t fid; 1561 V9fsStat v9stat; 1562 ssize_t err = 0; 1563 size_t offset = 7; 1564 struct stat stbuf; 1565 V9fsFidState *fidp; 1566 V9fsPDU *pdu = opaque; 1567 char *basename; 1568 1569 err = pdu_unmarshal(pdu, offset, "d", &fid); 1570 if (err < 0) { 1571 goto out_nofid; 1572 } 1573 trace_v9fs_stat(pdu->tag, pdu->id, fid); 1574 1575 fidp = get_fid(pdu, fid); 1576 if (fidp == NULL) { 1577 err = -ENOENT; 1578 goto out_nofid; 1579 } 1580 err = v9fs_co_lstat(pdu, &fidp->path, &stbuf); 1581 if (err < 0) { 1582 goto out; 1583 } 1584 basename = g_path_get_basename(fidp->path.data); 1585 err = stat_to_v9stat(pdu, &fidp->path, basename, &stbuf, &v9stat); 1586 g_free(basename); 1587 if (err < 0) { 1588 goto out; 1589 } 1590 err = pdu_marshal(pdu, offset, "wS", 0, &v9stat); 1591 if (err < 0) { 1592 v9fs_stat_free(&v9stat); 1593 goto out; 1594 } 1595 trace_v9fs_stat_return(pdu->tag, pdu->id, v9stat.mode, 1596 v9stat.atime, v9stat.mtime, v9stat.length); 1597 err += offset; 1598 v9fs_stat_free(&v9stat); 1599 out: 1600 put_fid(pdu, fidp); 1601 out_nofid: 1602 pdu_complete(pdu, err); 1603 } 1604 1605 static bool fid_has_valid_file_handle(V9fsState *s, V9fsFidState *fidp) 1606 { 1607 return s->ops->has_valid_file_handle(fidp->fid_type, &fidp->fs); 1608 } 1609 1610 static void coroutine_fn 
v9fs_getattr(void *opaque)
{
    int32_t fid;
    size_t offset = 7;
    ssize_t retval = 0;
    struct stat stbuf;
    V9fsFidState *fidp;
    uint64_t request_mask;
    V9fsStatDotl v9stat_dotl;
    V9fsPDU *pdu = opaque;

    retval = pdu_unmarshal(pdu, offset, "dq", &fid, &request_mask);
    if (retval < 0) {
        goto out_nofid;
    }
    trace_v9fs_getattr(pdu->tag, pdu->id, fid, request_mask);

    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        retval = -ENOENT;
        goto out_nofid;
    }
    /* prefer fstat() on an open handle, fall back to path-based lstat() */
    if (fid_has_valid_file_handle(pdu->s, fidp)) {
        retval = v9fs_co_fstat(pdu, fidp, &stbuf);
    } else {
        retval = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
    }
    if (retval < 0) {
        goto out;
    }
    retval = stat_to_v9stat_dotl(pdu, &stbuf, &v9stat_dotl);
    if (retval < 0) {
        goto out;
    }

    /* fill st_gen if requested and supported by underlying fs */
    if (request_mask & P9_STATS_GEN) {
        retval = v9fs_co_st_gen(pdu, &fidp->path, stbuf.st_mode, &v9stat_dotl);
        switch (retval) {
        case 0:
            /* we have valid st_gen: update result mask */
            v9stat_dotl.st_result_mask |= P9_STATS_GEN;
            break;
        case -EINTR:
            /* request cancelled, e.g. by Tflush */
            goto out;
        default:
            /* failed to get st_gen: not fatal, ignore */
            break;
        }
    }
    retval = pdu_marshal(pdu, offset, "A", &v9stat_dotl);
    if (retval < 0) {
        goto out;
    }
    retval += offset;
    trace_v9fs_getattr_return(pdu->tag, pdu->id, v9stat_dotl.st_result_mask,
                              v9stat_dotl.st_mode, v9stat_dotl.st_uid,
                              v9stat_dotl.st_gid);
out:
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, retval);
}

/* Attribute flags */
#define P9_ATTR_MODE       (1 << 0)
#define P9_ATTR_UID        (1 << 1)
#define P9_ATTR_GID        (1 << 2)
#define P9_ATTR_SIZE       (1 << 3)
#define P9_ATTR_ATIME      (1 << 4)
#define P9_ATTR_MTIME      (1 << 5)
#define P9_ATTR_CTIME      (1 << 6)
#define P9_ATTR_ATIME_SET  (1 << 7)
#define P9_ATTR_MTIME_SET  (1 << 8)

/* mask of P9_ATTR_MODE..P9_ATTR_CTIME (bits 0-6) */
#define P9_ATTR_MASK    127

/*
 * Tsetattr handler (9P2000.L): apply the attribute changes selected by
 * v9iattr.valid (chmod, utimensat/futimens, chown, truncate) to the fid.
 */
static void coroutine_fn v9fs_setattr(void *opaque)
{
    int err = 0;
    int32_t fid;
    V9fsFidState *fidp;
    size_t offset = 7;
    V9fsIattr v9iattr;
    V9fsPDU *pdu = opaque;

    err = pdu_unmarshal(pdu, offset, "dI", &fid, &v9iattr);
    if (err < 0) {
        goto out_nofid;
    }

    trace_v9fs_setattr(pdu->tag, pdu->id, fid,
                       v9iattr.valid, v9iattr.mode, v9iattr.uid, v9iattr.gid,
                       v9iattr.size, v9iattr.atime_sec, v9iattr.mtime_sec);

    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -EINVAL;
        goto out_nofid;
    }
    if (v9iattr.valid & P9_ATTR_MODE) {
        err = v9fs_co_chmod(pdu, &fidp->path, v9iattr.mode);
        if (err < 0) {
            goto out;
        }
    }
    if (v9iattr.valid & (P9_ATTR_ATIME | P9_ATTR_MTIME)) {
        struct timespec times[2];
        /* times[0] = atime, times[1] = mtime; *_SET means explicit value */
        if (v9iattr.valid & P9_ATTR_ATIME) {
            if (v9iattr.valid & P9_ATTR_ATIME_SET) {
                times[0].tv_sec = v9iattr.atime_sec;
                times[0].tv_nsec = v9iattr.atime_nsec;
            } else {
                times[0].tv_nsec = UTIME_NOW;
            }
        } else {
            times[0].tv_nsec = UTIME_OMIT;
        }
        if (v9iattr.valid & P9_ATTR_MTIME) {
            if (v9iattr.valid & P9_ATTR_MTIME_SET) {
                times[1].tv_sec = v9iattr.mtime_sec;
                times[1].tv_nsec = v9iattr.mtime_nsec;
            } else {
                times[1].tv_nsec = UTIME_NOW;
            }
        } else {
            times[1].tv_nsec = UTIME_OMIT;
        }
        if (fid_has_valid_file_handle(pdu->s, fidp)) {
            err = v9fs_co_futimens(pdu, fidp, times);
        } else {
            err = v9fs_co_utimensat(pdu, &fidp->path, times);
        }
        if (err < 0) {
            goto out;
        }
    }
    /*
     * If the only valid entry in iattr is ctime we can call
     * chown(-1,-1) to update the ctime of the file
     */
    if ((v9iattr.valid & (P9_ATTR_UID | P9_ATTR_GID)) ||
        ((v9iattr.valid & P9_ATTR_CTIME)
         && !((v9iattr.valid & P9_ATTR_MASK) & ~P9_ATTR_CTIME))) {
        if (!(v9iattr.valid & P9_ATTR_UID)) {
            v9iattr.uid = -1;
        }
        if (!(v9iattr.valid & P9_ATTR_GID)) {
            v9iattr.gid = -1;
        }
        err = v9fs_co_chown(pdu, &fidp->path, v9iattr.uid,
                            v9iattr.gid);
        if (err < 0) {
            goto out;
        }
    }
    if (v9iattr.valid & (P9_ATTR_SIZE)) {
        if (fid_has_valid_file_handle(pdu->s, fidp)) {
            err = v9fs_co_ftruncate(pdu, fidp, v9iattr.size);
        } else {
            err = v9fs_co_truncate(pdu, &fidp->path, v9iattr.size);
        }
        if (err < 0) {
            goto out;
        }
    }
    err = offset;
    trace_v9fs_setattr_return(pdu->tag, pdu->id);
out:
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, err);
}

/*
 * Marshal the Rwalk reply: the QID count followed by @nwnames QIDs.
 * Returns the total reply offset or a negative marshalling error.
 */
static int v9fs_walk_marshal(V9fsPDU *pdu, uint16_t nwnames, V9fsQID *qids)
{
    int i;
    ssize_t err;
    size_t offset = 7;

    err = pdu_marshal(pdu, offset, "w", nwnames);
    if (err < 0) {
        return err;
    }
    offset += err;
    for (i = 0; i < nwnames; i++) {
        err = pdu_marshal(pdu, offset, "Q", &qids[i]);
        if (err < 0) {
            return err;
        }
        offset += err;
    }
    return offset;
}

static bool
name_is_illegal(const char *name)
{
    /* reject empty names and names containing a path separator */
    return !*name || strchr(name, '/') != NULL;
}

/* True if @a and @b identify the same inode (same device and inode number). */
static bool same_stat_id(const struct stat *a, const struct stat *b)
{
    return a->st_dev == b->st_dev && a->st_ino == b->st_ino;
}

/*
 * Returns a (newly allocated) comma-separated string presentation of the
 * passed array for logging (tracing) purpose for trace event "v9fs_walk".
 *
 * It is caller's responsibility to free the returned string.
 */
static char *trace_v9fs_walk_wnames(V9fsString *wnames, size_t nwnames)
{
    g_autofree char **arr = g_malloc0_n(nwnames + 1, sizeof(char *));
    for (size_t i = 0; i < nwnames; ++i) {
        arr[i] = wnames[i].data;
    }
    return g_strjoinv(", ", arr);
}

/*
 * Twalk handler: walk @nwnames path components starting at @fid,
 * collecting one QID per successfully walked component, and associate
 * @newfid with the final path. ".." at the export root is a no-op.
 */
static void coroutine_fn v9fs_walk(void *opaque)
{
    int name_idx, nwalked;
    g_autofree V9fsQID *qids = NULL;
    int i, err = 0, any_err = 0;
    V9fsPath dpath, path;
    P9ARRAY_REF(V9fsPath) pathes = NULL;
    uint16_t nwnames;
    struct stat stbuf, fidst;
    g_autofree struct stat *stbufs = NULL;
    size_t offset = 7;
    int32_t fid, newfid;
    P9ARRAY_REF(V9fsString) wnames = NULL;
    g_autofree char *trace_wnames = NULL;
    V9fsFidState *fidp;
    V9fsFidState *newfidp = NULL;
    V9fsPDU *pdu = opaque;
    V9fsState *s = pdu->s;
    V9fsQID qid;

    err = pdu_unmarshal(pdu, offset, "ddw", &fid, &newfid, &nwnames);
    if (err < 0) {
        pdu_complete(pdu, err);
        return;
    }
    offset += err;

    if (nwnames > P9_MAXWELEM) {
        err = -EINVAL;
        goto out_nofid_nownames;
    }
    if (nwnames) {
        P9ARRAY_NEW(V9fsString, wnames, nwnames);
        qids = g_new0(V9fsQID, nwnames);
        stbufs = g_new0(struct stat, nwnames);
        P9ARRAY_NEW(V9fsPath, pathes, nwnames);
        for (i = 0; i < nwnames; i++) {
            err = pdu_unmarshal(pdu, offset, "s", &wnames[i]);
            if (err < 0) {
                goto out_nofid_nownames;
            }
            if (name_is_illegal(wnames[i].data)) {
                err = -ENOENT;
                goto out_nofid_nownames;
            }
            offset += err;
        }
        if (trace_event_get_state_backends(TRACE_V9FS_WALK)) {
            trace_wnames = trace_v9fs_walk_wnames(wnames, nwnames);
            trace_v9fs_walk(pdu->tag, pdu->id, fid, newfid, nwnames,
                            trace_wnames);
        }
    } else {
        trace_v9fs_walk(pdu->tag, pdu->id, fid, newfid, nwnames, "");
    }

    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -ENOENT;
        goto out_nofid;
    }

    v9fs_path_init(&dpath);
    v9fs_path_init(&path);
    /*
     * Both dpath and path initially point to fidp.
     * Needed to handle request with nwnames == 0
     */
    v9fs_path_copy(&dpath, &fidp->path);
    v9fs_path_copy(&path, &fidp->path);

    /*
     * To keep latency (i.e. overall execution time for processing this
     * Twalk client request) as small as possible, run all the required fs
     * driver code altogether inside the following block.
     */
    v9fs_co_run_in_worker({
        nwalked = 0;
        if (v9fs_request_cancelled(pdu)) {
            any_err |= err = -EINTR;
            break;
        }
        err = s->ops->lstat(&s->ctx, &dpath, &fidst);
        if (err < 0) {
            any_err |= err = -errno;
            break;
        }
        stbuf = fidst;
        for (; nwalked < nwnames; nwalked++) {
            if (v9fs_request_cancelled(pdu)) {
                any_err |= err = -EINTR;
                break;
            }
            /* skip ".." at the export root: must not escape the export */
            if (!same_stat_id(&pdu->s->root_st, &stbuf) ||
                strcmp("..", wnames[nwalked].data))
            {
                err = s->ops->name_to_path(&s->ctx, &dpath,
                                           wnames[nwalked].data,
                                           &pathes[nwalked]);
                if (err < 0) {
                    any_err |= err = -errno;
                    break;
                }
                if (v9fs_request_cancelled(pdu)) {
                    any_err |= err = -EINTR;
                    break;
                }
                err = s->ops->lstat(&s->ctx, &pathes[nwalked], &stbuf);
                if (err < 0) {
                    any_err |= err = -errno;
                    break;
                }
                stbufs[nwalked] = stbuf;
                v9fs_path_copy(&dpath, &pathes[nwalked]);
            }
        }
    });
    /*
     * Handle all the rest of this Twalk request on main thread ...
     *
     * NOTE: -EINTR is an exception where we deviate from the protocol spec
     * and simply send a (R)Lerror response instead of bothering to assemble
     * a (deducted) Rwalk response; because -EINTR is always the result of a
     * Tflush request, so client would no longer wait for a response in this
     * case anyway.
     */
    if ((err < 0 && !nwalked) || err == -EINTR) {
        goto out;
    }

    any_err |= err = stat_to_qid(pdu, &fidst, &qid);
    if (err < 0 && !nwalked) {
        goto out;
    }
    stbuf = fidst;

    /* reset dpath and path */
    v9fs_path_copy(&dpath, &fidp->path);
    v9fs_path_copy(&path, &fidp->path);

    /* convert the stat results gathered in the worker into QIDs */
    for (name_idx = 0; name_idx < nwalked; name_idx++) {
        if (!same_stat_id(&pdu->s->root_st, &stbuf) ||
            strcmp("..", wnames[name_idx].data))
        {
            stbuf = stbufs[name_idx];
            any_err |= err = stat_to_qid(pdu, &stbuf, &qid);
            if (err < 0) {
                break;
            }
            v9fs_path_copy(&path, &pathes[name_idx]);
            v9fs_path_copy(&dpath, &path);
        }
        memcpy(&qids[name_idx], &qid, sizeof(qid));
    }
    if (any_err < 0) {
        if (!name_idx) {
            /* don't send any QIDs, send Rlerror instead */
            goto out;
        } else {
            /* send QIDs (not Rlerror), but fid MUST remain unaffected */
            goto send_qids;
        }
    }
    if (fid == newfid) {
        /* client reuses the same fid: it must not have an open handle */
        if (fidp->fid_type != P9_FID_NONE) {
            err = -EINVAL;
            goto out;
        }
        v9fs_path_write_lock(s);
        v9fs_path_copy(&fidp->path, &path);
        v9fs_path_unlock(s);
    } else {
        newfidp = alloc_fid(s, newfid);
        if (newfidp == NULL) {
            err = -EINVAL;
            goto out;
        }
        newfidp->uid = fidp->uid;
        v9fs_path_copy(&newfidp->path, &path);
    }
send_qids:
    err = v9fs_walk_marshal(pdu, name_idx, qids);
    trace_v9fs_walk_return(pdu->tag, pdu->id, name_idx, qids);
out:
    put_fid(pdu, fidp);
    if (newfidp) {
        put_fid(pdu, newfidp);
    }
    v9fs_path_free(&dpath);
    v9fs_path_free(&path);
    goto out_pdu_complete;
out_nofid_nownames:
    trace_v9fs_walk(pdu->tag, pdu->id, fid, newfid, nwnames, "<?>");
out_nofid:
out_pdu_complete:
    pdu_complete(pdu, err);
}

/* iounit for @path, derived from the host filesystem's f_bsize. */
static int32_t coroutine_fn get_iounit(V9fsPDU *pdu, V9fsPath *path)
{
    struct statfs stbuf;
    int err = v9fs_co_statfs(pdu, path, &stbuf);

    /* on statfs failure fall back to blksize 0 (pure msize-based iounit) */
    return blksize_to_iounit(pdu, (err >= 0) ? stbuf.f_bsize : 0);
}

/*
 * Topen (9P2000.u) / Tlopen (9P2000.L) handler: open the file or
 * directory behind @fid and reply with its QID and iounit.
 */
static void coroutine_fn v9fs_open(void *opaque)
{
    int flags;
    int32_t fid;
    int32_t mode;
    V9fsQID qid;
    int iounit = 0;
    ssize_t err = 0;
    size_t offset = 7;
    struct stat stbuf;
    V9fsFidState *fidp;
    V9fsPDU *pdu = opaque;
    V9fsState *s = pdu->s;
    g_autofree char *trace_oflags = NULL;

    /* dotl carries 32-bit Linux open flags, legacy a single mode byte */
    if (s->proto_version == V9FS_PROTO_2000L) {
        err = pdu_unmarshal(pdu, offset, "dd", &fid, &mode);
    } else {
        uint8_t modebyte;
        err = pdu_unmarshal(pdu, offset, "db", &fid, &modebyte);
        mode = modebyte;
    }
    if (err < 0) {
        goto out_nofid;
    }
    if (trace_event_get_state_backends(TRACE_V9FS_OPEN)) {
        trace_oflags = qemu_open_flags_tostr(
            (s->proto_version == V9FS_PROTO_2000L) ?
                dotl_to_open_flags(mode) : omode_to_uflags(mode)
        );
        trace_v9fs_open(pdu->tag, pdu->id, fid, mode, trace_oflags);
    }

    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -ENOENT;
        goto out_nofid;
    }
    /* a fid may be opened only once */
    if (fidp->fid_type != P9_FID_NONE) {
        err = -EINVAL;
        goto out;
    }

    err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
    if (err < 0) {
        goto out;
    }
    err = stat_to_qid(pdu, &stbuf, &qid);
    if (err < 0) {
        goto out;
    }
    if (S_ISDIR(stbuf.st_mode)) {
        err = v9fs_co_opendir(pdu, fidp);
        if (err < 0) {
            goto out;
        }
        fidp->fid_type = P9_FID_DIR;
        /* directories report iounit 0 */
        err = pdu_marshal(pdu, offset, "Qd", &qid, 0);
        if (err < 0) {
            goto out;
        }
        err += offset;
    } else {
        if (s->proto_version == V9FS_PROTO_2000L) {
            flags = get_dotl_openflags(s, mode);
        } else {
            flags = omode_to_uflags(mode);
        }
        if (is_ro_export(&s->ctx)) {
            if (mode & O_WRONLY || mode & O_RDWR ||
                mode & O_APPEND || mode & O_TRUNC) {
                err = -EROFS;
                goto out;
            }
        }
        err = v9fs_co_open(pdu, fidp, flags);
        if (err < 0) {
            goto out;
        }
        fidp->fid_type = P9_FID_FILE;
        fidp->open_flags = flags;
        if (flags & O_EXCL) {
            /*
             * We let the host file system do O_EXCL check
             * We should not reclaim such fd
             */
            fidp->flags |= FID_NON_RECLAIMABLE;
        }
        iounit = get_iounit(pdu, &fidp->path);
        err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
        if (err < 0) {
            goto out;
        }
        err += offset;
    }
    trace_v9fs_open_return(pdu->tag, pdu->id,
                           qid.type, qid.version, qid.path, iounit);
out:
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, err);
}

/*
 * Tlcreate handler (9P2000.L): create and open a new file named @name
 * in the directory referenced by @dfid; @dfid then refers to the new file.
 */
static void coroutine_fn v9fs_lcreate(void *opaque)
{
    int32_t dfid, flags, mode;
    gid_t gid;
    ssize_t err = 0;
    ssize_t offset = 7;
    V9fsString name;
    V9fsFidState *fidp;
    struct stat stbuf;
    V9fsQID qid;
    int32_t iounit;
    V9fsPDU *pdu = opaque;

    v9fs_string_init(&name);
    err = pdu_unmarshal(pdu, offset, "dsddd", &dfid,
                        &name, &flags, &mode, &gid);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_lcreate(pdu->tag, pdu->id, dfid, flags, mode, gid);

    if (name_is_illegal(name.data)) {
        err = -ENOENT;
        goto out_nofid;
    }

    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
        err = -EEXIST;
        goto out_nofid;
    }

    fidp = get_fid(pdu, dfid);
    if (fidp == NULL) {
        err = -ENOENT;
        goto out_nofid;
    }
    if (fidp->fid_type != P9_FID_NONE) {
        err = -EINVAL;
        goto out;
    }

    flags = get_dotl_openflags(pdu->s, flags);
    err = v9fs_co_open2(pdu, fidp, &name, gid,
                        flags | O_CREAT, mode, &stbuf);
    if (err < 0) {
        goto out;
    }
    fidp->fid_type = P9_FID_FILE;
    fidp->open_flags = flags;
    if (flags & O_EXCL) {
        /*
         * We let the host file system do O_EXCL check
         * We
should not reclaim such fd
         */
        fidp->flags |= FID_NON_RECLAIMABLE;
    }
    iounit = get_iounit(pdu, &fidp->path);
    err = stat_to_qid(pdu, &stbuf, &qid);
    if (err < 0) {
        goto out;
    }
    err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
    if (err < 0) {
        goto out;
    }
    err += offset;
    trace_v9fs_lcreate_return(pdu->tag, pdu->id,
                              qid.type, qid.version, qid.path, iounit);
out:
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, err);
    v9fs_string_free(&name);
}

/*
 * Tfsync handler: flush the fid's data (and, depending on @datasync,
 * metadata) to stable storage via the backend driver.
 */
static void coroutine_fn v9fs_fsync(void *opaque)
{
    int err;
    int32_t fid;
    int datasync;
    size_t offset = 7;
    V9fsFidState *fidp;
    V9fsPDU *pdu = opaque;

    err = pdu_unmarshal(pdu, offset, "dd", &fid, &datasync);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_fsync(pdu->tag, pdu->id, fid, datasync);

    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -ENOENT;
        goto out_nofid;
    }
    err = v9fs_co_fsync(pdu, fidp, datasync);
    if (!err) {
        err = offset;
    }
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, err);
}

/*
 * Tclunk handler: detach @fid from the fid table and drop the final
 * reference so its resources are released.
 */
static void coroutine_fn v9fs_clunk(void *opaque)
{
    int err;
    int32_t fid;
    size_t offset = 7;
    V9fsFidState *fidp;
    V9fsPDU *pdu = opaque;
    V9fsState *s = pdu->s;

    err = pdu_unmarshal(pdu, offset, "d", &fid);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_clunk(pdu->tag, pdu->id, fid);

    fidp = clunk_fid(s, fid);
    if (fidp == NULL) {
        err = -ENOENT;
        goto out_nofid;
    }
    /*
     * Bump the ref so that put_fid will
     * free the fid.
     */
    fidp->ref++;
    err = put_fid(pdu, fidp);
    if (!err) {
        err = offset;
    }
out_nofid:
    pdu_complete(pdu, err);
}

/*
 * Create a QEMUIOVector for a sub-region of PDU iovecs
 *
 * @qiov: uninitialized QEMUIOVector
 * @skip: number of bytes to skip from beginning of PDU
 * @size: number of bytes to include
 * @is_write: true - write, false - read
 *
 * The resulting QEMUIOVector has heap-allocated iovecs and must be cleaned up
 * with qemu_iovec_destroy().
 */
static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu,
                                    size_t skip, size_t size,
                                    bool is_write)
{
    QEMUIOVector elem;
    struct iovec *iov;
    unsigned int niov;

    if (is_write) {
        pdu->s->transport->init_out_iov_from_pdu(pdu, &iov, &niov, size + skip);
    } else {
        pdu->s->transport->init_in_iov_from_pdu(pdu, &iov, &niov, size + skip);
    }

    qemu_iovec_init_external(&elem, iov, niov);
    qemu_iovec_init(qiov, niov);
    qemu_iovec_concat(qiov, &elem, skip, size);
}

/*
 * Serve a read on an xattr fid: copy up to @max_count bytes starting at
 * @off from the cached xattr value into the reply PDU. Returns the reply
 * offset or a negative marshalling/pack error.
 */
static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
                           uint64_t off, uint32_t max_count)
{
    ssize_t err;
    size_t offset = 7;
    uint64_t read_count;
    QEMUIOVector qiov_full;

    if (fidp->fs.xattr.len < off) {
        /* offset past the end of the value: nothing left to read */
        read_count = 0;
    } else {
        read_count = fidp->fs.xattr.len - off;
    }
    if (read_count > max_count) {
        read_count = max_count;
    }
    err = pdu_marshal(pdu, offset, "d", read_count);
    if (err < 0) {
        return err;
    }
    offset += err;

    v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, read_count, false);
    err = v9fs_pack(qiov_full.iov, qiov_full.niov, 0,
                    ((char *)fidp->fs.xattr.value) + off,
                    read_count);
    qemu_iovec_destroy(&qiov_full);
    if (err < 0) {
        return err;
    }
    offset += err;
    return offset;
}

static int
/*
 * Encode directory entries in the legacy 9P2000.u T_read format: a
 * sequence of marshalled "S" (V9fsStat) records, at most @max_count
 * bytes total. Each entry requires a name_to_path + lstat round trip to
 * build the stat record. Returns the number of bytes produced, or a
 * negative errno. On buffer exhaustion the directory stream is seeked
 * back so the entry that did not fit is re-read by the next T_read.
 */
static int coroutine_fn v9fs_do_readdir_with_stat(V9fsPDU *pdu,
                                                  V9fsFidState *fidp,
                                                  uint32_t max_count)
{
    V9fsPath path;
    V9fsStat v9stat;
    int len, err = 0;
    int32_t count = 0;
    struct stat stbuf;
    off_t saved_dir_pos;
    struct dirent *dent;

    /* save the directory position */
    saved_dir_pos = v9fs_co_telldir(pdu, fidp);
    if (saved_dir_pos < 0) {
        return saved_dir_pos;
    }

    while (1) {
        v9fs_path_init(&path);

        /*
         * Lock covers readdir through marshalling of this entry; the
         * error paths below break out still holding the lock, which the
         * unlock after the loop releases.
         */
        v9fs_readdir_lock(&fidp->fs.dir);

        err = v9fs_co_readdir(pdu, fidp, &dent);
        if (err || !dent) {
            break;
        }
        err = v9fs_co_name_to_path(pdu, &fidp->path, dent->d_name, &path);
        if (err < 0) {
            break;
        }
        err = v9fs_co_lstat(pdu, &path, &stbuf);
        if (err < 0) {
            break;
        }
        err = stat_to_v9stat(pdu, &path, dent->d_name, &stbuf, &v9stat);
        if (err < 0) {
            break;
        }
        /* +2 accounts for the size[2] prefix of the marshalled stat */
        if ((count + v9stat.size + 2) > max_count) {
            v9fs_readdir_unlock(&fidp->fs.dir);

            /* Ran out of buffer. Set dir back to old position and return */
            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
            v9fs_stat_free(&v9stat);
            v9fs_path_free(&path);
            return count;
        }

        /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
        len = pdu_marshal(pdu, 11 + count, "S", &v9stat);

        v9fs_readdir_unlock(&fidp->fs.dir);

        if (len < 0) {
            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
            v9fs_stat_free(&v9stat);
            v9fs_path_free(&path);
            return len;
        }
        count += len;
        v9fs_stat_free(&v9stat);
        v9fs_path_free(&path);
        saved_dir_pos = qemu_dirent_off(dent);
    }

    v9fs_readdir_unlock(&fidp->fs.dir);

    v9fs_path_free(&path);
    if (err < 0) {
        return err;
    }
    return count;
}

/*
 * Handle T_read. Wire format in: fid[4] offset[8] count[4].
 *
 * Dispatches on fid type:
 *  - P9_FID_DIR: only legal with 9P2000.u; offset 0 rewinds the stream,
 *    then entries are encoded via v9fs_do_readdir_with_stat().
 *  - P9_FID_FILE: scatter-gather preadv into the reply PDU, retrying on
 *    EINTR unless the PDU was cancelled, and looping until count bytes
 *    were read or EOF (len == 0).
 *  - P9_FID_XATTR: served from the cached xattr buffer.
 *
 * On success err is set to the full reply size: header offset + count
 * field + payload bytes.
 */
static void coroutine_fn v9fs_read(void *opaque)
{
    int32_t fid;
    uint64_t off;
    ssize_t err = 0;
    int32_t count = 0;
    size_t offset = 7;
    uint32_t max_count;
    V9fsFidState *fidp;
    V9fsPDU *pdu = opaque;
    V9fsState *s = pdu->s;

    err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &max_count);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_read(pdu->tag, pdu->id, fid, off, max_count);

    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -EINVAL;
        goto out_nofid;
    }
    if (fidp->fid_type == P9_FID_DIR) {
        if (s->proto_version != V9FS_PROTO_2000U) {
            warn_report_once(
                "9p: bad client: T_read request on directory only expected "
                "with 9P2000.u protocol version"
            );
            err = -EOPNOTSUPP;
            goto out;
        }
        if (off == 0) {
            v9fs_co_rewinddir(pdu, fidp);
        }
        count = v9fs_do_readdir_with_stat(pdu, fidp, max_count);
        if (count < 0) {
            err = count;
            goto out;
        }
        err = pdu_marshal(pdu, offset, "d", count);
        if (err < 0) {
            goto out;
        }
        err += offset + count;
    } else if (fidp->fid_type == P9_FID_FILE) {
        QEMUIOVector qiov_full;
        QEMUIOVector qiov;
        int32_t len;

        /* offset + 4 skips the count[4] field that is marshalled below */
        v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset + 4, max_count, false);
        qemu_iovec_init(&qiov, qiov_full.niov);
        do {
            /* Re-point qiov at the not-yet-filled tail of qiov_full */
            qemu_iovec_reset(&qiov);
            qemu_iovec_concat(&qiov, &qiov_full, count, qiov_full.size - count);
            if (0) {
                print_sg(qiov.iov, qiov.niov);
            }
            /* Loop in case of EINTR */
            do {
                len = v9fs_co_preadv(pdu, fidp, qiov.iov, qiov.niov, off);
                if (len >= 0) {
                    off += len;
                    count += len;
                }
            } while (len == -EINTR && !pdu->cancelled);
            if (len < 0) {
                /* IO error return the error */
                err = len;
                goto out_free_iovec;
            }
        } while (count < max_count && len > 0);
        err = pdu_marshal(pdu, offset, "d", count);
        if (err < 0) {
            goto out_free_iovec;
        }
        err += offset + count;
    out_free_iovec:
        qemu_iovec_destroy(&qiov);
        qemu_iovec_destroy(&qiov_full);
    } else if (fidp->fid_type == P9_FID_XATTR) {
        err = v9fs_xattr_read(s, pdu, fidp, off, max_count);
    } else {
        err = -EINVAL;
    }
    trace_v9fs_read_return(pdu->tag, pdu->id, count, err);
out:
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, err);
}
/**
 * v9fs_readdir_response_size() - Returns size required in Rreaddir response
 * for the passed dirent @name.
 *
 * @name: directory entry's name (i.e. file name, directory name)
 * Return: required size in bytes
 */
size_t v9fs_readdir_response_size(V9fsString *name)
{
    /*
     * Size of each dirent on the wire: size of qid (13) + size of offset (8)
     * size of type (1) + size of name.size (2) + strlen(name.data)
     */
    return 24 + v9fs_string_size(name);
}

/*
 * Release a singly-linked list of entries produced by
 * v9fs_co_readdir_many(): each node owns its dirent copy, its optional
 * stat buffer, and the node itself.
 */
static void v9fs_free_dirents(struct V9fsDirEnt *e)
{
    struct V9fsDirEnt *next = NULL;

    for (; e; e = next) {
        next = e->next;
        g_free(e->dent);
        g_free(e->st);
        g_free(e);
    }
}

/*
 * Encode directory entries in 9P2000.L Rreaddir format ("Qqbs" records:
 * qid, offset, type, name), at most @max_count bytes. Entries are first
 * fetched in one batch from the fs driver, then marshalled. Returns the
 * number of payload bytes produced, or a negative error.
 */
static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
                                        off_t offset, int32_t max_count)
{
    size_t size;
    V9fsQID qid;
    V9fsString name;
    int len, err = 0;
    int32_t count = 0;
    off_t off;
    struct dirent *dent;
    struct stat *st;
    struct V9fsDirEnt *entries = NULL;

    /*
     * inode remapping requires the device id, which in turn might be
     * different for different directory entries, so if inode remapping is
     * enabled we have to make a full stat for each directory entry
     */
    const bool dostat = pdu->s->ctx.export_flags & V9FS_REMAP_INODES;

    /*
     * Fetch all required directory entries altogether on a background IO
     * thread from fs driver. We don't want to do that for each entry
     * individually, because hopping between threads (this main IO thread
     * and background IO driver thread) would sum up to huge latencies.
     */
    count = v9fs_co_readdir_many(pdu, fidp, &entries, offset, max_count,
                                 dostat);
    if (count < 0) {
        err = count;
        count = 0;
        goto out;
    }
    /* reuse count as the marshalled-bytes accumulator from here on */
    count = 0;

    for (struct V9fsDirEnt *e = entries; e; e = e->next) {
        dent = e->dent;

        if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) {
            st = e->st;
            /* e->st should never be NULL, but just to be sure */
            if (!st) {
                err = -1;
                break;
            }

            /* remap inode */
            err = stat_to_qid(pdu, st, &qid);
            if (err < 0) {
                break;
            }
        } else {
            /*
             * Fill up just the path field of qid because the client uses
             * only that. To fill the entire qid structure we will have
             * to stat each dirent found, which is expensive. For the
             * latter reason we don't call stat_to_qid() here. Only drawback
             * is that no multi-device export detection of stat_to_qid()
             * would be done and provided as error to the user here. But
             * user would get that error anyway when accessing those
             * files/dirs through other ways.
             */
            size = MIN(sizeof(dent->d_ino), sizeof(qid.path));
            memcpy(&qid.path, &dent->d_ino, size);
            /* Fill the other fields with dummy values */
            qid.type = 0;
            qid.version = 0;
        }

        off = qemu_dirent_off(dent);
        v9fs_string_init(&name);
        v9fs_string_sprintf(&name, "%s", dent->d_name);

        /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
        len = pdu_marshal(pdu, 11 + count, "Qqbs",
                          &qid, off,
                          dent->d_type, &name);

        v9fs_string_free(&name);

        if (len < 0) {
            err = len;
            break;
        }

        count += len;
    }

out:
    v9fs_free_dirents(entries);
    if (err < 0) {
        return err;
    }
    return count;
}

/*
 * Handle T_readdir (9P2000.L only). Wire format in:
 * fid[4] offset[8] count[4].
 *
 * The requested count is clamped so that the reply (payload plus the
 * 11-byte Rreaddir header) fits in the negotiated msize. The fid must be
 * an open directory. On success the completion value is header offset +
 * count field + payload bytes.
 */
static void coroutine_fn v9fs_readdir(void *opaque)
{
    int32_t fid;
    V9fsFidState *fidp;
    ssize_t retval = 0;
    size_t offset = 7;
    uint64_t initial_offset;
    int32_t count;
    uint32_t max_count;
    V9fsPDU *pdu = opaque;
    V9fsState *s = pdu->s;

    retval = pdu_unmarshal(pdu, offset, "dqd", &fid,
                           &initial_offset, &max_count);
    if (retval < 0) {
        goto out_nofid;
    }
    trace_v9fs_readdir(pdu->tag, pdu->id, fid, initial_offset, max_count);

    /* Enough space for a R_readdir header: size[4] Rreaddir tag[2] count[4] */
    if (max_count > s->msize - 11) {
        max_count = s->msize - 11;
        warn_report_once(
            "9p: bad client: T_readdir with count > msize - 11"
        );
    }

    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        retval = -EINVAL;
        goto out_nofid;
    }
    if (fidp->fid_type != P9_FID_DIR) {
        warn_report_once("9p: bad client: T_readdir on non-directory stream");
        retval = -ENOTDIR;
        goto out;
    }
    if (!fidp->fs.dir.stream) {
        retval = -EINVAL;
        goto out;
    }
    if (s->proto_version != V9FS_PROTO_2000L) {
        warn_report_once(
            "9p: bad client: T_readdir request only expected with 9P2000.L "
            "protocol version"
        );
        retval = -EOPNOTSUPP;
        goto out;
    }
    count = v9fs_do_readdir(pdu, fidp, (off_t) initial_offset, max_count);
    if (count < 0) {
        retval = count;
        goto out;
    }
    retval = pdu_marshal(pdu, offset, "d", count);
    if (retval < 0) {
        goto out;
    }
    retval += count + offset;
    trace_v9fs_readdir_return(pdu->tag, pdu->id, count, retval);
out:
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, retval);
}
/*
 * Serve a write on an xattr fid by copying into the cached attribute
 * value buffer (the actual setxattr happens later, at clunk time —
 * NOTE(review): inferred from copied_len bookkeeping; confirm against
 * the xattr clunk path). Writes are truncated to the space remaining
 * past @off; an @off beyond the buffer is -ENOSPC. Returns header
 * offset + size of the marshalled count field, or a negative error.
 */
static int v9fs_xattr_write(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
                            uint64_t off, uint32_t count,
                            struct iovec *sg, int cnt)
{
    int i, to_copy;
    ssize_t err = 0;
    uint64_t write_count;
    size_t offset = 7;


    if (fidp->fs.xattr.len < off) {
        return -ENOSPC;
    }
    write_count = fidp->fs.xattr.len - off;
    if (write_count > count) {
        write_count = count;
    }
    err = pdu_marshal(pdu, offset, "d", write_count);
    if (err < 0) {
        return err;
    }
    err += offset;
    fidp->fs.xattr.copied_len += write_count;
    /*
     * Now copy the content from sg list
     */
    for (i = 0; i < cnt; i++) {
        /* copy at most the remaining write_count from this iovec */
        if (write_count > sg[i].iov_len) {
            to_copy = sg[i].iov_len;
        } else {
            to_copy = write_count;
        }
        memcpy((char *)fidp->fs.xattr.value + off, sg[i].iov_base, to_copy);
        /* updating vs->off since we are not using below */
        off += to_copy;
        write_count -= to_copy;
    }

    return err;
}

/*
 * Handle T_write. Wire format in: fid[4] offset[8] count[4] data[count].
 *
 * P9_FID_FILE fids are written with scatter-gather pwritev, retrying on
 * EINTR unless the PDU was cancelled and looping until all bytes are
 * written or the backend reports a short stop (len == 0). P9_FID_XATTR
 * fids are handled by v9fs_xattr_write(). The reply carries the number
 * of bytes actually written.
 */
static void coroutine_fn v9fs_write(void *opaque)
{
    ssize_t err;
    int32_t fid;
    uint64_t off;
    uint32_t count;
    int32_t len = 0;
    int32_t total = 0;
    size_t offset = 7;
    V9fsFidState *fidp;
    V9fsPDU *pdu = opaque;
    V9fsState *s = pdu->s;
    QEMUIOVector qiov_full;
    QEMUIOVector qiov;

    err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &count);
    if (err < 0) {
        pdu_complete(pdu, err);
        return;
    }
    offset += err;
    v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, count, true);
    trace_v9fs_write(pdu->tag, pdu->id, fid, off, count, qiov_full.niov);

    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -EINVAL;
        goto out_nofid;
    }
    if (fidp->fid_type == P9_FID_FILE) {
        if (fidp->fs.fd == -1) {
            err = -EINVAL;
            goto out;
        }
    } else if (fidp->fid_type == P9_FID_XATTR) {
        /*
         * setxattr operation
         */
        err = v9fs_xattr_write(s, pdu, fidp, off, count,
                               qiov_full.iov, qiov_full.niov);
        goto out;
    } else {
        err = -EINVAL;
        goto out;
    }
    qemu_iovec_init(&qiov, qiov_full.niov);
    do {
        /* Re-point qiov at the not-yet-written tail of qiov_full */
        qemu_iovec_reset(&qiov);
        qemu_iovec_concat(&qiov, &qiov_full, total, qiov_full.size - total);
        if (0) {
            print_sg(qiov.iov, qiov.niov);
        }
        /* Loop in case of EINTR */
        do {
            len = v9fs_co_pwritev(pdu, fidp, qiov.iov, qiov.niov, off);
            if (len >= 0) {
                off += len;
                total += len;
            }
        } while (len == -EINTR && !pdu->cancelled);
        if (len < 0) {
            /* IO error return the error */
            err = len;
            goto out_qiov;
        }
    } while (total < count && len > 0);

    offset = 7;
    err = pdu_marshal(pdu, offset, "d", total);
    if (err < 0) {
        goto out_qiov;
    }
    err += offset;
    trace_v9fs_write_return(pdu->tag, pdu->id, total, err);
out_qiov:
    qemu_iovec_destroy(&qiov);
out:
    put_fid(pdu, fidp);
out_nofid:
    qemu_iovec_destroy(&qiov_full);
    pdu_complete(pdu, err);
}

/*
 * Handle the legacy 9P2000.u T_create: create a filesystem object under
 * the directory referred to by @fid and transition the fid to refer to
 * (and, for files/dirs, hold open) the new object.
 *
 * The P9_STAT_MODE_* bits in perm select the object type; the
 * "extension" string carries type-specific data (symlink target, target
 * fid for hard links, "c|b major minor" for device nodes). On success
 * the reply is the new object's qid plus the iounit.
 */
static void coroutine_fn v9fs_create(void *opaque)
{
    int32_t fid;
    int err = 0;
    size_t offset = 7;
    V9fsFidState *fidp;
    V9fsQID qid;
    int32_t perm;
    int8_t mode;
    V9fsPath path;
    struct stat stbuf;
    V9fsString name;
    V9fsString extension;
    int iounit;
    V9fsPDU *pdu = opaque;
    V9fsState *s = pdu->s;

    v9fs_path_init(&path);
    v9fs_string_init(&name);
    v9fs_string_init(&extension);
    err = pdu_unmarshal(pdu, offset, "dsdbs", &fid, &name,
                        &perm, &mode, &extension);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_create(pdu->tag, pdu->id, fid, name.data, perm, mode);

    if (name_is_illegal(name.data)) {
        err = -ENOENT;
        goto out_nofid;
    }

    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
        err = -EEXIST;
        goto out_nofid;
    }

    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -EINVAL;
        goto out_nofid;
    }
    /* the fid must not already be open */
    if (fidp->fid_type != P9_FID_NONE) {
        err = -EINVAL;
        goto out;
    }
    if (perm & P9_STAT_MODE_DIR) {
        err = v9fs_co_mkdir(pdu, fidp, &name, perm & 0777,
                            fidp->uid, -1, &stbuf);
        if (err < 0) {
            goto out;
        }
        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
        if (err < 0) {
            goto out;
        }
        /* retarget the fid at the newly created object */
        v9fs_path_write_lock(s);
        v9fs_path_copy(&fidp->path, &path);
        v9fs_path_unlock(s);
        err = v9fs_co_opendir(pdu, fidp);
        if (err < 0) {
            goto out;
        }
        fidp->fid_type = P9_FID_DIR;
    } else if (perm & P9_STAT_MODE_SYMLINK) {
        err = v9fs_co_symlink(pdu, fidp, &name,
                              extension.data, -1 , &stbuf);
        if (err < 0) {
            goto out;
        }
        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
        if (err < 0) {
            goto out;
        }
        v9fs_path_write_lock(s);
        v9fs_path_copy(&fidp->path, &path);
        v9fs_path_unlock(s);
    } else if (perm & P9_STAT_MODE_LINK) {
        /* extension carries the decimal fid of the link target */
        int32_t ofid = atoi(extension.data);
        V9fsFidState *ofidp = get_fid(pdu, ofid);
        if (ofidp == NULL) {
            err = -EINVAL;
            goto out;
        }
        err = v9fs_co_link(pdu, ofidp, fidp, &name);
        put_fid(pdu, ofidp);
        if (err < 0) {
            goto out;
        }
        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
        if (err < 0) {
            fidp->fid_type = P9_FID_NONE;
            goto out;
        }
        v9fs_path_write_lock(s);
        v9fs_path_copy(&fidp->path, &path);
        v9fs_path_unlock(s);
        err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
        if (err < 0) {
            fidp->fid_type = P9_FID_NONE;
            goto out;
        }
    } else if (perm & P9_STAT_MODE_DEVICE) {
        char ctype;
        uint32_t major, minor;
        mode_t nmode = 0;

        /* extension format: "c|b major minor" */
        if (sscanf(extension.data, "%c %u %u", &ctype, &major, &minor) != 3) {
            err = -errno;
            goto out;
        }

        switch (ctype) {
        case 'c':
            nmode = S_IFCHR;
            break;
        case 'b':
            nmode = S_IFBLK;
            break;
        default:
            err = -EIO;
            goto out;
        }

        nmode |= perm & 0777;
        err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
                            makedev(major, minor), nmode, &stbuf);
        if (err < 0) {
            goto out;
        }
        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
        if (err < 0) {
            goto out;
        }
        v9fs_path_write_lock(s);
        v9fs_path_copy(&fidp->path, &path);
        v9fs_path_unlock(s);
    } else if (perm & P9_STAT_MODE_NAMED_PIPE) {
        err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
                            0, S_IFIFO | (perm & 0777), &stbuf);
        if (err < 0) {
            goto out;
        }
        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
        if (err < 0) {
            goto out;
        }
        v9fs_path_write_lock(s);
        v9fs_path_copy(&fidp->path, &path);
        v9fs_path_unlock(s);
    } else if (perm & P9_STAT_MODE_SOCKET) {
        err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
                            0, S_IFSOCK | (perm & 0777), &stbuf);
        if (err < 0) {
            goto out;
        }
        err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
        if (err < 0) {
            goto out;
        }
        v9fs_path_write_lock(s);
        v9fs_path_copy(&fidp->path, &path);
        v9fs_path_unlock(s);
    } else {
        /* regular file: create and open in one step */
        err = v9fs_co_open2(pdu, fidp, &name, -1,
                            omode_to_uflags(mode) | O_CREAT, perm, &stbuf);
        if (err < 0) {
            goto out;
        }
        fidp->fid_type = P9_FID_FILE;
        fidp->open_flags = omode_to_uflags(mode);
        if (fidp->open_flags & O_EXCL) {
            /*
             * We let the host file system do O_EXCL check
             * We should not reclaim such fd
             */
            fidp->flags |= FID_NON_RECLAIMABLE;
        }
    }
    iounit = get_iounit(pdu, &fidp->path);
    err = stat_to_qid(pdu, &stbuf, &qid);
    if (err < 0) {
        goto out;
    }
    err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
    if (err < 0) {
        goto out;
    }
    err += offset;
    trace_v9fs_create_return(pdu->tag, pdu->id,
                             qid.type, qid.version, qid.path, iounit);
out:
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, err);
    v9fs_string_free(&name);
    v9fs_string_free(&extension);
    v9fs_path_free(&path);
}

/*
 * Handle the 9P2000.L T_symlink: create symlink @name -> @symname in
 * the directory referred to by @dfid, owned by @gid. Replies with the
 * new symlink's qid.
 */
static void coroutine_fn v9fs_symlink(void *opaque)
{
    V9fsPDU *pdu = opaque;
    V9fsString name;
    V9fsString symname;
    V9fsFidState *dfidp;
    V9fsQID qid;
    struct stat stbuf;
    int32_t dfid;
    int err = 0;
    gid_t gid;
    size_t offset = 7;

    v9fs_string_init(&name);
    v9fs_string_init(&symname);
    err = pdu_unmarshal(pdu, offset, "dssd", &dfid, &name, &symname, &gid);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_symlink(pdu->tag, pdu->id, dfid, name.data, symname.data, gid);

    if (name_is_illegal(name.data)) {
        err = -ENOENT;
        goto out_nofid;
    }

    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
        err = -EEXIST;
        goto out_nofid;
    }

    dfidp = get_fid(pdu, dfid);
    if (dfidp == NULL) {
        err = -EINVAL;
        goto out_nofid;
    }
    err = v9fs_co_symlink(pdu, dfidp, &name, symname.data, gid, &stbuf);
    if (err < 0) {
        goto out;
    }
    err = stat_to_qid(pdu, &stbuf, &qid);
    if (err < 0) {
        goto out;
    }
    err = pdu_marshal(pdu, offset, "Q", &qid);
    if (err < 0) {
        goto out;
    }
    err += offset;
    trace_v9fs_symlink_return(pdu->tag, pdu->id,
                              qid.type, qid.version, qid.path);
out:
    put_fid(pdu, dfidp);
out_nofid:
    pdu_complete(pdu, err);
    v9fs_string_free(&name);
    v9fs_string_free(&symname);
}
/*
 * Handle T_flush: abort the in-flight request whose tag matches.
 *
 * Marks the target PDU cancelled and blocks on its completion queue
 * until it finishes; if this flusher is the only (last) waiter it also
 * resets the cancelled flag and frees the target PDU. A tag equal to
 * the flush request's own tag is rejected with a warning. The Rflush
 * reply is always just the 7-byte header.
 */
static void coroutine_fn v9fs_flush(void *opaque)
{
    ssize_t err;
    int16_t tag;
    size_t offset = 7;
    V9fsPDU *cancel_pdu = NULL;
    V9fsPDU *pdu = opaque;
    V9fsState *s = pdu->s;

    err = pdu_unmarshal(pdu, offset, "w", &tag);
    if (err < 0) {
        pdu_complete(pdu, err);
        return;
    }
    trace_v9fs_flush(pdu->tag, pdu->id, tag);

    if (pdu->tag == tag) {
        warn_report("the guest sent a self-referencing 9P flush request");
    } else {
        /* look up the active request carrying the tag to cancel */
        QLIST_FOREACH(cancel_pdu, &s->active_list, next) {
            if (cancel_pdu->tag == tag) {
                break;
            }
        }
    }
    if (cancel_pdu) {
        cancel_pdu->cancelled = 1;
        /*
         * Wait for pdu to complete.
         */
        qemu_co_queue_wait(&cancel_pdu->complete, NULL);
        /* last waiter cleans up the cancelled PDU */
        if (!qemu_co_queue_next(&cancel_pdu->complete)) {
            cancel_pdu->cancelled = 0;
            pdu_free(cancel_pdu);
        }
    }
    pdu_complete(pdu, 7);
}

/*
 * Handle the 9P2000.L T_link: create hard link @name in directory @dfid
 * pointing at the object referred to by @oldfid.
 */
static void coroutine_fn v9fs_link(void *opaque)
{
    V9fsPDU *pdu = opaque;
    int32_t dfid, oldfid;
    V9fsFidState *dfidp, *oldfidp;
    V9fsString name;
    size_t offset = 7;
    int err = 0;

    v9fs_string_init(&name);
    err = pdu_unmarshal(pdu, offset, "dds", &dfid, &oldfid, &name);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_link(pdu->tag, pdu->id, dfid, oldfid, name.data);

    if (name_is_illegal(name.data)) {
        err = -ENOENT;
        goto out_nofid;
    }

    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
        err = -EEXIST;
        goto out_nofid;
    }

    dfidp = get_fid(pdu, dfid);
    if (dfidp == NULL) {
        err = -ENOENT;
        goto out_nofid;
    }

    oldfidp = get_fid(pdu, oldfid);
    if (oldfidp == NULL) {
        err = -ENOENT;
        goto out;
    }
    err = v9fs_co_link(pdu, oldfidp, dfidp, &name);
    if (!err) {
        err = offset;
    }
    put_fid(pdu, oldfidp);
out:
    put_fid(pdu, dfidp);
out_nofid:
    v9fs_string_free(&name);
    pdu_complete(pdu, err);
}

/* Only works with path name based fid */
static void coroutine_fn v9fs_remove(void *opaque)
{
    int32_t fid;
    int err = 0;
    size_t offset = 7;
    V9fsFidState *fidp;
    V9fsPDU *pdu = opaque;

    err = pdu_unmarshal(pdu, offset, "d", &fid);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_remove(pdu->tag, pdu->id, fid);

    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -EINVAL;
        goto out_nofid;
    }
    /* if fs driver is not path based, return EOPNOTSUPP */
    if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) {
        err = -EOPNOTSUPP;
        goto out_err;
    }
    /*
     * IF the file is unlinked, we cannot reopen
     * the file later. So don't reclaim fd
     */
    err = v9fs_mark_fids_unreclaim(pdu, &fidp->path);
    if (err < 0) {
        goto out_err;
    }
    err = v9fs_co_remove(pdu, &fidp->path);
    if (!err) {
        err = offset;
    }
out_err:
    /* For TREMOVE we need to clunk the fid even on failed remove */
    clunk_fid(pdu->s, fidp->fid);
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, err);
}
/*
 * Handle the 9P2000.L T_unlinkat: remove entry @name in directory
 * @dfid. The only accepted flag is P9_DOTL_AT_REMOVEDIR, translated to
 * AT_REMOVEDIR for the backend. "." is rejected with -EINVAL and ".."
 * with -ENOTEMPTY.
 */
static void coroutine_fn v9fs_unlinkat(void *opaque)
{
    int err = 0;
    V9fsString name;
    int32_t dfid, flags, rflags = 0;
    size_t offset = 7;
    V9fsPath path;
    V9fsFidState *dfidp;
    V9fsPDU *pdu = opaque;

    v9fs_string_init(&name);
    err = pdu_unmarshal(pdu, offset, "dsd", &dfid, &name, &flags);
    if (err < 0) {
        goto out_nofid;
    }

    if (name_is_illegal(name.data)) {
        err = -ENOENT;
        goto out_nofid;
    }

    if (!strcmp(".", name.data)) {
        err = -EINVAL;
        goto out_nofid;
    }

    if (!strcmp("..", name.data)) {
        err = -ENOTEMPTY;
        goto out_nofid;
    }

    if (flags & ~P9_DOTL_AT_REMOVEDIR) {
        err = -EINVAL;
        goto out_nofid;
    }

    if (flags & P9_DOTL_AT_REMOVEDIR) {
        rflags |= AT_REMOVEDIR;
    }

    dfidp = get_fid(pdu, dfid);
    if (dfidp == NULL) {
        err = -EINVAL;
        goto out_nofid;
    }
    /*
     * IF the file is unlinked, we cannot reopen
     * the file later. So don't reclaim fd
     */
    v9fs_path_init(&path);
    err = v9fs_co_name_to_path(pdu, &dfidp->path, name.data, &path);
    if (err < 0) {
        goto out_err;
    }
    err = v9fs_mark_fids_unreclaim(pdu, &path);
    if (err < 0) {
        goto out_err;
    }
    err = v9fs_co_unlinkat(pdu, &dfidp->path, &name, rflags);
    if (!err) {
        err = offset;
    }
out_err:
    put_fid(pdu, dfidp);
    v9fs_path_free(&path);
out_nofid:
    pdu_complete(pdu, err);
    v9fs_string_free(&name);
}


/* Only works with path name based fid */
/*
 * Rename the object behind @fidp to @name, either inside the directory
 * fid @newdirfid or (when newdirfid == -1, as used by Twstat) within
 * the object's current parent directory. After the backend rename, any
 * other fids whose paths lie under the old path are rewritten to the
 * new prefix. Caller holds the path write lock.
 */
static int coroutine_fn v9fs_complete_rename(V9fsPDU *pdu, V9fsFidState *fidp,
                                             int32_t newdirfid,
                                             V9fsString *name)
{
    int err = 0;
    V9fsPath new_path;
    V9fsFidState *tfidp;
    V9fsState *s = pdu->s;
    V9fsFidState *dirfidp = NULL;
    GHashTableIter iter;
    gpointer fid;

    v9fs_path_init(&new_path);
    if (newdirfid != -1) {
        dirfidp = get_fid(pdu, newdirfid);
        if (dirfidp == NULL) {
            return -ENOENT;
        }
        if (fidp->fid_type != P9_FID_NONE) {
            err = -EINVAL;
            goto out;
        }
        err = v9fs_co_name_to_path(pdu, &dirfidp->path, name->data, &new_path);
        if (err < 0) {
            goto out;
        }
    } else {
        /* rename-in-place: build the target inside the current parent */
        char *dir_name = g_path_get_dirname(fidp->path.data);
        V9fsPath dir_path;

        v9fs_path_init(&dir_path);
        v9fs_path_sprintf(&dir_path, "%s", dir_name);
        g_free(dir_name);

        err = v9fs_co_name_to_path(pdu, &dir_path, name->data, &new_path);
        v9fs_path_free(&dir_path);
        if (err < 0) {
            goto out;
        }
    }
    err = v9fs_co_rename(pdu, &fidp->path, &new_path);
    if (err < 0) {
        goto out;
    }

    /*
     * Fixup fid's pointing to the old name to
     * start pointing to the new name
     */
    g_hash_table_iter_init(&iter, s->fids);
    while (g_hash_table_iter_next(&iter, &fid, (gpointer *) &tfidp)) {
        if (v9fs_path_is_ancestor(&fidp->path, &tfidp->path)) {
            /* replace the name */
            v9fs_fix_path(&tfidp->path, &new_path, strlen(fidp->path.data));
        }
    }
out:
    if (dirfidp) {
        put_fid(pdu, dirfidp);
    }
    v9fs_path_free(&new_path);
    return err;
}
/* Only works with path name based fid */
/*
 * Handle the 9P2000.L T_rename: move the object behind @fid to @name
 * inside directory fid @newdirfid. Takes the path write lock around the
 * rename + fid-path fixup done by v9fs_complete_rename().
 */
static void coroutine_fn v9fs_rename(void *opaque)
{
    int32_t fid;
    ssize_t err = 0;
    size_t offset = 7;
    V9fsString name;
    int32_t newdirfid;
    V9fsFidState *fidp;
    V9fsPDU *pdu = opaque;
    V9fsState *s = pdu->s;

    v9fs_string_init(&name);
    err = pdu_unmarshal(pdu, offset, "dds", &fid, &newdirfid, &name);
    if (err < 0) {
        goto out_nofid;
    }

    if (name_is_illegal(name.data)) {
        err = -ENOENT;
        goto out_nofid;
    }

    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
        err = -EISDIR;
        goto out_nofid;
    }

    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -ENOENT;
        goto out_nofid;
    }
    /* renaming an open fid is not supported */
    if (fidp->fid_type != P9_FID_NONE) {
        err = -EINVAL;
        goto out;
    }
    /* if fs driver is not path based, return EOPNOTSUPP */
    if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) {
        err = -EOPNOTSUPP;
        goto out;
    }
    v9fs_path_write_lock(s);
    err = v9fs_complete_rename(pdu, fidp, newdirfid, &name);
    v9fs_path_unlock(s);
    if (!err) {
        err = offset;
    }
out:
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, err);
    v9fs_string_free(&name);
}

/*
 * After a renameat, rewrite the paths of all fids that pointed at (or
 * under) olddir/old_name so they point at newdir/new_name instead.
 * Only meaningful for path-name based fs drivers; caller holds the
 * path write lock.
 */
static int coroutine_fn v9fs_fix_fid_paths(V9fsPDU *pdu, V9fsPath *olddir,
                                           V9fsString *old_name,
                                           V9fsPath *newdir,
                                           V9fsString *new_name)
{
    V9fsFidState *tfidp;
    V9fsPath oldpath, newpath;
    V9fsState *s = pdu->s;
    int err;
    GHashTableIter iter;
    gpointer fid;

    v9fs_path_init(&oldpath);
    v9fs_path_init(&newpath);
    err = v9fs_co_name_to_path(pdu, olddir, old_name->data, &oldpath);
    if (err < 0) {
        goto out;
    }
    err = v9fs_co_name_to_path(pdu, newdir, new_name->data, &newpath);
    if (err < 0) {
        goto out;
    }

    /*
     * Fixup fid's pointing to the old name to
     * start pointing to the new name
     */
    g_hash_table_iter_init(&iter, s->fids);
    while (g_hash_table_iter_next(&iter, &fid, (gpointer *) &tfidp)) {
        if (v9fs_path_is_ancestor(&oldpath, &tfidp->path)) {
            /* replace the name */
            v9fs_fix_path(&tfidp->path, &newpath, strlen(oldpath.data));
        }
    }
out:
    v9fs_path_free(&oldpath);
    v9fs_path_free(&newpath);
    return err;
}

/*
 * Core of T_renameat: resolve both directory fids (newdirfid == -1
 * means "same directory as olddirfid"), perform the backend renameat,
 * then fix up fid paths when the driver is path-name based.
 */
static int coroutine_fn v9fs_complete_renameat(V9fsPDU *pdu, int32_t olddirfid,
                                               V9fsString *old_name,
                                               int32_t newdirfid,
                                               V9fsString *new_name)
{
    int err = 0;
    V9fsState *s = pdu->s;
    V9fsFidState *newdirfidp = NULL, *olddirfidp = NULL;

    olddirfidp = get_fid(pdu, olddirfid);
    if (olddirfidp == NULL) {
        err = -ENOENT;
        goto out;
    }
    if (newdirfid != -1) {
        newdirfidp = get_fid(pdu, newdirfid);
        if (newdirfidp == NULL) {
            err = -ENOENT;
            goto out;
        }
    } else {
        newdirfidp = get_fid(pdu, olddirfid);
    }

    err = v9fs_co_renameat(pdu, &olddirfidp->path, old_name,
                           &newdirfidp->path, new_name);
    if (err < 0) {
        goto out;
    }
    if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) {
        /* Only for path based fid we need to do the below fixup */
        err = v9fs_fix_fid_paths(pdu, &olddirfidp->path, old_name,
                                 &newdirfidp->path, new_name);
    }
out:
    if (olddirfidp) {
        put_fid(pdu, olddirfidp);
    }
    if (newdirfidp) {
        put_fid(pdu, newdirfidp);
    }
    return err;
}

/*
 * Handle the 9P2000.L T_renameat: rename old_name in olddirfid to
 * new_name in newdirfid, under the path write lock.
 */
static void coroutine_fn v9fs_renameat(void *opaque)
{
    ssize_t err = 0;
    size_t offset = 7;
    V9fsPDU *pdu = opaque;
    V9fsState *s = pdu->s;
    int32_t olddirfid, newdirfid;
    V9fsString old_name, new_name;

    v9fs_string_init(&old_name);
    v9fs_string_init(&new_name);
    err = pdu_unmarshal(pdu, offset, "dsds", &olddirfid,
                        &old_name, &newdirfid, &new_name);
    if (err < 0) {
        goto out_err;
    }

    if (name_is_illegal(old_name.data) || name_is_illegal(new_name.data)) {
        err = -ENOENT;
        goto out_err;
    }

    if (!strcmp(".", old_name.data) || !strcmp("..", old_name.data) ||
        !strcmp(".", new_name.data) || !strcmp("..", new_name.data)) {
        err = -EISDIR;
        goto out_err;
    }

    v9fs_path_write_lock(s);
    err = v9fs_complete_renameat(pdu, olddirfid,
                                 &old_name, newdirfid, &new_name);
    v9fs_path_unlock(s);
    if (!err) {
        err = offset;
    }

out_err:
    pdu_complete(pdu, err);
    v9fs_string_free(&old_name);
    v9fs_string_free(&new_name);
}
/*
 * Handle the legacy 9P2000.u T_wstat: apply the writable fields of a
 * V9fsStat to the object behind @fid. A "don't touch" stat (all fields
 * at their no-change values) is defined to mean fsync. Fields equal to
 * -1 (or an empty name / zero-size name) are left unchanged; the
 * others are applied in order: mode, atime/mtime, uid/gid, rename,
 * truncate.
 */
static void coroutine_fn v9fs_wstat(void *opaque)
{
    int32_t fid;
    int err = 0;
    int16_t unused;
    V9fsStat v9stat;
    size_t offset = 7;
    struct stat stbuf;
    V9fsFidState *fidp;
    V9fsPDU *pdu = opaque;
    V9fsState *s = pdu->s;

    v9fs_stat_init(&v9stat);
    err = pdu_unmarshal(pdu, offset, "dwS", &fid, &unused, &v9stat);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_wstat(pdu->tag, pdu->id, fid,
                     v9stat.mode, v9stat.atime, v9stat.mtime);

    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -EINVAL;
        goto out_nofid;
    }
    /* do we need to sync the file? */
    if (donttouch_stat(&v9stat)) {
        err = v9fs_co_fsync(pdu, fidp, 0);
        goto out;
    }
    if (v9stat.mode != -1) {
        uint32_t v9_mode;
        err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
        if (err < 0) {
            goto out;
        }
        v9_mode = stat_to_v9mode(&stbuf);
        if ((v9stat.mode & P9_STAT_MODE_TYPE_BITS) !=
            (v9_mode & P9_STAT_MODE_TYPE_BITS)) {
            /* Attempting to change the type */
            err = -EIO;
            goto out;
        }
        err = v9fs_co_chmod(pdu, &fidp->path,
                            v9mode_to_mode(v9stat.mode,
                                           &v9stat.extension));
        if (err < 0) {
            goto out;
        }
    }
    if (v9stat.mtime != -1 || v9stat.atime != -1) {
        struct timespec times[2];
        /* times[0] = atime, times[1] = mtime; UTIME_OMIT skips a field */
        if (v9stat.atime != -1) {
            times[0].tv_sec = v9stat.atime;
            times[0].tv_nsec = 0;
        } else {
            times[0].tv_nsec = UTIME_OMIT;
        }
        if (v9stat.mtime != -1) {
            times[1].tv_sec = v9stat.mtime;
            times[1].tv_nsec = 0;
        } else {
            times[1].tv_nsec = UTIME_OMIT;
        }
        err = v9fs_co_utimensat(pdu, &fidp->path, times);
        if (err < 0) {
            goto out;
        }
    }
    if (v9stat.n_gid != -1 || v9stat.n_uid != -1) {
        err = v9fs_co_chown(pdu, &fidp->path, v9stat.n_uid, v9stat.n_gid);
        if (err < 0) {
            goto out;
        }
    }
    if (v9stat.name.size != 0) {
        /* rename within the same directory (newdirfid == -1) */
        v9fs_path_write_lock(s);
        err = v9fs_complete_rename(pdu, fidp, -1, &v9stat.name);
        v9fs_path_unlock(s);
        if (err < 0) {
            goto out;
        }
    }
    if (v9stat.length != -1) {
        err = v9fs_co_truncate(pdu, &fidp->path, v9stat.length);
        if (err < 0) {
            goto out;
        }
    }
    err = offset;
out:
    put_fid(pdu, fidp);
out_nofid:
    v9fs_stat_free(&v9stat);
    pdu_complete(pdu, err);
}

/*
 * Marshal an Rstatfs payload from a host statfs result.
 *
 * The reported block size is scaled up by bsize_factor so that a single
 * client request of msize bytes maps to one reported block; block
 * counts are scaled down correspondingly. The fsid is folded into one
 * 64-bit value; field names differ per host OS (f_fsid.val vs __val,
 * and Darwin/FreeBSD have no f_namelen, so NAME_MAX is used).
 */
static int v9fs_fill_statfs(V9fsState *s, V9fsPDU *pdu, struct statfs *stbuf)
{
    uint32_t f_type;
    uint32_t f_bsize;
    uint64_t f_blocks;
    uint64_t f_bfree;
    uint64_t f_bavail;
    uint64_t f_files;
    uint64_t f_ffree;
    uint64_t fsid_val;
    uint32_t f_namelen;
    size_t offset = 7;
    int32_t bsize_factor;

    /*
     * compute bsize factor based on host file system block size
     * and client msize
     */
    bsize_factor = (s->msize - P9_IOHDRSZ) / stbuf->f_bsize;
    if (!bsize_factor) {
        bsize_factor = 1;
    }
    f_type = stbuf->f_type;
    f_bsize = stbuf->f_bsize;
    f_bsize *= bsize_factor;
    /*
     * f_bsize is adjusted(multiplied) by bsize factor, so we need to
     * adjust(divide) the number of blocks, free blocks and available
     * blocks by bsize factor
     */
    f_blocks = stbuf->f_blocks / bsize_factor;
    f_bfree = stbuf->f_bfree / bsize_factor;
    f_bavail = stbuf->f_bavail / bsize_factor;
    f_files = stbuf->f_files;
    f_ffree = stbuf->f_ffree;
#if defined(CONFIG_DARWIN) || defined(CONFIG_FREEBSD)
    fsid_val = (unsigned int)stbuf->f_fsid.val[0] |
               (unsigned long long)stbuf->f_fsid.val[1] << 32;
    f_namelen = NAME_MAX;
#else
    fsid_val = (unsigned int) stbuf->f_fsid.__val[0] |
               (unsigned long long)stbuf->f_fsid.__val[1] << 32;
    f_namelen = stbuf->f_namelen;
#endif

    return pdu_marshal(pdu, offset, "ddqqqqqqd",
                       f_type, f_bsize, f_blocks, f_bfree,
                       f_bavail, f_files, f_ffree,
                       fsid_val, f_namelen);
}
    }
    retval += offset;
out:
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, retval);
}

/*
 * TMKNOD: create a device node (or fifo/socket, depending on mode) named
 * 'name' inside the directory referenced by fid, and reply with its QID.
 */
static void coroutine_fn v9fs_mknod(void *opaque)
{

    int mode;
    gid_t gid;
    int32_t fid;
    V9fsQID qid;
    int err = 0;
    int major, minor;
    size_t offset = 7;
    V9fsString name;
    struct stat stbuf;
    V9fsFidState *fidp;
    V9fsPDU *pdu = opaque;

    v9fs_string_init(&name);
    err = pdu_unmarshal(pdu, offset, "dsdddd", &fid, &name, &mode,
                        &major, &minor, &gid);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_mknod(pdu->tag, pdu->id, fid, mode, major, minor);

    if (name_is_illegal(name.data)) {
        err = -ENOENT;
        goto out_nofid;
    }

    /* "." and ".." always exist, so creating them must fail */
    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
        err = -EEXIST;
        goto out_nofid;
    }

    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -ENOENT;
        goto out_nofid;
    }
    err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, gid,
                        makedev(major, minor), mode, &stbuf);
    if (err < 0) {
        goto out;
    }
    err = stat_to_qid(pdu, &stbuf, &qid);
    if (err < 0) {
        goto out;
    }
    err = pdu_marshal(pdu, offset, "Q", &qid);
    if (err < 0) {
        goto out;
    }
    err += offset;
    trace_v9fs_mknod_return(pdu->tag, pdu->id,
                            qid.type, qid.version, qid.path);
out:
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, err);
    v9fs_string_free(&name);
}

/*
 * Implement posix byte range locking code
 * Server side handling of locking code is very simple, because 9p server in
 * QEMU can handle only one client. And most of the lock handling
 * (like conflict, merging) etc is done by the VFS layer itself, so no need
 * to do anything in the qemu 9p server side lock code path.
 * So when a TLOCK request comes, always return success
 */
static void coroutine_fn v9fs_lock(void *opaque)
{
    V9fsFlock flock;
    size_t offset = 7;
    struct stat stbuf;
    V9fsFidState *fidp;
    int32_t fid, err = 0;
    V9fsPDU *pdu = opaque;

    v9fs_string_init(&flock.client_id);
    err = pdu_unmarshal(pdu, offset, "dbdqqds", &fid, &flock.type,
                        &flock.flags, &flock.start, &flock.length,
                        &flock.proc_id, &flock.client_id);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_lock(pdu->tag, pdu->id, fid,
                    flock.type, flock.start, flock.length);


    /* We support only block flag now (that too ignored currently) */
    if (flock.flags & ~P9_LOCK_FLAGS_BLOCK) {
        err = -EINVAL;
        goto out_nofid;
    }
    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -ENOENT;
        goto out_nofid;
    }
    /* fstat validates that the fid refers to an open, stat-able file */
    err = v9fs_co_fstat(pdu, fidp, &stbuf);
    if (err < 0) {
        goto out;
    }
    err = pdu_marshal(pdu, offset, "b", P9_LOCK_SUCCESS);
    if (err < 0) {
        goto out;
    }
    err += offset;
    trace_v9fs_lock_return(pdu->tag, pdu->id, P9_LOCK_SUCCESS);
out:
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, err);
    v9fs_string_free(&flock.client_id);
}

/*
 * When a TGETLOCK request comes, always return success because all lock
 * handling is done by client's VFS layer.
 */
static void coroutine_fn v9fs_getlock(void *opaque)
{
    size_t offset = 7;
    struct stat stbuf;
    V9fsFidState *fidp;
    V9fsGetlock glock;
    int32_t fid, err = 0;
    V9fsPDU *pdu = opaque;

    v9fs_string_init(&glock.client_id);
    err = pdu_unmarshal(pdu, offset, "dbqqds", &fid, &glock.type,
                        &glock.start, &glock.length, &glock.proc_id,
                        &glock.client_id);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_getlock(pdu->tag, pdu->id, fid,
                       glock.type, glock.start, glock.length);

    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -ENOENT;
        goto out_nofid;
    }
    err = v9fs_co_fstat(pdu, fidp, &stbuf);
    if (err < 0) {
        goto out;
    }
    /* Always report "no conflicting lock" (see the comment above) */
    glock.type = P9_LOCK_TYPE_UNLCK;
    err = pdu_marshal(pdu, offset, "bqqds", glock.type,
                      glock.start, glock.length, glock.proc_id,
                      &glock.client_id);
    if (err < 0) {
        goto out;
    }
    err += offset;
    trace_v9fs_getlock_return(pdu->tag, pdu->id, glock.type, glock.start,
                              glock.length, glock.proc_id);
out:
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, err);
    v9fs_string_free(&glock.client_id);
}

/*
 * TMKDIR: create directory 'name' with the given mode/gid inside the
 * directory referenced by fid, and reply with the new directory's QID.
 */
static void coroutine_fn v9fs_mkdir(void *opaque)
{
    V9fsPDU *pdu = opaque;
    size_t offset = 7;
    int32_t fid;
    struct stat stbuf;
    V9fsQID qid;
    V9fsString name;
    V9fsFidState *fidp;
    gid_t gid;
    int mode;
    int err = 0;

    v9fs_string_init(&name);
    err = pdu_unmarshal(pdu, offset, "dsdd", &fid, &name, &mode, &gid);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_mkdir(pdu->tag, pdu->id, fid, name.data, mode, gid);

    if (name_is_illegal(name.data)) {
        err = -ENOENT;
        goto out_nofid;
    }

    /* "." and ".." always exist, so creating them must fail */
    if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
        err = -EEXIST;
        goto out_nofid;
    }

    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err =
-ENOENT;
        goto out_nofid;
    }
    err = v9fs_co_mkdir(pdu, fidp, &name, mode, fidp->uid, gid, &stbuf);
    if (err < 0) {
        goto out;
    }
    err = stat_to_qid(pdu, &stbuf, &qid);
    if (err < 0) {
        goto out;
    }
    err = pdu_marshal(pdu, offset, "Q", &qid);
    if (err < 0) {
        goto out;
    }
    err += offset;
    trace_v9fs_mkdir_return(pdu->tag, pdu->id,
                            qid.type, qid.version, qid.path, err);
out:
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, err);
    v9fs_string_free(&name);
}

/*
 * TXATTRWALK: clone fid into newfid and turn newfid into an xattr fid.
 * With an empty name this prepares a listxattr (newfid reads the
 * concatenated attribute names); with a non-empty name it prepares a
 * read of that single attribute's value. The reply carries the total
 * size the client can read from newfid.
 */
static void coroutine_fn v9fs_xattrwalk(void *opaque)
{
    int64_t size;
    V9fsString name;
    ssize_t err = 0;
    size_t offset = 7;
    int32_t fid, newfid;
    V9fsFidState *file_fidp;
    V9fsFidState *xattr_fidp = NULL;
    V9fsPDU *pdu = opaque;
    V9fsState *s = pdu->s;

    v9fs_string_init(&name);
    err = pdu_unmarshal(pdu, offset, "dds", &fid, &newfid, &name);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_xattrwalk(pdu->tag, pdu->id, fid, newfid, name.data);

    file_fidp = get_fid(pdu, fid);
    if (file_fidp == NULL) {
        err = -ENOENT;
        goto out_nofid;
    }
    xattr_fidp = alloc_fid(s, newfid);
    if (xattr_fidp == NULL) {
        err = -EINVAL;
        goto out;
    }
    v9fs_path_copy(&xattr_fidp->path, &file_fidp->path);
    if (!v9fs_string_size(&name)) {
        /*
         * listxattr request. Get the size first
         */
        size = v9fs_co_llistxattr(pdu, &xattr_fidp->path, NULL, 0);
        if (size < 0) {
            err = size;
            /* drop the half-initialized newfid on failure */
            clunk_fid(s, xattr_fidp->fid);
            goto out;
        }
        /*
         * Read the xattr value
         */
        xattr_fidp->fs.xattr.len = size;
        xattr_fidp->fid_type = P9_FID_XATTR;
        xattr_fidp->fs.xattr.xattrwalk_fid = true;
        xattr_fidp->fs.xattr.value = g_malloc0(size);
        if (size) {
            err = v9fs_co_llistxattr(pdu, &xattr_fidp->path,
                                     xattr_fidp->fs.xattr.value,
                                     xattr_fidp->fs.xattr.len);
            if (err < 0) {
                clunk_fid(s, xattr_fidp->fid);
                goto out;
            }
        }
        err = pdu_marshal(pdu, offset, "q", size);
        if (err < 0) {
            goto out;
        }
        err += offset;
    } else {
        /*
         * specific xattr fid. We check for xattr
         * presence also collect the xattr size
         */
        size = v9fs_co_lgetxattr(pdu, &xattr_fidp->path,
                                 &name, NULL, 0);
        if (size < 0) {
            err = size;
            clunk_fid(s, xattr_fidp->fid);
            goto out;
        }
        /*
         * Read the xattr value
         */
        xattr_fidp->fs.xattr.len = size;
        xattr_fidp->fid_type = P9_FID_XATTR;
        xattr_fidp->fs.xattr.xattrwalk_fid = true;
        xattr_fidp->fs.xattr.value = g_malloc0(size);
        if (size) {
            err = v9fs_co_lgetxattr(pdu, &xattr_fidp->path,
                                    &name, xattr_fidp->fs.xattr.value,
                                    xattr_fidp->fs.xattr.len);
            if (err < 0) {
                clunk_fid(s, xattr_fidp->fid);
                goto out;
            }
        }
        err = pdu_marshal(pdu, offset, "q", size);
        if (err < 0) {
            goto out;
        }
        err += offset;
    }
    trace_v9fs_xattrwalk_return(pdu->tag, pdu->id, size);
out:
    put_fid(pdu, file_fidp);
    if (xattr_fidp) {
        put_fid(pdu, xattr_fidp);
    }
out_nofid:
    pdu_complete(pdu, err);
    v9fs_string_free(&name);
}

#if defined(CONFIG_LINUX)
/* Currently, only Linux has XATTR_SIZE_MAX */
#define P9_XATTR_SIZE_MAX XATTR_SIZE_MAX
#elif defined(CONFIG_DARWIN)
/*
 * Darwin doesn't seem to define a maximum xattr size in its user
 * space header, so manually configure it across platforms as 64k.
 *
 * Having no limit at all can lead to QEMU crashing during large g_malloc()
 * calls. Because QEMU does not currently support macOS guests, the below
 * preliminary solution only works due to its being a reflection of the limit of
 * Linux guests.
 */
#define P9_XATTR_SIZE_MAX 65536
#elif defined(CONFIG_FREEBSD)
/*
 * FreeBSD similarly doesn't define a maximum xattr size, the limit is
 * filesystem dependent. On UFS filesystems it's 2 times the filesystem block
 * size, typically 32KB. On ZFS it depends on the value of the xattr property;
 * with the default value there is no limit, and with xattr=sa it is 64KB.
 *
 * So, a limit of 64k seems reasonable here too.
 */
#define P9_XATTR_SIZE_MAX 65536
#else
#error Missing definition for P9_XATTR_SIZE_MAX for this host system
#endif

/*
 * TXATTRCREATE: convert fid into an xattr fid that buffers 'size' bytes
 * of attribute value; the actual setxattr happens when the fid is
 * clunked after the client has written the value.
 */
static void coroutine_fn v9fs_xattrcreate(void *opaque)
{
    int flags, rflags = 0;
    int32_t fid;
    uint64_t size;
    ssize_t err = 0;
    V9fsString name;
    size_t offset = 7;
    V9fsFidState *file_fidp;
    V9fsFidState *xattr_fidp;
    V9fsPDU *pdu = opaque;

    v9fs_string_init(&name);
    err = pdu_unmarshal(pdu, offset, "dsqd", &fid, &name, &size, &flags);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_xattrcreate(pdu->tag, pdu->id, fid, name.data, size, flags);

    /* Only CREATE/REPLACE flags are defined by the protocol */
    if (flags & ~(P9_XATTR_CREATE | P9_XATTR_REPLACE)) {
        err = -EINVAL;
        goto out_nofid;
    }

    if (flags & P9_XATTR_CREATE) {
        rflags |= XATTR_CREATE;
    }

    if (flags & P9_XATTR_REPLACE) {
        rflags |= XATTR_REPLACE;
    }

    /* Bound the upcoming g_malloc0(size) below */
    if (size > P9_XATTR_SIZE_MAX) {
        err = -E2BIG;
        goto out_nofid;
    }

    file_fidp = get_fid(pdu, fid);
    if (file_fidp
== NULL) {
        err = -EINVAL;
        goto out_nofid;
    }
    /* The fid must not already be an open file/dir/xattr fid */
    if (file_fidp->fid_type != P9_FID_NONE) {
        err = -EINVAL;
        goto out_put_fid;
    }

    /* Make the file fid point to xattr */
    xattr_fidp = file_fidp;
    xattr_fidp->fid_type = P9_FID_XATTR;
    xattr_fidp->fs.xattr.copied_len = 0;
    xattr_fidp->fs.xattr.xattrwalk_fid = false;
    xattr_fidp->fs.xattr.len = size;
    xattr_fidp->fs.xattr.flags = rflags;
    v9fs_string_init(&xattr_fidp->fs.xattr.name);
    v9fs_string_copy(&xattr_fidp->fs.xattr.name, &name);
    xattr_fidp->fs.xattr.value = g_malloc0(size);
    err = offset;
out_put_fid:
    put_fid(pdu, file_fidp);
out_nofid:
    pdu_complete(pdu, err);
    v9fs_string_free(&name);
}

/*
 * TREADLINK: return the target of the symbolic link referenced by fid.
 */
static void coroutine_fn v9fs_readlink(void *opaque)
{
    V9fsPDU *pdu = opaque;
    size_t offset = 7;
    V9fsString target;
    int32_t fid;
    int err = 0;
    V9fsFidState *fidp;

    err = pdu_unmarshal(pdu, offset, "d", &fid);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_readlink(pdu->tag, pdu->id, fid);
    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -ENOENT;
        goto out_nofid;
    }

    v9fs_string_init(&target);
    err = v9fs_co_readlink(pdu, &fidp->path, &target);
    if (err < 0) {
        goto out;
    }
    err = pdu_marshal(pdu, offset, "s", &target);
    if (err < 0) {
        v9fs_string_free(&target);
        goto out;
    }
    err += offset;
    trace_v9fs_readlink_return(pdu->tag, pdu->id, target.data);
    v9fs_string_free(&target);
out:
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, err);
}

/* Dispatch table: 9P request id -> coroutine handler */
static CoroutineEntry *pdu_co_handlers[] = {
    [P9_TREADDIR] = v9fs_readdir,
    [P9_TSTATFS] = v9fs_statfs,
    [P9_TGETATTR] = v9fs_getattr,
    [P9_TSETATTR] = v9fs_setattr,
    [P9_TXATTRWALK] = v9fs_xattrwalk,
    [P9_TXATTRCREATE] = v9fs_xattrcreate,
    [P9_TMKNOD] = v9fs_mknod,
    [P9_TRENAME] = v9fs_rename,
    [P9_TLOCK] = v9fs_lock,
    [P9_TGETLOCK] = v9fs_getlock,
    [P9_TRENAMEAT] = v9fs_renameat,
    [P9_TREADLINK] = v9fs_readlink,
    [P9_TUNLINKAT] = v9fs_unlinkat,
    [P9_TMKDIR] = v9fs_mkdir,
    [P9_TVERSION] = v9fs_version,
    [P9_TLOPEN] = v9fs_open,
    [P9_TATTACH] = v9fs_attach,
    [P9_TSTAT] = v9fs_stat,
    [P9_TWALK] = v9fs_walk,
    [P9_TCLUNK] = v9fs_clunk,
    [P9_TFSYNC] = v9fs_fsync,
    [P9_TOPEN] = v9fs_open,
    [P9_TREAD] = v9fs_read,
#if 0
    [P9_TAUTH] = v9fs_auth,
#endif
    [P9_TFLUSH] = v9fs_flush,
    [P9_TLINK] = v9fs_link,
    [P9_TSYMLINK] = v9fs_symlink,
    [P9_TCREATE] = v9fs_create,
    [P9_TLCREATE] = v9fs_lcreate,
    [P9_TWRITE] = v9fs_write,
    [P9_TWSTAT] = v9fs_wstat,
    [P9_TREMOVE] = v9fs_remove,
};

/* Fallback handler for request ids with no entry in the table */
static void coroutine_fn v9fs_op_not_supp(void *opaque)
{
    V9fsPDU *pdu = opaque;
    pdu_complete(pdu, -EOPNOTSUPP);
}

/* Handler used for write operations on a read-only export */
static void coroutine_fn v9fs_fs_ro(void *opaque)
{
    V9fsPDU *pdu = opaque;
    pdu_complete(pdu, -EROFS);
}

/*
 * Whether this request may be served on a read-only export
 * (i.e. it does not modify the file system).
 */
static inline bool is_read_only_op(V9fsPDU *pdu)
{
    switch (pdu->id) {
    case P9_TREADDIR:
    case P9_TSTATFS:
    case P9_TGETATTR:
    case P9_TXATTRWALK:
    case P9_TLOCK:
    case P9_TGETLOCK:
    case P9_TREADLINK:
    case P9_TVERSION:
    case P9_TLOPEN:
    case P9_TATTACH:
    case P9_TSTAT:
    case P9_TWALK:
    case P9_TCLUNK:
    case P9_TFSYNC:
    case P9_TOPEN:
    case P9_TREAD:
    case P9_TAUTH:
    case P9_TFLUSH:
        return 1;
    default:
        return 0;
    }
}

/*
 * Entry point from the transport layer: decode the message header,
 * pick the handler (unsupported op / read-only export / normal
 * dispatch) and run it in a fresh coroutine.
 */
void pdu_submit(V9fsPDU *pdu, P9MsgHeader *hdr)
{
    Coroutine *co;
    CoroutineEntry *handler;
    V9fsState *s = pdu->s;

    pdu->size = le32_to_cpu(hdr->size_le);
    pdu->id = hdr->id;
    pdu->tag = le16_to_cpu(hdr->tag_le);

    if (pdu->id >= ARRAY_SIZE(pdu_co_handlers) ||
        (pdu_co_handlers[pdu->id] == NULL)) {
        handler = v9fs_op_not_supp;
    } else if (is_ro_export(&s->ctx) && !is_read_only_op(pdu)) {
        handler = v9fs_fs_ro;
    } else {
        handler = pdu_co_handlers[pdu->id];
    }

    qemu_co_queue_init(&pdu->complete);
    co = qemu_coroutine_create(handler, pdu);
    qemu_coroutine_enter(co);
}

/* Returns 0 on success, 1 on failure. */
int v9fs_device_realize_common(V9fsState *s, const V9fsTransport *t,
                               Error **errp)
{
    ERRP_GUARD();
    int i, len;
    struct stat stat;
    FsDriverEntry *fse;
    V9fsPath path;
    int rc = 1;

    assert(!s->transport);
    s->transport = t;

    /* initialize pdu allocator */
    QLIST_INIT(&s->free_list);
    QLIST_INIT(&s->active_list);
    for (i = 0; i < MAX_REQ; i++) {
        QLIST_INSERT_HEAD(&s->free_list, &s->pdus[i], next);
        s->pdus[i].s = s;
        s->pdus[i].idx = i;
    }

    v9fs_path_init(&path);

    fse = get_fsdev_fsentry(s->fsconf.fsdev_id);

    if (!fse) {
        /* We don't have a fsdev identified by fsdev_id */
        error_setg(errp, "9pfs device couldn't find fsdev with the "
                   "id = %s",
                   s->fsconf.fsdev_id ?
s->fsconf.fsdev_id : "NULL");
        goto out;
    }

    if (!s->fsconf.tag) {
        /* we haven't specified a mount_tag */
        error_setg(errp, "fsdev with id %s needs mount_tag arguments",
                   s->fsconf.fsdev_id);
        goto out;
    }

    s->ctx.export_flags = fse->export_flags;
    s->ctx.fs_root = g_strdup(fse->path);
    s->ctx.exops.get_st_gen = NULL;
    len = strlen(s->fsconf.tag);
    if (len > MAX_TAG_LEN - 1) {
        error_setg(errp, "mount tag '%s' (%d bytes) is longer than "
                   "maximum (%d bytes)", s->fsconf.tag, len, MAX_TAG_LEN - 1);
        goto out;
    }

    s->tag = g_strdup(s->fsconf.tag);
    s->ctx.uid = -1;

    s->ops = fse->ops;

    s->ctx.fmode = fse->fmode;
    s->ctx.dmode = fse->dmode;

    s->fids = g_hash_table_new(NULL, NULL);
    qemu_co_rwlock_init(&s->rename_lock);

    if (s->ops->init(&s->ctx, errp) < 0) {
        error_prepend(errp, "cannot initialize fsdev '%s': ",
                      s->fsconf.fsdev_id);
        goto out;
    }

    /*
     * Check details of export path, We need to use fs driver
     * call back to do that. Since we are in the init path, we don't
     * use co-routines here.
     */
    if (s->ops->name_to_path(&s->ctx, NULL, "/", &path) < 0) {
        error_setg(errp,
                   "error in converting name to path %s", strerror(errno));
        goto out;
    }
    if (s->ops->lstat(&s->ctx, &path, &stat)) {
        error_setg(errp, "share path %s does not exist", fse->path);
        goto out;
    } else if (!S_ISDIR(stat.st_mode)) {
        error_setg(errp, "share path %s is not a directory", fse->path);
        goto out;
    }

    s->dev_id = stat.st_dev;

    /* init inode remapping : */
    /* hash table for variable length inode suffixes */
    qpd_table_init(&s->qpd_table);
    /* hash table for slow/full inode remapping (most users won't need it) */
    qpf_table_init(&s->qpf_table);
    /* hash table for quick inode remapping */
    qpp_table_init(&s->qpp_table);
    s->qp_ndevices = 0;
    s->qp_affix_next = 1; /* reserve 0 to detect overflow */
    s->qp_fullpath_next = 1;

    s->ctx.fst = &fse->fst;
    fsdev_throttle_init(s->ctx.fst);

    s->reclaiming = false;

    rc = 0;
out:
    if (rc) {
        /* tear down whatever was set up before the failure */
        v9fs_device_unrealize_common(s);
    }
    v9fs_path_free(&path);
    return rc;
}

/*
 * Undo v9fs_device_realize_common(). Also called on the realize
 * failure path, so every step must tolerate partially-initialized
 * state.
 */
void v9fs_device_unrealize_common(V9fsState *s)
{
    if (s->ops && s->ops->cleanup) {
        s->ops->cleanup(&s->ctx);
    }
    if (s->ctx.fst) {
        fsdev_throttle_cleanup(s->ctx.fst);
    }
    if (s->fids) {
        g_hash_table_destroy(s->fids);
        s->fids = NULL;
    }
    g_free(s->tag);
    qp_table_destroy(&s->qpd_table);
    qp_table_destroy(&s->qpp_table);
    qp_table_destroy(&s->qpf_table);
    g_free(s->ctx.fs_root);
}

/* Context passed to virtfs_co_reset() so the caller can poll for completion */
typedef struct VirtfsCoResetData {
    V9fsPDU pdu;
    bool done;
} VirtfsCoResetData;

static void coroutine_fn virtfs_co_reset(void *opaque)
{
    VirtfsCoResetData *data = opaque;

    virtfs_reset(&data->pdu);
    data->done = true;
}

/*
 * Reset the 9p device: drain all in-flight requests, then run
 * virtfs_reset() in a coroutine (it may yield) and poll until it
 * finishes.
 */
void v9fs_reset(V9fsState *s)
{
    VirtfsCoResetData data = { .pdu = { .s = s }, .done = false };
    Coroutine *co;

    /* wait for all active requests to complete */
    while (!QLIST_EMPTY(&s->active_list)) {
        aio_poll(qemu_get_aio_context(), true);
    }

    co = qemu_coroutine_create(virtfs_co_reset, &data);
    qemu_coroutine_enter(co);

    while (!data.done) {
        aio_poll(qemu_get_aio_context(), true);
    }
}

/*
 * Runs before main(): derive the fd-reclaim thresholds from the
 * process's RLIMIT_NOFILE soft limit.
 */
static void __attribute__((__constructor__)) v9fs_set_fd_limit(void)
{
    struct rlimit rlim;
    if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) {
        error_report("Failed to get the resource limit");
        exit(1);
    }
    open_fd_hw = rlim.rlim_cur - MIN(400, rlim.rlim_cur / 3);
    open_fd_rc = rlim.rlim_cur / 2;
}