xref: /openbmc/qemu/hw/9pfs/9p.c (revision 83b4613ba835d6ed6dcee3001c7fc56dc7b21685)
1  /*
2   * Virtio 9p backend
3   *
4   * Copyright IBM, Corp. 2010
5   *
6   * Authors:
7   *  Anthony Liguori   <aliguori@us.ibm.com>
8   *
9   * This work is licensed under the terms of the GNU GPL, version 2.  See
10   * the COPYING file in the top-level directory.
11   *
12   */
13  
14  /*
15   * Not so fast! You might want to read the 9p developer docs first:
16   * https://wiki.qemu.org/Documentation/9p
17   */
18  
19  #include "qemu/osdep.h"
20  #ifdef CONFIG_LINUX
21  #include <linux/limits.h>
22  #endif
23  #include <glib/gprintf.h>
24  #include "hw/virtio/virtio.h"
25  #include "qapi/error.h"
26  #include "qemu/error-report.h"
27  #include "qemu/iov.h"
28  #include "qemu/main-loop.h"
29  #include "qemu/sockets.h"
30  #include "virtio-9p.h"
31  #include "fsdev/qemu-fsdev.h"
32  #include "9p-xattr.h"
33  #include "9p-util.h"
34  #include "coth.h"
35  #include "trace.h"
36  #include "migration/blocker.h"
37  #include "qemu/xxhash.h"
38  #include <math.h>
39  
/*
 * File descriptor accounting.
 * NOTE(review): open_fd_hw and total_open_fd are not used in this part of
 * the file; presumably a high-water mark and a running count of open
 * descriptors -- confirm against their users.
 */
int open_fd_hw;
int total_open_fd;
/* v9fs_reclaim_fd() stops collecting fids once it has gathered this many. */
static int open_fd_rc;
43  
/*
 * Open mode bits as consumed by omode_to_uflags(): the two low bits select
 * the access mode (Oread/Owrite/Ordwr/Oexec), the remaining values are
 * individual flag bits.
 * NOTE(review): Orexec and Orclose are not referenced in this part of the
 * file.
 */
enum {
    Oread   = 0x00,
    Owrite  = 0x01,
    Ordwr   = 0x02,
    Oexec   = 0x03,
    Oexcl   = 0x04,
    Otrunc  = 0x10,
    Orexec  = 0x20,
    Orclose = 0x40,
    Oappend = 0x80,
};
55  
/*
 * Instantiate the P9ARRAY helpers for V9fsPath.
 * NOTE(review): v9fs_path_free is presumably the per-element cleanup
 * function -- confirm against the P9ARRAY_DEFINE_TYPE definition.
 */
P9ARRAY_DEFINE_TYPE(V9fsPath, v9fs_path_free);
57  
58  static ssize_t pdu_marshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...)
59  {
60      ssize_t ret;
61      va_list ap;
62  
63      va_start(ap, fmt);
64      ret = pdu->s->transport->pdu_vmarshal(pdu, offset, fmt, ap);
65      va_end(ap);
66  
67      return ret;
68  }
69  
70  static ssize_t pdu_unmarshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...)
71  {
72      ssize_t ret;
73      va_list ap;
74  
75      va_start(ap, fmt);
76      ret = pdu->s->transport->pdu_vunmarshal(pdu, offset, fmt, ap);
77      va_end(ap);
78  
79      return ret;
80  }
81  
82  static int omode_to_uflags(int8_t mode)
83  {
84      int ret = 0;
85  
86      switch (mode & 3) {
87      case Oread:
88          ret = O_RDONLY;
89          break;
90      case Ordwr:
91          ret = O_RDWR;
92          break;
93      case Owrite:
94          ret = O_WRONLY;
95          break;
96      case Oexec:
97          ret = O_RDONLY;
98          break;
99      }
100  
101      if (mode & Otrunc) {
102          ret |= O_TRUNC;
103      }
104  
105      if (mode & Oappend) {
106          ret |= O_APPEND;
107      }
108  
109      if (mode & Oexcl) {
110          ret |= O_EXCL;
111      }
112  
113      return ret;
114  }
115  
/* One row of the translation table used by dotl_to_open_flags(). */
typedef struct DotlOpenflagMap {
    int dotl_flag;      /* P9_DOTL_* flag bit tested in the client flags */
    int open_flag;      /* host open flag OR-ed in when dotl_flag is set */
} DotlOpenflagMap;
120  
121  static int dotl_to_open_flags(int flags)
122  {
123      int i;
124      /*
125       * We have same bits for P9_DOTL_READONLY, P9_DOTL_WRONLY
126       * and P9_DOTL_NOACCESS
127       */
128      int oflags = flags & O_ACCMODE;
129  
130      DotlOpenflagMap dotl_oflag_map[] = {
131          { P9_DOTL_CREATE, O_CREAT },
132          { P9_DOTL_EXCL, O_EXCL },
133          { P9_DOTL_NOCTTY , O_NOCTTY },
134          { P9_DOTL_TRUNC, O_TRUNC },
135          { P9_DOTL_APPEND, O_APPEND },
136          { P9_DOTL_NONBLOCK, O_NONBLOCK } ,
137          { P9_DOTL_DSYNC, O_DSYNC },
138          { P9_DOTL_FASYNC, FASYNC },
139  #ifndef CONFIG_DARWIN
140          { P9_DOTL_NOATIME, O_NOATIME },
141          /*
142           *  On Darwin, we could map to F_NOCACHE, which is
143           *  similar, but doesn't quite have the same
144           *  semantics. However, we don't support O_DIRECT
145           *  even on linux at the moment, so we just ignore
146           *  it here.
147           */
148          { P9_DOTL_DIRECT, O_DIRECT },
149  #endif
150          { P9_DOTL_LARGEFILE, O_LARGEFILE },
151          { P9_DOTL_DIRECTORY, O_DIRECTORY },
152          { P9_DOTL_NOFOLLOW, O_NOFOLLOW },
153          { P9_DOTL_SYNC, O_SYNC },
154      };
155  
156      for (i = 0; i < ARRAY_SIZE(dotl_oflag_map); i++) {
157          if (flags & dotl_oflag_map[i].dotl_flag) {
158              oflags |= dotl_oflag_map[i].open_flag;
159          }
160      }
161  
162      return oflags;
163  }
164  
165  void cred_init(FsCred *credp)
166  {
167      credp->fc_uid = -1;
168      credp->fc_gid = -1;
169      credp->fc_mode = -1;
170      credp->fc_rdev = -1;
171  }
172  
173  static int get_dotl_openflags(V9fsState *s, int oflags)
174  {
175      int flags;
176      /*
177       * Filter the client open flags
178       */
179      flags = dotl_to_open_flags(oflags);
180      flags &= ~(O_NOCTTY | O_ASYNC | O_CREAT);
181  #ifndef CONFIG_DARWIN
182      /*
183       * Ignore direct disk access hint until the server supports it.
184       */
185      flags &= ~O_DIRECT;
186  #endif
187      return flags;
188  }
189  
190  void v9fs_path_init(V9fsPath *path)
191  {
192      path->data = NULL;
193      path->size = 0;
194  }
195  
196  void v9fs_path_free(V9fsPath *path)
197  {
198      g_free(path->data);
199      path->data = NULL;
200      path->size = 0;
201  }
202  
203  
204  void G_GNUC_PRINTF(2, 3)
205  v9fs_path_sprintf(V9fsPath *path, const char *fmt, ...)
206  {
207      va_list ap;
208  
209      v9fs_path_free(path);
210  
211      va_start(ap, fmt);
212      /* Bump the size for including terminating NULL */
213      path->size = g_vasprintf(&path->data, fmt, ap) + 1;
214      va_end(ap);
215  }
216  
217  void v9fs_path_copy(V9fsPath *dst, const V9fsPath *src)
218  {
219      v9fs_path_free(dst);
220      dst->size = src->size;
221      dst->data = g_memdup(src->data, src->size);
222  }
223  
224  int v9fs_name_to_path(V9fsState *s, V9fsPath *dirpath,
225                        const char *name, V9fsPath *path)
226  {
227      int err;
228      err = s->ops->name_to_path(&s->ctx, dirpath, name, path);
229      if (err < 0) {
230          err = -errno;
231      }
232      return err;
233  }
234  
235  /*
236   * Return TRUE if s1 is an ancestor of s2.
237   *
238   * E.g. "a/b" is an ancestor of "a/b/c" but not of "a/bc/d".
239   * As a special case, We treat s1 as ancestor of s2 if they are same!
240   */
241  static int v9fs_path_is_ancestor(V9fsPath *s1, V9fsPath *s2)
242  {
243      if (!strncmp(s1->data, s2->data, s1->size - 1)) {
244          if (s2->data[s1->size - 1] == '\0' || s2->data[s1->size - 1] == '/') {
245              return 1;
246          }
247      }
248      return 0;
249  }
250  
251  static size_t v9fs_string_size(V9fsString *str)
252  {
253      return str->size;
254  }
255  
256  /*
257   * returns 0 if fid got re-opened, 1 if not, < 0 on error
258   */
259  static int coroutine_fn v9fs_reopen_fid(V9fsPDU *pdu, V9fsFidState *f)
260  {
261      int err = 1;
262      if (f->fid_type == P9_FID_FILE) {
263          if (f->fs.fd == -1) {
264              do {
265                  err = v9fs_co_open(pdu, f, f->open_flags);
266              } while (err == -EINTR && !pdu->cancelled);
267          }
268      } else if (f->fid_type == P9_FID_DIR) {
269          if (f->fs.dir.stream == NULL) {
270              do {
271                  err = v9fs_co_opendir(pdu, f);
272              } while (err == -EINTR && !pdu->cancelled);
273          }
274      }
275      return err;
276  }
277  
278  static V9fsFidState *coroutine_fn get_fid(V9fsPDU *pdu, int32_t fid)
279  {
280      int err;
281      V9fsFidState *f;
282      V9fsState *s = pdu->s;
283  
284      f = g_hash_table_lookup(s->fids, GINT_TO_POINTER(fid));
285      if (f) {
286          BUG_ON(f->clunked);
287          /*
288           * Update the fid ref upfront so that
289           * we don't get reclaimed when we yield
290           * in open later.
291           */
292          f->ref++;
293          /*
294           * check whether we need to reopen the
295           * file. We might have closed the fd
296           * while trying to free up some file
297           * descriptors.
298           */
299          err = v9fs_reopen_fid(pdu, f);
300          if (err < 0) {
301              f->ref--;
302              return NULL;
303          }
304          /*
305           * Mark the fid as referenced so that the LRU
306           * reclaim won't close the file descriptor
307           */
308          f->flags |= FID_REFERENCED;
309          return f;
310      }
311      return NULL;
312  }
313  
314  static V9fsFidState *alloc_fid(V9fsState *s, int32_t fid)
315  {
316      V9fsFidState *f;
317  
318      f = g_hash_table_lookup(s->fids, GINT_TO_POINTER(fid));
319      if (f) {
320          /* If fid is already there return NULL */
321          BUG_ON(f->clunked);
322          return NULL;
323      }
324      f = g_new0(V9fsFidState, 1);
325      f->fid = fid;
326      f->fid_type = P9_FID_NONE;
327      f->ref = 1;
328      /*
329       * Mark the fid as referenced so that the LRU
330       * reclaim won't close the file descriptor
331       */
332      f->flags |= FID_REFERENCED;
333      g_hash_table_insert(s->fids, GINT_TO_POINTER(fid), f);
334  
335      v9fs_readdir_init(s->proto_version, &f->fs.dir);
336      v9fs_readdir_init(s->proto_version, &f->fs_reclaim.dir);
337  
338      return f;
339  }
340  
341  static int coroutine_fn v9fs_xattr_fid_clunk(V9fsPDU *pdu, V9fsFidState *fidp)
342  {
343      int retval = 0;
344  
345      if (fidp->fs.xattr.xattrwalk_fid) {
346          /* getxattr/listxattr fid */
347          goto free_value;
348      }
349      /*
350       * if this is fid for setxattr. clunk should
351       * result in setxattr localcall
352       */
353      if (fidp->fs.xattr.len != fidp->fs.xattr.copied_len) {
354          /* clunk after partial write */
355          retval = -EINVAL;
356          goto free_out;
357      }
358      if (fidp->fs.xattr.len) {
359          retval = v9fs_co_lsetxattr(pdu, &fidp->path, &fidp->fs.xattr.name,
360                                     fidp->fs.xattr.value,
361                                     fidp->fs.xattr.len,
362                                     fidp->fs.xattr.flags);
363      } else {
364          retval = v9fs_co_lremovexattr(pdu, &fidp->path, &fidp->fs.xattr.name);
365      }
366  free_out:
367      v9fs_string_free(&fidp->fs.xattr.name);
368  free_value:
369      g_free(fidp->fs.xattr.value);
370      return retval;
371  }
372  
373  static int coroutine_fn free_fid(V9fsPDU *pdu, V9fsFidState *fidp)
374  {
375      int retval = 0;
376  
377      if (fidp->fid_type == P9_FID_FILE) {
378          /* If we reclaimed the fd no need to close */
379          if (fidp->fs.fd != -1) {
380              retval = v9fs_co_close(pdu, &fidp->fs);
381          }
382      } else if (fidp->fid_type == P9_FID_DIR) {
383          if (fidp->fs.dir.stream != NULL) {
384              retval = v9fs_co_closedir(pdu, &fidp->fs);
385          }
386      } else if (fidp->fid_type == P9_FID_XATTR) {
387          retval = v9fs_xattr_fid_clunk(pdu, fidp);
388      }
389      v9fs_path_free(&fidp->path);
390      g_free(fidp);
391      return retval;
392  }
393  
394  static int coroutine_fn put_fid(V9fsPDU *pdu, V9fsFidState *fidp)
395  {
396      BUG_ON(!fidp->ref);
397      fidp->ref--;
398      /*
399       * Don't free the fid if it is in reclaim list
400       */
401      if (!fidp->ref && fidp->clunked) {
402          if (fidp->fid == pdu->s->root_fid) {
403              /*
404               * if the clunked fid is root fid then we
405               * have unmounted the fs on the client side.
406               * delete the migration blocker. Ideally, this
407               * should be hooked to transport close notification
408               */
409              migrate_del_blocker(&pdu->s->migration_blocker);
410          }
411          return free_fid(pdu, fidp);
412      }
413      return 0;
414  }
415  
416  static V9fsFidState *clunk_fid(V9fsState *s, int32_t fid)
417  {
418      V9fsFidState *fidp;
419  
420      /* TODO: Use g_hash_table_steal_extended() instead? */
421      fidp = g_hash_table_lookup(s->fids, GINT_TO_POINTER(fid));
422      if (fidp) {
423          g_hash_table_remove(s->fids, GINT_TO_POINTER(fid));
424          fidp->clunked = true;
425          return fidp;
426      }
427      return NULL;
428  }
429  
430  void coroutine_fn v9fs_reclaim_fd(V9fsPDU *pdu)
431  {
432      int reclaim_count = 0;
433      V9fsState *s = pdu->s;
434      V9fsFidState *f;
435      GHashTableIter iter;
436      gpointer fid;
437  
438      g_hash_table_iter_init(&iter, s->fids);
439  
440      QSLIST_HEAD(, V9fsFidState) reclaim_list =
441          QSLIST_HEAD_INITIALIZER(reclaim_list);
442  
443      while (g_hash_table_iter_next(&iter, &fid, (gpointer *) &f)) {
444          /*
445           * Unlink fids cannot be reclaimed. Check
446           * for them and skip them. Also skip fids
447           * currently being operated on.
448           */
449          if (f->ref || f->flags & FID_NON_RECLAIMABLE) {
450              continue;
451          }
452          /*
453           * if it is a recently referenced fid
454           * we leave the fid untouched and clear the
455           * reference bit. We come back to it later
456           * in the next iteration. (a simple LRU without
457           * moving list elements around)
458           */
459          if (f->flags & FID_REFERENCED) {
460              f->flags &= ~FID_REFERENCED;
461              continue;
462          }
463          /*
464           * Add fids to reclaim list.
465           */
466          if (f->fid_type == P9_FID_FILE) {
467              if (f->fs.fd != -1) {
468                  /*
469                   * Up the reference count so that
470                   * a clunk request won't free this fid
471                   */
472                  f->ref++;
473                  QSLIST_INSERT_HEAD(&reclaim_list, f, reclaim_next);
474                  f->fs_reclaim.fd = f->fs.fd;
475                  f->fs.fd = -1;
476                  reclaim_count++;
477              }
478          } else if (f->fid_type == P9_FID_DIR) {
479              if (f->fs.dir.stream != NULL) {
480                  /*
481                   * Up the reference count so that
482                   * a clunk request won't free this fid
483                   */
484                  f->ref++;
485                  QSLIST_INSERT_HEAD(&reclaim_list, f, reclaim_next);
486                  f->fs_reclaim.dir.stream = f->fs.dir.stream;
487                  f->fs.dir.stream = NULL;
488                  reclaim_count++;
489              }
490          }
491          if (reclaim_count >= open_fd_rc) {
492              break;
493          }
494      }
495      /*
496       * Now close the fid in reclaim list. Free them if they
497       * are already clunked.
498       */
499      while (!QSLIST_EMPTY(&reclaim_list)) {
500          f = QSLIST_FIRST(&reclaim_list);
501          QSLIST_REMOVE(&reclaim_list, f, V9fsFidState, reclaim_next);
502          if (f->fid_type == P9_FID_FILE) {
503              v9fs_co_close(pdu, &f->fs_reclaim);
504          } else if (f->fid_type == P9_FID_DIR) {
505              v9fs_co_closedir(pdu, &f->fs_reclaim);
506          }
507          /*
508           * Now drop the fid reference, free it
509           * if clunked.
510           */
511          put_fid(pdu, f);
512      }
513  }
514  
515  /*
516   * This is used when a path is removed from the directory tree. Any
517   * fids that still reference it must not be closed from then on, since
518   * they cannot be reopened.
519   */
520  static int coroutine_fn v9fs_mark_fids_unreclaim(V9fsPDU *pdu, V9fsPath *path)
521  {
522      int err = 0;
523      V9fsState *s = pdu->s;
524      V9fsFidState *fidp;
525      gpointer fid;
526      GHashTableIter iter;
527      /*
528       * The most common case is probably that we have exactly one
529       * fid for the given path, so preallocate exactly one.
530       */
531      g_autoptr(GArray) to_reopen = g_array_sized_new(FALSE, FALSE,
532              sizeof(V9fsFidState *), 1);
533      gint i;
534  
535      g_hash_table_iter_init(&iter, s->fids);
536  
537      /*
538       * We iterate over the fid table looking for the entries we need
539       * to reopen, and store them in to_reopen. This is because
540       * v9fs_reopen_fid() and put_fid() yield. This allows the fid table
541       * to be modified in the meantime, invalidating our iterator.
542       */
543      while (g_hash_table_iter_next(&iter, &fid, (gpointer *) &fidp)) {
544          if (fidp->path.size == path->size &&
545              !memcmp(fidp->path.data, path->data, path->size)) {
546              /*
547               * Ensure the fid survives a potential clunk request during
548               * v9fs_reopen_fid or put_fid.
549               */
550              fidp->ref++;
551              fidp->flags |= FID_NON_RECLAIMABLE;
552              g_array_append_val(to_reopen, fidp);
553          }
554      }
555  
556      for (i = 0; i < to_reopen->len; i++) {
557          fidp = g_array_index(to_reopen, V9fsFidState*, i);
558          /* reopen the file/dir if already closed */
559          err = v9fs_reopen_fid(pdu, fidp);
560          if (err < 0) {
561              break;
562          }
563      }
564  
565      for (i = 0; i < to_reopen->len; i++) {
566          put_fid(pdu, g_array_index(to_reopen, V9fsFidState*, i));
567      }
568      return err;
569  }
570  
571  static void coroutine_fn virtfs_reset(V9fsPDU *pdu)
572  {
573      V9fsState *s = pdu->s;
574      V9fsFidState *fidp;
575      GList *freeing;
576      /*
577       * Get a list of all the values (fid states) in the table, which
578       * we then...
579       */
580      g_autoptr(GList) fids = g_hash_table_get_values(s->fids);
581  
582      /* ... remove from the table, taking over ownership. */
583      g_hash_table_steal_all(s->fids);
584  
585      /*
586       * This allows us to release our references to them asynchronously without
587       * iterating over the hash table and risking iterator invalidation
588       * through concurrent modifications.
589       */
590      for (freeing = fids; freeing; freeing = freeing->next) {
591          fidp = freeing->data;
592          fidp->ref++;
593          fidp->clunked = true;
594          put_fid(pdu, fidp);
595      }
596  }
597  
/* QID type bits (users are outside this part of the file) */
#define P9_QID_TYPE_DIR         0x80
#define P9_QID_TYPE_SYMLINK     0x02

/* Individual bits of the stat mode field (users are outside this part of the file) */
#define P9_STAT_MODE_DIR        0x80000000
#define P9_STAT_MODE_APPEND     0x40000000
#define P9_STAT_MODE_EXCL       0x20000000
#define P9_STAT_MODE_MOUNT      0x10000000
#define P9_STAT_MODE_AUTH       0x08000000
#define P9_STAT_MODE_TMP        0x04000000
#define P9_STAT_MODE_SYMLINK    0x02000000
#define P9_STAT_MODE_LINK       0x01000000
#define P9_STAT_MODE_DEVICE     0x00800000
#define P9_STAT_MODE_NAMED_PIPE 0x00200000
#define P9_STAT_MODE_SOCKET     0x00100000
#define P9_STAT_MODE_SETUID     0x00080000
#define P9_STAT_MODE_SETGID     0x00040000
#define P9_STAT_MODE_SETVTX     0x00010000

/* Mask combining the six mode bits listed below */
#define P9_STAT_MODE_TYPE_BITS (P9_STAT_MODE_DIR |          \
                                P9_STAT_MODE_SYMLINK |      \
                                P9_STAT_MODE_LINK |         \
                                P9_STAT_MODE_DEVICE |       \
                                P9_STAT_MODE_NAMED_PIPE |   \
                                P9_STAT_MODE_SOCKET)
622  
/*
 * Reverses the bit order of a byte, so e.g. binary 10100000 becomes
 * 00000101. Done by swapping progressively smaller groups of bits.
 */
static inline uint8_t mirror8bit(uint8_t byte)
{
    unsigned int v = byte;

    v = ((v & 0xf0u) >> 4) | ((v & 0x0fu) << 4);    /* swap the nibbles */
    v = ((v & 0xccu) >> 2) | ((v & 0x33u) << 2);    /* swap bit pairs */
    v = ((v & 0xaau) >> 1) | ((v & 0x55u) << 1);    /* swap adjacent bits */

    return (uint8_t) v;
}
628  
/*
 * Same as mirror8bit(), but for a 64 bit value: byte i of the input ends
 * up bit-reversed as byte (7 - i) of the result.
 */
static inline uint64_t mirror64bit(uint64_t value)
{
    uint64_t mirrored = 0;
    int byte_idx;

    for (byte_idx = 0; byte_idx < 8; byte_idx++) {
        const uint8_t src = (value >> (8 * byte_idx)) & 0xff;

        mirrored |= (uint64_t)mirror8bit(src) << (56 - 8 * byte_idx);
    }

    return mirrored;
}
641  
/*
 * Parameter k for the Exponential Golomb algorithm to be used.
 *
 * The smaller this value, the smaller the minimum bit count of the Exp.
 * Golomb generated affixes (at lowest index), at the price of a higher
 * maximum bit count of generated affixes (at highest index). Likewise,
 * increasing this parameter yields a smaller maximum bit count at the price
 * of a higher minimum bit count.
 *
 * In practice that means: a good value for k depends on the expected number
 * of devices to be exposed by one export. For a small number of devices k
 * should be small, for a large number of devices k might be increased
 * instead. The default of k=0 should be fine for most users though.
 *
 * IMPORTANT: In case this ever becomes a runtime parameter, the value of
 * k must not change as long as the guest is still running, because that
 * would cause completely different inode numbers to be generated on guest.
 */
#define EXP_GOLOMB_K    0
661  
662  /**
663   * expGolombEncode() - Exponential Golomb algorithm for arbitrary k
664   *                     (including k=0).
665   *
666   * @n: natural number (or index) of the prefix to be generated
667   *     (1, 2, 3, ...)
668   * @k: parameter k of Exp. Golomb algorithm to be used
669   *     (see comment on EXP_GOLOMB_K macro for details about k)
670   * Return: prefix for given @n and @k
671   *
672   * The Exponential Golomb algorithm generates prefixes (NOT suffixes!)
673   * with growing length and with the mathematical property of being
674   * "prefix-free". The latter means the generated prefixes can be prepended
675   * in front of arbitrary numbers and the resulting concatenated numbers are
676   * guaranteed to be always unique.
677   *
678   * This is a minor adjustment to the original Exp. Golomb algorithm in the
679   * sense that lowest allowed index (@n) starts with 1, not with zero.
680   */
681  static VariLenAffix expGolombEncode(uint64_t n, int k)
682  {
683      const uint64_t value = n + (1 << k) - 1;
684      const int bits = (int) log2(value) + 1;
685      return (VariLenAffix) {
686          .type = AffixType_Prefix,
687          .value = value,
688          .bits = bits + MAX((bits - 1 - k), 0)
689      };
690  }
691  
692  /**
693   * invertAffix() - Converts a suffix into a prefix, or a prefix into a suffix.
694   * @affix: either suffix or prefix to be inverted
695   * Return: inversion of passed @affix
696   *
697   * Simply mirror all bits of the affix value, for the purpose to preserve
698   * respectively the mathematical "prefix-free" or "suffix-free" property
699   * after the conversion.
700   *
701   * If a passed prefix is suitable to create unique numbers, then the
702   * returned suffix is suitable to create unique numbers as well (and vice
703   * versa).
704   */
705  static VariLenAffix invertAffix(const VariLenAffix *affix)
706  {
707      return (VariLenAffix) {
708          .type =
709              (affix->type == AffixType_Suffix) ?
710                  AffixType_Prefix : AffixType_Suffix,
711          .value =
712              mirror64bit(affix->value) >>
713              ((sizeof(affix->value) * 8) - affix->bits),
714          .bits = affix->bits
715      };
716  }
717  
718  /**
719   * affixForIndex() - Generates suffix numbers with "suffix-free" property.
720   * @index: natural number (or index) of the suffix to be generated
721   *         (1, 2, 3, ...)
722   * Return: Suffix suitable to assemble unique number.
723   *
724   * This is just a wrapper function on top of the Exp. Golomb algorithm.
725   *
726   * Since the Exp. Golomb algorithm generates prefixes, but we need suffixes,
727   * this function converts the Exp. Golomb prefixes into appropriate suffixes
728   * which are still suitable for generating unique numbers.
729   */
730  static VariLenAffix affixForIndex(uint64_t index)
731  {
732      VariLenAffix prefix;
733      prefix = expGolombEncode(index, EXP_GOLOMB_K);
734      return invertAffix(&prefix); /* convert prefix to suffix */
735  }
736  
737  static uint32_t qpp_hash(QppEntry e)
738  {
739      return qemu_xxhash4(e.ino_prefix, e.dev);
740  }
741  
742  static uint32_t qpf_hash(QpfEntry e)
743  {
744      return qemu_xxhash4(e.ino, e.dev);
745  }
746  
747  static bool qpd_cmp_func(const void *obj, const void *userp)
748  {
749      const QpdEntry *e1 = obj, *e2 = userp;
750      return e1->dev == e2->dev;
751  }
752  
753  static bool qpp_cmp_func(const void *obj, const void *userp)
754  {
755      const QppEntry *e1 = obj, *e2 = userp;
756      return e1->dev == e2->dev && e1->ino_prefix == e2->ino_prefix;
757  }
758  
759  static bool qpf_cmp_func(const void *obj, const void *userp)
760  {
761      const QpfEntry *e1 = obj, *e2 = userp;
762      return e1->dev == e2->dev && e1->ino == e2->ino;
763  }
764  
/*
 * qht_iter() callback used by qp_table_destroy(): frees the entry @p.
 * The hash @h and the user pointer @up are not used.
 */
static void qp_table_remove(void *p, uint32_t h, void *up)
{
    g_free(p);
}
769  
770  static void qp_table_destroy(struct qht *ht)
771  {
772      if (!ht || !ht->map) {
773          return;
774      }
775      qht_iter(ht, qp_table_remove, NULL);
776      qht_destroy(ht);
777  }
778  
779  static void qpd_table_init(struct qht *ht)
780  {
781      qht_init(ht, qpd_cmp_func, 1, QHT_MODE_AUTO_RESIZE);
782  }
783  
784  static void qpp_table_init(struct qht *ht)
785  {
786      qht_init(ht, qpp_cmp_func, 1, QHT_MODE_AUTO_RESIZE);
787  }
788  
789  static void qpf_table_init(struct qht *ht)
790  {
791      qht_init(ht, qpf_cmp_func, 1 << 16, QHT_MODE_AUTO_RESIZE);
792  }
793  
/*
 * Returns how many (high end) bits of inode numbers of the passed fs
 * device shall be used (in combination with the device number) to
 * generate hash values for qpp_table entries.
 *
 * This function is required if variable length suffixes are used for inode
 * number mapping on guest level. Since a device may end up having multiple
 * entries in qpp_table, each entry most probably with a different suffix
 * length, we thus need this function in conjunction with qpd_table to
 * "agree" on a fixed number of bits (per device) to be always used for
 * generating hash values for the purpose of accessing qpp_table, in order
 * to get consistent behaviour when accessing qpp_table.
 */
807  static int qid_inode_prefix_hash_bits(V9fsPDU *pdu, dev_t dev)
808  {
809      QpdEntry lookup = {
810          .dev = dev
811      }, *val;
812      uint32_t hash = dev;
813      VariLenAffix affix;
814  
815      val = qht_lookup(&pdu->s->qpd_table, &lookup, hash);
816      if (!val) {
817          val = g_new0(QpdEntry, 1);
818          *val = lookup;
819          affix = affixForIndex(pdu->s->qp_affix_next);
820          val->prefix_bits = affix.bits;
821          qht_insert(&pdu->s->qpd_table, val, hash, NULL);
822          pdu->s->qp_ndevices++;
823      }
824      return val->prefix_bits;
825  }
826  
827  /*
828   * Slow / full mapping host inode nr -> guest inode nr.
829   *
830   * This function performs a slower and much more costly remapping of an
831   * original file inode number on host to an appropriate different inode
832   * number on guest. For every (dev, inode) combination on host a new
833   * sequential number is generated, cached and exposed as inode number on
834   * guest.
835   *
836   * This is just a "last resort" fallback solution if the much faster/cheaper
837   * qid_path_suffixmap() failed. In practice this slow / full mapping is not
838   * expected ever to be used at all though.
839   *
840   * See qid_path_suffixmap() for details
841   *
842   */
843  static int qid_path_fullmap(V9fsPDU *pdu, const struct stat *stbuf,
844                              uint64_t *path)
845  {
846      QpfEntry lookup = {
847          .dev = stbuf->st_dev,
848          .ino = stbuf->st_ino
849      }, *val;
850      uint32_t hash = qpf_hash(lookup);
851      VariLenAffix affix;
852  
853      val = qht_lookup(&pdu->s->qpf_table, &lookup, hash);
854  
855      if (!val) {
856          if (pdu->s->qp_fullpath_next == 0) {
857              /* no more files can be mapped :'( */
858              error_report_once(
859                  "9p: No more prefixes available for remapping inodes from "
860                  "host to guest."
861              );
862              return -ENFILE;
863          }
864  
865          val = g_new0(QpfEntry, 1);
866          *val = lookup;
867  
868          /* new unique inode and device combo */
869          affix = affixForIndex(
870              1ULL << (sizeof(pdu->s->qp_affix_next) * 8)
871          );
872          val->path = (pdu->s->qp_fullpath_next++ << affix.bits) | affix.value;
873          pdu->s->qp_fullpath_next &= ((1ULL << (64 - affix.bits)) - 1);
874          qht_insert(&pdu->s->qpf_table, val, hash, NULL);
875      }
876  
877      *path = val->path;
878      return 0;
879  }
880  
881  /*
882   * Quick mapping host inode nr -> guest inode nr.
883   *
884   * This function performs quick remapping of an original file inode number
885   * on host to an appropriate different inode number on guest. This remapping
886   * of inodes is required to avoid inode nr collisions on guest which would
887   * happen if the 9p export contains more than 1 exported file system (or
888   * more than 1 file system data set), because unlike on host level where the
889   * files would have different device nrs, all files exported by 9p would
890   * share the same device nr on guest (the device nr of the virtual 9p device
891   * that is).
892   *
893   * Inode remapping is performed by chopping off high end bits of the original
894   * inode number from host, shifting the result upwards and then assigning a
895   * generated suffix number for the low end bits, where the same suffix number
896   * will be shared by all inodes with the same device id AND the same high end
897   * bits that have been chopped off. That approach utilizes the fact that inode
898   * numbers very likely share the same high end bits (i.e. due to their common
899   * sequential generation by file systems) and hence we only have to generate
900   * and track a very limited amount of suffixes in practice due to that.
901   *
902   * We generate variable size suffixes for that purpose. The 1st generated
903   * suffix will only have 1 bit and hence we only need to chop off 1 bit from
904   * the original inode number. The subsequent suffixes being generated will
905   * grow in (bit) size subsequently, i.e. the 2nd and 3rd suffix being
906   * generated will have 3 bits and hence we have to chop off 3 bits from their
907   * original inodes, and so on. That approach of using variable length suffixes
908   * (i.e. over fixed size ones) utilizes the fact that in practice only a very
909   * limited amount of devices are shared by the same export (e.g. typically
910   * less than 2 dozen devices per 9p export), so in practice we need to chop
911   * off less bits than with fixed size prefixes and yet are flexible to add
912   * new devices at runtime below host's export directory at any time without
913   * having to reboot guest nor requiring to reconfigure guest for that. And due
914   * to the very limited amount of original high end bits that we chop off that
915   * way, the total amount of suffixes we need to generate is less than by using
916   * fixed size prefixes and hence it also improves performance of the inode
917   * remapping algorithm, and finally has the nice side effect that the inode
918   * numbers on guest will be much smaller & human friendly. ;-)
919   */
920  static int qid_path_suffixmap(V9fsPDU *pdu, const struct stat *stbuf,
921                                uint64_t *path)
922  {
923      const int ino_hash_bits = qid_inode_prefix_hash_bits(pdu, stbuf->st_dev);
924      QppEntry lookup = {
925          .dev = stbuf->st_dev,
926          .ino_prefix = (uint16_t) (stbuf->st_ino >> (64 - ino_hash_bits))
927      }, *val;
928      uint32_t hash = qpp_hash(lookup);
929  
930      val = qht_lookup(&pdu->s->qpp_table, &lookup, hash);
931  
932      if (!val) {
933          if (pdu->s->qp_affix_next == 0) {
934              /* we ran out of affixes */
935              warn_report_once(
936                  "9p: Potential degraded performance of inode remapping"
937              );
938              return -ENFILE;
939          }
940  
941          val = g_new0(QppEntry, 1);
942          *val = lookup;
943  
944          /* new unique inode affix and device combo */
945          val->qp_affix_index = pdu->s->qp_affix_next++;
946          val->qp_affix = affixForIndex(val->qp_affix_index);
947          qht_insert(&pdu->s->qpp_table, val, hash, NULL);
948      }
949      /* assuming generated affix to be suffix type, not prefix */
950      *path = (stbuf->st_ino << val->qp_affix.bits) | val->qp_affix.value;
951      return 0;
952  }
953  
954  static int stat_to_qid(V9fsPDU *pdu, const struct stat *stbuf, V9fsQID *qidp)
955  {
956      int err;
957      size_t size;
958  
959      if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) {
960          /* map inode+device to qid path (fast path) */
961          err = qid_path_suffixmap(pdu, stbuf, &qidp->path);
962          if (err == -ENFILE) {
963              /* fast path didn't work, fall back to full map */
964              err = qid_path_fullmap(pdu, stbuf, &qidp->path);
965          }
966          if (err) {
967              return err;
968          }
969      } else {
970          if (pdu->s->dev_id != stbuf->st_dev) {
971              if (pdu->s->ctx.export_flags & V9FS_FORBID_MULTIDEVS) {
972                  error_report_once(
973                      "9p: Multiple devices detected in same VirtFS export. "
974                      "Access of guest to additional devices is (partly) "
975                      "denied due to virtfs option 'multidevs=forbid' being "
976                      "effective."
977                  );
978                  return -ENODEV;
979              } else {
980                  warn_report_once(
981                      "9p: Multiple devices detected in same VirtFS export, "
982                      "which might lead to file ID collisions and severe "
983                      "misbehaviours on guest! You should either use a "
984                      "separate export for each device shared from host or "
985                      "use virtfs option 'multidevs=remap'!"
986                  );
987              }
988          }
989          memset(&qidp->path, 0, sizeof(qidp->path));
990          size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path));
991          memcpy(&qidp->path, &stbuf->st_ino, size);
992      }
993  
994      qidp->version = stbuf->st_mtime ^ (stbuf->st_size << 8);
995      qidp->type = 0;
996      if (S_ISDIR(stbuf->st_mode)) {
997          qidp->type |= P9_QID_TYPE_DIR;
998      }
999      if (S_ISLNK(stbuf->st_mode)) {
1000          qidp->type |= P9_QID_TYPE_SYMLINK;
1001      }
1002  
1003      return 0;
1004  }
1005  
1006  V9fsPDU *pdu_alloc(V9fsState *s)
1007  {
1008      V9fsPDU *pdu = NULL;
1009  
1010      if (!QLIST_EMPTY(&s->free_list)) {
1011          pdu = QLIST_FIRST(&s->free_list);
1012          QLIST_REMOVE(pdu, next);
1013          QLIST_INSERT_HEAD(&s->active_list, pdu, next);
1014      }
1015      return pdu;
1016  }
1017  
1018  void pdu_free(V9fsPDU *pdu)
1019  {
1020      V9fsState *s = pdu->s;
1021  
1022      g_assert(!pdu->cancelled);
1023      QLIST_REMOVE(pdu, next);
1024      QLIST_INSERT_HEAD(&s->free_list, pdu, next);
1025  }
1026  
1027  static void coroutine_fn pdu_complete(V9fsPDU *pdu, ssize_t len)
1028  {
1029      int8_t id = pdu->id + 1; /* Response */
1030      V9fsState *s = pdu->s;
1031      int ret;
1032  
1033      /*
1034       * The 9p spec requires that successfully cancelled pdus receive no reply.
1035       * Sending a reply would confuse clients because they would
1036       * assume that any EINTR is the actual result of the operation,
1037       * rather than a consequence of the cancellation. However, if
1038       * the operation completed (successfully or with an error other
1039       * than caused be cancellation), we do send out that reply, both
1040       * for efficiency and to avoid confusing the rest of the state machine
1041       * that assumes passing a non-error here will mean a successful
1042       * transmission of the reply.
1043       */
1044      bool discard = pdu->cancelled && len == -EINTR;
1045      if (discard) {
1046          trace_v9fs_rcancel(pdu->tag, pdu->id);
1047          pdu->size = 0;
1048          goto out_notify;
1049      }
1050  
1051      if (len < 0) {
1052          int err = -len;
1053          len = 7;
1054  
1055          if (s->proto_version != V9FS_PROTO_2000L) {
1056              V9fsString str;
1057  
1058              str.data = strerror(err);
1059              str.size = strlen(str.data);
1060  
1061              ret = pdu_marshal(pdu, len, "s", &str);
1062              if (ret < 0) {
1063                  goto out_notify;
1064              }
1065              len += ret;
1066              id = P9_RERROR;
1067          } else {
1068              err = errno_to_dotl(err);
1069          }
1070  
1071          ret = pdu_marshal(pdu, len, "d", err);
1072          if (ret < 0) {
1073              goto out_notify;
1074          }
1075          len += ret;
1076  
1077          if (s->proto_version == V9FS_PROTO_2000L) {
1078              id = P9_RLERROR;
1079          }
1080          trace_v9fs_rerror(pdu->tag, pdu->id, err); /* Trace ERROR */
1081      }
1082  
1083      /* fill out the header */
1084      if (pdu_marshal(pdu, 0, "dbw", (int32_t)len, id, pdu->tag) < 0) {
1085          goto out_notify;
1086      }
1087  
1088      /* keep these in sync */
1089      pdu->size = len;
1090      pdu->id = id;
1091  
1092  out_notify:
1093      pdu->s->transport->push_and_notify(pdu);
1094  
1095      /* Now wakeup anybody waiting in flush for this request */
1096      if (!qemu_co_queue_next(&pdu->complete)) {
1097          pdu_free(pdu);
1098      }
1099  }
1100  
1101  static mode_t v9mode_to_mode(uint32_t mode, V9fsString *extension)
1102  {
1103      mode_t ret;
1104  
1105      ret = mode & 0777;
1106      if (mode & P9_STAT_MODE_DIR) {
1107          ret |= S_IFDIR;
1108      }
1109  
1110      if (mode & P9_STAT_MODE_SYMLINK) {
1111          ret |= S_IFLNK;
1112      }
1113      if (mode & P9_STAT_MODE_SOCKET) {
1114          ret |= S_IFSOCK;
1115      }
1116      if (mode & P9_STAT_MODE_NAMED_PIPE) {
1117          ret |= S_IFIFO;
1118      }
1119      if (mode & P9_STAT_MODE_DEVICE) {
1120          if (extension->size && extension->data[0] == 'c') {
1121              ret |= S_IFCHR;
1122          } else {
1123              ret |= S_IFBLK;
1124          }
1125      }
1126  
1127      if (!(ret & ~0777)) {
1128          ret |= S_IFREG;
1129      }
1130  
1131      if (mode & P9_STAT_MODE_SETUID) {
1132          ret |= S_ISUID;
1133      }
1134      if (mode & P9_STAT_MODE_SETGID) {
1135          ret |= S_ISGID;
1136      }
1137      if (mode & P9_STAT_MODE_SETVTX) {
1138          ret |= S_ISVTX;
1139      }
1140  
1141      return ret;
1142  }
1143  
1144  static int donttouch_stat(V9fsStat *stat)
1145  {
1146      if (stat->type == -1 &&
1147          stat->dev == -1 &&
1148          stat->qid.type == 0xff &&
1149          stat->qid.version == (uint32_t) -1 &&
1150          stat->qid.path == (uint64_t) -1 &&
1151          stat->mode == -1 &&
1152          stat->atime == -1 &&
1153          stat->mtime == -1 &&
1154          stat->length == -1 &&
1155          !stat->name.size &&
1156          !stat->uid.size &&
1157          !stat->gid.size &&
1158          !stat->muid.size &&
1159          stat->n_uid == -1 &&
1160          stat->n_gid == -1 &&
1161          stat->n_muid == -1) {
1162          return 1;
1163      }
1164  
1165      return 0;
1166  }
1167  
1168  static void v9fs_stat_init(V9fsStat *stat)
1169  {
1170      v9fs_string_init(&stat->name);
1171      v9fs_string_init(&stat->uid);
1172      v9fs_string_init(&stat->gid);
1173      v9fs_string_init(&stat->muid);
1174      v9fs_string_init(&stat->extension);
1175  }
1176  
1177  static void v9fs_stat_free(V9fsStat *stat)
1178  {
1179      v9fs_string_free(&stat->name);
1180      v9fs_string_free(&stat->uid);
1181      v9fs_string_free(&stat->gid);
1182      v9fs_string_free(&stat->muid);
1183      v9fs_string_free(&stat->extension);
1184  }
1185  
1186  static uint32_t stat_to_v9mode(const struct stat *stbuf)
1187  {
1188      uint32_t mode;
1189  
1190      mode = stbuf->st_mode & 0777;
1191      if (S_ISDIR(stbuf->st_mode)) {
1192          mode |= P9_STAT_MODE_DIR;
1193      }
1194  
1195      if (S_ISLNK(stbuf->st_mode)) {
1196          mode |= P9_STAT_MODE_SYMLINK;
1197      }
1198  
1199      if (S_ISSOCK(stbuf->st_mode)) {
1200          mode |= P9_STAT_MODE_SOCKET;
1201      }
1202  
1203      if (S_ISFIFO(stbuf->st_mode)) {
1204          mode |= P9_STAT_MODE_NAMED_PIPE;
1205      }
1206  
1207      if (S_ISBLK(stbuf->st_mode) || S_ISCHR(stbuf->st_mode)) {
1208          mode |= P9_STAT_MODE_DEVICE;
1209      }
1210  
1211      if (stbuf->st_mode & S_ISUID) {
1212          mode |= P9_STAT_MODE_SETUID;
1213      }
1214  
1215      if (stbuf->st_mode & S_ISGID) {
1216          mode |= P9_STAT_MODE_SETGID;
1217      }
1218  
1219      if (stbuf->st_mode & S_ISVTX) {
1220          mode |= P9_STAT_MODE_SETVTX;
1221      }
1222  
1223      return mode;
1224  }
1225  
1226  static int coroutine_fn stat_to_v9stat(V9fsPDU *pdu, V9fsPath *path,
1227                                         const char *basename,
1228                                         const struct stat *stbuf,
1229                                         V9fsStat *v9stat)
1230  {
1231      int err;
1232  
1233      memset(v9stat, 0, sizeof(*v9stat));
1234  
1235      err = stat_to_qid(pdu, stbuf, &v9stat->qid);
1236      if (err < 0) {
1237          return err;
1238      }
1239      v9stat->mode = stat_to_v9mode(stbuf);
1240      v9stat->atime = stbuf->st_atime;
1241      v9stat->mtime = stbuf->st_mtime;
1242      v9stat->length = stbuf->st_size;
1243  
1244      v9fs_string_free(&v9stat->uid);
1245      v9fs_string_free(&v9stat->gid);
1246      v9fs_string_free(&v9stat->muid);
1247  
1248      v9stat->n_uid = stbuf->st_uid;
1249      v9stat->n_gid = stbuf->st_gid;
1250      v9stat->n_muid = 0;
1251  
1252      v9fs_string_free(&v9stat->extension);
1253  
1254      if (v9stat->mode & P9_STAT_MODE_SYMLINK) {
1255          err = v9fs_co_readlink(pdu, path, &v9stat->extension);
1256          if (err < 0) {
1257              return err;
1258          }
1259      } else if (v9stat->mode & P9_STAT_MODE_DEVICE) {
1260          v9fs_string_sprintf(&v9stat->extension, "%c %u %u",
1261                  S_ISCHR(stbuf->st_mode) ? 'c' : 'b',
1262                  major(stbuf->st_rdev), minor(stbuf->st_rdev));
1263      } else if (S_ISDIR(stbuf->st_mode) || S_ISREG(stbuf->st_mode)) {
1264          v9fs_string_sprintf(&v9stat->extension, "%s %lu",
1265                  "HARDLINKCOUNT", (unsigned long)stbuf->st_nlink);
1266      }
1267  
1268      v9fs_string_sprintf(&v9stat->name, "%s", basename);
1269  
1270      v9stat->size = 61 +
1271          v9fs_string_size(&v9stat->name) +
1272          v9fs_string_size(&v9stat->uid) +
1273          v9fs_string_size(&v9stat->gid) +
1274          v9fs_string_size(&v9stat->muid) +
1275          v9fs_string_size(&v9stat->extension);
1276      return 0;
1277  }
1278  
/*
 * Bit flags naming individual stat fields. In this file they are tested
 * against the request mask of a getattr request and stored in
 * V9fsStatDotl::st_result_mask.
 */
#define P9_STATS_MODE          0x00000001ULL
#define P9_STATS_NLINK         0x00000002ULL
#define P9_STATS_UID           0x00000004ULL
#define P9_STATS_GID           0x00000008ULL
#define P9_STATS_RDEV          0x00000010ULL
#define P9_STATS_ATIME         0x00000020ULL
#define P9_STATS_MTIME         0x00000040ULL
#define P9_STATS_CTIME         0x00000080ULL
#define P9_STATS_INO           0x00000100ULL
#define P9_STATS_SIZE          0x00000200ULL
#define P9_STATS_BLOCKS        0x00000400ULL

/* Fields beyond the basic set (not covered by P9_STATS_BASIC) */
#define P9_STATS_BTIME         0x00000800ULL
#define P9_STATS_GEN           0x00001000ULL
#define P9_STATS_DATA_VERSION  0x00002000ULL

/* Combined masks: BASIC is bits 0..10, ALL is bits 0..13 */
#define P9_STATS_BASIC         0x000007ffULL /* Mask for fields up to BLOCKS */
#define P9_STATS_ALL           0x00003fffULL /* Mask for All fields above */
1297  
1298  
1299  /**
1300   * blksize_to_iounit() - Block size exposed to 9p client.
1301   * Return: block size
1302   *
1303   * @pdu: 9p client request
1304   * @blksize: host filesystem's block size
1305   *
1306   * Convert host filesystem's block size into an appropriate block size for
1307   * 9p client (guest OS side). The value returned suggests an "optimum" block
1308   * size for 9p I/O, i.e. to maximize performance.
1309   */
1310  static int32_t blksize_to_iounit(const V9fsPDU *pdu, int32_t blksize)
1311  {
1312      int32_t iounit = 0;
1313      V9fsState *s = pdu->s;
1314  
1315      /*
1316       * iounit should be multiples of blksize (host filesystem block size)
1317       * as well as less than (client msize - P9_IOHDRSZ)
1318       */
1319      if (blksize) {
1320          iounit = QEMU_ALIGN_DOWN(s->msize - P9_IOHDRSZ, blksize);
1321      }
1322      if (!iounit) {
1323          iounit = s->msize - P9_IOHDRSZ;
1324      }
1325      return iounit;
1326  }
1327  
1328  static int32_t stat_to_iounit(const V9fsPDU *pdu, const struct stat *stbuf)
1329  {
1330      return blksize_to_iounit(pdu, stbuf->st_blksize);
1331  }
1332  
1333  static int stat_to_v9stat_dotl(V9fsPDU *pdu, const struct stat *stbuf,
1334                                  V9fsStatDotl *v9lstat)
1335  {
1336      memset(v9lstat, 0, sizeof(*v9lstat));
1337  
1338      v9lstat->st_mode = stbuf->st_mode;
1339      v9lstat->st_nlink = stbuf->st_nlink;
1340      v9lstat->st_uid = stbuf->st_uid;
1341      v9lstat->st_gid = stbuf->st_gid;
1342      v9lstat->st_rdev = host_dev_to_dotl_dev(stbuf->st_rdev);
1343      v9lstat->st_size = stbuf->st_size;
1344      v9lstat->st_blksize = stat_to_iounit(pdu, stbuf);
1345      v9lstat->st_blocks = stbuf->st_blocks;
1346      v9lstat->st_atime_sec = stbuf->st_atime;
1347      v9lstat->st_mtime_sec = stbuf->st_mtime;
1348      v9lstat->st_ctime_sec = stbuf->st_ctime;
1349  #ifdef CONFIG_DARWIN
1350      v9lstat->st_atime_nsec = stbuf->st_atimespec.tv_nsec;
1351      v9lstat->st_mtime_nsec = stbuf->st_mtimespec.tv_nsec;
1352      v9lstat->st_ctime_nsec = stbuf->st_ctimespec.tv_nsec;
1353  #else
1354      v9lstat->st_atime_nsec = stbuf->st_atim.tv_nsec;
1355      v9lstat->st_mtime_nsec = stbuf->st_mtim.tv_nsec;
1356      v9lstat->st_ctime_nsec = stbuf->st_ctim.tv_nsec;
1357  #endif
1358      /* Currently we only support BASIC fields in stat */
1359      v9lstat->st_result_mask = P9_STATS_BASIC;
1360  
1361      return stat_to_qid(pdu, stbuf, &v9lstat->qid);
1362  }
1363  
/*
 * Debugging aid: print a scatter-gather list to stdout in the form
 * "sg[<cnt>]: {(<base>, <len>), ...}".
 *
 * @sg:  array of I/O vectors
 * @cnt: number of elements of @sg to print
 */
static void print_sg(struct iovec *sg, int cnt)
{
    int i;

    printf("sg[%d]: {", cnt);
    for (i = 0; i < cnt; i++) {
        if (i) {
            printf(", ");
        }
        /* iov_len is a size_t, which takes %zu (not the signed %zd) */
        printf("(%p, %zu)", sg[i].iov_base, sg[i].iov_len);
    }
    printf("}\n");
}
1377  
1378  /* Will call this only for path name based fid */
1379  static void v9fs_fix_path(V9fsPath *dst, V9fsPath *src, int len)
1380  {
1381      V9fsPath str;
1382      v9fs_path_init(&str);
1383      v9fs_path_copy(&str, dst);
1384      v9fs_path_sprintf(dst, "%s%s", src->data, str.data + len);
1385      v9fs_path_free(&str);
1386  }
1387  
1388  static inline bool is_ro_export(FsContext *ctx)
1389  {
1390      return ctx->export_flags & V9FS_RDONLY;
1391  }
1392  
1393  static void coroutine_fn v9fs_version(void *opaque)
1394  {
1395      ssize_t err;
1396      V9fsPDU *pdu = opaque;
1397      V9fsState *s = pdu->s;
1398      V9fsString version;
1399      size_t offset = 7;
1400  
1401      v9fs_string_init(&version);
1402      err = pdu_unmarshal(pdu, offset, "ds", &s->msize, &version);
1403      if (err < 0) {
1404          goto out;
1405      }
1406      trace_v9fs_version(pdu->tag, pdu->id, s->msize, version.data);
1407  
1408      virtfs_reset(pdu);
1409  
1410      if (!strcmp(version.data, "9P2000.u")) {
1411          s->proto_version = V9FS_PROTO_2000U;
1412      } else if (!strcmp(version.data, "9P2000.L")) {
1413          s->proto_version = V9FS_PROTO_2000L;
1414      } else {
1415          v9fs_string_sprintf(&version, "unknown");
1416          /* skip min. msize check, reporting invalid version has priority */
1417          goto marshal;
1418      }
1419  
1420      if (s->msize < P9_MIN_MSIZE) {
1421          err = -EMSGSIZE;
1422          error_report(
1423              "9pfs: Client requested msize < minimum msize ("
1424              stringify(P9_MIN_MSIZE) ") supported by this server."
1425          );
1426          goto out;
1427      }
1428  
1429      /* 8192 is the default msize of Linux clients */
1430      if (s->msize <= 8192 && !(s->ctx.export_flags & V9FS_NO_PERF_WARN)) {
1431          warn_report_once(
1432              "9p: degraded performance: a reasonable high msize should be "
1433              "chosen on client/guest side (chosen msize is <= 8192). See "
1434              "https://wiki.qemu.org/Documentation/9psetup#msize for details."
1435          );
1436      }
1437  
1438  marshal:
1439      err = pdu_marshal(pdu, offset, "ds", s->msize, &version);
1440      if (err < 0) {
1441          goto out;
1442      }
1443      err += offset;
1444      trace_v9fs_version_return(pdu->tag, pdu->id, s->msize, version.data);
1445  out:
1446      pdu_complete(pdu, err);
1447      v9fs_string_free(&version);
1448  }
1449  
1450  static void coroutine_fn v9fs_attach(void *opaque)
1451  {
1452      V9fsPDU *pdu = opaque;
1453      V9fsState *s = pdu->s;
1454      int32_t fid, afid, n_uname;
1455      V9fsString uname, aname;
1456      V9fsFidState *fidp;
1457      size_t offset = 7;
1458      V9fsQID qid;
1459      ssize_t err;
1460      struct stat stbuf;
1461  
1462      v9fs_string_init(&uname);
1463      v9fs_string_init(&aname);
1464      err = pdu_unmarshal(pdu, offset, "ddssd", &fid,
1465                          &afid, &uname, &aname, &n_uname);
1466      if (err < 0) {
1467          goto out_nofid;
1468      }
1469      trace_v9fs_attach(pdu->tag, pdu->id, fid, afid, uname.data, aname.data);
1470  
1471      fidp = alloc_fid(s, fid);
1472      if (fidp == NULL) {
1473          err = -EINVAL;
1474          goto out_nofid;
1475      }
1476      fidp->uid = n_uname;
1477      err = v9fs_co_name_to_path(pdu, NULL, "/", &fidp->path);
1478      if (err < 0) {
1479          err = -EINVAL;
1480          clunk_fid(s, fid);
1481          goto out;
1482      }
1483      err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1484      if (err < 0) {
1485          err = -EINVAL;
1486          clunk_fid(s, fid);
1487          goto out;
1488      }
1489      err = stat_to_qid(pdu, &stbuf, &qid);
1490      if (err < 0) {
1491          err = -EINVAL;
1492          clunk_fid(s, fid);
1493          goto out;
1494      }
1495  
1496      /*
1497       * disable migration if we haven't done already.
1498       * attach could get called multiple times for the same export.
1499       */
1500      if (!s->migration_blocker) {
1501          error_setg(&s->migration_blocker,
1502                     "Migration is disabled when VirtFS export path '%s' is mounted in the guest using mount_tag '%s'",
1503                     s->ctx.fs_root ? s->ctx.fs_root : "NULL", s->tag);
1504          err = migrate_add_blocker(&s->migration_blocker, NULL);
1505          if (err < 0) {
1506              clunk_fid(s, fid);
1507              goto out;
1508          }
1509          s->root_fid = fid;
1510      }
1511  
1512      err = pdu_marshal(pdu, offset, "Q", &qid);
1513      if (err < 0) {
1514          clunk_fid(s, fid);
1515          goto out;
1516      }
1517      err += offset;
1518  
1519      memcpy(&s->root_st, &stbuf, sizeof(stbuf));
1520      trace_v9fs_attach_return(pdu->tag, pdu->id,
1521                               qid.type, qid.version, qid.path);
1522  out:
1523      put_fid(pdu, fidp);
1524  out_nofid:
1525      pdu_complete(pdu, err);
1526      v9fs_string_free(&uname);
1527      v9fs_string_free(&aname);
1528  }
1529  
1530  static void coroutine_fn v9fs_stat(void *opaque)
1531  {
1532      int32_t fid;
1533      V9fsStat v9stat;
1534      ssize_t err = 0;
1535      size_t offset = 7;
1536      struct stat stbuf;
1537      V9fsFidState *fidp;
1538      V9fsPDU *pdu = opaque;
1539      char *basename;
1540  
1541      err = pdu_unmarshal(pdu, offset, "d", &fid);
1542      if (err < 0) {
1543          goto out_nofid;
1544      }
1545      trace_v9fs_stat(pdu->tag, pdu->id, fid);
1546  
1547      fidp = get_fid(pdu, fid);
1548      if (fidp == NULL) {
1549          err = -ENOENT;
1550          goto out_nofid;
1551      }
1552      err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1553      if (err < 0) {
1554          goto out;
1555      }
1556      basename = g_path_get_basename(fidp->path.data);
1557      err = stat_to_v9stat(pdu, &fidp->path, basename, &stbuf, &v9stat);
1558      g_free(basename);
1559      if (err < 0) {
1560          goto out;
1561      }
1562      err = pdu_marshal(pdu, offset, "wS", 0, &v9stat);
1563      if (err < 0) {
1564          v9fs_stat_free(&v9stat);
1565          goto out;
1566      }
1567      trace_v9fs_stat_return(pdu->tag, pdu->id, v9stat.mode,
1568                             v9stat.atime, v9stat.mtime, v9stat.length);
1569      err += offset;
1570      v9fs_stat_free(&v9stat);
1571  out:
1572      put_fid(pdu, fidp);
1573  out_nofid:
1574      pdu_complete(pdu, err);
1575  }
1576  
1577  static void coroutine_fn v9fs_getattr(void *opaque)
1578  {
1579      int32_t fid;
1580      size_t offset = 7;
1581      ssize_t retval = 0;
1582      struct stat stbuf;
1583      V9fsFidState *fidp;
1584      uint64_t request_mask;
1585      V9fsStatDotl v9stat_dotl;
1586      V9fsPDU *pdu = opaque;
1587  
1588      retval = pdu_unmarshal(pdu, offset, "dq", &fid, &request_mask);
1589      if (retval < 0) {
1590          goto out_nofid;
1591      }
1592      trace_v9fs_getattr(pdu->tag, pdu->id, fid, request_mask);
1593  
1594      fidp = get_fid(pdu, fid);
1595      if (fidp == NULL) {
1596          retval = -ENOENT;
1597          goto out_nofid;
1598      }
1599      /*
1600       * Currently we only support BASIC fields in stat, so there is no
1601       * need to look at request_mask.
1602       */
1603      retval = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1604      if (retval < 0) {
1605          goto out;
1606      }
1607      retval = stat_to_v9stat_dotl(pdu, &stbuf, &v9stat_dotl);
1608      if (retval < 0) {
1609          goto out;
1610      }
1611  
1612      /*  fill st_gen if requested and supported by underlying fs */
1613      if (request_mask & P9_STATS_GEN) {
1614          retval = v9fs_co_st_gen(pdu, &fidp->path, stbuf.st_mode, &v9stat_dotl);
1615          switch (retval) {
1616          case 0:
1617              /* we have valid st_gen: update result mask */
1618              v9stat_dotl.st_result_mask |= P9_STATS_GEN;
1619              break;
1620          case -EINTR:
1621              /* request cancelled, e.g. by Tflush */
1622              goto out;
1623          default:
1624              /* failed to get st_gen: not fatal, ignore */
1625              break;
1626          }
1627      }
1628      retval = pdu_marshal(pdu, offset, "A", &v9stat_dotl);
1629      if (retval < 0) {
1630          goto out;
1631      }
1632      retval += offset;
1633      trace_v9fs_getattr_return(pdu->tag, pdu->id, v9stat_dotl.st_result_mask,
1634                                v9stat_dotl.st_mode, v9stat_dotl.st_uid,
1635                                v9stat_dotl.st_gid);
1636  out:
1637      put_fid(pdu, fidp);
1638  out_nofid:
1639      pdu_complete(pdu, retval);
1640  }
1641  
/*
 * Attribute flags: bits of V9fsIattr::valid, tested by the setattr handler
 * to decide which attributes to change.
 */
#define P9_ATTR_MODE       (1 << 0)
#define P9_ATTR_UID        (1 << 1)
#define P9_ATTR_GID        (1 << 2)
#define P9_ATTR_SIZE       (1 << 3)
#define P9_ATTR_ATIME      (1 << 4)
#define P9_ATTR_MTIME      (1 << 5)
#define P9_ATTR_CTIME      (1 << 6)
/* with *_SET the time value sent by the client is used, else UTIME_NOW */
#define P9_ATTR_ATIME_SET  (1 << 7)
#define P9_ATTR_MTIME_SET  (1 << 8)

/* 127 == bits 0..6, i.e. MODE up to CTIME; the *_SET flags are not included */
#define P9_ATTR_MASK    127
1654  
1655  static void coroutine_fn v9fs_setattr(void *opaque)
1656  {
1657      int err = 0;
1658      int32_t fid;
1659      V9fsFidState *fidp;
1660      size_t offset = 7;
1661      V9fsIattr v9iattr;
1662      V9fsPDU *pdu = opaque;
1663  
1664      err = pdu_unmarshal(pdu, offset, "dI", &fid, &v9iattr);
1665      if (err < 0) {
1666          goto out_nofid;
1667      }
1668  
1669      trace_v9fs_setattr(pdu->tag, pdu->id, fid,
1670                         v9iattr.valid, v9iattr.mode, v9iattr.uid, v9iattr.gid,
1671                         v9iattr.size, v9iattr.atime_sec, v9iattr.mtime_sec);
1672  
1673      fidp = get_fid(pdu, fid);
1674      if (fidp == NULL) {
1675          err = -EINVAL;
1676          goto out_nofid;
1677      }
1678      if (v9iattr.valid & P9_ATTR_MODE) {
1679          err = v9fs_co_chmod(pdu, &fidp->path, v9iattr.mode);
1680          if (err < 0) {
1681              goto out;
1682          }
1683      }
1684      if (v9iattr.valid & (P9_ATTR_ATIME | P9_ATTR_MTIME)) {
1685          struct timespec times[2];
1686          if (v9iattr.valid & P9_ATTR_ATIME) {
1687              if (v9iattr.valid & P9_ATTR_ATIME_SET) {
1688                  times[0].tv_sec = v9iattr.atime_sec;
1689                  times[0].tv_nsec = v9iattr.atime_nsec;
1690              } else {
1691                  times[0].tv_nsec = UTIME_NOW;
1692              }
1693          } else {
1694              times[0].tv_nsec = UTIME_OMIT;
1695          }
1696          if (v9iattr.valid & P9_ATTR_MTIME) {
1697              if (v9iattr.valid & P9_ATTR_MTIME_SET) {
1698                  times[1].tv_sec = v9iattr.mtime_sec;
1699                  times[1].tv_nsec = v9iattr.mtime_nsec;
1700              } else {
1701                  times[1].tv_nsec = UTIME_NOW;
1702              }
1703          } else {
1704              times[1].tv_nsec = UTIME_OMIT;
1705          }
1706          err = v9fs_co_utimensat(pdu, &fidp->path, times);
1707          if (err < 0) {
1708              goto out;
1709          }
1710      }
1711      /*
1712       * If the only valid entry in iattr is ctime we can call
1713       * chown(-1,-1) to update the ctime of the file
1714       */
1715      if ((v9iattr.valid & (P9_ATTR_UID | P9_ATTR_GID)) ||
1716          ((v9iattr.valid & P9_ATTR_CTIME)
1717           && !((v9iattr.valid & P9_ATTR_MASK) & ~P9_ATTR_CTIME))) {
1718          if (!(v9iattr.valid & P9_ATTR_UID)) {
1719              v9iattr.uid = -1;
1720          }
1721          if (!(v9iattr.valid & P9_ATTR_GID)) {
1722              v9iattr.gid = -1;
1723          }
1724          err = v9fs_co_chown(pdu, &fidp->path, v9iattr.uid,
1725                              v9iattr.gid);
1726          if (err < 0) {
1727              goto out;
1728          }
1729      }
1730      if (v9iattr.valid & (P9_ATTR_SIZE)) {
1731          err = v9fs_co_truncate(pdu, &fidp->path, v9iattr.size);
1732          if (err < 0) {
1733              goto out;
1734          }
1735      }
1736      err = offset;
1737      trace_v9fs_setattr_return(pdu->tag, pdu->id);
1738  out:
1739      put_fid(pdu, fidp);
1740  out_nofid:
1741      pdu_complete(pdu, err);
1742  }
1743  
1744  static int v9fs_walk_marshal(V9fsPDU *pdu, uint16_t nwnames, V9fsQID *qids)
1745  {
1746      int i;
1747      ssize_t err;
1748      size_t offset = 7;
1749  
1750      err = pdu_marshal(pdu, offset, "w", nwnames);
1751      if (err < 0) {
1752          return err;
1753      }
1754      offset += err;
1755      for (i = 0; i < nwnames; i++) {
1756          err = pdu_marshal(pdu, offset, "Q", &qids[i]);
1757          if (err < 0) {
1758              return err;
1759          }
1760          offset += err;
1761      }
1762      return offset;
1763  }
1764  
/*
 * Tell whether @name must be rejected as a path element: that is the case
 * if it is empty or contains a '/' character.
 */
static bool name_is_illegal(const char *name)
{
    if (*name == '\0') {
        return true;
    }
    return strchr(name, '/') != NULL;
}
1769  
/*
 * Tell whether two stat results carry the same identity, i.e. agree in
 * both device id (st_dev) and inode number (st_ino).
 */
static bool same_stat_id(const struct stat *a, const struct stat *b)
{
    if (a->st_dev != b->st_dev) {
        return false;
    }
    return a->st_ino == b->st_ino;
}
1774  
1775  static void coroutine_fn v9fs_walk(void *opaque)
1776  {
1777      int name_idx, nwalked;
1778      g_autofree V9fsQID *qids = NULL;
1779      int i, err = 0, any_err = 0;
1780      V9fsPath dpath, path;
1781      P9ARRAY_REF(V9fsPath) pathes = NULL;
1782      uint16_t nwnames;
1783      struct stat stbuf, fidst;
1784      g_autofree struct stat *stbufs = NULL;
1785      size_t offset = 7;
1786      int32_t fid, newfid;
1787      P9ARRAY_REF(V9fsString) wnames = NULL;
1788      V9fsFidState *fidp;
1789      V9fsFidState *newfidp = NULL;
1790      V9fsPDU *pdu = opaque;
1791      V9fsState *s = pdu->s;
1792      V9fsQID qid;
1793  
1794      err = pdu_unmarshal(pdu, offset, "ddw", &fid, &newfid, &nwnames);
1795      if (err < 0) {
1796          pdu_complete(pdu, err);
1797          return;
1798      }
1799      offset += err;
1800  
1801      trace_v9fs_walk(pdu->tag, pdu->id, fid, newfid, nwnames);
1802  
1803      if (nwnames > P9_MAXWELEM) {
1804          err = -EINVAL;
1805          goto out_nofid;
1806      }
1807      if (nwnames) {
1808          P9ARRAY_NEW(V9fsString, wnames, nwnames);
1809          qids   = g_new0(V9fsQID, nwnames);
1810          stbufs = g_new0(struct stat, nwnames);
1811          P9ARRAY_NEW(V9fsPath, pathes, nwnames);
1812          for (i = 0; i < nwnames; i++) {
1813              err = pdu_unmarshal(pdu, offset, "s", &wnames[i]);
1814              if (err < 0) {
1815                  goto out_nofid;
1816              }
1817              if (name_is_illegal(wnames[i].data)) {
1818                  err = -ENOENT;
1819                  goto out_nofid;
1820              }
1821              offset += err;
1822          }
1823      }
1824      fidp = get_fid(pdu, fid);
1825      if (fidp == NULL) {
1826          err = -ENOENT;
1827          goto out_nofid;
1828      }
1829  
1830      v9fs_path_init(&dpath);
1831      v9fs_path_init(&path);
1832      /*
1833       * Both dpath and path initially point to fidp.
1834       * Needed to handle request with nwnames == 0
1835       */
1836      v9fs_path_copy(&dpath, &fidp->path);
1837      v9fs_path_copy(&path, &fidp->path);
1838  
1839      /*
1840       * To keep latency (i.e. overall execution time for processing this
1841       * Twalk client request) as small as possible, run all the required fs
1842       * driver code altogether inside the following block.
1843       */
1844      v9fs_co_run_in_worker({
1845          nwalked = 0;
1846          if (v9fs_request_cancelled(pdu)) {
1847              any_err |= err = -EINTR;
1848              break;
1849          }
1850          err = s->ops->lstat(&s->ctx, &dpath, &fidst);
1851          if (err < 0) {
1852              any_err |= err = -errno;
1853              break;
1854          }
1855          stbuf = fidst;
1856          for (; nwalked < nwnames; nwalked++) {
1857              if (v9fs_request_cancelled(pdu)) {
1858                  any_err |= err = -EINTR;
1859                  break;
1860              }
1861              if (!same_stat_id(&pdu->s->root_st, &stbuf) ||
1862                  strcmp("..", wnames[nwalked].data))
1863              {
1864                  err = s->ops->name_to_path(&s->ctx, &dpath,
1865                                             wnames[nwalked].data,
1866                                             &pathes[nwalked]);
1867                  if (err < 0) {
1868                      any_err |= err = -errno;
1869                      break;
1870                  }
1871                  if (v9fs_request_cancelled(pdu)) {
1872                      any_err |= err = -EINTR;
1873                      break;
1874                  }
1875                  err = s->ops->lstat(&s->ctx, &pathes[nwalked], &stbuf);
1876                  if (err < 0) {
1877                      any_err |= err = -errno;
1878                      break;
1879                  }
1880                  stbufs[nwalked] = stbuf;
1881                  v9fs_path_copy(&dpath, &pathes[nwalked]);
1882              }
1883          }
1884      });
1885      /*
1886       * Handle all the rest of this Twalk request on main thread ...
1887       *
1888       * NOTE: -EINTR is an exception where we deviate from the protocol spec
1889       * and simply send a (R)Lerror response instead of bothering to assemble
1890       * a (deducted) Rwalk response; because -EINTR is always the result of a
1891       * Tflush request, so client would no longer wait for a response in this
1892       * case anyway.
1893       */
1894      if ((err < 0 && !nwalked) || err == -EINTR) {
1895          goto out;
1896      }
1897  
1898      any_err |= err = stat_to_qid(pdu, &fidst, &qid);
1899      if (err < 0 && !nwalked) {
1900          goto out;
1901      }
1902      stbuf = fidst;
1903  
1904      /* reset dpath and path */
1905      v9fs_path_copy(&dpath, &fidp->path);
1906      v9fs_path_copy(&path, &fidp->path);
1907  
1908      for (name_idx = 0; name_idx < nwalked; name_idx++) {
1909          if (!same_stat_id(&pdu->s->root_st, &stbuf) ||
1910              strcmp("..", wnames[name_idx].data))
1911          {
1912              stbuf = stbufs[name_idx];
1913              any_err |= err = stat_to_qid(pdu, &stbuf, &qid);
1914              if (err < 0) {
1915                  break;
1916              }
1917              v9fs_path_copy(&path, &pathes[name_idx]);
1918              v9fs_path_copy(&dpath, &path);
1919          }
1920          memcpy(&qids[name_idx], &qid, sizeof(qid));
1921      }
1922      if (any_err < 0) {
1923          if (!name_idx) {
1924              /* don't send any QIDs, send Rlerror instead */
1925              goto out;
1926          } else {
1927              /* send QIDs (not Rlerror), but fid MUST remain unaffected */
1928              goto send_qids;
1929          }
1930      }
1931      if (fid == newfid) {
1932          if (fidp->fid_type != P9_FID_NONE) {
1933              err = -EINVAL;
1934              goto out;
1935          }
1936          v9fs_path_write_lock(s);
1937          v9fs_path_copy(&fidp->path, &path);
1938          v9fs_path_unlock(s);
1939      } else {
1940          newfidp = alloc_fid(s, newfid);
1941          if (newfidp == NULL) {
1942              err = -EINVAL;
1943              goto out;
1944          }
1945          newfidp->uid = fidp->uid;
1946          v9fs_path_copy(&newfidp->path, &path);
1947      }
1948  send_qids:
1949      err = v9fs_walk_marshal(pdu, name_idx, qids);
1950      trace_v9fs_walk_return(pdu->tag, pdu->id, name_idx, qids);
1951  out:
1952      put_fid(pdu, fidp);
1953      if (newfidp) {
1954          put_fid(pdu, newfidp);
1955      }
1956      v9fs_path_free(&dpath);
1957      v9fs_path_free(&path);
1958  out_nofid:
1959      pdu_complete(pdu, err);
1960  }
1961  
1962  static int32_t coroutine_fn get_iounit(V9fsPDU *pdu, V9fsPath *path)
1963  {
1964      struct statfs stbuf;
1965      int err = v9fs_co_statfs(pdu, path, &stbuf);
1966  
1967      return blksize_to_iounit(pdu, (err >= 0) ? stbuf.f_bsize : 0);
1968  }
1969  
1970  static void coroutine_fn v9fs_open(void *opaque)
1971  {
1972      int flags;
1973      int32_t fid;
1974      int32_t mode;
1975      V9fsQID qid;
1976      int iounit = 0;
1977      ssize_t err = 0;
1978      size_t offset = 7;
1979      struct stat stbuf;
1980      V9fsFidState *fidp;
1981      V9fsPDU *pdu = opaque;
1982      V9fsState *s = pdu->s;
1983  
1984      if (s->proto_version == V9FS_PROTO_2000L) {
1985          err = pdu_unmarshal(pdu, offset, "dd", &fid, &mode);
1986      } else {
1987          uint8_t modebyte;
1988          err = pdu_unmarshal(pdu, offset, "db", &fid, &modebyte);
1989          mode = modebyte;
1990      }
1991      if (err < 0) {
1992          goto out_nofid;
1993      }
1994      trace_v9fs_open(pdu->tag, pdu->id, fid, mode);
1995  
1996      fidp = get_fid(pdu, fid);
1997      if (fidp == NULL) {
1998          err = -ENOENT;
1999          goto out_nofid;
2000      }
2001      if (fidp->fid_type != P9_FID_NONE) {
2002          err = -EINVAL;
2003          goto out;
2004      }
2005  
2006      err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
2007      if (err < 0) {
2008          goto out;
2009      }
2010      err = stat_to_qid(pdu, &stbuf, &qid);
2011      if (err < 0) {
2012          goto out;
2013      }
2014      if (S_ISDIR(stbuf.st_mode)) {
2015          err = v9fs_co_opendir(pdu, fidp);
2016          if (err < 0) {
2017              goto out;
2018          }
2019          fidp->fid_type = P9_FID_DIR;
2020          err = pdu_marshal(pdu, offset, "Qd", &qid, 0);
2021          if (err < 0) {
2022              goto out;
2023          }
2024          err += offset;
2025      } else {
2026          if (s->proto_version == V9FS_PROTO_2000L) {
2027              flags = get_dotl_openflags(s, mode);
2028          } else {
2029              flags = omode_to_uflags(mode);
2030          }
2031          if (is_ro_export(&s->ctx)) {
2032              if (mode & O_WRONLY || mode & O_RDWR ||
2033                  mode & O_APPEND || mode & O_TRUNC) {
2034                  err = -EROFS;
2035                  goto out;
2036              }
2037          }
2038          err = v9fs_co_open(pdu, fidp, flags);
2039          if (err < 0) {
2040              goto out;
2041          }
2042          fidp->fid_type = P9_FID_FILE;
2043          fidp->open_flags = flags;
2044          if (flags & O_EXCL) {
2045              /*
2046               * We let the host file system do O_EXCL check
2047               * We should not reclaim such fd
2048               */
2049              fidp->flags |= FID_NON_RECLAIMABLE;
2050          }
2051          iounit = get_iounit(pdu, &fidp->path);
2052          err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
2053          if (err < 0) {
2054              goto out;
2055          }
2056          err += offset;
2057      }
2058      trace_v9fs_open_return(pdu->tag, pdu->id,
2059                             qid.type, qid.version, qid.path, iounit);
2060  out:
2061      put_fid(pdu, fidp);
2062  out_nofid:
2063      pdu_complete(pdu, err);
2064  }
2065  
2066  static void coroutine_fn v9fs_lcreate(void *opaque)
2067  {
2068      int32_t dfid, flags, mode;
2069      gid_t gid;
2070      ssize_t err = 0;
2071      ssize_t offset = 7;
2072      V9fsString name;
2073      V9fsFidState *fidp;
2074      struct stat stbuf;
2075      V9fsQID qid;
2076      int32_t iounit;
2077      V9fsPDU *pdu = opaque;
2078  
2079      v9fs_string_init(&name);
2080      err = pdu_unmarshal(pdu, offset, "dsddd", &dfid,
2081                          &name, &flags, &mode, &gid);
2082      if (err < 0) {
2083          goto out_nofid;
2084      }
2085      trace_v9fs_lcreate(pdu->tag, pdu->id, dfid, flags, mode, gid);
2086  
2087      if (name_is_illegal(name.data)) {
2088          err = -ENOENT;
2089          goto out_nofid;
2090      }
2091  
2092      if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
2093          err = -EEXIST;
2094          goto out_nofid;
2095      }
2096  
2097      fidp = get_fid(pdu, dfid);
2098      if (fidp == NULL) {
2099          err = -ENOENT;
2100          goto out_nofid;
2101      }
2102      if (fidp->fid_type != P9_FID_NONE) {
2103          err = -EINVAL;
2104          goto out;
2105      }
2106  
2107      flags = get_dotl_openflags(pdu->s, flags);
2108      err = v9fs_co_open2(pdu, fidp, &name, gid,
2109                          flags | O_CREAT, mode, &stbuf);
2110      if (err < 0) {
2111          goto out;
2112      }
2113      fidp->fid_type = P9_FID_FILE;
2114      fidp->open_flags = flags;
2115      if (flags & O_EXCL) {
2116          /*
2117           * We let the host file system do O_EXCL check
2118           * We should not reclaim such fd
2119           */
2120          fidp->flags |= FID_NON_RECLAIMABLE;
2121      }
2122      iounit =  get_iounit(pdu, &fidp->path);
2123      err = stat_to_qid(pdu, &stbuf, &qid);
2124      if (err < 0) {
2125          goto out;
2126      }
2127      err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
2128      if (err < 0) {
2129          goto out;
2130      }
2131      err += offset;
2132      trace_v9fs_lcreate_return(pdu->tag, pdu->id,
2133                                qid.type, qid.version, qid.path, iounit);
2134  out:
2135      put_fid(pdu, fidp);
2136  out_nofid:
2137      pdu_complete(pdu, err);
2138      v9fs_string_free(&name);
2139  }
2140  
2141  static void coroutine_fn v9fs_fsync(void *opaque)
2142  {
2143      int err;
2144      int32_t fid;
2145      int datasync;
2146      size_t offset = 7;
2147      V9fsFidState *fidp;
2148      V9fsPDU *pdu = opaque;
2149  
2150      err = pdu_unmarshal(pdu, offset, "dd", &fid, &datasync);
2151      if (err < 0) {
2152          goto out_nofid;
2153      }
2154      trace_v9fs_fsync(pdu->tag, pdu->id, fid, datasync);
2155  
2156      fidp = get_fid(pdu, fid);
2157      if (fidp == NULL) {
2158          err = -ENOENT;
2159          goto out_nofid;
2160      }
2161      err = v9fs_co_fsync(pdu, fidp, datasync);
2162      if (!err) {
2163          err = offset;
2164      }
2165      put_fid(pdu, fidp);
2166  out_nofid:
2167      pdu_complete(pdu, err);
2168  }
2169  
2170  static void coroutine_fn v9fs_clunk(void *opaque)
2171  {
2172      int err;
2173      int32_t fid;
2174      size_t offset = 7;
2175      V9fsFidState *fidp;
2176      V9fsPDU *pdu = opaque;
2177      V9fsState *s = pdu->s;
2178  
2179      err = pdu_unmarshal(pdu, offset, "d", &fid);
2180      if (err < 0) {
2181          goto out_nofid;
2182      }
2183      trace_v9fs_clunk(pdu->tag, pdu->id, fid);
2184  
2185      fidp = clunk_fid(s, fid);
2186      if (fidp == NULL) {
2187          err = -ENOENT;
2188          goto out_nofid;
2189      }
2190      /*
2191       * Bump the ref so that put_fid will
2192       * free the fid.
2193       */
2194      fidp->ref++;
2195      err = put_fid(pdu, fidp);
2196      if (!err) {
2197          err = offset;
2198      }
2199  out_nofid:
2200      pdu_complete(pdu, err);
2201  }
2202  
2203  /*
2204   * Create a QEMUIOVector for a sub-region of PDU iovecs
2205   *
2206   * @qiov:       uninitialized QEMUIOVector
2207   * @skip:       number of bytes to skip from beginning of PDU
2208   * @size:       number of bytes to include
2209   * @is_write:   true - write, false - read
2210   *
2211   * The resulting QEMUIOVector has heap-allocated iovecs and must be cleaned up
2212   * with qemu_iovec_destroy().
2213   */
2214  static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu,
2215                                      size_t skip, size_t size,
2216                                      bool is_write)
2217  {
2218      QEMUIOVector elem;
2219      struct iovec *iov;
2220      unsigned int niov;
2221  
2222      if (is_write) {
2223          pdu->s->transport->init_out_iov_from_pdu(pdu, &iov, &niov, size + skip);
2224      } else {
2225          pdu->s->transport->init_in_iov_from_pdu(pdu, &iov, &niov, size + skip);
2226      }
2227  
2228      qemu_iovec_init_external(&elem, iov, niov);
2229      qemu_iovec_init(qiov, niov);
2230      qemu_iovec_concat(qiov, &elem, skip, size);
2231  }
2232  
2233  static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
2234                             uint64_t off, uint32_t max_count)
2235  {
2236      ssize_t err;
2237      size_t offset = 7;
2238      uint64_t read_count;
2239      QEMUIOVector qiov_full;
2240  
2241      if (fidp->fs.xattr.len < off) {
2242          read_count = 0;
2243      } else {
2244          read_count = fidp->fs.xattr.len - off;
2245      }
2246      if (read_count > max_count) {
2247          read_count = max_count;
2248      }
2249      err = pdu_marshal(pdu, offset, "d", read_count);
2250      if (err < 0) {
2251          return err;
2252      }
2253      offset += err;
2254  
2255      v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, read_count, false);
2256      err = v9fs_pack(qiov_full.iov, qiov_full.niov, 0,
2257                      ((char *)fidp->fs.xattr.value) + off,
2258                      read_count);
2259      qemu_iovec_destroy(&qiov_full);
2260      if (err < 0) {
2261          return err;
2262      }
2263      offset += err;
2264      return offset;
2265  }
2266  
2267  static int coroutine_fn v9fs_do_readdir_with_stat(V9fsPDU *pdu,
2268                                                    V9fsFidState *fidp,
2269                                                    uint32_t max_count)
2270  {
2271      V9fsPath path;
2272      V9fsStat v9stat;
2273      int len, err = 0;
2274      int32_t count = 0;
2275      struct stat stbuf;
2276      off_t saved_dir_pos;
2277      struct dirent *dent;
2278  
2279      /* save the directory position */
2280      saved_dir_pos = v9fs_co_telldir(pdu, fidp);
2281      if (saved_dir_pos < 0) {
2282          return saved_dir_pos;
2283      }
2284  
2285      while (1) {
2286          v9fs_path_init(&path);
2287  
2288          v9fs_readdir_lock(&fidp->fs.dir);
2289  
2290          err = v9fs_co_readdir(pdu, fidp, &dent);
2291          if (err || !dent) {
2292              break;
2293          }
2294          err = v9fs_co_name_to_path(pdu, &fidp->path, dent->d_name, &path);
2295          if (err < 0) {
2296              break;
2297          }
2298          err = v9fs_co_lstat(pdu, &path, &stbuf);
2299          if (err < 0) {
2300              break;
2301          }
2302          err = stat_to_v9stat(pdu, &path, dent->d_name, &stbuf, &v9stat);
2303          if (err < 0) {
2304              break;
2305          }
2306          if ((count + v9stat.size + 2) > max_count) {
2307              v9fs_readdir_unlock(&fidp->fs.dir);
2308  
2309              /* Ran out of buffer. Set dir back to old position and return */
2310              v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
2311              v9fs_stat_free(&v9stat);
2312              v9fs_path_free(&path);
2313              return count;
2314          }
2315  
2316          /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
2317          len = pdu_marshal(pdu, 11 + count, "S", &v9stat);
2318  
2319          v9fs_readdir_unlock(&fidp->fs.dir);
2320  
2321          if (len < 0) {
2322              v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
2323              v9fs_stat_free(&v9stat);
2324              v9fs_path_free(&path);
2325              return len;
2326          }
2327          count += len;
2328          v9fs_stat_free(&v9stat);
2329          v9fs_path_free(&path);
2330          saved_dir_pos = qemu_dirent_off(dent);
2331      }
2332  
2333      v9fs_readdir_unlock(&fidp->fs.dir);
2334  
2335      v9fs_path_free(&path);
2336      if (err < 0) {
2337          return err;
2338      }
2339      return count;
2340  }
2341  
2342  static void coroutine_fn v9fs_read(void *opaque)
2343  {
2344      int32_t fid;
2345      uint64_t off;
2346      ssize_t err = 0;
2347      int32_t count = 0;
2348      size_t offset = 7;
2349      uint32_t max_count;
2350      V9fsFidState *fidp;
2351      V9fsPDU *pdu = opaque;
2352      V9fsState *s = pdu->s;
2353  
2354      err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &max_count);
2355      if (err < 0) {
2356          goto out_nofid;
2357      }
2358      trace_v9fs_read(pdu->tag, pdu->id, fid, off, max_count);
2359  
2360      fidp = get_fid(pdu, fid);
2361      if (fidp == NULL) {
2362          err = -EINVAL;
2363          goto out_nofid;
2364      }
2365      if (fidp->fid_type == P9_FID_DIR) {
2366          if (s->proto_version != V9FS_PROTO_2000U) {
2367              warn_report_once(
2368                  "9p: bad client: T_read request on directory only expected "
2369                  "with 9P2000.u protocol version"
2370              );
2371              err = -EOPNOTSUPP;
2372              goto out;
2373          }
2374          if (off == 0) {
2375              v9fs_co_rewinddir(pdu, fidp);
2376          }
2377          count = v9fs_do_readdir_with_stat(pdu, fidp, max_count);
2378          if (count < 0) {
2379              err = count;
2380              goto out;
2381          }
2382          err = pdu_marshal(pdu, offset, "d", count);
2383          if (err < 0) {
2384              goto out;
2385          }
2386          err += offset + count;
2387      } else if (fidp->fid_type == P9_FID_FILE) {
2388          QEMUIOVector qiov_full;
2389          QEMUIOVector qiov;
2390          int32_t len;
2391  
2392          v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset + 4, max_count, false);
2393          qemu_iovec_init(&qiov, qiov_full.niov);
2394          do {
2395              qemu_iovec_reset(&qiov);
2396              qemu_iovec_concat(&qiov, &qiov_full, count, qiov_full.size - count);
2397              if (0) {
2398                  print_sg(qiov.iov, qiov.niov);
2399              }
2400              /* Loop in case of EINTR */
2401              do {
2402                  len = v9fs_co_preadv(pdu, fidp, qiov.iov, qiov.niov, off);
2403                  if (len >= 0) {
2404                      off   += len;
2405                      count += len;
2406                  }
2407              } while (len == -EINTR && !pdu->cancelled);
2408              if (len < 0) {
2409                  /* IO error return the error */
2410                  err = len;
2411                  goto out_free_iovec;
2412              }
2413          } while (count < max_count && len > 0);
2414          err = pdu_marshal(pdu, offset, "d", count);
2415          if (err < 0) {
2416              goto out_free_iovec;
2417          }
2418          err += offset + count;
2419  out_free_iovec:
2420          qemu_iovec_destroy(&qiov);
2421          qemu_iovec_destroy(&qiov_full);
2422      } else if (fidp->fid_type == P9_FID_XATTR) {
2423          err = v9fs_xattr_read(s, pdu, fidp, off, max_count);
2424      } else {
2425          err = -EINVAL;
2426      }
2427      trace_v9fs_read_return(pdu->tag, pdu->id, count, err);
2428  out:
2429      put_fid(pdu, fidp);
2430  out_nofid:
2431      pdu_complete(pdu, err);
2432  }
2433  
2434  /**
2435   * v9fs_readdir_response_size() - Returns size required in Rreaddir response
2436   * for the passed dirent @name.
2437   *
2438   * @name: directory entry's name (i.e. file name, directory name)
2439   * Return: required size in bytes
2440   */
2441  size_t v9fs_readdir_response_size(V9fsString *name)
2442  {
2443      /*
2444       * Size of each dirent on the wire: size of qid (13) + size of offset (8)
2445       * size of type (1) + size of name.size (2) + strlen(name.data)
2446       */
2447      return 24 + v9fs_string_size(name);
2448  }
2449  
2450  static void v9fs_free_dirents(struct V9fsDirEnt *e)
2451  {
2452      struct V9fsDirEnt *next = NULL;
2453  
2454      for (; e; e = next) {
2455          next = e->next;
2456          g_free(e->dent);
2457          g_free(e->st);
2458          g_free(e);
2459      }
2460  }
2461  
2462  static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
2463                                          off_t offset, int32_t max_count)
2464  {
2465      size_t size;
2466      V9fsQID qid;
2467      V9fsString name;
2468      int len, err = 0;
2469      int32_t count = 0;
2470      off_t off;
2471      struct dirent *dent;
2472      struct stat *st;
2473      struct V9fsDirEnt *entries = NULL;
2474  
2475      /*
2476       * inode remapping requires the device id, which in turn might be
2477       * different for different directory entries, so if inode remapping is
2478       * enabled we have to make a full stat for each directory entry
2479       */
2480      const bool dostat = pdu->s->ctx.export_flags & V9FS_REMAP_INODES;
2481  
2482      /*
2483       * Fetch all required directory entries altogether on a background IO
2484       * thread from fs driver. We don't want to do that for each entry
2485       * individually, because hopping between threads (this main IO thread
2486       * and background IO driver thread) would sum up to huge latencies.
2487       */
2488      count = v9fs_co_readdir_many(pdu, fidp, &entries, offset, max_count,
2489                                   dostat);
2490      if (count < 0) {
2491          err = count;
2492          count = 0;
2493          goto out;
2494      }
2495      count = 0;
2496  
2497      for (struct V9fsDirEnt *e = entries; e; e = e->next) {
2498          dent = e->dent;
2499  
2500          if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) {
2501              st = e->st;
2502              /* e->st should never be NULL, but just to be sure */
2503              if (!st) {
2504                  err = -1;
2505                  break;
2506              }
2507  
2508              /* remap inode */
2509              err = stat_to_qid(pdu, st, &qid);
2510              if (err < 0) {
2511                  break;
2512              }
2513          } else {
2514              /*
2515               * Fill up just the path field of qid because the client uses
2516               * only that. To fill the entire qid structure we will have
2517               * to stat each dirent found, which is expensive. For the
2518               * latter reason we don't call stat_to_qid() here. Only drawback
2519               * is that no multi-device export detection of stat_to_qid()
2520               * would be done and provided as error to the user here. But
2521               * user would get that error anyway when accessing those
2522               * files/dirs through other ways.
2523               */
2524              size = MIN(sizeof(dent->d_ino), sizeof(qid.path));
2525              memcpy(&qid.path, &dent->d_ino, size);
2526              /* Fill the other fields with dummy values */
2527              qid.type = 0;
2528              qid.version = 0;
2529          }
2530  
2531          off = qemu_dirent_off(dent);
2532          v9fs_string_init(&name);
2533          v9fs_string_sprintf(&name, "%s", dent->d_name);
2534  
2535          /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
2536          len = pdu_marshal(pdu, 11 + count, "Qqbs",
2537                            &qid, off,
2538                            dent->d_type, &name);
2539  
2540          v9fs_string_free(&name);
2541  
2542          if (len < 0) {
2543              err = len;
2544              break;
2545          }
2546  
2547          count += len;
2548      }
2549  
2550  out:
2551      v9fs_free_dirents(entries);
2552      if (err < 0) {
2553          return err;
2554      }
2555      return count;
2556  }
2557  
2558  static void coroutine_fn v9fs_readdir(void *opaque)
2559  {
2560      int32_t fid;
2561      V9fsFidState *fidp;
2562      ssize_t retval = 0;
2563      size_t offset = 7;
2564      uint64_t initial_offset;
2565      int32_t count;
2566      uint32_t max_count;
2567      V9fsPDU *pdu = opaque;
2568      V9fsState *s = pdu->s;
2569  
2570      retval = pdu_unmarshal(pdu, offset, "dqd", &fid,
2571                             &initial_offset, &max_count);
2572      if (retval < 0) {
2573          goto out_nofid;
2574      }
2575      trace_v9fs_readdir(pdu->tag, pdu->id, fid, initial_offset, max_count);
2576  
2577      /* Enough space for a R_readdir header: size[4] Rreaddir tag[2] count[4] */
2578      if (max_count > s->msize - 11) {
2579          max_count = s->msize - 11;
2580          warn_report_once(
2581              "9p: bad client: T_readdir with count > msize - 11"
2582          );
2583      }
2584  
2585      fidp = get_fid(pdu, fid);
2586      if (fidp == NULL) {
2587          retval = -EINVAL;
2588          goto out_nofid;
2589      }
2590      if (!fidp->fs.dir.stream) {
2591          retval = -EINVAL;
2592          goto out;
2593      }
2594      if (s->proto_version != V9FS_PROTO_2000L) {
2595          warn_report_once(
2596              "9p: bad client: T_readdir request only expected with 9P2000.L "
2597              "protocol version"
2598          );
2599          retval = -EOPNOTSUPP;
2600          goto out;
2601      }
2602      count = v9fs_do_readdir(pdu, fidp, (off_t) initial_offset, max_count);
2603      if (count < 0) {
2604          retval = count;
2605          goto out;
2606      }
2607      retval = pdu_marshal(pdu, offset, "d", count);
2608      if (retval < 0) {
2609          goto out;
2610      }
2611      retval += count + offset;
2612      trace_v9fs_readdir_return(pdu->tag, pdu->id, count, retval);
2613  out:
2614      put_fid(pdu, fidp);
2615  out_nofid:
2616      pdu_complete(pdu, retval);
2617  }
2618  
2619  static int v9fs_xattr_write(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
2620                              uint64_t off, uint32_t count,
2621                              struct iovec *sg, int cnt)
2622  {
2623      int i, to_copy;
2624      ssize_t err = 0;
2625      uint64_t write_count;
2626      size_t offset = 7;
2627  
2628  
2629      if (fidp->fs.xattr.len < off) {
2630          return -ENOSPC;
2631      }
2632      write_count = fidp->fs.xattr.len - off;
2633      if (write_count > count) {
2634          write_count = count;
2635      }
2636      err = pdu_marshal(pdu, offset, "d", write_count);
2637      if (err < 0) {
2638          return err;
2639      }
2640      err += offset;
2641      fidp->fs.xattr.copied_len += write_count;
2642      /*
2643       * Now copy the content from sg list
2644       */
2645      for (i = 0; i < cnt; i++) {
2646          if (write_count > sg[i].iov_len) {
2647              to_copy = sg[i].iov_len;
2648          } else {
2649              to_copy = write_count;
2650          }
2651          memcpy((char *)fidp->fs.xattr.value + off, sg[i].iov_base, to_copy);
2652          /* updating vs->off since we are not using below */
2653          off += to_copy;
2654          write_count -= to_copy;
2655      }
2656  
2657      return err;
2658  }
2659  
2660  static void coroutine_fn v9fs_write(void *opaque)
2661  {
2662      ssize_t err;
2663      int32_t fid;
2664      uint64_t off;
2665      uint32_t count;
2666      int32_t len = 0;
2667      int32_t total = 0;
2668      size_t offset = 7;
2669      V9fsFidState *fidp;
2670      V9fsPDU *pdu = opaque;
2671      V9fsState *s = pdu->s;
2672      QEMUIOVector qiov_full;
2673      QEMUIOVector qiov;
2674  
2675      err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &count);
2676      if (err < 0) {
2677          pdu_complete(pdu, err);
2678          return;
2679      }
2680      offset += err;
2681      v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, count, true);
2682      trace_v9fs_write(pdu->tag, pdu->id, fid, off, count, qiov_full.niov);
2683  
2684      fidp = get_fid(pdu, fid);
2685      if (fidp == NULL) {
2686          err = -EINVAL;
2687          goto out_nofid;
2688      }
2689      if (fidp->fid_type == P9_FID_FILE) {
2690          if (fidp->fs.fd == -1) {
2691              err = -EINVAL;
2692              goto out;
2693          }
2694      } else if (fidp->fid_type == P9_FID_XATTR) {
2695          /*
2696           * setxattr operation
2697           */
2698          err = v9fs_xattr_write(s, pdu, fidp, off, count,
2699                                 qiov_full.iov, qiov_full.niov);
2700          goto out;
2701      } else {
2702          err = -EINVAL;
2703          goto out;
2704      }
2705      qemu_iovec_init(&qiov, qiov_full.niov);
2706      do {
2707          qemu_iovec_reset(&qiov);
2708          qemu_iovec_concat(&qiov, &qiov_full, total, qiov_full.size - total);
2709          if (0) {
2710              print_sg(qiov.iov, qiov.niov);
2711          }
2712          /* Loop in case of EINTR */
2713          do {
2714              len = v9fs_co_pwritev(pdu, fidp, qiov.iov, qiov.niov, off);
2715              if (len >= 0) {
2716                  off   += len;
2717                  total += len;
2718              }
2719          } while (len == -EINTR && !pdu->cancelled);
2720          if (len < 0) {
2721              /* IO error return the error */
2722              err = len;
2723              goto out_qiov;
2724          }
2725      } while (total < count && len > 0);
2726  
2727      offset = 7;
2728      err = pdu_marshal(pdu, offset, "d", total);
2729      if (err < 0) {
2730          goto out_qiov;
2731      }
2732      err += offset;
2733      trace_v9fs_write_return(pdu->tag, pdu->id, total, err);
2734  out_qiov:
2735      qemu_iovec_destroy(&qiov);
2736  out:
2737      put_fid(pdu, fidp);
2738  out_nofid:
2739      qemu_iovec_destroy(&qiov_full);
2740      pdu_complete(pdu, err);
2741  }
2742  
2743  static void coroutine_fn v9fs_create(void *opaque)
2744  {
2745      int32_t fid;
2746      int err = 0;
2747      size_t offset = 7;
2748      V9fsFidState *fidp;
2749      V9fsQID qid;
2750      int32_t perm;
2751      int8_t mode;
2752      V9fsPath path;
2753      struct stat stbuf;
2754      V9fsString name;
2755      V9fsString extension;
2756      int iounit;
2757      V9fsPDU *pdu = opaque;
2758      V9fsState *s = pdu->s;
2759  
2760      v9fs_path_init(&path);
2761      v9fs_string_init(&name);
2762      v9fs_string_init(&extension);
2763      err = pdu_unmarshal(pdu, offset, "dsdbs", &fid, &name,
2764                          &perm, &mode, &extension);
2765      if (err < 0) {
2766          goto out_nofid;
2767      }
2768      trace_v9fs_create(pdu->tag, pdu->id, fid, name.data, perm, mode);
2769  
2770      if (name_is_illegal(name.data)) {
2771          err = -ENOENT;
2772          goto out_nofid;
2773      }
2774  
2775      if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
2776          err = -EEXIST;
2777          goto out_nofid;
2778      }
2779  
2780      fidp = get_fid(pdu, fid);
2781      if (fidp == NULL) {
2782          err = -EINVAL;
2783          goto out_nofid;
2784      }
2785      if (fidp->fid_type != P9_FID_NONE) {
2786          err = -EINVAL;
2787          goto out;
2788      }
2789      if (perm & P9_STAT_MODE_DIR) {
2790          err = v9fs_co_mkdir(pdu, fidp, &name, perm & 0777,
2791                              fidp->uid, -1, &stbuf);
2792          if (err < 0) {
2793              goto out;
2794          }
2795          err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2796          if (err < 0) {
2797              goto out;
2798          }
2799          v9fs_path_write_lock(s);
2800          v9fs_path_copy(&fidp->path, &path);
2801          v9fs_path_unlock(s);
2802          err = v9fs_co_opendir(pdu, fidp);
2803          if (err < 0) {
2804              goto out;
2805          }
2806          fidp->fid_type = P9_FID_DIR;
2807      } else if (perm & P9_STAT_MODE_SYMLINK) {
2808          err = v9fs_co_symlink(pdu, fidp, &name,
2809                                extension.data, -1 , &stbuf);
2810          if (err < 0) {
2811              goto out;
2812          }
2813          err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2814          if (err < 0) {
2815              goto out;
2816          }
2817          v9fs_path_write_lock(s);
2818          v9fs_path_copy(&fidp->path, &path);
2819          v9fs_path_unlock(s);
2820      } else if (perm & P9_STAT_MODE_LINK) {
2821          int32_t ofid = atoi(extension.data);
2822          V9fsFidState *ofidp = get_fid(pdu, ofid);
2823          if (ofidp == NULL) {
2824              err = -EINVAL;
2825              goto out;
2826          }
2827          err = v9fs_co_link(pdu, ofidp, fidp, &name);
2828          put_fid(pdu, ofidp);
2829          if (err < 0) {
2830              goto out;
2831          }
2832          err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2833          if (err < 0) {
2834              fidp->fid_type = P9_FID_NONE;
2835              goto out;
2836          }
2837          v9fs_path_write_lock(s);
2838          v9fs_path_copy(&fidp->path, &path);
2839          v9fs_path_unlock(s);
2840          err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
2841          if (err < 0) {
2842              fidp->fid_type = P9_FID_NONE;
2843              goto out;
2844          }
2845      } else if (perm & P9_STAT_MODE_DEVICE) {
2846          char ctype;
2847          uint32_t major, minor;
2848          mode_t nmode = 0;
2849  
2850          if (sscanf(extension.data, "%c %u %u", &ctype, &major, &minor) != 3) {
2851              err = -errno;
2852              goto out;
2853          }
2854  
2855          switch (ctype) {
2856          case 'c':
2857              nmode = S_IFCHR;
2858              break;
2859          case 'b':
2860              nmode = S_IFBLK;
2861              break;
2862          default:
2863              err = -EIO;
2864              goto out;
2865          }
2866  
2867          nmode |= perm & 0777;
2868          err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2869                              makedev(major, minor), nmode, &stbuf);
2870          if (err < 0) {
2871              goto out;
2872          }
2873          err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2874          if (err < 0) {
2875              goto out;
2876          }
2877          v9fs_path_write_lock(s);
2878          v9fs_path_copy(&fidp->path, &path);
2879          v9fs_path_unlock(s);
2880      } else if (perm & P9_STAT_MODE_NAMED_PIPE) {
2881          err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2882                              0, S_IFIFO | (perm & 0777), &stbuf);
2883          if (err < 0) {
2884              goto out;
2885          }
2886          err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2887          if (err < 0) {
2888              goto out;
2889          }
2890          v9fs_path_write_lock(s);
2891          v9fs_path_copy(&fidp->path, &path);
2892          v9fs_path_unlock(s);
2893      } else if (perm & P9_STAT_MODE_SOCKET) {
2894          err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2895                              0, S_IFSOCK | (perm & 0777), &stbuf);
2896          if (err < 0) {
2897              goto out;
2898          }
2899          err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2900          if (err < 0) {
2901              goto out;
2902          }
2903          v9fs_path_write_lock(s);
2904          v9fs_path_copy(&fidp->path, &path);
2905          v9fs_path_unlock(s);
2906      } else {
2907          err = v9fs_co_open2(pdu, fidp, &name, -1,
2908                              omode_to_uflags(mode) | O_CREAT, perm, &stbuf);
2909          if (err < 0) {
2910              goto out;
2911          }
2912          fidp->fid_type = P9_FID_FILE;
2913          fidp->open_flags = omode_to_uflags(mode);
2914          if (fidp->open_flags & O_EXCL) {
2915              /*
2916               * We let the host file system do O_EXCL check
2917               * We should not reclaim such fd
2918               */
2919              fidp->flags |= FID_NON_RECLAIMABLE;
2920          }
2921      }
2922      iounit = get_iounit(pdu, &fidp->path);
2923      err = stat_to_qid(pdu, &stbuf, &qid);
2924      if (err < 0) {
2925          goto out;
2926      }
2927      err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
2928      if (err < 0) {
2929          goto out;
2930      }
2931      err += offset;
2932      trace_v9fs_create_return(pdu->tag, pdu->id,
2933                               qid.type, qid.version, qid.path, iounit);
2934  out:
2935      put_fid(pdu, fidp);
2936  out_nofid:
2937     pdu_complete(pdu, err);
2938     v9fs_string_free(&name);
2939     v9fs_string_free(&extension);
2940     v9fs_path_free(&path);
2941  }
2942  
2943  static void coroutine_fn v9fs_symlink(void *opaque)
2944  {
2945      V9fsPDU *pdu = opaque;
2946      V9fsString name;
2947      V9fsString symname;
2948      V9fsFidState *dfidp;
2949      V9fsQID qid;
2950      struct stat stbuf;
2951      int32_t dfid;
2952      int err = 0;
2953      gid_t gid;
2954      size_t offset = 7;
2955  
2956      v9fs_string_init(&name);
2957      v9fs_string_init(&symname);
2958      err = pdu_unmarshal(pdu, offset, "dssd", &dfid, &name, &symname, &gid);
2959      if (err < 0) {
2960          goto out_nofid;
2961      }
2962      trace_v9fs_symlink(pdu->tag, pdu->id, dfid, name.data, symname.data, gid);
2963  
2964      if (name_is_illegal(name.data)) {
2965          err = -ENOENT;
2966          goto out_nofid;
2967      }
2968  
2969      if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
2970          err = -EEXIST;
2971          goto out_nofid;
2972      }
2973  
2974      dfidp = get_fid(pdu, dfid);
2975      if (dfidp == NULL) {
2976          err = -EINVAL;
2977          goto out_nofid;
2978      }
2979      err = v9fs_co_symlink(pdu, dfidp, &name, symname.data, gid, &stbuf);
2980      if (err < 0) {
2981          goto out;
2982      }
2983      err = stat_to_qid(pdu, &stbuf, &qid);
2984      if (err < 0) {
2985          goto out;
2986      }
2987      err =  pdu_marshal(pdu, offset, "Q", &qid);
2988      if (err < 0) {
2989          goto out;
2990      }
2991      err += offset;
2992      trace_v9fs_symlink_return(pdu->tag, pdu->id,
2993                                qid.type, qid.version, qid.path);
2994  out:
2995      put_fid(pdu, dfidp);
2996  out_nofid:
2997      pdu_complete(pdu, err);
2998      v9fs_string_free(&name);
2999      v9fs_string_free(&symname);
3000  }
3001  
3002  static void coroutine_fn v9fs_flush(void *opaque)
3003  {
3004      ssize_t err;
3005      int16_t tag;
3006      size_t offset = 7;
3007      V9fsPDU *cancel_pdu = NULL;
3008      V9fsPDU *pdu = opaque;
3009      V9fsState *s = pdu->s;
3010  
3011      err = pdu_unmarshal(pdu, offset, "w", &tag);
3012      if (err < 0) {
3013          pdu_complete(pdu, err);
3014          return;
3015      }
3016      trace_v9fs_flush(pdu->tag, pdu->id, tag);
3017  
3018      if (pdu->tag == tag) {
3019          warn_report("the guest sent a self-referencing 9P flush request");
3020      } else {
3021          QLIST_FOREACH(cancel_pdu, &s->active_list, next) {
3022              if (cancel_pdu->tag == tag) {
3023                  break;
3024              }
3025          }
3026      }
3027      if (cancel_pdu) {
3028          cancel_pdu->cancelled = 1;
3029          /*
3030           * Wait for pdu to complete.
3031           */
3032          qemu_co_queue_wait(&cancel_pdu->complete, NULL);
3033          if (!qemu_co_queue_next(&cancel_pdu->complete)) {
3034              cancel_pdu->cancelled = 0;
3035              pdu_free(cancel_pdu);
3036          }
3037      }
3038      pdu_complete(pdu, 7);
3039  }
3040  
3041  static void coroutine_fn v9fs_link(void *opaque)
3042  {
3043      V9fsPDU *pdu = opaque;
3044      int32_t dfid, oldfid;
3045      V9fsFidState *dfidp, *oldfidp;
3046      V9fsString name;
3047      size_t offset = 7;
3048      int err = 0;
3049  
3050      v9fs_string_init(&name);
3051      err = pdu_unmarshal(pdu, offset, "dds", &dfid, &oldfid, &name);
3052      if (err < 0) {
3053          goto out_nofid;
3054      }
3055      trace_v9fs_link(pdu->tag, pdu->id, dfid, oldfid, name.data);
3056  
3057      if (name_is_illegal(name.data)) {
3058          err = -ENOENT;
3059          goto out_nofid;
3060      }
3061  
3062      if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
3063          err = -EEXIST;
3064          goto out_nofid;
3065      }
3066  
3067      dfidp = get_fid(pdu, dfid);
3068      if (dfidp == NULL) {
3069          err = -ENOENT;
3070          goto out_nofid;
3071      }
3072  
3073      oldfidp = get_fid(pdu, oldfid);
3074      if (oldfidp == NULL) {
3075          err = -ENOENT;
3076          goto out;
3077      }
3078      err = v9fs_co_link(pdu, oldfidp, dfidp, &name);
3079      if (!err) {
3080          err = offset;
3081      }
3082      put_fid(pdu, oldfidp);
3083  out:
3084      put_fid(pdu, dfidp);
3085  out_nofid:
3086      v9fs_string_free(&name);
3087      pdu_complete(pdu, err);
3088  }
3089  
3090  /* Only works with path name based fid */
3091  static void coroutine_fn v9fs_remove(void *opaque)
3092  {
3093      int32_t fid;
3094      int err = 0;
3095      size_t offset = 7;
3096      V9fsFidState *fidp;
3097      V9fsPDU *pdu = opaque;
3098  
3099      err = pdu_unmarshal(pdu, offset, "d", &fid);
3100      if (err < 0) {
3101          goto out_nofid;
3102      }
3103      trace_v9fs_remove(pdu->tag, pdu->id, fid);
3104  
3105      fidp = get_fid(pdu, fid);
3106      if (fidp == NULL) {
3107          err = -EINVAL;
3108          goto out_nofid;
3109      }
3110      /* if fs driver is not path based, return EOPNOTSUPP */
3111      if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) {
3112          err = -EOPNOTSUPP;
3113          goto out_err;
3114      }
3115      /*
3116       * IF the file is unlinked, we cannot reopen
3117       * the file later. So don't reclaim fd
3118       */
3119      err = v9fs_mark_fids_unreclaim(pdu, &fidp->path);
3120      if (err < 0) {
3121          goto out_err;
3122      }
3123      err = v9fs_co_remove(pdu, &fidp->path);
3124      if (!err) {
3125          err = offset;
3126      }
3127  out_err:
3128      /* For TREMOVE we need to clunk the fid even on failed remove */
3129      clunk_fid(pdu->s, fidp->fid);
3130      put_fid(pdu, fidp);
3131  out_nofid:
3132      pdu_complete(pdu, err);
3133  }
3134  
3135  static void coroutine_fn v9fs_unlinkat(void *opaque)
3136  {
3137      int err = 0;
3138      V9fsString name;
3139      int32_t dfid, flags, rflags = 0;
3140      size_t offset = 7;
3141      V9fsPath path;
3142      V9fsFidState *dfidp;
3143      V9fsPDU *pdu = opaque;
3144  
3145      v9fs_string_init(&name);
3146      err = pdu_unmarshal(pdu, offset, "dsd", &dfid, &name, &flags);
3147      if (err < 0) {
3148          goto out_nofid;
3149      }
3150  
3151      if (name_is_illegal(name.data)) {
3152          err = -ENOENT;
3153          goto out_nofid;
3154      }
3155  
3156      if (!strcmp(".", name.data)) {
3157          err = -EINVAL;
3158          goto out_nofid;
3159      }
3160  
3161      if (!strcmp("..", name.data)) {
3162          err = -ENOTEMPTY;
3163          goto out_nofid;
3164      }
3165  
3166      if (flags & ~P9_DOTL_AT_REMOVEDIR) {
3167          err = -EINVAL;
3168          goto out_nofid;
3169      }
3170  
3171      if (flags & P9_DOTL_AT_REMOVEDIR) {
3172          rflags |= AT_REMOVEDIR;
3173      }
3174  
3175      dfidp = get_fid(pdu, dfid);
3176      if (dfidp == NULL) {
3177          err = -EINVAL;
3178          goto out_nofid;
3179      }
3180      /*
3181       * IF the file is unlinked, we cannot reopen
3182       * the file later. So don't reclaim fd
3183       */
3184      v9fs_path_init(&path);
3185      err = v9fs_co_name_to_path(pdu, &dfidp->path, name.data, &path);
3186      if (err < 0) {
3187          goto out_err;
3188      }
3189      err = v9fs_mark_fids_unreclaim(pdu, &path);
3190      if (err < 0) {
3191          goto out_err;
3192      }
3193      err = v9fs_co_unlinkat(pdu, &dfidp->path, &name, rflags);
3194      if (!err) {
3195          err = offset;
3196      }
3197  out_err:
3198      put_fid(pdu, dfidp);
3199      v9fs_path_free(&path);
3200  out_nofid:
3201      pdu_complete(pdu, err);
3202      v9fs_string_free(&name);
3203  }
3204  
3205  
3206  /* Only works with path name based fid */
3207  static int coroutine_fn v9fs_complete_rename(V9fsPDU *pdu, V9fsFidState *fidp,
3208                                               int32_t newdirfid,
3209                                               V9fsString *name)
3210  {
3211      int err = 0;
3212      V9fsPath new_path;
3213      V9fsFidState *tfidp;
3214      V9fsState *s = pdu->s;
3215      V9fsFidState *dirfidp = NULL;
3216      GHashTableIter iter;
3217      gpointer fid;
3218  
3219      v9fs_path_init(&new_path);
3220      if (newdirfid != -1) {
3221          dirfidp = get_fid(pdu, newdirfid);
3222          if (dirfidp == NULL) {
3223              return -ENOENT;
3224          }
3225          if (fidp->fid_type != P9_FID_NONE) {
3226              err = -EINVAL;
3227              goto out;
3228          }
3229          err = v9fs_co_name_to_path(pdu, &dirfidp->path, name->data, &new_path);
3230          if (err < 0) {
3231              goto out;
3232          }
3233      } else {
3234          char *dir_name = g_path_get_dirname(fidp->path.data);
3235          V9fsPath dir_path;
3236  
3237          v9fs_path_init(&dir_path);
3238          v9fs_path_sprintf(&dir_path, "%s", dir_name);
3239          g_free(dir_name);
3240  
3241          err = v9fs_co_name_to_path(pdu, &dir_path, name->data, &new_path);
3242          v9fs_path_free(&dir_path);
3243          if (err < 0) {
3244              goto out;
3245          }
3246      }
3247      err = v9fs_co_rename(pdu, &fidp->path, &new_path);
3248      if (err < 0) {
3249          goto out;
3250      }
3251  
3252      /*
3253       * Fixup fid's pointing to the old name to
3254       * start pointing to the new name
3255       */
3256      g_hash_table_iter_init(&iter, s->fids);
3257      while (g_hash_table_iter_next(&iter, &fid, (gpointer *) &tfidp)) {
3258          if (v9fs_path_is_ancestor(&fidp->path, &tfidp->path)) {
3259              /* replace the name */
3260              v9fs_fix_path(&tfidp->path, &new_path, strlen(fidp->path.data));
3261          }
3262      }
3263  out:
3264      if (dirfidp) {
3265          put_fid(pdu, dirfidp);
3266      }
3267      v9fs_path_free(&new_path);
3268      return err;
3269  }
3270  
3271  /* Only works with path name based fid */
3272  static void coroutine_fn v9fs_rename(void *opaque)
3273  {
3274      int32_t fid;
3275      ssize_t err = 0;
3276      size_t offset = 7;
3277      V9fsString name;
3278      int32_t newdirfid;
3279      V9fsFidState *fidp;
3280      V9fsPDU *pdu = opaque;
3281      V9fsState *s = pdu->s;
3282  
3283      v9fs_string_init(&name);
3284      err = pdu_unmarshal(pdu, offset, "dds", &fid, &newdirfid, &name);
3285      if (err < 0) {
3286          goto out_nofid;
3287      }
3288  
3289      if (name_is_illegal(name.data)) {
3290          err = -ENOENT;
3291          goto out_nofid;
3292      }
3293  
3294      if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
3295          err = -EISDIR;
3296          goto out_nofid;
3297      }
3298  
3299      fidp = get_fid(pdu, fid);
3300      if (fidp == NULL) {
3301          err = -ENOENT;
3302          goto out_nofid;
3303      }
3304      if (fidp->fid_type != P9_FID_NONE) {
3305          err = -EINVAL;
3306          goto out;
3307      }
3308      /* if fs driver is not path based, return EOPNOTSUPP */
3309      if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) {
3310          err = -EOPNOTSUPP;
3311          goto out;
3312      }
3313      v9fs_path_write_lock(s);
3314      err = v9fs_complete_rename(pdu, fidp, newdirfid, &name);
3315      v9fs_path_unlock(s);
3316      if (!err) {
3317          err = offset;
3318      }
3319  out:
3320      put_fid(pdu, fidp);
3321  out_nofid:
3322      pdu_complete(pdu, err);
3323      v9fs_string_free(&name);
3324  }
3325  
3326  static int coroutine_fn v9fs_fix_fid_paths(V9fsPDU *pdu, V9fsPath *olddir,
3327                                             V9fsString *old_name,
3328                                             V9fsPath *newdir,
3329                                             V9fsString *new_name)
3330  {
3331      V9fsFidState *tfidp;
3332      V9fsPath oldpath, newpath;
3333      V9fsState *s = pdu->s;
3334      int err;
3335      GHashTableIter iter;
3336      gpointer fid;
3337  
3338      v9fs_path_init(&oldpath);
3339      v9fs_path_init(&newpath);
3340      err = v9fs_co_name_to_path(pdu, olddir, old_name->data, &oldpath);
3341      if (err < 0) {
3342          goto out;
3343      }
3344      err = v9fs_co_name_to_path(pdu, newdir, new_name->data, &newpath);
3345      if (err < 0) {
3346          goto out;
3347      }
3348  
3349      /*
3350       * Fixup fid's pointing to the old name to
3351       * start pointing to the new name
3352       */
3353      g_hash_table_iter_init(&iter, s->fids);
3354      while (g_hash_table_iter_next(&iter, &fid, (gpointer *) &tfidp)) {
3355          if (v9fs_path_is_ancestor(&oldpath, &tfidp->path)) {
3356              /* replace the name */
3357              v9fs_fix_path(&tfidp->path, &newpath, strlen(oldpath.data));
3358          }
3359      }
3360  out:
3361      v9fs_path_free(&oldpath);
3362      v9fs_path_free(&newpath);
3363      return err;
3364  }
3365  
3366  static int coroutine_fn v9fs_complete_renameat(V9fsPDU *pdu, int32_t olddirfid,
3367                                                 V9fsString *old_name,
3368                                                 int32_t newdirfid,
3369                                                 V9fsString *new_name)
3370  {
3371      int err = 0;
3372      V9fsState *s = pdu->s;
3373      V9fsFidState *newdirfidp = NULL, *olddirfidp = NULL;
3374  
3375      olddirfidp = get_fid(pdu, olddirfid);
3376      if (olddirfidp == NULL) {
3377          err = -ENOENT;
3378          goto out;
3379      }
3380      if (newdirfid != -1) {
3381          newdirfidp = get_fid(pdu, newdirfid);
3382          if (newdirfidp == NULL) {
3383              err = -ENOENT;
3384              goto out;
3385          }
3386      } else {
3387          newdirfidp = get_fid(pdu, olddirfid);
3388      }
3389  
3390      err = v9fs_co_renameat(pdu, &olddirfidp->path, old_name,
3391                             &newdirfidp->path, new_name);
3392      if (err < 0) {
3393          goto out;
3394      }
3395      if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) {
3396          /* Only for path based fid  we need to do the below fixup */
3397          err = v9fs_fix_fid_paths(pdu, &olddirfidp->path, old_name,
3398                                   &newdirfidp->path, new_name);
3399      }
3400  out:
3401      if (olddirfidp) {
3402          put_fid(pdu, olddirfidp);
3403      }
3404      if (newdirfidp) {
3405          put_fid(pdu, newdirfidp);
3406      }
3407      return err;
3408  }
3409  
3410  static void coroutine_fn v9fs_renameat(void *opaque)
3411  {
3412      ssize_t err = 0;
3413      size_t offset = 7;
3414      V9fsPDU *pdu = opaque;
3415      V9fsState *s = pdu->s;
3416      int32_t olddirfid, newdirfid;
3417      V9fsString old_name, new_name;
3418  
3419      v9fs_string_init(&old_name);
3420      v9fs_string_init(&new_name);
3421      err = pdu_unmarshal(pdu, offset, "dsds", &olddirfid,
3422                          &old_name, &newdirfid, &new_name);
3423      if (err < 0) {
3424          goto out_err;
3425      }
3426  
3427      if (name_is_illegal(old_name.data) || name_is_illegal(new_name.data)) {
3428          err = -ENOENT;
3429          goto out_err;
3430      }
3431  
3432      if (!strcmp(".", old_name.data) || !strcmp("..", old_name.data) ||
3433          !strcmp(".", new_name.data) || !strcmp("..", new_name.data)) {
3434          err = -EISDIR;
3435          goto out_err;
3436      }
3437  
3438      v9fs_path_write_lock(s);
3439      err = v9fs_complete_renameat(pdu, olddirfid,
3440                                   &old_name, newdirfid, &new_name);
3441      v9fs_path_unlock(s);
3442      if (!err) {
3443          err = offset;
3444      }
3445  
3446  out_err:
3447      pdu_complete(pdu, err);
3448      v9fs_string_free(&old_name);
3449      v9fs_string_free(&new_name);
3450  }
3451  
3452  static void coroutine_fn v9fs_wstat(void *opaque)
3453  {
3454      int32_t fid;
3455      int err = 0;
3456      int16_t unused;
3457      V9fsStat v9stat;
3458      size_t offset = 7;
3459      struct stat stbuf;
3460      V9fsFidState *fidp;
3461      V9fsPDU *pdu = opaque;
3462      V9fsState *s = pdu->s;
3463  
3464      v9fs_stat_init(&v9stat);
3465      err = pdu_unmarshal(pdu, offset, "dwS", &fid, &unused, &v9stat);
3466      if (err < 0) {
3467          goto out_nofid;
3468      }
3469      trace_v9fs_wstat(pdu->tag, pdu->id, fid,
3470                       v9stat.mode, v9stat.atime, v9stat.mtime);
3471  
3472      fidp = get_fid(pdu, fid);
3473      if (fidp == NULL) {
3474          err = -EINVAL;
3475          goto out_nofid;
3476      }
3477      /* do we need to sync the file? */
3478      if (donttouch_stat(&v9stat)) {
3479          err = v9fs_co_fsync(pdu, fidp, 0);
3480          goto out;
3481      }
3482      if (v9stat.mode != -1) {
3483          uint32_t v9_mode;
3484          err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
3485          if (err < 0) {
3486              goto out;
3487          }
3488          v9_mode = stat_to_v9mode(&stbuf);
3489          if ((v9stat.mode & P9_STAT_MODE_TYPE_BITS) !=
3490              (v9_mode & P9_STAT_MODE_TYPE_BITS)) {
3491              /* Attempting to change the type */
3492              err = -EIO;
3493              goto out;
3494          }
3495          err = v9fs_co_chmod(pdu, &fidp->path,
3496                              v9mode_to_mode(v9stat.mode,
3497                                             &v9stat.extension));
3498          if (err < 0) {
3499              goto out;
3500          }
3501      }
3502      if (v9stat.mtime != -1 || v9stat.atime != -1) {
3503          struct timespec times[2];
3504          if (v9stat.atime != -1) {
3505              times[0].tv_sec = v9stat.atime;
3506              times[0].tv_nsec = 0;
3507          } else {
3508              times[0].tv_nsec = UTIME_OMIT;
3509          }
3510          if (v9stat.mtime != -1) {
3511              times[1].tv_sec = v9stat.mtime;
3512              times[1].tv_nsec = 0;
3513          } else {
3514              times[1].tv_nsec = UTIME_OMIT;
3515          }
3516          err = v9fs_co_utimensat(pdu, &fidp->path, times);
3517          if (err < 0) {
3518              goto out;
3519          }
3520      }
3521      if (v9stat.n_gid != -1 || v9stat.n_uid != -1) {
3522          err = v9fs_co_chown(pdu, &fidp->path, v9stat.n_uid, v9stat.n_gid);
3523          if (err < 0) {
3524              goto out;
3525          }
3526      }
3527      if (v9stat.name.size != 0) {
3528          v9fs_path_write_lock(s);
3529          err = v9fs_complete_rename(pdu, fidp, -1, &v9stat.name);
3530          v9fs_path_unlock(s);
3531          if (err < 0) {
3532              goto out;
3533          }
3534      }
3535      if (v9stat.length != -1) {
3536          err = v9fs_co_truncate(pdu, &fidp->path, v9stat.length);
3537          if (err < 0) {
3538              goto out;
3539          }
3540      }
3541      err = offset;
3542  out:
3543      put_fid(pdu, fidp);
3544  out_nofid:
3545      v9fs_stat_free(&v9stat);
3546      pdu_complete(pdu, err);
3547  }
3548  
3549  static int v9fs_fill_statfs(V9fsState *s, V9fsPDU *pdu, struct statfs *stbuf)
3550  {
3551      uint32_t f_type;
3552      uint32_t f_bsize;
3553      uint64_t f_blocks;
3554      uint64_t f_bfree;
3555      uint64_t f_bavail;
3556      uint64_t f_files;
3557      uint64_t f_ffree;
3558      uint64_t fsid_val;
3559      uint32_t f_namelen;
3560      size_t offset = 7;
3561      int32_t bsize_factor;
3562  
3563      /*
3564       * compute bsize factor based on host file system block size
3565       * and client msize
3566       */
3567      bsize_factor = (s->msize - P9_IOHDRSZ) / stbuf->f_bsize;
3568      if (!bsize_factor) {
3569          bsize_factor = 1;
3570      }
3571      f_type  = stbuf->f_type;
3572      f_bsize = stbuf->f_bsize;
3573      f_bsize *= bsize_factor;
3574      /*
3575       * f_bsize is adjusted(multiplied) by bsize factor, so we need to
3576       * adjust(divide) the number of blocks, free blocks and available
3577       * blocks by bsize factor
3578       */
3579      f_blocks = stbuf->f_blocks / bsize_factor;
3580      f_bfree  = stbuf->f_bfree / bsize_factor;
3581      f_bavail = stbuf->f_bavail / bsize_factor;
3582      f_files  = stbuf->f_files;
3583      f_ffree  = stbuf->f_ffree;
3584  #ifdef CONFIG_DARWIN
3585      fsid_val = (unsigned int)stbuf->f_fsid.val[0] |
3586                 (unsigned long long)stbuf->f_fsid.val[1] << 32;
3587      f_namelen = NAME_MAX;
3588  #else
3589      fsid_val = (unsigned int) stbuf->f_fsid.__val[0] |
3590                 (unsigned long long)stbuf->f_fsid.__val[1] << 32;
3591      f_namelen = stbuf->f_namelen;
3592  #endif
3593  
3594      return pdu_marshal(pdu, offset, "ddqqqqqqd",
3595                         f_type, f_bsize, f_blocks, f_bfree,
3596                         f_bavail, f_files, f_ffree,
3597                         fsid_val, f_namelen);
3598  }
3599  
3600  static void coroutine_fn v9fs_statfs(void *opaque)
3601  {
3602      int32_t fid;
3603      ssize_t retval = 0;
3604      size_t offset = 7;
3605      V9fsFidState *fidp;
3606      struct statfs stbuf;
3607      V9fsPDU *pdu = opaque;
3608      V9fsState *s = pdu->s;
3609  
3610      retval = pdu_unmarshal(pdu, offset, "d", &fid);
3611      if (retval < 0) {
3612          goto out_nofid;
3613      }
3614      fidp = get_fid(pdu, fid);
3615      if (fidp == NULL) {
3616          retval = -ENOENT;
3617          goto out_nofid;
3618      }
3619      retval = v9fs_co_statfs(pdu, &fidp->path, &stbuf);
3620      if (retval < 0) {
3621          goto out;
3622      }
3623      retval = v9fs_fill_statfs(s, pdu, &stbuf);
3624      if (retval < 0) {
3625          goto out;
3626      }
3627      retval += offset;
3628  out:
3629      put_fid(pdu, fidp);
3630  out_nofid:
3631      pdu_complete(pdu, retval);
3632  }
3633  
3634  static void coroutine_fn v9fs_mknod(void *opaque)
3635  {
3636  
3637      int mode;
3638      gid_t gid;
3639      int32_t fid;
3640      V9fsQID qid;
3641      int err = 0;
3642      int major, minor;
3643      size_t offset = 7;
3644      V9fsString name;
3645      struct stat stbuf;
3646      V9fsFidState *fidp;
3647      V9fsPDU *pdu = opaque;
3648  
3649      v9fs_string_init(&name);
3650      err = pdu_unmarshal(pdu, offset, "dsdddd", &fid, &name, &mode,
3651                          &major, &minor, &gid);
3652      if (err < 0) {
3653          goto out_nofid;
3654      }
3655      trace_v9fs_mknod(pdu->tag, pdu->id, fid, mode, major, minor);
3656  
3657      if (name_is_illegal(name.data)) {
3658          err = -ENOENT;
3659          goto out_nofid;
3660      }
3661  
3662      if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
3663          err = -EEXIST;
3664          goto out_nofid;
3665      }
3666  
3667      fidp = get_fid(pdu, fid);
3668      if (fidp == NULL) {
3669          err = -ENOENT;
3670          goto out_nofid;
3671      }
3672      err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, gid,
3673                          makedev(major, minor), mode, &stbuf);
3674      if (err < 0) {
3675          goto out;
3676      }
3677      err = stat_to_qid(pdu, &stbuf, &qid);
3678      if (err < 0) {
3679          goto out;
3680      }
3681      err = pdu_marshal(pdu, offset, "Q", &qid);
3682      if (err < 0) {
3683          goto out;
3684      }
3685      err += offset;
3686      trace_v9fs_mknod_return(pdu->tag, pdu->id,
3687                              qid.type, qid.version, qid.path);
3688  out:
3689      put_fid(pdu, fidp);
3690  out_nofid:
3691      pdu_complete(pdu, err);
3692      v9fs_string_free(&name);
3693  }
3694  
3695  /*
3696   * Implement posix byte range locking code
3697   * Server side handling of locking code is very simple, because 9p server in
3698   * QEMU can handle only one client. And most of the lock handling
3699   * (like conflict, merging) etc is done by the VFS layer itself, so no need to
3700   * do any thing in * qemu 9p server side lock code path.
3701   * So when a TLOCK request comes, always return success
3702   */
3703  static void coroutine_fn v9fs_lock(void *opaque)
3704  {
3705      V9fsFlock flock;
3706      size_t offset = 7;
3707      struct stat stbuf;
3708      V9fsFidState *fidp;
3709      int32_t fid, err = 0;
3710      V9fsPDU *pdu = opaque;
3711  
3712      v9fs_string_init(&flock.client_id);
3713      err = pdu_unmarshal(pdu, offset, "dbdqqds", &fid, &flock.type,
3714                          &flock.flags, &flock.start, &flock.length,
3715                          &flock.proc_id, &flock.client_id);
3716      if (err < 0) {
3717          goto out_nofid;
3718      }
3719      trace_v9fs_lock(pdu->tag, pdu->id, fid,
3720                      flock.type, flock.start, flock.length);
3721  
3722  
3723      /* We support only block flag now (that too ignored currently) */
3724      if (flock.flags & ~P9_LOCK_FLAGS_BLOCK) {
3725          err = -EINVAL;
3726          goto out_nofid;
3727      }
3728      fidp = get_fid(pdu, fid);
3729      if (fidp == NULL) {
3730          err = -ENOENT;
3731          goto out_nofid;
3732      }
3733      err = v9fs_co_fstat(pdu, fidp, &stbuf);
3734      if (err < 0) {
3735          goto out;
3736      }
3737      err = pdu_marshal(pdu, offset, "b", P9_LOCK_SUCCESS);
3738      if (err < 0) {
3739          goto out;
3740      }
3741      err += offset;
3742      trace_v9fs_lock_return(pdu->tag, pdu->id, P9_LOCK_SUCCESS);
3743  out:
3744      put_fid(pdu, fidp);
3745  out_nofid:
3746      pdu_complete(pdu, err);
3747      v9fs_string_free(&flock.client_id);
3748  }
3749  
3750  /*
3751   * When a TGETLOCK request comes, always return success because all lock
3752   * handling is done by client's VFS layer.
3753   */
3754  static void coroutine_fn v9fs_getlock(void *opaque)
3755  {
3756      size_t offset = 7;
3757      struct stat stbuf;
3758      V9fsFidState *fidp;
3759      V9fsGetlock glock;
3760      int32_t fid, err = 0;
3761      V9fsPDU *pdu = opaque;
3762  
3763      v9fs_string_init(&glock.client_id);
3764      err = pdu_unmarshal(pdu, offset, "dbqqds", &fid, &glock.type,
3765                          &glock.start, &glock.length, &glock.proc_id,
3766                          &glock.client_id);
3767      if (err < 0) {
3768          goto out_nofid;
3769      }
3770      trace_v9fs_getlock(pdu->tag, pdu->id, fid,
3771                         glock.type, glock.start, glock.length);
3772  
3773      fidp = get_fid(pdu, fid);
3774      if (fidp == NULL) {
3775          err = -ENOENT;
3776          goto out_nofid;
3777      }
3778      err = v9fs_co_fstat(pdu, fidp, &stbuf);
3779      if (err < 0) {
3780          goto out;
3781      }
3782      glock.type = P9_LOCK_TYPE_UNLCK;
3783      err = pdu_marshal(pdu, offset, "bqqds", glock.type,
3784                            glock.start, glock.length, glock.proc_id,
3785                            &glock.client_id);
3786      if (err < 0) {
3787          goto out;
3788      }
3789      err += offset;
3790      trace_v9fs_getlock_return(pdu->tag, pdu->id, glock.type, glock.start,
3791                                glock.length, glock.proc_id);
3792  out:
3793      put_fid(pdu, fidp);
3794  out_nofid:
3795      pdu_complete(pdu, err);
3796      v9fs_string_free(&glock.client_id);
3797  }
3798  
3799  static void coroutine_fn v9fs_mkdir(void *opaque)
3800  {
3801      V9fsPDU *pdu = opaque;
3802      size_t offset = 7;
3803      int32_t fid;
3804      struct stat stbuf;
3805      V9fsQID qid;
3806      V9fsString name;
3807      V9fsFidState *fidp;
3808      gid_t gid;
3809      int mode;
3810      int err = 0;
3811  
3812      v9fs_string_init(&name);
3813      err = pdu_unmarshal(pdu, offset, "dsdd", &fid, &name, &mode, &gid);
3814      if (err < 0) {
3815          goto out_nofid;
3816      }
3817      trace_v9fs_mkdir(pdu->tag, pdu->id, fid, name.data, mode, gid);
3818  
3819      if (name_is_illegal(name.data)) {
3820          err = -ENOENT;
3821          goto out_nofid;
3822      }
3823  
3824      if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
3825          err = -EEXIST;
3826          goto out_nofid;
3827      }
3828  
3829      fidp = get_fid(pdu, fid);
3830      if (fidp == NULL) {
3831          err = -ENOENT;
3832          goto out_nofid;
3833      }
3834      err = v9fs_co_mkdir(pdu, fidp, &name, mode, fidp->uid, gid, &stbuf);
3835      if (err < 0) {
3836          goto out;
3837      }
3838      err = stat_to_qid(pdu, &stbuf, &qid);
3839      if (err < 0) {
3840          goto out;
3841      }
3842      err = pdu_marshal(pdu, offset, "Q", &qid);
3843      if (err < 0) {
3844          goto out;
3845      }
3846      err += offset;
3847      trace_v9fs_mkdir_return(pdu->tag, pdu->id,
3848                              qid.type, qid.version, qid.path, err);
3849  out:
3850      put_fid(pdu, fidp);
3851  out_nofid:
3852      pdu_complete(pdu, err);
3853      v9fs_string_free(&name);
3854  }
3855  
3856  static void coroutine_fn v9fs_xattrwalk(void *opaque)
3857  {
3858      int64_t size;
3859      V9fsString name;
3860      ssize_t err = 0;
3861      size_t offset = 7;
3862      int32_t fid, newfid;
3863      V9fsFidState *file_fidp;
3864      V9fsFidState *xattr_fidp = NULL;
3865      V9fsPDU *pdu = opaque;
3866      V9fsState *s = pdu->s;
3867  
3868      v9fs_string_init(&name);
3869      err = pdu_unmarshal(pdu, offset, "dds", &fid, &newfid, &name);
3870      if (err < 0) {
3871          goto out_nofid;
3872      }
3873      trace_v9fs_xattrwalk(pdu->tag, pdu->id, fid, newfid, name.data);
3874  
3875      file_fidp = get_fid(pdu, fid);
3876      if (file_fidp == NULL) {
3877          err = -ENOENT;
3878          goto out_nofid;
3879      }
3880      xattr_fidp = alloc_fid(s, newfid);
3881      if (xattr_fidp == NULL) {
3882          err = -EINVAL;
3883          goto out;
3884      }
3885      v9fs_path_copy(&xattr_fidp->path, &file_fidp->path);
3886      if (!v9fs_string_size(&name)) {
3887          /*
3888           * listxattr request. Get the size first
3889           */
3890          size = v9fs_co_llistxattr(pdu, &xattr_fidp->path, NULL, 0);
3891          if (size < 0) {
3892              err = size;
3893              clunk_fid(s, xattr_fidp->fid);
3894              goto out;
3895          }
3896          /*
3897           * Read the xattr value
3898           */
3899          xattr_fidp->fs.xattr.len = size;
3900          xattr_fidp->fid_type = P9_FID_XATTR;
3901          xattr_fidp->fs.xattr.xattrwalk_fid = true;
3902          xattr_fidp->fs.xattr.value = g_malloc0(size);
3903          if (size) {
3904              err = v9fs_co_llistxattr(pdu, &xattr_fidp->path,
3905                                       xattr_fidp->fs.xattr.value,
3906                                       xattr_fidp->fs.xattr.len);
3907              if (err < 0) {
3908                  clunk_fid(s, xattr_fidp->fid);
3909                  goto out;
3910              }
3911          }
3912          err = pdu_marshal(pdu, offset, "q", size);
3913          if (err < 0) {
3914              goto out;
3915          }
3916          err += offset;
3917      } else {
3918          /*
3919           * specific xattr fid. We check for xattr
3920           * presence also collect the xattr size
3921           */
3922          size = v9fs_co_lgetxattr(pdu, &xattr_fidp->path,
3923                                   &name, NULL, 0);
3924          if (size < 0) {
3925              err = size;
3926              clunk_fid(s, xattr_fidp->fid);
3927              goto out;
3928          }
3929          /*
3930           * Read the xattr value
3931           */
3932          xattr_fidp->fs.xattr.len = size;
3933          xattr_fidp->fid_type = P9_FID_XATTR;
3934          xattr_fidp->fs.xattr.xattrwalk_fid = true;
3935          xattr_fidp->fs.xattr.value = g_malloc0(size);
3936          if (size) {
3937              err = v9fs_co_lgetxattr(pdu, &xattr_fidp->path,
3938                                      &name, xattr_fidp->fs.xattr.value,
3939                                      xattr_fidp->fs.xattr.len);
3940              if (err < 0) {
3941                  clunk_fid(s, xattr_fidp->fid);
3942                  goto out;
3943              }
3944          }
3945          err = pdu_marshal(pdu, offset, "q", size);
3946          if (err < 0) {
3947              goto out;
3948          }
3949          err += offset;
3950      }
3951      trace_v9fs_xattrwalk_return(pdu->tag, pdu->id, size);
3952  out:
3953      put_fid(pdu, file_fidp);
3954      if (xattr_fidp) {
3955          put_fid(pdu, xattr_fidp);
3956      }
3957  out_nofid:
3958      pdu_complete(pdu, err);
3959      v9fs_string_free(&name);
3960  }
3961  
3962  #if defined(CONFIG_LINUX)
3963  /* Currently, only Linux has XATTR_SIZE_MAX */
3964  #define P9_XATTR_SIZE_MAX XATTR_SIZE_MAX
3965  #elif defined(CONFIG_DARWIN)
3966  /*
3967   * Darwin doesn't seem to define a maximum xattr size in its user
3968   * space header, so manually configure it across platforms as 64k.
3969   *
3970   * Having no limit at all can lead to QEMU crashing during large g_malloc()
3971   * calls. Because QEMU does not currently support macOS guests, the below
3972   * preliminary solution only works due to its being a reflection of the limit of
3973   * Linux guests.
3974   */
3975  #define P9_XATTR_SIZE_MAX 65536
3976  #else
3977  #error Missing definition for P9_XATTR_SIZE_MAX for this host system
3978  #endif
3979  
3980  static void coroutine_fn v9fs_xattrcreate(void *opaque)
3981  {
3982      int flags, rflags = 0;
3983      int32_t fid;
3984      uint64_t size;
3985      ssize_t err = 0;
3986      V9fsString name;
3987      size_t offset = 7;
3988      V9fsFidState *file_fidp;
3989      V9fsFidState *xattr_fidp;
3990      V9fsPDU *pdu = opaque;
3991  
3992      v9fs_string_init(&name);
3993      err = pdu_unmarshal(pdu, offset, "dsqd", &fid, &name, &size, &flags);
3994      if (err < 0) {
3995          goto out_nofid;
3996      }
3997      trace_v9fs_xattrcreate(pdu->tag, pdu->id, fid, name.data, size, flags);
3998  
3999      if (flags & ~(P9_XATTR_CREATE | P9_XATTR_REPLACE)) {
4000          err = -EINVAL;
4001          goto out_nofid;
4002      }
4003  
4004      if (flags & P9_XATTR_CREATE) {
4005          rflags |= XATTR_CREATE;
4006      }
4007  
4008      if (flags & P9_XATTR_REPLACE) {
4009          rflags |= XATTR_REPLACE;
4010      }
4011  
4012      if (size > P9_XATTR_SIZE_MAX) {
4013          err = -E2BIG;
4014          goto out_nofid;
4015      }
4016  
4017      file_fidp = get_fid(pdu, fid);
4018      if (file_fidp == NULL) {
4019          err = -EINVAL;
4020          goto out_nofid;
4021      }
4022      if (file_fidp->fid_type != P9_FID_NONE) {
4023          err = -EINVAL;
4024          goto out_put_fid;
4025      }
4026  
4027      /* Make the file fid point to xattr */
4028      xattr_fidp = file_fidp;
4029      xattr_fidp->fid_type = P9_FID_XATTR;
4030      xattr_fidp->fs.xattr.copied_len = 0;
4031      xattr_fidp->fs.xattr.xattrwalk_fid = false;
4032      xattr_fidp->fs.xattr.len = size;
4033      xattr_fidp->fs.xattr.flags = rflags;
4034      v9fs_string_init(&xattr_fidp->fs.xattr.name);
4035      v9fs_string_copy(&xattr_fidp->fs.xattr.name, &name);
4036      xattr_fidp->fs.xattr.value = g_malloc0(size);
4037      err = offset;
4038  out_put_fid:
4039      put_fid(pdu, file_fidp);
4040  out_nofid:
4041      pdu_complete(pdu, err);
4042      v9fs_string_free(&name);
4043  }
4044  
4045  static void coroutine_fn v9fs_readlink(void *opaque)
4046  {
4047      V9fsPDU *pdu = opaque;
4048      size_t offset = 7;
4049      V9fsString target;
4050      int32_t fid;
4051      int err = 0;
4052      V9fsFidState *fidp;
4053  
4054      err = pdu_unmarshal(pdu, offset, "d", &fid);
4055      if (err < 0) {
4056          goto out_nofid;
4057      }
4058      trace_v9fs_readlink(pdu->tag, pdu->id, fid);
4059      fidp = get_fid(pdu, fid);
4060      if (fidp == NULL) {
4061          err = -ENOENT;
4062          goto out_nofid;
4063      }
4064  
4065      v9fs_string_init(&target);
4066      err = v9fs_co_readlink(pdu, &fidp->path, &target);
4067      if (err < 0) {
4068          goto out;
4069      }
4070      err = pdu_marshal(pdu, offset, "s", &target);
4071      if (err < 0) {
4072          v9fs_string_free(&target);
4073          goto out;
4074      }
4075      err += offset;
4076      trace_v9fs_readlink_return(pdu->tag, pdu->id, target.data);
4077      v9fs_string_free(&target);
4078  out:
4079      put_fid(pdu, fidp);
4080  out_nofid:
4081      pdu_complete(pdu, err);
4082  }
4083  
4084  static CoroutineEntry *pdu_co_handlers[] = {
4085      [P9_TREADDIR] = v9fs_readdir,
4086      [P9_TSTATFS] = v9fs_statfs,
4087      [P9_TGETATTR] = v9fs_getattr,
4088      [P9_TSETATTR] = v9fs_setattr,
4089      [P9_TXATTRWALK] = v9fs_xattrwalk,
4090      [P9_TXATTRCREATE] = v9fs_xattrcreate,
4091      [P9_TMKNOD] = v9fs_mknod,
4092      [P9_TRENAME] = v9fs_rename,
4093      [P9_TLOCK] = v9fs_lock,
4094      [P9_TGETLOCK] = v9fs_getlock,
4095      [P9_TRENAMEAT] = v9fs_renameat,
4096      [P9_TREADLINK] = v9fs_readlink,
4097      [P9_TUNLINKAT] = v9fs_unlinkat,
4098      [P9_TMKDIR] = v9fs_mkdir,
4099      [P9_TVERSION] = v9fs_version,
4100      [P9_TLOPEN] = v9fs_open,
4101      [P9_TATTACH] = v9fs_attach,
4102      [P9_TSTAT] = v9fs_stat,
4103      [P9_TWALK] = v9fs_walk,
4104      [P9_TCLUNK] = v9fs_clunk,
4105      [P9_TFSYNC] = v9fs_fsync,
4106      [P9_TOPEN] = v9fs_open,
4107      [P9_TREAD] = v9fs_read,
4108  #if 0
4109      [P9_TAUTH] = v9fs_auth,
4110  #endif
4111      [P9_TFLUSH] = v9fs_flush,
4112      [P9_TLINK] = v9fs_link,
4113      [P9_TSYMLINK] = v9fs_symlink,
4114      [P9_TCREATE] = v9fs_create,
4115      [P9_TLCREATE] = v9fs_lcreate,
4116      [P9_TWRITE] = v9fs_write,
4117      [P9_TWSTAT] = v9fs_wstat,
4118      [P9_TREMOVE] = v9fs_remove,
4119  };
4120  
4121  static void coroutine_fn v9fs_op_not_supp(void *opaque)
4122  {
4123      V9fsPDU *pdu = opaque;
4124      pdu_complete(pdu, -EOPNOTSUPP);
4125  }
4126  
4127  static void coroutine_fn v9fs_fs_ro(void *opaque)
4128  {
4129      V9fsPDU *pdu = opaque;
4130      pdu_complete(pdu, -EROFS);
4131  }
4132  
4133  static inline bool is_read_only_op(V9fsPDU *pdu)
4134  {
4135      switch (pdu->id) {
4136      case P9_TREADDIR:
4137      case P9_TSTATFS:
4138      case P9_TGETATTR:
4139      case P9_TXATTRWALK:
4140      case P9_TLOCK:
4141      case P9_TGETLOCK:
4142      case P9_TREADLINK:
4143      case P9_TVERSION:
4144      case P9_TLOPEN:
4145      case P9_TATTACH:
4146      case P9_TSTAT:
4147      case P9_TWALK:
4148      case P9_TCLUNK:
4149      case P9_TFSYNC:
4150      case P9_TOPEN:
4151      case P9_TREAD:
4152      case P9_TAUTH:
4153      case P9_TFLUSH:
4154          return 1;
4155      default:
4156          return 0;
4157      }
4158  }
4159  
4160  void pdu_submit(V9fsPDU *pdu, P9MsgHeader *hdr)
4161  {
4162      Coroutine *co;
4163      CoroutineEntry *handler;
4164      V9fsState *s = pdu->s;
4165  
4166      pdu->size = le32_to_cpu(hdr->size_le);
4167      pdu->id = hdr->id;
4168      pdu->tag = le16_to_cpu(hdr->tag_le);
4169  
4170      if (pdu->id >= ARRAY_SIZE(pdu_co_handlers) ||
4171          (pdu_co_handlers[pdu->id] == NULL)) {
4172          handler = v9fs_op_not_supp;
4173      } else if (is_ro_export(&s->ctx) && !is_read_only_op(pdu)) {
4174          handler = v9fs_fs_ro;
4175      } else {
4176          handler = pdu_co_handlers[pdu->id];
4177      }
4178  
4179      qemu_co_queue_init(&pdu->complete);
4180      co = qemu_coroutine_create(handler, pdu);
4181      qemu_coroutine_enter(co);
4182  }
4183  
4184  /* Returns 0 on success, 1 on failure. */
4185  int v9fs_device_realize_common(V9fsState *s, const V9fsTransport *t,
4186                                 Error **errp)
4187  {
4188      ERRP_GUARD();
4189      int i, len;
4190      struct stat stat;
4191      FsDriverEntry *fse;
4192      V9fsPath path;
4193      int rc = 1;
4194  
4195      assert(!s->transport);
4196      s->transport = t;
4197  
4198      /* initialize pdu allocator */
4199      QLIST_INIT(&s->free_list);
4200      QLIST_INIT(&s->active_list);
4201      for (i = 0; i < MAX_REQ; i++) {
4202          QLIST_INSERT_HEAD(&s->free_list, &s->pdus[i], next);
4203          s->pdus[i].s = s;
4204          s->pdus[i].idx = i;
4205      }
4206  
4207      v9fs_path_init(&path);
4208  
4209      fse = get_fsdev_fsentry(s->fsconf.fsdev_id);
4210  
4211      if (!fse) {
4212          /* We don't have a fsdev identified by fsdev_id */
4213          error_setg(errp, "9pfs device couldn't find fsdev with the "
4214                     "id = %s",
4215                     s->fsconf.fsdev_id ? s->fsconf.fsdev_id : "NULL");
4216          goto out;
4217      }
4218  
4219      if (!s->fsconf.tag) {
4220          /* we haven't specified a mount_tag */
4221          error_setg(errp, "fsdev with id %s needs mount_tag arguments",
4222                     s->fsconf.fsdev_id);
4223          goto out;
4224      }
4225  
4226      s->ctx.export_flags = fse->export_flags;
4227      s->ctx.fs_root = g_strdup(fse->path);
4228      s->ctx.exops.get_st_gen = NULL;
4229      len = strlen(s->fsconf.tag);
4230      if (len > MAX_TAG_LEN - 1) {
4231          error_setg(errp, "mount tag '%s' (%d bytes) is longer than "
4232                     "maximum (%d bytes)", s->fsconf.tag, len, MAX_TAG_LEN - 1);
4233          goto out;
4234      }
4235  
4236      s->tag = g_strdup(s->fsconf.tag);
4237      s->ctx.uid = -1;
4238  
4239      s->ops = fse->ops;
4240  
4241      s->ctx.fmode = fse->fmode;
4242      s->ctx.dmode = fse->dmode;
4243  
4244      s->fids = g_hash_table_new(NULL, NULL);
4245      qemu_co_rwlock_init(&s->rename_lock);
4246  
4247      if (s->ops->init(&s->ctx, errp) < 0) {
4248          error_prepend(errp, "cannot initialize fsdev '%s': ",
4249                        s->fsconf.fsdev_id);
4250          goto out;
4251      }
4252  
4253      /*
4254       * Check details of export path, We need to use fs driver
4255       * call back to do that. Since we are in the init path, we don't
4256       * use co-routines here.
4257       */
4258      if (s->ops->name_to_path(&s->ctx, NULL, "/", &path) < 0) {
4259          error_setg(errp,
4260                     "error in converting name to path %s", strerror(errno));
4261          goto out;
4262      }
4263      if (s->ops->lstat(&s->ctx, &path, &stat)) {
4264          error_setg(errp, "share path %s does not exist", fse->path);
4265          goto out;
4266      } else if (!S_ISDIR(stat.st_mode)) {
4267          error_setg(errp, "share path %s is not a directory", fse->path);
4268          goto out;
4269      }
4270  
4271      s->dev_id = stat.st_dev;
4272  
4273      /* init inode remapping : */
4274      /* hash table for variable length inode suffixes */
4275      qpd_table_init(&s->qpd_table);
4276      /* hash table for slow/full inode remapping (most users won't need it) */
4277      qpf_table_init(&s->qpf_table);
4278      /* hash table for quick inode remapping */
4279      qpp_table_init(&s->qpp_table);
4280      s->qp_ndevices = 0;
4281      s->qp_affix_next = 1; /* reserve 0 to detect overflow */
4282      s->qp_fullpath_next = 1;
4283  
4284      s->ctx.fst = &fse->fst;
4285      fsdev_throttle_init(s->ctx.fst);
4286  
4287      rc = 0;
4288  out:
4289      if (rc) {
4290          v9fs_device_unrealize_common(s);
4291      }
4292      v9fs_path_free(&path);
4293      return rc;
4294  }
4295  
4296  void v9fs_device_unrealize_common(V9fsState *s)
4297  {
4298      if (s->ops && s->ops->cleanup) {
4299          s->ops->cleanup(&s->ctx);
4300      }
4301      if (s->ctx.fst) {
4302          fsdev_throttle_cleanup(s->ctx.fst);
4303      }
4304      if (s->fids) {
4305          g_hash_table_destroy(s->fids);
4306          s->fids = NULL;
4307      }
4308      g_free(s->tag);
4309      qp_table_destroy(&s->qpd_table);
4310      qp_table_destroy(&s->qpp_table);
4311      qp_table_destroy(&s->qpf_table);
4312      g_free(s->ctx.fs_root);
4313  }
4314  
4315  typedef struct VirtfsCoResetData {
4316      V9fsPDU pdu;
4317      bool done;
4318  } VirtfsCoResetData;
4319  
4320  static void coroutine_fn virtfs_co_reset(void *opaque)
4321  {
4322      VirtfsCoResetData *data = opaque;
4323  
4324      virtfs_reset(&data->pdu);
4325      data->done = true;
4326  }
4327  
4328  void v9fs_reset(V9fsState *s)
4329  {
4330      VirtfsCoResetData data = { .pdu = { .s = s }, .done = false };
4331      Coroutine *co;
4332  
4333      while (!QLIST_EMPTY(&s->active_list)) {
4334          aio_poll(qemu_get_aio_context(), true);
4335      }
4336  
4337      co = qemu_coroutine_create(virtfs_co_reset, &data);
4338      qemu_coroutine_enter(co);
4339  
4340      while (!data.done) {
4341          aio_poll(qemu_get_aio_context(), true);
4342      }
4343  }
4344  
4345  static void __attribute__((__constructor__)) v9fs_set_fd_limit(void)
4346  {
4347      struct rlimit rlim;
4348      if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) {
4349          error_report("Failed to get the resource limit");
4350          exit(1);
4351      }
4352      open_fd_hw = rlim.rlim_cur - MIN(400, rlim.rlim_cur / 3);
4353      open_fd_rc = rlim.rlim_cur / 2;
4354  }
4355