xref: /openbmc/qemu/hw/9pfs/9p.c (revision c3033fd372fdaf5b89190136a74b3d78880b85d6)
1  /*
2   * Virtio 9p backend
3   *
4   * Copyright IBM, Corp. 2010
5   *
6   * Authors:
7   *  Anthony Liguori   <aliguori@us.ibm.com>
8   *
9   * This work is licensed under the terms of the GNU GPL, version 2.  See
10   * the COPYING file in the top-level directory.
11   *
12   */
13  
14  #include "qemu/osdep.h"
15  #include <glib/gprintf.h>
16  #include "hw/virtio/virtio.h"
17  #include "qapi/error.h"
18  #include "qemu/error-report.h"
19  #include "qemu/iov.h"
20  #include "qemu/main-loop.h"
21  #include "qemu/sockets.h"
22  #include "virtio-9p.h"
23  #include "fsdev/qemu-fsdev.h"
24  #include "9p-xattr.h"
25  #include "coth.h"
26  #include "trace.h"
27  #include "migration/blocker.h"
28  #include "sysemu/qtest.h"
29  #include "qemu/xxhash.h"
30  #include <math.h>
31  #include <linux/limits.h>
32  
/* NOTE(review): not referenced in the visible part of this file - confirm use. */
int open_fd_hw;
/* NOTE(review): not referenced in the visible part of this file - confirm use. */
int total_open_fd;
/*
 * Batch limit for v9fs_reclaim_fd(): that function stops collecting fids
 * once this many descriptors have been queued for closing.
 */
static int open_fd_rc;
36  
/*
 * Open mode values as consumed by omode_to_uflags(): the low two bits
 * select the access mode, the larger values are individual flag bits.
 */
enum {
    Oread   = 0x00,   /* translated to O_RDONLY */
    Owrite  = 0x01,   /* translated to O_WRONLY */
    Ordwr   = 0x02,   /* translated to O_RDWR */
    Oexec   = 0x03,   /* translated to O_RDONLY as well */
    Oexcl   = 0x04,   /* translated to O_EXCL */
    Otrunc  = 0x10,   /* translated to O_TRUNC */
    Orexec  = 0x20,   /* not translated by omode_to_uflags() */
    Orclose = 0x40,   /* not translated by omode_to_uflags() */
    Oappend = 0x80,   /* translated to O_APPEND */
};
48  
49  static ssize_t pdu_marshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...)
50  {
51      ssize_t ret;
52      va_list ap;
53  
54      va_start(ap, fmt);
55      ret = pdu->s->transport->pdu_vmarshal(pdu, offset, fmt, ap);
56      va_end(ap);
57  
58      return ret;
59  }
60  
61  static ssize_t pdu_unmarshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...)
62  {
63      ssize_t ret;
64      va_list ap;
65  
66      va_start(ap, fmt);
67      ret = pdu->s->transport->pdu_vunmarshal(pdu, offset, fmt, ap);
68      va_end(ap);
69  
70      return ret;
71  }
72  
73  static int omode_to_uflags(int8_t mode)
74  {
75      int ret = 0;
76  
77      switch (mode & 3) {
78      case Oread:
79          ret = O_RDONLY;
80          break;
81      case Ordwr:
82          ret = O_RDWR;
83          break;
84      case Owrite:
85          ret = O_WRONLY;
86          break;
87      case Oexec:
88          ret = O_RDONLY;
89          break;
90      }
91  
92      if (mode & Otrunc) {
93          ret |= O_TRUNC;
94      }
95  
96      if (mode & Oappend) {
97          ret |= O_APPEND;
98      }
99  
100      if (mode & Oexcl) {
101          ret |= O_EXCL;
102      }
103  
104      return ret;
105  }
106  
/* One row of the translation table used by dotl_to_open_flags(). */
typedef struct DotlOpenflagMap {
    int dotl_flag;   /* P9_DOTL_* bit tested in the incoming flags */
    int open_flag;   /* flag OR-ed into the result when dotl_flag is set */
} DotlOpenflagMap;
111  
112  static int dotl_to_open_flags(int flags)
113  {
114      int i;
115      /*
116       * We have same bits for P9_DOTL_READONLY, P9_DOTL_WRONLY
117       * and P9_DOTL_NOACCESS
118       */
119      int oflags = flags & O_ACCMODE;
120  
121      DotlOpenflagMap dotl_oflag_map[] = {
122          { P9_DOTL_CREATE, O_CREAT },
123          { P9_DOTL_EXCL, O_EXCL },
124          { P9_DOTL_NOCTTY , O_NOCTTY },
125          { P9_DOTL_TRUNC, O_TRUNC },
126          { P9_DOTL_APPEND, O_APPEND },
127          { P9_DOTL_NONBLOCK, O_NONBLOCK } ,
128          { P9_DOTL_DSYNC, O_DSYNC },
129          { P9_DOTL_FASYNC, FASYNC },
130          { P9_DOTL_DIRECT, O_DIRECT },
131          { P9_DOTL_LARGEFILE, O_LARGEFILE },
132          { P9_DOTL_DIRECTORY, O_DIRECTORY },
133          { P9_DOTL_NOFOLLOW, O_NOFOLLOW },
134          { P9_DOTL_NOATIME, O_NOATIME },
135          { P9_DOTL_SYNC, O_SYNC },
136      };
137  
138      for (i = 0; i < ARRAY_SIZE(dotl_oflag_map); i++) {
139          if (flags & dotl_oflag_map[i].dotl_flag) {
140              oflags |= dotl_oflag_map[i].open_flag;
141          }
142      }
143  
144      return oflags;
145  }
146  
147  void cred_init(FsCred *credp)
148  {
149      credp->fc_uid = -1;
150      credp->fc_gid = -1;
151      credp->fc_mode = -1;
152      credp->fc_rdev = -1;
153  }
154  
155  static int get_dotl_openflags(V9fsState *s, int oflags)
156  {
157      int flags;
158      /*
159       * Filter the client open flags
160       */
161      flags = dotl_to_open_flags(oflags);
162      flags &= ~(O_NOCTTY | O_ASYNC | O_CREAT);
163      /*
164       * Ignore direct disk access hint until the server supports it.
165       */
166      flags &= ~O_DIRECT;
167      return flags;
168  }
169  
170  void v9fs_path_init(V9fsPath *path)
171  {
172      path->data = NULL;
173      path->size = 0;
174  }
175  
176  void v9fs_path_free(V9fsPath *path)
177  {
178      g_free(path->data);
179      path->data = NULL;
180      path->size = 0;
181  }
182  
183  
184  void GCC_FMT_ATTR(2, 3)
185  v9fs_path_sprintf(V9fsPath *path, const char *fmt, ...)
186  {
187      va_list ap;
188  
189      v9fs_path_free(path);
190  
191      va_start(ap, fmt);
192      /* Bump the size for including terminating NULL */
193      path->size = g_vasprintf(&path->data, fmt, ap) + 1;
194      va_end(ap);
195  }
196  
197  void v9fs_path_copy(V9fsPath *dst, const V9fsPath *src)
198  {
199      v9fs_path_free(dst);
200      dst->size = src->size;
201      dst->data = g_memdup(src->data, src->size);
202  }
203  
204  int v9fs_name_to_path(V9fsState *s, V9fsPath *dirpath,
205                        const char *name, V9fsPath *path)
206  {
207      int err;
208      err = s->ops->name_to_path(&s->ctx, dirpath, name, path);
209      if (err < 0) {
210          err = -errno;
211      }
212      return err;
213  }
214  
215  /*
216   * Return TRUE if s1 is an ancestor of s2.
217   *
218   * E.g. "a/b" is an ancestor of "a/b/c" but not of "a/bc/d".
219   * As a special case, We treat s1 as ancestor of s2 if they are same!
220   */
221  static int v9fs_path_is_ancestor(V9fsPath *s1, V9fsPath *s2)
222  {
223      if (!strncmp(s1->data, s2->data, s1->size - 1)) {
224          if (s2->data[s1->size - 1] == '\0' || s2->data[s1->size - 1] == '/') {
225              return 1;
226          }
227      }
228      return 0;
229  }
230  
231  static size_t v9fs_string_size(V9fsString *str)
232  {
233      return str->size;
234  }
235  
236  /*
237   * returns 0 if fid got re-opened, 1 if not, < 0 on error */
238  static int coroutine_fn v9fs_reopen_fid(V9fsPDU *pdu, V9fsFidState *f)
239  {
240      int err = 1;
241      if (f->fid_type == P9_FID_FILE) {
242          if (f->fs.fd == -1) {
243              do {
244                  err = v9fs_co_open(pdu, f, f->open_flags);
245              } while (err == -EINTR && !pdu->cancelled);
246          }
247      } else if (f->fid_type == P9_FID_DIR) {
248          if (f->fs.dir.stream == NULL) {
249              do {
250                  err = v9fs_co_opendir(pdu, f);
251              } while (err == -EINTR && !pdu->cancelled);
252          }
253      }
254      return err;
255  }
256  
257  static V9fsFidState *coroutine_fn get_fid(V9fsPDU *pdu, int32_t fid)
258  {
259      int err;
260      V9fsFidState *f;
261      V9fsState *s = pdu->s;
262  
263      QSIMPLEQ_FOREACH(f, &s->fid_list, next) {
264          BUG_ON(f->clunked);
265          if (f->fid == fid) {
266              /*
267               * Update the fid ref upfront so that
268               * we don't get reclaimed when we yield
269               * in open later.
270               */
271              f->ref++;
272              /*
273               * check whether we need to reopen the
274               * file. We might have closed the fd
275               * while trying to free up some file
276               * descriptors.
277               */
278              err = v9fs_reopen_fid(pdu, f);
279              if (err < 0) {
280                  f->ref--;
281                  return NULL;
282              }
283              /*
284               * Mark the fid as referenced so that the LRU
285               * reclaim won't close the file descriptor
286               */
287              f->flags |= FID_REFERENCED;
288              return f;
289          }
290      }
291      return NULL;
292  }
293  
294  static V9fsFidState *alloc_fid(V9fsState *s, int32_t fid)
295  {
296      V9fsFidState *f;
297  
298      QSIMPLEQ_FOREACH(f, &s->fid_list, next) {
299          /* If fid is already there return NULL */
300          BUG_ON(f->clunked);
301          if (f->fid == fid) {
302              return NULL;
303          }
304      }
305      f = g_malloc0(sizeof(V9fsFidState));
306      f->fid = fid;
307      f->fid_type = P9_FID_NONE;
308      f->ref = 1;
309      /*
310       * Mark the fid as referenced so that the LRU
311       * reclaim won't close the file descriptor
312       */
313      f->flags |= FID_REFERENCED;
314      QSIMPLEQ_INSERT_TAIL(&s->fid_list, f, next);
315  
316      v9fs_readdir_init(s->proto_version, &f->fs.dir);
317      v9fs_readdir_init(s->proto_version, &f->fs_reclaim.dir);
318  
319      return f;
320  }
321  
322  static int coroutine_fn v9fs_xattr_fid_clunk(V9fsPDU *pdu, V9fsFidState *fidp)
323  {
324      int retval = 0;
325  
326      if (fidp->fs.xattr.xattrwalk_fid) {
327          /* getxattr/listxattr fid */
328          goto free_value;
329      }
330      /*
331       * if this is fid for setxattr. clunk should
332       * result in setxattr localcall
333       */
334      if (fidp->fs.xattr.len != fidp->fs.xattr.copied_len) {
335          /* clunk after partial write */
336          retval = -EINVAL;
337          goto free_out;
338      }
339      if (fidp->fs.xattr.len) {
340          retval = v9fs_co_lsetxattr(pdu, &fidp->path, &fidp->fs.xattr.name,
341                                     fidp->fs.xattr.value,
342                                     fidp->fs.xattr.len,
343                                     fidp->fs.xattr.flags);
344      } else {
345          retval = v9fs_co_lremovexattr(pdu, &fidp->path, &fidp->fs.xattr.name);
346      }
347  free_out:
348      v9fs_string_free(&fidp->fs.xattr.name);
349  free_value:
350      g_free(fidp->fs.xattr.value);
351      return retval;
352  }
353  
354  static int coroutine_fn free_fid(V9fsPDU *pdu, V9fsFidState *fidp)
355  {
356      int retval = 0;
357  
358      if (fidp->fid_type == P9_FID_FILE) {
359          /* If we reclaimed the fd no need to close */
360          if (fidp->fs.fd != -1) {
361              retval = v9fs_co_close(pdu, &fidp->fs);
362          }
363      } else if (fidp->fid_type == P9_FID_DIR) {
364          if (fidp->fs.dir.stream != NULL) {
365              retval = v9fs_co_closedir(pdu, &fidp->fs);
366          }
367      } else if (fidp->fid_type == P9_FID_XATTR) {
368          retval = v9fs_xattr_fid_clunk(pdu, fidp);
369      }
370      v9fs_path_free(&fidp->path);
371      g_free(fidp);
372      return retval;
373  }
374  
375  static int coroutine_fn put_fid(V9fsPDU *pdu, V9fsFidState *fidp)
376  {
377      BUG_ON(!fidp->ref);
378      fidp->ref--;
379      /*
380       * Don't free the fid if it is in reclaim list
381       */
382      if (!fidp->ref && fidp->clunked) {
383          if (fidp->fid == pdu->s->root_fid) {
384              /*
385               * if the clunked fid is root fid then we
386               * have unmounted the fs on the client side.
387               * delete the migration blocker. Ideally, this
388               * should be hooked to transport close notification
389               */
390              if (pdu->s->migration_blocker) {
391                  migrate_del_blocker(pdu->s->migration_blocker);
392                  error_free(pdu->s->migration_blocker);
393                  pdu->s->migration_blocker = NULL;
394              }
395          }
396          return free_fid(pdu, fidp);
397      }
398      return 0;
399  }
400  
401  static V9fsFidState *clunk_fid(V9fsState *s, int32_t fid)
402  {
403      V9fsFidState *fidp;
404  
405      QSIMPLEQ_FOREACH(fidp, &s->fid_list, next) {
406          if (fidp->fid == fid) {
407              QSIMPLEQ_REMOVE(&s->fid_list, fidp, V9fsFidState, next);
408              fidp->clunked = true;
409              return fidp;
410          }
411      }
412      return NULL;
413  }
414  
415  void coroutine_fn v9fs_reclaim_fd(V9fsPDU *pdu)
416  {
417      int reclaim_count = 0;
418      V9fsState *s = pdu->s;
419      V9fsFidState *f;
420      QSLIST_HEAD(, V9fsFidState) reclaim_list =
421          QSLIST_HEAD_INITIALIZER(reclaim_list);
422  
423      QSIMPLEQ_FOREACH(f, &s->fid_list, next) {
424          /*
425           * Unlink fids cannot be reclaimed. Check
426           * for them and skip them. Also skip fids
427           * currently being operated on.
428           */
429          if (f->ref || f->flags & FID_NON_RECLAIMABLE) {
430              continue;
431          }
432          /*
433           * if it is a recently referenced fid
434           * we leave the fid untouched and clear the
435           * reference bit. We come back to it later
436           * in the next iteration. (a simple LRU without
437           * moving list elements around)
438           */
439          if (f->flags & FID_REFERENCED) {
440              f->flags &= ~FID_REFERENCED;
441              continue;
442          }
443          /*
444           * Add fids to reclaim list.
445           */
446          if (f->fid_type == P9_FID_FILE) {
447              if (f->fs.fd != -1) {
448                  /*
449                   * Up the reference count so that
450                   * a clunk request won't free this fid
451                   */
452                  f->ref++;
453                  QSLIST_INSERT_HEAD(&reclaim_list, f, reclaim_next);
454                  f->fs_reclaim.fd = f->fs.fd;
455                  f->fs.fd = -1;
456                  reclaim_count++;
457              }
458          } else if (f->fid_type == P9_FID_DIR) {
459              if (f->fs.dir.stream != NULL) {
460                  /*
461                   * Up the reference count so that
462                   * a clunk request won't free this fid
463                   */
464                  f->ref++;
465                  QSLIST_INSERT_HEAD(&reclaim_list, f, reclaim_next);
466                  f->fs_reclaim.dir.stream = f->fs.dir.stream;
467                  f->fs.dir.stream = NULL;
468                  reclaim_count++;
469              }
470          }
471          if (reclaim_count >= open_fd_rc) {
472              break;
473          }
474      }
475      /*
476       * Now close the fid in reclaim list. Free them if they
477       * are already clunked.
478       */
479      while (!QSLIST_EMPTY(&reclaim_list)) {
480          f = QSLIST_FIRST(&reclaim_list);
481          QSLIST_REMOVE(&reclaim_list, f, V9fsFidState, reclaim_next);
482          if (f->fid_type == P9_FID_FILE) {
483              v9fs_co_close(pdu, &f->fs_reclaim);
484          } else if (f->fid_type == P9_FID_DIR) {
485              v9fs_co_closedir(pdu, &f->fs_reclaim);
486          }
487          /*
488           * Now drop the fid reference, free it
489           * if clunked.
490           */
491          put_fid(pdu, f);
492      }
493  }
494  
495  static int coroutine_fn v9fs_mark_fids_unreclaim(V9fsPDU *pdu, V9fsPath *path)
496  {
497      int err;
498      V9fsState *s = pdu->s;
499      V9fsFidState *fidp, *fidp_next;
500  
501      fidp = QSIMPLEQ_FIRST(&s->fid_list);
502      if (!fidp) {
503          return 0;
504      }
505  
506      /*
507       * v9fs_reopen_fid() can yield : a reference on the fid must be held
508       * to ensure its pointer remains valid and we can safely pass it to
509       * QSIMPLEQ_NEXT(). The corresponding put_fid() can also yield so
510       * we must keep a reference on the next fid as well. So the logic here
511       * is to get a reference on a fid and only put it back during the next
512       * iteration after we could get a reference on the next fid. Start with
513       * the first one.
514       */
515      for (fidp->ref++; fidp; fidp = fidp_next) {
516          if (fidp->path.size == path->size &&
517              !memcmp(fidp->path.data, path->data, path->size)) {
518              /* Mark the fid non reclaimable. */
519              fidp->flags |= FID_NON_RECLAIMABLE;
520  
521              /* reopen the file/dir if already closed */
522              err = v9fs_reopen_fid(pdu, fidp);
523              if (err < 0) {
524                  put_fid(pdu, fidp);
525                  return err;
526              }
527          }
528  
529          fidp_next = QSIMPLEQ_NEXT(fidp, next);
530  
531          if (fidp_next) {
532              /*
533               * Ensure the next fid survives a potential clunk request during
534               * put_fid() below and v9fs_reopen_fid() in the next iteration.
535               */
536              fidp_next->ref++;
537          }
538  
539          /* We're done with this fid */
540          put_fid(pdu, fidp);
541      }
542  
543      return 0;
544  }
545  
546  static void coroutine_fn virtfs_reset(V9fsPDU *pdu)
547  {
548      V9fsState *s = pdu->s;
549      V9fsFidState *fidp;
550  
551      /* Free all fids */
552      while (!QSIMPLEQ_EMPTY(&s->fid_list)) {
553          /* Get fid */
554          fidp = QSIMPLEQ_FIRST(&s->fid_list);
555          fidp->ref++;
556  
557          /* Clunk fid */
558          QSIMPLEQ_REMOVE(&s->fid_list, fidp, V9fsFidState, next);
559          fidp->clunked = true;
560  
561          put_fid(pdu, fidp);
562      }
563  }
564  
/*
 * QID type bits.
 * NOTE(review): their users are outside the visible part of this file.
 */
#define P9_QID_TYPE_DIR         0x80
#define P9_QID_TYPE_SYMLINK     0x02

/*
 * Stat mode bits.
 * NOTE(review): their users are outside the visible part of this file.
 */
#define P9_STAT_MODE_DIR        0x80000000
#define P9_STAT_MODE_APPEND     0x40000000
#define P9_STAT_MODE_EXCL       0x20000000
#define P9_STAT_MODE_MOUNT      0x10000000
#define P9_STAT_MODE_AUTH       0x08000000
#define P9_STAT_MODE_TMP        0x04000000
#define P9_STAT_MODE_SYMLINK    0x02000000
#define P9_STAT_MODE_LINK       0x01000000
#define P9_STAT_MODE_DEVICE     0x00800000
#define P9_STAT_MODE_NAMED_PIPE 0x00200000
#define P9_STAT_MODE_SOCKET     0x00100000
#define P9_STAT_MODE_SETUID     0x00080000
#define P9_STAT_MODE_SETGID     0x00040000
#define P9_STAT_MODE_SETVTX     0x00010000

/*
 * Mask of the mode bits listed here: directory, symlink, link, device,
 * named pipe and socket.
 */
#define P9_STAT_MODE_TYPE_BITS (P9_STAT_MODE_DIR |          \
                                P9_STAT_MODE_SYMLINK |      \
                                P9_STAT_MODE_LINK |         \
                                P9_STAT_MODE_DEVICE |       \
                                P9_STAT_MODE_NAMED_PIPE |   \
                                P9_STAT_MODE_SOCKET)
589  
/*
 * Reverses the bit order of a byte, e.g. binary 10100000 becomes 00000101.
 *
 * The multiplication places copies of the byte side by side, the mask
 * keeps one bit per copy, and the modulo folds the picked bits back into
 * a single byte.
 */
static inline uint8_t mirror8bit(uint8_t byte)
{
    const uint64_t spread = byte * 0x0202020202ULL;
    const uint64_t picked = spread & 0x010884422010ULL;

    return picked % 1023;
}
595  
/*
 * Same as mirror8bit(), but for a 64 bit value: every byte is mirrored
 * and the byte order is reversed as well, so bit 0 ends up as bit 63.
 */
static inline uint64_t mirror64bit(uint64_t value)
{
    uint64_t mirrored = 0;
    int i;

    for (i = 0; i < 8; i++) {
        const uint8_t byte = (value >> (8 * i)) & 0xff;

        /* byte i of the input becomes byte (7 - i) of the result */
        mirrored |= (uint64_t)mirror8bit(byte) << (56 - 8 * i);
    }

    return mirrored;
}
608  
/**
 * @brief Parameter k for the Exponential Golomb algorithm to be used.
 *
 * The smaller this value, the smaller the minimum bit count for the Exp.
 * Golomb generated affixes will be (at lowest index), however for the
 * price of having a higher maximum bit count of generated affixes (at
 * highest index). Likewise, increasing this parameter yields a smaller
 * maximum bit count for the price of having a higher minimum bit count.
 *
 * In practice that means: a good value for k depends on the expected amount
 * of devices to be exposed by one export. For a small amount of devices k
 * should be small, for a large amount of devices k might be increased
 * instead. The default of k=0 should be fine for most users though.
 *
 * @b IMPORTANT: In case this ever becomes a runtime parameter; the value of
 * k should not change as long as guest is still running! Because that would
 * cause completely different inode numbers to be generated on guest.
 */
#define EXP_GOLOMB_K    0
628  
629  /**
630   * @brief Exponential Golomb algorithm for arbitrary k (including k=0).
631   *
632   * The Exponential Golomb algorithm generates @b prefixes (@b not suffixes!)
633   * with growing length and with the mathematical property of being
634   * "prefix-free". The latter means the generated prefixes can be prepended
635   * in front of arbitrary numbers and the resulting concatenated numbers are
636   * guaranteed to be always unique.
637   *
638   * This is a minor adjustment to the original Exp. Golomb algorithm in the
639   * sense that lowest allowed index (@param n) starts with 1, not with zero.
640   *
641   * @param n - natural number (or index) of the prefix to be generated
642   *            (1, 2, 3, ...)
643   * @param k - parameter k of Exp. Golomb algorithm to be used
644   *            (see comment on EXP_GOLOMB_K macro for details about k)
645   */
646  static VariLenAffix expGolombEncode(uint64_t n, int k)
647  {
648      const uint64_t value = n + (1 << k) - 1;
649      const int bits = (int) log2(value) + 1;
650      return (VariLenAffix) {
651          .type = AffixType_Prefix,
652          .value = value,
653          .bits = bits + MAX((bits - 1 - k), 0)
654      };
655  }
656  
657  /**
658   * @brief Converts a suffix into a prefix, or a prefix into a suffix.
659   *
660   * Simply mirror all bits of the affix value, for the purpose to preserve
661   * respectively the mathematical "prefix-free" or "suffix-free" property
662   * after the conversion.
663   *
664   * If a passed prefix is suitable to create unique numbers, then the
665   * returned suffix is suitable to create unique numbers as well (and vice
666   * versa).
667   */
668  static VariLenAffix invertAffix(const VariLenAffix *affix)
669  {
670      return (VariLenAffix) {
671          .type =
672              (affix->type == AffixType_Suffix) ?
673                  AffixType_Prefix : AffixType_Suffix,
674          .value =
675              mirror64bit(affix->value) >>
676              ((sizeof(affix->value) * 8) - affix->bits),
677          .bits = affix->bits
678      };
679  }
680  
681  /**
682   * @brief Generates suffix numbers with "suffix-free" property.
683   *
684   * This is just a wrapper function on top of the Exp. Golomb algorithm.
685   *
686   * Since the Exp. Golomb algorithm generates prefixes, but we need suffixes,
687   * this function converts the Exp. Golomb prefixes into appropriate suffixes
688   * which are still suitable for generating unique numbers.
689   *
690   * @param n - natural number (or index) of the suffix to be generated
691   *            (1, 2, 3, ...)
692   */
693  static VariLenAffix affixForIndex(uint64_t index)
694  {
695      VariLenAffix prefix;
696      prefix = expGolombEncode(index, EXP_GOLOMB_K);
697      return invertAffix(&prefix); /* convert prefix to suffix */
698  }
699  
700  /* creative abuse of tb_hash_func7, which is based on xxhash */
701  static uint32_t qpp_hash(QppEntry e)
702  {
703      return qemu_xxhash7(e.ino_prefix, e.dev, 0, 0, 0);
704  }
705  
706  static uint32_t qpf_hash(QpfEntry e)
707  {
708      return qemu_xxhash7(e.ino, e.dev, 0, 0, 0);
709  }
710  
711  static bool qpd_cmp_func(const void *obj, const void *userp)
712  {
713      const QpdEntry *e1 = obj, *e2 = userp;
714      return e1->dev == e2->dev;
715  }
716  
717  static bool qpp_cmp_func(const void *obj, const void *userp)
718  {
719      const QppEntry *e1 = obj, *e2 = userp;
720      return e1->dev == e2->dev && e1->ino_prefix == e2->ino_prefix;
721  }
722  
723  static bool qpf_cmp_func(const void *obj, const void *userp)
724  {
725      const QpfEntry *e1 = obj, *e2 = userp;
726      return e1->dev == e2->dev && e1->ino == e2->ino;
727  }
728  
/*
 * qht_iter() callback used by qp_table_destroy(): frees the entry @p.
 * The hash @h and the user pointer @up are not used.
 */
static void qp_table_remove(void *p, uint32_t h, void *up)
{
    g_free(p);
}
733  
734  static void qp_table_destroy(struct qht *ht)
735  {
736      if (!ht || !ht->map) {
737          return;
738      }
739      qht_iter(ht, qp_table_remove, NULL);
740      qht_destroy(ht);
741  }
742  
743  static void qpd_table_init(struct qht *ht)
744  {
745      qht_init(ht, qpd_cmp_func, 1, QHT_MODE_AUTO_RESIZE);
746  }
747  
748  static void qpp_table_init(struct qht *ht)
749  {
750      qht_init(ht, qpp_cmp_func, 1, QHT_MODE_AUTO_RESIZE);
751  }
752  
753  static void qpf_table_init(struct qht *ht)
754  {
755      qht_init(ht, qpf_cmp_func, 1 << 16, QHT_MODE_AUTO_RESIZE);
756  }
757  
758  /*
759   * Returns how many (high end) bits of inode numbers of the passed fs
760   * device shall be used (in combination with the device number) to
761   * generate hash values for qpp_table entries.
762   *
763   * This function is required if variable length suffixes are used for inode
764   * number mapping on guest level. Since a device may end up having multiple
765   * entries in qpp_table, each entry most probably with a different suffix
 * length, we thus need this function in conjunction with qpd_table to
 * "agree" about a fixed amount of bits (per device) to be always used for
 * generating hash values for the purpose of accessing qpp_table in order
 * to get consistent behaviour when accessing qpp_table.
770   */
771  static int qid_inode_prefix_hash_bits(V9fsPDU *pdu, dev_t dev)
772  {
773      QpdEntry lookup = {
774          .dev = dev
775      }, *val;
776      uint32_t hash = dev;
777      VariLenAffix affix;
778  
779      val = qht_lookup(&pdu->s->qpd_table, &lookup, hash);
780      if (!val) {
781          val = g_malloc0(sizeof(QpdEntry));
782          *val = lookup;
783          affix = affixForIndex(pdu->s->qp_affix_next);
784          val->prefix_bits = affix.bits;
785          qht_insert(&pdu->s->qpd_table, val, hash, NULL);
786          pdu->s->qp_ndevices++;
787      }
788      return val->prefix_bits;
789  }
790  
791  /**
792   * @brief Slow / full mapping host inode nr -> guest inode nr.
793   *
794   * This function performs a slower and much more costly remapping of an
795   * original file inode number on host to an appropriate different inode
796   * number on guest. For every (dev, inode) combination on host a new
797   * sequential number is generated, cached and exposed as inode number on
798   * guest.
799   *
800   * This is just a "last resort" fallback solution if the much faster/cheaper
801   * qid_path_suffixmap() failed. In practice this slow / full mapping is not
802   * expected ever to be used at all though.
803   *
804   * @see qid_path_suffixmap() for details
805   *
806   */
807  static int qid_path_fullmap(V9fsPDU *pdu, const struct stat *stbuf,
808                              uint64_t *path)
809  {
810      QpfEntry lookup = {
811          .dev = stbuf->st_dev,
812          .ino = stbuf->st_ino
813      }, *val;
814      uint32_t hash = qpf_hash(lookup);
815      VariLenAffix affix;
816  
817      val = qht_lookup(&pdu->s->qpf_table, &lookup, hash);
818  
819      if (!val) {
820          if (pdu->s->qp_fullpath_next == 0) {
821              /* no more files can be mapped :'( */
822              error_report_once(
823                  "9p: No more prefixes available for remapping inodes from "
824                  "host to guest."
825              );
826              return -ENFILE;
827          }
828  
829          val = g_malloc0(sizeof(QppEntry));
830          *val = lookup;
831  
832          /* new unique inode and device combo */
833          affix = affixForIndex(
834              1ULL << (sizeof(pdu->s->qp_affix_next) * 8)
835          );
836          val->path = (pdu->s->qp_fullpath_next++ << affix.bits) | affix.value;
837          pdu->s->qp_fullpath_next &= ((1ULL << (64 - affix.bits)) - 1);
838          qht_insert(&pdu->s->qpf_table, val, hash, NULL);
839      }
840  
841      *path = val->path;
842      return 0;
843  }
844  
845  /**
846   * @brief Quick mapping host inode nr -> guest inode nr.
847   *
848   * This function performs quick remapping of an original file inode number
849   * on host to an appropriate different inode number on guest. This remapping
850   * of inodes is required to avoid inode nr collisions on guest which would
851   * happen if the 9p export contains more than 1 exported file system (or
852   * more than 1 file system data set), because unlike on host level where the
853   * files would have different device nrs, all files exported by 9p would
854   * share the same device nr on guest (the device nr of the virtual 9p device
855   * that is).
856   *
857   * Inode remapping is performed by chopping off high end bits of the original
858   * inode number from host, shifting the result upwards and then assigning a
859   * generated suffix number for the low end bits, where the same suffix number
860   * will be shared by all inodes with the same device id AND the same high end
861   * bits that have been chopped off. That approach utilizes the fact that inode
862   * numbers very likely share the same high end bits (i.e. due to their common
863   * sequential generation by file systems) and hence we only have to generate
864   * and track a very limited amount of suffixes in practice due to that.
865   *
866   * We generate variable size suffixes for that purpose. The 1st generated
867   * suffix will only have 1 bit and hence we only need to chop off 1 bit from
868   * the original inode number. The subsequent suffixes being generated will
869   * grow in (bit) size subsequently, i.e. the 2nd and 3rd suffix being
870   * generated will have 3 bits and hence we have to chop off 3 bits from their
871   * original inodes, and so on. That approach of using variable length suffixes
872   * (i.e. over fixed size ones) utilizes the fact that in practice only a very
873   * limited amount of devices are shared by the same export (e.g. typically
874   * less than 2 dozen devices per 9p export), so in practice we need to chop
875   * off less bits than with fixed size prefixes and yet are flexible to add
876   * new devices at runtime below host's export directory at any time without
877   * having to reboot guest nor requiring to reconfigure guest for that. And due
878   * to the very limited amount of original high end bits that we chop off that
879   * way, the total amount of suffixes we need to generate is less than by using
880   * fixed size prefixes and hence it also improves performance of the inode
881   * remapping algorithm, and finally has the nice side effect that the inode
882   * numbers on guest will be much smaller & human friendly. ;-)
883   */
884  static int qid_path_suffixmap(V9fsPDU *pdu, const struct stat *stbuf,
885                                uint64_t *path)
886  {
887      const int ino_hash_bits = qid_inode_prefix_hash_bits(pdu, stbuf->st_dev);
888      QppEntry lookup = {
889          .dev = stbuf->st_dev,
890          .ino_prefix = (uint16_t) (stbuf->st_ino >> (64 - ino_hash_bits))
891      }, *val;
892      uint32_t hash = qpp_hash(lookup);
893  
894      val = qht_lookup(&pdu->s->qpp_table, &lookup, hash);
895  
896      if (!val) {
897          if (pdu->s->qp_affix_next == 0) {
898              /* we ran out of affixes */
899              warn_report_once(
900                  "9p: Potential degraded performance of inode remapping"
901              );
902              return -ENFILE;
903          }
904  
905          val = g_malloc0(sizeof(QppEntry));
906          *val = lookup;
907  
908          /* new unique inode affix and device combo */
909          val->qp_affix_index = pdu->s->qp_affix_next++;
910          val->qp_affix = affixForIndex(val->qp_affix_index);
911          qht_insert(&pdu->s->qpp_table, val, hash, NULL);
912      }
913      /* assuming generated affix to be suffix type, not prefix */
914      *path = (stbuf->st_ino << val->qp_affix.bits) | val->qp_affix.value;
915      return 0;
916  }
917  
918  static int stat_to_qid(V9fsPDU *pdu, const struct stat *stbuf, V9fsQID *qidp)
919  {
920      int err;
921      size_t size;
922  
923      if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) {
924          /* map inode+device to qid path (fast path) */
925          err = qid_path_suffixmap(pdu, stbuf, &qidp->path);
926          if (err == -ENFILE) {
927              /* fast path didn't work, fall back to full map */
928              err = qid_path_fullmap(pdu, stbuf, &qidp->path);
929          }
930          if (err) {
931              return err;
932          }
933      } else {
934          if (pdu->s->dev_id != stbuf->st_dev) {
935              if (pdu->s->ctx.export_flags & V9FS_FORBID_MULTIDEVS) {
936                  error_report_once(
937                      "9p: Multiple devices detected in same VirtFS export. "
938                      "Access of guest to additional devices is (partly) "
939                      "denied due to virtfs option 'multidevs=forbid' being "
940                      "effective."
941                  );
942                  return -ENODEV;
943              } else {
944                  warn_report_once(
945                      "9p: Multiple devices detected in same VirtFS export, "
946                      "which might lead to file ID collisions and severe "
947                      "misbehaviours on guest! You should either use a "
948                      "separate export for each device shared from host or "
949                      "use virtfs option 'multidevs=remap'!"
950                  );
951              }
952          }
953          memset(&qidp->path, 0, sizeof(qidp->path));
954          size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path));
955          memcpy(&qidp->path, &stbuf->st_ino, size);
956      }
957  
958      qidp->version = stbuf->st_mtime ^ (stbuf->st_size << 8);
959      qidp->type = 0;
960      if (S_ISDIR(stbuf->st_mode)) {
961          qidp->type |= P9_QID_TYPE_DIR;
962      }
963      if (S_ISLNK(stbuf->st_mode)) {
964          qidp->type |= P9_QID_TYPE_SYMLINK;
965      }
966  
967      return 0;
968  }
969  
970  static int coroutine_fn fid_to_qid(V9fsPDU *pdu, V9fsFidState *fidp,
971                                     V9fsQID *qidp)
972  {
973      struct stat stbuf;
974      int err;
975  
976      err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
977      if (err < 0) {
978          return err;
979      }
980      err = stat_to_qid(pdu, &stbuf, qidp);
981      if (err < 0) {
982          return err;
983      }
984      return 0;
985  }
986  
987  V9fsPDU *pdu_alloc(V9fsState *s)
988  {
989      V9fsPDU *pdu = NULL;
990  
991      if (!QLIST_EMPTY(&s->free_list)) {
992          pdu = QLIST_FIRST(&s->free_list);
993          QLIST_REMOVE(pdu, next);
994          QLIST_INSERT_HEAD(&s->active_list, pdu, next);
995      }
996      return pdu;
997  }
998  
999  void pdu_free(V9fsPDU *pdu)
1000  {
1001      V9fsState *s = pdu->s;
1002  
1003      g_assert(!pdu->cancelled);
1004      QLIST_REMOVE(pdu, next);
1005      QLIST_INSERT_HEAD(&s->free_list, pdu, next);
1006  }
1007  
1008  static void coroutine_fn pdu_complete(V9fsPDU *pdu, ssize_t len)
1009  {
1010      int8_t id = pdu->id + 1; /* Response */
1011      V9fsState *s = pdu->s;
1012      int ret;
1013  
1014      /*
1015       * The 9p spec requires that successfully cancelled pdus receive no reply.
1016       * Sending a reply would confuse clients because they would
1017       * assume that any EINTR is the actual result of the operation,
1018       * rather than a consequence of the cancellation. However, if
1019       * the operation completed (succesfully or with an error other
1020       * than caused be cancellation), we do send out that reply, both
1021       * for efficiency and to avoid confusing the rest of the state machine
1022       * that assumes passing a non-error here will mean a successful
1023       * transmission of the reply.
1024       */
1025      bool discard = pdu->cancelled && len == -EINTR;
1026      if (discard) {
1027          trace_v9fs_rcancel(pdu->tag, pdu->id);
1028          pdu->size = 0;
1029          goto out_notify;
1030      }
1031  
1032      if (len < 0) {
1033          int err = -len;
1034          len = 7;
1035  
1036          if (s->proto_version != V9FS_PROTO_2000L) {
1037              V9fsString str;
1038  
1039              str.data = strerror(err);
1040              str.size = strlen(str.data);
1041  
1042              ret = pdu_marshal(pdu, len, "s", &str);
1043              if (ret < 0) {
1044                  goto out_notify;
1045              }
1046              len += ret;
1047              id = P9_RERROR;
1048          }
1049  
1050          ret = pdu_marshal(pdu, len, "d", err);
1051          if (ret < 0) {
1052              goto out_notify;
1053          }
1054          len += ret;
1055  
1056          if (s->proto_version == V9FS_PROTO_2000L) {
1057              id = P9_RLERROR;
1058          }
1059          trace_v9fs_rerror(pdu->tag, pdu->id, err); /* Trace ERROR */
1060      }
1061  
1062      /* fill out the header */
1063      if (pdu_marshal(pdu, 0, "dbw", (int32_t)len, id, pdu->tag) < 0) {
1064          goto out_notify;
1065      }
1066  
1067      /* keep these in sync */
1068      pdu->size = len;
1069      pdu->id = id;
1070  
1071  out_notify:
1072      pdu->s->transport->push_and_notify(pdu);
1073  
1074      /* Now wakeup anybody waiting in flush for this request */
1075      if (!qemu_co_queue_next(&pdu->complete)) {
1076          pdu_free(pdu);
1077      }
1078  }
1079  
1080  static mode_t v9mode_to_mode(uint32_t mode, V9fsString *extension)
1081  {
1082      mode_t ret;
1083  
1084      ret = mode & 0777;
1085      if (mode & P9_STAT_MODE_DIR) {
1086          ret |= S_IFDIR;
1087      }
1088  
1089      if (mode & P9_STAT_MODE_SYMLINK) {
1090          ret |= S_IFLNK;
1091      }
1092      if (mode & P9_STAT_MODE_SOCKET) {
1093          ret |= S_IFSOCK;
1094      }
1095      if (mode & P9_STAT_MODE_NAMED_PIPE) {
1096          ret |= S_IFIFO;
1097      }
1098      if (mode & P9_STAT_MODE_DEVICE) {
1099          if (extension->size && extension->data[0] == 'c') {
1100              ret |= S_IFCHR;
1101          } else {
1102              ret |= S_IFBLK;
1103          }
1104      }
1105  
1106      if (!(ret & ~0777)) {
1107          ret |= S_IFREG;
1108      }
1109  
1110      if (mode & P9_STAT_MODE_SETUID) {
1111          ret |= S_ISUID;
1112      }
1113      if (mode & P9_STAT_MODE_SETGID) {
1114          ret |= S_ISGID;
1115      }
1116      if (mode & P9_STAT_MODE_SETVTX) {
1117          ret |= S_ISVTX;
1118      }
1119  
1120      return ret;
1121  }
1122  
1123  static int donttouch_stat(V9fsStat *stat)
1124  {
1125      if (stat->type == -1 &&
1126          stat->dev == -1 &&
1127          stat->qid.type == 0xff &&
1128          stat->qid.version == (uint32_t) -1 &&
1129          stat->qid.path == (uint64_t) -1 &&
1130          stat->mode == -1 &&
1131          stat->atime == -1 &&
1132          stat->mtime == -1 &&
1133          stat->length == -1 &&
1134          !stat->name.size &&
1135          !stat->uid.size &&
1136          !stat->gid.size &&
1137          !stat->muid.size &&
1138          stat->n_uid == -1 &&
1139          stat->n_gid == -1 &&
1140          stat->n_muid == -1) {
1141          return 1;
1142      }
1143  
1144      return 0;
1145  }
1146  
1147  static void v9fs_stat_init(V9fsStat *stat)
1148  {
1149      v9fs_string_init(&stat->name);
1150      v9fs_string_init(&stat->uid);
1151      v9fs_string_init(&stat->gid);
1152      v9fs_string_init(&stat->muid);
1153      v9fs_string_init(&stat->extension);
1154  }
1155  
1156  static void v9fs_stat_free(V9fsStat *stat)
1157  {
1158      v9fs_string_free(&stat->name);
1159      v9fs_string_free(&stat->uid);
1160      v9fs_string_free(&stat->gid);
1161      v9fs_string_free(&stat->muid);
1162      v9fs_string_free(&stat->extension);
1163  }
1164  
1165  static uint32_t stat_to_v9mode(const struct stat *stbuf)
1166  {
1167      uint32_t mode;
1168  
1169      mode = stbuf->st_mode & 0777;
1170      if (S_ISDIR(stbuf->st_mode)) {
1171          mode |= P9_STAT_MODE_DIR;
1172      }
1173  
1174      if (S_ISLNK(stbuf->st_mode)) {
1175          mode |= P9_STAT_MODE_SYMLINK;
1176      }
1177  
1178      if (S_ISSOCK(stbuf->st_mode)) {
1179          mode |= P9_STAT_MODE_SOCKET;
1180      }
1181  
1182      if (S_ISFIFO(stbuf->st_mode)) {
1183          mode |= P9_STAT_MODE_NAMED_PIPE;
1184      }
1185  
1186      if (S_ISBLK(stbuf->st_mode) || S_ISCHR(stbuf->st_mode)) {
1187          mode |= P9_STAT_MODE_DEVICE;
1188      }
1189  
1190      if (stbuf->st_mode & S_ISUID) {
1191          mode |= P9_STAT_MODE_SETUID;
1192      }
1193  
1194      if (stbuf->st_mode & S_ISGID) {
1195          mode |= P9_STAT_MODE_SETGID;
1196      }
1197  
1198      if (stbuf->st_mode & S_ISVTX) {
1199          mode |= P9_STAT_MODE_SETVTX;
1200      }
1201  
1202      return mode;
1203  }
1204  
1205  static int coroutine_fn stat_to_v9stat(V9fsPDU *pdu, V9fsPath *path,
1206                                         const char *basename,
1207                                         const struct stat *stbuf,
1208                                         V9fsStat *v9stat)
1209  {
1210      int err;
1211  
1212      memset(v9stat, 0, sizeof(*v9stat));
1213  
1214      err = stat_to_qid(pdu, stbuf, &v9stat->qid);
1215      if (err < 0) {
1216          return err;
1217      }
1218      v9stat->mode = stat_to_v9mode(stbuf);
1219      v9stat->atime = stbuf->st_atime;
1220      v9stat->mtime = stbuf->st_mtime;
1221      v9stat->length = stbuf->st_size;
1222  
1223      v9fs_string_free(&v9stat->uid);
1224      v9fs_string_free(&v9stat->gid);
1225      v9fs_string_free(&v9stat->muid);
1226  
1227      v9stat->n_uid = stbuf->st_uid;
1228      v9stat->n_gid = stbuf->st_gid;
1229      v9stat->n_muid = 0;
1230  
1231      v9fs_string_free(&v9stat->extension);
1232  
1233      if (v9stat->mode & P9_STAT_MODE_SYMLINK) {
1234          err = v9fs_co_readlink(pdu, path, &v9stat->extension);
1235          if (err < 0) {
1236              return err;
1237          }
1238      } else if (v9stat->mode & P9_STAT_MODE_DEVICE) {
1239          v9fs_string_sprintf(&v9stat->extension, "%c %u %u",
1240                  S_ISCHR(stbuf->st_mode) ? 'c' : 'b',
1241                  major(stbuf->st_rdev), minor(stbuf->st_rdev));
1242      } else if (S_ISDIR(stbuf->st_mode) || S_ISREG(stbuf->st_mode)) {
1243          v9fs_string_sprintf(&v9stat->extension, "%s %lu",
1244                  "HARDLINKCOUNT", (unsigned long)stbuf->st_nlink);
1245      }
1246  
1247      v9fs_string_sprintf(&v9stat->name, "%s", basename);
1248  
1249      v9stat->size = 61 +
1250          v9fs_string_size(&v9stat->name) +
1251          v9fs_string_size(&v9stat->uid) +
1252          v9fs_string_size(&v9stat->gid) +
1253          v9fs_string_size(&v9stat->muid) +
1254          v9fs_string_size(&v9stat->extension);
1255      return 0;
1256  }
1257  
/* Bits of the stat result/request mask, one per attribute. */
#define P9_STATS_MODE          (1ULL << 0)
#define P9_STATS_NLINK         (1ULL << 1)
#define P9_STATS_UID           (1ULL << 2)
#define P9_STATS_GID           (1ULL << 3)
#define P9_STATS_RDEV          (1ULL << 4)
#define P9_STATS_ATIME         (1ULL << 5)
#define P9_STATS_MTIME         (1ULL << 6)
#define P9_STATS_CTIME         (1ULL << 7)
#define P9_STATS_INO           (1ULL << 8)
#define P9_STATS_SIZE          (1ULL << 9)
#define P9_STATS_BLOCKS        (1ULL << 10)

#define P9_STATS_BTIME         (1ULL << 11)
#define P9_STATS_GEN           (1ULL << 12)
#define P9_STATS_DATA_VERSION  (1ULL << 13)

#define P9_STATS_BASIC         0x7ffULL  /* bits 0..10: MODE up to BLOCKS */
#define P9_STATS_ALL           0x3fffULL /* bits 0..13: all fields above */
1276  
1277  
1278  static int stat_to_v9stat_dotl(V9fsPDU *pdu, const struct stat *stbuf,
1279                                  V9fsStatDotl *v9lstat)
1280  {
1281      memset(v9lstat, 0, sizeof(*v9lstat));
1282  
1283      v9lstat->st_mode = stbuf->st_mode;
1284      v9lstat->st_nlink = stbuf->st_nlink;
1285      v9lstat->st_uid = stbuf->st_uid;
1286      v9lstat->st_gid = stbuf->st_gid;
1287      v9lstat->st_rdev = stbuf->st_rdev;
1288      v9lstat->st_size = stbuf->st_size;
1289      v9lstat->st_blksize = stbuf->st_blksize;
1290      v9lstat->st_blocks = stbuf->st_blocks;
1291      v9lstat->st_atime_sec = stbuf->st_atime;
1292      v9lstat->st_atime_nsec = stbuf->st_atim.tv_nsec;
1293      v9lstat->st_mtime_sec = stbuf->st_mtime;
1294      v9lstat->st_mtime_nsec = stbuf->st_mtim.tv_nsec;
1295      v9lstat->st_ctime_sec = stbuf->st_ctime;
1296      v9lstat->st_ctime_nsec = stbuf->st_ctim.tv_nsec;
1297      /* Currently we only support BASIC fields in stat */
1298      v9lstat->st_result_mask = P9_STATS_BASIC;
1299  
1300      return stat_to_qid(pdu, stbuf, &v9lstat->qid);
1301  }
1302  
/*
 * Debug helper: print the @cnt entries of the scatter/gather list @sg to
 * stdout in the form "sg[cnt]: {(base, len), (base, len), ...}".
 */
static void print_sg(struct iovec *sg, int cnt)
{
    int i;

    printf("sg[%d]: {", cnt);
    for (i = 0; i < cnt; i++) {
        if (i) {
            printf(", ");
        }
        /* iov_len is a size_t, so it needs %zu rather than the signed %zd */
        printf("(%p, %zu)", sg[i].iov_base, sg[i].iov_len);
    }
    printf("}\n");
}
1316  
1317  /* Will call this only for path name based fid */
1318  static void v9fs_fix_path(V9fsPath *dst, V9fsPath *src, int len)
1319  {
1320      V9fsPath str;
1321      v9fs_path_init(&str);
1322      v9fs_path_copy(&str, dst);
1323      v9fs_path_sprintf(dst, "%s%s", src->data, str.data + len);
1324      v9fs_path_free(&str);
1325  }
1326  
1327  static inline bool is_ro_export(FsContext *ctx)
1328  {
1329      return ctx->export_flags & V9FS_RDONLY;
1330  }
1331  
1332  static void coroutine_fn v9fs_version(void *opaque)
1333  {
1334      ssize_t err;
1335      V9fsPDU *pdu = opaque;
1336      V9fsState *s = pdu->s;
1337      V9fsString version;
1338      size_t offset = 7;
1339  
1340      v9fs_string_init(&version);
1341      err = pdu_unmarshal(pdu, offset, "ds", &s->msize, &version);
1342      if (err < 0) {
1343          goto out;
1344      }
1345      trace_v9fs_version(pdu->tag, pdu->id, s->msize, version.data);
1346  
1347      virtfs_reset(pdu);
1348  
1349      if (!strcmp(version.data, "9P2000.u")) {
1350          s->proto_version = V9FS_PROTO_2000U;
1351      } else if (!strcmp(version.data, "9P2000.L")) {
1352          s->proto_version = V9FS_PROTO_2000L;
1353      } else {
1354          v9fs_string_sprintf(&version, "unknown");
1355          /* skip min. msize check, reporting invalid version has priority */
1356          goto marshal;
1357      }
1358  
1359      if (s->msize < P9_MIN_MSIZE) {
1360          err = -EMSGSIZE;
1361          error_report(
1362              "9pfs: Client requested msize < minimum msize ("
1363              stringify(P9_MIN_MSIZE) ") supported by this server."
1364          );
1365          goto out;
1366      }
1367  
1368      /* 8192 is the default msize of Linux clients */
1369      if (s->msize <= 8192 && !(s->ctx.export_flags & V9FS_NO_PERF_WARN)) {
1370          warn_report_once(
1371              "9p: degraded performance: a reasonable high msize should be "
1372              "chosen on client/guest side (chosen msize is <= 8192). See "
1373              "https://wiki.qemu.org/Documentation/9psetup#msize for details."
1374          );
1375      }
1376  
1377  marshal:
1378      err = pdu_marshal(pdu, offset, "ds", s->msize, &version);
1379      if (err < 0) {
1380          goto out;
1381      }
1382      err += offset;
1383      trace_v9fs_version_return(pdu->tag, pdu->id, s->msize, version.data);
1384  out:
1385      pdu_complete(pdu, err);
1386      v9fs_string_free(&version);
1387  }
1388  
1389  static void coroutine_fn v9fs_attach(void *opaque)
1390  {
1391      V9fsPDU *pdu = opaque;
1392      V9fsState *s = pdu->s;
1393      int32_t fid, afid, n_uname;
1394      V9fsString uname, aname;
1395      V9fsFidState *fidp;
1396      size_t offset = 7;
1397      V9fsQID qid;
1398      ssize_t err;
1399  
1400      v9fs_string_init(&uname);
1401      v9fs_string_init(&aname);
1402      err = pdu_unmarshal(pdu, offset, "ddssd", &fid,
1403                          &afid, &uname, &aname, &n_uname);
1404      if (err < 0) {
1405          goto out_nofid;
1406      }
1407      trace_v9fs_attach(pdu->tag, pdu->id, fid, afid, uname.data, aname.data);
1408  
1409      fidp = alloc_fid(s, fid);
1410      if (fidp == NULL) {
1411          err = -EINVAL;
1412          goto out_nofid;
1413      }
1414      fidp->uid = n_uname;
1415      err = v9fs_co_name_to_path(pdu, NULL, "/", &fidp->path);
1416      if (err < 0) {
1417          err = -EINVAL;
1418          clunk_fid(s, fid);
1419          goto out;
1420      }
1421      err = fid_to_qid(pdu, fidp, &qid);
1422      if (err < 0) {
1423          err = -EINVAL;
1424          clunk_fid(s, fid);
1425          goto out;
1426      }
1427  
1428      /*
1429       * disable migration if we haven't done already.
1430       * attach could get called multiple times for the same export.
1431       */
1432      if (!s->migration_blocker) {
1433          error_setg(&s->migration_blocker,
1434                     "Migration is disabled when VirtFS export path '%s' is mounted in the guest using mount_tag '%s'",
1435                     s->ctx.fs_root ? s->ctx.fs_root : "NULL", s->tag);
1436          err = migrate_add_blocker(s->migration_blocker, NULL);
1437          if (err < 0) {
1438              error_free(s->migration_blocker);
1439              s->migration_blocker = NULL;
1440              clunk_fid(s, fid);
1441              goto out;
1442          }
1443          s->root_fid = fid;
1444      }
1445  
1446      err = pdu_marshal(pdu, offset, "Q", &qid);
1447      if (err < 0) {
1448          clunk_fid(s, fid);
1449          goto out;
1450      }
1451      err += offset;
1452  
1453      memcpy(&s->root_qid, &qid, sizeof(qid));
1454      trace_v9fs_attach_return(pdu->tag, pdu->id,
1455                               qid.type, qid.version, qid.path);
1456  out:
1457      put_fid(pdu, fidp);
1458  out_nofid:
1459      pdu_complete(pdu, err);
1460      v9fs_string_free(&uname);
1461      v9fs_string_free(&aname);
1462  }
1463  
1464  static void coroutine_fn v9fs_stat(void *opaque)
1465  {
1466      int32_t fid;
1467      V9fsStat v9stat;
1468      ssize_t err = 0;
1469      size_t offset = 7;
1470      struct stat stbuf;
1471      V9fsFidState *fidp;
1472      V9fsPDU *pdu = opaque;
1473      char *basename;
1474  
1475      err = pdu_unmarshal(pdu, offset, "d", &fid);
1476      if (err < 0) {
1477          goto out_nofid;
1478      }
1479      trace_v9fs_stat(pdu->tag, pdu->id, fid);
1480  
1481      fidp = get_fid(pdu, fid);
1482      if (fidp == NULL) {
1483          err = -ENOENT;
1484          goto out_nofid;
1485      }
1486      err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1487      if (err < 0) {
1488          goto out;
1489      }
1490      basename = g_path_get_basename(fidp->path.data);
1491      err = stat_to_v9stat(pdu, &fidp->path, basename, &stbuf, &v9stat);
1492      g_free(basename);
1493      if (err < 0) {
1494          goto out;
1495      }
1496      err = pdu_marshal(pdu, offset, "wS", 0, &v9stat);
1497      if (err < 0) {
1498          v9fs_stat_free(&v9stat);
1499          goto out;
1500      }
1501      trace_v9fs_stat_return(pdu->tag, pdu->id, v9stat.mode,
1502                             v9stat.atime, v9stat.mtime, v9stat.length);
1503      err += offset;
1504      v9fs_stat_free(&v9stat);
1505  out:
1506      put_fid(pdu, fidp);
1507  out_nofid:
1508      pdu_complete(pdu, err);
1509  }
1510  
1511  static void coroutine_fn v9fs_getattr(void *opaque)
1512  {
1513      int32_t fid;
1514      size_t offset = 7;
1515      ssize_t retval = 0;
1516      struct stat stbuf;
1517      V9fsFidState *fidp;
1518      uint64_t request_mask;
1519      V9fsStatDotl v9stat_dotl;
1520      V9fsPDU *pdu = opaque;
1521  
1522      retval = pdu_unmarshal(pdu, offset, "dq", &fid, &request_mask);
1523      if (retval < 0) {
1524          goto out_nofid;
1525      }
1526      trace_v9fs_getattr(pdu->tag, pdu->id, fid, request_mask);
1527  
1528      fidp = get_fid(pdu, fid);
1529      if (fidp == NULL) {
1530          retval = -ENOENT;
1531          goto out_nofid;
1532      }
1533      /*
1534       * Currently we only support BASIC fields in stat, so there is no
1535       * need to look at request_mask.
1536       */
1537      retval = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1538      if (retval < 0) {
1539          goto out;
1540      }
1541      retval = stat_to_v9stat_dotl(pdu, &stbuf, &v9stat_dotl);
1542      if (retval < 0) {
1543          goto out;
1544      }
1545  
1546      /*  fill st_gen if requested and supported by underlying fs */
1547      if (request_mask & P9_STATS_GEN) {
1548          retval = v9fs_co_st_gen(pdu, &fidp->path, stbuf.st_mode, &v9stat_dotl);
1549          switch (retval) {
1550          case 0:
1551              /* we have valid st_gen: update result mask */
1552              v9stat_dotl.st_result_mask |= P9_STATS_GEN;
1553              break;
1554          case -EINTR:
1555              /* request cancelled, e.g. by Tflush */
1556              goto out;
1557          default:
1558              /* failed to get st_gen: not fatal, ignore */
1559              break;
1560          }
1561      }
1562      retval = pdu_marshal(pdu, offset, "A", &v9stat_dotl);
1563      if (retval < 0) {
1564          goto out;
1565      }
1566      retval += offset;
1567      trace_v9fs_getattr_return(pdu->tag, pdu->id, v9stat_dotl.st_result_mask,
1568                                v9stat_dotl.st_mode, v9stat_dotl.st_uid,
1569                                v9stat_dotl.st_gid);
1570  out:
1571      put_fid(pdu, fidp);
1572  out_nofid:
1573      pdu_complete(pdu, retval);
1574  }
1575  
/* Attribute flags, one bit per attribute */
#define P9_ATTR_MODE       0x001
#define P9_ATTR_UID        0x002
#define P9_ATTR_GID        0x004
#define P9_ATTR_SIZE       0x008
#define P9_ATTR_ATIME      0x010
#define P9_ATTR_MTIME      0x020
#define P9_ATTR_CTIME      0x040
#define P9_ATTR_ATIME_SET  0x080
#define P9_ATTR_MTIME_SET  0x100

/* bits 0..6: MODE up to CTIME, i.e. without the two *_SET flags */
#define P9_ATTR_MASK    0x7f
1588  
1589  static void coroutine_fn v9fs_setattr(void *opaque)
1590  {
1591      int err = 0;
1592      int32_t fid;
1593      V9fsFidState *fidp;
1594      size_t offset = 7;
1595      V9fsIattr v9iattr;
1596      V9fsPDU *pdu = opaque;
1597  
1598      err = pdu_unmarshal(pdu, offset, "dI", &fid, &v9iattr);
1599      if (err < 0) {
1600          goto out_nofid;
1601      }
1602  
1603      trace_v9fs_setattr(pdu->tag, pdu->id, fid,
1604                         v9iattr.valid, v9iattr.mode, v9iattr.uid, v9iattr.gid,
1605                         v9iattr.size, v9iattr.atime_sec, v9iattr.mtime_sec);
1606  
1607      fidp = get_fid(pdu, fid);
1608      if (fidp == NULL) {
1609          err = -EINVAL;
1610          goto out_nofid;
1611      }
1612      if (v9iattr.valid & P9_ATTR_MODE) {
1613          err = v9fs_co_chmod(pdu, &fidp->path, v9iattr.mode);
1614          if (err < 0) {
1615              goto out;
1616          }
1617      }
1618      if (v9iattr.valid & (P9_ATTR_ATIME | P9_ATTR_MTIME)) {
1619          struct timespec times[2];
1620          if (v9iattr.valid & P9_ATTR_ATIME) {
1621              if (v9iattr.valid & P9_ATTR_ATIME_SET) {
1622                  times[0].tv_sec = v9iattr.atime_sec;
1623                  times[0].tv_nsec = v9iattr.atime_nsec;
1624              } else {
1625                  times[0].tv_nsec = UTIME_NOW;
1626              }
1627          } else {
1628              times[0].tv_nsec = UTIME_OMIT;
1629          }
1630          if (v9iattr.valid & P9_ATTR_MTIME) {
1631              if (v9iattr.valid & P9_ATTR_MTIME_SET) {
1632                  times[1].tv_sec = v9iattr.mtime_sec;
1633                  times[1].tv_nsec = v9iattr.mtime_nsec;
1634              } else {
1635                  times[1].tv_nsec = UTIME_NOW;
1636              }
1637          } else {
1638              times[1].tv_nsec = UTIME_OMIT;
1639          }
1640          err = v9fs_co_utimensat(pdu, &fidp->path, times);
1641          if (err < 0) {
1642              goto out;
1643          }
1644      }
1645      /*
1646       * If the only valid entry in iattr is ctime we can call
1647       * chown(-1,-1) to update the ctime of the file
1648       */
1649      if ((v9iattr.valid & (P9_ATTR_UID | P9_ATTR_GID)) ||
1650          ((v9iattr.valid & P9_ATTR_CTIME)
1651           && !((v9iattr.valid & P9_ATTR_MASK) & ~P9_ATTR_CTIME))) {
1652          if (!(v9iattr.valid & P9_ATTR_UID)) {
1653              v9iattr.uid = -1;
1654          }
1655          if (!(v9iattr.valid & P9_ATTR_GID)) {
1656              v9iattr.gid = -1;
1657          }
1658          err = v9fs_co_chown(pdu, &fidp->path, v9iattr.uid,
1659                              v9iattr.gid);
1660          if (err < 0) {
1661              goto out;
1662          }
1663      }
1664      if (v9iattr.valid & (P9_ATTR_SIZE)) {
1665          err = v9fs_co_truncate(pdu, &fidp->path, v9iattr.size);
1666          if (err < 0) {
1667              goto out;
1668          }
1669      }
1670      err = offset;
1671      trace_v9fs_setattr_return(pdu->tag, pdu->id);
1672  out:
1673      put_fid(pdu, fidp);
1674  out_nofid:
1675      pdu_complete(pdu, err);
1676  }
1677  
1678  static int v9fs_walk_marshal(V9fsPDU *pdu, uint16_t nwnames, V9fsQID *qids)
1679  {
1680      int i;
1681      ssize_t err;
1682      size_t offset = 7;
1683  
1684      err = pdu_marshal(pdu, offset, "w", nwnames);
1685      if (err < 0) {
1686          return err;
1687      }
1688      offset += err;
1689      for (i = 0; i < nwnames; i++) {
1690          err = pdu_marshal(pdu, offset, "Q", &qids[i]);
1691          if (err < 0) {
1692              return err;
1693          }
1694          offset += err;
1695      }
1696      return offset;
1697  }
1698  
/* A name is illegal if it is empty or contains a '/' character. */
static bool name_is_illegal(const char *name)
{
    if (name[0] == '\0') {
        return true;
    }
    return strchr(name, '/') != NULL;
}
1703  
1704  static bool not_same_qid(const V9fsQID *qid1, const V9fsQID *qid2)
1705  {
1706      return
1707          qid1->type != qid2->type ||
1708          qid1->version != qid2->version ||
1709          qid1->path != qid2->path;
1710  }
1711  
1712  static void coroutine_fn v9fs_walk(void *opaque)
1713  {
1714      int name_idx;
1715      V9fsQID *qids = NULL;
1716      int i, err = 0;
1717      V9fsPath dpath, path;
1718      uint16_t nwnames;
1719      struct stat stbuf;
1720      size_t offset = 7;
1721      int32_t fid, newfid;
1722      V9fsString *wnames = NULL;
1723      V9fsFidState *fidp;
1724      V9fsFidState *newfidp = NULL;
1725      V9fsPDU *pdu = opaque;
1726      V9fsState *s = pdu->s;
1727      V9fsQID qid;
1728  
1729      err = pdu_unmarshal(pdu, offset, "ddw", &fid, &newfid, &nwnames);
1730      if (err < 0) {
1731          pdu_complete(pdu, err);
1732          return ;
1733      }
1734      offset += err;
1735  
1736      trace_v9fs_walk(pdu->tag, pdu->id, fid, newfid, nwnames);
1737  
1738      if (nwnames && nwnames <= P9_MAXWELEM) {
1739          wnames = g_new0(V9fsString, nwnames);
1740          qids   = g_new0(V9fsQID, nwnames);
1741          for (i = 0; i < nwnames; i++) {
1742              err = pdu_unmarshal(pdu, offset, "s", &wnames[i]);
1743              if (err < 0) {
1744                  goto out_nofid;
1745              }
1746              if (name_is_illegal(wnames[i].data)) {
1747                  err = -ENOENT;
1748                  goto out_nofid;
1749              }
1750              offset += err;
1751          }
1752      } else if (nwnames > P9_MAXWELEM) {
1753          err = -EINVAL;
1754          goto out_nofid;
1755      }
1756      fidp = get_fid(pdu, fid);
1757      if (fidp == NULL) {
1758          err = -ENOENT;
1759          goto out_nofid;
1760      }
1761  
1762      v9fs_path_init(&dpath);
1763      v9fs_path_init(&path);
1764  
1765      err = fid_to_qid(pdu, fidp, &qid);
1766      if (err < 0) {
1767          goto out;
1768      }
1769  
1770      /*
1771       * Both dpath and path initially poin to fidp.
1772       * Needed to handle request with nwnames == 0
1773       */
1774      v9fs_path_copy(&dpath, &fidp->path);
1775      v9fs_path_copy(&path, &fidp->path);
1776      for (name_idx = 0; name_idx < nwnames; name_idx++) {
1777          if (not_same_qid(&pdu->s->root_qid, &qid) ||
1778              strcmp("..", wnames[name_idx].data)) {
1779              err = v9fs_co_name_to_path(pdu, &dpath, wnames[name_idx].data,
1780                                         &path);
1781              if (err < 0) {
1782                  goto out;
1783              }
1784  
1785              err = v9fs_co_lstat(pdu, &path, &stbuf);
1786              if (err < 0) {
1787                  goto out;
1788              }
1789              err = stat_to_qid(pdu, &stbuf, &qid);
1790              if (err < 0) {
1791                  goto out;
1792              }
1793              v9fs_path_copy(&dpath, &path);
1794          }
1795          memcpy(&qids[name_idx], &qid, sizeof(qid));
1796      }
1797      if (fid == newfid) {
1798          if (fidp->fid_type != P9_FID_NONE) {
1799              err = -EINVAL;
1800              goto out;
1801          }
1802          v9fs_path_write_lock(s);
1803          v9fs_path_copy(&fidp->path, &path);
1804          v9fs_path_unlock(s);
1805      } else {
1806          newfidp = alloc_fid(s, newfid);
1807          if (newfidp == NULL) {
1808              err = -EINVAL;
1809              goto out;
1810          }
1811          newfidp->uid = fidp->uid;
1812          v9fs_path_copy(&newfidp->path, &path);
1813      }
1814      err = v9fs_walk_marshal(pdu, nwnames, qids);
1815      trace_v9fs_walk_return(pdu->tag, pdu->id, nwnames, qids);
1816  out:
1817      put_fid(pdu, fidp);
1818      if (newfidp) {
1819          put_fid(pdu, newfidp);
1820      }
1821      v9fs_path_free(&dpath);
1822      v9fs_path_free(&path);
1823  out_nofid:
1824      pdu_complete(pdu, err);
1825      if (nwnames && nwnames <= P9_MAXWELEM) {
1826          for (name_idx = 0; name_idx < nwnames; name_idx++) {
1827              v9fs_string_free(&wnames[name_idx]);
1828          }
1829          g_free(wnames);
1830          g_free(qids);
1831      }
1832  }
1833  
1834  static int32_t coroutine_fn get_iounit(V9fsPDU *pdu, V9fsPath *path)
1835  {
1836      struct statfs stbuf;
1837      int32_t iounit = 0;
1838      V9fsState *s = pdu->s;
1839  
1840      /*
1841       * iounit should be multiples of f_bsize (host filesystem block size
1842       * and as well as less than (client msize - P9_IOHDRSZ))
1843       */
1844      if (!v9fs_co_statfs(pdu, path, &stbuf)) {
1845          if (stbuf.f_bsize) {
1846              iounit = stbuf.f_bsize;
1847              iounit *= (s->msize - P9_IOHDRSZ) / stbuf.f_bsize;
1848          }
1849      }
1850      if (!iounit) {
1851          iounit = s->msize - P9_IOHDRSZ;
1852      }
1853      return iounit;
1854  }
1855  
1856  static void coroutine_fn v9fs_open(void *opaque)
1857  {
1858      int flags;
1859      int32_t fid;
1860      int32_t mode;
1861      V9fsQID qid;
1862      int iounit = 0;
1863      ssize_t err = 0;
1864      size_t offset = 7;
1865      struct stat stbuf;
1866      V9fsFidState *fidp;
1867      V9fsPDU *pdu = opaque;
1868      V9fsState *s = pdu->s;
1869  
1870      if (s->proto_version == V9FS_PROTO_2000L) {
1871          err = pdu_unmarshal(pdu, offset, "dd", &fid, &mode);
1872      } else {
1873          uint8_t modebyte;
1874          err = pdu_unmarshal(pdu, offset, "db", &fid, &modebyte);
1875          mode = modebyte;
1876      }
1877      if (err < 0) {
1878          goto out_nofid;
1879      }
1880      trace_v9fs_open(pdu->tag, pdu->id, fid, mode);
1881  
1882      fidp = get_fid(pdu, fid);
1883      if (fidp == NULL) {
1884          err = -ENOENT;
1885          goto out_nofid;
1886      }
1887      if (fidp->fid_type != P9_FID_NONE) {
1888          err = -EINVAL;
1889          goto out;
1890      }
1891  
1892      err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1893      if (err < 0) {
1894          goto out;
1895      }
1896      err = stat_to_qid(pdu, &stbuf, &qid);
1897      if (err < 0) {
1898          goto out;
1899      }
1900      if (S_ISDIR(stbuf.st_mode)) {
1901          err = v9fs_co_opendir(pdu, fidp);
1902          if (err < 0) {
1903              goto out;
1904          }
1905          fidp->fid_type = P9_FID_DIR;
1906          err = pdu_marshal(pdu, offset, "Qd", &qid, 0);
1907          if (err < 0) {
1908              goto out;
1909          }
1910          err += offset;
1911      } else {
1912          if (s->proto_version == V9FS_PROTO_2000L) {
1913              flags = get_dotl_openflags(s, mode);
1914          } else {
1915              flags = omode_to_uflags(mode);
1916          }
1917          if (is_ro_export(&s->ctx)) {
1918              if (mode & O_WRONLY || mode & O_RDWR ||
1919                  mode & O_APPEND || mode & O_TRUNC) {
1920                  err = -EROFS;
1921                  goto out;
1922              }
1923          }
1924          err = v9fs_co_open(pdu, fidp, flags);
1925          if (err < 0) {
1926              goto out;
1927          }
1928          fidp->fid_type = P9_FID_FILE;
1929          fidp->open_flags = flags;
1930          if (flags & O_EXCL) {
1931              /*
1932               * We let the host file system do O_EXCL check
1933               * We should not reclaim such fd
1934               */
1935              fidp->flags |= FID_NON_RECLAIMABLE;
1936          }
1937          iounit = get_iounit(pdu, &fidp->path);
1938          err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
1939          if (err < 0) {
1940              goto out;
1941          }
1942          err += offset;
1943      }
1944      trace_v9fs_open_return(pdu->tag, pdu->id,
1945                             qid.type, qid.version, qid.path, iounit);
1946  out:
1947      put_fid(pdu, fidp);
1948  out_nofid:
1949      pdu_complete(pdu, err);
1950  }
1951  
1952  static void coroutine_fn v9fs_lcreate(void *opaque)
1953  {
1954      int32_t dfid, flags, mode;
1955      gid_t gid;
1956      ssize_t err = 0;
1957      ssize_t offset = 7;
1958      V9fsString name;
1959      V9fsFidState *fidp;
1960      struct stat stbuf;
1961      V9fsQID qid;
1962      int32_t iounit;
1963      V9fsPDU *pdu = opaque;
1964  
1965      v9fs_string_init(&name);
1966      err = pdu_unmarshal(pdu, offset, "dsddd", &dfid,
1967                          &name, &flags, &mode, &gid);
1968      if (err < 0) {
1969          goto out_nofid;
1970      }
1971      trace_v9fs_lcreate(pdu->tag, pdu->id, dfid, flags, mode, gid);
1972  
1973      if (name_is_illegal(name.data)) {
1974          err = -ENOENT;
1975          goto out_nofid;
1976      }
1977  
1978      if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
1979          err = -EEXIST;
1980          goto out_nofid;
1981      }
1982  
1983      fidp = get_fid(pdu, dfid);
1984      if (fidp == NULL) {
1985          err = -ENOENT;
1986          goto out_nofid;
1987      }
1988      if (fidp->fid_type != P9_FID_NONE) {
1989          err = -EINVAL;
1990          goto out;
1991      }
1992  
1993      flags = get_dotl_openflags(pdu->s, flags);
1994      err = v9fs_co_open2(pdu, fidp, &name, gid,
1995                          flags | O_CREAT, mode, &stbuf);
1996      if (err < 0) {
1997          goto out;
1998      }
1999      fidp->fid_type = P9_FID_FILE;
2000      fidp->open_flags = flags;
2001      if (flags & O_EXCL) {
2002          /*
2003           * We let the host file system do O_EXCL check
2004           * We should not reclaim such fd
2005           */
2006          fidp->flags |= FID_NON_RECLAIMABLE;
2007      }
2008      iounit =  get_iounit(pdu, &fidp->path);
2009      err = stat_to_qid(pdu, &stbuf, &qid);
2010      if (err < 0) {
2011          goto out;
2012      }
2013      err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
2014      if (err < 0) {
2015          goto out;
2016      }
2017      err += offset;
2018      trace_v9fs_lcreate_return(pdu->tag, pdu->id,
2019                                qid.type, qid.version, qid.path, iounit);
2020  out:
2021      put_fid(pdu, fidp);
2022  out_nofid:
2023      pdu_complete(pdu, err);
2024      v9fs_string_free(&name);
2025  }
2026  
2027  static void coroutine_fn v9fs_fsync(void *opaque)
2028  {
2029      int err;
2030      int32_t fid;
2031      int datasync;
2032      size_t offset = 7;
2033      V9fsFidState *fidp;
2034      V9fsPDU *pdu = opaque;
2035  
2036      err = pdu_unmarshal(pdu, offset, "dd", &fid, &datasync);
2037      if (err < 0) {
2038          goto out_nofid;
2039      }
2040      trace_v9fs_fsync(pdu->tag, pdu->id, fid, datasync);
2041  
2042      fidp = get_fid(pdu, fid);
2043      if (fidp == NULL) {
2044          err = -ENOENT;
2045          goto out_nofid;
2046      }
2047      err = v9fs_co_fsync(pdu, fidp, datasync);
2048      if (!err) {
2049          err = offset;
2050      }
2051      put_fid(pdu, fidp);
2052  out_nofid:
2053      pdu_complete(pdu, err);
2054  }
2055  
2056  static void coroutine_fn v9fs_clunk(void *opaque)
2057  {
2058      int err;
2059      int32_t fid;
2060      size_t offset = 7;
2061      V9fsFidState *fidp;
2062      V9fsPDU *pdu = opaque;
2063      V9fsState *s = pdu->s;
2064  
2065      err = pdu_unmarshal(pdu, offset, "d", &fid);
2066      if (err < 0) {
2067          goto out_nofid;
2068      }
2069      trace_v9fs_clunk(pdu->tag, pdu->id, fid);
2070  
2071      fidp = clunk_fid(s, fid);
2072      if (fidp == NULL) {
2073          err = -ENOENT;
2074          goto out_nofid;
2075      }
2076      /*
2077       * Bump the ref so that put_fid will
2078       * free the fid.
2079       */
2080      fidp->ref++;
2081      err = put_fid(pdu, fidp);
2082      if (!err) {
2083          err = offset;
2084      }
2085  out_nofid:
2086      pdu_complete(pdu, err);
2087  }
2088  
2089  /*
2090   * Create a QEMUIOVector for a sub-region of PDU iovecs
2091   *
2092   * @qiov:       uninitialized QEMUIOVector
2093   * @skip:       number of bytes to skip from beginning of PDU
2094   * @size:       number of bytes to include
2095   * @is_write:   true - write, false - read
2096   *
2097   * The resulting QEMUIOVector has heap-allocated iovecs and must be cleaned up
2098   * with qemu_iovec_destroy().
2099   */
2100  static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu,
2101                                      size_t skip, size_t size,
2102                                      bool is_write)
2103  {
2104      QEMUIOVector elem;
2105      struct iovec *iov;
2106      unsigned int niov;
2107  
2108      if (is_write) {
2109          pdu->s->transport->init_out_iov_from_pdu(pdu, &iov, &niov, size + skip);
2110      } else {
2111          pdu->s->transport->init_in_iov_from_pdu(pdu, &iov, &niov, size + skip);
2112      }
2113  
2114      qemu_iovec_init_external(&elem, iov, niov);
2115      qemu_iovec_init(qiov, niov);
2116      qemu_iovec_concat(qiov, &elem, skip, size);
2117  }
2118  
2119  static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
2120                             uint64_t off, uint32_t max_count)
2121  {
2122      ssize_t err;
2123      size_t offset = 7;
2124      uint64_t read_count;
2125      QEMUIOVector qiov_full;
2126  
2127      if (fidp->fs.xattr.len < off) {
2128          read_count = 0;
2129      } else {
2130          read_count = fidp->fs.xattr.len - off;
2131      }
2132      if (read_count > max_count) {
2133          read_count = max_count;
2134      }
2135      err = pdu_marshal(pdu, offset, "d", read_count);
2136      if (err < 0) {
2137          return err;
2138      }
2139      offset += err;
2140  
2141      v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, read_count, false);
2142      err = v9fs_pack(qiov_full.iov, qiov_full.niov, 0,
2143                      ((char *)fidp->fs.xattr.value) + off,
2144                      read_count);
2145      qemu_iovec_destroy(&qiov_full);
2146      if (err < 0) {
2147          return err;
2148      }
2149      offset += err;
2150      return offset;
2151  }
2152  
2153  static int coroutine_fn v9fs_do_readdir_with_stat(V9fsPDU *pdu,
2154                                                    V9fsFidState *fidp,
2155                                                    uint32_t max_count)
2156  {
2157      V9fsPath path;
2158      V9fsStat v9stat;
2159      int len, err = 0;
2160      int32_t count = 0;
2161      struct stat stbuf;
2162      off_t saved_dir_pos;
2163      struct dirent *dent;
2164  
2165      /* save the directory position */
2166      saved_dir_pos = v9fs_co_telldir(pdu, fidp);
2167      if (saved_dir_pos < 0) {
2168          return saved_dir_pos;
2169      }
2170  
2171      while (1) {
2172          v9fs_path_init(&path);
2173  
2174          v9fs_readdir_lock(&fidp->fs.dir);
2175  
2176          err = v9fs_co_readdir(pdu, fidp, &dent);
2177          if (err || !dent) {
2178              break;
2179          }
2180          err = v9fs_co_name_to_path(pdu, &fidp->path, dent->d_name, &path);
2181          if (err < 0) {
2182              break;
2183          }
2184          err = v9fs_co_lstat(pdu, &path, &stbuf);
2185          if (err < 0) {
2186              break;
2187          }
2188          err = stat_to_v9stat(pdu, &path, dent->d_name, &stbuf, &v9stat);
2189          if (err < 0) {
2190              break;
2191          }
2192          if ((count + v9stat.size + 2) > max_count) {
2193              v9fs_readdir_unlock(&fidp->fs.dir);
2194  
2195              /* Ran out of buffer. Set dir back to old position and return */
2196              v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
2197              v9fs_stat_free(&v9stat);
2198              v9fs_path_free(&path);
2199              return count;
2200          }
2201  
2202          /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
2203          len = pdu_marshal(pdu, 11 + count, "S", &v9stat);
2204  
2205          v9fs_readdir_unlock(&fidp->fs.dir);
2206  
2207          if (len < 0) {
2208              v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
2209              v9fs_stat_free(&v9stat);
2210              v9fs_path_free(&path);
2211              return len;
2212          }
2213          count += len;
2214          v9fs_stat_free(&v9stat);
2215          v9fs_path_free(&path);
2216          saved_dir_pos = dent->d_off;
2217      }
2218  
2219      v9fs_readdir_unlock(&fidp->fs.dir);
2220  
2221      v9fs_path_free(&path);
2222      if (err < 0) {
2223          return err;
2224      }
2225      return count;
2226  }
2227  
2228  static void coroutine_fn v9fs_read(void *opaque)
2229  {
2230      int32_t fid;
2231      uint64_t off;
2232      ssize_t err = 0;
2233      int32_t count = 0;
2234      size_t offset = 7;
2235      uint32_t max_count;
2236      V9fsFidState *fidp;
2237      V9fsPDU *pdu = opaque;
2238      V9fsState *s = pdu->s;
2239  
2240      err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &max_count);
2241      if (err < 0) {
2242          goto out_nofid;
2243      }
2244      trace_v9fs_read(pdu->tag, pdu->id, fid, off, max_count);
2245  
2246      fidp = get_fid(pdu, fid);
2247      if (fidp == NULL) {
2248          err = -EINVAL;
2249          goto out_nofid;
2250      }
2251      if (fidp->fid_type == P9_FID_DIR) {
2252          if (s->proto_version != V9FS_PROTO_2000U) {
2253              warn_report_once(
2254                  "9p: bad client: T_read request on directory only expected "
2255                  "with 9P2000.u protocol version"
2256              );
2257              err = -EOPNOTSUPP;
2258              goto out;
2259          }
2260          if (off == 0) {
2261              v9fs_co_rewinddir(pdu, fidp);
2262          }
2263          count = v9fs_do_readdir_with_stat(pdu, fidp, max_count);
2264          if (count < 0) {
2265              err = count;
2266              goto out;
2267          }
2268          err = pdu_marshal(pdu, offset, "d", count);
2269          if (err < 0) {
2270              goto out;
2271          }
2272          err += offset + count;
2273      } else if (fidp->fid_type == P9_FID_FILE) {
2274          QEMUIOVector qiov_full;
2275          QEMUIOVector qiov;
2276          int32_t len;
2277  
2278          v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset + 4, max_count, false);
2279          qemu_iovec_init(&qiov, qiov_full.niov);
2280          do {
2281              qemu_iovec_reset(&qiov);
2282              qemu_iovec_concat(&qiov, &qiov_full, count, qiov_full.size - count);
2283              if (0) {
2284                  print_sg(qiov.iov, qiov.niov);
2285              }
2286              /* Loop in case of EINTR */
2287              do {
2288                  len = v9fs_co_preadv(pdu, fidp, qiov.iov, qiov.niov, off);
2289                  if (len >= 0) {
2290                      off   += len;
2291                      count += len;
2292                  }
2293              } while (len == -EINTR && !pdu->cancelled);
2294              if (len < 0) {
2295                  /* IO error return the error */
2296                  err = len;
2297                  goto out_free_iovec;
2298              }
2299          } while (count < max_count && len > 0);
2300          err = pdu_marshal(pdu, offset, "d", count);
2301          if (err < 0) {
2302              goto out_free_iovec;
2303          }
2304          err += offset + count;
2305  out_free_iovec:
2306          qemu_iovec_destroy(&qiov);
2307          qemu_iovec_destroy(&qiov_full);
2308      } else if (fidp->fid_type == P9_FID_XATTR) {
2309          err = v9fs_xattr_read(s, pdu, fidp, off, max_count);
2310      } else {
2311          err = -EINVAL;
2312      }
2313      trace_v9fs_read_return(pdu->tag, pdu->id, count, err);
2314  out:
2315      put_fid(pdu, fidp);
2316  out_nofid:
2317      pdu_complete(pdu, err);
2318  }
2319  
2320  /**
2321   * Returns size required in Rreaddir response for the passed dirent @p name.
2322   *
2323   * @param name - directory entry's name (i.e. file name, directory name)
2324   * @returns required size in bytes
2325   */
2326  size_t v9fs_readdir_response_size(V9fsString *name)
2327  {
2328      /*
2329       * Size of each dirent on the wire: size of qid (13) + size of offset (8)
2330       * size of type (1) + size of name.size (2) + strlen(name.data)
2331       */
2332      return 24 + v9fs_string_size(name);
2333  }
2334  
2335  static void v9fs_free_dirents(struct V9fsDirEnt *e)
2336  {
2337      struct V9fsDirEnt *next = NULL;
2338  
2339      for (; e; e = next) {
2340          next = e->next;
2341          g_free(e->dent);
2342          g_free(e->st);
2343          g_free(e);
2344      }
2345  }
2346  
2347  static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
2348                                          off_t offset, int32_t max_count)
2349  {
2350      size_t size;
2351      V9fsQID qid;
2352      V9fsString name;
2353      int len, err = 0;
2354      int32_t count = 0;
2355      struct dirent *dent;
2356      struct stat *st;
2357      struct V9fsDirEnt *entries = NULL;
2358  
2359      /*
2360       * inode remapping requires the device id, which in turn might be
2361       * different for different directory entries, so if inode remapping is
2362       * enabled we have to make a full stat for each directory entry
2363       */
2364      const bool dostat = pdu->s->ctx.export_flags & V9FS_REMAP_INODES;
2365  
2366      /*
2367       * Fetch all required directory entries altogether on a background IO
2368       * thread from fs driver. We don't want to do that for each entry
2369       * individually, because hopping between threads (this main IO thread
2370       * and background IO driver thread) would sum up to huge latencies.
2371       */
2372      count = v9fs_co_readdir_many(pdu, fidp, &entries, offset, max_count,
2373                                   dostat);
2374      if (count < 0) {
2375          err = count;
2376          count = 0;
2377          goto out;
2378      }
2379      count = 0;
2380  
2381      for (struct V9fsDirEnt *e = entries; e; e = e->next) {
2382          dent = e->dent;
2383  
2384          if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) {
2385              st = e->st;
2386              /* e->st should never be NULL, but just to be sure */
2387              if (!st) {
2388                  err = -1;
2389                  break;
2390              }
2391  
2392              /* remap inode */
2393              err = stat_to_qid(pdu, st, &qid);
2394              if (err < 0) {
2395                  break;
2396              }
2397          } else {
2398              /*
2399               * Fill up just the path field of qid because the client uses
2400               * only that. To fill the entire qid structure we will have
2401               * to stat each dirent found, which is expensive. For the
2402               * latter reason we don't call stat_to_qid() here. Only drawback
2403               * is that no multi-device export detection of stat_to_qid()
2404               * would be done and provided as error to the user here. But
2405               * user would get that error anyway when accessing those
2406               * files/dirs through other ways.
2407               */
2408              size = MIN(sizeof(dent->d_ino), sizeof(qid.path));
2409              memcpy(&qid.path, &dent->d_ino, size);
2410              /* Fill the other fields with dummy values */
2411              qid.type = 0;
2412              qid.version = 0;
2413          }
2414  
2415          v9fs_string_init(&name);
2416          v9fs_string_sprintf(&name, "%s", dent->d_name);
2417  
2418          /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
2419          len = pdu_marshal(pdu, 11 + count, "Qqbs",
2420                            &qid, dent->d_off,
2421                            dent->d_type, &name);
2422  
2423          v9fs_string_free(&name);
2424  
2425          if (len < 0) {
2426              err = len;
2427              break;
2428          }
2429  
2430          count += len;
2431      }
2432  
2433  out:
2434      v9fs_free_dirents(entries);
2435      if (err < 0) {
2436          return err;
2437      }
2438      return count;
2439  }
2440  
2441  static void coroutine_fn v9fs_readdir(void *opaque)
2442  {
2443      int32_t fid;
2444      V9fsFidState *fidp;
2445      ssize_t retval = 0;
2446      size_t offset = 7;
2447      uint64_t initial_offset;
2448      int32_t count;
2449      uint32_t max_count;
2450      V9fsPDU *pdu = opaque;
2451      V9fsState *s = pdu->s;
2452  
2453      retval = pdu_unmarshal(pdu, offset, "dqd", &fid,
2454                             &initial_offset, &max_count);
2455      if (retval < 0) {
2456          goto out_nofid;
2457      }
2458      trace_v9fs_readdir(pdu->tag, pdu->id, fid, initial_offset, max_count);
2459  
2460      /* Enough space for a R_readdir header: size[4] Rreaddir tag[2] count[4] */
2461      if (max_count > s->msize - 11) {
2462          max_count = s->msize - 11;
2463          warn_report_once(
2464              "9p: bad client: T_readdir with count > msize - 11"
2465          );
2466      }
2467  
2468      fidp = get_fid(pdu, fid);
2469      if (fidp == NULL) {
2470          retval = -EINVAL;
2471          goto out_nofid;
2472      }
2473      if (!fidp->fs.dir.stream) {
2474          retval = -EINVAL;
2475          goto out;
2476      }
2477      if (s->proto_version != V9FS_PROTO_2000L) {
2478          warn_report_once(
2479              "9p: bad client: T_readdir request only expected with 9P2000.L "
2480              "protocol version"
2481          );
2482          retval = -EOPNOTSUPP;
2483          goto out;
2484      }
2485      count = v9fs_do_readdir(pdu, fidp, (off_t) initial_offset, max_count);
2486      if (count < 0) {
2487          retval = count;
2488          goto out;
2489      }
2490      retval = pdu_marshal(pdu, offset, "d", count);
2491      if (retval < 0) {
2492          goto out;
2493      }
2494      retval += count + offset;
2495      trace_v9fs_readdir_return(pdu->tag, pdu->id, count, retval);
2496  out:
2497      put_fid(pdu, fidp);
2498  out_nofid:
2499      pdu_complete(pdu, retval);
2500  }
2501  
2502  static int v9fs_xattr_write(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
2503                              uint64_t off, uint32_t count,
2504                              struct iovec *sg, int cnt)
2505  {
2506      int i, to_copy;
2507      ssize_t err = 0;
2508      uint64_t write_count;
2509      size_t offset = 7;
2510  
2511  
2512      if (fidp->fs.xattr.len < off) {
2513          return -ENOSPC;
2514      }
2515      write_count = fidp->fs.xattr.len - off;
2516      if (write_count > count) {
2517          write_count = count;
2518      }
2519      err = pdu_marshal(pdu, offset, "d", write_count);
2520      if (err < 0) {
2521          return err;
2522      }
2523      err += offset;
2524      fidp->fs.xattr.copied_len += write_count;
2525      /*
2526       * Now copy the content from sg list
2527       */
2528      for (i = 0; i < cnt; i++) {
2529          if (write_count > sg[i].iov_len) {
2530              to_copy = sg[i].iov_len;
2531          } else {
2532              to_copy = write_count;
2533          }
2534          memcpy((char *)fidp->fs.xattr.value + off, sg[i].iov_base, to_copy);
2535          /* updating vs->off since we are not using below */
2536          off += to_copy;
2537          write_count -= to_copy;
2538      }
2539  
2540      return err;
2541  }
2542  
2543  static void coroutine_fn v9fs_write(void *opaque)
2544  {
2545      ssize_t err;
2546      int32_t fid;
2547      uint64_t off;
2548      uint32_t count;
2549      int32_t len = 0;
2550      int32_t total = 0;
2551      size_t offset = 7;
2552      V9fsFidState *fidp;
2553      V9fsPDU *pdu = opaque;
2554      V9fsState *s = pdu->s;
2555      QEMUIOVector qiov_full;
2556      QEMUIOVector qiov;
2557  
2558      err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &count);
2559      if (err < 0) {
2560          pdu_complete(pdu, err);
2561          return;
2562      }
2563      offset += err;
2564      v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, count, true);
2565      trace_v9fs_write(pdu->tag, pdu->id, fid, off, count, qiov_full.niov);
2566  
2567      fidp = get_fid(pdu, fid);
2568      if (fidp == NULL) {
2569          err = -EINVAL;
2570          goto out_nofid;
2571      }
2572      if (fidp->fid_type == P9_FID_FILE) {
2573          if (fidp->fs.fd == -1) {
2574              err = -EINVAL;
2575              goto out;
2576          }
2577      } else if (fidp->fid_type == P9_FID_XATTR) {
2578          /*
2579           * setxattr operation
2580           */
2581          err = v9fs_xattr_write(s, pdu, fidp, off, count,
2582                                 qiov_full.iov, qiov_full.niov);
2583          goto out;
2584      } else {
2585          err = -EINVAL;
2586          goto out;
2587      }
2588      qemu_iovec_init(&qiov, qiov_full.niov);
2589      do {
2590          qemu_iovec_reset(&qiov);
2591          qemu_iovec_concat(&qiov, &qiov_full, total, qiov_full.size - total);
2592          if (0) {
2593              print_sg(qiov.iov, qiov.niov);
2594          }
2595          /* Loop in case of EINTR */
2596          do {
2597              len = v9fs_co_pwritev(pdu, fidp, qiov.iov, qiov.niov, off);
2598              if (len >= 0) {
2599                  off   += len;
2600                  total += len;
2601              }
2602          } while (len == -EINTR && !pdu->cancelled);
2603          if (len < 0) {
2604              /* IO error return the error */
2605              err = len;
2606              goto out_qiov;
2607          }
2608      } while (total < count && len > 0);
2609  
2610      offset = 7;
2611      err = pdu_marshal(pdu, offset, "d", total);
2612      if (err < 0) {
2613          goto out_qiov;
2614      }
2615      err += offset;
2616      trace_v9fs_write_return(pdu->tag, pdu->id, total, err);
2617  out_qiov:
2618      qemu_iovec_destroy(&qiov);
2619  out:
2620      put_fid(pdu, fidp);
2621  out_nofid:
2622      qemu_iovec_destroy(&qiov_full);
2623      pdu_complete(pdu, err);
2624  }
2625  
2626  static void coroutine_fn v9fs_create(void *opaque)
2627  {
2628      int32_t fid;
2629      int err = 0;
2630      size_t offset = 7;
2631      V9fsFidState *fidp;
2632      V9fsQID qid;
2633      int32_t perm;
2634      int8_t mode;
2635      V9fsPath path;
2636      struct stat stbuf;
2637      V9fsString name;
2638      V9fsString extension;
2639      int iounit;
2640      V9fsPDU *pdu = opaque;
2641      V9fsState *s = pdu->s;
2642  
2643      v9fs_path_init(&path);
2644      v9fs_string_init(&name);
2645      v9fs_string_init(&extension);
2646      err = pdu_unmarshal(pdu, offset, "dsdbs", &fid, &name,
2647                          &perm, &mode, &extension);
2648      if (err < 0) {
2649          goto out_nofid;
2650      }
2651      trace_v9fs_create(pdu->tag, pdu->id, fid, name.data, perm, mode);
2652  
2653      if (name_is_illegal(name.data)) {
2654          err = -ENOENT;
2655          goto out_nofid;
2656      }
2657  
2658      if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
2659          err = -EEXIST;
2660          goto out_nofid;
2661      }
2662  
2663      fidp = get_fid(pdu, fid);
2664      if (fidp == NULL) {
2665          err = -EINVAL;
2666          goto out_nofid;
2667      }
2668      if (fidp->fid_type != P9_FID_NONE) {
2669          err = -EINVAL;
2670          goto out;
2671      }
2672      if (perm & P9_STAT_MODE_DIR) {
2673          err = v9fs_co_mkdir(pdu, fidp, &name, perm & 0777,
2674                              fidp->uid, -1, &stbuf);
2675          if (err < 0) {
2676              goto out;
2677          }
2678          err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2679          if (err < 0) {
2680              goto out;
2681          }
2682          v9fs_path_write_lock(s);
2683          v9fs_path_copy(&fidp->path, &path);
2684          v9fs_path_unlock(s);
2685          err = v9fs_co_opendir(pdu, fidp);
2686          if (err < 0) {
2687              goto out;
2688          }
2689          fidp->fid_type = P9_FID_DIR;
2690      } else if (perm & P9_STAT_MODE_SYMLINK) {
2691          err = v9fs_co_symlink(pdu, fidp, &name,
2692                                extension.data, -1 , &stbuf);
2693          if (err < 0) {
2694              goto out;
2695          }
2696          err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2697          if (err < 0) {
2698              goto out;
2699          }
2700          v9fs_path_write_lock(s);
2701          v9fs_path_copy(&fidp->path, &path);
2702          v9fs_path_unlock(s);
2703      } else if (perm & P9_STAT_MODE_LINK) {
2704          int32_t ofid = atoi(extension.data);
2705          V9fsFidState *ofidp = get_fid(pdu, ofid);
2706          if (ofidp == NULL) {
2707              err = -EINVAL;
2708              goto out;
2709          }
2710          err = v9fs_co_link(pdu, ofidp, fidp, &name);
2711          put_fid(pdu, ofidp);
2712          if (err < 0) {
2713              goto out;
2714          }
2715          err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2716          if (err < 0) {
2717              fidp->fid_type = P9_FID_NONE;
2718              goto out;
2719          }
2720          v9fs_path_write_lock(s);
2721          v9fs_path_copy(&fidp->path, &path);
2722          v9fs_path_unlock(s);
2723          err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
2724          if (err < 0) {
2725              fidp->fid_type = P9_FID_NONE;
2726              goto out;
2727          }
2728      } else if (perm & P9_STAT_MODE_DEVICE) {
2729          char ctype;
2730          uint32_t major, minor;
2731          mode_t nmode = 0;
2732  
2733          if (sscanf(extension.data, "%c %u %u", &ctype, &major, &minor) != 3) {
2734              err = -errno;
2735              goto out;
2736          }
2737  
2738          switch (ctype) {
2739          case 'c':
2740              nmode = S_IFCHR;
2741              break;
2742          case 'b':
2743              nmode = S_IFBLK;
2744              break;
2745          default:
2746              err = -EIO;
2747              goto out;
2748          }
2749  
2750          nmode |= perm & 0777;
2751          err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2752                              makedev(major, minor), nmode, &stbuf);
2753          if (err < 0) {
2754              goto out;
2755          }
2756          err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2757          if (err < 0) {
2758              goto out;
2759          }
2760          v9fs_path_write_lock(s);
2761          v9fs_path_copy(&fidp->path, &path);
2762          v9fs_path_unlock(s);
2763      } else if (perm & P9_STAT_MODE_NAMED_PIPE) {
2764          err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2765                              0, S_IFIFO | (perm & 0777), &stbuf);
2766          if (err < 0) {
2767              goto out;
2768          }
2769          err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2770          if (err < 0) {
2771              goto out;
2772          }
2773          v9fs_path_write_lock(s);
2774          v9fs_path_copy(&fidp->path, &path);
2775          v9fs_path_unlock(s);
2776      } else if (perm & P9_STAT_MODE_SOCKET) {
2777          err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2778                              0, S_IFSOCK | (perm & 0777), &stbuf);
2779          if (err < 0) {
2780              goto out;
2781          }
2782          err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2783          if (err < 0) {
2784              goto out;
2785          }
2786          v9fs_path_write_lock(s);
2787          v9fs_path_copy(&fidp->path, &path);
2788          v9fs_path_unlock(s);
2789      } else {
2790          err = v9fs_co_open2(pdu, fidp, &name, -1,
2791                              omode_to_uflags(mode) | O_CREAT, perm, &stbuf);
2792          if (err < 0) {
2793              goto out;
2794          }
2795          fidp->fid_type = P9_FID_FILE;
2796          fidp->open_flags = omode_to_uflags(mode);
2797          if (fidp->open_flags & O_EXCL) {
2798              /*
2799               * We let the host file system do O_EXCL check
2800               * We should not reclaim such fd
2801               */
2802              fidp->flags |= FID_NON_RECLAIMABLE;
2803          }
2804      }
2805      iounit = get_iounit(pdu, &fidp->path);
2806      err = stat_to_qid(pdu, &stbuf, &qid);
2807      if (err < 0) {
2808          goto out;
2809      }
2810      err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
2811      if (err < 0) {
2812          goto out;
2813      }
2814      err += offset;
2815      trace_v9fs_create_return(pdu->tag, pdu->id,
2816                               qid.type, qid.version, qid.path, iounit);
2817  out:
2818      put_fid(pdu, fidp);
2819  out_nofid:
2820     pdu_complete(pdu, err);
2821     v9fs_string_free(&name);
2822     v9fs_string_free(&extension);
2823     v9fs_path_free(&path);
2824  }
2825  
2826  static void coroutine_fn v9fs_symlink(void *opaque)
2827  {
2828      V9fsPDU *pdu = opaque;
2829      V9fsString name;
2830      V9fsString symname;
2831      V9fsFidState *dfidp;
2832      V9fsQID qid;
2833      struct stat stbuf;
2834      int32_t dfid;
2835      int err = 0;
2836      gid_t gid;
2837      size_t offset = 7;
2838  
2839      v9fs_string_init(&name);
2840      v9fs_string_init(&symname);
2841      err = pdu_unmarshal(pdu, offset, "dssd", &dfid, &name, &symname, &gid);
2842      if (err < 0) {
2843          goto out_nofid;
2844      }
2845      trace_v9fs_symlink(pdu->tag, pdu->id, dfid, name.data, symname.data, gid);
2846  
2847      if (name_is_illegal(name.data)) {
2848          err = -ENOENT;
2849          goto out_nofid;
2850      }
2851  
2852      if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
2853          err = -EEXIST;
2854          goto out_nofid;
2855      }
2856  
2857      dfidp = get_fid(pdu, dfid);
2858      if (dfidp == NULL) {
2859          err = -EINVAL;
2860          goto out_nofid;
2861      }
2862      err = v9fs_co_symlink(pdu, dfidp, &name, symname.data, gid, &stbuf);
2863      if (err < 0) {
2864          goto out;
2865      }
2866      err = stat_to_qid(pdu, &stbuf, &qid);
2867      if (err < 0) {
2868          goto out;
2869      }
2870      err =  pdu_marshal(pdu, offset, "Q", &qid);
2871      if (err < 0) {
2872          goto out;
2873      }
2874      err += offset;
2875      trace_v9fs_symlink_return(pdu->tag, pdu->id,
2876                                qid.type, qid.version, qid.path);
2877  out:
2878      put_fid(pdu, dfidp);
2879  out_nofid:
2880      pdu_complete(pdu, err);
2881      v9fs_string_free(&name);
2882      v9fs_string_free(&symname);
2883  }
2884  
2885  static void coroutine_fn v9fs_flush(void *opaque)
2886  {
2887      ssize_t err;
2888      int16_t tag;
2889      size_t offset = 7;
2890      V9fsPDU *cancel_pdu = NULL;
2891      V9fsPDU *pdu = opaque;
2892      V9fsState *s = pdu->s;
2893  
2894      err = pdu_unmarshal(pdu, offset, "w", &tag);
2895      if (err < 0) {
2896          pdu_complete(pdu, err);
2897          return;
2898      }
2899      trace_v9fs_flush(pdu->tag, pdu->id, tag);
2900  
2901      if (pdu->tag == tag) {
2902          warn_report("the guest sent a self-referencing 9P flush request");
2903      } else {
2904          QLIST_FOREACH(cancel_pdu, &s->active_list, next) {
2905              if (cancel_pdu->tag == tag) {
2906                  break;
2907              }
2908          }
2909      }
2910      if (cancel_pdu) {
2911          cancel_pdu->cancelled = 1;
2912          /*
2913           * Wait for pdu to complete.
2914           */
2915          qemu_co_queue_wait(&cancel_pdu->complete, NULL);
2916          if (!qemu_co_queue_next(&cancel_pdu->complete)) {
2917              cancel_pdu->cancelled = 0;
2918              pdu_free(cancel_pdu);
2919          }
2920      }
2921      pdu_complete(pdu, 7);
2922  }
2923  
2924  static void coroutine_fn v9fs_link(void *opaque)
2925  {
2926      V9fsPDU *pdu = opaque;
2927      int32_t dfid, oldfid;
2928      V9fsFidState *dfidp, *oldfidp;
2929      V9fsString name;
2930      size_t offset = 7;
2931      int err = 0;
2932  
2933      v9fs_string_init(&name);
2934      err = pdu_unmarshal(pdu, offset, "dds", &dfid, &oldfid, &name);
2935      if (err < 0) {
2936          goto out_nofid;
2937      }
2938      trace_v9fs_link(pdu->tag, pdu->id, dfid, oldfid, name.data);
2939  
2940      if (name_is_illegal(name.data)) {
2941          err = -ENOENT;
2942          goto out_nofid;
2943      }
2944  
2945      if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
2946          err = -EEXIST;
2947          goto out_nofid;
2948      }
2949  
2950      dfidp = get_fid(pdu, dfid);
2951      if (dfidp == NULL) {
2952          err = -ENOENT;
2953          goto out_nofid;
2954      }
2955  
2956      oldfidp = get_fid(pdu, oldfid);
2957      if (oldfidp == NULL) {
2958          err = -ENOENT;
2959          goto out;
2960      }
2961      err = v9fs_co_link(pdu, oldfidp, dfidp, &name);
2962      if (!err) {
2963          err = offset;
2964      }
2965      put_fid(pdu, oldfidp);
2966  out:
2967      put_fid(pdu, dfidp);
2968  out_nofid:
2969      v9fs_string_free(&name);
2970      pdu_complete(pdu, err);
2971  }
2972  
2973  /* Only works with path name based fid */
2974  static void coroutine_fn v9fs_remove(void *opaque)
2975  {
2976      int32_t fid;
2977      int err = 0;
2978      size_t offset = 7;
2979      V9fsFidState *fidp;
2980      V9fsPDU *pdu = opaque;
2981  
2982      err = pdu_unmarshal(pdu, offset, "d", &fid);
2983      if (err < 0) {
2984          goto out_nofid;
2985      }
2986      trace_v9fs_remove(pdu->tag, pdu->id, fid);
2987  
2988      fidp = get_fid(pdu, fid);
2989      if (fidp == NULL) {
2990          err = -EINVAL;
2991          goto out_nofid;
2992      }
2993      /* if fs driver is not path based, return EOPNOTSUPP */
2994      if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) {
2995          err = -EOPNOTSUPP;
2996          goto out_err;
2997      }
2998      /*
2999       * IF the file is unlinked, we cannot reopen
3000       * the file later. So don't reclaim fd
3001       */
3002      err = v9fs_mark_fids_unreclaim(pdu, &fidp->path);
3003      if (err < 0) {
3004          goto out_err;
3005      }
3006      err = v9fs_co_remove(pdu, &fidp->path);
3007      if (!err) {
3008          err = offset;
3009      }
3010  out_err:
3011      /* For TREMOVE we need to clunk the fid even on failed remove */
3012      clunk_fid(pdu->s, fidp->fid);
3013      put_fid(pdu, fidp);
3014  out_nofid:
3015      pdu_complete(pdu, err);
3016  }
3017  
3018  static void coroutine_fn v9fs_unlinkat(void *opaque)
3019  {
3020      int err = 0;
3021      V9fsString name;
3022      int32_t dfid, flags, rflags = 0;
3023      size_t offset = 7;
3024      V9fsPath path;
3025      V9fsFidState *dfidp;
3026      V9fsPDU *pdu = opaque;
3027  
3028      v9fs_string_init(&name);
3029      err = pdu_unmarshal(pdu, offset, "dsd", &dfid, &name, &flags);
3030      if (err < 0) {
3031          goto out_nofid;
3032      }
3033  
3034      if (name_is_illegal(name.data)) {
3035          err = -ENOENT;
3036          goto out_nofid;
3037      }
3038  
3039      if (!strcmp(".", name.data)) {
3040          err = -EINVAL;
3041          goto out_nofid;
3042      }
3043  
3044      if (!strcmp("..", name.data)) {
3045          err = -ENOTEMPTY;
3046          goto out_nofid;
3047      }
3048  
3049      if (flags & ~P9_DOTL_AT_REMOVEDIR) {
3050          err = -EINVAL;
3051          goto out_nofid;
3052      }
3053  
3054      if (flags & P9_DOTL_AT_REMOVEDIR) {
3055          rflags |= AT_REMOVEDIR;
3056      }
3057  
3058      dfidp = get_fid(pdu, dfid);
3059      if (dfidp == NULL) {
3060          err = -EINVAL;
3061          goto out_nofid;
3062      }
3063      /*
3064       * IF the file is unlinked, we cannot reopen
3065       * the file later. So don't reclaim fd
3066       */
3067      v9fs_path_init(&path);
3068      err = v9fs_co_name_to_path(pdu, &dfidp->path, name.data, &path);
3069      if (err < 0) {
3070          goto out_err;
3071      }
3072      err = v9fs_mark_fids_unreclaim(pdu, &path);
3073      if (err < 0) {
3074          goto out_err;
3075      }
3076      err = v9fs_co_unlinkat(pdu, &dfidp->path, &name, rflags);
3077      if (!err) {
3078          err = offset;
3079      }
3080  out_err:
3081      put_fid(pdu, dfidp);
3082      v9fs_path_free(&path);
3083  out_nofid:
3084      pdu_complete(pdu, err);
3085      v9fs_string_free(&name);
3086  }
3087  
3088  
3089  /* Only works with path name based fid */
3090  static int coroutine_fn v9fs_complete_rename(V9fsPDU *pdu, V9fsFidState *fidp,
3091                                               int32_t newdirfid,
3092                                               V9fsString *name)
3093  {
3094      int err = 0;
3095      V9fsPath new_path;
3096      V9fsFidState *tfidp;
3097      V9fsState *s = pdu->s;
3098      V9fsFidState *dirfidp = NULL;
3099  
3100      v9fs_path_init(&new_path);
3101      if (newdirfid != -1) {
3102          dirfidp = get_fid(pdu, newdirfid);
3103          if (dirfidp == NULL) {
3104              return -ENOENT;
3105          }
3106          if (fidp->fid_type != P9_FID_NONE) {
3107              err = -EINVAL;
3108              goto out;
3109          }
3110          err = v9fs_co_name_to_path(pdu, &dirfidp->path, name->data, &new_path);
3111          if (err < 0) {
3112              goto out;
3113          }
3114      } else {
3115          char *dir_name = g_path_get_dirname(fidp->path.data);
3116          V9fsPath dir_path;
3117  
3118          v9fs_path_init(&dir_path);
3119          v9fs_path_sprintf(&dir_path, "%s", dir_name);
3120          g_free(dir_name);
3121  
3122          err = v9fs_co_name_to_path(pdu, &dir_path, name->data, &new_path);
3123          v9fs_path_free(&dir_path);
3124          if (err < 0) {
3125              goto out;
3126          }
3127      }
3128      err = v9fs_co_rename(pdu, &fidp->path, &new_path);
3129      if (err < 0) {
3130          goto out;
3131      }
3132      /*
3133       * Fixup fid's pointing to the old name to
3134       * start pointing to the new name
3135       */
3136      QSIMPLEQ_FOREACH(tfidp, &s->fid_list, next) {
3137          if (v9fs_path_is_ancestor(&fidp->path, &tfidp->path)) {
3138              /* replace the name */
3139              v9fs_fix_path(&tfidp->path, &new_path, strlen(fidp->path.data));
3140          }
3141      }
3142  out:
3143      if (dirfidp) {
3144          put_fid(pdu, dirfidp);
3145      }
3146      v9fs_path_free(&new_path);
3147      return err;
3148  }
3149  
3150  /* Only works with path name based fid */
3151  static void coroutine_fn v9fs_rename(void *opaque)
3152  {
3153      int32_t fid;
3154      ssize_t err = 0;
3155      size_t offset = 7;
3156      V9fsString name;
3157      int32_t newdirfid;
3158      V9fsFidState *fidp;
3159      V9fsPDU *pdu = opaque;
3160      V9fsState *s = pdu->s;
3161  
3162      v9fs_string_init(&name);
3163      err = pdu_unmarshal(pdu, offset, "dds", &fid, &newdirfid, &name);
3164      if (err < 0) {
3165          goto out_nofid;
3166      }
3167  
3168      if (name_is_illegal(name.data)) {
3169          err = -ENOENT;
3170          goto out_nofid;
3171      }
3172  
3173      if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
3174          err = -EISDIR;
3175          goto out_nofid;
3176      }
3177  
3178      fidp = get_fid(pdu, fid);
3179      if (fidp == NULL) {
3180          err = -ENOENT;
3181          goto out_nofid;
3182      }
3183      if (fidp->fid_type != P9_FID_NONE) {
3184          err = -EINVAL;
3185          goto out;
3186      }
3187      /* if fs driver is not path based, return EOPNOTSUPP */
3188      if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) {
3189          err = -EOPNOTSUPP;
3190          goto out;
3191      }
3192      v9fs_path_write_lock(s);
3193      err = v9fs_complete_rename(pdu, fidp, newdirfid, &name);
3194      v9fs_path_unlock(s);
3195      if (!err) {
3196          err = offset;
3197      }
3198  out:
3199      put_fid(pdu, fidp);
3200  out_nofid:
3201      pdu_complete(pdu, err);
3202      v9fs_string_free(&name);
3203  }
3204  
3205  static int coroutine_fn v9fs_fix_fid_paths(V9fsPDU *pdu, V9fsPath *olddir,
3206                                             V9fsString *old_name,
3207                                             V9fsPath *newdir,
3208                                             V9fsString *new_name)
3209  {
3210      V9fsFidState *tfidp;
3211      V9fsPath oldpath, newpath;
3212      V9fsState *s = pdu->s;
3213      int err;
3214  
3215      v9fs_path_init(&oldpath);
3216      v9fs_path_init(&newpath);
3217      err = v9fs_co_name_to_path(pdu, olddir, old_name->data, &oldpath);
3218      if (err < 0) {
3219          goto out;
3220      }
3221      err = v9fs_co_name_to_path(pdu, newdir, new_name->data, &newpath);
3222      if (err < 0) {
3223          goto out;
3224      }
3225  
3226      /*
3227       * Fixup fid's pointing to the old name to
3228       * start pointing to the new name
3229       */
3230      QSIMPLEQ_FOREACH(tfidp, &s->fid_list, next) {
3231          if (v9fs_path_is_ancestor(&oldpath, &tfidp->path)) {
3232              /* replace the name */
3233              v9fs_fix_path(&tfidp->path, &newpath, strlen(oldpath.data));
3234          }
3235      }
3236  out:
3237      v9fs_path_free(&oldpath);
3238      v9fs_path_free(&newpath);
3239      return err;
3240  }
3241  
3242  static int coroutine_fn v9fs_complete_renameat(V9fsPDU *pdu, int32_t olddirfid,
3243                                                 V9fsString *old_name,
3244                                                 int32_t newdirfid,
3245                                                 V9fsString *new_name)
3246  {
3247      int err = 0;
3248      V9fsState *s = pdu->s;
3249      V9fsFidState *newdirfidp = NULL, *olddirfidp = NULL;
3250  
3251      olddirfidp = get_fid(pdu, olddirfid);
3252      if (olddirfidp == NULL) {
3253          err = -ENOENT;
3254          goto out;
3255      }
3256      if (newdirfid != -1) {
3257          newdirfidp = get_fid(pdu, newdirfid);
3258          if (newdirfidp == NULL) {
3259              err = -ENOENT;
3260              goto out;
3261          }
3262      } else {
3263          newdirfidp = get_fid(pdu, olddirfid);
3264      }
3265  
3266      err = v9fs_co_renameat(pdu, &olddirfidp->path, old_name,
3267                             &newdirfidp->path, new_name);
3268      if (err < 0) {
3269          goto out;
3270      }
3271      if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) {
3272          /* Only for path based fid  we need to do the below fixup */
3273          err = v9fs_fix_fid_paths(pdu, &olddirfidp->path, old_name,
3274                                   &newdirfidp->path, new_name);
3275      }
3276  out:
3277      if (olddirfidp) {
3278          put_fid(pdu, olddirfidp);
3279      }
3280      if (newdirfidp) {
3281          put_fid(pdu, newdirfidp);
3282      }
3283      return err;
3284  }
3285  
3286  static void coroutine_fn v9fs_renameat(void *opaque)
3287  {
3288      ssize_t err = 0;
3289      size_t offset = 7;
3290      V9fsPDU *pdu = opaque;
3291      V9fsState *s = pdu->s;
3292      int32_t olddirfid, newdirfid;
3293      V9fsString old_name, new_name;
3294  
3295      v9fs_string_init(&old_name);
3296      v9fs_string_init(&new_name);
3297      err = pdu_unmarshal(pdu, offset, "dsds", &olddirfid,
3298                          &old_name, &newdirfid, &new_name);
3299      if (err < 0) {
3300          goto out_err;
3301      }
3302  
3303      if (name_is_illegal(old_name.data) || name_is_illegal(new_name.data)) {
3304          err = -ENOENT;
3305          goto out_err;
3306      }
3307  
3308      if (!strcmp(".", old_name.data) || !strcmp("..", old_name.data) ||
3309          !strcmp(".", new_name.data) || !strcmp("..", new_name.data)) {
3310          err = -EISDIR;
3311          goto out_err;
3312      }
3313  
3314      v9fs_path_write_lock(s);
3315      err = v9fs_complete_renameat(pdu, olddirfid,
3316                                   &old_name, newdirfid, &new_name);
3317      v9fs_path_unlock(s);
3318      if (!err) {
3319          err = offset;
3320      }
3321  
3322  out_err:
3323      pdu_complete(pdu, err);
3324      v9fs_string_free(&old_name);
3325      v9fs_string_free(&new_name);
3326  }
3327  
3328  static void coroutine_fn v9fs_wstat(void *opaque)
3329  {
3330      int32_t fid;
3331      int err = 0;
3332      int16_t unused;
3333      V9fsStat v9stat;
3334      size_t offset = 7;
3335      struct stat stbuf;
3336      V9fsFidState *fidp;
3337      V9fsPDU *pdu = opaque;
3338      V9fsState *s = pdu->s;
3339  
3340      v9fs_stat_init(&v9stat);
3341      err = pdu_unmarshal(pdu, offset, "dwS", &fid, &unused, &v9stat);
3342      if (err < 0) {
3343          goto out_nofid;
3344      }
3345      trace_v9fs_wstat(pdu->tag, pdu->id, fid,
3346                       v9stat.mode, v9stat.atime, v9stat.mtime);
3347  
3348      fidp = get_fid(pdu, fid);
3349      if (fidp == NULL) {
3350          err = -EINVAL;
3351          goto out_nofid;
3352      }
3353      /* do we need to sync the file? */
3354      if (donttouch_stat(&v9stat)) {
3355          err = v9fs_co_fsync(pdu, fidp, 0);
3356          goto out;
3357      }
3358      if (v9stat.mode != -1) {
3359          uint32_t v9_mode;
3360          err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
3361          if (err < 0) {
3362              goto out;
3363          }
3364          v9_mode = stat_to_v9mode(&stbuf);
3365          if ((v9stat.mode & P9_STAT_MODE_TYPE_BITS) !=
3366              (v9_mode & P9_STAT_MODE_TYPE_BITS)) {
3367              /* Attempting to change the type */
3368              err = -EIO;
3369              goto out;
3370          }
3371          err = v9fs_co_chmod(pdu, &fidp->path,
3372                              v9mode_to_mode(v9stat.mode,
3373                                             &v9stat.extension));
3374          if (err < 0) {
3375              goto out;
3376          }
3377      }
3378      if (v9stat.mtime != -1 || v9stat.atime != -1) {
3379          struct timespec times[2];
3380          if (v9stat.atime != -1) {
3381              times[0].tv_sec = v9stat.atime;
3382              times[0].tv_nsec = 0;
3383          } else {
3384              times[0].tv_nsec = UTIME_OMIT;
3385          }
3386          if (v9stat.mtime != -1) {
3387              times[1].tv_sec = v9stat.mtime;
3388              times[1].tv_nsec = 0;
3389          } else {
3390              times[1].tv_nsec = UTIME_OMIT;
3391          }
3392          err = v9fs_co_utimensat(pdu, &fidp->path, times);
3393          if (err < 0) {
3394              goto out;
3395          }
3396      }
3397      if (v9stat.n_gid != -1 || v9stat.n_uid != -1) {
3398          err = v9fs_co_chown(pdu, &fidp->path, v9stat.n_uid, v9stat.n_gid);
3399          if (err < 0) {
3400              goto out;
3401          }
3402      }
3403      if (v9stat.name.size != 0) {
3404          v9fs_path_write_lock(s);
3405          err = v9fs_complete_rename(pdu, fidp, -1, &v9stat.name);
3406          v9fs_path_unlock(s);
3407          if (err < 0) {
3408              goto out;
3409          }
3410      }
3411      if (v9stat.length != -1) {
3412          err = v9fs_co_truncate(pdu, &fidp->path, v9stat.length);
3413          if (err < 0) {
3414              goto out;
3415          }
3416      }
3417      err = offset;
3418  out:
3419      put_fid(pdu, fidp);
3420  out_nofid:
3421      v9fs_stat_free(&v9stat);
3422      pdu_complete(pdu, err);
3423  }
3424  
3425  static int v9fs_fill_statfs(V9fsState *s, V9fsPDU *pdu, struct statfs *stbuf)
3426  {
3427      uint32_t f_type;
3428      uint32_t f_bsize;
3429      uint64_t f_blocks;
3430      uint64_t f_bfree;
3431      uint64_t f_bavail;
3432      uint64_t f_files;
3433      uint64_t f_ffree;
3434      uint64_t fsid_val;
3435      uint32_t f_namelen;
3436      size_t offset = 7;
3437      int32_t bsize_factor;
3438  
3439      /*
3440       * compute bsize factor based on host file system block size
3441       * and client msize
3442       */
3443      bsize_factor = (s->msize - P9_IOHDRSZ) / stbuf->f_bsize;
3444      if (!bsize_factor) {
3445          bsize_factor = 1;
3446      }
3447      f_type  = stbuf->f_type;
3448      f_bsize = stbuf->f_bsize;
3449      f_bsize *= bsize_factor;
3450      /*
3451       * f_bsize is adjusted(multiplied) by bsize factor, so we need to
3452       * adjust(divide) the number of blocks, free blocks and available
3453       * blocks by bsize factor
3454       */
3455      f_blocks = stbuf->f_blocks / bsize_factor;
3456      f_bfree  = stbuf->f_bfree / bsize_factor;
3457      f_bavail = stbuf->f_bavail / bsize_factor;
3458      f_files  = stbuf->f_files;
3459      f_ffree  = stbuf->f_ffree;
3460      fsid_val = (unsigned int) stbuf->f_fsid.__val[0] |
3461                 (unsigned long long)stbuf->f_fsid.__val[1] << 32;
3462      f_namelen = stbuf->f_namelen;
3463  
3464      return pdu_marshal(pdu, offset, "ddqqqqqqd",
3465                         f_type, f_bsize, f_blocks, f_bfree,
3466                         f_bavail, f_files, f_ffree,
3467                         fsid_val, f_namelen);
3468  }
3469  
3470  static void coroutine_fn v9fs_statfs(void *opaque)
3471  {
3472      int32_t fid;
3473      ssize_t retval = 0;
3474      size_t offset = 7;
3475      V9fsFidState *fidp;
3476      struct statfs stbuf;
3477      V9fsPDU *pdu = opaque;
3478      V9fsState *s = pdu->s;
3479  
3480      retval = pdu_unmarshal(pdu, offset, "d", &fid);
3481      if (retval < 0) {
3482          goto out_nofid;
3483      }
3484      fidp = get_fid(pdu, fid);
3485      if (fidp == NULL) {
3486          retval = -ENOENT;
3487          goto out_nofid;
3488      }
3489      retval = v9fs_co_statfs(pdu, &fidp->path, &stbuf);
3490      if (retval < 0) {
3491          goto out;
3492      }
3493      retval = v9fs_fill_statfs(s, pdu, &stbuf);
3494      if (retval < 0) {
3495          goto out;
3496      }
3497      retval += offset;
3498  out:
3499      put_fid(pdu, fidp);
3500  out_nofid:
3501      pdu_complete(pdu, retval);
3502  }
3503  
3504  static void coroutine_fn v9fs_mknod(void *opaque)
3505  {
3506  
3507      int mode;
3508      gid_t gid;
3509      int32_t fid;
3510      V9fsQID qid;
3511      int err = 0;
3512      int major, minor;
3513      size_t offset = 7;
3514      V9fsString name;
3515      struct stat stbuf;
3516      V9fsFidState *fidp;
3517      V9fsPDU *pdu = opaque;
3518  
3519      v9fs_string_init(&name);
3520      err = pdu_unmarshal(pdu, offset, "dsdddd", &fid, &name, &mode,
3521                          &major, &minor, &gid);
3522      if (err < 0) {
3523          goto out_nofid;
3524      }
3525      trace_v9fs_mknod(pdu->tag, pdu->id, fid, mode, major, minor);
3526  
3527      if (name_is_illegal(name.data)) {
3528          err = -ENOENT;
3529          goto out_nofid;
3530      }
3531  
3532      if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
3533          err = -EEXIST;
3534          goto out_nofid;
3535      }
3536  
3537      fidp = get_fid(pdu, fid);
3538      if (fidp == NULL) {
3539          err = -ENOENT;
3540          goto out_nofid;
3541      }
3542      err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, gid,
3543                          makedev(major, minor), mode, &stbuf);
3544      if (err < 0) {
3545          goto out;
3546      }
3547      err = stat_to_qid(pdu, &stbuf, &qid);
3548      if (err < 0) {
3549          goto out;
3550      }
3551      err = pdu_marshal(pdu, offset, "Q", &qid);
3552      if (err < 0) {
3553          goto out;
3554      }
3555      err += offset;
3556      trace_v9fs_mknod_return(pdu->tag, pdu->id,
3557                              qid.type, qid.version, qid.path);
3558  out:
3559      put_fid(pdu, fidp);
3560  out_nofid:
3561      pdu_complete(pdu, err);
3562      v9fs_string_free(&name);
3563  }
3564  
3565  /*
3566   * Implement posix byte range locking code
3567   * Server side handling of locking code is very simple, because 9p server in
3568   * QEMU can handle only one client. And most of the lock handling
3569   * (like conflict, merging) etc is done by the VFS layer itself, so no need to
3570   * do any thing in * qemu 9p server side lock code path.
3571   * So when a TLOCK request comes, always return success
3572   */
3573  static void coroutine_fn v9fs_lock(void *opaque)
3574  {
3575      V9fsFlock flock;
3576      size_t offset = 7;
3577      struct stat stbuf;
3578      V9fsFidState *fidp;
3579      int32_t fid, err = 0;
3580      V9fsPDU *pdu = opaque;
3581  
3582      v9fs_string_init(&flock.client_id);
3583      err = pdu_unmarshal(pdu, offset, "dbdqqds", &fid, &flock.type,
3584                          &flock.flags, &flock.start, &flock.length,
3585                          &flock.proc_id, &flock.client_id);
3586      if (err < 0) {
3587          goto out_nofid;
3588      }
3589      trace_v9fs_lock(pdu->tag, pdu->id, fid,
3590                      flock.type, flock.start, flock.length);
3591  
3592  
3593      /* We support only block flag now (that too ignored currently) */
3594      if (flock.flags & ~P9_LOCK_FLAGS_BLOCK) {
3595          err = -EINVAL;
3596          goto out_nofid;
3597      }
3598      fidp = get_fid(pdu, fid);
3599      if (fidp == NULL) {
3600          err = -ENOENT;
3601          goto out_nofid;
3602      }
3603      err = v9fs_co_fstat(pdu, fidp, &stbuf);
3604      if (err < 0) {
3605          goto out;
3606      }
3607      err = pdu_marshal(pdu, offset, "b", P9_LOCK_SUCCESS);
3608      if (err < 0) {
3609          goto out;
3610      }
3611      err += offset;
3612      trace_v9fs_lock_return(pdu->tag, pdu->id, P9_LOCK_SUCCESS);
3613  out:
3614      put_fid(pdu, fidp);
3615  out_nofid:
3616      pdu_complete(pdu, err);
3617      v9fs_string_free(&flock.client_id);
3618  }
3619  
3620  /*
3621   * When a TGETLOCK request comes, always return success because all lock
3622   * handling is done by client's VFS layer.
3623   */
3624  static void coroutine_fn v9fs_getlock(void *opaque)
3625  {
3626      size_t offset = 7;
3627      struct stat stbuf;
3628      V9fsFidState *fidp;
3629      V9fsGetlock glock;
3630      int32_t fid, err = 0;
3631      V9fsPDU *pdu = opaque;
3632  
3633      v9fs_string_init(&glock.client_id);
3634      err = pdu_unmarshal(pdu, offset, "dbqqds", &fid, &glock.type,
3635                          &glock.start, &glock.length, &glock.proc_id,
3636                          &glock.client_id);
3637      if (err < 0) {
3638          goto out_nofid;
3639      }
3640      trace_v9fs_getlock(pdu->tag, pdu->id, fid,
3641                         glock.type, glock.start, glock.length);
3642  
3643      fidp = get_fid(pdu, fid);
3644      if (fidp == NULL) {
3645          err = -ENOENT;
3646          goto out_nofid;
3647      }
3648      err = v9fs_co_fstat(pdu, fidp, &stbuf);
3649      if (err < 0) {
3650          goto out;
3651      }
3652      glock.type = P9_LOCK_TYPE_UNLCK;
3653      err = pdu_marshal(pdu, offset, "bqqds", glock.type,
3654                            glock.start, glock.length, glock.proc_id,
3655                            &glock.client_id);
3656      if (err < 0) {
3657          goto out;
3658      }
3659      err += offset;
3660      trace_v9fs_getlock_return(pdu->tag, pdu->id, glock.type, glock.start,
3661                                glock.length, glock.proc_id);
3662  out:
3663      put_fid(pdu, fidp);
3664  out_nofid:
3665      pdu_complete(pdu, err);
3666      v9fs_string_free(&glock.client_id);
3667  }
3668  
3669  static void coroutine_fn v9fs_mkdir(void *opaque)
3670  {
3671      V9fsPDU *pdu = opaque;
3672      size_t offset = 7;
3673      int32_t fid;
3674      struct stat stbuf;
3675      V9fsQID qid;
3676      V9fsString name;
3677      V9fsFidState *fidp;
3678      gid_t gid;
3679      int mode;
3680      int err = 0;
3681  
3682      v9fs_string_init(&name);
3683      err = pdu_unmarshal(pdu, offset, "dsdd", &fid, &name, &mode, &gid);
3684      if (err < 0) {
3685          goto out_nofid;
3686      }
3687      trace_v9fs_mkdir(pdu->tag, pdu->id, fid, name.data, mode, gid);
3688  
3689      if (name_is_illegal(name.data)) {
3690          err = -ENOENT;
3691          goto out_nofid;
3692      }
3693  
3694      if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
3695          err = -EEXIST;
3696          goto out_nofid;
3697      }
3698  
3699      fidp = get_fid(pdu, fid);
3700      if (fidp == NULL) {
3701          err = -ENOENT;
3702          goto out_nofid;
3703      }
3704      err = v9fs_co_mkdir(pdu, fidp, &name, mode, fidp->uid, gid, &stbuf);
3705      if (err < 0) {
3706          goto out;
3707      }
3708      err = stat_to_qid(pdu, &stbuf, &qid);
3709      if (err < 0) {
3710          goto out;
3711      }
3712      err = pdu_marshal(pdu, offset, "Q", &qid);
3713      if (err < 0) {
3714          goto out;
3715      }
3716      err += offset;
3717      trace_v9fs_mkdir_return(pdu->tag, pdu->id,
3718                              qid.type, qid.version, qid.path, err);
3719  out:
3720      put_fid(pdu, fidp);
3721  out_nofid:
3722      pdu_complete(pdu, err);
3723      v9fs_string_free(&name);
3724  }
3725  
3726  static void coroutine_fn v9fs_xattrwalk(void *opaque)
3727  {
3728      int64_t size;
3729      V9fsString name;
3730      ssize_t err = 0;
3731      size_t offset = 7;
3732      int32_t fid, newfid;
3733      V9fsFidState *file_fidp;
3734      V9fsFidState *xattr_fidp = NULL;
3735      V9fsPDU *pdu = opaque;
3736      V9fsState *s = pdu->s;
3737  
3738      v9fs_string_init(&name);
3739      err = pdu_unmarshal(pdu, offset, "dds", &fid, &newfid, &name);
3740      if (err < 0) {
3741          goto out_nofid;
3742      }
3743      trace_v9fs_xattrwalk(pdu->tag, pdu->id, fid, newfid, name.data);
3744  
3745      file_fidp = get_fid(pdu, fid);
3746      if (file_fidp == NULL) {
3747          err = -ENOENT;
3748          goto out_nofid;
3749      }
3750      xattr_fidp = alloc_fid(s, newfid);
3751      if (xattr_fidp == NULL) {
3752          err = -EINVAL;
3753          goto out;
3754      }
3755      v9fs_path_copy(&xattr_fidp->path, &file_fidp->path);
3756      if (!v9fs_string_size(&name)) {
3757          /*
3758           * listxattr request. Get the size first
3759           */
3760          size = v9fs_co_llistxattr(pdu, &xattr_fidp->path, NULL, 0);
3761          if (size < 0) {
3762              err = size;
3763              clunk_fid(s, xattr_fidp->fid);
3764              goto out;
3765          }
3766          /*
3767           * Read the xattr value
3768           */
3769          xattr_fidp->fs.xattr.len = size;
3770          xattr_fidp->fid_type = P9_FID_XATTR;
3771          xattr_fidp->fs.xattr.xattrwalk_fid = true;
3772          xattr_fidp->fs.xattr.value = g_malloc0(size);
3773          if (size) {
3774              err = v9fs_co_llistxattr(pdu, &xattr_fidp->path,
3775                                       xattr_fidp->fs.xattr.value,
3776                                       xattr_fidp->fs.xattr.len);
3777              if (err < 0) {
3778                  clunk_fid(s, xattr_fidp->fid);
3779                  goto out;
3780              }
3781          }
3782          err = pdu_marshal(pdu, offset, "q", size);
3783          if (err < 0) {
3784              goto out;
3785          }
3786          err += offset;
3787      } else {
3788          /*
3789           * specific xattr fid. We check for xattr
3790           * presence also collect the xattr size
3791           */
3792          size = v9fs_co_lgetxattr(pdu, &xattr_fidp->path,
3793                                   &name, NULL, 0);
3794          if (size < 0) {
3795              err = size;
3796              clunk_fid(s, xattr_fidp->fid);
3797              goto out;
3798          }
3799          /*
3800           * Read the xattr value
3801           */
3802          xattr_fidp->fs.xattr.len = size;
3803          xattr_fidp->fid_type = P9_FID_XATTR;
3804          xattr_fidp->fs.xattr.xattrwalk_fid = true;
3805          xattr_fidp->fs.xattr.value = g_malloc0(size);
3806          if (size) {
3807              err = v9fs_co_lgetxattr(pdu, &xattr_fidp->path,
3808                                      &name, xattr_fidp->fs.xattr.value,
3809                                      xattr_fidp->fs.xattr.len);
3810              if (err < 0) {
3811                  clunk_fid(s, xattr_fidp->fid);
3812                  goto out;
3813              }
3814          }
3815          err = pdu_marshal(pdu, offset, "q", size);
3816          if (err < 0) {
3817              goto out;
3818          }
3819          err += offset;
3820      }
3821      trace_v9fs_xattrwalk_return(pdu->tag, pdu->id, size);
3822  out:
3823      put_fid(pdu, file_fidp);
3824      if (xattr_fidp) {
3825          put_fid(pdu, xattr_fidp);
3826      }
3827  out_nofid:
3828      pdu_complete(pdu, err);
3829      v9fs_string_free(&name);
3830  }
3831  
3832  static void coroutine_fn v9fs_xattrcreate(void *opaque)
3833  {
3834      int flags, rflags = 0;
3835      int32_t fid;
3836      uint64_t size;
3837      ssize_t err = 0;
3838      V9fsString name;
3839      size_t offset = 7;
3840      V9fsFidState *file_fidp;
3841      V9fsFidState *xattr_fidp;
3842      V9fsPDU *pdu = opaque;
3843  
3844      v9fs_string_init(&name);
3845      err = pdu_unmarshal(pdu, offset, "dsqd", &fid, &name, &size, &flags);
3846      if (err < 0) {
3847          goto out_nofid;
3848      }
3849      trace_v9fs_xattrcreate(pdu->tag, pdu->id, fid, name.data, size, flags);
3850  
3851      if (flags & ~(P9_XATTR_CREATE | P9_XATTR_REPLACE)) {
3852          err = -EINVAL;
3853          goto out_nofid;
3854      }
3855  
3856      if (flags & P9_XATTR_CREATE) {
3857          rflags |= XATTR_CREATE;
3858      }
3859  
3860      if (flags & P9_XATTR_REPLACE) {
3861          rflags |= XATTR_REPLACE;
3862      }
3863  
3864      if (size > XATTR_SIZE_MAX) {
3865          err = -E2BIG;
3866          goto out_nofid;
3867      }
3868  
3869      file_fidp = get_fid(pdu, fid);
3870      if (file_fidp == NULL) {
3871          err = -EINVAL;
3872          goto out_nofid;
3873      }
3874      if (file_fidp->fid_type != P9_FID_NONE) {
3875          err = -EINVAL;
3876          goto out_put_fid;
3877      }
3878  
3879      /* Make the file fid point to xattr */
3880      xattr_fidp = file_fidp;
3881      xattr_fidp->fid_type = P9_FID_XATTR;
3882      xattr_fidp->fs.xattr.copied_len = 0;
3883      xattr_fidp->fs.xattr.xattrwalk_fid = false;
3884      xattr_fidp->fs.xattr.len = size;
3885      xattr_fidp->fs.xattr.flags = rflags;
3886      v9fs_string_init(&xattr_fidp->fs.xattr.name);
3887      v9fs_string_copy(&xattr_fidp->fs.xattr.name, &name);
3888      xattr_fidp->fs.xattr.value = g_malloc0(size);
3889      err = offset;
3890  out_put_fid:
3891      put_fid(pdu, file_fidp);
3892  out_nofid:
3893      pdu_complete(pdu, err);
3894      v9fs_string_free(&name);
3895  }
3896  
3897  static void coroutine_fn v9fs_readlink(void *opaque)
3898  {
3899      V9fsPDU *pdu = opaque;
3900      size_t offset = 7;
3901      V9fsString target;
3902      int32_t fid;
3903      int err = 0;
3904      V9fsFidState *fidp;
3905  
3906      err = pdu_unmarshal(pdu, offset, "d", &fid);
3907      if (err < 0) {
3908          goto out_nofid;
3909      }
3910      trace_v9fs_readlink(pdu->tag, pdu->id, fid);
3911      fidp = get_fid(pdu, fid);
3912      if (fidp == NULL) {
3913          err = -ENOENT;
3914          goto out_nofid;
3915      }
3916  
3917      v9fs_string_init(&target);
3918      err = v9fs_co_readlink(pdu, &fidp->path, &target);
3919      if (err < 0) {
3920          goto out;
3921      }
3922      err = pdu_marshal(pdu, offset, "s", &target);
3923      if (err < 0) {
3924          v9fs_string_free(&target);
3925          goto out;
3926      }
3927      err += offset;
3928      trace_v9fs_readlink_return(pdu->tag, pdu->id, target.data);
3929      v9fs_string_free(&target);
3930  out:
3931      put_fid(pdu, fidp);
3932  out_nofid:
3933      pdu_complete(pdu, err);
3934  }
3935  
3936  static CoroutineEntry *pdu_co_handlers[] = {
3937      [P9_TREADDIR] = v9fs_readdir,
3938      [P9_TSTATFS] = v9fs_statfs,
3939      [P9_TGETATTR] = v9fs_getattr,
3940      [P9_TSETATTR] = v9fs_setattr,
3941      [P9_TXATTRWALK] = v9fs_xattrwalk,
3942      [P9_TXATTRCREATE] = v9fs_xattrcreate,
3943      [P9_TMKNOD] = v9fs_mknod,
3944      [P9_TRENAME] = v9fs_rename,
3945      [P9_TLOCK] = v9fs_lock,
3946      [P9_TGETLOCK] = v9fs_getlock,
3947      [P9_TRENAMEAT] = v9fs_renameat,
3948      [P9_TREADLINK] = v9fs_readlink,
3949      [P9_TUNLINKAT] = v9fs_unlinkat,
3950      [P9_TMKDIR] = v9fs_mkdir,
3951      [P9_TVERSION] = v9fs_version,
3952      [P9_TLOPEN] = v9fs_open,
3953      [P9_TATTACH] = v9fs_attach,
3954      [P9_TSTAT] = v9fs_stat,
3955      [P9_TWALK] = v9fs_walk,
3956      [P9_TCLUNK] = v9fs_clunk,
3957      [P9_TFSYNC] = v9fs_fsync,
3958      [P9_TOPEN] = v9fs_open,
3959      [P9_TREAD] = v9fs_read,
3960  #if 0
3961      [P9_TAUTH] = v9fs_auth,
3962  #endif
3963      [P9_TFLUSH] = v9fs_flush,
3964      [P9_TLINK] = v9fs_link,
3965      [P9_TSYMLINK] = v9fs_symlink,
3966      [P9_TCREATE] = v9fs_create,
3967      [P9_TLCREATE] = v9fs_lcreate,
3968      [P9_TWRITE] = v9fs_write,
3969      [P9_TWSTAT] = v9fs_wstat,
3970      [P9_TREMOVE] = v9fs_remove,
3971  };
3972  
3973  static void coroutine_fn v9fs_op_not_supp(void *opaque)
3974  {
3975      V9fsPDU *pdu = opaque;
3976      pdu_complete(pdu, -EOPNOTSUPP);
3977  }
3978  
3979  static void coroutine_fn v9fs_fs_ro(void *opaque)
3980  {
3981      V9fsPDU *pdu = opaque;
3982      pdu_complete(pdu, -EROFS);
3983  }
3984  
3985  static inline bool is_read_only_op(V9fsPDU *pdu)
3986  {
3987      switch (pdu->id) {
3988      case P9_TREADDIR:
3989      case P9_TSTATFS:
3990      case P9_TGETATTR:
3991      case P9_TXATTRWALK:
3992      case P9_TLOCK:
3993      case P9_TGETLOCK:
3994      case P9_TREADLINK:
3995      case P9_TVERSION:
3996      case P9_TLOPEN:
3997      case P9_TATTACH:
3998      case P9_TSTAT:
3999      case P9_TWALK:
4000      case P9_TCLUNK:
4001      case P9_TFSYNC:
4002      case P9_TOPEN:
4003      case P9_TREAD:
4004      case P9_TAUTH:
4005      case P9_TFLUSH:
4006          return 1;
4007      default:
4008          return 0;
4009      }
4010  }
4011  
4012  void pdu_submit(V9fsPDU *pdu, P9MsgHeader *hdr)
4013  {
4014      Coroutine *co;
4015      CoroutineEntry *handler;
4016      V9fsState *s = pdu->s;
4017  
4018      pdu->size = le32_to_cpu(hdr->size_le);
4019      pdu->id = hdr->id;
4020      pdu->tag = le16_to_cpu(hdr->tag_le);
4021  
4022      if (pdu->id >= ARRAY_SIZE(pdu_co_handlers) ||
4023          (pdu_co_handlers[pdu->id] == NULL)) {
4024          handler = v9fs_op_not_supp;
4025      } else if (is_ro_export(&s->ctx) && !is_read_only_op(pdu)) {
4026          handler = v9fs_fs_ro;
4027      } else {
4028          handler = pdu_co_handlers[pdu->id];
4029      }
4030  
4031      qemu_co_queue_init(&pdu->complete);
4032      co = qemu_coroutine_create(handler, pdu);
4033      qemu_coroutine_enter(co);
4034  }
4035  
4036  /* Returns 0 on success, 1 on failure. */
4037  int v9fs_device_realize_common(V9fsState *s, const V9fsTransport *t,
4038                                 Error **errp)
4039  {
4040      ERRP_GUARD();
4041      int i, len;
4042      struct stat stat;
4043      FsDriverEntry *fse;
4044      V9fsPath path;
4045      int rc = 1;
4046  
4047      assert(!s->transport);
4048      s->transport = t;
4049  
4050      /* initialize pdu allocator */
4051      QLIST_INIT(&s->free_list);
4052      QLIST_INIT(&s->active_list);
4053      for (i = 0; i < MAX_REQ; i++) {
4054          QLIST_INSERT_HEAD(&s->free_list, &s->pdus[i], next);
4055          s->pdus[i].s = s;
4056          s->pdus[i].idx = i;
4057      }
4058  
4059      v9fs_path_init(&path);
4060  
4061      fse = get_fsdev_fsentry(s->fsconf.fsdev_id);
4062  
4063      if (!fse) {
4064          /* We don't have a fsdev identified by fsdev_id */
4065          error_setg(errp, "9pfs device couldn't find fsdev with the "
4066                     "id = %s",
4067                     s->fsconf.fsdev_id ? s->fsconf.fsdev_id : "NULL");
4068          goto out;
4069      }
4070  
4071      if (!s->fsconf.tag) {
4072          /* we haven't specified a mount_tag */
4073          error_setg(errp, "fsdev with id %s needs mount_tag arguments",
4074                     s->fsconf.fsdev_id);
4075          goto out;
4076      }
4077  
4078      s->ctx.export_flags = fse->export_flags;
4079      s->ctx.fs_root = g_strdup(fse->path);
4080      s->ctx.exops.get_st_gen = NULL;
4081      len = strlen(s->fsconf.tag);
4082      if (len > MAX_TAG_LEN - 1) {
4083          error_setg(errp, "mount tag '%s' (%d bytes) is longer than "
4084                     "maximum (%d bytes)", s->fsconf.tag, len, MAX_TAG_LEN - 1);
4085          goto out;
4086      }
4087  
4088      s->tag = g_strdup(s->fsconf.tag);
4089      s->ctx.uid = -1;
4090  
4091      s->ops = fse->ops;
4092  
4093      s->ctx.fmode = fse->fmode;
4094      s->ctx.dmode = fse->dmode;
4095  
4096      QSIMPLEQ_INIT(&s->fid_list);
4097      qemu_co_rwlock_init(&s->rename_lock);
4098  
4099      if (s->ops->init(&s->ctx, errp) < 0) {
4100          error_prepend(errp, "cannot initialize fsdev '%s': ",
4101                        s->fsconf.fsdev_id);
4102          goto out;
4103      }
4104  
4105      /*
4106       * Check details of export path, We need to use fs driver
4107       * call back to do that. Since we are in the init path, we don't
4108       * use co-routines here.
4109       */
4110      if (s->ops->name_to_path(&s->ctx, NULL, "/", &path) < 0) {
4111          error_setg(errp,
4112                     "error in converting name to path %s", strerror(errno));
4113          goto out;
4114      }
4115      if (s->ops->lstat(&s->ctx, &path, &stat)) {
4116          error_setg(errp, "share path %s does not exist", fse->path);
4117          goto out;
4118      } else if (!S_ISDIR(stat.st_mode)) {
4119          error_setg(errp, "share path %s is not a directory", fse->path);
4120          goto out;
4121      }
4122  
4123      s->dev_id = stat.st_dev;
4124  
4125      /* init inode remapping : */
4126      /* hash table for variable length inode suffixes */
4127      qpd_table_init(&s->qpd_table);
4128      /* hash table for slow/full inode remapping (most users won't need it) */
4129      qpf_table_init(&s->qpf_table);
4130      /* hash table for quick inode remapping */
4131      qpp_table_init(&s->qpp_table);
4132      s->qp_ndevices = 0;
4133      s->qp_affix_next = 1; /* reserve 0 to detect overflow */
4134      s->qp_fullpath_next = 1;
4135  
4136      s->ctx.fst = &fse->fst;
4137      fsdev_throttle_init(s->ctx.fst);
4138  
4139      rc = 0;
4140  out:
4141      if (rc) {
4142          v9fs_device_unrealize_common(s);
4143      }
4144      v9fs_path_free(&path);
4145      return rc;
4146  }
4147  
4148  void v9fs_device_unrealize_common(V9fsState *s)
4149  {
4150      if (s->ops && s->ops->cleanup) {
4151          s->ops->cleanup(&s->ctx);
4152      }
4153      if (s->ctx.fst) {
4154          fsdev_throttle_cleanup(s->ctx.fst);
4155      }
4156      g_free(s->tag);
4157      qp_table_destroy(&s->qpd_table);
4158      qp_table_destroy(&s->qpp_table);
4159      qp_table_destroy(&s->qpf_table);
4160      g_free(s->ctx.fs_root);
4161  }
4162  
4163  typedef struct VirtfsCoResetData {
4164      V9fsPDU pdu;
4165      bool done;
4166  } VirtfsCoResetData;
4167  
4168  static void coroutine_fn virtfs_co_reset(void *opaque)
4169  {
4170      VirtfsCoResetData *data = opaque;
4171  
4172      virtfs_reset(&data->pdu);
4173      data->done = true;
4174  }
4175  
4176  void v9fs_reset(V9fsState *s)
4177  {
4178      VirtfsCoResetData data = { .pdu = { .s = s }, .done = false };
4179      Coroutine *co;
4180  
4181      while (!QLIST_EMPTY(&s->active_list)) {
4182          aio_poll(qemu_get_aio_context(), true);
4183      }
4184  
4185      co = qemu_coroutine_create(virtfs_co_reset, &data);
4186      qemu_coroutine_enter(co);
4187  
4188      while (!data.done) {
4189          aio_poll(qemu_get_aio_context(), true);
4190      }
4191  }
4192  
4193  static void __attribute__((__constructor__)) v9fs_set_fd_limit(void)
4194  {
4195      struct rlimit rlim;
4196      if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) {
4197          error_report("Failed to get the resource limit");
4198          exit(1);
4199      }
4200      open_fd_hw = rlim.rlim_cur - MIN(400, rlim.rlim_cur / 3);
4201      open_fd_rc = rlim.rlim_cur / 2;
4202  }
4203