xref: /openbmc/qemu/hw/9pfs/9p.c (revision 988717b46b6424907618cb845ace9d69062703af)
1  /*
2   * Virtio 9p backend
3   *
4   * Copyright IBM, Corp. 2010
5   *
6   * Authors:
7   *  Anthony Liguori   <aliguori@us.ibm.com>
8   *
9   * This work is licensed under the terms of the GNU GPL, version 2.  See
10   * the COPYING file in the top-level directory.
11   *
12   */
13  
14  #include "qemu/osdep.h"
15  #include <glib/gprintf.h>
16  #include "hw/virtio/virtio.h"
17  #include "qapi/error.h"
18  #include "qemu/error-report.h"
19  #include "qemu/iov.h"
20  #include "qemu/main-loop.h"
21  #include "qemu/sockets.h"
22  #include "virtio-9p.h"
23  #include "fsdev/qemu-fsdev.h"
24  #include "9p-xattr.h"
25  #include "coth.h"
26  #include "trace.h"
27  #include "migration/blocker.h"
28  #include "sysemu/qtest.h"
29  #include "qemu/xxhash.h"
30  #include <math.h>
31  
/*
 * File descriptor accounting shared with the fd reclaim logic.
 * NOTE(review): open_fd_hw and total_open_fd are not referenced in this part
 * of the file; presumably a high-water mark and the current number of open
 * descriptors -- confirm against their users.
 */
int open_fd_hw;
int total_open_fd;
/* v9fs_reclaim_fd() stops collecting fids once it has gathered this many. */
static int open_fd_rc;
35  
/* Open mode bits as decoded by omode_to_uflags(). */
enum {
    /* access mode, selected by (mode & 3) */
    Oread   = 0x00,
    Owrite  = 0x01,
    Ordwr   = 0x02,
    Oexec   = 0x03,
    /* modifier bits */
    Oexcl   = 0x04,
    Otrunc  = 0x10,
    Orexec  = 0x20,
    Orclose = 0x40,
    Oappend = 0x80,
};
47  
48  static ssize_t pdu_marshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...)
49  {
50      ssize_t ret;
51      va_list ap;
52  
53      va_start(ap, fmt);
54      ret = pdu->s->transport->pdu_vmarshal(pdu, offset, fmt, ap);
55      va_end(ap);
56  
57      return ret;
58  }
59  
60  static ssize_t pdu_unmarshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...)
61  {
62      ssize_t ret;
63      va_list ap;
64  
65      va_start(ap, fmt);
66      ret = pdu->s->transport->pdu_vunmarshal(pdu, offset, fmt, ap);
67      va_end(ap);
68  
69      return ret;
70  }
71  
72  static int omode_to_uflags(int8_t mode)
73  {
74      int ret = 0;
75  
76      switch (mode & 3) {
77      case Oread:
78          ret = O_RDONLY;
79          break;
80      case Ordwr:
81          ret = O_RDWR;
82          break;
83      case Owrite:
84          ret = O_WRONLY;
85          break;
86      case Oexec:
87          ret = O_RDONLY;
88          break;
89      }
90  
91      if (mode & Otrunc) {
92          ret |= O_TRUNC;
93      }
94  
95      if (mode & Oappend) {
96          ret |= O_APPEND;
97      }
98  
99      if (mode & Oexcl) {
100          ret |= O_EXCL;
101      }
102  
103      return ret;
104  }
105  
/* One row of the table used by dotl_to_open_flags(). */
typedef struct DotlOpenflagMap {
    int dotl_flag;  /* P9_DOTL_* bit tested in the client supplied flags */
    int open_flag;  /* host open flag OR-ed in when that bit is set */
} DotlOpenflagMap;
110  
111  static int dotl_to_open_flags(int flags)
112  {
113      int i;
114      /*
115       * We have same bits for P9_DOTL_READONLY, P9_DOTL_WRONLY
116       * and P9_DOTL_NOACCESS
117       */
118      int oflags = flags & O_ACCMODE;
119  
120      DotlOpenflagMap dotl_oflag_map[] = {
121          { P9_DOTL_CREATE, O_CREAT },
122          { P9_DOTL_EXCL, O_EXCL },
123          { P9_DOTL_NOCTTY , O_NOCTTY },
124          { P9_DOTL_TRUNC, O_TRUNC },
125          { P9_DOTL_APPEND, O_APPEND },
126          { P9_DOTL_NONBLOCK, O_NONBLOCK } ,
127          { P9_DOTL_DSYNC, O_DSYNC },
128          { P9_DOTL_FASYNC, FASYNC },
129          { P9_DOTL_DIRECT, O_DIRECT },
130          { P9_DOTL_LARGEFILE, O_LARGEFILE },
131          { P9_DOTL_DIRECTORY, O_DIRECTORY },
132          { P9_DOTL_NOFOLLOW, O_NOFOLLOW },
133          { P9_DOTL_NOATIME, O_NOATIME },
134          { P9_DOTL_SYNC, O_SYNC },
135      };
136  
137      for (i = 0; i < ARRAY_SIZE(dotl_oflag_map); i++) {
138          if (flags & dotl_oflag_map[i].dotl_flag) {
139              oflags |= dotl_oflag_map[i].open_flag;
140          }
141      }
142  
143      return oflags;
144  }
145  
146  void cred_init(FsCred *credp)
147  {
148      credp->fc_uid = -1;
149      credp->fc_gid = -1;
150      credp->fc_mode = -1;
151      credp->fc_rdev = -1;
152  }
153  
154  static int get_dotl_openflags(V9fsState *s, int oflags)
155  {
156      int flags;
157      /*
158       * Filter the client open flags
159       */
160      flags = dotl_to_open_flags(oflags);
161      flags &= ~(O_NOCTTY | O_ASYNC | O_CREAT);
162      /*
163       * Ignore direct disk access hint until the server supports it.
164       */
165      flags &= ~O_DIRECT;
166      return flags;
167  }
168  
169  void v9fs_path_init(V9fsPath *path)
170  {
171      path->data = NULL;
172      path->size = 0;
173  }
174  
175  void v9fs_path_free(V9fsPath *path)
176  {
177      g_free(path->data);
178      path->data = NULL;
179      path->size = 0;
180  }
181  
182  
183  void GCC_FMT_ATTR(2, 3)
184  v9fs_path_sprintf(V9fsPath *path, const char *fmt, ...)
185  {
186      va_list ap;
187  
188      v9fs_path_free(path);
189  
190      va_start(ap, fmt);
191      /* Bump the size for including terminating NULL */
192      path->size = g_vasprintf(&path->data, fmt, ap) + 1;
193      va_end(ap);
194  }
195  
196  void v9fs_path_copy(V9fsPath *dst, const V9fsPath *src)
197  {
198      v9fs_path_free(dst);
199      dst->size = src->size;
200      dst->data = g_memdup(src->data, src->size);
201  }
202  
203  int v9fs_name_to_path(V9fsState *s, V9fsPath *dirpath,
204                        const char *name, V9fsPath *path)
205  {
206      int err;
207      err = s->ops->name_to_path(&s->ctx, dirpath, name, path);
208      if (err < 0) {
209          err = -errno;
210      }
211      return err;
212  }
213  
214  /*
215   * Return TRUE if s1 is an ancestor of s2.
216   *
217   * E.g. "a/b" is an ancestor of "a/b/c" but not of "a/bc/d".
218   * As a special case, We treat s1 as ancestor of s2 if they are same!
219   */
220  static int v9fs_path_is_ancestor(V9fsPath *s1, V9fsPath *s2)
221  {
222      if (!strncmp(s1->data, s2->data, s1->size - 1)) {
223          if (s2->data[s1->size - 1] == '\0' || s2->data[s1->size - 1] == '/') {
224              return 1;
225          }
226      }
227      return 0;
228  }
229  
230  static size_t v9fs_string_size(V9fsString *str)
231  {
232      return str->size;
233  }
234  
235  /*
236   * returns 0 if fid got re-opened, 1 if not, < 0 on error */
237  static int coroutine_fn v9fs_reopen_fid(V9fsPDU *pdu, V9fsFidState *f)
238  {
239      int err = 1;
240      if (f->fid_type == P9_FID_FILE) {
241          if (f->fs.fd == -1) {
242              do {
243                  err = v9fs_co_open(pdu, f, f->open_flags);
244              } while (err == -EINTR && !pdu->cancelled);
245          }
246      } else if (f->fid_type == P9_FID_DIR) {
247          if (f->fs.dir.stream == NULL) {
248              do {
249                  err = v9fs_co_opendir(pdu, f);
250              } while (err == -EINTR && !pdu->cancelled);
251          }
252      }
253      return err;
254  }
255  
256  static V9fsFidState *coroutine_fn get_fid(V9fsPDU *pdu, int32_t fid)
257  {
258      int err;
259      V9fsFidState *f;
260      V9fsState *s = pdu->s;
261  
262      for (f = s->fid_list; f; f = f->next) {
263          BUG_ON(f->clunked);
264          if (f->fid == fid) {
265              /*
266               * Update the fid ref upfront so that
267               * we don't get reclaimed when we yield
268               * in open later.
269               */
270              f->ref++;
271              /*
272               * check whether we need to reopen the
273               * file. We might have closed the fd
274               * while trying to free up some file
275               * descriptors.
276               */
277              err = v9fs_reopen_fid(pdu, f);
278              if (err < 0) {
279                  f->ref--;
280                  return NULL;
281              }
282              /*
283               * Mark the fid as referenced so that the LRU
284               * reclaim won't close the file descriptor
285               */
286              f->flags |= FID_REFERENCED;
287              return f;
288          }
289      }
290      return NULL;
291  }
292  
293  static V9fsFidState *alloc_fid(V9fsState *s, int32_t fid)
294  {
295      V9fsFidState *f;
296  
297      for (f = s->fid_list; f; f = f->next) {
298          /* If fid is already there return NULL */
299          BUG_ON(f->clunked);
300          if (f->fid == fid) {
301              return NULL;
302          }
303      }
304      f = g_malloc0(sizeof(V9fsFidState));
305      f->fid = fid;
306      f->fid_type = P9_FID_NONE;
307      f->ref = 1;
308      /*
309       * Mark the fid as referenced so that the LRU
310       * reclaim won't close the file descriptor
311       */
312      f->flags |= FID_REFERENCED;
313      f->next = s->fid_list;
314      s->fid_list = f;
315  
316      v9fs_readdir_init(&f->fs.dir);
317      v9fs_readdir_init(&f->fs_reclaim.dir);
318  
319      return f;
320  }
321  
322  static int coroutine_fn v9fs_xattr_fid_clunk(V9fsPDU *pdu, V9fsFidState *fidp)
323  {
324      int retval = 0;
325  
326      if (fidp->fs.xattr.xattrwalk_fid) {
327          /* getxattr/listxattr fid */
328          goto free_value;
329      }
330      /*
331       * if this is fid for setxattr. clunk should
332       * result in setxattr localcall
333       */
334      if (fidp->fs.xattr.len != fidp->fs.xattr.copied_len) {
335          /* clunk after partial write */
336          retval = -EINVAL;
337          goto free_out;
338      }
339      if (fidp->fs.xattr.len) {
340          retval = v9fs_co_lsetxattr(pdu, &fidp->path, &fidp->fs.xattr.name,
341                                     fidp->fs.xattr.value,
342                                     fidp->fs.xattr.len,
343                                     fidp->fs.xattr.flags);
344      } else {
345          retval = v9fs_co_lremovexattr(pdu, &fidp->path, &fidp->fs.xattr.name);
346      }
347  free_out:
348      v9fs_string_free(&fidp->fs.xattr.name);
349  free_value:
350      g_free(fidp->fs.xattr.value);
351      return retval;
352  }
353  
354  static int coroutine_fn free_fid(V9fsPDU *pdu, V9fsFidState *fidp)
355  {
356      int retval = 0;
357  
358      if (fidp->fid_type == P9_FID_FILE) {
359          /* If we reclaimed the fd no need to close */
360          if (fidp->fs.fd != -1) {
361              retval = v9fs_co_close(pdu, &fidp->fs);
362          }
363      } else if (fidp->fid_type == P9_FID_DIR) {
364          if (fidp->fs.dir.stream != NULL) {
365              retval = v9fs_co_closedir(pdu, &fidp->fs);
366          }
367      } else if (fidp->fid_type == P9_FID_XATTR) {
368          retval = v9fs_xattr_fid_clunk(pdu, fidp);
369      }
370      v9fs_path_free(&fidp->path);
371      g_free(fidp);
372      return retval;
373  }
374  
375  static int coroutine_fn put_fid(V9fsPDU *pdu, V9fsFidState *fidp)
376  {
377      BUG_ON(!fidp->ref);
378      fidp->ref--;
379      /*
380       * Don't free the fid if it is in reclaim list
381       */
382      if (!fidp->ref && fidp->clunked) {
383          if (fidp->fid == pdu->s->root_fid) {
384              /*
385               * if the clunked fid is root fid then we
386               * have unmounted the fs on the client side.
387               * delete the migration blocker. Ideally, this
388               * should be hooked to transport close notification
389               */
390              if (pdu->s->migration_blocker) {
391                  migrate_del_blocker(pdu->s->migration_blocker);
392                  error_free(pdu->s->migration_blocker);
393                  pdu->s->migration_blocker = NULL;
394              }
395          }
396          return free_fid(pdu, fidp);
397      }
398      return 0;
399  }
400  
401  static V9fsFidState *clunk_fid(V9fsState *s, int32_t fid)
402  {
403      V9fsFidState **fidpp, *fidp;
404  
405      for (fidpp = &s->fid_list; *fidpp; fidpp = &(*fidpp)->next) {
406          if ((*fidpp)->fid == fid) {
407              break;
408          }
409      }
410      if (*fidpp == NULL) {
411          return NULL;
412      }
413      fidp = *fidpp;
414      *fidpp = fidp->next;
415      fidp->clunked = 1;
416      return fidp;
417  }
418  
419  void coroutine_fn v9fs_reclaim_fd(V9fsPDU *pdu)
420  {
421      int reclaim_count = 0;
422      V9fsState *s = pdu->s;
423      V9fsFidState *f, *reclaim_list = NULL;
424  
425      for (f = s->fid_list; f; f = f->next) {
426          /*
427           * Unlink fids cannot be reclaimed. Check
428           * for them and skip them. Also skip fids
429           * currently being operated on.
430           */
431          if (f->ref || f->flags & FID_NON_RECLAIMABLE) {
432              continue;
433          }
434          /*
435           * if it is a recently referenced fid
436           * we leave the fid untouched and clear the
437           * reference bit. We come back to it later
438           * in the next iteration. (a simple LRU without
439           * moving list elements around)
440           */
441          if (f->flags & FID_REFERENCED) {
442              f->flags &= ~FID_REFERENCED;
443              continue;
444          }
445          /*
446           * Add fids to reclaim list.
447           */
448          if (f->fid_type == P9_FID_FILE) {
449              if (f->fs.fd != -1) {
450                  /*
451                   * Up the reference count so that
452                   * a clunk request won't free this fid
453                   */
454                  f->ref++;
455                  f->rclm_lst = reclaim_list;
456                  reclaim_list = f;
457                  f->fs_reclaim.fd = f->fs.fd;
458                  f->fs.fd = -1;
459                  reclaim_count++;
460              }
461          } else if (f->fid_type == P9_FID_DIR) {
462              if (f->fs.dir.stream != NULL) {
463                  /*
464                   * Up the reference count so that
465                   * a clunk request won't free this fid
466                   */
467                  f->ref++;
468                  f->rclm_lst = reclaim_list;
469                  reclaim_list = f;
470                  f->fs_reclaim.dir.stream = f->fs.dir.stream;
471                  f->fs.dir.stream = NULL;
472                  reclaim_count++;
473              }
474          }
475          if (reclaim_count >= open_fd_rc) {
476              break;
477          }
478      }
479      /*
480       * Now close the fid in reclaim list. Free them if they
481       * are already clunked.
482       */
483      while (reclaim_list) {
484          f = reclaim_list;
485          reclaim_list = f->rclm_lst;
486          if (f->fid_type == P9_FID_FILE) {
487              v9fs_co_close(pdu, &f->fs_reclaim);
488          } else if (f->fid_type == P9_FID_DIR) {
489              v9fs_co_closedir(pdu, &f->fs_reclaim);
490          }
491          f->rclm_lst = NULL;
492          /*
493           * Now drop the fid reference, free it
494           * if clunked.
495           */
496          put_fid(pdu, f);
497      }
498  }
499  
500  static int coroutine_fn v9fs_mark_fids_unreclaim(V9fsPDU *pdu, V9fsPath *path)
501  {
502      int err;
503      V9fsState *s = pdu->s;
504      V9fsFidState *fidp, head_fid;
505  
506      head_fid.next = s->fid_list;
507      for (fidp = s->fid_list; fidp; fidp = fidp->next) {
508          if (fidp->path.size != path->size) {
509              continue;
510          }
511          if (!memcmp(fidp->path.data, path->data, path->size)) {
512              /* Mark the fid non reclaimable. */
513              fidp->flags |= FID_NON_RECLAIMABLE;
514  
515              /* reopen the file/dir if already closed */
516              err = v9fs_reopen_fid(pdu, fidp);
517              if (err < 0) {
518                  return err;
519              }
520              /*
521               * Go back to head of fid list because
522               * the list could have got updated when
523               * switched to the worker thread
524               */
525              if (err == 0) {
526                  fidp = &head_fid;
527              }
528          }
529      }
530      return 0;
531  }
532  
533  static void coroutine_fn virtfs_reset(V9fsPDU *pdu)
534  {
535      V9fsState *s = pdu->s;
536      V9fsFidState *fidp;
537  
538      /* Free all fids */
539      while (s->fid_list) {
540          /* Get fid */
541          fidp = s->fid_list;
542          fidp->ref++;
543  
544          /* Clunk fid */
545          s->fid_list = fidp->next;
546          fidp->clunked = 1;
547  
548          put_fid(pdu, fidp);
549      }
550  }
551  
/* QID type bits set by stat_to_qid() */
#define P9_QID_TYPE_DIR         0x80
#define P9_QID_TYPE_SYMLINK     0x02

/* 9P stat mode bits */
#define P9_STAT_MODE_DIR        0x80000000
#define P9_STAT_MODE_APPEND     0x40000000
#define P9_STAT_MODE_EXCL       0x20000000
#define P9_STAT_MODE_MOUNT      0x10000000
#define P9_STAT_MODE_AUTH       0x08000000
#define P9_STAT_MODE_TMP        0x04000000
#define P9_STAT_MODE_SYMLINK    0x02000000
#define P9_STAT_MODE_LINK       0x01000000
#define P9_STAT_MODE_DEVICE     0x00800000
#define P9_STAT_MODE_NAMED_PIPE 0x00200000
#define P9_STAT_MODE_SOCKET     0x00100000
#define P9_STAT_MODE_SETUID     0x00080000
#define P9_STAT_MODE_SETGID     0x00040000
#define P9_STAT_MODE_SETVTX     0x00010000

/* Union of the mode bits above that describe the file type. */
#define P9_STAT_MODE_TYPE_BITS  \
    (P9_STAT_MODE_DIR |         \
     P9_STAT_MODE_SYMLINK |     \
     P9_STAT_MODE_LINK |        \
     P9_STAT_MODE_DEVICE |      \
     P9_STAT_MODE_NAMED_PIPE |  \
     P9_STAT_MODE_SOCKET)
576  
577  /* Mirrors all bits of a byte. So e.g. binary 10100000 would become 00000101. */
static inline uint8_t mirror8bit(uint8_t byte)
{
    unsigned int b = byte;

    /* reverse by swapping ever smaller groups: nibbles, pairs, bits */
    b = ((b & 0xF0u) >> 4) | ((b & 0x0Fu) << 4);
    b = ((b & 0xCCu) >> 2) | ((b & 0x33u) << 2);
    b = ((b & 0xAAu) >> 1) | ((b & 0x55u) << 1);

    return (uint8_t) b;
}
582  
583  /* Same as mirror8bit() just for a 64 bit data type instead for a byte. */
static inline uint64_t mirror64bit(uint64_t value)
{
    uint64_t mirrored = 0;
    int i;

    /*
     * Walk the input from its lowest to its highest byte. Each mirrored
     * byte is appended at the low end of the result, so the lowest input
     * byte ends up in the highest output byte.
     */
    for (i = 0; i < 8; i++) {
        mirrored = (mirrored << 8) | mirror8bit((value >> (i * 8)) & 0xff);
    }

    return mirrored;
}
595  
/**
 * @brief Parameter k for the Exponential Golomb algorithm to be used.
 *
 * The smaller this value, the smaller the minimum bit count of the affixes
 * generated by the Exp. Golomb algorithm (at lowest index), at the price of
 * a higher maximum bit count of generated affixes (at highest index).
 * Likewise, increasing this parameter yields a smaller maximum bit count at
 * the price of a higher minimum bit count.
 *
 * In practice that means: a good value for k depends on the expected number
 * of devices to be exposed by one export. For a small number of devices k
 * should be small, for a large number of devices k might be increased
 * instead. The default of k=0 should be fine for most users though.
 *
 * @b IMPORTANT: In case this ever becomes a runtime parameter, the value of
 * k must not change as long as the guest is still running, because that
 * would cause completely different inode numbers to be generated on guest.
 */
#define EXP_GOLOMB_K    0
615  
616  /**
617   * @brief Exponential Golomb algorithm for arbitrary k (including k=0).
618   *
619   * The Exponential Golomb algorithm generates @b prefixes (@b not suffixes!)
620   * with growing length and with the mathematical property of being
621   * "prefix-free". The latter means the generated prefixes can be prepended
622   * in front of arbitrary numbers and the resulting concatenated numbers are
623   * guaranteed to be always unique.
624   *
625   * This is a minor adjustment to the original Exp. Golomb algorithm in the
626   * sense that lowest allowed index (@param n) starts with 1, not with zero.
627   *
628   * @param n - natural number (or index) of the prefix to be generated
629   *            (1, 2, 3, ...)
630   * @param k - parameter k of Exp. Golomb algorithm to be used
631   *            (see comment on EXP_GOLOMB_K macro for details about k)
632   */
633  static VariLenAffix expGolombEncode(uint64_t n, int k)
634  {
635      const uint64_t value = n + (1 << k) - 1;
636      const int bits = (int) log2(value) + 1;
637      return (VariLenAffix) {
638          .type = AffixType_Prefix,
639          .value = value,
640          .bits = bits + MAX((bits - 1 - k), 0)
641      };
642  }
643  
644  /**
645   * @brief Converts a suffix into a prefix, or a prefix into a suffix.
646   *
647   * Simply mirror all bits of the affix value, for the purpose to preserve
648   * respectively the mathematical "prefix-free" or "suffix-free" property
649   * after the conversion.
650   *
651   * If a passed prefix is suitable to create unique numbers, then the
652   * returned suffix is suitable to create unique numbers as well (and vice
653   * versa).
654   */
655  static VariLenAffix invertAffix(const VariLenAffix *affix)
656  {
657      return (VariLenAffix) {
658          .type =
659              (affix->type == AffixType_Suffix) ?
660                  AffixType_Prefix : AffixType_Suffix,
661          .value =
662              mirror64bit(affix->value) >>
663              ((sizeof(affix->value) * 8) - affix->bits),
664          .bits = affix->bits
665      };
666  }
667  
668  /**
669   * @brief Generates suffix numbers with "suffix-free" property.
670   *
671   * This is just a wrapper function on top of the Exp. Golomb algorithm.
672   *
673   * Since the Exp. Golomb algorithm generates prefixes, but we need suffixes,
674   * this function converts the Exp. Golomb prefixes into appropriate suffixes
675   * which are still suitable for generating unique numbers.
676   *
677   * @param n - natural number (or index) of the suffix to be generated
678   *            (1, 2, 3, ...)
679   */
680  static VariLenAffix affixForIndex(uint64_t index)
681  {
682      VariLenAffix prefix;
683      prefix = expGolombEncode(index, EXP_GOLOMB_K);
684      return invertAffix(&prefix); /* convert prefix to suffix */
685  }
686  
687  /* creative abuse of tb_hash_func7, which is based on xxhash */
688  static uint32_t qpp_hash(QppEntry e)
689  {
690      return qemu_xxhash7(e.ino_prefix, e.dev, 0, 0, 0);
691  }
692  
693  static uint32_t qpf_hash(QpfEntry e)
694  {
695      return qemu_xxhash7(e.ino, e.dev, 0, 0, 0);
696  }
697  
698  static bool qpd_cmp_func(const void *obj, const void *userp)
699  {
700      const QpdEntry *e1 = obj, *e2 = userp;
701      return e1->dev == e2->dev;
702  }
703  
704  static bool qpp_cmp_func(const void *obj, const void *userp)
705  {
706      const QppEntry *e1 = obj, *e2 = userp;
707      return e1->dev == e2->dev && e1->ino_prefix == e2->ino_prefix;
708  }
709  
710  static bool qpf_cmp_func(const void *obj, const void *userp)
711  {
712      const QpfEntry *e1 = obj, *e2 = userp;
713      return e1->dev == e2->dev && e1->ino == e2->ino;
714  }
715  
/*
 * qht_iter() callback used by qp_table_destroy(): frees the entry @p.
 * The hash @h and the user pointer @up are not used.
 */
static void qp_table_remove(void *p, uint32_t h, void *up)
{
    g_free(p);
}
720  
721  static void qp_table_destroy(struct qht *ht)
722  {
723      if (!ht || !ht->map) {
724          return;
725      }
726      qht_iter(ht, qp_table_remove, NULL);
727      qht_destroy(ht);
728  }
729  
730  static void qpd_table_init(struct qht *ht)
731  {
732      qht_init(ht, qpd_cmp_func, 1, QHT_MODE_AUTO_RESIZE);
733  }
734  
735  static void qpp_table_init(struct qht *ht)
736  {
737      qht_init(ht, qpp_cmp_func, 1, QHT_MODE_AUTO_RESIZE);
738  }
739  
740  static void qpf_table_init(struct qht *ht)
741  {
742      qht_init(ht, qpf_cmp_func, 1 << 16, QHT_MODE_AUTO_RESIZE);
743  }
744  
745  /*
746   * Returns how many (high end) bits of inode numbers of the passed fs
747   * device shall be used (in combination with the device number) to
748   * generate hash values for qpp_table entries.
749   *
750   * This function is required if variable length suffixes are used for inode
751   * number mapping on guest level. Since a device may end up having multiple
752   * entries in qpp_table, each entry most probably with a different suffix
753   * length, we thus need this function in conjunction with qpd_table to
754   * "agree" about a fix amount of bits (per device) to be always used for
755   * generating hash values for the purpose of accessing qpp_table in order
756   * get consistent behaviour when accessing qpp_table.
757   */
758  static int qid_inode_prefix_hash_bits(V9fsPDU *pdu, dev_t dev)
759  {
760      QpdEntry lookup = {
761          .dev = dev
762      }, *val;
763      uint32_t hash = dev;
764      VariLenAffix affix;
765  
766      val = qht_lookup(&pdu->s->qpd_table, &lookup, hash);
767      if (!val) {
768          val = g_malloc0(sizeof(QpdEntry));
769          *val = lookup;
770          affix = affixForIndex(pdu->s->qp_affix_next);
771          val->prefix_bits = affix.bits;
772          qht_insert(&pdu->s->qpd_table, val, hash, NULL);
773          pdu->s->qp_ndevices++;
774      }
775      return val->prefix_bits;
776  }
777  
778  /**
779   * @brief Slow / full mapping host inode nr -> guest inode nr.
780   *
781   * This function performs a slower and much more costly remapping of an
782   * original file inode number on host to an appropriate different inode
783   * number on guest. For every (dev, inode) combination on host a new
784   * sequential number is generated, cached and exposed as inode number on
785   * guest.
786   *
787   * This is just a "last resort" fallback solution if the much faster/cheaper
788   * qid_path_suffixmap() failed. In practice this slow / full mapping is not
789   * expected ever to be used at all though.
790   *
791   * @see qid_path_suffixmap() for details
792   *
793   */
794  static int qid_path_fullmap(V9fsPDU *pdu, const struct stat *stbuf,
795                              uint64_t *path)
796  {
797      QpfEntry lookup = {
798          .dev = stbuf->st_dev,
799          .ino = stbuf->st_ino
800      }, *val;
801      uint32_t hash = qpf_hash(lookup);
802      VariLenAffix affix;
803  
804      val = qht_lookup(&pdu->s->qpf_table, &lookup, hash);
805  
806      if (!val) {
807          if (pdu->s->qp_fullpath_next == 0) {
808              /* no more files can be mapped :'( */
809              error_report_once(
810                  "9p: No more prefixes available for remapping inodes from "
811                  "host to guest."
812              );
813              return -ENFILE;
814          }
815  
816          val = g_malloc0(sizeof(QppEntry));
817          *val = lookup;
818  
819          /* new unique inode and device combo */
820          affix = affixForIndex(
821              1ULL << (sizeof(pdu->s->qp_affix_next) * 8)
822          );
823          val->path = (pdu->s->qp_fullpath_next++ << affix.bits) | affix.value;
824          pdu->s->qp_fullpath_next &= ((1ULL << (64 - affix.bits)) - 1);
825          qht_insert(&pdu->s->qpf_table, val, hash, NULL);
826      }
827  
828      *path = val->path;
829      return 0;
830  }
831  
832  /**
833   * @brief Quick mapping host inode nr -> guest inode nr.
834   *
835   * This function performs quick remapping of an original file inode number
836   * on host to an appropriate different inode number on guest. This remapping
837   * of inodes is required to avoid inode nr collisions on guest which would
838   * happen if the 9p export contains more than 1 exported file system (or
839   * more than 1 file system data set), because unlike on host level where the
840   * files would have different device nrs, all files exported by 9p would
841   * share the same device nr on guest (the device nr of the virtual 9p device
842   * that is).
843   *
844   * Inode remapping is performed by chopping off high end bits of the original
845   * inode number from host, shifting the result upwards and then assigning a
846   * generated suffix number for the low end bits, where the same suffix number
847   * will be shared by all inodes with the same device id AND the same high end
848   * bits that have been chopped off. That approach utilizes the fact that inode
849   * numbers very likely share the same high end bits (i.e. due to their common
850   * sequential generation by file systems) and hence we only have to generate
851   * and track a very limited amount of suffixes in practice due to that.
852   *
853   * We generate variable size suffixes for that purpose. The 1st generated
854   * suffix will only have 1 bit and hence we only need to chop off 1 bit from
855   * the original inode number. The subsequent suffixes being generated will
856   * grow in (bit) size subsequently, i.e. the 2nd and 3rd suffix being
857   * generated will have 3 bits and hence we have to chop off 3 bits from their
858   * original inodes, and so on. That approach of using variable length suffixes
859   * (i.e. over fixed size ones) utilizes the fact that in practice only a very
860   * limited amount of devices are shared by the same export (e.g. typically
861   * less than 2 dozen devices per 9p export), so in practice we need to chop
862   * off less bits than with fixed size prefixes and yet are flexible to add
863   * new devices at runtime below host's export directory at any time without
864   * having to reboot guest nor requiring to reconfigure guest for that. And due
865   * to the very limited amount of original high end bits that we chop off that
866   * way, the total amount of suffixes we need to generate is less than by using
867   * fixed size prefixes and hence it also improves performance of the inode
868   * remapping algorithm, and finally has the nice side effect that the inode
869   * numbers on guest will be much smaller & human friendly. ;-)
870   */
871  static int qid_path_suffixmap(V9fsPDU *pdu, const struct stat *stbuf,
872                                uint64_t *path)
873  {
874      const int ino_hash_bits = qid_inode_prefix_hash_bits(pdu, stbuf->st_dev);
875      QppEntry lookup = {
876          .dev = stbuf->st_dev,
877          .ino_prefix = (uint16_t) (stbuf->st_ino >> (64 - ino_hash_bits))
878      }, *val;
879      uint32_t hash = qpp_hash(lookup);
880  
881      val = qht_lookup(&pdu->s->qpp_table, &lookup, hash);
882  
883      if (!val) {
884          if (pdu->s->qp_affix_next == 0) {
885              /* we ran out of affixes */
886              warn_report_once(
887                  "9p: Potential degraded performance of inode remapping"
888              );
889              return -ENFILE;
890          }
891  
892          val = g_malloc0(sizeof(QppEntry));
893          *val = lookup;
894  
895          /* new unique inode affix and device combo */
896          val->qp_affix_index = pdu->s->qp_affix_next++;
897          val->qp_affix = affixForIndex(val->qp_affix_index);
898          qht_insert(&pdu->s->qpp_table, val, hash, NULL);
899      }
900      /* assuming generated affix to be suffix type, not prefix */
901      *path = (stbuf->st_ino << val->qp_affix.bits) | val->qp_affix.value;
902      return 0;
903  }
904  
905  static int stat_to_qid(V9fsPDU *pdu, const struct stat *stbuf, V9fsQID *qidp)
906  {
907      int err;
908      size_t size;
909  
910      if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) {
911          /* map inode+device to qid path (fast path) */
912          err = qid_path_suffixmap(pdu, stbuf, &qidp->path);
913          if (err == -ENFILE) {
914              /* fast path didn't work, fall back to full map */
915              err = qid_path_fullmap(pdu, stbuf, &qidp->path);
916          }
917          if (err) {
918              return err;
919          }
920      } else {
921          if (pdu->s->dev_id != stbuf->st_dev) {
922              if (pdu->s->ctx.export_flags & V9FS_FORBID_MULTIDEVS) {
923                  error_report_once(
924                      "9p: Multiple devices detected in same VirtFS export. "
925                      "Access of guest to additional devices is (partly) "
926                      "denied due to virtfs option 'multidevs=forbid' being "
927                      "effective."
928                  );
929                  return -ENODEV;
930              } else {
931                  warn_report_once(
932                      "9p: Multiple devices detected in same VirtFS export, "
933                      "which might lead to file ID collisions and severe "
934                      "misbehaviours on guest! You should either use a "
935                      "separate export for each device shared from host or "
936                      "use virtfs option 'multidevs=remap'!"
937                  );
938              }
939          }
940          memset(&qidp->path, 0, sizeof(qidp->path));
941          size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path));
942          memcpy(&qidp->path, &stbuf->st_ino, size);
943      }
944  
945      qidp->version = stbuf->st_mtime ^ (stbuf->st_size << 8);
946      qidp->type = 0;
947      if (S_ISDIR(stbuf->st_mode)) {
948          qidp->type |= P9_QID_TYPE_DIR;
949      }
950      if (S_ISLNK(stbuf->st_mode)) {
951          qidp->type |= P9_QID_TYPE_SYMLINK;
952      }
953  
954      return 0;
955  }
956  
957  static int coroutine_fn fid_to_qid(V9fsPDU *pdu, V9fsFidState *fidp,
958                                     V9fsQID *qidp)
959  {
960      struct stat stbuf;
961      int err;
962  
963      err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
964      if (err < 0) {
965          return err;
966      }
967      err = stat_to_qid(pdu, &stbuf, qidp);
968      if (err < 0) {
969          return err;
970      }
971      return 0;
972  }
973  
974  static int coroutine_fn dirent_to_qid(V9fsPDU *pdu, V9fsFidState *fidp,
975                                        struct dirent *dent, V9fsQID *qidp)
976  {
977      struct stat stbuf;
978      V9fsPath path;
979      int err;
980  
981      v9fs_path_init(&path);
982  
983      err = v9fs_co_name_to_path(pdu, &fidp->path, dent->d_name, &path);
984      if (err < 0) {
985          goto out;
986      }
987      err = v9fs_co_lstat(pdu, &path, &stbuf);
988      if (err < 0) {
989          goto out;
990      }
991      err = stat_to_qid(pdu, &stbuf, qidp);
992  
993  out:
994      v9fs_path_free(&path);
995      return err;
996  }
997  
998  V9fsPDU *pdu_alloc(V9fsState *s)
999  {
1000      V9fsPDU *pdu = NULL;
1001  
1002      if (!QLIST_EMPTY(&s->free_list)) {
1003          pdu = QLIST_FIRST(&s->free_list);
1004          QLIST_REMOVE(pdu, next);
1005          QLIST_INSERT_HEAD(&s->active_list, pdu, next);
1006      }
1007      return pdu;
1008  }
1009  
1010  void pdu_free(V9fsPDU *pdu)
1011  {
1012      V9fsState *s = pdu->s;
1013  
1014      g_assert(!pdu->cancelled);
1015      QLIST_REMOVE(pdu, next);
1016      QLIST_INSERT_HEAD(&s->free_list, pdu, next);
1017  }
1018  
/*
 * Finish the request carried by @pdu and hand the reply to the transport.
 *
 * @len is either the total size of a reply the caller has already
 * marshalled into the PDU, or a negative errno value. In the error case an
 * error reply is built here: an error string followed by the error number
 * with id P9_RERROR for protocol versions other than 9P2000.L, or just the
 * error number with id P9_RLERROR for 9P2000.L.
 *
 * Afterwards a waiter queued on pdu->complete is woken up; if there is
 * none, the PDU is returned to the free list.
 */
static void coroutine_fn pdu_complete(V9fsPDU *pdu, ssize_t len)
{
    int8_t id = pdu->id + 1; /* Response */
    V9fsState *s = pdu->s;
    int ret;

    /*
     * The 9p spec requires that successfully cancelled pdus receive no reply.
     * Sending a reply would confuse clients because they would
     * assume that any EINTR is the actual result of the operation,
     * rather than a consequence of the cancellation. However, if
     * the operation completed (successfully or with an error other
     * than one caused by cancellation), we do send out that reply, both
     * for efficiency and to avoid confusing the rest of the state machine
     * that assumes passing a non-error here will mean a successful
     * transmission of the reply.
     */
    bool discard = pdu->cancelled && len == -EINTR;
    if (discard) {
        trace_v9fs_rcancel(pdu->tag, pdu->id);
        pdu->size = 0;
        goto out_notify;
    }

    if (len < 0) {
        int err = -len;
        /* error payload is marshalled from offset 7 onwards */
        len = 7;

        if (s->proto_version != V9FS_PROTO_2000L) {
            V9fsString str;

            /* str only borrows the strerror() buffer, it is never freed */
            str.data = strerror(err);
            str.size = strlen(str.data);

            ret = pdu_marshal(pdu, len, "s", &str);
            if (ret < 0) {
                goto out_notify;
            }
            len += ret;
            id = P9_RERROR;
        }

        ret = pdu_marshal(pdu, len, "d", err);
        if (ret < 0) {
            goto out_notify;
        }
        len += ret;

        if (s->proto_version == V9FS_PROTO_2000L) {
            id = P9_RLERROR;
        }
        trace_v9fs_rerror(pdu->tag, pdu->id, err); /* Trace ERROR */
    }

    /* fill out the header */
    if (pdu_marshal(pdu, 0, "dbw", (int32_t)len, id, pdu->tag) < 0) {
        goto out_notify;
    }

    /* keep these in sync */
    pdu->size = len;
    pdu->id = id;

out_notify:
    pdu->s->transport->push_and_notify(pdu);

    /* Now wake up anybody waiting in flush for this request */
    if (!qemu_co_queue_next(&pdu->complete)) {
        pdu_free(pdu);
    }
}
1090  
1091  static mode_t v9mode_to_mode(uint32_t mode, V9fsString *extension)
1092  {
1093      mode_t ret;
1094  
1095      ret = mode & 0777;
1096      if (mode & P9_STAT_MODE_DIR) {
1097          ret |= S_IFDIR;
1098      }
1099  
1100      if (mode & P9_STAT_MODE_SYMLINK) {
1101          ret |= S_IFLNK;
1102      }
1103      if (mode & P9_STAT_MODE_SOCKET) {
1104          ret |= S_IFSOCK;
1105      }
1106      if (mode & P9_STAT_MODE_NAMED_PIPE) {
1107          ret |= S_IFIFO;
1108      }
1109      if (mode & P9_STAT_MODE_DEVICE) {
1110          if (extension->size && extension->data[0] == 'c') {
1111              ret |= S_IFCHR;
1112          } else {
1113              ret |= S_IFBLK;
1114          }
1115      }
1116  
1117      if (!(ret&~0777)) {
1118          ret |= S_IFREG;
1119      }
1120  
1121      if (mode & P9_STAT_MODE_SETUID) {
1122          ret |= S_ISUID;
1123      }
1124      if (mode & P9_STAT_MODE_SETGID) {
1125          ret |= S_ISGID;
1126      }
1127      if (mode & P9_STAT_MODE_SETVTX) {
1128          ret |= S_ISVTX;
1129      }
1130  
1131      return ret;
1132  }
1133  
1134  static int donttouch_stat(V9fsStat *stat)
1135  {
1136      if (stat->type == -1 &&
1137          stat->dev == -1 &&
1138          stat->qid.type == 0xff &&
1139          stat->qid.version == (uint32_t) -1 &&
1140          stat->qid.path == (uint64_t) -1 &&
1141          stat->mode == -1 &&
1142          stat->atime == -1 &&
1143          stat->mtime == -1 &&
1144          stat->length == -1 &&
1145          !stat->name.size &&
1146          !stat->uid.size &&
1147          !stat->gid.size &&
1148          !stat->muid.size &&
1149          stat->n_uid == -1 &&
1150          stat->n_gid == -1 &&
1151          stat->n_muid == -1) {
1152          return 1;
1153      }
1154  
1155      return 0;
1156  }
1157  
1158  static void v9fs_stat_init(V9fsStat *stat)
1159  {
1160      v9fs_string_init(&stat->name);
1161      v9fs_string_init(&stat->uid);
1162      v9fs_string_init(&stat->gid);
1163      v9fs_string_init(&stat->muid);
1164      v9fs_string_init(&stat->extension);
1165  }
1166  
1167  static void v9fs_stat_free(V9fsStat *stat)
1168  {
1169      v9fs_string_free(&stat->name);
1170      v9fs_string_free(&stat->uid);
1171      v9fs_string_free(&stat->gid);
1172      v9fs_string_free(&stat->muid);
1173      v9fs_string_free(&stat->extension);
1174  }
1175  
1176  static uint32_t stat_to_v9mode(const struct stat *stbuf)
1177  {
1178      uint32_t mode;
1179  
1180      mode = stbuf->st_mode & 0777;
1181      if (S_ISDIR(stbuf->st_mode)) {
1182          mode |= P9_STAT_MODE_DIR;
1183      }
1184  
1185      if (S_ISLNK(stbuf->st_mode)) {
1186          mode |= P9_STAT_MODE_SYMLINK;
1187      }
1188  
1189      if (S_ISSOCK(stbuf->st_mode)) {
1190          mode |= P9_STAT_MODE_SOCKET;
1191      }
1192  
1193      if (S_ISFIFO(stbuf->st_mode)) {
1194          mode |= P9_STAT_MODE_NAMED_PIPE;
1195      }
1196  
1197      if (S_ISBLK(stbuf->st_mode) || S_ISCHR(stbuf->st_mode)) {
1198          mode |= P9_STAT_MODE_DEVICE;
1199      }
1200  
1201      if (stbuf->st_mode & S_ISUID) {
1202          mode |= P9_STAT_MODE_SETUID;
1203      }
1204  
1205      if (stbuf->st_mode & S_ISGID) {
1206          mode |= P9_STAT_MODE_SETGID;
1207      }
1208  
1209      if (stbuf->st_mode & S_ISVTX) {
1210          mode |= P9_STAT_MODE_SETVTX;
1211      }
1212  
1213      return mode;
1214  }
1215  
1216  static int coroutine_fn stat_to_v9stat(V9fsPDU *pdu, V9fsPath *path,
1217                                         const char *basename,
1218                                         const struct stat *stbuf,
1219                                         V9fsStat *v9stat)
1220  {
1221      int err;
1222  
1223      memset(v9stat, 0, sizeof(*v9stat));
1224  
1225      err = stat_to_qid(pdu, stbuf, &v9stat->qid);
1226      if (err < 0) {
1227          return err;
1228      }
1229      v9stat->mode = stat_to_v9mode(stbuf);
1230      v9stat->atime = stbuf->st_atime;
1231      v9stat->mtime = stbuf->st_mtime;
1232      v9stat->length = stbuf->st_size;
1233  
1234      v9fs_string_free(&v9stat->uid);
1235      v9fs_string_free(&v9stat->gid);
1236      v9fs_string_free(&v9stat->muid);
1237  
1238      v9stat->n_uid = stbuf->st_uid;
1239      v9stat->n_gid = stbuf->st_gid;
1240      v9stat->n_muid = 0;
1241  
1242      v9fs_string_free(&v9stat->extension);
1243  
1244      if (v9stat->mode & P9_STAT_MODE_SYMLINK) {
1245          err = v9fs_co_readlink(pdu, path, &v9stat->extension);
1246          if (err < 0) {
1247              return err;
1248          }
1249      } else if (v9stat->mode & P9_STAT_MODE_DEVICE) {
1250          v9fs_string_sprintf(&v9stat->extension, "%c %u %u",
1251                  S_ISCHR(stbuf->st_mode) ? 'c' : 'b',
1252                  major(stbuf->st_rdev), minor(stbuf->st_rdev));
1253      } else if (S_ISDIR(stbuf->st_mode) || S_ISREG(stbuf->st_mode)) {
1254          v9fs_string_sprintf(&v9stat->extension, "%s %lu",
1255                  "HARDLINKCOUNT", (unsigned long)stbuf->st_nlink);
1256      }
1257  
1258      v9fs_string_sprintf(&v9stat->name, "%s", basename);
1259  
1260      v9stat->size = 61 +
1261          v9fs_string_size(&v9stat->name) +
1262          v9fs_string_size(&v9stat->uid) +
1263          v9fs_string_size(&v9stat->gid) +
1264          v9fs_string_size(&v9stat->muid) +
1265          v9fs_string_size(&v9stat->extension);
1266      return 0;
1267  }
1268  
/* One bit per stat field, as used in getattr request and result masks */
#define P9_STATS_MODE          (1ULL << 0)
#define P9_STATS_NLINK         (1ULL << 1)
#define P9_STATS_UID           (1ULL << 2)
#define P9_STATS_GID           (1ULL << 3)
#define P9_STATS_RDEV          (1ULL << 4)
#define P9_STATS_ATIME         (1ULL << 5)
#define P9_STATS_MTIME         (1ULL << 6)
#define P9_STATS_CTIME         (1ULL << 7)
#define P9_STATS_INO           (1ULL << 8)
#define P9_STATS_SIZE          (1ULL << 9)
#define P9_STATS_BLOCKS        (1ULL << 10)

/* Fields beyond the basic set */
#define P9_STATS_BTIME         (1ULL << 11)
#define P9_STATS_GEN           (1ULL << 12)
#define P9_STATS_DATA_VERSION  (1ULL << 13)

#define P9_STATS_BASIC         0x000007ffULL /* Mask for fields up to BLOCKS */
#define P9_STATS_ALL           0x00003fffULL /* Mask for All fields above */
1287  
1288  
1289  static int stat_to_v9stat_dotl(V9fsPDU *pdu, const struct stat *stbuf,
1290                                  V9fsStatDotl *v9lstat)
1291  {
1292      memset(v9lstat, 0, sizeof(*v9lstat));
1293  
1294      v9lstat->st_mode = stbuf->st_mode;
1295      v9lstat->st_nlink = stbuf->st_nlink;
1296      v9lstat->st_uid = stbuf->st_uid;
1297      v9lstat->st_gid = stbuf->st_gid;
1298      v9lstat->st_rdev = stbuf->st_rdev;
1299      v9lstat->st_size = stbuf->st_size;
1300      v9lstat->st_blksize = stbuf->st_blksize;
1301      v9lstat->st_blocks = stbuf->st_blocks;
1302      v9lstat->st_atime_sec = stbuf->st_atime;
1303      v9lstat->st_atime_nsec = stbuf->st_atim.tv_nsec;
1304      v9lstat->st_mtime_sec = stbuf->st_mtime;
1305      v9lstat->st_mtime_nsec = stbuf->st_mtim.tv_nsec;
1306      v9lstat->st_ctime_sec = stbuf->st_ctime;
1307      v9lstat->st_ctime_nsec = stbuf->st_ctim.tv_nsec;
1308      /* Currently we only support BASIC fields in stat */
1309      v9lstat->st_result_mask = P9_STATS_BASIC;
1310  
1311      return stat_to_qid(pdu, stbuf, &v9lstat->qid);
1312  }
1313  
/*
 * Debug helper: print the @cnt entries of the scatter/gather list @sg to
 * stdout in the form "sg[<cnt>]: {(<base>, <len>), ...}".
 */
static void print_sg(struct iovec *sg, int cnt)
{
    int i;

    printf("sg[%d]: {", cnt);
    for (i = 0; i < cnt; i++) {
        if (i) {
            printf(", ");
        }
        /* iov_len is a size_t, hence %zu (and not the signed %zd) */
        printf("(%p, %zu)", sg[i].iov_base, sg[i].iov_len);
    }
    printf("}\n");
}
1327  
1328  /* Will call this only for path name based fid */
1329  static void v9fs_fix_path(V9fsPath *dst, V9fsPath *src, int len)
1330  {
1331      V9fsPath str;
1332      v9fs_path_init(&str);
1333      v9fs_path_copy(&str, dst);
1334      v9fs_path_sprintf(dst, "%s%s", src->data, str.data + len);
1335      v9fs_path_free(&str);
1336  }
1337  
1338  static inline bool is_ro_export(FsContext *ctx)
1339  {
1340      return ctx->export_flags & V9FS_RDONLY;
1341  }
1342  
1343  static void coroutine_fn v9fs_version(void *opaque)
1344  {
1345      ssize_t err;
1346      V9fsPDU *pdu = opaque;
1347      V9fsState *s = pdu->s;
1348      V9fsString version;
1349      size_t offset = 7;
1350  
1351      v9fs_string_init(&version);
1352      err = pdu_unmarshal(pdu, offset, "ds", &s->msize, &version);
1353      if (err < 0) {
1354          goto out;
1355      }
1356      trace_v9fs_version(pdu->tag, pdu->id, s->msize, version.data);
1357  
1358      virtfs_reset(pdu);
1359  
1360      if (!strcmp(version.data, "9P2000.u")) {
1361          s->proto_version = V9FS_PROTO_2000U;
1362      } else if (!strcmp(version.data, "9P2000.L")) {
1363          s->proto_version = V9FS_PROTO_2000L;
1364      } else {
1365          v9fs_string_sprintf(&version, "unknown");
1366          /* skip min. msize check, reporting invalid version has priority */
1367          goto marshal;
1368      }
1369  
1370      if (s->msize < P9_MIN_MSIZE) {
1371          err = -EMSGSIZE;
1372          error_report(
1373              "9pfs: Client requested msize < minimum msize ("
1374              stringify(P9_MIN_MSIZE) ") supported by this server."
1375          );
1376          goto out;
1377      }
1378  
1379  marshal:
1380      err = pdu_marshal(pdu, offset, "ds", s->msize, &version);
1381      if (err < 0) {
1382          goto out;
1383      }
1384      err += offset;
1385      trace_v9fs_version_return(pdu->tag, pdu->id, s->msize, version.data);
1386  out:
1387      pdu_complete(pdu, err);
1388      v9fs_string_free(&version);
1389  }
1390  
1391  static void coroutine_fn v9fs_attach(void *opaque)
1392  {
1393      V9fsPDU *pdu = opaque;
1394      V9fsState *s = pdu->s;
1395      int32_t fid, afid, n_uname;
1396      V9fsString uname, aname;
1397      V9fsFidState *fidp;
1398      size_t offset = 7;
1399      V9fsQID qid;
1400      ssize_t err;
1401      Error *local_err = NULL;
1402  
1403      v9fs_string_init(&uname);
1404      v9fs_string_init(&aname);
1405      err = pdu_unmarshal(pdu, offset, "ddssd", &fid,
1406                          &afid, &uname, &aname, &n_uname);
1407      if (err < 0) {
1408          goto out_nofid;
1409      }
1410      trace_v9fs_attach(pdu->tag, pdu->id, fid, afid, uname.data, aname.data);
1411  
1412      fidp = alloc_fid(s, fid);
1413      if (fidp == NULL) {
1414          err = -EINVAL;
1415          goto out_nofid;
1416      }
1417      fidp->uid = n_uname;
1418      err = v9fs_co_name_to_path(pdu, NULL, "/", &fidp->path);
1419      if (err < 0) {
1420          err = -EINVAL;
1421          clunk_fid(s, fid);
1422          goto out;
1423      }
1424      err = fid_to_qid(pdu, fidp, &qid);
1425      if (err < 0) {
1426          err = -EINVAL;
1427          clunk_fid(s, fid);
1428          goto out;
1429      }
1430  
1431      /*
1432       * disable migration if we haven't done already.
1433       * attach could get called multiple times for the same export.
1434       */
1435      if (!s->migration_blocker) {
1436          error_setg(&s->migration_blocker,
1437                     "Migration is disabled when VirtFS export path '%s' is mounted in the guest using mount_tag '%s'",
1438                     s->ctx.fs_root ? s->ctx.fs_root : "NULL", s->tag);
1439          err = migrate_add_blocker(s->migration_blocker, &local_err);
1440          if (local_err) {
1441              error_free(local_err);
1442              error_free(s->migration_blocker);
1443              s->migration_blocker = NULL;
1444              clunk_fid(s, fid);
1445              goto out;
1446          }
1447          s->root_fid = fid;
1448      }
1449  
1450      err = pdu_marshal(pdu, offset, "Q", &qid);
1451      if (err < 0) {
1452          clunk_fid(s, fid);
1453          goto out;
1454      }
1455      err += offset;
1456  
1457      memcpy(&s->root_qid, &qid, sizeof(qid));
1458      trace_v9fs_attach_return(pdu->tag, pdu->id,
1459                               qid.type, qid.version, qid.path);
1460  out:
1461      put_fid(pdu, fidp);
1462  out_nofid:
1463      pdu_complete(pdu, err);
1464      v9fs_string_free(&uname);
1465      v9fs_string_free(&aname);
1466  }
1467  
1468  static void coroutine_fn v9fs_stat(void *opaque)
1469  {
1470      int32_t fid;
1471      V9fsStat v9stat;
1472      ssize_t err = 0;
1473      size_t offset = 7;
1474      struct stat stbuf;
1475      V9fsFidState *fidp;
1476      V9fsPDU *pdu = opaque;
1477      char *basename;
1478  
1479      err = pdu_unmarshal(pdu, offset, "d", &fid);
1480      if (err < 0) {
1481          goto out_nofid;
1482      }
1483      trace_v9fs_stat(pdu->tag, pdu->id, fid);
1484  
1485      fidp = get_fid(pdu, fid);
1486      if (fidp == NULL) {
1487          err = -ENOENT;
1488          goto out_nofid;
1489      }
1490      err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1491      if (err < 0) {
1492          goto out;
1493      }
1494      basename = g_path_get_basename(fidp->path.data);
1495      err = stat_to_v9stat(pdu, &fidp->path, basename, &stbuf, &v9stat);
1496      g_free(basename);
1497      if (err < 0) {
1498          goto out;
1499      }
1500      err = pdu_marshal(pdu, offset, "wS", 0, &v9stat);
1501      if (err < 0) {
1502          v9fs_stat_free(&v9stat);
1503          goto out;
1504      }
1505      trace_v9fs_stat_return(pdu->tag, pdu->id, v9stat.mode,
1506                             v9stat.atime, v9stat.mtime, v9stat.length);
1507      err += offset;
1508      v9fs_stat_free(&v9stat);
1509  out:
1510      put_fid(pdu, fidp);
1511  out_nofid:
1512      pdu_complete(pdu, err);
1513  }
1514  
1515  static void coroutine_fn v9fs_getattr(void *opaque)
1516  {
1517      int32_t fid;
1518      size_t offset = 7;
1519      ssize_t retval = 0;
1520      struct stat stbuf;
1521      V9fsFidState *fidp;
1522      uint64_t request_mask;
1523      V9fsStatDotl v9stat_dotl;
1524      V9fsPDU *pdu = opaque;
1525  
1526      retval = pdu_unmarshal(pdu, offset, "dq", &fid, &request_mask);
1527      if (retval < 0) {
1528          goto out_nofid;
1529      }
1530      trace_v9fs_getattr(pdu->tag, pdu->id, fid, request_mask);
1531  
1532      fidp = get_fid(pdu, fid);
1533      if (fidp == NULL) {
1534          retval = -ENOENT;
1535          goto out_nofid;
1536      }
1537      /*
1538       * Currently we only support BASIC fields in stat, so there is no
1539       * need to look at request_mask.
1540       */
1541      retval = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1542      if (retval < 0) {
1543          goto out;
1544      }
1545      retval = stat_to_v9stat_dotl(pdu, &stbuf, &v9stat_dotl);
1546      if (retval < 0) {
1547          goto out;
1548      }
1549  
1550      /*  fill st_gen if requested and supported by underlying fs */
1551      if (request_mask & P9_STATS_GEN) {
1552          retval = v9fs_co_st_gen(pdu, &fidp->path, stbuf.st_mode, &v9stat_dotl);
1553          switch (retval) {
1554          case 0:
1555              /* we have valid st_gen: update result mask */
1556              v9stat_dotl.st_result_mask |= P9_STATS_GEN;
1557              break;
1558          case -EINTR:
1559              /* request cancelled, e.g. by Tflush */
1560              goto out;
1561          default:
1562              /* failed to get st_gen: not fatal, ignore */
1563              break;
1564          }
1565      }
1566      retval = pdu_marshal(pdu, offset, "A", &v9stat_dotl);
1567      if (retval < 0) {
1568          goto out;
1569      }
1570      retval += offset;
1571      trace_v9fs_getattr_return(pdu->tag, pdu->id, v9stat_dotl.st_result_mask,
1572                                v9stat_dotl.st_mode, v9stat_dotl.st_uid,
1573                                v9stat_dotl.st_gid);
1574  out:
1575      put_fid(pdu, fidp);
1576  out_nofid:
1577      pdu_complete(pdu, retval);
1578  }
1579  
/* Attribute flags: bits of the "valid" field of a setattr request */
#define P9_ATTR_MODE       (1 << 0)
#define P9_ATTR_UID        (1 << 1)
#define P9_ATTR_GID        (1 << 2)
#define P9_ATTR_SIZE       (1 << 3)
#define P9_ATTR_ATIME      (1 << 4)
#define P9_ATTR_MTIME      (1 << 5)
#define P9_ATTR_CTIME      (1 << 6)
/* together with ATIME/MTIME: use the time sent by the client, not "now" */
#define P9_ATTR_ATIME_SET  (1 << 7)
#define P9_ATTR_MTIME_SET  (1 << 8)

/* flags MODE up to CTIME, i.e. everything but the two *_SET modifiers */
#define P9_ATTR_MASK \
    (P9_ATTR_MODE | P9_ATTR_UID | P9_ATTR_GID | P9_ATTR_SIZE | \
     P9_ATTR_ATIME | P9_ATTR_MTIME | P9_ATTR_CTIME)
1592  
1593  static void coroutine_fn v9fs_setattr(void *opaque)
1594  {
1595      int err = 0;
1596      int32_t fid;
1597      V9fsFidState *fidp;
1598      size_t offset = 7;
1599      V9fsIattr v9iattr;
1600      V9fsPDU *pdu = opaque;
1601  
1602      err = pdu_unmarshal(pdu, offset, "dI", &fid, &v9iattr);
1603      if (err < 0) {
1604          goto out_nofid;
1605      }
1606  
1607      trace_v9fs_setattr(pdu->tag, pdu->id, fid,
1608                         v9iattr.valid, v9iattr.mode, v9iattr.uid, v9iattr.gid,
1609                         v9iattr.size, v9iattr.atime_sec, v9iattr.mtime_sec);
1610  
1611      fidp = get_fid(pdu, fid);
1612      if (fidp == NULL) {
1613          err = -EINVAL;
1614          goto out_nofid;
1615      }
1616      if (v9iattr.valid & P9_ATTR_MODE) {
1617          err = v9fs_co_chmod(pdu, &fidp->path, v9iattr.mode);
1618          if (err < 0) {
1619              goto out;
1620          }
1621      }
1622      if (v9iattr.valid & (P9_ATTR_ATIME | P9_ATTR_MTIME)) {
1623          struct timespec times[2];
1624          if (v9iattr.valid & P9_ATTR_ATIME) {
1625              if (v9iattr.valid & P9_ATTR_ATIME_SET) {
1626                  times[0].tv_sec = v9iattr.atime_sec;
1627                  times[0].tv_nsec = v9iattr.atime_nsec;
1628              } else {
1629                  times[0].tv_nsec = UTIME_NOW;
1630              }
1631          } else {
1632              times[0].tv_nsec = UTIME_OMIT;
1633          }
1634          if (v9iattr.valid & P9_ATTR_MTIME) {
1635              if (v9iattr.valid & P9_ATTR_MTIME_SET) {
1636                  times[1].tv_sec = v9iattr.mtime_sec;
1637                  times[1].tv_nsec = v9iattr.mtime_nsec;
1638              } else {
1639                  times[1].tv_nsec = UTIME_NOW;
1640              }
1641          } else {
1642              times[1].tv_nsec = UTIME_OMIT;
1643          }
1644          err = v9fs_co_utimensat(pdu, &fidp->path, times);
1645          if (err < 0) {
1646              goto out;
1647          }
1648      }
1649      /*
1650       * If the only valid entry in iattr is ctime we can call
1651       * chown(-1,-1) to update the ctime of the file
1652       */
1653      if ((v9iattr.valid & (P9_ATTR_UID | P9_ATTR_GID)) ||
1654          ((v9iattr.valid & P9_ATTR_CTIME)
1655           && !((v9iattr.valid & P9_ATTR_MASK) & ~P9_ATTR_CTIME))) {
1656          if (!(v9iattr.valid & P9_ATTR_UID)) {
1657              v9iattr.uid = -1;
1658          }
1659          if (!(v9iattr.valid & P9_ATTR_GID)) {
1660              v9iattr.gid = -1;
1661          }
1662          err = v9fs_co_chown(pdu, &fidp->path, v9iattr.uid,
1663                              v9iattr.gid);
1664          if (err < 0) {
1665              goto out;
1666          }
1667      }
1668      if (v9iattr.valid & (P9_ATTR_SIZE)) {
1669          err = v9fs_co_truncate(pdu, &fidp->path, v9iattr.size);
1670          if (err < 0) {
1671              goto out;
1672          }
1673      }
1674      err = offset;
1675      trace_v9fs_setattr_return(pdu->tag, pdu->id);
1676  out:
1677      put_fid(pdu, fidp);
1678  out_nofid:
1679      pdu_complete(pdu, err);
1680  }
1681  
1682  static int v9fs_walk_marshal(V9fsPDU *pdu, uint16_t nwnames, V9fsQID *qids)
1683  {
1684      int i;
1685      ssize_t err;
1686      size_t offset = 7;
1687  
1688      err = pdu_marshal(pdu, offset, "w", nwnames);
1689      if (err < 0) {
1690          return err;
1691      }
1692      offset += err;
1693      for (i = 0; i < nwnames; i++) {
1694          err = pdu_marshal(pdu, offset, "Q", &qids[i]);
1695          if (err < 0) {
1696              return err;
1697          }
1698          offset += err;
1699      }
1700      return offset;
1701  }
1702  
/* A path element is illegal if it is empty or contains a '/'. */
static bool name_is_illegal(const char *name)
{
    if (name[0] == '\0') {
        return true;
    }
    return strchr(name, '/') != NULL;
}
1707  
1708  static bool not_same_qid(const V9fsQID *qid1, const V9fsQID *qid2)
1709  {
1710      return
1711          qid1->type != qid2->type ||
1712          qid1->version != qid2->version ||
1713          qid1->path != qid2->path;
1714  }
1715  
/*
 * Handler for walk requests.
 *
 * Reads fid, newfid and up to P9_MAXWELEM path element names from the
 * request, walks those names one by one starting at the path of fid and
 * collects the QID of every element reached. On success the resulting path
 * is stored in fid itself (if fid == newfid) or in a newly allocated
 * newfid, and the collected QIDs are sent as reply.
 *
 * Errors: -EINVAL for more than P9_MAXWELEM names, for fid == newfid on a
 * fid whose type is not P9_FID_NONE, or if newfid cannot be allocated;
 * -ENOENT for an illegal name or an unknown fid; otherwise the error of
 * the failing path/stat operation.
 */
static void coroutine_fn v9fs_walk(void *opaque)
{
    int name_idx;
    V9fsQID *qids = NULL;
    int i, err = 0;
    V9fsPath dpath, path;
    uint16_t nwnames;
    struct stat stbuf;
    size_t offset = 7;
    int32_t fid, newfid;
    V9fsString *wnames = NULL;
    V9fsFidState *fidp;
    V9fsFidState *newfidp = NULL;
    V9fsPDU *pdu = opaque;
    V9fsState *s = pdu->s;
    V9fsQID qid;

    err = pdu_unmarshal(pdu, offset, "ddw", &fid, &newfid, &nwnames);
    if (err < 0) {
        pdu_complete(pdu, err);
        return ;
    }
    offset += err;

    trace_v9fs_walk(pdu->tag, pdu->id, fid, newfid, nwnames);

    if (nwnames && nwnames <= P9_MAXWELEM) {
        /* zero-initialized, so the cleanup below is safe on early errors */
        wnames = g_new0(V9fsString, nwnames);
        qids   = g_new0(V9fsQID, nwnames);
        for (i = 0; i < nwnames; i++) {
            err = pdu_unmarshal(pdu, offset, "s", &wnames[i]);
            if (err < 0) {
                goto out_nofid;
            }
            if (name_is_illegal(wnames[i].data)) {
                err = -ENOENT;
                goto out_nofid;
            }
            offset += err;
        }
    } else if (nwnames > P9_MAXWELEM) {
        err = -EINVAL;
        goto out_nofid;
    }
    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -ENOENT;
        goto out_nofid;
    }

    v9fs_path_init(&dpath);
    v9fs_path_init(&path);

    /* QID of the starting point, needed for the root check below */
    err = fid_to_qid(pdu, fidp, &qid);
    if (err < 0) {
        goto out;
    }

    /*
     * Both dpath and path initially point to fidp.
     * Needed to handle request with nwnames == 0
     */
    v9fs_path_copy(&dpath, &fidp->path);
    v9fs_path_copy(&path, &fidp->path);
    for (name_idx = 0; name_idx < nwnames; name_idx++) {
        /*
         * A ".." element is not followed while the current position is the
         * root of the export; the current QID is reported again instead.
         */
        if (not_same_qid(&pdu->s->root_qid, &qid) ||
            strcmp("..", wnames[name_idx].data)) {
            err = v9fs_co_name_to_path(pdu, &dpath, wnames[name_idx].data,
                                       &path);
            if (err < 0) {
                goto out;
            }

            err = v9fs_co_lstat(pdu, &path, &stbuf);
            if (err < 0) {
                goto out;
            }
            err = stat_to_qid(pdu, &stbuf, &qid);
            if (err < 0) {
                goto out;
            }
            /* the element just reached is the directory of the next one */
            v9fs_path_copy(&dpath, &path);
        }
        memcpy(&qids[name_idx], &qid, sizeof(qid));
    }
    if (fid == newfid) {
        /* walking in place is only accepted for fids of type P9_FID_NONE */
        if (fidp->fid_type != P9_FID_NONE) {
            err = -EINVAL;
            goto out;
        }
        v9fs_path_write_lock(s);
        v9fs_path_copy(&fidp->path, &path);
        v9fs_path_unlock(s);
    } else {
        newfidp = alloc_fid(s, newfid);
        if (newfidp == NULL) {
            err = -EINVAL;
            goto out;
        }
        newfidp->uid = fidp->uid;
        v9fs_path_copy(&newfidp->path, &path);
    }
    err = v9fs_walk_marshal(pdu, nwnames, qids);
    trace_v9fs_walk_return(pdu->tag, pdu->id, nwnames, qids);
out:
    put_fid(pdu, fidp);
    if (newfidp) {
        put_fid(pdu, newfidp);
    }
    v9fs_path_free(&dpath);
    v9fs_path_free(&path);
out_nofid:
    pdu_complete(pdu, err);
    /* the arrays only exist if nwnames passed the range check above */
    if (nwnames && nwnames <= P9_MAXWELEM) {
        for (name_idx = 0; name_idx < nwnames; name_idx++) {
            v9fs_string_free(&wnames[name_idx]);
        }
        g_free(wnames);
        g_free(qids);
    }
}
1837  
1838  static int32_t coroutine_fn get_iounit(V9fsPDU *pdu, V9fsPath *path)
1839  {
1840      struct statfs stbuf;
1841      int32_t iounit = 0;
1842      V9fsState *s = pdu->s;
1843  
1844      /*
1845       * iounit should be multiples of f_bsize (host filesystem block size
1846       * and as well as less than (client msize - P9_IOHDRSZ))
1847       */
1848      if (!v9fs_co_statfs(pdu, path, &stbuf)) {
1849          if (stbuf.f_bsize) {
1850              iounit = stbuf.f_bsize;
1851              iounit *= (s->msize - P9_IOHDRSZ) / stbuf.f_bsize;
1852          }
1853      }
1854      if (!iounit) {
1855          iounit = s->msize - P9_IOHDRSZ;
1856      }
1857      return iounit;
1858  }
1859  
1860  static void coroutine_fn v9fs_open(void *opaque)
1861  {
1862      int flags;
1863      int32_t fid;
1864      int32_t mode;
1865      V9fsQID qid;
1866      int iounit = 0;
1867      ssize_t err = 0;
1868      size_t offset = 7;
1869      struct stat stbuf;
1870      V9fsFidState *fidp;
1871      V9fsPDU *pdu = opaque;
1872      V9fsState *s = pdu->s;
1873  
1874      if (s->proto_version == V9FS_PROTO_2000L) {
1875          err = pdu_unmarshal(pdu, offset, "dd", &fid, &mode);
1876      } else {
1877          uint8_t modebyte;
1878          err = pdu_unmarshal(pdu, offset, "db", &fid, &modebyte);
1879          mode = modebyte;
1880      }
1881      if (err < 0) {
1882          goto out_nofid;
1883      }
1884      trace_v9fs_open(pdu->tag, pdu->id, fid, mode);
1885  
1886      fidp = get_fid(pdu, fid);
1887      if (fidp == NULL) {
1888          err = -ENOENT;
1889          goto out_nofid;
1890      }
1891      if (fidp->fid_type != P9_FID_NONE) {
1892          err = -EINVAL;
1893          goto out;
1894      }
1895  
1896      err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1897      if (err < 0) {
1898          goto out;
1899      }
1900      err = stat_to_qid(pdu, &stbuf, &qid);
1901      if (err < 0) {
1902          goto out;
1903      }
1904      if (S_ISDIR(stbuf.st_mode)) {
1905          err = v9fs_co_opendir(pdu, fidp);
1906          if (err < 0) {
1907              goto out;
1908          }
1909          fidp->fid_type = P9_FID_DIR;
1910          err = pdu_marshal(pdu, offset, "Qd", &qid, 0);
1911          if (err < 0) {
1912              goto out;
1913          }
1914          err += offset;
1915      } else {
1916          if (s->proto_version == V9FS_PROTO_2000L) {
1917              flags = get_dotl_openflags(s, mode);
1918          } else {
1919              flags = omode_to_uflags(mode);
1920          }
1921          if (is_ro_export(&s->ctx)) {
1922              if (mode & O_WRONLY || mode & O_RDWR ||
1923                  mode & O_APPEND || mode & O_TRUNC) {
1924                  err = -EROFS;
1925                  goto out;
1926              }
1927          }
1928          err = v9fs_co_open(pdu, fidp, flags);
1929          if (err < 0) {
1930              goto out;
1931          }
1932          fidp->fid_type = P9_FID_FILE;
1933          fidp->open_flags = flags;
1934          if (flags & O_EXCL) {
1935              /*
1936               * We let the host file system do O_EXCL check
1937               * We should not reclaim such fd
1938               */
1939              fidp->flags |= FID_NON_RECLAIMABLE;
1940          }
1941          iounit = get_iounit(pdu, &fidp->path);
1942          err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
1943          if (err < 0) {
1944              goto out;
1945          }
1946          err += offset;
1947      }
1948      trace_v9fs_open_return(pdu->tag, pdu->id,
1949                             qid.type, qid.version, qid.path, iounit);
1950  out:
1951      put_fid(pdu, fidp);
1952  out_nofid:
1953      pdu_complete(pdu, err);
1954  }
1955  
1956  static void coroutine_fn v9fs_lcreate(void *opaque)
1957  {
1958      int32_t dfid, flags, mode;
1959      gid_t gid;
1960      ssize_t err = 0;
1961      ssize_t offset = 7;
1962      V9fsString name;
1963      V9fsFidState *fidp;
1964      struct stat stbuf;
1965      V9fsQID qid;
1966      int32_t iounit;
1967      V9fsPDU *pdu = opaque;
1968  
1969      v9fs_string_init(&name);
1970      err = pdu_unmarshal(pdu, offset, "dsddd", &dfid,
1971                          &name, &flags, &mode, &gid);
1972      if (err < 0) {
1973          goto out_nofid;
1974      }
1975      trace_v9fs_lcreate(pdu->tag, pdu->id, dfid, flags, mode, gid);
1976  
1977      if (name_is_illegal(name.data)) {
1978          err = -ENOENT;
1979          goto out_nofid;
1980      }
1981  
1982      if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
1983          err = -EEXIST;
1984          goto out_nofid;
1985      }
1986  
1987      fidp = get_fid(pdu, dfid);
1988      if (fidp == NULL) {
1989          err = -ENOENT;
1990          goto out_nofid;
1991      }
1992      if (fidp->fid_type != P9_FID_NONE) {
1993          err = -EINVAL;
1994          goto out;
1995      }
1996  
1997      flags = get_dotl_openflags(pdu->s, flags);
1998      err = v9fs_co_open2(pdu, fidp, &name, gid,
1999                          flags | O_CREAT, mode, &stbuf);
2000      if (err < 0) {
2001          goto out;
2002      }
2003      fidp->fid_type = P9_FID_FILE;
2004      fidp->open_flags = flags;
2005      if (flags & O_EXCL) {
2006          /*
2007           * We let the host file system do O_EXCL check
2008           * We should not reclaim such fd
2009           */
2010          fidp->flags |= FID_NON_RECLAIMABLE;
2011      }
2012      iounit =  get_iounit(pdu, &fidp->path);
2013      err = stat_to_qid(pdu, &stbuf, &qid);
2014      if (err < 0) {
2015          goto out;
2016      }
2017      err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
2018      if (err < 0) {
2019          goto out;
2020      }
2021      err += offset;
2022      trace_v9fs_lcreate_return(pdu->tag, pdu->id,
2023                                qid.type, qid.version, qid.path, iounit);
2024  out:
2025      put_fid(pdu, fidp);
2026  out_nofid:
2027      pdu_complete(pdu, err);
2028      v9fs_string_free(&name);
2029  }
2030  
2031  static void coroutine_fn v9fs_fsync(void *opaque)
2032  {
2033      int err;
2034      int32_t fid;
2035      int datasync;
2036      size_t offset = 7;
2037      V9fsFidState *fidp;
2038      V9fsPDU *pdu = opaque;
2039  
2040      err = pdu_unmarshal(pdu, offset, "dd", &fid, &datasync);
2041      if (err < 0) {
2042          goto out_nofid;
2043      }
2044      trace_v9fs_fsync(pdu->tag, pdu->id, fid, datasync);
2045  
2046      fidp = get_fid(pdu, fid);
2047      if (fidp == NULL) {
2048          err = -ENOENT;
2049          goto out_nofid;
2050      }
2051      err = v9fs_co_fsync(pdu, fidp, datasync);
2052      if (!err) {
2053          err = offset;
2054      }
2055      put_fid(pdu, fidp);
2056  out_nofid:
2057      pdu_complete(pdu, err);
2058  }
2059  
2060  static void coroutine_fn v9fs_clunk(void *opaque)
2061  {
2062      int err;
2063      int32_t fid;
2064      size_t offset = 7;
2065      V9fsFidState *fidp;
2066      V9fsPDU *pdu = opaque;
2067      V9fsState *s = pdu->s;
2068  
2069      err = pdu_unmarshal(pdu, offset, "d", &fid);
2070      if (err < 0) {
2071          goto out_nofid;
2072      }
2073      trace_v9fs_clunk(pdu->tag, pdu->id, fid);
2074  
2075      fidp = clunk_fid(s, fid);
2076      if (fidp == NULL) {
2077          err = -ENOENT;
2078          goto out_nofid;
2079      }
2080      /*
2081       * Bump the ref so that put_fid will
2082       * free the fid.
2083       */
2084      fidp->ref++;
2085      err = put_fid(pdu, fidp);
2086      if (!err) {
2087          err = offset;
2088      }
2089  out_nofid:
2090      pdu_complete(pdu, err);
2091  }
2092  
2093  /*
2094   * Create a QEMUIOVector for a sub-region of PDU iovecs
2095   *
2096   * @qiov:       uninitialized QEMUIOVector
2097   * @skip:       number of bytes to skip from beginning of PDU
2098   * @size:       number of bytes to include
2099   * @is_write:   true - write, false - read
2100   *
2101   * The resulting QEMUIOVector has heap-allocated iovecs and must be cleaned up
2102   * with qemu_iovec_destroy().
2103   */
2104  static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu,
2105                                      size_t skip, size_t *size,
2106                                      bool is_write)
2107  {
2108      QEMUIOVector elem;
2109      struct iovec *iov;
2110      unsigned int niov;
2111      size_t alloc_size = *size + skip;
2112  
2113      if (is_write) {
2114          pdu->s->transport->init_out_iov_from_pdu(pdu, &iov, &niov, alloc_size);
2115      } else {
2116          pdu->s->transport->init_in_iov_from_pdu(pdu, &iov, &niov, &alloc_size);
2117      }
2118  
2119      if (alloc_size < skip) {
2120          *size = 0;
2121      } else {
2122          *size = alloc_size - skip;
2123      }
2124  
2125      qemu_iovec_init_external(&elem, iov, niov);
2126      qemu_iovec_init(qiov, niov);
2127      qemu_iovec_concat(qiov, &elem, skip, *size);
2128  }
2129  
2130  static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
2131                             uint64_t off, uint32_t max_count)
2132  {
2133      ssize_t err;
2134      size_t offset = 7;
2135      size_t read_count;
2136      QEMUIOVector qiov_full;
2137  
2138      if (fidp->fs.xattr.len < off) {
2139          read_count = 0;
2140      } else if (fidp->fs.xattr.len - off < max_count) {
2141          read_count = fidp->fs.xattr.len - off;
2142      } else {
2143          read_count = max_count;
2144      }
2145      err = pdu_marshal(pdu, offset, "d", read_count);
2146      if (err < 0) {
2147          return err;
2148      }
2149      offset += err;
2150  
2151      v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, &read_count, false);
2152      err = v9fs_pack(qiov_full.iov, qiov_full.niov, 0,
2153                      ((char *)fidp->fs.xattr.value) + off,
2154                      read_count);
2155      qemu_iovec_destroy(&qiov_full);
2156      if (err < 0) {
2157          return err;
2158      }
2159      offset += err;
2160      return offset;
2161  }
2162  
/*
 * Read directory entries of @fidp and marshal one stat structure ("S") per
 * entry into the reply of @pdu, starting at offset 11.
 *
 * Entries are added until the next one would make the payload exceed
 * @max_count, the directory is exhausted, or an error occurs. When an entry
 * does not fit (or cannot be marshalled) the directory stream is moved back
 * to the position saved before that entry was read.
 *
 * Returns the number of payload bytes marshalled, or a negative value on
 * error.
 */
static int coroutine_fn v9fs_do_readdir_with_stat(V9fsPDU *pdu,
                                                  V9fsFidState *fidp,
                                                  uint32_t max_count)
{
    V9fsPath path;
    V9fsStat v9stat;
    int len, err = 0;
    int32_t count = 0;
    struct stat stbuf;
    off_t saved_dir_pos;
    struct dirent *dent;

    /* save the directory position */
    saved_dir_pos = v9fs_co_telldir(pdu, fidp);
    if (saved_dir_pos < 0) {
        return saved_dir_pos;
    }

    while (1) {
        v9fs_path_init(&path);

        /* Held until the entry has been marshalled or the loop is left */
        v9fs_readdir_lock(&fidp->fs.dir);

        err = v9fs_co_readdir(pdu, fidp, &dent);
        if (err || !dent) {
            /* error, or no further entry */
            break;
        }
        err = v9fs_co_name_to_path(pdu, &fidp->path, dent->d_name, &path);
        if (err < 0) {
            break;
        }
        err = v9fs_co_lstat(pdu, &path, &stbuf);
        if (err < 0) {
            break;
        }
        err = stat_to_v9stat(pdu, &path, dent->d_name, &stbuf, &v9stat);
        if (err < 0) {
            break;
        }
        /*
         * NOTE(review): the extra 2 bytes presumably account for the
         * size[2] field preceding the stat data on the wire - confirm.
         */
        if ((count + v9stat.size + 2) > max_count) {
            v9fs_readdir_unlock(&fidp->fs.dir);

            /* Ran out of buffer. Set dir back to old position and return */
            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
            v9fs_stat_free(&v9stat);
            v9fs_path_free(&path);
            return count;
        }

        /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
        len = pdu_marshal(pdu, 11 + count, "S", &v9stat);

        v9fs_readdir_unlock(&fidp->fs.dir);

        if (len < 0) {
            /* Marshalling failed: rewind to before this entry and bail out */
            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
            v9fs_stat_free(&v9stat);
            v9fs_path_free(&path);
            return len;
        }
        count += len;
        v9fs_stat_free(&v9stat);
        v9fs_path_free(&path);
        /* The entry was consumed; remember the position following it */
        saved_dir_pos = dent->d_off;
    }

    /* Every break above leaves the loop with the readdir lock still held */
    v9fs_readdir_unlock(&fidp->fs.dir);

    v9fs_path_free(&path);
    if (err < 0) {
        return err;
    }
    return count;
}
2237  
/*
 * Handle a Tread request.
 *
 * The request carries fid[4] offset[8] count[4]. Depending on the fid type
 * the data comes from:
 *  - P9_FID_DIR:   directory entries with stat data
 *                  (v9fs_do_readdir_with_stat()),
 *  - P9_FID_FILE:  the open file, read with v9fs_co_preadv(),
 *  - P9_FID_XATTR: the xattr value attached to the fid (v9fs_xattr_read()).
 * Any other fid type is rejected with -EINVAL.
 *
 * The reply is count[4] followed by the data; the PDU is completed with the
 * total reply length or a negative error.
 */
static void coroutine_fn v9fs_read(void *opaque)
{
    int32_t fid;
    uint64_t off;
    ssize_t err = 0;
    int32_t count = 0;
    size_t offset = 7;
    uint32_t max_count;
    V9fsFidState *fidp;
    V9fsPDU *pdu = opaque;
    V9fsState *s = pdu->s;

    err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &max_count);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_read(pdu->tag, pdu->id, fid, off, max_count);

    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -EINVAL;
        goto out_nofid;
    }
    if (fidp->fid_type == P9_FID_DIR) {

        /* A read at offset 0 restarts the directory listing */
        if (off == 0) {
            v9fs_co_rewinddir(pdu, fidp);
        }
        count = v9fs_do_readdir_with_stat(pdu, fidp, max_count);
        if (count < 0) {
            err = count;
            goto out;
        }
        err = pdu_marshal(pdu, offset, "d", count);
        if (err < 0) {
            goto out;
        }
        err += offset + count;
    } else if (fidp->fid_type == P9_FID_FILE) {
        QEMUIOVector qiov_full;
        QEMUIOVector qiov;
        int32_t len;
        size_t size = max_count;

        /* Data area starts after the header and the 4 bytes of count[4] */
        v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset + 4, &size, false);
        qemu_iovec_init(&qiov, qiov_full.niov);
        /* size may have been changed by the call above */
        max_count = size;
        do {
            /* Point qiov at the part of the buffer not filled yet */
            qemu_iovec_reset(&qiov);
            qemu_iovec_concat(&qiov, &qiov_full, count, qiov_full.size - count);
            if (0) {
                print_sg(qiov.iov, qiov.niov);
            }
            /* Loop in case of EINTR */
            do {
                len = v9fs_co_preadv(pdu, fidp, qiov.iov, qiov.niov, off);
                if (len >= 0) {
                    off   += len;
                    count += len;
                }
            } while (len == -EINTR && !pdu->cancelled);
            if (len < 0) {
                /* IO error return the error */
                err = len;
                goto out_free_iovec;
            }
        /* Stop once the buffer is full or a read returned 0 bytes */
        } while (count < max_count && len > 0);
        err = pdu_marshal(pdu, offset, "d", count);
        if (err < 0) {
            goto out_free_iovec;
        }
        err += offset + count;
out_free_iovec:
        qemu_iovec_destroy(&qiov);
        qemu_iovec_destroy(&qiov_full);
    } else if (fidp->fid_type == P9_FID_XATTR) {
        err = v9fs_xattr_read(s, pdu, fidp, off, max_count);
    } else {
        err = -EINVAL;
    }
    trace_v9fs_read_return(pdu->tag, pdu->id, count, err);
out:
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, err);
}
2324  
2325  static size_t v9fs_readdir_data_size(V9fsString *name)
2326  {
2327      /*
2328       * Size of each dirent on the wire: size of qid (13) + size of offset (8)
2329       * size of type (1) + size of name.size (2) + strlen(name.data)
2330       */
2331      return 24 + v9fs_string_size(name);
2332  }
2333  
/*
 * Read directory entries of @fidp and marshal them ("Qqbs": qid, offset,
 * type, name) into the reply of @pdu, starting at offset 11.
 *
 * Entries are added until the next one would make the payload exceed
 * @max_count, the directory is exhausted, or an error occurs. When an entry
 * does not fit or cannot be processed, the directory stream is moved back to
 * the position saved before that entry was read.
 *
 * Returns the number of payload bytes marshalled, or a negative value on
 * error.
 */
static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
                                        int32_t max_count)
{
    size_t size;
    V9fsQID qid;
    V9fsString name;
    int len, err = 0;
    int32_t count = 0;
    off_t saved_dir_pos;
    struct dirent *dent;

    /* save the directory position */
    saved_dir_pos = v9fs_co_telldir(pdu, fidp);
    if (saved_dir_pos < 0) {
        return saved_dir_pos;
    }

    while (1) {
        /* Held until the entry has been marshalled or the loop is left */
        v9fs_readdir_lock(&fidp->fs.dir);

        err = v9fs_co_readdir(pdu, fidp, &dent);
        if (err || !dent) {
            /* error, or no further entry */
            break;
        }
        v9fs_string_init(&name);
        v9fs_string_sprintf(&name, "%s", dent->d_name);
        if ((count + v9fs_readdir_data_size(&name)) > max_count) {
            v9fs_readdir_unlock(&fidp->fs.dir);

            /* Ran out of buffer. Set dir back to old position and return */
            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
            v9fs_string_free(&name);
            return count;
        }

        if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) {
            /*
             * dirent_to_qid() implies expensive stat call for each entry,
             * we must do that here though since inode remapping requires
             * the device id, which in turn might be different for
             * different entries; we cannot make any assumption to avoid
             * that here.
             */
            err = dirent_to_qid(pdu, fidp, dent, &qid);
            if (err < 0) {
                v9fs_readdir_unlock(&fidp->fs.dir);
                v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
                v9fs_string_free(&name);
                return err;
            }
        } else {
            /*
             * Fill up just the path field of qid because the client uses
             * only that. To fill the entire qid structure we will have
             * to stat each dirent found, which is expensive. For the
             * latter reason we don't call dirent_to_qid() here. Only drawback
             * is that no multi-device export detection of stat_to_qid()
             * would be done and provided as error to the user here. But
             * user would get that error anyway when accessing those
             * files/dirs through other ways.
             */
            /* Copy no more bytes than the smaller of the two fields holds */
            size = MIN(sizeof(dent->d_ino), sizeof(qid.path));
            memcpy(&qid.path, &dent->d_ino, size);
            /* Fill the other fields with dummy values */
            qid.type = 0;
            qid.version = 0;
        }

        /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
        len = pdu_marshal(pdu, 11 + count, "Qqbs",
                          &qid, dent->d_off,
                          dent->d_type, &name);

        v9fs_readdir_unlock(&fidp->fs.dir);

        if (len < 0) {
            /* Marshalling failed: rewind to before this entry and bail out */
            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
            v9fs_string_free(&name);
            return len;
        }
        count += len;
        v9fs_string_free(&name);
        /* The entry was consumed; remember the position following it */
        saved_dir_pos = dent->d_off;
    }

    /* Every break above leaves the loop with the readdir lock still held */
    v9fs_readdir_unlock(&fidp->fs.dir);

    if (err < 0) {
        return err;
    }
    return count;
}
2426  
2427  static void coroutine_fn v9fs_readdir(void *opaque)
2428  {
2429      int32_t fid;
2430      V9fsFidState *fidp;
2431      ssize_t retval = 0;
2432      size_t offset = 7;
2433      uint64_t initial_offset;
2434      int32_t count;
2435      uint32_t max_count;
2436      V9fsPDU *pdu = opaque;
2437      V9fsState *s = pdu->s;
2438  
2439      retval = pdu_unmarshal(pdu, offset, "dqd", &fid,
2440                             &initial_offset, &max_count);
2441      if (retval < 0) {
2442          goto out_nofid;
2443      }
2444      trace_v9fs_readdir(pdu->tag, pdu->id, fid, initial_offset, max_count);
2445  
2446      /* Enough space for a R_readdir header: size[4] Rreaddir tag[2] count[4] */
2447      if (max_count > s->msize - 11) {
2448          max_count = s->msize - 11;
2449          warn_report_once(
2450              "9p: bad client: T_readdir with count > msize - 11"
2451          );
2452      }
2453  
2454      fidp = get_fid(pdu, fid);
2455      if (fidp == NULL) {
2456          retval = -EINVAL;
2457          goto out_nofid;
2458      }
2459      if (!fidp->fs.dir.stream) {
2460          retval = -EINVAL;
2461          goto out;
2462      }
2463      if (initial_offset == 0) {
2464          v9fs_co_rewinddir(pdu, fidp);
2465      } else {
2466          v9fs_co_seekdir(pdu, fidp, initial_offset);
2467      }
2468      count = v9fs_do_readdir(pdu, fidp, max_count);
2469      if (count < 0) {
2470          retval = count;
2471          goto out;
2472      }
2473      retval = pdu_marshal(pdu, offset, "d", count);
2474      if (retval < 0) {
2475          goto out;
2476      }
2477      retval += count + offset;
2478      trace_v9fs_readdir_return(pdu->tag, pdu->id, count, retval);
2479  out:
2480      put_fid(pdu, fidp);
2481  out_nofid:
2482      pdu_complete(pdu, retval);
2483  }
2484  
2485  static int v9fs_xattr_write(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
2486                              uint64_t off, uint32_t count,
2487                              struct iovec *sg, int cnt)
2488  {
2489      int i, to_copy;
2490      ssize_t err = 0;
2491      uint64_t write_count;
2492      size_t offset = 7;
2493  
2494  
2495      if (fidp->fs.xattr.len < off) {
2496          return -ENOSPC;
2497      }
2498      write_count = fidp->fs.xattr.len - off;
2499      if (write_count > count) {
2500          write_count = count;
2501      }
2502      err = pdu_marshal(pdu, offset, "d", write_count);
2503      if (err < 0) {
2504          return err;
2505      }
2506      err += offset;
2507      fidp->fs.xattr.copied_len += write_count;
2508      /*
2509       * Now copy the content from sg list
2510       */
2511      for (i = 0; i < cnt; i++) {
2512          if (write_count > sg[i].iov_len) {
2513              to_copy = sg[i].iov_len;
2514          } else {
2515              to_copy = write_count;
2516          }
2517          memcpy((char *)fidp->fs.xattr.value + off, sg[i].iov_base, to_copy);
2518          /* updating vs->off since we are not using below */
2519          off += to_copy;
2520          write_count -= to_copy;
2521      }
2522  
2523      return err;
2524  }
2525  
2526  static void coroutine_fn v9fs_write(void *opaque)
2527  {
2528      ssize_t err;
2529      int32_t fid;
2530      uint64_t off;
2531      uint32_t count;
2532      int32_t len = 0;
2533      int32_t total = 0;
2534      size_t offset = 7;
2535      size_t size;
2536      V9fsFidState *fidp;
2537      V9fsPDU *pdu = opaque;
2538      V9fsState *s = pdu->s;
2539      QEMUIOVector qiov_full;
2540      QEMUIOVector qiov;
2541  
2542      err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &count);
2543      if (err < 0) {
2544          pdu_complete(pdu, err);
2545          return;
2546      }
2547      offset += err;
2548      size = count;
2549      v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, &size, true);
2550      count = size;
2551      trace_v9fs_write(pdu->tag, pdu->id, fid, off, count, qiov_full.niov);
2552  
2553      fidp = get_fid(pdu, fid);
2554      if (fidp == NULL) {
2555          err = -EINVAL;
2556          goto out_nofid;
2557      }
2558      if (fidp->fid_type == P9_FID_FILE) {
2559          if (fidp->fs.fd == -1) {
2560              err = -EINVAL;
2561              goto out;
2562          }
2563      } else if (fidp->fid_type == P9_FID_XATTR) {
2564          /*
2565           * setxattr operation
2566           */
2567          err = v9fs_xattr_write(s, pdu, fidp, off, count,
2568                                 qiov_full.iov, qiov_full.niov);
2569          goto out;
2570      } else {
2571          err = -EINVAL;
2572          goto out;
2573      }
2574      qemu_iovec_init(&qiov, qiov_full.niov);
2575      do {
2576          qemu_iovec_reset(&qiov);
2577          qemu_iovec_concat(&qiov, &qiov_full, total, qiov_full.size - total);
2578          if (0) {
2579              print_sg(qiov.iov, qiov.niov);
2580          }
2581          /* Loop in case of EINTR */
2582          do {
2583              len = v9fs_co_pwritev(pdu, fidp, qiov.iov, qiov.niov, off);
2584              if (len >= 0) {
2585                  off   += len;
2586                  total += len;
2587              }
2588          } while (len == -EINTR && !pdu->cancelled);
2589          if (len < 0) {
2590              /* IO error return the error */
2591              err = len;
2592              goto out_qiov;
2593          }
2594      } while (total < count && len > 0);
2595  
2596      offset = 7;
2597      err = pdu_marshal(pdu, offset, "d", total);
2598      if (err < 0) {
2599          goto out_qiov;
2600      }
2601      err += offset;
2602      trace_v9fs_write_return(pdu->tag, pdu->id, total, err);
2603  out_qiov:
2604      qemu_iovec_destroy(&qiov);
2605  out:
2606      put_fid(pdu, fidp);
2607  out_nofid:
2608      qemu_iovec_destroy(&qiov_full);
2609      pdu_complete(pdu, err);
2610  }
2611  
2612  static void coroutine_fn v9fs_create(void *opaque)
2613  {
2614      int32_t fid;
2615      int err = 0;
2616      size_t offset = 7;
2617      V9fsFidState *fidp;
2618      V9fsQID qid;
2619      int32_t perm;
2620      int8_t mode;
2621      V9fsPath path;
2622      struct stat stbuf;
2623      V9fsString name;
2624      V9fsString extension;
2625      int iounit;
2626      V9fsPDU *pdu = opaque;
2627      V9fsState *s = pdu->s;
2628  
2629      v9fs_path_init(&path);
2630      v9fs_string_init(&name);
2631      v9fs_string_init(&extension);
2632      err = pdu_unmarshal(pdu, offset, "dsdbs", &fid, &name,
2633                          &perm, &mode, &extension);
2634      if (err < 0) {
2635          goto out_nofid;
2636      }
2637      trace_v9fs_create(pdu->tag, pdu->id, fid, name.data, perm, mode);
2638  
2639      if (name_is_illegal(name.data)) {
2640          err = -ENOENT;
2641          goto out_nofid;
2642      }
2643  
2644      if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
2645          err = -EEXIST;
2646          goto out_nofid;
2647      }
2648  
2649      fidp = get_fid(pdu, fid);
2650      if (fidp == NULL) {
2651          err = -EINVAL;
2652          goto out_nofid;
2653      }
2654      if (fidp->fid_type != P9_FID_NONE) {
2655          err = -EINVAL;
2656          goto out;
2657      }
2658      if (perm & P9_STAT_MODE_DIR) {
2659          err = v9fs_co_mkdir(pdu, fidp, &name, perm & 0777,
2660                              fidp->uid, -1, &stbuf);
2661          if (err < 0) {
2662              goto out;
2663          }
2664          err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2665          if (err < 0) {
2666              goto out;
2667          }
2668          v9fs_path_write_lock(s);
2669          v9fs_path_copy(&fidp->path, &path);
2670          v9fs_path_unlock(s);
2671          err = v9fs_co_opendir(pdu, fidp);
2672          if (err < 0) {
2673              goto out;
2674          }
2675          fidp->fid_type = P9_FID_DIR;
2676      } else if (perm & P9_STAT_MODE_SYMLINK) {
2677          err = v9fs_co_symlink(pdu, fidp, &name,
2678                                extension.data, -1 , &stbuf);
2679          if (err < 0) {
2680              goto out;
2681          }
2682          err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2683          if (err < 0) {
2684              goto out;
2685          }
2686          v9fs_path_write_lock(s);
2687          v9fs_path_copy(&fidp->path, &path);
2688          v9fs_path_unlock(s);
2689      } else if (perm & P9_STAT_MODE_LINK) {
2690          int32_t ofid = atoi(extension.data);
2691          V9fsFidState *ofidp = get_fid(pdu, ofid);
2692          if (ofidp == NULL) {
2693              err = -EINVAL;
2694              goto out;
2695          }
2696          err = v9fs_co_link(pdu, ofidp, fidp, &name);
2697          put_fid(pdu, ofidp);
2698          if (err < 0) {
2699              goto out;
2700          }
2701          err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2702          if (err < 0) {
2703              fidp->fid_type = P9_FID_NONE;
2704              goto out;
2705          }
2706          v9fs_path_write_lock(s);
2707          v9fs_path_copy(&fidp->path, &path);
2708          v9fs_path_unlock(s);
2709          err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
2710          if (err < 0) {
2711              fidp->fid_type = P9_FID_NONE;
2712              goto out;
2713          }
2714      } else if (perm & P9_STAT_MODE_DEVICE) {
2715          char ctype;
2716          uint32_t major, minor;
2717          mode_t nmode = 0;
2718  
2719          if (sscanf(extension.data, "%c %u %u", &ctype, &major, &minor) != 3) {
2720              err = -errno;
2721              goto out;
2722          }
2723  
2724          switch (ctype) {
2725          case 'c':
2726              nmode = S_IFCHR;
2727              break;
2728          case 'b':
2729              nmode = S_IFBLK;
2730              break;
2731          default:
2732              err = -EIO;
2733              goto out;
2734          }
2735  
2736          nmode |= perm & 0777;
2737          err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2738                              makedev(major, minor), nmode, &stbuf);
2739          if (err < 0) {
2740              goto out;
2741          }
2742          err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2743          if (err < 0) {
2744              goto out;
2745          }
2746          v9fs_path_write_lock(s);
2747          v9fs_path_copy(&fidp->path, &path);
2748          v9fs_path_unlock(s);
2749      } else if (perm & P9_STAT_MODE_NAMED_PIPE) {
2750          err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2751                              0, S_IFIFO | (perm & 0777), &stbuf);
2752          if (err < 0) {
2753              goto out;
2754          }
2755          err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2756          if (err < 0) {
2757              goto out;
2758          }
2759          v9fs_path_write_lock(s);
2760          v9fs_path_copy(&fidp->path, &path);
2761          v9fs_path_unlock(s);
2762      } else if (perm & P9_STAT_MODE_SOCKET) {
2763          err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2764                              0, S_IFSOCK | (perm & 0777), &stbuf);
2765          if (err < 0) {
2766              goto out;
2767          }
2768          err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2769          if (err < 0) {
2770              goto out;
2771          }
2772          v9fs_path_write_lock(s);
2773          v9fs_path_copy(&fidp->path, &path);
2774          v9fs_path_unlock(s);
2775      } else {
2776          err = v9fs_co_open2(pdu, fidp, &name, -1,
2777                              omode_to_uflags(mode)|O_CREAT, perm, &stbuf);
2778          if (err < 0) {
2779              goto out;
2780          }
2781          fidp->fid_type = P9_FID_FILE;
2782          fidp->open_flags = omode_to_uflags(mode);
2783          if (fidp->open_flags & O_EXCL) {
2784              /*
2785               * We let the host file system do O_EXCL check
2786               * We should not reclaim such fd
2787               */
2788              fidp->flags |= FID_NON_RECLAIMABLE;
2789          }
2790      }
2791      iounit = get_iounit(pdu, &fidp->path);
2792      err = stat_to_qid(pdu, &stbuf, &qid);
2793      if (err < 0) {
2794          goto out;
2795      }
2796      err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
2797      if (err < 0) {
2798          goto out;
2799      }
2800      err += offset;
2801      trace_v9fs_create_return(pdu->tag, pdu->id,
2802                               qid.type, qid.version, qid.path, iounit);
2803  out:
2804      put_fid(pdu, fidp);
2805  out_nofid:
2806     pdu_complete(pdu, err);
2807     v9fs_string_free(&name);
2808     v9fs_string_free(&extension);
2809     v9fs_path_free(&path);
2810  }
2811  
2812  static void coroutine_fn v9fs_symlink(void *opaque)
2813  {
2814      V9fsPDU *pdu = opaque;
2815      V9fsString name;
2816      V9fsString symname;
2817      V9fsFidState *dfidp;
2818      V9fsQID qid;
2819      struct stat stbuf;
2820      int32_t dfid;
2821      int err = 0;
2822      gid_t gid;
2823      size_t offset = 7;
2824  
2825      v9fs_string_init(&name);
2826      v9fs_string_init(&symname);
2827      err = pdu_unmarshal(pdu, offset, "dssd", &dfid, &name, &symname, &gid);
2828      if (err < 0) {
2829          goto out_nofid;
2830      }
2831      trace_v9fs_symlink(pdu->tag, pdu->id, dfid, name.data, symname.data, gid);
2832  
2833      if (name_is_illegal(name.data)) {
2834          err = -ENOENT;
2835          goto out_nofid;
2836      }
2837  
2838      if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
2839          err = -EEXIST;
2840          goto out_nofid;
2841      }
2842  
2843      dfidp = get_fid(pdu, dfid);
2844      if (dfidp == NULL) {
2845          err = -EINVAL;
2846          goto out_nofid;
2847      }
2848      err = v9fs_co_symlink(pdu, dfidp, &name, symname.data, gid, &stbuf);
2849      if (err < 0) {
2850          goto out;
2851      }
2852      err = stat_to_qid(pdu, &stbuf, &qid);
2853      if (err < 0) {
2854          goto out;
2855      }
2856      err =  pdu_marshal(pdu, offset, "Q", &qid);
2857      if (err < 0) {
2858          goto out;
2859      }
2860      err += offset;
2861      trace_v9fs_symlink_return(pdu->tag, pdu->id,
2862                                qid.type, qid.version, qid.path);
2863  out:
2864      put_fid(pdu, dfidp);
2865  out_nofid:
2866      pdu_complete(pdu, err);
2867      v9fs_string_free(&name);
2868      v9fs_string_free(&symname);
2869  }
2870  
2871  static void coroutine_fn v9fs_flush(void *opaque)
2872  {
2873      ssize_t err;
2874      int16_t tag;
2875      size_t offset = 7;
2876      V9fsPDU *cancel_pdu = NULL;
2877      V9fsPDU *pdu = opaque;
2878      V9fsState *s = pdu->s;
2879  
2880      err = pdu_unmarshal(pdu, offset, "w", &tag);
2881      if (err < 0) {
2882          pdu_complete(pdu, err);
2883          return;
2884      }
2885      trace_v9fs_flush(pdu->tag, pdu->id, tag);
2886  
2887      if (pdu->tag == tag) {
2888          warn_report("the guest sent a self-referencing 9P flush request");
2889      } else {
2890          QLIST_FOREACH(cancel_pdu, &s->active_list, next) {
2891              if (cancel_pdu->tag == tag) {
2892                  break;
2893              }
2894          }
2895      }
2896      if (cancel_pdu) {
2897          cancel_pdu->cancelled = 1;
2898          /*
2899           * Wait for pdu to complete.
2900           */
2901          qemu_co_queue_wait(&cancel_pdu->complete, NULL);
2902          if (!qemu_co_queue_next(&cancel_pdu->complete)) {
2903              cancel_pdu->cancelled = 0;
2904              pdu_free(cancel_pdu);
2905          }
2906      }
2907      pdu_complete(pdu, 7);
2908  }
2909  
2910  static void coroutine_fn v9fs_link(void *opaque)
2911  {
2912      V9fsPDU *pdu = opaque;
2913      int32_t dfid, oldfid;
2914      V9fsFidState *dfidp, *oldfidp;
2915      V9fsString name;
2916      size_t offset = 7;
2917      int err = 0;
2918  
2919      v9fs_string_init(&name);
2920      err = pdu_unmarshal(pdu, offset, "dds", &dfid, &oldfid, &name);
2921      if (err < 0) {
2922          goto out_nofid;
2923      }
2924      trace_v9fs_link(pdu->tag, pdu->id, dfid, oldfid, name.data);
2925  
2926      if (name_is_illegal(name.data)) {
2927          err = -ENOENT;
2928          goto out_nofid;
2929      }
2930  
2931      if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
2932          err = -EEXIST;
2933          goto out_nofid;
2934      }
2935  
2936      dfidp = get_fid(pdu, dfid);
2937      if (dfidp == NULL) {
2938          err = -ENOENT;
2939          goto out_nofid;
2940      }
2941  
2942      oldfidp = get_fid(pdu, oldfid);
2943      if (oldfidp == NULL) {
2944          err = -ENOENT;
2945          goto out;
2946      }
2947      err = v9fs_co_link(pdu, oldfidp, dfidp, &name);
2948      if (!err) {
2949          err = offset;
2950      }
2951      put_fid(pdu, oldfidp);
2952  out:
2953      put_fid(pdu, dfidp);
2954  out_nofid:
2955      v9fs_string_free(&name);
2956      pdu_complete(pdu, err);
2957  }
2958  
2959  /* Only works with path name based fid */
2960  static void coroutine_fn v9fs_remove(void *opaque)
2961  {
2962      int32_t fid;
2963      int err = 0;
2964      size_t offset = 7;
2965      V9fsFidState *fidp;
2966      V9fsPDU *pdu = opaque;
2967  
2968      err = pdu_unmarshal(pdu, offset, "d", &fid);
2969      if (err < 0) {
2970          goto out_nofid;
2971      }
2972      trace_v9fs_remove(pdu->tag, pdu->id, fid);
2973  
2974      fidp = get_fid(pdu, fid);
2975      if (fidp == NULL) {
2976          err = -EINVAL;
2977          goto out_nofid;
2978      }
2979      /* if fs driver is not path based, return EOPNOTSUPP */
2980      if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) {
2981          err = -EOPNOTSUPP;
2982          goto out_err;
2983      }
2984      /*
2985       * IF the file is unlinked, we cannot reopen
2986       * the file later. So don't reclaim fd
2987       */
2988      err = v9fs_mark_fids_unreclaim(pdu, &fidp->path);
2989      if (err < 0) {
2990          goto out_err;
2991      }
2992      err = v9fs_co_remove(pdu, &fidp->path);
2993      if (!err) {
2994          err = offset;
2995      }
2996  out_err:
2997      /* For TREMOVE we need to clunk the fid even on failed remove */
2998      clunk_fid(pdu->s, fidp->fid);
2999      put_fid(pdu, fidp);
3000  out_nofid:
3001      pdu_complete(pdu, err);
3002  }
3003  
3004  static void coroutine_fn v9fs_unlinkat(void *opaque)
3005  {
3006      int err = 0;
3007      V9fsString name;
3008      int32_t dfid, flags, rflags = 0;
3009      size_t offset = 7;
3010      V9fsPath path;
3011      V9fsFidState *dfidp;
3012      V9fsPDU *pdu = opaque;
3013  
3014      v9fs_string_init(&name);
3015      err = pdu_unmarshal(pdu, offset, "dsd", &dfid, &name, &flags);
3016      if (err < 0) {
3017          goto out_nofid;
3018      }
3019  
3020      if (name_is_illegal(name.data)) {
3021          err = -ENOENT;
3022          goto out_nofid;
3023      }
3024  
3025      if (!strcmp(".", name.data)) {
3026          err = -EINVAL;
3027          goto out_nofid;
3028      }
3029  
3030      if (!strcmp("..", name.data)) {
3031          err = -ENOTEMPTY;
3032          goto out_nofid;
3033      }
3034  
3035      if (flags & ~P9_DOTL_AT_REMOVEDIR) {
3036          err = -EINVAL;
3037          goto out_nofid;
3038      }
3039  
3040      if (flags & P9_DOTL_AT_REMOVEDIR) {
3041          rflags |= AT_REMOVEDIR;
3042      }
3043  
3044      dfidp = get_fid(pdu, dfid);
3045      if (dfidp == NULL) {
3046          err = -EINVAL;
3047          goto out_nofid;
3048      }
3049      /*
3050       * IF the file is unlinked, we cannot reopen
3051       * the file later. So don't reclaim fd
3052       */
3053      v9fs_path_init(&path);
3054      err = v9fs_co_name_to_path(pdu, &dfidp->path, name.data, &path);
3055      if (err < 0) {
3056          goto out_err;
3057      }
3058      err = v9fs_mark_fids_unreclaim(pdu, &path);
3059      if (err < 0) {
3060          goto out_err;
3061      }
3062      err = v9fs_co_unlinkat(pdu, &dfidp->path, &name, rflags);
3063      if (!err) {
3064          err = offset;
3065      }
3066  out_err:
3067      put_fid(pdu, dfidp);
3068      v9fs_path_free(&path);
3069  out_nofid:
3070      pdu_complete(pdu, err);
3071      v9fs_string_free(&name);
3072  }
3073  
3074  
3075  /* Only works with path name based fid */
3076  static int coroutine_fn v9fs_complete_rename(V9fsPDU *pdu, V9fsFidState *fidp,
3077                                               int32_t newdirfid,
3078                                               V9fsString *name)
3079  {
3080      int err = 0;
3081      V9fsPath new_path;
3082      V9fsFidState *tfidp;
3083      V9fsState *s = pdu->s;
3084      V9fsFidState *dirfidp = NULL;
3085  
3086      v9fs_path_init(&new_path);
3087      if (newdirfid != -1) {
3088          dirfidp = get_fid(pdu, newdirfid);
3089          if (dirfidp == NULL) {
3090              return -ENOENT;
3091          }
3092          if (fidp->fid_type != P9_FID_NONE) {
3093              err = -EINVAL;
3094              goto out;
3095          }
3096          err = v9fs_co_name_to_path(pdu, &dirfidp->path, name->data, &new_path);
3097          if (err < 0) {
3098              goto out;
3099          }
3100      } else {
3101          char *dir_name = g_path_get_dirname(fidp->path.data);
3102          V9fsPath dir_path;
3103  
3104          v9fs_path_init(&dir_path);
3105          v9fs_path_sprintf(&dir_path, "%s", dir_name);
3106          g_free(dir_name);
3107  
3108          err = v9fs_co_name_to_path(pdu, &dir_path, name->data, &new_path);
3109          v9fs_path_free(&dir_path);
3110          if (err < 0) {
3111              goto out;
3112          }
3113      }
3114      err = v9fs_co_rename(pdu, &fidp->path, &new_path);
3115      if (err < 0) {
3116          goto out;
3117      }
3118      /*
3119       * Fixup fid's pointing to the old name to
3120       * start pointing to the new name
3121       */
3122      for (tfidp = s->fid_list; tfidp; tfidp = tfidp->next) {
3123          if (v9fs_path_is_ancestor(&fidp->path, &tfidp->path)) {
3124              /* replace the name */
3125              v9fs_fix_path(&tfidp->path, &new_path, strlen(fidp->path.data));
3126          }
3127      }
3128  out:
3129      if (dirfidp) {
3130          put_fid(pdu, dirfidp);
3131      }
3132      v9fs_path_free(&new_path);
3133      return err;
3134  }
3135  
3136  /* Only works with path name based fid */
3137  static void coroutine_fn v9fs_rename(void *opaque)
3138  {
3139      int32_t fid;
3140      ssize_t err = 0;
3141      size_t offset = 7;
3142      V9fsString name;
3143      int32_t newdirfid;
3144      V9fsFidState *fidp;
3145      V9fsPDU *pdu = opaque;
3146      V9fsState *s = pdu->s;
3147  
3148      v9fs_string_init(&name);
3149      err = pdu_unmarshal(pdu, offset, "dds", &fid, &newdirfid, &name);
3150      if (err < 0) {
3151          goto out_nofid;
3152      }
3153  
3154      if (name_is_illegal(name.data)) {
3155          err = -ENOENT;
3156          goto out_nofid;
3157      }
3158  
3159      if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
3160          err = -EISDIR;
3161          goto out_nofid;
3162      }
3163  
3164      fidp = get_fid(pdu, fid);
3165      if (fidp == NULL) {
3166          err = -ENOENT;
3167          goto out_nofid;
3168      }
3169      if (fidp->fid_type != P9_FID_NONE) {
3170          err = -EINVAL;
3171          goto out;
3172      }
3173      /* if fs driver is not path based, return EOPNOTSUPP */
3174      if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) {
3175          err = -EOPNOTSUPP;
3176          goto out;
3177      }
3178      v9fs_path_write_lock(s);
3179      err = v9fs_complete_rename(pdu, fidp, newdirfid, &name);
3180      v9fs_path_unlock(s);
3181      if (!err) {
3182          err = offset;
3183      }
3184  out:
3185      put_fid(pdu, fidp);
3186  out_nofid:
3187      pdu_complete(pdu, err);
3188      v9fs_string_free(&name);
3189  }
3190  
3191  static int coroutine_fn v9fs_fix_fid_paths(V9fsPDU *pdu, V9fsPath *olddir,
3192                                             V9fsString *old_name,
3193                                             V9fsPath *newdir,
3194                                             V9fsString *new_name)
3195  {
3196      V9fsFidState *tfidp;
3197      V9fsPath oldpath, newpath;
3198      V9fsState *s = pdu->s;
3199      int err;
3200  
3201      v9fs_path_init(&oldpath);
3202      v9fs_path_init(&newpath);
3203      err = v9fs_co_name_to_path(pdu, olddir, old_name->data, &oldpath);
3204      if (err < 0) {
3205          goto out;
3206      }
3207      err = v9fs_co_name_to_path(pdu, newdir, new_name->data, &newpath);
3208      if (err < 0) {
3209          goto out;
3210      }
3211  
3212      /*
3213       * Fixup fid's pointing to the old name to
3214       * start pointing to the new name
3215       */
3216      for (tfidp = s->fid_list; tfidp; tfidp = tfidp->next) {
3217          if (v9fs_path_is_ancestor(&oldpath, &tfidp->path)) {
3218              /* replace the name */
3219              v9fs_fix_path(&tfidp->path, &newpath, strlen(oldpath.data));
3220          }
3221      }
3222  out:
3223      v9fs_path_free(&oldpath);
3224      v9fs_path_free(&newpath);
3225      return err;
3226  }
3227  
3228  static int coroutine_fn v9fs_complete_renameat(V9fsPDU *pdu, int32_t olddirfid,
3229                                                 V9fsString *old_name,
3230                                                 int32_t newdirfid,
3231                                                 V9fsString *new_name)
3232  {
3233      int err = 0;
3234      V9fsState *s = pdu->s;
3235      V9fsFidState *newdirfidp = NULL, *olddirfidp = NULL;
3236  
3237      olddirfidp = get_fid(pdu, olddirfid);
3238      if (olddirfidp == NULL) {
3239          err = -ENOENT;
3240          goto out;
3241      }
3242      if (newdirfid != -1) {
3243          newdirfidp = get_fid(pdu, newdirfid);
3244          if (newdirfidp == NULL) {
3245              err = -ENOENT;
3246              goto out;
3247          }
3248      } else {
3249          newdirfidp = get_fid(pdu, olddirfid);
3250      }
3251  
3252      err = v9fs_co_renameat(pdu, &olddirfidp->path, old_name,
3253                             &newdirfidp->path, new_name);
3254      if (err < 0) {
3255          goto out;
3256      }
3257      if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) {
3258          /* Only for path based fid  we need to do the below fixup */
3259          err = v9fs_fix_fid_paths(pdu, &olddirfidp->path, old_name,
3260                                   &newdirfidp->path, new_name);
3261      }
3262  out:
3263      if (olddirfidp) {
3264          put_fid(pdu, olddirfidp);
3265      }
3266      if (newdirfidp) {
3267          put_fid(pdu, newdirfidp);
3268      }
3269      return err;
3270  }
3271  
3272  static void coroutine_fn v9fs_renameat(void *opaque)
3273  {
3274      ssize_t err = 0;
3275      size_t offset = 7;
3276      V9fsPDU *pdu = opaque;
3277      V9fsState *s = pdu->s;
3278      int32_t olddirfid, newdirfid;
3279      V9fsString old_name, new_name;
3280  
3281      v9fs_string_init(&old_name);
3282      v9fs_string_init(&new_name);
3283      err = pdu_unmarshal(pdu, offset, "dsds", &olddirfid,
3284                          &old_name, &newdirfid, &new_name);
3285      if (err < 0) {
3286          goto out_err;
3287      }
3288  
3289      if (name_is_illegal(old_name.data) || name_is_illegal(new_name.data)) {
3290          err = -ENOENT;
3291          goto out_err;
3292      }
3293  
3294      if (!strcmp(".", old_name.data) || !strcmp("..", old_name.data) ||
3295          !strcmp(".", new_name.data) || !strcmp("..", new_name.data)) {
3296          err = -EISDIR;
3297          goto out_err;
3298      }
3299  
3300      v9fs_path_write_lock(s);
3301      err = v9fs_complete_renameat(pdu, olddirfid,
3302                                   &old_name, newdirfid, &new_name);
3303      v9fs_path_unlock(s);
3304      if (!err) {
3305          err = offset;
3306      }
3307  
3308  out_err:
3309      pdu_complete(pdu, err);
3310      v9fs_string_free(&old_name);
3311      v9fs_string_free(&new_name);
3312  }
3313  
3314  static void coroutine_fn v9fs_wstat(void *opaque)
3315  {
3316      int32_t fid;
3317      int err = 0;
3318      int16_t unused;
3319      V9fsStat v9stat;
3320      size_t offset = 7;
3321      struct stat stbuf;
3322      V9fsFidState *fidp;
3323      V9fsPDU *pdu = opaque;
3324      V9fsState *s = pdu->s;
3325  
3326      v9fs_stat_init(&v9stat);
3327      err = pdu_unmarshal(pdu, offset, "dwS", &fid, &unused, &v9stat);
3328      if (err < 0) {
3329          goto out_nofid;
3330      }
3331      trace_v9fs_wstat(pdu->tag, pdu->id, fid,
3332                       v9stat.mode, v9stat.atime, v9stat.mtime);
3333  
3334      fidp = get_fid(pdu, fid);
3335      if (fidp == NULL) {
3336          err = -EINVAL;
3337          goto out_nofid;
3338      }
3339      /* do we need to sync the file? */
3340      if (donttouch_stat(&v9stat)) {
3341          err = v9fs_co_fsync(pdu, fidp, 0);
3342          goto out;
3343      }
3344      if (v9stat.mode != -1) {
3345          uint32_t v9_mode;
3346          err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
3347          if (err < 0) {
3348              goto out;
3349          }
3350          v9_mode = stat_to_v9mode(&stbuf);
3351          if ((v9stat.mode & P9_STAT_MODE_TYPE_BITS) !=
3352              (v9_mode & P9_STAT_MODE_TYPE_BITS)) {
3353              /* Attempting to change the type */
3354              err = -EIO;
3355              goto out;
3356          }
3357          err = v9fs_co_chmod(pdu, &fidp->path,
3358                              v9mode_to_mode(v9stat.mode,
3359                                             &v9stat.extension));
3360          if (err < 0) {
3361              goto out;
3362          }
3363      }
3364      if (v9stat.mtime != -1 || v9stat.atime != -1) {
3365          struct timespec times[2];
3366          if (v9stat.atime != -1) {
3367              times[0].tv_sec = v9stat.atime;
3368              times[0].tv_nsec = 0;
3369          } else {
3370              times[0].tv_nsec = UTIME_OMIT;
3371          }
3372          if (v9stat.mtime != -1) {
3373              times[1].tv_sec = v9stat.mtime;
3374              times[1].tv_nsec = 0;
3375          } else {
3376              times[1].tv_nsec = UTIME_OMIT;
3377          }
3378          err = v9fs_co_utimensat(pdu, &fidp->path, times);
3379          if (err < 0) {
3380              goto out;
3381          }
3382      }
3383      if (v9stat.n_gid != -1 || v9stat.n_uid != -1) {
3384          err = v9fs_co_chown(pdu, &fidp->path, v9stat.n_uid, v9stat.n_gid);
3385          if (err < 0) {
3386              goto out;
3387          }
3388      }
3389      if (v9stat.name.size != 0) {
3390          v9fs_path_write_lock(s);
3391          err = v9fs_complete_rename(pdu, fidp, -1, &v9stat.name);
3392          v9fs_path_unlock(s);
3393          if (err < 0) {
3394              goto out;
3395          }
3396      }
3397      if (v9stat.length != -1) {
3398          err = v9fs_co_truncate(pdu, &fidp->path, v9stat.length);
3399          if (err < 0) {
3400              goto out;
3401          }
3402      }
3403      err = offset;
3404  out:
3405      put_fid(pdu, fidp);
3406  out_nofid:
3407      v9fs_stat_free(&v9stat);
3408      pdu_complete(pdu, err);
3409  }
3410  
3411  static int v9fs_fill_statfs(V9fsState *s, V9fsPDU *pdu, struct statfs *stbuf)
3412  {
3413      uint32_t f_type;
3414      uint32_t f_bsize;
3415      uint64_t f_blocks;
3416      uint64_t f_bfree;
3417      uint64_t f_bavail;
3418      uint64_t f_files;
3419      uint64_t f_ffree;
3420      uint64_t fsid_val;
3421      uint32_t f_namelen;
3422      size_t offset = 7;
3423      int32_t bsize_factor;
3424  
3425      /*
3426       * compute bsize factor based on host file system block size
3427       * and client msize
3428       */
3429      bsize_factor = (s->msize - P9_IOHDRSZ)/stbuf->f_bsize;
3430      if (!bsize_factor) {
3431          bsize_factor = 1;
3432      }
3433      f_type  = stbuf->f_type;
3434      f_bsize = stbuf->f_bsize;
3435      f_bsize *= bsize_factor;
3436      /*
3437       * f_bsize is adjusted(multiplied) by bsize factor, so we need to
3438       * adjust(divide) the number of blocks, free blocks and available
3439       * blocks by bsize factor
3440       */
3441      f_blocks = stbuf->f_blocks/bsize_factor;
3442      f_bfree  = stbuf->f_bfree/bsize_factor;
3443      f_bavail = stbuf->f_bavail/bsize_factor;
3444      f_files  = stbuf->f_files;
3445      f_ffree  = stbuf->f_ffree;
3446      fsid_val = (unsigned int) stbuf->f_fsid.__val[0] |
3447                 (unsigned long long)stbuf->f_fsid.__val[1] << 32;
3448      f_namelen = stbuf->f_namelen;
3449  
3450      return pdu_marshal(pdu, offset, "ddqqqqqqd",
3451                         f_type, f_bsize, f_blocks, f_bfree,
3452                         f_bavail, f_files, f_ffree,
3453                         fsid_val, f_namelen);
3454  }
3455  
3456  static void coroutine_fn v9fs_statfs(void *opaque)
3457  {
3458      int32_t fid;
3459      ssize_t retval = 0;
3460      size_t offset = 7;
3461      V9fsFidState *fidp;
3462      struct statfs stbuf;
3463      V9fsPDU *pdu = opaque;
3464      V9fsState *s = pdu->s;
3465  
3466      retval = pdu_unmarshal(pdu, offset, "d", &fid);
3467      if (retval < 0) {
3468          goto out_nofid;
3469      }
3470      fidp = get_fid(pdu, fid);
3471      if (fidp == NULL) {
3472          retval = -ENOENT;
3473          goto out_nofid;
3474      }
3475      retval = v9fs_co_statfs(pdu, &fidp->path, &stbuf);
3476      if (retval < 0) {
3477          goto out;
3478      }
3479      retval = v9fs_fill_statfs(s, pdu, &stbuf);
3480      if (retval < 0) {
3481          goto out;
3482      }
3483      retval += offset;
3484  out:
3485      put_fid(pdu, fidp);
3486  out_nofid:
3487      pdu_complete(pdu, retval);
3488  }
3489  
3490  static void coroutine_fn v9fs_mknod(void *opaque)
3491  {
3492  
3493      int mode;
3494      gid_t gid;
3495      int32_t fid;
3496      V9fsQID qid;
3497      int err = 0;
3498      int major, minor;
3499      size_t offset = 7;
3500      V9fsString name;
3501      struct stat stbuf;
3502      V9fsFidState *fidp;
3503      V9fsPDU *pdu = opaque;
3504  
3505      v9fs_string_init(&name);
3506      err = pdu_unmarshal(pdu, offset, "dsdddd", &fid, &name, &mode,
3507                          &major, &minor, &gid);
3508      if (err < 0) {
3509          goto out_nofid;
3510      }
3511      trace_v9fs_mknod(pdu->tag, pdu->id, fid, mode, major, minor);
3512  
3513      if (name_is_illegal(name.data)) {
3514          err = -ENOENT;
3515          goto out_nofid;
3516      }
3517  
3518      if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
3519          err = -EEXIST;
3520          goto out_nofid;
3521      }
3522  
3523      fidp = get_fid(pdu, fid);
3524      if (fidp == NULL) {
3525          err = -ENOENT;
3526          goto out_nofid;
3527      }
3528      err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, gid,
3529                          makedev(major, minor), mode, &stbuf);
3530      if (err < 0) {
3531          goto out;
3532      }
3533      err = stat_to_qid(pdu, &stbuf, &qid);
3534      if (err < 0) {
3535          goto out;
3536      }
3537      err = pdu_marshal(pdu, offset, "Q", &qid);
3538      if (err < 0) {
3539          goto out;
3540      }
3541      err += offset;
3542      trace_v9fs_mknod_return(pdu->tag, pdu->id,
3543                              qid.type, qid.version, qid.path);
3544  out:
3545      put_fid(pdu, fidp);
3546  out_nofid:
3547      pdu_complete(pdu, err);
3548      v9fs_string_free(&name);
3549  }
3550  
3551  /*
3552   * Implement posix byte range locking code
3553   * Server side handling of locking code is very simple, because 9p server in
3554   * QEMU can handle only one client. And most of the lock handling
3555   * (like conflict, merging) etc is done by the VFS layer itself, so no need to
3556   * do any thing in * qemu 9p server side lock code path.
3557   * So when a TLOCK request comes, always return success
3558   */
3559  static void coroutine_fn v9fs_lock(void *opaque)
3560  {
3561      V9fsFlock flock;
3562      size_t offset = 7;
3563      struct stat stbuf;
3564      V9fsFidState *fidp;
3565      int32_t fid, err = 0;
3566      V9fsPDU *pdu = opaque;
3567  
3568      v9fs_string_init(&flock.client_id);
3569      err = pdu_unmarshal(pdu, offset, "dbdqqds", &fid, &flock.type,
3570                          &flock.flags, &flock.start, &flock.length,
3571                          &flock.proc_id, &flock.client_id);
3572      if (err < 0) {
3573          goto out_nofid;
3574      }
3575      trace_v9fs_lock(pdu->tag, pdu->id, fid,
3576                      flock.type, flock.start, flock.length);
3577  
3578  
3579      /* We support only block flag now (that too ignored currently) */
3580      if (flock.flags & ~P9_LOCK_FLAGS_BLOCK) {
3581          err = -EINVAL;
3582          goto out_nofid;
3583      }
3584      fidp = get_fid(pdu, fid);
3585      if (fidp == NULL) {
3586          err = -ENOENT;
3587          goto out_nofid;
3588      }
3589      err = v9fs_co_fstat(pdu, fidp, &stbuf);
3590      if (err < 0) {
3591          goto out;
3592      }
3593      err = pdu_marshal(pdu, offset, "b", P9_LOCK_SUCCESS);
3594      if (err < 0) {
3595          goto out;
3596      }
3597      err += offset;
3598      trace_v9fs_lock_return(pdu->tag, pdu->id, P9_LOCK_SUCCESS);
3599  out:
3600      put_fid(pdu, fidp);
3601  out_nofid:
3602      pdu_complete(pdu, err);
3603      v9fs_string_free(&flock.client_id);
3604  }
3605  
3606  /*
3607   * When a TGETLOCK request comes, always return success because all lock
3608   * handling is done by client's VFS layer.
3609   */
3610  static void coroutine_fn v9fs_getlock(void *opaque)
3611  {
3612      size_t offset = 7;
3613      struct stat stbuf;
3614      V9fsFidState *fidp;
3615      V9fsGetlock glock;
3616      int32_t fid, err = 0;
3617      V9fsPDU *pdu = opaque;
3618  
3619      v9fs_string_init(&glock.client_id);
3620      err = pdu_unmarshal(pdu, offset, "dbqqds", &fid, &glock.type,
3621                          &glock.start, &glock.length, &glock.proc_id,
3622                          &glock.client_id);
3623      if (err < 0) {
3624          goto out_nofid;
3625      }
3626      trace_v9fs_getlock(pdu->tag, pdu->id, fid,
3627                         glock.type, glock.start, glock.length);
3628  
3629      fidp = get_fid(pdu, fid);
3630      if (fidp == NULL) {
3631          err = -ENOENT;
3632          goto out_nofid;
3633      }
3634      err = v9fs_co_fstat(pdu, fidp, &stbuf);
3635      if (err < 0) {
3636          goto out;
3637      }
3638      glock.type = P9_LOCK_TYPE_UNLCK;
3639      err = pdu_marshal(pdu, offset, "bqqds", glock.type,
3640                            glock.start, glock.length, glock.proc_id,
3641                            &glock.client_id);
3642      if (err < 0) {
3643          goto out;
3644      }
3645      err += offset;
3646      trace_v9fs_getlock_return(pdu->tag, pdu->id, glock.type, glock.start,
3647                                glock.length, glock.proc_id);
3648  out:
3649      put_fid(pdu, fidp);
3650  out_nofid:
3651      pdu_complete(pdu, err);
3652      v9fs_string_free(&glock.client_id);
3653  }
3654  
3655  static void coroutine_fn v9fs_mkdir(void *opaque)
3656  {
3657      V9fsPDU *pdu = opaque;
3658      size_t offset = 7;
3659      int32_t fid;
3660      struct stat stbuf;
3661      V9fsQID qid;
3662      V9fsString name;
3663      V9fsFidState *fidp;
3664      gid_t gid;
3665      int mode;
3666      int err = 0;
3667  
3668      v9fs_string_init(&name);
3669      err = pdu_unmarshal(pdu, offset, "dsdd", &fid, &name, &mode, &gid);
3670      if (err < 0) {
3671          goto out_nofid;
3672      }
3673      trace_v9fs_mkdir(pdu->tag, pdu->id, fid, name.data, mode, gid);
3674  
3675      if (name_is_illegal(name.data)) {
3676          err = -ENOENT;
3677          goto out_nofid;
3678      }
3679  
3680      if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
3681          err = -EEXIST;
3682          goto out_nofid;
3683      }
3684  
3685      fidp = get_fid(pdu, fid);
3686      if (fidp == NULL) {
3687          err = -ENOENT;
3688          goto out_nofid;
3689      }
3690      err = v9fs_co_mkdir(pdu, fidp, &name, mode, fidp->uid, gid, &stbuf);
3691      if (err < 0) {
3692          goto out;
3693      }
3694      err = stat_to_qid(pdu, &stbuf, &qid);
3695      if (err < 0) {
3696          goto out;
3697      }
3698      err = pdu_marshal(pdu, offset, "Q", &qid);
3699      if (err < 0) {
3700          goto out;
3701      }
3702      err += offset;
3703      trace_v9fs_mkdir_return(pdu->tag, pdu->id,
3704                              qid.type, qid.version, qid.path, err);
3705  out:
3706      put_fid(pdu, fidp);
3707  out_nofid:
3708      pdu_complete(pdu, err);
3709      v9fs_string_free(&name);
3710  }
3711  
3712  static void coroutine_fn v9fs_xattrwalk(void *opaque)
3713  {
3714      int64_t size;
3715      V9fsString name;
3716      ssize_t err = 0;
3717      size_t offset = 7;
3718      int32_t fid, newfid;
3719      V9fsFidState *file_fidp;
3720      V9fsFidState *xattr_fidp = NULL;
3721      V9fsPDU *pdu = opaque;
3722      V9fsState *s = pdu->s;
3723  
3724      v9fs_string_init(&name);
3725      err = pdu_unmarshal(pdu, offset, "dds", &fid, &newfid, &name);
3726      if (err < 0) {
3727          goto out_nofid;
3728      }
3729      trace_v9fs_xattrwalk(pdu->tag, pdu->id, fid, newfid, name.data);
3730  
3731      file_fidp = get_fid(pdu, fid);
3732      if (file_fidp == NULL) {
3733          err = -ENOENT;
3734          goto out_nofid;
3735      }
3736      xattr_fidp = alloc_fid(s, newfid);
3737      if (xattr_fidp == NULL) {
3738          err = -EINVAL;
3739          goto out;
3740      }
3741      v9fs_path_copy(&xattr_fidp->path, &file_fidp->path);
3742      if (!v9fs_string_size(&name)) {
3743          /*
3744           * listxattr request. Get the size first
3745           */
3746          size = v9fs_co_llistxattr(pdu, &xattr_fidp->path, NULL, 0);
3747          if (size < 0) {
3748              err = size;
3749              clunk_fid(s, xattr_fidp->fid);
3750              goto out;
3751          }
3752          /*
3753           * Read the xattr value
3754           */
3755          xattr_fidp->fs.xattr.len = size;
3756          xattr_fidp->fid_type = P9_FID_XATTR;
3757          xattr_fidp->fs.xattr.xattrwalk_fid = true;
3758          xattr_fidp->fs.xattr.value = g_malloc0(size);
3759          if (size) {
3760              err = v9fs_co_llistxattr(pdu, &xattr_fidp->path,
3761                                       xattr_fidp->fs.xattr.value,
3762                                       xattr_fidp->fs.xattr.len);
3763              if (err < 0) {
3764                  clunk_fid(s, xattr_fidp->fid);
3765                  goto out;
3766              }
3767          }
3768          err = pdu_marshal(pdu, offset, "q", size);
3769          if (err < 0) {
3770              goto out;
3771          }
3772          err += offset;
3773      } else {
3774          /*
3775           * specific xattr fid. We check for xattr
3776           * presence also collect the xattr size
3777           */
3778          size = v9fs_co_lgetxattr(pdu, &xattr_fidp->path,
3779                                   &name, NULL, 0);
3780          if (size < 0) {
3781              err = size;
3782              clunk_fid(s, xattr_fidp->fid);
3783              goto out;
3784          }
3785          /*
3786           * Read the xattr value
3787           */
3788          xattr_fidp->fs.xattr.len = size;
3789          xattr_fidp->fid_type = P9_FID_XATTR;
3790          xattr_fidp->fs.xattr.xattrwalk_fid = true;
3791          xattr_fidp->fs.xattr.value = g_malloc0(size);
3792          if (size) {
3793              err = v9fs_co_lgetxattr(pdu, &xattr_fidp->path,
3794                                      &name, xattr_fidp->fs.xattr.value,
3795                                      xattr_fidp->fs.xattr.len);
3796              if (err < 0) {
3797                  clunk_fid(s, xattr_fidp->fid);
3798                  goto out;
3799              }
3800          }
3801          err = pdu_marshal(pdu, offset, "q", size);
3802          if (err < 0) {
3803              goto out;
3804          }
3805          err += offset;
3806      }
3807      trace_v9fs_xattrwalk_return(pdu->tag, pdu->id, size);
3808  out:
3809      put_fid(pdu, file_fidp);
3810      if (xattr_fidp) {
3811          put_fid(pdu, xattr_fidp);
3812      }
3813  out_nofid:
3814      pdu_complete(pdu, err);
3815      v9fs_string_free(&name);
3816  }
3817  
3818  static void coroutine_fn v9fs_xattrcreate(void *opaque)
3819  {
3820      int flags, rflags = 0;
3821      int32_t fid;
3822      uint64_t size;
3823      ssize_t err = 0;
3824      V9fsString name;
3825      size_t offset = 7;
3826      V9fsFidState *file_fidp;
3827      V9fsFidState *xattr_fidp;
3828      V9fsPDU *pdu = opaque;
3829  
3830      v9fs_string_init(&name);
3831      err = pdu_unmarshal(pdu, offset, "dsqd", &fid, &name, &size, &flags);
3832      if (err < 0) {
3833          goto out_nofid;
3834      }
3835      trace_v9fs_xattrcreate(pdu->tag, pdu->id, fid, name.data, size, flags);
3836  
3837      if (flags & ~(P9_XATTR_CREATE | P9_XATTR_REPLACE)) {
3838          err = -EINVAL;
3839          goto out_nofid;
3840      }
3841  
3842      if (flags & P9_XATTR_CREATE) {
3843          rflags |= XATTR_CREATE;
3844      }
3845  
3846      if (flags & P9_XATTR_REPLACE) {
3847          rflags |= XATTR_REPLACE;
3848      }
3849  
3850      if (size > XATTR_SIZE_MAX) {
3851          err = -E2BIG;
3852          goto out_nofid;
3853      }
3854  
3855      file_fidp = get_fid(pdu, fid);
3856      if (file_fidp == NULL) {
3857          err = -EINVAL;
3858          goto out_nofid;
3859      }
3860      if (file_fidp->fid_type != P9_FID_NONE) {
3861          err = -EINVAL;
3862          goto out_put_fid;
3863      }
3864  
3865      /* Make the file fid point to xattr */
3866      xattr_fidp = file_fidp;
3867      xattr_fidp->fid_type = P9_FID_XATTR;
3868      xattr_fidp->fs.xattr.copied_len = 0;
3869      xattr_fidp->fs.xattr.xattrwalk_fid = false;
3870      xattr_fidp->fs.xattr.len = size;
3871      xattr_fidp->fs.xattr.flags = rflags;
3872      v9fs_string_init(&xattr_fidp->fs.xattr.name);
3873      v9fs_string_copy(&xattr_fidp->fs.xattr.name, &name);
3874      xattr_fidp->fs.xattr.value = g_malloc0(size);
3875      err = offset;
3876  out_put_fid:
3877      put_fid(pdu, file_fidp);
3878  out_nofid:
3879      pdu_complete(pdu, err);
3880      v9fs_string_free(&name);
3881  }
3882  
3883  static void coroutine_fn v9fs_readlink(void *opaque)
3884  {
3885      V9fsPDU *pdu = opaque;
3886      size_t offset = 7;
3887      V9fsString target;
3888      int32_t fid;
3889      int err = 0;
3890      V9fsFidState *fidp;
3891  
3892      err = pdu_unmarshal(pdu, offset, "d", &fid);
3893      if (err < 0) {
3894          goto out_nofid;
3895      }
3896      trace_v9fs_readlink(pdu->tag, pdu->id, fid);
3897      fidp = get_fid(pdu, fid);
3898      if (fidp == NULL) {
3899          err = -ENOENT;
3900          goto out_nofid;
3901      }
3902  
3903      v9fs_string_init(&target);
3904      err = v9fs_co_readlink(pdu, &fidp->path, &target);
3905      if (err < 0) {
3906          goto out;
3907      }
3908      err = pdu_marshal(pdu, offset, "s", &target);
3909      if (err < 0) {
3910          v9fs_string_free(&target);
3911          goto out;
3912      }
3913      err += offset;
3914      trace_v9fs_readlink_return(pdu->tag, pdu->id, target.data);
3915      v9fs_string_free(&target);
3916  out:
3917      put_fid(pdu, fidp);
3918  out_nofid:
3919      pdu_complete(pdu, err);
3920  }
3921  
3922  static CoroutineEntry *pdu_co_handlers[] = {
3923      [P9_TREADDIR] = v9fs_readdir,
3924      [P9_TSTATFS] = v9fs_statfs,
3925      [P9_TGETATTR] = v9fs_getattr,
3926      [P9_TSETATTR] = v9fs_setattr,
3927      [P9_TXATTRWALK] = v9fs_xattrwalk,
3928      [P9_TXATTRCREATE] = v9fs_xattrcreate,
3929      [P9_TMKNOD] = v9fs_mknod,
3930      [P9_TRENAME] = v9fs_rename,
3931      [P9_TLOCK] = v9fs_lock,
3932      [P9_TGETLOCK] = v9fs_getlock,
3933      [P9_TRENAMEAT] = v9fs_renameat,
3934      [P9_TREADLINK] = v9fs_readlink,
3935      [P9_TUNLINKAT] = v9fs_unlinkat,
3936      [P9_TMKDIR] = v9fs_mkdir,
3937      [P9_TVERSION] = v9fs_version,
3938      [P9_TLOPEN] = v9fs_open,
3939      [P9_TATTACH] = v9fs_attach,
3940      [P9_TSTAT] = v9fs_stat,
3941      [P9_TWALK] = v9fs_walk,
3942      [P9_TCLUNK] = v9fs_clunk,
3943      [P9_TFSYNC] = v9fs_fsync,
3944      [P9_TOPEN] = v9fs_open,
3945      [P9_TREAD] = v9fs_read,
3946  #if 0
3947      [P9_TAUTH] = v9fs_auth,
3948  #endif
3949      [P9_TFLUSH] = v9fs_flush,
3950      [P9_TLINK] = v9fs_link,
3951      [P9_TSYMLINK] = v9fs_symlink,
3952      [P9_TCREATE] = v9fs_create,
3953      [P9_TLCREATE] = v9fs_lcreate,
3954      [P9_TWRITE] = v9fs_write,
3955      [P9_TWSTAT] = v9fs_wstat,
3956      [P9_TREMOVE] = v9fs_remove,
3957  };
3958  
3959  static void coroutine_fn v9fs_op_not_supp(void *opaque)
3960  {
3961      V9fsPDU *pdu = opaque;
3962      pdu_complete(pdu, -EOPNOTSUPP);
3963  }
3964  
3965  static void coroutine_fn v9fs_fs_ro(void *opaque)
3966  {
3967      V9fsPDU *pdu = opaque;
3968      pdu_complete(pdu, -EROFS);
3969  }
3970  
3971  static inline bool is_read_only_op(V9fsPDU *pdu)
3972  {
3973      switch (pdu->id) {
3974      case P9_TREADDIR:
3975      case P9_TSTATFS:
3976      case P9_TGETATTR:
3977      case P9_TXATTRWALK:
3978      case P9_TLOCK:
3979      case P9_TGETLOCK:
3980      case P9_TREADLINK:
3981      case P9_TVERSION:
3982      case P9_TLOPEN:
3983      case P9_TATTACH:
3984      case P9_TSTAT:
3985      case P9_TWALK:
3986      case P9_TCLUNK:
3987      case P9_TFSYNC:
3988      case P9_TOPEN:
3989      case P9_TREAD:
3990      case P9_TAUTH:
3991      case P9_TFLUSH:
3992          return 1;
3993      default:
3994          return 0;
3995      }
3996  }
3997  
3998  void pdu_submit(V9fsPDU *pdu, P9MsgHeader *hdr)
3999  {
4000      Coroutine *co;
4001      CoroutineEntry *handler;
4002      V9fsState *s = pdu->s;
4003  
4004      pdu->size = le32_to_cpu(hdr->size_le);
4005      pdu->id = hdr->id;
4006      pdu->tag = le16_to_cpu(hdr->tag_le);
4007  
4008      if (pdu->id >= ARRAY_SIZE(pdu_co_handlers) ||
4009          (pdu_co_handlers[pdu->id] == NULL)) {
4010          handler = v9fs_op_not_supp;
4011      } else if (is_ro_export(&s->ctx) && !is_read_only_op(pdu)) {
4012          handler = v9fs_fs_ro;
4013      } else {
4014          handler = pdu_co_handlers[pdu->id];
4015      }
4016  
4017      qemu_co_queue_init(&pdu->complete);
4018      co = qemu_coroutine_create(handler, pdu);
4019      qemu_coroutine_enter(co);
4020  }
4021  
4022  /* Returns 0 on success, 1 on failure. */
4023  int v9fs_device_realize_common(V9fsState *s, const V9fsTransport *t,
4024                                 Error **errp)
4025  {
4026      int i, len;
4027      struct stat stat;
4028      FsDriverEntry *fse;
4029      V9fsPath path;
4030      int rc = 1;
4031  
4032      assert(!s->transport);
4033      s->transport = t;
4034  
4035      /* initialize pdu allocator */
4036      QLIST_INIT(&s->free_list);
4037      QLIST_INIT(&s->active_list);
4038      for (i = 0; i < MAX_REQ; i++) {
4039          QLIST_INSERT_HEAD(&s->free_list, &s->pdus[i], next);
4040          s->pdus[i].s = s;
4041          s->pdus[i].idx = i;
4042      }
4043  
4044      v9fs_path_init(&path);
4045  
4046      fse = get_fsdev_fsentry(s->fsconf.fsdev_id);
4047  
4048      if (!fse) {
4049          /* We don't have a fsdev identified by fsdev_id */
4050          error_setg(errp, "9pfs device couldn't find fsdev with the "
4051                     "id = %s",
4052                     s->fsconf.fsdev_id ? s->fsconf.fsdev_id : "NULL");
4053          goto out;
4054      }
4055  
4056      if (!s->fsconf.tag) {
4057          /* we haven't specified a mount_tag */
4058          error_setg(errp, "fsdev with id %s needs mount_tag arguments",
4059                     s->fsconf.fsdev_id);
4060          goto out;
4061      }
4062  
4063      s->ctx.export_flags = fse->export_flags;
4064      s->ctx.fs_root = g_strdup(fse->path);
4065      s->ctx.exops.get_st_gen = NULL;
4066      len = strlen(s->fsconf.tag);
4067      if (len > MAX_TAG_LEN - 1) {
4068          error_setg(errp, "mount tag '%s' (%d bytes) is longer than "
4069                     "maximum (%d bytes)", s->fsconf.tag, len, MAX_TAG_LEN - 1);
4070          goto out;
4071      }
4072  
4073      s->tag = g_strdup(s->fsconf.tag);
4074      s->ctx.uid = -1;
4075  
4076      s->ops = fse->ops;
4077  
4078      s->ctx.fmode = fse->fmode;
4079      s->ctx.dmode = fse->dmode;
4080  
4081      s->fid_list = NULL;
4082      qemu_co_rwlock_init(&s->rename_lock);
4083  
4084      if (s->ops->init(&s->ctx, errp) < 0) {
4085          error_prepend(errp, "cannot initialize fsdev '%s': ",
4086                        s->fsconf.fsdev_id);
4087          goto out;
4088      }
4089  
4090      /*
4091       * Check details of export path, We need to use fs driver
4092       * call back to do that. Since we are in the init path, we don't
4093       * use co-routines here.
4094       */
4095      if (s->ops->name_to_path(&s->ctx, NULL, "/", &path) < 0) {
4096          error_setg(errp,
4097                     "error in converting name to path %s", strerror(errno));
4098          goto out;
4099      }
4100      if (s->ops->lstat(&s->ctx, &path, &stat)) {
4101          error_setg(errp, "share path %s does not exist", fse->path);
4102          goto out;
4103      } else if (!S_ISDIR(stat.st_mode)) {
4104          error_setg(errp, "share path %s is not a directory", fse->path);
4105          goto out;
4106      }
4107  
4108      s->dev_id = stat.st_dev;
4109  
4110      /* init inode remapping : */
4111      /* hash table for variable length inode suffixes */
4112      qpd_table_init(&s->qpd_table);
4113      /* hash table for slow/full inode remapping (most users won't need it) */
4114      qpf_table_init(&s->qpf_table);
4115      /* hash table for quick inode remapping */
4116      qpp_table_init(&s->qpp_table);
4117      s->qp_ndevices = 0;
4118      s->qp_affix_next = 1; /* reserve 0 to detect overflow */
4119      s->qp_fullpath_next = 1;
4120  
4121      s->ctx.fst = &fse->fst;
4122      fsdev_throttle_init(s->ctx.fst);
4123  
4124      rc = 0;
4125  out:
4126      if (rc) {
4127          v9fs_device_unrealize_common(s, NULL);
4128      }
4129      v9fs_path_free(&path);
4130      return rc;
4131  }
4132  
4133  void v9fs_device_unrealize_common(V9fsState *s, Error **errp)
4134  {
4135      if (s->ops && s->ops->cleanup) {
4136          s->ops->cleanup(&s->ctx);
4137      }
4138      if (s->ctx.fst) {
4139          fsdev_throttle_cleanup(s->ctx.fst);
4140      }
4141      g_free(s->tag);
4142      qp_table_destroy(&s->qpd_table);
4143      qp_table_destroy(&s->qpp_table);
4144      qp_table_destroy(&s->qpf_table);
4145      g_free(s->ctx.fs_root);
4146  }
4147  
4148  typedef struct VirtfsCoResetData {
4149      V9fsPDU pdu;
4150      bool done;
4151  } VirtfsCoResetData;
4152  
4153  static void coroutine_fn virtfs_co_reset(void *opaque)
4154  {
4155      VirtfsCoResetData *data = opaque;
4156  
4157      virtfs_reset(&data->pdu);
4158      data->done = true;
4159  }
4160  
4161  void v9fs_reset(V9fsState *s)
4162  {
4163      VirtfsCoResetData data = { .pdu = { .s = s }, .done = false };
4164      Coroutine *co;
4165  
4166      while (!QLIST_EMPTY(&s->active_list)) {
4167          aio_poll(qemu_get_aio_context(), true);
4168      }
4169  
4170      co = qemu_coroutine_create(virtfs_co_reset, &data);
4171      qemu_coroutine_enter(co);
4172  
4173      while (!data.done) {
4174          aio_poll(qemu_get_aio_context(), true);
4175      }
4176  }
4177  
4178  static void __attribute__((__constructor__)) v9fs_set_fd_limit(void)
4179  {
4180      struct rlimit rlim;
4181      if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) {
4182          error_report("Failed to get the resource limit");
4183          exit(1);
4184      }
4185      open_fd_hw = rlim.rlim_cur - MIN(400, rlim.rlim_cur/3);
4186      open_fd_rc = rlim.rlim_cur/2;
4187  }
4188