xref: /openbmc/qemu/hw/9pfs/9p.c (revision feabd6cf78ca3b57da2ce48e95b704e72147bf2c)
1 /*
2  * Virtio 9p backend
3  *
4  * Copyright IBM, Corp. 2010
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include <glib/gprintf.h>
16 #include "hw/virtio/virtio.h"
17 #include "qapi/error.h"
18 #include "qemu/error-report.h"
19 #include "qemu/iov.h"
20 #include "qemu/main-loop.h"
21 #include "qemu/sockets.h"
22 #include "virtio-9p.h"
23 #include "fsdev/qemu-fsdev.h"
24 #include "9p-xattr.h"
25 #include "coth.h"
26 #include "trace.h"
27 #include "migration/blocker.h"
28 #include "sysemu/qtest.h"
29 #include "qemu/xxhash.h"
30 #include <math.h>
31 #include <linux/limits.h>
32 
/*
 * File-descriptor accounting for the 9p server.
 * NOTE(review): open_fd_hw and total_open_fd are not referenced in this part
 * of the file; presumably they hold an open-fd high watermark and the current
 * number of open descriptors - confirm against their users.
 */
int open_fd_hw;
int total_open_fd;
/*
 * Threshold checked by v9fs_reclaim_fd(): the scan over the fid list stops
 * once the number of fids queued for reclaim reaches this value.
 */
static int open_fd_rc;
36 
/*
 * Open mode values as interpreted by omode_to_uflags(): the low two bits
 * (mode & 3) select the access mode, the higher values are independent flag
 * bits. Orexec and Orclose are not evaluated by omode_to_uflags().
 */
enum {
    Oread   = 0x00,
    Owrite  = 0x01,
    Ordwr   = 0x02,
    Oexec   = 0x03,
    Oexcl   = 0x04,
    Otrunc  = 0x10,
    Orexec  = 0x20,
    Orclose = 0x40,
    Oappend = 0x80,
};
48 
49 static ssize_t pdu_marshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...)
50 {
51     ssize_t ret;
52     va_list ap;
53 
54     va_start(ap, fmt);
55     ret = pdu->s->transport->pdu_vmarshal(pdu, offset, fmt, ap);
56     va_end(ap);
57 
58     return ret;
59 }
60 
61 static ssize_t pdu_unmarshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...)
62 {
63     ssize_t ret;
64     va_list ap;
65 
66     va_start(ap, fmt);
67     ret = pdu->s->transport->pdu_vunmarshal(pdu, offset, fmt, ap);
68     va_end(ap);
69 
70     return ret;
71 }
72 
73 static int omode_to_uflags(int8_t mode)
74 {
75     int ret = 0;
76 
77     switch (mode & 3) {
78     case Oread:
79         ret = O_RDONLY;
80         break;
81     case Ordwr:
82         ret = O_RDWR;
83         break;
84     case Owrite:
85         ret = O_WRONLY;
86         break;
87     case Oexec:
88         ret = O_RDONLY;
89         break;
90     }
91 
92     if (mode & Otrunc) {
93         ret |= O_TRUNC;
94     }
95 
96     if (mode & Oappend) {
97         ret |= O_APPEND;
98     }
99 
100     if (mode & Oexcl) {
101         ret |= O_EXCL;
102     }
103 
104     return ret;
105 }
106 
/*
 * One row of the translation table used by dotl_to_open_flags(): if
 * dotl_flag is set in the client supplied flags, open_flag is set in the
 * resulting host flags.
 */
typedef struct DotlOpenflagMap {
    int dotl_flag;
    int open_flag;
} DotlOpenflagMap;
111 
112 static int dotl_to_open_flags(int flags)
113 {
114     int i;
115     /*
116      * We have same bits for P9_DOTL_READONLY, P9_DOTL_WRONLY
117      * and P9_DOTL_NOACCESS
118      */
119     int oflags = flags & O_ACCMODE;
120 
121     DotlOpenflagMap dotl_oflag_map[] = {
122         { P9_DOTL_CREATE, O_CREAT },
123         { P9_DOTL_EXCL, O_EXCL },
124         { P9_DOTL_NOCTTY , O_NOCTTY },
125         { P9_DOTL_TRUNC, O_TRUNC },
126         { P9_DOTL_APPEND, O_APPEND },
127         { P9_DOTL_NONBLOCK, O_NONBLOCK } ,
128         { P9_DOTL_DSYNC, O_DSYNC },
129         { P9_DOTL_FASYNC, FASYNC },
130         { P9_DOTL_DIRECT, O_DIRECT },
131         { P9_DOTL_LARGEFILE, O_LARGEFILE },
132         { P9_DOTL_DIRECTORY, O_DIRECTORY },
133         { P9_DOTL_NOFOLLOW, O_NOFOLLOW },
134         { P9_DOTL_NOATIME, O_NOATIME },
135         { P9_DOTL_SYNC, O_SYNC },
136     };
137 
138     for (i = 0; i < ARRAY_SIZE(dotl_oflag_map); i++) {
139         if (flags & dotl_oflag_map[i].dotl_flag) {
140             oflags |= dotl_oflag_map[i].open_flag;
141         }
142     }
143 
144     return oflags;
145 }
146 
147 void cred_init(FsCred *credp)
148 {
149     credp->fc_uid = -1;
150     credp->fc_gid = -1;
151     credp->fc_mode = -1;
152     credp->fc_rdev = -1;
153 }
154 
155 static int get_dotl_openflags(V9fsState *s, int oflags)
156 {
157     int flags;
158     /*
159      * Filter the client open flags
160      */
161     flags = dotl_to_open_flags(oflags);
162     flags &= ~(O_NOCTTY | O_ASYNC | O_CREAT);
163     /*
164      * Ignore direct disk access hint until the server supports it.
165      */
166     flags &= ~O_DIRECT;
167     return flags;
168 }
169 
170 void v9fs_path_init(V9fsPath *path)
171 {
172     path->data = NULL;
173     path->size = 0;
174 }
175 
176 void v9fs_path_free(V9fsPath *path)
177 {
178     g_free(path->data);
179     path->data = NULL;
180     path->size = 0;
181 }
182 
183 
184 void GCC_FMT_ATTR(2, 3)
185 v9fs_path_sprintf(V9fsPath *path, const char *fmt, ...)
186 {
187     va_list ap;
188 
189     v9fs_path_free(path);
190 
191     va_start(ap, fmt);
192     /* Bump the size for including terminating NULL */
193     path->size = g_vasprintf(&path->data, fmt, ap) + 1;
194     va_end(ap);
195 }
196 
197 void v9fs_path_copy(V9fsPath *dst, const V9fsPath *src)
198 {
199     v9fs_path_free(dst);
200     dst->size = src->size;
201     dst->data = g_memdup(src->data, src->size);
202 }
203 
204 int v9fs_name_to_path(V9fsState *s, V9fsPath *dirpath,
205                       const char *name, V9fsPath *path)
206 {
207     int err;
208     err = s->ops->name_to_path(&s->ctx, dirpath, name, path);
209     if (err < 0) {
210         err = -errno;
211     }
212     return err;
213 }
214 
215 /*
216  * Return TRUE if s1 is an ancestor of s2.
217  *
218  * E.g. "a/b" is an ancestor of "a/b/c" but not of "a/bc/d".
219  * As a special case, We treat s1 as ancestor of s2 if they are same!
220  */
221 static int v9fs_path_is_ancestor(V9fsPath *s1, V9fsPath *s2)
222 {
223     if (!strncmp(s1->data, s2->data, s1->size - 1)) {
224         if (s2->data[s1->size - 1] == '\0' || s2->data[s1->size - 1] == '/') {
225             return 1;
226         }
227     }
228     return 0;
229 }
230 
231 static size_t v9fs_string_size(V9fsString *str)
232 {
233     return str->size;
234 }
235 
236 /*
237  * returns 0 if fid got re-opened, 1 if not, < 0 on error */
238 static int coroutine_fn v9fs_reopen_fid(V9fsPDU *pdu, V9fsFidState *f)
239 {
240     int err = 1;
241     if (f->fid_type == P9_FID_FILE) {
242         if (f->fs.fd == -1) {
243             do {
244                 err = v9fs_co_open(pdu, f, f->open_flags);
245             } while (err == -EINTR && !pdu->cancelled);
246         }
247     } else if (f->fid_type == P9_FID_DIR) {
248         if (f->fs.dir.stream == NULL) {
249             do {
250                 err = v9fs_co_opendir(pdu, f);
251             } while (err == -EINTR && !pdu->cancelled);
252         }
253     }
254     return err;
255 }
256 
257 static V9fsFidState *coroutine_fn get_fid(V9fsPDU *pdu, int32_t fid)
258 {
259     int err;
260     V9fsFidState *f;
261     V9fsState *s = pdu->s;
262 
263     QSIMPLEQ_FOREACH(f, &s->fid_list, next) {
264         BUG_ON(f->clunked);
265         if (f->fid == fid) {
266             /*
267              * Update the fid ref upfront so that
268              * we don't get reclaimed when we yield
269              * in open later.
270              */
271             f->ref++;
272             /*
273              * check whether we need to reopen the
274              * file. We might have closed the fd
275              * while trying to free up some file
276              * descriptors.
277              */
278             err = v9fs_reopen_fid(pdu, f);
279             if (err < 0) {
280                 f->ref--;
281                 return NULL;
282             }
283             /*
284              * Mark the fid as referenced so that the LRU
285              * reclaim won't close the file descriptor
286              */
287             f->flags |= FID_REFERENCED;
288             return f;
289         }
290     }
291     return NULL;
292 }
293 
294 static V9fsFidState *alloc_fid(V9fsState *s, int32_t fid)
295 {
296     V9fsFidState *f;
297 
298     QSIMPLEQ_FOREACH(f, &s->fid_list, next) {
299         /* If fid is already there return NULL */
300         BUG_ON(f->clunked);
301         if (f->fid == fid) {
302             return NULL;
303         }
304     }
305     f = g_malloc0(sizeof(V9fsFidState));
306     f->fid = fid;
307     f->fid_type = P9_FID_NONE;
308     f->ref = 1;
309     /*
310      * Mark the fid as referenced so that the LRU
311      * reclaim won't close the file descriptor
312      */
313     f->flags |= FID_REFERENCED;
314     QSIMPLEQ_INSERT_HEAD(&s->fid_list, f, next);
315 
316     v9fs_readdir_init(s->proto_version, &f->fs.dir);
317     v9fs_readdir_init(s->proto_version, &f->fs_reclaim.dir);
318 
319     return f;
320 }
321 
322 static int coroutine_fn v9fs_xattr_fid_clunk(V9fsPDU *pdu, V9fsFidState *fidp)
323 {
324     int retval = 0;
325 
326     if (fidp->fs.xattr.xattrwalk_fid) {
327         /* getxattr/listxattr fid */
328         goto free_value;
329     }
330     /*
331      * if this is fid for setxattr. clunk should
332      * result in setxattr localcall
333      */
334     if (fidp->fs.xattr.len != fidp->fs.xattr.copied_len) {
335         /* clunk after partial write */
336         retval = -EINVAL;
337         goto free_out;
338     }
339     if (fidp->fs.xattr.len) {
340         retval = v9fs_co_lsetxattr(pdu, &fidp->path, &fidp->fs.xattr.name,
341                                    fidp->fs.xattr.value,
342                                    fidp->fs.xattr.len,
343                                    fidp->fs.xattr.flags);
344     } else {
345         retval = v9fs_co_lremovexattr(pdu, &fidp->path, &fidp->fs.xattr.name);
346     }
347 free_out:
348     v9fs_string_free(&fidp->fs.xattr.name);
349 free_value:
350     g_free(fidp->fs.xattr.value);
351     return retval;
352 }
353 
354 static int coroutine_fn free_fid(V9fsPDU *pdu, V9fsFidState *fidp)
355 {
356     int retval = 0;
357 
358     if (fidp->fid_type == P9_FID_FILE) {
359         /* If we reclaimed the fd no need to close */
360         if (fidp->fs.fd != -1) {
361             retval = v9fs_co_close(pdu, &fidp->fs);
362         }
363     } else if (fidp->fid_type == P9_FID_DIR) {
364         if (fidp->fs.dir.stream != NULL) {
365             retval = v9fs_co_closedir(pdu, &fidp->fs);
366         }
367     } else if (fidp->fid_type == P9_FID_XATTR) {
368         retval = v9fs_xattr_fid_clunk(pdu, fidp);
369     }
370     v9fs_path_free(&fidp->path);
371     g_free(fidp);
372     return retval;
373 }
374 
375 static int coroutine_fn put_fid(V9fsPDU *pdu, V9fsFidState *fidp)
376 {
377     BUG_ON(!fidp->ref);
378     fidp->ref--;
379     /*
380      * Don't free the fid if it is in reclaim list
381      */
382     if (!fidp->ref && fidp->clunked) {
383         if (fidp->fid == pdu->s->root_fid) {
384             /*
385              * if the clunked fid is root fid then we
386              * have unmounted the fs on the client side.
387              * delete the migration blocker. Ideally, this
388              * should be hooked to transport close notification
389              */
390             if (pdu->s->migration_blocker) {
391                 migrate_del_blocker(pdu->s->migration_blocker);
392                 error_free(pdu->s->migration_blocker);
393                 pdu->s->migration_blocker = NULL;
394             }
395         }
396         return free_fid(pdu, fidp);
397     }
398     return 0;
399 }
400 
401 static V9fsFidState *clunk_fid(V9fsState *s, int32_t fid)
402 {
403     V9fsFidState *fidp;
404 
405     QSIMPLEQ_FOREACH(fidp, &s->fid_list, next) {
406         if (fidp->fid == fid) {
407             QSIMPLEQ_REMOVE(&s->fid_list, fidp, V9fsFidState, next);
408             fidp->clunked = true;
409             return fidp;
410         }
411     }
412     return NULL;
413 }
414 
415 void coroutine_fn v9fs_reclaim_fd(V9fsPDU *pdu)
416 {
417     int reclaim_count = 0;
418     V9fsState *s = pdu->s;
419     V9fsFidState *f, *reclaim_list = NULL;
420 
421     QSIMPLEQ_FOREACH(f, &s->fid_list, next) {
422         /*
423          * Unlink fids cannot be reclaimed. Check
424          * for them and skip them. Also skip fids
425          * currently being operated on.
426          */
427         if (f->ref || f->flags & FID_NON_RECLAIMABLE) {
428             continue;
429         }
430         /*
431          * if it is a recently referenced fid
432          * we leave the fid untouched and clear the
433          * reference bit. We come back to it later
434          * in the next iteration. (a simple LRU without
435          * moving list elements around)
436          */
437         if (f->flags & FID_REFERENCED) {
438             f->flags &= ~FID_REFERENCED;
439             continue;
440         }
441         /*
442          * Add fids to reclaim list.
443          */
444         if (f->fid_type == P9_FID_FILE) {
445             if (f->fs.fd != -1) {
446                 /*
447                  * Up the reference count so that
448                  * a clunk request won't free this fid
449                  */
450                 f->ref++;
451                 f->rclm_lst = reclaim_list;
452                 reclaim_list = f;
453                 f->fs_reclaim.fd = f->fs.fd;
454                 f->fs.fd = -1;
455                 reclaim_count++;
456             }
457         } else if (f->fid_type == P9_FID_DIR) {
458             if (f->fs.dir.stream != NULL) {
459                 /*
460                  * Up the reference count so that
461                  * a clunk request won't free this fid
462                  */
463                 f->ref++;
464                 f->rclm_lst = reclaim_list;
465                 reclaim_list = f;
466                 f->fs_reclaim.dir.stream = f->fs.dir.stream;
467                 f->fs.dir.stream = NULL;
468                 reclaim_count++;
469             }
470         }
471         if (reclaim_count >= open_fd_rc) {
472             break;
473         }
474     }
475     /*
476      * Now close the fid in reclaim list. Free them if they
477      * are already clunked.
478      */
479     while (reclaim_list) {
480         f = reclaim_list;
481         reclaim_list = f->rclm_lst;
482         if (f->fid_type == P9_FID_FILE) {
483             v9fs_co_close(pdu, &f->fs_reclaim);
484         } else if (f->fid_type == P9_FID_DIR) {
485             v9fs_co_closedir(pdu, &f->fs_reclaim);
486         }
487         f->rclm_lst = NULL;
488         /*
489          * Now drop the fid reference, free it
490          * if clunked.
491          */
492         put_fid(pdu, f);
493     }
494 }
495 
496 static int coroutine_fn v9fs_mark_fids_unreclaim(V9fsPDU *pdu, V9fsPath *path)
497 {
498     int err;
499     V9fsState *s = pdu->s;
500     V9fsFidState *fidp;
501 
502 again:
503     QSIMPLEQ_FOREACH(fidp, &s->fid_list, next) {
504         if (fidp->path.size != path->size) {
505             continue;
506         }
507         if (!memcmp(fidp->path.data, path->data, path->size)) {
508             /* Mark the fid non reclaimable. */
509             fidp->flags |= FID_NON_RECLAIMABLE;
510 
511             /* reopen the file/dir if already closed */
512             err = v9fs_reopen_fid(pdu, fidp);
513             if (err < 0) {
514                 return err;
515             }
516             /*
517              * Go back to head of fid list because
518              * the list could have got updated when
519              * switched to the worker thread
520              */
521             if (err == 0) {
522                 goto again;
523             }
524         }
525     }
526     return 0;
527 }
528 
529 static void coroutine_fn virtfs_reset(V9fsPDU *pdu)
530 {
531     V9fsState *s = pdu->s;
532     V9fsFidState *fidp;
533 
534     /* Free all fids */
535     while (!QSIMPLEQ_EMPTY(&s->fid_list)) {
536         /* Get fid */
537         fidp = QSIMPLEQ_FIRST(&s->fid_list);
538         fidp->ref++;
539 
540         /* Clunk fid */
541         QSIMPLEQ_REMOVE(&s->fid_list, fidp, V9fsFidState, next);
542         fidp->clunked = true;
543 
544         put_fid(pdu, fidp);
545     }
546 }
547 
/* QID type bits; stat_to_qid() sets them for directories and symlinks. */
#define P9_QID_TYPE_DIR         0x80
#define P9_QID_TYPE_SYMLINK     0x02

/*
 * Stat mode bits above the permission bits.
 * NOTE(review): not used in this part of the file; presumably the 9P2000(.u)
 * stat mode encoding - confirm against the stat conversion code.
 */
#define P9_STAT_MODE_DIR        0x80000000
#define P9_STAT_MODE_APPEND     0x40000000
#define P9_STAT_MODE_EXCL       0x20000000
#define P9_STAT_MODE_MOUNT      0x10000000
#define P9_STAT_MODE_AUTH       0x08000000
#define P9_STAT_MODE_TMP        0x04000000
#define P9_STAT_MODE_SYMLINK    0x02000000
#define P9_STAT_MODE_LINK       0x01000000
#define P9_STAT_MODE_DEVICE     0x00800000
#define P9_STAT_MODE_NAMED_PIPE 0x00200000
#define P9_STAT_MODE_SOCKET     0x00100000
#define P9_STAT_MODE_SETUID     0x00080000
#define P9_STAT_MODE_SETGID     0x00040000
#define P9_STAT_MODE_SETVTX     0x00010000

/* Union of the mode bits above that describe a file type. */
#define P9_STAT_MODE_TYPE_BITS (P9_STAT_MODE_DIR |          \
                                P9_STAT_MODE_SYMLINK |      \
                                P9_STAT_MODE_LINK |         \
                                P9_STAT_MODE_DEVICE |       \
                                P9_STAT_MODE_NAMED_PIPE |   \
                                P9_STAT_MODE_SOCKET)
572 
/*
 * Reverses the bit order of a byte, e.g. binary 10100000 becomes 00000101.
 *
 * Uses the multiply / mask / modulus trick: the multiplication replicates the
 * byte into several positions of a 64 bit word, the mask picks each bit at
 * its mirrored position, and the modulus by 1023 folds the picked bits back
 * into one byte.
 */
static inline uint8_t mirror8bit(uint8_t byte)
{
    const uint64_t replicated = byte * 0x0202020202ULL;
    const uint64_t selected = replicated & 0x010884422010ULL;

    return selected % 1023;
}
578 
/*
 * Same as mirror8bit(), but for a 64 bit value: byte i of the input (counted
 * from the least significant end) is bit-mirrored and stored as byte 7 - i of
 * the result.
 */
static inline uint64_t mirror64bit(uint64_t value)
{
    uint64_t mirrored = 0;
    unsigned int i;

    for (i = 0; i < 8; i++) {
        const uint8_t byte = (value >> (8 * i)) & 0xff;

        mirrored |= (uint64_t)mirror8bit(byte) << (56 - 8 * i);
    }
    return mirrored;
}
591 
/**
 * @brief Parameter k for the Exponential Golomb algorithm to be used.
 *
 * The smaller this value, the smaller the minimum bit count of the Exp.
 * Golomb generated affixes will be (at lowest index), however at the
 * price of a higher maximum bit count of generated affixes (at highest
 * index). Likewise, increasing this parameter yields a smaller maximum bit
 * count at the price of a higher minimum bit count.
 *
 * In practice that means: a good value for k depends on the expected number
 * of devices to be exposed by one export. For a small number of devices k
 * should be small, for a large number of devices k might be increased
 * instead. The default of k=0 should be fine for most users though.
 *
 * @b IMPORTANT: In case this ever becomes a runtime parameter; the value of
 * k should not change as long as guest is still running! Because that would
 * cause completely different inode numbers to be generated on guest.
 */
#define EXP_GOLOMB_K    0
611 
612 /**
613  * @brief Exponential Golomb algorithm for arbitrary k (including k=0).
614  *
615  * The Exponential Golomb algorithm generates @b prefixes (@b not suffixes!)
616  * with growing length and with the mathematical property of being
617  * "prefix-free". The latter means the generated prefixes can be prepended
618  * in front of arbitrary numbers and the resulting concatenated numbers are
619  * guaranteed to be always unique.
620  *
621  * This is a minor adjustment to the original Exp. Golomb algorithm in the
622  * sense that lowest allowed index (@param n) starts with 1, not with zero.
623  *
624  * @param n - natural number (or index) of the prefix to be generated
625  *            (1, 2, 3, ...)
626  * @param k - parameter k of Exp. Golomb algorithm to be used
627  *            (see comment on EXP_GOLOMB_K macro for details about k)
628  */
629 static VariLenAffix expGolombEncode(uint64_t n, int k)
630 {
631     const uint64_t value = n + (1 << k) - 1;
632     const int bits = (int) log2(value) + 1;
633     return (VariLenAffix) {
634         .type = AffixType_Prefix,
635         .value = value,
636         .bits = bits + MAX((bits - 1 - k), 0)
637     };
638 }
639 
640 /**
641  * @brief Converts a suffix into a prefix, or a prefix into a suffix.
642  *
643  * Simply mirror all bits of the affix value, for the purpose to preserve
644  * respectively the mathematical "prefix-free" or "suffix-free" property
645  * after the conversion.
646  *
647  * If a passed prefix is suitable to create unique numbers, then the
648  * returned suffix is suitable to create unique numbers as well (and vice
649  * versa).
650  */
651 static VariLenAffix invertAffix(const VariLenAffix *affix)
652 {
653     return (VariLenAffix) {
654         .type =
655             (affix->type == AffixType_Suffix) ?
656                 AffixType_Prefix : AffixType_Suffix,
657         .value =
658             mirror64bit(affix->value) >>
659             ((sizeof(affix->value) * 8) - affix->bits),
660         .bits = affix->bits
661     };
662 }
663 
664 /**
665  * @brief Generates suffix numbers with "suffix-free" property.
666  *
667  * This is just a wrapper function on top of the Exp. Golomb algorithm.
668  *
669  * Since the Exp. Golomb algorithm generates prefixes, but we need suffixes,
670  * this function converts the Exp. Golomb prefixes into appropriate suffixes
671  * which are still suitable for generating unique numbers.
672  *
 * @param index - natural number (or index) of the suffix to be generated
 *                (1, 2, 3, ...)
675  */
676 static VariLenAffix affixForIndex(uint64_t index)
677 {
678     VariLenAffix prefix;
679     prefix = expGolombEncode(index, EXP_GOLOMB_K);
680     return invertAffix(&prefix); /* convert prefix to suffix */
681 }
682 
683 /* creative abuse of tb_hash_func7, which is based on xxhash */
684 static uint32_t qpp_hash(QppEntry e)
685 {
686     return qemu_xxhash7(e.ino_prefix, e.dev, 0, 0, 0);
687 }
688 
689 static uint32_t qpf_hash(QpfEntry e)
690 {
691     return qemu_xxhash7(e.ino, e.dev, 0, 0, 0);
692 }
693 
694 static bool qpd_cmp_func(const void *obj, const void *userp)
695 {
696     const QpdEntry *e1 = obj, *e2 = userp;
697     return e1->dev == e2->dev;
698 }
699 
700 static bool qpp_cmp_func(const void *obj, const void *userp)
701 {
702     const QppEntry *e1 = obj, *e2 = userp;
703     return e1->dev == e2->dev && e1->ino_prefix == e2->ino_prefix;
704 }
705 
706 static bool qpf_cmp_func(const void *obj, const void *userp)
707 {
708     const QpfEntry *e1 = obj, *e2 = userp;
709     return e1->dev == e2->dev && e1->ino == e2->ino;
710 }
711 
/*
 * qht_iter() callback used by qp_table_destroy(): frees the entry @p.
 * The hash @h and the user pointer @up are not needed.
 */
static void qp_table_remove(void *p, uint32_t h, void *up)
{
    (void)h;
    (void)up;
    g_free(p);
}
716 
717 static void qp_table_destroy(struct qht *ht)
718 {
719     if (!ht || !ht->map) {
720         return;
721     }
722     qht_iter(ht, qp_table_remove, NULL);
723     qht_destroy(ht);
724 }
725 
726 static void qpd_table_init(struct qht *ht)
727 {
728     qht_init(ht, qpd_cmp_func, 1, QHT_MODE_AUTO_RESIZE);
729 }
730 
731 static void qpp_table_init(struct qht *ht)
732 {
733     qht_init(ht, qpp_cmp_func, 1, QHT_MODE_AUTO_RESIZE);
734 }
735 
736 static void qpf_table_init(struct qht *ht)
737 {
738     qht_init(ht, qpf_cmp_func, 1 << 16, QHT_MODE_AUTO_RESIZE);
739 }
740 
741 /*
742  * Returns how many (high end) bits of inode numbers of the passed fs
743  * device shall be used (in combination with the device number) to
744  * generate hash values for qpp_table entries.
745  *
746  * This function is required if variable length suffixes are used for inode
747  * number mapping on guest level. Since a device may end up having multiple
748  * entries in qpp_table, each entry most probably with a different suffix
749  * length, we thus need this function in conjunction with qpd_table to
750  * "agree" about a fix amount of bits (per device) to be always used for
751  * generating hash values for the purpose of accessing qpp_table in order
752  * get consistent behaviour when accessing qpp_table.
753  */
754 static int qid_inode_prefix_hash_bits(V9fsPDU *pdu, dev_t dev)
755 {
756     QpdEntry lookup = {
757         .dev = dev
758     }, *val;
759     uint32_t hash = dev;
760     VariLenAffix affix;
761 
762     val = qht_lookup(&pdu->s->qpd_table, &lookup, hash);
763     if (!val) {
764         val = g_malloc0(sizeof(QpdEntry));
765         *val = lookup;
766         affix = affixForIndex(pdu->s->qp_affix_next);
767         val->prefix_bits = affix.bits;
768         qht_insert(&pdu->s->qpd_table, val, hash, NULL);
769         pdu->s->qp_ndevices++;
770     }
771     return val->prefix_bits;
772 }
773 
774 /**
775  * @brief Slow / full mapping host inode nr -> guest inode nr.
776  *
777  * This function performs a slower and much more costly remapping of an
778  * original file inode number on host to an appropriate different inode
779  * number on guest. For every (dev, inode) combination on host a new
780  * sequential number is generated, cached and exposed as inode number on
781  * guest.
782  *
783  * This is just a "last resort" fallback solution if the much faster/cheaper
784  * qid_path_suffixmap() failed. In practice this slow / full mapping is not
785  * expected ever to be used at all though.
786  *
787  * @see qid_path_suffixmap() for details
788  *
789  */
790 static int qid_path_fullmap(V9fsPDU *pdu, const struct stat *stbuf,
791                             uint64_t *path)
792 {
793     QpfEntry lookup = {
794         .dev = stbuf->st_dev,
795         .ino = stbuf->st_ino
796     }, *val;
797     uint32_t hash = qpf_hash(lookup);
798     VariLenAffix affix;
799 
800     val = qht_lookup(&pdu->s->qpf_table, &lookup, hash);
801 
802     if (!val) {
803         if (pdu->s->qp_fullpath_next == 0) {
804             /* no more files can be mapped :'( */
805             error_report_once(
806                 "9p: No more prefixes available for remapping inodes from "
807                 "host to guest."
808             );
809             return -ENFILE;
810         }
811 
812         val = g_malloc0(sizeof(QppEntry));
813         *val = lookup;
814 
815         /* new unique inode and device combo */
816         affix = affixForIndex(
817             1ULL << (sizeof(pdu->s->qp_affix_next) * 8)
818         );
819         val->path = (pdu->s->qp_fullpath_next++ << affix.bits) | affix.value;
820         pdu->s->qp_fullpath_next &= ((1ULL << (64 - affix.bits)) - 1);
821         qht_insert(&pdu->s->qpf_table, val, hash, NULL);
822     }
823 
824     *path = val->path;
825     return 0;
826 }
827 
828 /**
829  * @brief Quick mapping host inode nr -> guest inode nr.
830  *
831  * This function performs quick remapping of an original file inode number
832  * on host to an appropriate different inode number on guest. This remapping
833  * of inodes is required to avoid inode nr collisions on guest which would
834  * happen if the 9p export contains more than 1 exported file system (or
835  * more than 1 file system data set), because unlike on host level where the
836  * files would have different device nrs, all files exported by 9p would
837  * share the same device nr on guest (the device nr of the virtual 9p device
838  * that is).
839  *
840  * Inode remapping is performed by chopping off high end bits of the original
841  * inode number from host, shifting the result upwards and then assigning a
842  * generated suffix number for the low end bits, where the same suffix number
843  * will be shared by all inodes with the same device id AND the same high end
844  * bits that have been chopped off. That approach utilizes the fact that inode
845  * numbers very likely share the same high end bits (i.e. due to their common
846  * sequential generation by file systems) and hence we only have to generate
847  * and track a very limited amount of suffixes in practice due to that.
848  *
849  * We generate variable size suffixes for that purpose. The 1st generated
850  * suffix will only have 1 bit and hence we only need to chop off 1 bit from
851  * the original inode number. The subsequent suffixes being generated will
852  * grow in (bit) size subsequently, i.e. the 2nd and 3rd suffix being
853  * generated will have 3 bits and hence we have to chop off 3 bits from their
854  * original inodes, and so on. That approach of using variable length suffixes
855  * (i.e. over fixed size ones) utilizes the fact that in practice only a very
856  * limited amount of devices are shared by the same export (e.g. typically
857  * less than 2 dozen devices per 9p export), so in practice we need to chop
858  * off less bits than with fixed size prefixes and yet are flexible to add
859  * new devices at runtime below host's export directory at any time without
860  * having to reboot guest nor requiring to reconfigure guest for that. And due
861  * to the very limited amount of original high end bits that we chop off that
862  * way, the total amount of suffixes we need to generate is less than by using
863  * fixed size prefixes and hence it also improves performance of the inode
864  * remapping algorithm, and finally has the nice side effect that the inode
865  * numbers on guest will be much smaller & human friendly. ;-)
866  */
867 static int qid_path_suffixmap(V9fsPDU *pdu, const struct stat *stbuf,
868                               uint64_t *path)
869 {
870     const int ino_hash_bits = qid_inode_prefix_hash_bits(pdu, stbuf->st_dev);
871     QppEntry lookup = {
872         .dev = stbuf->st_dev,
873         .ino_prefix = (uint16_t) (stbuf->st_ino >> (64 - ino_hash_bits))
874     }, *val;
875     uint32_t hash = qpp_hash(lookup);
876 
877     val = qht_lookup(&pdu->s->qpp_table, &lookup, hash);
878 
879     if (!val) {
880         if (pdu->s->qp_affix_next == 0) {
881             /* we ran out of affixes */
882             warn_report_once(
883                 "9p: Potential degraded performance of inode remapping"
884             );
885             return -ENFILE;
886         }
887 
888         val = g_malloc0(sizeof(QppEntry));
889         *val = lookup;
890 
891         /* new unique inode affix and device combo */
892         val->qp_affix_index = pdu->s->qp_affix_next++;
893         val->qp_affix = affixForIndex(val->qp_affix_index);
894         qht_insert(&pdu->s->qpp_table, val, hash, NULL);
895     }
896     /* assuming generated affix to be suffix type, not prefix */
897     *path = (stbuf->st_ino << val->qp_affix.bits) | val->qp_affix.value;
898     return 0;
899 }
900 
901 static int stat_to_qid(V9fsPDU *pdu, const struct stat *stbuf, V9fsQID *qidp)
902 {
903     int err;
904     size_t size;
905 
906     if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) {
907         /* map inode+device to qid path (fast path) */
908         err = qid_path_suffixmap(pdu, stbuf, &qidp->path);
909         if (err == -ENFILE) {
910             /* fast path didn't work, fall back to full map */
911             err = qid_path_fullmap(pdu, stbuf, &qidp->path);
912         }
913         if (err) {
914             return err;
915         }
916     } else {
917         if (pdu->s->dev_id != stbuf->st_dev) {
918             if (pdu->s->ctx.export_flags & V9FS_FORBID_MULTIDEVS) {
919                 error_report_once(
920                     "9p: Multiple devices detected in same VirtFS export. "
921                     "Access of guest to additional devices is (partly) "
922                     "denied due to virtfs option 'multidevs=forbid' being "
923                     "effective."
924                 );
925                 return -ENODEV;
926             } else {
927                 warn_report_once(
928                     "9p: Multiple devices detected in same VirtFS export, "
929                     "which might lead to file ID collisions and severe "
930                     "misbehaviours on guest! You should either use a "
931                     "separate export for each device shared from host or "
932                     "use virtfs option 'multidevs=remap'!"
933                 );
934             }
935         }
936         memset(&qidp->path, 0, sizeof(qidp->path));
937         size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path));
938         memcpy(&qidp->path, &stbuf->st_ino, size);
939     }
940 
941     qidp->version = stbuf->st_mtime ^ (stbuf->st_size << 8);
942     qidp->type = 0;
943     if (S_ISDIR(stbuf->st_mode)) {
944         qidp->type |= P9_QID_TYPE_DIR;
945     }
946     if (S_ISLNK(stbuf->st_mode)) {
947         qidp->type |= P9_QID_TYPE_SYMLINK;
948     }
949 
950     return 0;
951 }
952 
953 static int coroutine_fn fid_to_qid(V9fsPDU *pdu, V9fsFidState *fidp,
954                                    V9fsQID *qidp)
955 {
956     struct stat stbuf;
957     int err;
958 
959     err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
960     if (err < 0) {
961         return err;
962     }
963     err = stat_to_qid(pdu, &stbuf, qidp);
964     if (err < 0) {
965         return err;
966     }
967     return 0;
968 }
969 
970 V9fsPDU *pdu_alloc(V9fsState *s)
971 {
972     V9fsPDU *pdu = NULL;
973 
974     if (!QLIST_EMPTY(&s->free_list)) {
975         pdu = QLIST_FIRST(&s->free_list);
976         QLIST_REMOVE(pdu, next);
977         QLIST_INSERT_HEAD(&s->active_list, pdu, next);
978     }
979     return pdu;
980 }
981 
982 void pdu_free(V9fsPDU *pdu)
983 {
984     V9fsState *s = pdu->s;
985 
986     g_assert(!pdu->cancelled);
987     QLIST_REMOVE(pdu, next);
988     QLIST_INSERT_HEAD(&s->free_list, pdu, next);
989 }
990 
991 static void coroutine_fn pdu_complete(V9fsPDU *pdu, ssize_t len)
992 {
993     int8_t id = pdu->id + 1; /* Response */
994     V9fsState *s = pdu->s;
995     int ret;
996 
997     /*
998      * The 9p spec requires that successfully cancelled pdus receive no reply.
999      * Sending a reply would confuse clients because they would
1000      * assume that any EINTR is the actual result of the operation,
1001      * rather than a consequence of the cancellation. However, if
1002      * the operation completed (succesfully or with an error other
1003      * than caused be cancellation), we do send out that reply, both
1004      * for efficiency and to avoid confusing the rest of the state machine
1005      * that assumes passing a non-error here will mean a successful
1006      * transmission of the reply.
1007      */
1008     bool discard = pdu->cancelled && len == -EINTR;
1009     if (discard) {
1010         trace_v9fs_rcancel(pdu->tag, pdu->id);
1011         pdu->size = 0;
1012         goto out_notify;
1013     }
1014 
1015     if (len < 0) {
1016         int err = -len;
1017         len = 7;
1018 
1019         if (s->proto_version != V9FS_PROTO_2000L) {
1020             V9fsString str;
1021 
1022             str.data = strerror(err);
1023             str.size = strlen(str.data);
1024 
1025             ret = pdu_marshal(pdu, len, "s", &str);
1026             if (ret < 0) {
1027                 goto out_notify;
1028             }
1029             len += ret;
1030             id = P9_RERROR;
1031         }
1032 
1033         ret = pdu_marshal(pdu, len, "d", err);
1034         if (ret < 0) {
1035             goto out_notify;
1036         }
1037         len += ret;
1038 
1039         if (s->proto_version == V9FS_PROTO_2000L) {
1040             id = P9_RLERROR;
1041         }
1042         trace_v9fs_rerror(pdu->tag, pdu->id, err); /* Trace ERROR */
1043     }
1044 
1045     /* fill out the header */
1046     if (pdu_marshal(pdu, 0, "dbw", (int32_t)len, id, pdu->tag) < 0) {
1047         goto out_notify;
1048     }
1049 
1050     /* keep these in sync */
1051     pdu->size = len;
1052     pdu->id = id;
1053 
1054 out_notify:
1055     pdu->s->transport->push_and_notify(pdu);
1056 
1057     /* Now wakeup anybody waiting in flush for this request */
1058     if (!qemu_co_queue_next(&pdu->complete)) {
1059         pdu_free(pdu);
1060     }
1061 }
1062 
1063 static mode_t v9mode_to_mode(uint32_t mode, V9fsString *extension)
1064 {
1065     mode_t ret;
1066 
1067     ret = mode & 0777;
1068     if (mode & P9_STAT_MODE_DIR) {
1069         ret |= S_IFDIR;
1070     }
1071 
1072     if (mode & P9_STAT_MODE_SYMLINK) {
1073         ret |= S_IFLNK;
1074     }
1075     if (mode & P9_STAT_MODE_SOCKET) {
1076         ret |= S_IFSOCK;
1077     }
1078     if (mode & P9_STAT_MODE_NAMED_PIPE) {
1079         ret |= S_IFIFO;
1080     }
1081     if (mode & P9_STAT_MODE_DEVICE) {
1082         if (extension->size && extension->data[0] == 'c') {
1083             ret |= S_IFCHR;
1084         } else {
1085             ret |= S_IFBLK;
1086         }
1087     }
1088 
1089     if (!(ret & ~0777)) {
1090         ret |= S_IFREG;
1091     }
1092 
1093     if (mode & P9_STAT_MODE_SETUID) {
1094         ret |= S_ISUID;
1095     }
1096     if (mode & P9_STAT_MODE_SETGID) {
1097         ret |= S_ISGID;
1098     }
1099     if (mode & P9_STAT_MODE_SETVTX) {
1100         ret |= S_ISVTX;
1101     }
1102 
1103     return ret;
1104 }
1105 
1106 static int donttouch_stat(V9fsStat *stat)
1107 {
1108     if (stat->type == -1 &&
1109         stat->dev == -1 &&
1110         stat->qid.type == 0xff &&
1111         stat->qid.version == (uint32_t) -1 &&
1112         stat->qid.path == (uint64_t) -1 &&
1113         stat->mode == -1 &&
1114         stat->atime == -1 &&
1115         stat->mtime == -1 &&
1116         stat->length == -1 &&
1117         !stat->name.size &&
1118         !stat->uid.size &&
1119         !stat->gid.size &&
1120         !stat->muid.size &&
1121         stat->n_uid == -1 &&
1122         stat->n_gid == -1 &&
1123         stat->n_muid == -1) {
1124         return 1;
1125     }
1126 
1127     return 0;
1128 }
1129 
1130 static void v9fs_stat_init(V9fsStat *stat)
1131 {
1132     v9fs_string_init(&stat->name);
1133     v9fs_string_init(&stat->uid);
1134     v9fs_string_init(&stat->gid);
1135     v9fs_string_init(&stat->muid);
1136     v9fs_string_init(&stat->extension);
1137 }
1138 
1139 static void v9fs_stat_free(V9fsStat *stat)
1140 {
1141     v9fs_string_free(&stat->name);
1142     v9fs_string_free(&stat->uid);
1143     v9fs_string_free(&stat->gid);
1144     v9fs_string_free(&stat->muid);
1145     v9fs_string_free(&stat->extension);
1146 }
1147 
1148 static uint32_t stat_to_v9mode(const struct stat *stbuf)
1149 {
1150     uint32_t mode;
1151 
1152     mode = stbuf->st_mode & 0777;
1153     if (S_ISDIR(stbuf->st_mode)) {
1154         mode |= P9_STAT_MODE_DIR;
1155     }
1156 
1157     if (S_ISLNK(stbuf->st_mode)) {
1158         mode |= P9_STAT_MODE_SYMLINK;
1159     }
1160 
1161     if (S_ISSOCK(stbuf->st_mode)) {
1162         mode |= P9_STAT_MODE_SOCKET;
1163     }
1164 
1165     if (S_ISFIFO(stbuf->st_mode)) {
1166         mode |= P9_STAT_MODE_NAMED_PIPE;
1167     }
1168 
1169     if (S_ISBLK(stbuf->st_mode) || S_ISCHR(stbuf->st_mode)) {
1170         mode |= P9_STAT_MODE_DEVICE;
1171     }
1172 
1173     if (stbuf->st_mode & S_ISUID) {
1174         mode |= P9_STAT_MODE_SETUID;
1175     }
1176 
1177     if (stbuf->st_mode & S_ISGID) {
1178         mode |= P9_STAT_MODE_SETGID;
1179     }
1180 
1181     if (stbuf->st_mode & S_ISVTX) {
1182         mode |= P9_STAT_MODE_SETVTX;
1183     }
1184 
1185     return mode;
1186 }
1187 
1188 static int coroutine_fn stat_to_v9stat(V9fsPDU *pdu, V9fsPath *path,
1189                                        const char *basename,
1190                                        const struct stat *stbuf,
1191                                        V9fsStat *v9stat)
1192 {
1193     int err;
1194 
1195     memset(v9stat, 0, sizeof(*v9stat));
1196 
1197     err = stat_to_qid(pdu, stbuf, &v9stat->qid);
1198     if (err < 0) {
1199         return err;
1200     }
1201     v9stat->mode = stat_to_v9mode(stbuf);
1202     v9stat->atime = stbuf->st_atime;
1203     v9stat->mtime = stbuf->st_mtime;
1204     v9stat->length = stbuf->st_size;
1205 
1206     v9fs_string_free(&v9stat->uid);
1207     v9fs_string_free(&v9stat->gid);
1208     v9fs_string_free(&v9stat->muid);
1209 
1210     v9stat->n_uid = stbuf->st_uid;
1211     v9stat->n_gid = stbuf->st_gid;
1212     v9stat->n_muid = 0;
1213 
1214     v9fs_string_free(&v9stat->extension);
1215 
1216     if (v9stat->mode & P9_STAT_MODE_SYMLINK) {
1217         err = v9fs_co_readlink(pdu, path, &v9stat->extension);
1218         if (err < 0) {
1219             return err;
1220         }
1221     } else if (v9stat->mode & P9_STAT_MODE_DEVICE) {
1222         v9fs_string_sprintf(&v9stat->extension, "%c %u %u",
1223                 S_ISCHR(stbuf->st_mode) ? 'c' : 'b',
1224                 major(stbuf->st_rdev), minor(stbuf->st_rdev));
1225     } else if (S_ISDIR(stbuf->st_mode) || S_ISREG(stbuf->st_mode)) {
1226         v9fs_string_sprintf(&v9stat->extension, "%s %lu",
1227                 "HARDLINKCOUNT", (unsigned long)stbuf->st_nlink);
1228     }
1229 
1230     v9fs_string_sprintf(&v9stat->name, "%s", basename);
1231 
1232     v9stat->size = 61 +
1233         v9fs_string_size(&v9stat->name) +
1234         v9fs_string_size(&v9stat->uid) +
1235         v9fs_string_size(&v9stat->gid) +
1236         v9fs_string_size(&v9stat->muid) +
1237         v9fs_string_size(&v9stat->extension);
1238     return 0;
1239 }
1240 
/* Bits of the getattr request/result mask, one per attribute group. */
#define P9_STATS_MODE          (1ULL << 0)
#define P9_STATS_NLINK         (1ULL << 1)
#define P9_STATS_UID           (1ULL << 2)
#define P9_STATS_GID           (1ULL << 3)
#define P9_STATS_RDEV          (1ULL << 4)
#define P9_STATS_ATIME         (1ULL << 5)
#define P9_STATS_MTIME         (1ULL << 6)
#define P9_STATS_CTIME         (1ULL << 7)
#define P9_STATS_INO           (1ULL << 8)
#define P9_STATS_SIZE          (1ULL << 9)
#define P9_STATS_BLOCKS        (1ULL << 10)

#define P9_STATS_BTIME         (1ULL << 11)
#define P9_STATS_GEN           (1ULL << 12)
#define P9_STATS_DATA_VERSION  (1ULL << 13)

/* Mask for fields up to BLOCKS */
#define P9_STATS_BASIC \
    (P9_STATS_MODE | P9_STATS_NLINK | P9_STATS_UID | P9_STATS_GID | \
     P9_STATS_RDEV | P9_STATS_ATIME | P9_STATS_MTIME | P9_STATS_CTIME | \
     P9_STATS_INO | P9_STATS_SIZE | P9_STATS_BLOCKS)
/* Mask for All fields above */
#define P9_STATS_ALL \
    (P9_STATS_BASIC | P9_STATS_BTIME | P9_STATS_GEN | P9_STATS_DATA_VERSION)
1259 
1260 
1261 static int stat_to_v9stat_dotl(V9fsPDU *pdu, const struct stat *stbuf,
1262                                 V9fsStatDotl *v9lstat)
1263 {
1264     memset(v9lstat, 0, sizeof(*v9lstat));
1265 
1266     v9lstat->st_mode = stbuf->st_mode;
1267     v9lstat->st_nlink = stbuf->st_nlink;
1268     v9lstat->st_uid = stbuf->st_uid;
1269     v9lstat->st_gid = stbuf->st_gid;
1270     v9lstat->st_rdev = stbuf->st_rdev;
1271     v9lstat->st_size = stbuf->st_size;
1272     v9lstat->st_blksize = stbuf->st_blksize;
1273     v9lstat->st_blocks = stbuf->st_blocks;
1274     v9lstat->st_atime_sec = stbuf->st_atime;
1275     v9lstat->st_atime_nsec = stbuf->st_atim.tv_nsec;
1276     v9lstat->st_mtime_sec = stbuf->st_mtime;
1277     v9lstat->st_mtime_nsec = stbuf->st_mtim.tv_nsec;
1278     v9lstat->st_ctime_sec = stbuf->st_ctime;
1279     v9lstat->st_ctime_nsec = stbuf->st_ctim.tv_nsec;
1280     /* Currently we only support BASIC fields in stat */
1281     v9lstat->st_result_mask = P9_STATS_BASIC;
1282 
1283     return stat_to_qid(pdu, stbuf, &v9lstat->qid);
1284 }
1285 
/*
 * Debug helper: print the @cnt entries of the scatter/gather list @sg to
 * stdout as "sg[cnt]: {(base, len), ...}" followed by a newline.
 */
static void print_sg(struct iovec *sg, int cnt)
{
    int i;

    printf("sg[%d]: {", cnt);
    for (i = 0; i < cnt; i++) {
        if (i) {
            printf(", ");
        }
        /* iov_len is a size_t, so it needs the unsigned %zu conversion */
        printf("(%p, %zu)", sg[i].iov_base, sg[i].iov_len);
    }
    printf("}\n");
}
1299 
1300 /* Will call this only for path name based fid */
1301 static void v9fs_fix_path(V9fsPath *dst, V9fsPath *src, int len)
1302 {
1303     V9fsPath str;
1304     v9fs_path_init(&str);
1305     v9fs_path_copy(&str, dst);
1306     v9fs_path_sprintf(dst, "%s%s", src->data, str.data + len);
1307     v9fs_path_free(&str);
1308 }
1309 
1310 static inline bool is_ro_export(FsContext *ctx)
1311 {
1312     return ctx->export_flags & V9FS_RDONLY;
1313 }
1314 
1315 static void coroutine_fn v9fs_version(void *opaque)
1316 {
1317     ssize_t err;
1318     V9fsPDU *pdu = opaque;
1319     V9fsState *s = pdu->s;
1320     V9fsString version;
1321     size_t offset = 7;
1322 
1323     v9fs_string_init(&version);
1324     err = pdu_unmarshal(pdu, offset, "ds", &s->msize, &version);
1325     if (err < 0) {
1326         goto out;
1327     }
1328     trace_v9fs_version(pdu->tag, pdu->id, s->msize, version.data);
1329 
1330     virtfs_reset(pdu);
1331 
1332     if (!strcmp(version.data, "9P2000.u")) {
1333         s->proto_version = V9FS_PROTO_2000U;
1334     } else if (!strcmp(version.data, "9P2000.L")) {
1335         s->proto_version = V9FS_PROTO_2000L;
1336     } else {
1337         v9fs_string_sprintf(&version, "unknown");
1338         /* skip min. msize check, reporting invalid version has priority */
1339         goto marshal;
1340     }
1341 
1342     if (s->msize < P9_MIN_MSIZE) {
1343         err = -EMSGSIZE;
1344         error_report(
1345             "9pfs: Client requested msize < minimum msize ("
1346             stringify(P9_MIN_MSIZE) ") supported by this server."
1347         );
1348         goto out;
1349     }
1350 
1351     /* 8192 is the default msize of Linux clients */
1352     if (s->msize <= 8192 && !(s->ctx.export_flags & V9FS_NO_PERF_WARN)) {
1353         warn_report_once(
1354             "9p: degraded performance: a reasonable high msize should be "
1355             "chosen on client/guest side (chosen msize is <= 8192). See "
1356             "https://wiki.qemu.org/Documentation/9psetup#msize for details."
1357         );
1358     }
1359 
1360 marshal:
1361     err = pdu_marshal(pdu, offset, "ds", s->msize, &version);
1362     if (err < 0) {
1363         goto out;
1364     }
1365     err += offset;
1366     trace_v9fs_version_return(pdu->tag, pdu->id, s->msize, version.data);
1367 out:
1368     pdu_complete(pdu, err);
1369     v9fs_string_free(&version);
1370 }
1371 
1372 static void coroutine_fn v9fs_attach(void *opaque)
1373 {
1374     V9fsPDU *pdu = opaque;
1375     V9fsState *s = pdu->s;
1376     int32_t fid, afid, n_uname;
1377     V9fsString uname, aname;
1378     V9fsFidState *fidp;
1379     size_t offset = 7;
1380     V9fsQID qid;
1381     ssize_t err;
1382 
1383     v9fs_string_init(&uname);
1384     v9fs_string_init(&aname);
1385     err = pdu_unmarshal(pdu, offset, "ddssd", &fid,
1386                         &afid, &uname, &aname, &n_uname);
1387     if (err < 0) {
1388         goto out_nofid;
1389     }
1390     trace_v9fs_attach(pdu->tag, pdu->id, fid, afid, uname.data, aname.data);
1391 
1392     fidp = alloc_fid(s, fid);
1393     if (fidp == NULL) {
1394         err = -EINVAL;
1395         goto out_nofid;
1396     }
1397     fidp->uid = n_uname;
1398     err = v9fs_co_name_to_path(pdu, NULL, "/", &fidp->path);
1399     if (err < 0) {
1400         err = -EINVAL;
1401         clunk_fid(s, fid);
1402         goto out;
1403     }
1404     err = fid_to_qid(pdu, fidp, &qid);
1405     if (err < 0) {
1406         err = -EINVAL;
1407         clunk_fid(s, fid);
1408         goto out;
1409     }
1410 
1411     /*
1412      * disable migration if we haven't done already.
1413      * attach could get called multiple times for the same export.
1414      */
1415     if (!s->migration_blocker) {
1416         error_setg(&s->migration_blocker,
1417                    "Migration is disabled when VirtFS export path '%s' is mounted in the guest using mount_tag '%s'",
1418                    s->ctx.fs_root ? s->ctx.fs_root : "NULL", s->tag);
1419         err = migrate_add_blocker(s->migration_blocker, NULL);
1420         if (err < 0) {
1421             error_free(s->migration_blocker);
1422             s->migration_blocker = NULL;
1423             clunk_fid(s, fid);
1424             goto out;
1425         }
1426         s->root_fid = fid;
1427     }
1428 
1429     err = pdu_marshal(pdu, offset, "Q", &qid);
1430     if (err < 0) {
1431         clunk_fid(s, fid);
1432         goto out;
1433     }
1434     err += offset;
1435 
1436     memcpy(&s->root_qid, &qid, sizeof(qid));
1437     trace_v9fs_attach_return(pdu->tag, pdu->id,
1438                              qid.type, qid.version, qid.path);
1439 out:
1440     put_fid(pdu, fidp);
1441 out_nofid:
1442     pdu_complete(pdu, err);
1443     v9fs_string_free(&uname);
1444     v9fs_string_free(&aname);
1445 }
1446 
1447 static void coroutine_fn v9fs_stat(void *opaque)
1448 {
1449     int32_t fid;
1450     V9fsStat v9stat;
1451     ssize_t err = 0;
1452     size_t offset = 7;
1453     struct stat stbuf;
1454     V9fsFidState *fidp;
1455     V9fsPDU *pdu = opaque;
1456     char *basename;
1457 
1458     err = pdu_unmarshal(pdu, offset, "d", &fid);
1459     if (err < 0) {
1460         goto out_nofid;
1461     }
1462     trace_v9fs_stat(pdu->tag, pdu->id, fid);
1463 
1464     fidp = get_fid(pdu, fid);
1465     if (fidp == NULL) {
1466         err = -ENOENT;
1467         goto out_nofid;
1468     }
1469     err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1470     if (err < 0) {
1471         goto out;
1472     }
1473     basename = g_path_get_basename(fidp->path.data);
1474     err = stat_to_v9stat(pdu, &fidp->path, basename, &stbuf, &v9stat);
1475     g_free(basename);
1476     if (err < 0) {
1477         goto out;
1478     }
1479     err = pdu_marshal(pdu, offset, "wS", 0, &v9stat);
1480     if (err < 0) {
1481         v9fs_stat_free(&v9stat);
1482         goto out;
1483     }
1484     trace_v9fs_stat_return(pdu->tag, pdu->id, v9stat.mode,
1485                            v9stat.atime, v9stat.mtime, v9stat.length);
1486     err += offset;
1487     v9fs_stat_free(&v9stat);
1488 out:
1489     put_fid(pdu, fidp);
1490 out_nofid:
1491     pdu_complete(pdu, err);
1492 }
1493 
1494 static void coroutine_fn v9fs_getattr(void *opaque)
1495 {
1496     int32_t fid;
1497     size_t offset = 7;
1498     ssize_t retval = 0;
1499     struct stat stbuf;
1500     V9fsFidState *fidp;
1501     uint64_t request_mask;
1502     V9fsStatDotl v9stat_dotl;
1503     V9fsPDU *pdu = opaque;
1504 
1505     retval = pdu_unmarshal(pdu, offset, "dq", &fid, &request_mask);
1506     if (retval < 0) {
1507         goto out_nofid;
1508     }
1509     trace_v9fs_getattr(pdu->tag, pdu->id, fid, request_mask);
1510 
1511     fidp = get_fid(pdu, fid);
1512     if (fidp == NULL) {
1513         retval = -ENOENT;
1514         goto out_nofid;
1515     }
1516     /*
1517      * Currently we only support BASIC fields in stat, so there is no
1518      * need to look at request_mask.
1519      */
1520     retval = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1521     if (retval < 0) {
1522         goto out;
1523     }
1524     retval = stat_to_v9stat_dotl(pdu, &stbuf, &v9stat_dotl);
1525     if (retval < 0) {
1526         goto out;
1527     }
1528 
1529     /*  fill st_gen if requested and supported by underlying fs */
1530     if (request_mask & P9_STATS_GEN) {
1531         retval = v9fs_co_st_gen(pdu, &fidp->path, stbuf.st_mode, &v9stat_dotl);
1532         switch (retval) {
1533         case 0:
1534             /* we have valid st_gen: update result mask */
1535             v9stat_dotl.st_result_mask |= P9_STATS_GEN;
1536             break;
1537         case -EINTR:
1538             /* request cancelled, e.g. by Tflush */
1539             goto out;
1540         default:
1541             /* failed to get st_gen: not fatal, ignore */
1542             break;
1543         }
1544     }
1545     retval = pdu_marshal(pdu, offset, "A", &v9stat_dotl);
1546     if (retval < 0) {
1547         goto out;
1548     }
1549     retval += offset;
1550     trace_v9fs_getattr_return(pdu->tag, pdu->id, v9stat_dotl.st_result_mask,
1551                               v9stat_dotl.st_mode, v9stat_dotl.st_uid,
1552                               v9stat_dotl.st_gid);
1553 out:
1554     put_fid(pdu, fidp);
1555 out_nofid:
1556     pdu_complete(pdu, retval);
1557 }
1558 
/* Attribute flags: bits of the 'valid' word of a setattr request */
#define P9_ATTR_MODE       0x001
#define P9_ATTR_UID        0x002
#define P9_ATTR_GID        0x004
#define P9_ATTR_SIZE       0x008
#define P9_ATTR_ATIME      0x010
#define P9_ATTR_MTIME      0x020
#define P9_ATTR_CTIME      0x040
#define P9_ATTR_ATIME_SET  0x080
#define P9_ATTR_MTIME_SET  0x100

/* covers P9_ATTR_MODE up to and including P9_ATTR_CTIME */
#define P9_ATTR_MASK    0x7f
1571 
1572 static void coroutine_fn v9fs_setattr(void *opaque)
1573 {
1574     int err = 0;
1575     int32_t fid;
1576     V9fsFidState *fidp;
1577     size_t offset = 7;
1578     V9fsIattr v9iattr;
1579     V9fsPDU *pdu = opaque;
1580 
1581     err = pdu_unmarshal(pdu, offset, "dI", &fid, &v9iattr);
1582     if (err < 0) {
1583         goto out_nofid;
1584     }
1585 
1586     trace_v9fs_setattr(pdu->tag, pdu->id, fid,
1587                        v9iattr.valid, v9iattr.mode, v9iattr.uid, v9iattr.gid,
1588                        v9iattr.size, v9iattr.atime_sec, v9iattr.mtime_sec);
1589 
1590     fidp = get_fid(pdu, fid);
1591     if (fidp == NULL) {
1592         err = -EINVAL;
1593         goto out_nofid;
1594     }
1595     if (v9iattr.valid & P9_ATTR_MODE) {
1596         err = v9fs_co_chmod(pdu, &fidp->path, v9iattr.mode);
1597         if (err < 0) {
1598             goto out;
1599         }
1600     }
1601     if (v9iattr.valid & (P9_ATTR_ATIME | P9_ATTR_MTIME)) {
1602         struct timespec times[2];
1603         if (v9iattr.valid & P9_ATTR_ATIME) {
1604             if (v9iattr.valid & P9_ATTR_ATIME_SET) {
1605                 times[0].tv_sec = v9iattr.atime_sec;
1606                 times[0].tv_nsec = v9iattr.atime_nsec;
1607             } else {
1608                 times[0].tv_nsec = UTIME_NOW;
1609             }
1610         } else {
1611             times[0].tv_nsec = UTIME_OMIT;
1612         }
1613         if (v9iattr.valid & P9_ATTR_MTIME) {
1614             if (v9iattr.valid & P9_ATTR_MTIME_SET) {
1615                 times[1].tv_sec = v9iattr.mtime_sec;
1616                 times[1].tv_nsec = v9iattr.mtime_nsec;
1617             } else {
1618                 times[1].tv_nsec = UTIME_NOW;
1619             }
1620         } else {
1621             times[1].tv_nsec = UTIME_OMIT;
1622         }
1623         err = v9fs_co_utimensat(pdu, &fidp->path, times);
1624         if (err < 0) {
1625             goto out;
1626         }
1627     }
1628     /*
1629      * If the only valid entry in iattr is ctime we can call
1630      * chown(-1,-1) to update the ctime of the file
1631      */
1632     if ((v9iattr.valid & (P9_ATTR_UID | P9_ATTR_GID)) ||
1633         ((v9iattr.valid & P9_ATTR_CTIME)
1634          && !((v9iattr.valid & P9_ATTR_MASK) & ~P9_ATTR_CTIME))) {
1635         if (!(v9iattr.valid & P9_ATTR_UID)) {
1636             v9iattr.uid = -1;
1637         }
1638         if (!(v9iattr.valid & P9_ATTR_GID)) {
1639             v9iattr.gid = -1;
1640         }
1641         err = v9fs_co_chown(pdu, &fidp->path, v9iattr.uid,
1642                             v9iattr.gid);
1643         if (err < 0) {
1644             goto out;
1645         }
1646     }
1647     if (v9iattr.valid & (P9_ATTR_SIZE)) {
1648         err = v9fs_co_truncate(pdu, &fidp->path, v9iattr.size);
1649         if (err < 0) {
1650             goto out;
1651         }
1652     }
1653     err = offset;
1654     trace_v9fs_setattr_return(pdu->tag, pdu->id);
1655 out:
1656     put_fid(pdu, fidp);
1657 out_nofid:
1658     pdu_complete(pdu, err);
1659 }
1660 
1661 static int v9fs_walk_marshal(V9fsPDU *pdu, uint16_t nwnames, V9fsQID *qids)
1662 {
1663     int i;
1664     ssize_t err;
1665     size_t offset = 7;
1666 
1667     err = pdu_marshal(pdu, offset, "w", nwnames);
1668     if (err < 0) {
1669         return err;
1670     }
1671     offset += err;
1672     for (i = 0; i < nwnames; i++) {
1673         err = pdu_marshal(pdu, offset, "Q", &qids[i]);
1674         if (err < 0) {
1675             return err;
1676         }
1677         offset += err;
1678     }
1679     return offset;
1680 }
1681 
/* A path element is illegal if it is empty or contains a '/'. */
static bool name_is_illegal(const char *name)
{
    if (name[0] == '\0') {
        return true;
    }
    return strchr(name, '/') != NULL;
}
1686 
1687 static bool not_same_qid(const V9fsQID *qid1, const V9fsQID *qid2)
1688 {
1689     return
1690         qid1->type != qid2->type ||
1691         qid1->version != qid2->version ||
1692         qid1->path != qid2->path;
1693 }
1694 
1695 static void coroutine_fn v9fs_walk(void *opaque)
1696 {
1697     int name_idx;
1698     V9fsQID *qids = NULL;
1699     int i, err = 0;
1700     V9fsPath dpath, path;
1701     uint16_t nwnames;
1702     struct stat stbuf;
1703     size_t offset = 7;
1704     int32_t fid, newfid;
1705     V9fsString *wnames = NULL;
1706     V9fsFidState *fidp;
1707     V9fsFidState *newfidp = NULL;
1708     V9fsPDU *pdu = opaque;
1709     V9fsState *s = pdu->s;
1710     V9fsQID qid;
1711 
1712     err = pdu_unmarshal(pdu, offset, "ddw", &fid, &newfid, &nwnames);
1713     if (err < 0) {
1714         pdu_complete(pdu, err);
1715         return ;
1716     }
1717     offset += err;
1718 
1719     trace_v9fs_walk(pdu->tag, pdu->id, fid, newfid, nwnames);
1720 
1721     if (nwnames && nwnames <= P9_MAXWELEM) {
1722         wnames = g_new0(V9fsString, nwnames);
1723         qids   = g_new0(V9fsQID, nwnames);
1724         for (i = 0; i < nwnames; i++) {
1725             err = pdu_unmarshal(pdu, offset, "s", &wnames[i]);
1726             if (err < 0) {
1727                 goto out_nofid;
1728             }
1729             if (name_is_illegal(wnames[i].data)) {
1730                 err = -ENOENT;
1731                 goto out_nofid;
1732             }
1733             offset += err;
1734         }
1735     } else if (nwnames > P9_MAXWELEM) {
1736         err = -EINVAL;
1737         goto out_nofid;
1738     }
1739     fidp = get_fid(pdu, fid);
1740     if (fidp == NULL) {
1741         err = -ENOENT;
1742         goto out_nofid;
1743     }
1744 
1745     v9fs_path_init(&dpath);
1746     v9fs_path_init(&path);
1747 
1748     err = fid_to_qid(pdu, fidp, &qid);
1749     if (err < 0) {
1750         goto out;
1751     }
1752 
1753     /*
1754      * Both dpath and path initially poin to fidp.
1755      * Needed to handle request with nwnames == 0
1756      */
1757     v9fs_path_copy(&dpath, &fidp->path);
1758     v9fs_path_copy(&path, &fidp->path);
1759     for (name_idx = 0; name_idx < nwnames; name_idx++) {
1760         if (not_same_qid(&pdu->s->root_qid, &qid) ||
1761             strcmp("..", wnames[name_idx].data)) {
1762             err = v9fs_co_name_to_path(pdu, &dpath, wnames[name_idx].data,
1763                                        &path);
1764             if (err < 0) {
1765                 goto out;
1766             }
1767 
1768             err = v9fs_co_lstat(pdu, &path, &stbuf);
1769             if (err < 0) {
1770                 goto out;
1771             }
1772             err = stat_to_qid(pdu, &stbuf, &qid);
1773             if (err < 0) {
1774                 goto out;
1775             }
1776             v9fs_path_copy(&dpath, &path);
1777         }
1778         memcpy(&qids[name_idx], &qid, sizeof(qid));
1779     }
1780     if (fid == newfid) {
1781         if (fidp->fid_type != P9_FID_NONE) {
1782             err = -EINVAL;
1783             goto out;
1784         }
1785         v9fs_path_write_lock(s);
1786         v9fs_path_copy(&fidp->path, &path);
1787         v9fs_path_unlock(s);
1788     } else {
1789         newfidp = alloc_fid(s, newfid);
1790         if (newfidp == NULL) {
1791             err = -EINVAL;
1792             goto out;
1793         }
1794         newfidp->uid = fidp->uid;
1795         v9fs_path_copy(&newfidp->path, &path);
1796     }
1797     err = v9fs_walk_marshal(pdu, nwnames, qids);
1798     trace_v9fs_walk_return(pdu->tag, pdu->id, nwnames, qids);
1799 out:
1800     put_fid(pdu, fidp);
1801     if (newfidp) {
1802         put_fid(pdu, newfidp);
1803     }
1804     v9fs_path_free(&dpath);
1805     v9fs_path_free(&path);
1806 out_nofid:
1807     pdu_complete(pdu, err);
1808     if (nwnames && nwnames <= P9_MAXWELEM) {
1809         for (name_idx = 0; name_idx < nwnames; name_idx++) {
1810             v9fs_string_free(&wnames[name_idx]);
1811         }
1812         g_free(wnames);
1813         g_free(qids);
1814     }
1815 }
1816 
1817 static int32_t coroutine_fn get_iounit(V9fsPDU *pdu, V9fsPath *path)
1818 {
1819     struct statfs stbuf;
1820     int32_t iounit = 0;
1821     V9fsState *s = pdu->s;
1822 
1823     /*
1824      * iounit should be multiples of f_bsize (host filesystem block size
1825      * and as well as less than (client msize - P9_IOHDRSZ))
1826      */
1827     if (!v9fs_co_statfs(pdu, path, &stbuf)) {
1828         if (stbuf.f_bsize) {
1829             iounit = stbuf.f_bsize;
1830             iounit *= (s->msize - P9_IOHDRSZ) / stbuf.f_bsize;
1831         }
1832     }
1833     if (!iounit) {
1834         iounit = s->msize - P9_IOHDRSZ;
1835     }
1836     return iounit;
1837 }
1838 
1839 static void coroutine_fn v9fs_open(void *opaque)
1840 {
1841     int flags;
1842     int32_t fid;
1843     int32_t mode;
1844     V9fsQID qid;
1845     int iounit = 0;
1846     ssize_t err = 0;
1847     size_t offset = 7;
1848     struct stat stbuf;
1849     V9fsFidState *fidp;
1850     V9fsPDU *pdu = opaque;
1851     V9fsState *s = pdu->s;
1852 
1853     if (s->proto_version == V9FS_PROTO_2000L) {
1854         err = pdu_unmarshal(pdu, offset, "dd", &fid, &mode);
1855     } else {
1856         uint8_t modebyte;
1857         err = pdu_unmarshal(pdu, offset, "db", &fid, &modebyte);
1858         mode = modebyte;
1859     }
1860     if (err < 0) {
1861         goto out_nofid;
1862     }
1863     trace_v9fs_open(pdu->tag, pdu->id, fid, mode);
1864 
1865     fidp = get_fid(pdu, fid);
1866     if (fidp == NULL) {
1867         err = -ENOENT;
1868         goto out_nofid;
1869     }
1870     if (fidp->fid_type != P9_FID_NONE) {
1871         err = -EINVAL;
1872         goto out;
1873     }
1874 
1875     err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1876     if (err < 0) {
1877         goto out;
1878     }
1879     err = stat_to_qid(pdu, &stbuf, &qid);
1880     if (err < 0) {
1881         goto out;
1882     }
1883     if (S_ISDIR(stbuf.st_mode)) {
1884         err = v9fs_co_opendir(pdu, fidp);
1885         if (err < 0) {
1886             goto out;
1887         }
1888         fidp->fid_type = P9_FID_DIR;
1889         err = pdu_marshal(pdu, offset, "Qd", &qid, 0);
1890         if (err < 0) {
1891             goto out;
1892         }
1893         err += offset;
1894     } else {
1895         if (s->proto_version == V9FS_PROTO_2000L) {
1896             flags = get_dotl_openflags(s, mode);
1897         } else {
1898             flags = omode_to_uflags(mode);
1899         }
1900         if (is_ro_export(&s->ctx)) {
1901             if (mode & O_WRONLY || mode & O_RDWR ||
1902                 mode & O_APPEND || mode & O_TRUNC) {
1903                 err = -EROFS;
1904                 goto out;
1905             }
1906         }
1907         err = v9fs_co_open(pdu, fidp, flags);
1908         if (err < 0) {
1909             goto out;
1910         }
1911         fidp->fid_type = P9_FID_FILE;
1912         fidp->open_flags = flags;
1913         if (flags & O_EXCL) {
1914             /*
1915              * We let the host file system do O_EXCL check
1916              * We should not reclaim such fd
1917              */
1918             fidp->flags |= FID_NON_RECLAIMABLE;
1919         }
1920         iounit = get_iounit(pdu, &fidp->path);
1921         err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
1922         if (err < 0) {
1923             goto out;
1924         }
1925         err += offset;
1926     }
1927     trace_v9fs_open_return(pdu->tag, pdu->id,
1928                            qid.type, qid.version, qid.path, iounit);
1929 out:
1930     put_fid(pdu, fidp);
1931 out_nofid:
1932     pdu_complete(pdu, err);
1933 }
1934 
1935 static void coroutine_fn v9fs_lcreate(void *opaque)
1936 {
1937     int32_t dfid, flags, mode;
1938     gid_t gid;
1939     ssize_t err = 0;
1940     ssize_t offset = 7;
1941     V9fsString name;
1942     V9fsFidState *fidp;
1943     struct stat stbuf;
1944     V9fsQID qid;
1945     int32_t iounit;
1946     V9fsPDU *pdu = opaque;
1947 
1948     v9fs_string_init(&name);
1949     err = pdu_unmarshal(pdu, offset, "dsddd", &dfid,
1950                         &name, &flags, &mode, &gid);
1951     if (err < 0) {
1952         goto out_nofid;
1953     }
1954     trace_v9fs_lcreate(pdu->tag, pdu->id, dfid, flags, mode, gid);
1955 
1956     if (name_is_illegal(name.data)) {
1957         err = -ENOENT;
1958         goto out_nofid;
1959     }
1960 
1961     if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
1962         err = -EEXIST;
1963         goto out_nofid;
1964     }
1965 
1966     fidp = get_fid(pdu, dfid);
1967     if (fidp == NULL) {
1968         err = -ENOENT;
1969         goto out_nofid;
1970     }
1971     if (fidp->fid_type != P9_FID_NONE) {
1972         err = -EINVAL;
1973         goto out;
1974     }
1975 
1976     flags = get_dotl_openflags(pdu->s, flags);
1977     err = v9fs_co_open2(pdu, fidp, &name, gid,
1978                         flags | O_CREAT, mode, &stbuf);
1979     if (err < 0) {
1980         goto out;
1981     }
1982     fidp->fid_type = P9_FID_FILE;
1983     fidp->open_flags = flags;
1984     if (flags & O_EXCL) {
1985         /*
1986          * We let the host file system do O_EXCL check
1987          * We should not reclaim such fd
1988          */
1989         fidp->flags |= FID_NON_RECLAIMABLE;
1990     }
1991     iounit =  get_iounit(pdu, &fidp->path);
1992     err = stat_to_qid(pdu, &stbuf, &qid);
1993     if (err < 0) {
1994         goto out;
1995     }
1996     err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
1997     if (err < 0) {
1998         goto out;
1999     }
2000     err += offset;
2001     trace_v9fs_lcreate_return(pdu->tag, pdu->id,
2002                               qid.type, qid.version, qid.path, iounit);
2003 out:
2004     put_fid(pdu, fidp);
2005 out_nofid:
2006     pdu_complete(pdu, err);
2007     v9fs_string_free(&name);
2008 }
2009 
2010 static void coroutine_fn v9fs_fsync(void *opaque)
2011 {
2012     int err;
2013     int32_t fid;
2014     int datasync;
2015     size_t offset = 7;
2016     V9fsFidState *fidp;
2017     V9fsPDU *pdu = opaque;
2018 
2019     err = pdu_unmarshal(pdu, offset, "dd", &fid, &datasync);
2020     if (err < 0) {
2021         goto out_nofid;
2022     }
2023     trace_v9fs_fsync(pdu->tag, pdu->id, fid, datasync);
2024 
2025     fidp = get_fid(pdu, fid);
2026     if (fidp == NULL) {
2027         err = -ENOENT;
2028         goto out_nofid;
2029     }
2030     err = v9fs_co_fsync(pdu, fidp, datasync);
2031     if (!err) {
2032         err = offset;
2033     }
2034     put_fid(pdu, fidp);
2035 out_nofid:
2036     pdu_complete(pdu, err);
2037 }
2038 
2039 static void coroutine_fn v9fs_clunk(void *opaque)
2040 {
2041     int err;
2042     int32_t fid;
2043     size_t offset = 7;
2044     V9fsFidState *fidp;
2045     V9fsPDU *pdu = opaque;
2046     V9fsState *s = pdu->s;
2047 
2048     err = pdu_unmarshal(pdu, offset, "d", &fid);
2049     if (err < 0) {
2050         goto out_nofid;
2051     }
2052     trace_v9fs_clunk(pdu->tag, pdu->id, fid);
2053 
2054     fidp = clunk_fid(s, fid);
2055     if (fidp == NULL) {
2056         err = -ENOENT;
2057         goto out_nofid;
2058     }
2059     /*
2060      * Bump the ref so that put_fid will
2061      * free the fid.
2062      */
2063     fidp->ref++;
2064     err = put_fid(pdu, fidp);
2065     if (!err) {
2066         err = offset;
2067     }
2068 out_nofid:
2069     pdu_complete(pdu, err);
2070 }
2071 
2072 /*
2073  * Create a QEMUIOVector for a sub-region of PDU iovecs
2074  *
2075  * @qiov:       uninitialized QEMUIOVector
2076  * @skip:       number of bytes to skip from beginning of PDU
2077  * @size:       number of bytes to include
2078  * @is_write:   true - write, false - read
2079  *
2080  * The resulting QEMUIOVector has heap-allocated iovecs and must be cleaned up
2081  * with qemu_iovec_destroy().
2082  */
2083 static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu,
2084                                     size_t skip, size_t size,
2085                                     bool is_write)
2086 {
2087     QEMUIOVector elem;
2088     struct iovec *iov;
2089     unsigned int niov;
2090 
2091     if (is_write) {
2092         pdu->s->transport->init_out_iov_from_pdu(pdu, &iov, &niov, size + skip);
2093     } else {
2094         pdu->s->transport->init_in_iov_from_pdu(pdu, &iov, &niov, size + skip);
2095     }
2096 
2097     qemu_iovec_init_external(&elem, iov, niov);
2098     qemu_iovec_init(qiov, niov);
2099     qemu_iovec_concat(qiov, &elem, skip, size);
2100 }
2101 
2102 static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
2103                            uint64_t off, uint32_t max_count)
2104 {
2105     ssize_t err;
2106     size_t offset = 7;
2107     uint64_t read_count;
2108     QEMUIOVector qiov_full;
2109 
2110     if (fidp->fs.xattr.len < off) {
2111         read_count = 0;
2112     } else {
2113         read_count = fidp->fs.xattr.len - off;
2114     }
2115     if (read_count > max_count) {
2116         read_count = max_count;
2117     }
2118     err = pdu_marshal(pdu, offset, "d", read_count);
2119     if (err < 0) {
2120         return err;
2121     }
2122     offset += err;
2123 
2124     v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, read_count, false);
2125     err = v9fs_pack(qiov_full.iov, qiov_full.niov, 0,
2126                     ((char *)fidp->fs.xattr.value) + off,
2127                     read_count);
2128     qemu_iovec_destroy(&qiov_full);
2129     if (err < 0) {
2130         return err;
2131     }
2132     offset += err;
2133     return offset;
2134 }
2135 
2136 static int coroutine_fn v9fs_do_readdir_with_stat(V9fsPDU *pdu,
2137                                                   V9fsFidState *fidp,
2138                                                   uint32_t max_count)
2139 {
2140     V9fsPath path;
2141     V9fsStat v9stat;
2142     int len, err = 0;
2143     int32_t count = 0;
2144     struct stat stbuf;
2145     off_t saved_dir_pos;
2146     struct dirent *dent;
2147 
2148     /* save the directory position */
2149     saved_dir_pos = v9fs_co_telldir(pdu, fidp);
2150     if (saved_dir_pos < 0) {
2151         return saved_dir_pos;
2152     }
2153 
2154     while (1) {
2155         v9fs_path_init(&path);
2156 
2157         v9fs_readdir_lock(&fidp->fs.dir);
2158 
2159         err = v9fs_co_readdir(pdu, fidp, &dent);
2160         if (err || !dent) {
2161             break;
2162         }
2163         err = v9fs_co_name_to_path(pdu, &fidp->path, dent->d_name, &path);
2164         if (err < 0) {
2165             break;
2166         }
2167         err = v9fs_co_lstat(pdu, &path, &stbuf);
2168         if (err < 0) {
2169             break;
2170         }
2171         err = stat_to_v9stat(pdu, &path, dent->d_name, &stbuf, &v9stat);
2172         if (err < 0) {
2173             break;
2174         }
2175         if ((count + v9stat.size + 2) > max_count) {
2176             v9fs_readdir_unlock(&fidp->fs.dir);
2177 
2178             /* Ran out of buffer. Set dir back to old position and return */
2179             v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
2180             v9fs_stat_free(&v9stat);
2181             v9fs_path_free(&path);
2182             return count;
2183         }
2184 
2185         /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
2186         len = pdu_marshal(pdu, 11 + count, "S", &v9stat);
2187 
2188         v9fs_readdir_unlock(&fidp->fs.dir);
2189 
2190         if (len < 0) {
2191             v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
2192             v9fs_stat_free(&v9stat);
2193             v9fs_path_free(&path);
2194             return len;
2195         }
2196         count += len;
2197         v9fs_stat_free(&v9stat);
2198         v9fs_path_free(&path);
2199         saved_dir_pos = dent->d_off;
2200     }
2201 
2202     v9fs_readdir_unlock(&fidp->fs.dir);
2203 
2204     v9fs_path_free(&path);
2205     if (err < 0) {
2206         return err;
2207     }
2208     return count;
2209 }
2210 
2211 static void coroutine_fn v9fs_read(void *opaque)
2212 {
2213     int32_t fid;
2214     uint64_t off;
2215     ssize_t err = 0;
2216     int32_t count = 0;
2217     size_t offset = 7;
2218     uint32_t max_count;
2219     V9fsFidState *fidp;
2220     V9fsPDU *pdu = opaque;
2221     V9fsState *s = pdu->s;
2222 
2223     err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &max_count);
2224     if (err < 0) {
2225         goto out_nofid;
2226     }
2227     trace_v9fs_read(pdu->tag, pdu->id, fid, off, max_count);
2228 
2229     fidp = get_fid(pdu, fid);
2230     if (fidp == NULL) {
2231         err = -EINVAL;
2232         goto out_nofid;
2233     }
2234     if (fidp->fid_type == P9_FID_DIR) {
2235         if (s->proto_version != V9FS_PROTO_2000U) {
2236             warn_report_once(
2237                 "9p: bad client: T_read request on directory only expected "
2238                 "with 9P2000.u protocol version"
2239             );
2240             err = -EOPNOTSUPP;
2241             goto out;
2242         }
2243         if (off == 0) {
2244             v9fs_co_rewinddir(pdu, fidp);
2245         }
2246         count = v9fs_do_readdir_with_stat(pdu, fidp, max_count);
2247         if (count < 0) {
2248             err = count;
2249             goto out;
2250         }
2251         err = pdu_marshal(pdu, offset, "d", count);
2252         if (err < 0) {
2253             goto out;
2254         }
2255         err += offset + count;
2256     } else if (fidp->fid_type == P9_FID_FILE) {
2257         QEMUIOVector qiov_full;
2258         QEMUIOVector qiov;
2259         int32_t len;
2260 
2261         v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset + 4, max_count, false);
2262         qemu_iovec_init(&qiov, qiov_full.niov);
2263         do {
2264             qemu_iovec_reset(&qiov);
2265             qemu_iovec_concat(&qiov, &qiov_full, count, qiov_full.size - count);
2266             if (0) {
2267                 print_sg(qiov.iov, qiov.niov);
2268             }
2269             /* Loop in case of EINTR */
2270             do {
2271                 len = v9fs_co_preadv(pdu, fidp, qiov.iov, qiov.niov, off);
2272                 if (len >= 0) {
2273                     off   += len;
2274                     count += len;
2275                 }
2276             } while (len == -EINTR && !pdu->cancelled);
2277             if (len < 0) {
2278                 /* IO error return the error */
2279                 err = len;
2280                 goto out_free_iovec;
2281             }
2282         } while (count < max_count && len > 0);
2283         err = pdu_marshal(pdu, offset, "d", count);
2284         if (err < 0) {
2285             goto out_free_iovec;
2286         }
2287         err += offset + count;
2288 out_free_iovec:
2289         qemu_iovec_destroy(&qiov);
2290         qemu_iovec_destroy(&qiov_full);
2291     } else if (fidp->fid_type == P9_FID_XATTR) {
2292         err = v9fs_xattr_read(s, pdu, fidp, off, max_count);
2293     } else {
2294         err = -EINVAL;
2295     }
2296     trace_v9fs_read_return(pdu->tag, pdu->id, count, err);
2297 out:
2298     put_fid(pdu, fidp);
2299 out_nofid:
2300     pdu_complete(pdu, err);
2301 }
2302 
2303 /**
2304  * Returns size required in Rreaddir response for the passed dirent @p name.
2305  *
2306  * @param name - directory entry's name (i.e. file name, directory name)
2307  * @returns required size in bytes
2308  */
2309 size_t v9fs_readdir_response_size(V9fsString *name)
2310 {
2311     /*
2312      * Size of each dirent on the wire: size of qid (13) + size of offset (8)
2313      * size of type (1) + size of name.size (2) + strlen(name.data)
2314      */
2315     return 24 + v9fs_string_size(name);
2316 }
2317 
2318 static void v9fs_free_dirents(struct V9fsDirEnt *e)
2319 {
2320     struct V9fsDirEnt *next = NULL;
2321 
2322     for (; e; e = next) {
2323         next = e->next;
2324         g_free(e->dent);
2325         g_free(e->st);
2326         g_free(e);
2327     }
2328 }
2329 
2330 static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
2331                                         off_t offset, int32_t max_count)
2332 {
2333     size_t size;
2334     V9fsQID qid;
2335     V9fsString name;
2336     int len, err = 0;
2337     int32_t count = 0;
2338     struct dirent *dent;
2339     struct stat *st;
2340     struct V9fsDirEnt *entries = NULL;
2341 
2342     /*
2343      * inode remapping requires the device id, which in turn might be
2344      * different for different directory entries, so if inode remapping is
2345      * enabled we have to make a full stat for each directory entry
2346      */
2347     const bool dostat = pdu->s->ctx.export_flags & V9FS_REMAP_INODES;
2348 
2349     /*
2350      * Fetch all required directory entries altogether on a background IO
2351      * thread from fs driver. We don't want to do that for each entry
2352      * individually, because hopping between threads (this main IO thread
2353      * and background IO driver thread) would sum up to huge latencies.
2354      */
2355     count = v9fs_co_readdir_many(pdu, fidp, &entries, offset, max_count,
2356                                  dostat);
2357     if (count < 0) {
2358         err = count;
2359         count = 0;
2360         goto out;
2361     }
2362     count = 0;
2363 
2364     for (struct V9fsDirEnt *e = entries; e; e = e->next) {
2365         dent = e->dent;
2366 
2367         if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) {
2368             st = e->st;
2369             /* e->st should never be NULL, but just to be sure */
2370             if (!st) {
2371                 err = -1;
2372                 break;
2373             }
2374 
2375             /* remap inode */
2376             err = stat_to_qid(pdu, st, &qid);
2377             if (err < 0) {
2378                 break;
2379             }
2380         } else {
2381             /*
2382              * Fill up just the path field of qid because the client uses
2383              * only that. To fill the entire qid structure we will have
2384              * to stat each dirent found, which is expensive. For the
2385              * latter reason we don't call stat_to_qid() here. Only drawback
2386              * is that no multi-device export detection of stat_to_qid()
2387              * would be done and provided as error to the user here. But
2388              * user would get that error anyway when accessing those
2389              * files/dirs through other ways.
2390              */
2391             size = MIN(sizeof(dent->d_ino), sizeof(qid.path));
2392             memcpy(&qid.path, &dent->d_ino, size);
2393             /* Fill the other fields with dummy values */
2394             qid.type = 0;
2395             qid.version = 0;
2396         }
2397 
2398         v9fs_string_init(&name);
2399         v9fs_string_sprintf(&name, "%s", dent->d_name);
2400 
2401         /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
2402         len = pdu_marshal(pdu, 11 + count, "Qqbs",
2403                           &qid, dent->d_off,
2404                           dent->d_type, &name);
2405 
2406         v9fs_string_free(&name);
2407 
2408         if (len < 0) {
2409             err = len;
2410             break;
2411         }
2412 
2413         count += len;
2414     }
2415 
2416 out:
2417     v9fs_free_dirents(entries);
2418     if (err < 0) {
2419         return err;
2420     }
2421     return count;
2422 }
2423 
2424 static void coroutine_fn v9fs_readdir(void *opaque)
2425 {
2426     int32_t fid;
2427     V9fsFidState *fidp;
2428     ssize_t retval = 0;
2429     size_t offset = 7;
2430     uint64_t initial_offset;
2431     int32_t count;
2432     uint32_t max_count;
2433     V9fsPDU *pdu = opaque;
2434     V9fsState *s = pdu->s;
2435 
2436     retval = pdu_unmarshal(pdu, offset, "dqd", &fid,
2437                            &initial_offset, &max_count);
2438     if (retval < 0) {
2439         goto out_nofid;
2440     }
2441     trace_v9fs_readdir(pdu->tag, pdu->id, fid, initial_offset, max_count);
2442 
2443     /* Enough space for a R_readdir header: size[4] Rreaddir tag[2] count[4] */
2444     if (max_count > s->msize - 11) {
2445         max_count = s->msize - 11;
2446         warn_report_once(
2447             "9p: bad client: T_readdir with count > msize - 11"
2448         );
2449     }
2450 
2451     fidp = get_fid(pdu, fid);
2452     if (fidp == NULL) {
2453         retval = -EINVAL;
2454         goto out_nofid;
2455     }
2456     if (!fidp->fs.dir.stream) {
2457         retval = -EINVAL;
2458         goto out;
2459     }
2460     if (s->proto_version != V9FS_PROTO_2000L) {
2461         warn_report_once(
2462             "9p: bad client: T_readdir request only expected with 9P2000.L "
2463             "protocol version"
2464         );
2465         retval = -EOPNOTSUPP;
2466         goto out;
2467     }
2468     count = v9fs_do_readdir(pdu, fidp, (off_t) initial_offset, max_count);
2469     if (count < 0) {
2470         retval = count;
2471         goto out;
2472     }
2473     retval = pdu_marshal(pdu, offset, "d", count);
2474     if (retval < 0) {
2475         goto out;
2476     }
2477     retval += count + offset;
2478     trace_v9fs_readdir_return(pdu->tag, pdu->id, count, retval);
2479 out:
2480     put_fid(pdu, fidp);
2481 out_nofid:
2482     pdu_complete(pdu, retval);
2483 }
2484 
2485 static int v9fs_xattr_write(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
2486                             uint64_t off, uint32_t count,
2487                             struct iovec *sg, int cnt)
2488 {
2489     int i, to_copy;
2490     ssize_t err = 0;
2491     uint64_t write_count;
2492     size_t offset = 7;
2493 
2494 
2495     if (fidp->fs.xattr.len < off) {
2496         return -ENOSPC;
2497     }
2498     write_count = fidp->fs.xattr.len - off;
2499     if (write_count > count) {
2500         write_count = count;
2501     }
2502     err = pdu_marshal(pdu, offset, "d", write_count);
2503     if (err < 0) {
2504         return err;
2505     }
2506     err += offset;
2507     fidp->fs.xattr.copied_len += write_count;
2508     /*
2509      * Now copy the content from sg list
2510      */
2511     for (i = 0; i < cnt; i++) {
2512         if (write_count > sg[i].iov_len) {
2513             to_copy = sg[i].iov_len;
2514         } else {
2515             to_copy = write_count;
2516         }
2517         memcpy((char *)fidp->fs.xattr.value + off, sg[i].iov_base, to_copy);
2518         /* updating vs->off since we are not using below */
2519         off += to_copy;
2520         write_count -= to_copy;
2521     }
2522 
2523     return err;
2524 }
2525 
2526 static void coroutine_fn v9fs_write(void *opaque)
2527 {
2528     ssize_t err;
2529     int32_t fid;
2530     uint64_t off;
2531     uint32_t count;
2532     int32_t len = 0;
2533     int32_t total = 0;
2534     size_t offset = 7;
2535     V9fsFidState *fidp;
2536     V9fsPDU *pdu = opaque;
2537     V9fsState *s = pdu->s;
2538     QEMUIOVector qiov_full;
2539     QEMUIOVector qiov;
2540 
2541     err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &count);
2542     if (err < 0) {
2543         pdu_complete(pdu, err);
2544         return;
2545     }
2546     offset += err;
2547     v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, count, true);
2548     trace_v9fs_write(pdu->tag, pdu->id, fid, off, count, qiov_full.niov);
2549 
2550     fidp = get_fid(pdu, fid);
2551     if (fidp == NULL) {
2552         err = -EINVAL;
2553         goto out_nofid;
2554     }
2555     if (fidp->fid_type == P9_FID_FILE) {
2556         if (fidp->fs.fd == -1) {
2557             err = -EINVAL;
2558             goto out;
2559         }
2560     } else if (fidp->fid_type == P9_FID_XATTR) {
2561         /*
2562          * setxattr operation
2563          */
2564         err = v9fs_xattr_write(s, pdu, fidp, off, count,
2565                                qiov_full.iov, qiov_full.niov);
2566         goto out;
2567     } else {
2568         err = -EINVAL;
2569         goto out;
2570     }
2571     qemu_iovec_init(&qiov, qiov_full.niov);
2572     do {
2573         qemu_iovec_reset(&qiov);
2574         qemu_iovec_concat(&qiov, &qiov_full, total, qiov_full.size - total);
2575         if (0) {
2576             print_sg(qiov.iov, qiov.niov);
2577         }
2578         /* Loop in case of EINTR */
2579         do {
2580             len = v9fs_co_pwritev(pdu, fidp, qiov.iov, qiov.niov, off);
2581             if (len >= 0) {
2582                 off   += len;
2583                 total += len;
2584             }
2585         } while (len == -EINTR && !pdu->cancelled);
2586         if (len < 0) {
2587             /* IO error return the error */
2588             err = len;
2589             goto out_qiov;
2590         }
2591     } while (total < count && len > 0);
2592 
2593     offset = 7;
2594     err = pdu_marshal(pdu, offset, "d", total);
2595     if (err < 0) {
2596         goto out_qiov;
2597     }
2598     err += offset;
2599     trace_v9fs_write_return(pdu->tag, pdu->id, total, err);
2600 out_qiov:
2601     qemu_iovec_destroy(&qiov);
2602 out:
2603     put_fid(pdu, fidp);
2604 out_nofid:
2605     qemu_iovec_destroy(&qiov_full);
2606     pdu_complete(pdu, err);
2607 }
2608 
2609 static void coroutine_fn v9fs_create(void *opaque)
2610 {
2611     int32_t fid;
2612     int err = 0;
2613     size_t offset = 7;
2614     V9fsFidState *fidp;
2615     V9fsQID qid;
2616     int32_t perm;
2617     int8_t mode;
2618     V9fsPath path;
2619     struct stat stbuf;
2620     V9fsString name;
2621     V9fsString extension;
2622     int iounit;
2623     V9fsPDU *pdu = opaque;
2624     V9fsState *s = pdu->s;
2625 
2626     v9fs_path_init(&path);
2627     v9fs_string_init(&name);
2628     v9fs_string_init(&extension);
2629     err = pdu_unmarshal(pdu, offset, "dsdbs", &fid, &name,
2630                         &perm, &mode, &extension);
2631     if (err < 0) {
2632         goto out_nofid;
2633     }
2634     trace_v9fs_create(pdu->tag, pdu->id, fid, name.data, perm, mode);
2635 
2636     if (name_is_illegal(name.data)) {
2637         err = -ENOENT;
2638         goto out_nofid;
2639     }
2640 
2641     if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
2642         err = -EEXIST;
2643         goto out_nofid;
2644     }
2645 
2646     fidp = get_fid(pdu, fid);
2647     if (fidp == NULL) {
2648         err = -EINVAL;
2649         goto out_nofid;
2650     }
2651     if (fidp->fid_type != P9_FID_NONE) {
2652         err = -EINVAL;
2653         goto out;
2654     }
2655     if (perm & P9_STAT_MODE_DIR) {
2656         err = v9fs_co_mkdir(pdu, fidp, &name, perm & 0777,
2657                             fidp->uid, -1, &stbuf);
2658         if (err < 0) {
2659             goto out;
2660         }
2661         err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2662         if (err < 0) {
2663             goto out;
2664         }
2665         v9fs_path_write_lock(s);
2666         v9fs_path_copy(&fidp->path, &path);
2667         v9fs_path_unlock(s);
2668         err = v9fs_co_opendir(pdu, fidp);
2669         if (err < 0) {
2670             goto out;
2671         }
2672         fidp->fid_type = P9_FID_DIR;
2673     } else if (perm & P9_STAT_MODE_SYMLINK) {
2674         err = v9fs_co_symlink(pdu, fidp, &name,
2675                               extension.data, -1 , &stbuf);
2676         if (err < 0) {
2677             goto out;
2678         }
2679         err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2680         if (err < 0) {
2681             goto out;
2682         }
2683         v9fs_path_write_lock(s);
2684         v9fs_path_copy(&fidp->path, &path);
2685         v9fs_path_unlock(s);
2686     } else if (perm & P9_STAT_MODE_LINK) {
2687         int32_t ofid = atoi(extension.data);
2688         V9fsFidState *ofidp = get_fid(pdu, ofid);
2689         if (ofidp == NULL) {
2690             err = -EINVAL;
2691             goto out;
2692         }
2693         err = v9fs_co_link(pdu, ofidp, fidp, &name);
2694         put_fid(pdu, ofidp);
2695         if (err < 0) {
2696             goto out;
2697         }
2698         err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2699         if (err < 0) {
2700             fidp->fid_type = P9_FID_NONE;
2701             goto out;
2702         }
2703         v9fs_path_write_lock(s);
2704         v9fs_path_copy(&fidp->path, &path);
2705         v9fs_path_unlock(s);
2706         err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
2707         if (err < 0) {
2708             fidp->fid_type = P9_FID_NONE;
2709             goto out;
2710         }
2711     } else if (perm & P9_STAT_MODE_DEVICE) {
2712         char ctype;
2713         uint32_t major, minor;
2714         mode_t nmode = 0;
2715 
2716         if (sscanf(extension.data, "%c %u %u", &ctype, &major, &minor) != 3) {
2717             err = -errno;
2718             goto out;
2719         }
2720 
2721         switch (ctype) {
2722         case 'c':
2723             nmode = S_IFCHR;
2724             break;
2725         case 'b':
2726             nmode = S_IFBLK;
2727             break;
2728         default:
2729             err = -EIO;
2730             goto out;
2731         }
2732 
2733         nmode |= perm & 0777;
2734         err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2735                             makedev(major, minor), nmode, &stbuf);
2736         if (err < 0) {
2737             goto out;
2738         }
2739         err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2740         if (err < 0) {
2741             goto out;
2742         }
2743         v9fs_path_write_lock(s);
2744         v9fs_path_copy(&fidp->path, &path);
2745         v9fs_path_unlock(s);
2746     } else if (perm & P9_STAT_MODE_NAMED_PIPE) {
2747         err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2748                             0, S_IFIFO | (perm & 0777), &stbuf);
2749         if (err < 0) {
2750             goto out;
2751         }
2752         err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2753         if (err < 0) {
2754             goto out;
2755         }
2756         v9fs_path_write_lock(s);
2757         v9fs_path_copy(&fidp->path, &path);
2758         v9fs_path_unlock(s);
2759     } else if (perm & P9_STAT_MODE_SOCKET) {
2760         err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2761                             0, S_IFSOCK | (perm & 0777), &stbuf);
2762         if (err < 0) {
2763             goto out;
2764         }
2765         err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2766         if (err < 0) {
2767             goto out;
2768         }
2769         v9fs_path_write_lock(s);
2770         v9fs_path_copy(&fidp->path, &path);
2771         v9fs_path_unlock(s);
2772     } else {
2773         err = v9fs_co_open2(pdu, fidp, &name, -1,
2774                             omode_to_uflags(mode) | O_CREAT, perm, &stbuf);
2775         if (err < 0) {
2776             goto out;
2777         }
2778         fidp->fid_type = P9_FID_FILE;
2779         fidp->open_flags = omode_to_uflags(mode);
2780         if (fidp->open_flags & O_EXCL) {
2781             /*
2782              * We let the host file system do O_EXCL check
2783              * We should not reclaim such fd
2784              */
2785             fidp->flags |= FID_NON_RECLAIMABLE;
2786         }
2787     }
2788     iounit = get_iounit(pdu, &fidp->path);
2789     err = stat_to_qid(pdu, &stbuf, &qid);
2790     if (err < 0) {
2791         goto out;
2792     }
2793     err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
2794     if (err < 0) {
2795         goto out;
2796     }
2797     err += offset;
2798     trace_v9fs_create_return(pdu->tag, pdu->id,
2799                              qid.type, qid.version, qid.path, iounit);
2800 out:
2801     put_fid(pdu, fidp);
2802 out_nofid:
2803    pdu_complete(pdu, err);
2804    v9fs_string_free(&name);
2805    v9fs_string_free(&extension);
2806    v9fs_path_free(&path);
2807 }
2808 
2809 static void coroutine_fn v9fs_symlink(void *opaque)
2810 {
2811     V9fsPDU *pdu = opaque;
2812     V9fsString name;
2813     V9fsString symname;
2814     V9fsFidState *dfidp;
2815     V9fsQID qid;
2816     struct stat stbuf;
2817     int32_t dfid;
2818     int err = 0;
2819     gid_t gid;
2820     size_t offset = 7;
2821 
2822     v9fs_string_init(&name);
2823     v9fs_string_init(&symname);
2824     err = pdu_unmarshal(pdu, offset, "dssd", &dfid, &name, &symname, &gid);
2825     if (err < 0) {
2826         goto out_nofid;
2827     }
2828     trace_v9fs_symlink(pdu->tag, pdu->id, dfid, name.data, symname.data, gid);
2829 
2830     if (name_is_illegal(name.data)) {
2831         err = -ENOENT;
2832         goto out_nofid;
2833     }
2834 
2835     if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
2836         err = -EEXIST;
2837         goto out_nofid;
2838     }
2839 
2840     dfidp = get_fid(pdu, dfid);
2841     if (dfidp == NULL) {
2842         err = -EINVAL;
2843         goto out_nofid;
2844     }
2845     err = v9fs_co_symlink(pdu, dfidp, &name, symname.data, gid, &stbuf);
2846     if (err < 0) {
2847         goto out;
2848     }
2849     err = stat_to_qid(pdu, &stbuf, &qid);
2850     if (err < 0) {
2851         goto out;
2852     }
2853     err =  pdu_marshal(pdu, offset, "Q", &qid);
2854     if (err < 0) {
2855         goto out;
2856     }
2857     err += offset;
2858     trace_v9fs_symlink_return(pdu->tag, pdu->id,
2859                               qid.type, qid.version, qid.path);
2860 out:
2861     put_fid(pdu, dfidp);
2862 out_nofid:
2863     pdu_complete(pdu, err);
2864     v9fs_string_free(&name);
2865     v9fs_string_free(&symname);
2866 }
2867 
2868 static void coroutine_fn v9fs_flush(void *opaque)
2869 {
2870     ssize_t err;
2871     int16_t tag;
2872     size_t offset = 7;
2873     V9fsPDU *cancel_pdu = NULL;
2874     V9fsPDU *pdu = opaque;
2875     V9fsState *s = pdu->s;
2876 
2877     err = pdu_unmarshal(pdu, offset, "w", &tag);
2878     if (err < 0) {
2879         pdu_complete(pdu, err);
2880         return;
2881     }
2882     trace_v9fs_flush(pdu->tag, pdu->id, tag);
2883 
2884     if (pdu->tag == tag) {
2885         warn_report("the guest sent a self-referencing 9P flush request");
2886     } else {
2887         QLIST_FOREACH(cancel_pdu, &s->active_list, next) {
2888             if (cancel_pdu->tag == tag) {
2889                 break;
2890             }
2891         }
2892     }
2893     if (cancel_pdu) {
2894         cancel_pdu->cancelled = 1;
2895         /*
2896          * Wait for pdu to complete.
2897          */
2898         qemu_co_queue_wait(&cancel_pdu->complete, NULL);
2899         if (!qemu_co_queue_next(&cancel_pdu->complete)) {
2900             cancel_pdu->cancelled = 0;
2901             pdu_free(cancel_pdu);
2902         }
2903     }
2904     pdu_complete(pdu, 7);
2905 }
2906 
2907 static void coroutine_fn v9fs_link(void *opaque)
2908 {
2909     V9fsPDU *pdu = opaque;
2910     int32_t dfid, oldfid;
2911     V9fsFidState *dfidp, *oldfidp;
2912     V9fsString name;
2913     size_t offset = 7;
2914     int err = 0;
2915 
2916     v9fs_string_init(&name);
2917     err = pdu_unmarshal(pdu, offset, "dds", &dfid, &oldfid, &name);
2918     if (err < 0) {
2919         goto out_nofid;
2920     }
2921     trace_v9fs_link(pdu->tag, pdu->id, dfid, oldfid, name.data);
2922 
2923     if (name_is_illegal(name.data)) {
2924         err = -ENOENT;
2925         goto out_nofid;
2926     }
2927 
2928     if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
2929         err = -EEXIST;
2930         goto out_nofid;
2931     }
2932 
2933     dfidp = get_fid(pdu, dfid);
2934     if (dfidp == NULL) {
2935         err = -ENOENT;
2936         goto out_nofid;
2937     }
2938 
2939     oldfidp = get_fid(pdu, oldfid);
2940     if (oldfidp == NULL) {
2941         err = -ENOENT;
2942         goto out;
2943     }
2944     err = v9fs_co_link(pdu, oldfidp, dfidp, &name);
2945     if (!err) {
2946         err = offset;
2947     }
2948     put_fid(pdu, oldfidp);
2949 out:
2950     put_fid(pdu, dfidp);
2951 out_nofid:
2952     v9fs_string_free(&name);
2953     pdu_complete(pdu, err);
2954 }
2955 
2956 /* Only works with path name based fid */
2957 static void coroutine_fn v9fs_remove(void *opaque)
2958 {
2959     int32_t fid;
2960     int err = 0;
2961     size_t offset = 7;
2962     V9fsFidState *fidp;
2963     V9fsPDU *pdu = opaque;
2964 
2965     err = pdu_unmarshal(pdu, offset, "d", &fid);
2966     if (err < 0) {
2967         goto out_nofid;
2968     }
2969     trace_v9fs_remove(pdu->tag, pdu->id, fid);
2970 
2971     fidp = get_fid(pdu, fid);
2972     if (fidp == NULL) {
2973         err = -EINVAL;
2974         goto out_nofid;
2975     }
2976     /* if fs driver is not path based, return EOPNOTSUPP */
2977     if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) {
2978         err = -EOPNOTSUPP;
2979         goto out_err;
2980     }
2981     /*
2982      * IF the file is unlinked, we cannot reopen
2983      * the file later. So don't reclaim fd
2984      */
2985     err = v9fs_mark_fids_unreclaim(pdu, &fidp->path);
2986     if (err < 0) {
2987         goto out_err;
2988     }
2989     err = v9fs_co_remove(pdu, &fidp->path);
2990     if (!err) {
2991         err = offset;
2992     }
2993 out_err:
2994     /* For TREMOVE we need to clunk the fid even on failed remove */
2995     clunk_fid(pdu->s, fidp->fid);
2996     put_fid(pdu, fidp);
2997 out_nofid:
2998     pdu_complete(pdu, err);
2999 }
3000 
3001 static void coroutine_fn v9fs_unlinkat(void *opaque)
3002 {
3003     int err = 0;
3004     V9fsString name;
3005     int32_t dfid, flags, rflags = 0;
3006     size_t offset = 7;
3007     V9fsPath path;
3008     V9fsFidState *dfidp;
3009     V9fsPDU *pdu = opaque;
3010 
3011     v9fs_string_init(&name);
3012     err = pdu_unmarshal(pdu, offset, "dsd", &dfid, &name, &flags);
3013     if (err < 0) {
3014         goto out_nofid;
3015     }
3016 
3017     if (name_is_illegal(name.data)) {
3018         err = -ENOENT;
3019         goto out_nofid;
3020     }
3021 
3022     if (!strcmp(".", name.data)) {
3023         err = -EINVAL;
3024         goto out_nofid;
3025     }
3026 
3027     if (!strcmp("..", name.data)) {
3028         err = -ENOTEMPTY;
3029         goto out_nofid;
3030     }
3031 
3032     if (flags & ~P9_DOTL_AT_REMOVEDIR) {
3033         err = -EINVAL;
3034         goto out_nofid;
3035     }
3036 
3037     if (flags & P9_DOTL_AT_REMOVEDIR) {
3038         rflags |= AT_REMOVEDIR;
3039     }
3040 
3041     dfidp = get_fid(pdu, dfid);
3042     if (dfidp == NULL) {
3043         err = -EINVAL;
3044         goto out_nofid;
3045     }
3046     /*
3047      * IF the file is unlinked, we cannot reopen
3048      * the file later. So don't reclaim fd
3049      */
3050     v9fs_path_init(&path);
3051     err = v9fs_co_name_to_path(pdu, &dfidp->path, name.data, &path);
3052     if (err < 0) {
3053         goto out_err;
3054     }
3055     err = v9fs_mark_fids_unreclaim(pdu, &path);
3056     if (err < 0) {
3057         goto out_err;
3058     }
3059     err = v9fs_co_unlinkat(pdu, &dfidp->path, &name, rflags);
3060     if (!err) {
3061         err = offset;
3062     }
3063 out_err:
3064     put_fid(pdu, dfidp);
3065     v9fs_path_free(&path);
3066 out_nofid:
3067     pdu_complete(pdu, err);
3068     v9fs_string_free(&name);
3069 }
3070 
3071 
3072 /* Only works with path name based fid */
3073 static int coroutine_fn v9fs_complete_rename(V9fsPDU *pdu, V9fsFidState *fidp,
3074                                              int32_t newdirfid,
3075                                              V9fsString *name)
3076 {
3077     int err = 0;
3078     V9fsPath new_path;
3079     V9fsFidState *tfidp;
3080     V9fsState *s = pdu->s;
3081     V9fsFidState *dirfidp = NULL;
3082 
3083     v9fs_path_init(&new_path);
3084     if (newdirfid != -1) {
3085         dirfidp = get_fid(pdu, newdirfid);
3086         if (dirfidp == NULL) {
3087             return -ENOENT;
3088         }
3089         if (fidp->fid_type != P9_FID_NONE) {
3090             err = -EINVAL;
3091             goto out;
3092         }
3093         err = v9fs_co_name_to_path(pdu, &dirfidp->path, name->data, &new_path);
3094         if (err < 0) {
3095             goto out;
3096         }
3097     } else {
3098         char *dir_name = g_path_get_dirname(fidp->path.data);
3099         V9fsPath dir_path;
3100 
3101         v9fs_path_init(&dir_path);
3102         v9fs_path_sprintf(&dir_path, "%s", dir_name);
3103         g_free(dir_name);
3104 
3105         err = v9fs_co_name_to_path(pdu, &dir_path, name->data, &new_path);
3106         v9fs_path_free(&dir_path);
3107         if (err < 0) {
3108             goto out;
3109         }
3110     }
3111     err = v9fs_co_rename(pdu, &fidp->path, &new_path);
3112     if (err < 0) {
3113         goto out;
3114     }
3115     /*
3116      * Fixup fid's pointing to the old name to
3117      * start pointing to the new name
3118      */
3119     QSIMPLEQ_FOREACH(tfidp, &s->fid_list, next) {
3120         if (v9fs_path_is_ancestor(&fidp->path, &tfidp->path)) {
3121             /* replace the name */
3122             v9fs_fix_path(&tfidp->path, &new_path, strlen(fidp->path.data));
3123         }
3124     }
3125 out:
3126     if (dirfidp) {
3127         put_fid(pdu, dirfidp);
3128     }
3129     v9fs_path_free(&new_path);
3130     return err;
3131 }
3132 
3133 /* Only works with path name based fid */
3134 static void coroutine_fn v9fs_rename(void *opaque)
3135 {
3136     int32_t fid;
3137     ssize_t err = 0;
3138     size_t offset = 7;
3139     V9fsString name;
3140     int32_t newdirfid;
3141     V9fsFidState *fidp;
3142     V9fsPDU *pdu = opaque;
3143     V9fsState *s = pdu->s;
3144 
3145     v9fs_string_init(&name);
3146     err = pdu_unmarshal(pdu, offset, "dds", &fid, &newdirfid, &name);
3147     if (err < 0) {
3148         goto out_nofid;
3149     }
3150 
3151     if (name_is_illegal(name.data)) {
3152         err = -ENOENT;
3153         goto out_nofid;
3154     }
3155 
3156     if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
3157         err = -EISDIR;
3158         goto out_nofid;
3159     }
3160 
3161     fidp = get_fid(pdu, fid);
3162     if (fidp == NULL) {
3163         err = -ENOENT;
3164         goto out_nofid;
3165     }
3166     if (fidp->fid_type != P9_FID_NONE) {
3167         err = -EINVAL;
3168         goto out;
3169     }
3170     /* if fs driver is not path based, return EOPNOTSUPP */
3171     if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) {
3172         err = -EOPNOTSUPP;
3173         goto out;
3174     }
3175     v9fs_path_write_lock(s);
3176     err = v9fs_complete_rename(pdu, fidp, newdirfid, &name);
3177     v9fs_path_unlock(s);
3178     if (!err) {
3179         err = offset;
3180     }
3181 out:
3182     put_fid(pdu, fidp);
3183 out_nofid:
3184     pdu_complete(pdu, err);
3185     v9fs_string_free(&name);
3186 }
3187 
3188 static int coroutine_fn v9fs_fix_fid_paths(V9fsPDU *pdu, V9fsPath *olddir,
3189                                            V9fsString *old_name,
3190                                            V9fsPath *newdir,
3191                                            V9fsString *new_name)
3192 {
3193     V9fsFidState *tfidp;
3194     V9fsPath oldpath, newpath;
3195     V9fsState *s = pdu->s;
3196     int err;
3197 
3198     v9fs_path_init(&oldpath);
3199     v9fs_path_init(&newpath);
3200     err = v9fs_co_name_to_path(pdu, olddir, old_name->data, &oldpath);
3201     if (err < 0) {
3202         goto out;
3203     }
3204     err = v9fs_co_name_to_path(pdu, newdir, new_name->data, &newpath);
3205     if (err < 0) {
3206         goto out;
3207     }
3208 
3209     /*
3210      * Fixup fid's pointing to the old name to
3211      * start pointing to the new name
3212      */
3213     QSIMPLEQ_FOREACH(tfidp, &s->fid_list, next) {
3214         if (v9fs_path_is_ancestor(&oldpath, &tfidp->path)) {
3215             /* replace the name */
3216             v9fs_fix_path(&tfidp->path, &newpath, strlen(oldpath.data));
3217         }
3218     }
3219 out:
3220     v9fs_path_free(&oldpath);
3221     v9fs_path_free(&newpath);
3222     return err;
3223 }
3224 
3225 static int coroutine_fn v9fs_complete_renameat(V9fsPDU *pdu, int32_t olddirfid,
3226                                                V9fsString *old_name,
3227                                                int32_t newdirfid,
3228                                                V9fsString *new_name)
3229 {
3230     int err = 0;
3231     V9fsState *s = pdu->s;
3232     V9fsFidState *newdirfidp = NULL, *olddirfidp = NULL;
3233 
3234     olddirfidp = get_fid(pdu, olddirfid);
3235     if (olddirfidp == NULL) {
3236         err = -ENOENT;
3237         goto out;
3238     }
3239     if (newdirfid != -1) {
3240         newdirfidp = get_fid(pdu, newdirfid);
3241         if (newdirfidp == NULL) {
3242             err = -ENOENT;
3243             goto out;
3244         }
3245     } else {
3246         newdirfidp = get_fid(pdu, olddirfid);
3247     }
3248 
3249     err = v9fs_co_renameat(pdu, &olddirfidp->path, old_name,
3250                            &newdirfidp->path, new_name);
3251     if (err < 0) {
3252         goto out;
3253     }
3254     if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) {
3255         /* Only for path based fid  we need to do the below fixup */
3256         err = v9fs_fix_fid_paths(pdu, &olddirfidp->path, old_name,
3257                                  &newdirfidp->path, new_name);
3258     }
3259 out:
3260     if (olddirfidp) {
3261         put_fid(pdu, olddirfidp);
3262     }
3263     if (newdirfidp) {
3264         put_fid(pdu, newdirfidp);
3265     }
3266     return err;
3267 }
3268 
3269 static void coroutine_fn v9fs_renameat(void *opaque)
3270 {
3271     ssize_t err = 0;
3272     size_t offset = 7;
3273     V9fsPDU *pdu = opaque;
3274     V9fsState *s = pdu->s;
3275     int32_t olddirfid, newdirfid;
3276     V9fsString old_name, new_name;
3277 
3278     v9fs_string_init(&old_name);
3279     v9fs_string_init(&new_name);
3280     err = pdu_unmarshal(pdu, offset, "dsds", &olddirfid,
3281                         &old_name, &newdirfid, &new_name);
3282     if (err < 0) {
3283         goto out_err;
3284     }
3285 
3286     if (name_is_illegal(old_name.data) || name_is_illegal(new_name.data)) {
3287         err = -ENOENT;
3288         goto out_err;
3289     }
3290 
3291     if (!strcmp(".", old_name.data) || !strcmp("..", old_name.data) ||
3292         !strcmp(".", new_name.data) || !strcmp("..", new_name.data)) {
3293         err = -EISDIR;
3294         goto out_err;
3295     }
3296 
3297     v9fs_path_write_lock(s);
3298     err = v9fs_complete_renameat(pdu, olddirfid,
3299                                  &old_name, newdirfid, &new_name);
3300     v9fs_path_unlock(s);
3301     if (!err) {
3302         err = offset;
3303     }
3304 
3305 out_err:
3306     pdu_complete(pdu, err);
3307     v9fs_string_free(&old_name);
3308     v9fs_string_free(&new_name);
3309 }
3310 
3311 static void coroutine_fn v9fs_wstat(void *opaque)
3312 {
3313     int32_t fid;
3314     int err = 0;
3315     int16_t unused;
3316     V9fsStat v9stat;
3317     size_t offset = 7;
3318     struct stat stbuf;
3319     V9fsFidState *fidp;
3320     V9fsPDU *pdu = opaque;
3321     V9fsState *s = pdu->s;
3322 
3323     v9fs_stat_init(&v9stat);
3324     err = pdu_unmarshal(pdu, offset, "dwS", &fid, &unused, &v9stat);
3325     if (err < 0) {
3326         goto out_nofid;
3327     }
3328     trace_v9fs_wstat(pdu->tag, pdu->id, fid,
3329                      v9stat.mode, v9stat.atime, v9stat.mtime);
3330 
3331     fidp = get_fid(pdu, fid);
3332     if (fidp == NULL) {
3333         err = -EINVAL;
3334         goto out_nofid;
3335     }
3336     /* do we need to sync the file? */
3337     if (donttouch_stat(&v9stat)) {
3338         err = v9fs_co_fsync(pdu, fidp, 0);
3339         goto out;
3340     }
3341     if (v9stat.mode != -1) {
3342         uint32_t v9_mode;
3343         err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
3344         if (err < 0) {
3345             goto out;
3346         }
3347         v9_mode = stat_to_v9mode(&stbuf);
3348         if ((v9stat.mode & P9_STAT_MODE_TYPE_BITS) !=
3349             (v9_mode & P9_STAT_MODE_TYPE_BITS)) {
3350             /* Attempting to change the type */
3351             err = -EIO;
3352             goto out;
3353         }
3354         err = v9fs_co_chmod(pdu, &fidp->path,
3355                             v9mode_to_mode(v9stat.mode,
3356                                            &v9stat.extension));
3357         if (err < 0) {
3358             goto out;
3359         }
3360     }
3361     if (v9stat.mtime != -1 || v9stat.atime != -1) {
3362         struct timespec times[2];
3363         if (v9stat.atime != -1) {
3364             times[0].tv_sec = v9stat.atime;
3365             times[0].tv_nsec = 0;
3366         } else {
3367             times[0].tv_nsec = UTIME_OMIT;
3368         }
3369         if (v9stat.mtime != -1) {
3370             times[1].tv_sec = v9stat.mtime;
3371             times[1].tv_nsec = 0;
3372         } else {
3373             times[1].tv_nsec = UTIME_OMIT;
3374         }
3375         err = v9fs_co_utimensat(pdu, &fidp->path, times);
3376         if (err < 0) {
3377             goto out;
3378         }
3379     }
3380     if (v9stat.n_gid != -1 || v9stat.n_uid != -1) {
3381         err = v9fs_co_chown(pdu, &fidp->path, v9stat.n_uid, v9stat.n_gid);
3382         if (err < 0) {
3383             goto out;
3384         }
3385     }
3386     if (v9stat.name.size != 0) {
3387         v9fs_path_write_lock(s);
3388         err = v9fs_complete_rename(pdu, fidp, -1, &v9stat.name);
3389         v9fs_path_unlock(s);
3390         if (err < 0) {
3391             goto out;
3392         }
3393     }
3394     if (v9stat.length != -1) {
3395         err = v9fs_co_truncate(pdu, &fidp->path, v9stat.length);
3396         if (err < 0) {
3397             goto out;
3398         }
3399     }
3400     err = offset;
3401 out:
3402     put_fid(pdu, fidp);
3403 out_nofid:
3404     v9fs_stat_free(&v9stat);
3405     pdu_complete(pdu, err);
3406 }
3407 
3408 static int v9fs_fill_statfs(V9fsState *s, V9fsPDU *pdu, struct statfs *stbuf)
3409 {
3410     uint32_t f_type;
3411     uint32_t f_bsize;
3412     uint64_t f_blocks;
3413     uint64_t f_bfree;
3414     uint64_t f_bavail;
3415     uint64_t f_files;
3416     uint64_t f_ffree;
3417     uint64_t fsid_val;
3418     uint32_t f_namelen;
3419     size_t offset = 7;
3420     int32_t bsize_factor;
3421 
3422     /*
3423      * compute bsize factor based on host file system block size
3424      * and client msize
3425      */
3426     bsize_factor = (s->msize - P9_IOHDRSZ) / stbuf->f_bsize;
3427     if (!bsize_factor) {
3428         bsize_factor = 1;
3429     }
3430     f_type  = stbuf->f_type;
3431     f_bsize = stbuf->f_bsize;
3432     f_bsize *= bsize_factor;
3433     /*
3434      * f_bsize is adjusted(multiplied) by bsize factor, so we need to
3435      * adjust(divide) the number of blocks, free blocks and available
3436      * blocks by bsize factor
3437      */
3438     f_blocks = stbuf->f_blocks / bsize_factor;
3439     f_bfree  = stbuf->f_bfree / bsize_factor;
3440     f_bavail = stbuf->f_bavail / bsize_factor;
3441     f_files  = stbuf->f_files;
3442     f_ffree  = stbuf->f_ffree;
3443     fsid_val = (unsigned int) stbuf->f_fsid.__val[0] |
3444                (unsigned long long)stbuf->f_fsid.__val[1] << 32;
3445     f_namelen = stbuf->f_namelen;
3446 
3447     return pdu_marshal(pdu, offset, "ddqqqqqqd",
3448                        f_type, f_bsize, f_blocks, f_bfree,
3449                        f_bavail, f_files, f_ffree,
3450                        fsid_val, f_namelen);
3451 }
3452 
3453 static void coroutine_fn v9fs_statfs(void *opaque)
3454 {
3455     int32_t fid;
3456     ssize_t retval = 0;
3457     size_t offset = 7;
3458     V9fsFidState *fidp;
3459     struct statfs stbuf;
3460     V9fsPDU *pdu = opaque;
3461     V9fsState *s = pdu->s;
3462 
3463     retval = pdu_unmarshal(pdu, offset, "d", &fid);
3464     if (retval < 0) {
3465         goto out_nofid;
3466     }
3467     fidp = get_fid(pdu, fid);
3468     if (fidp == NULL) {
3469         retval = -ENOENT;
3470         goto out_nofid;
3471     }
3472     retval = v9fs_co_statfs(pdu, &fidp->path, &stbuf);
3473     if (retval < 0) {
3474         goto out;
3475     }
3476     retval = v9fs_fill_statfs(s, pdu, &stbuf);
3477     if (retval < 0) {
3478         goto out;
3479     }
3480     retval += offset;
3481 out:
3482     put_fid(pdu, fidp);
3483 out_nofid:
3484     pdu_complete(pdu, retval);
3485 }
3486 
3487 static void coroutine_fn v9fs_mknod(void *opaque)
3488 {
3489 
3490     int mode;
3491     gid_t gid;
3492     int32_t fid;
3493     V9fsQID qid;
3494     int err = 0;
3495     int major, minor;
3496     size_t offset = 7;
3497     V9fsString name;
3498     struct stat stbuf;
3499     V9fsFidState *fidp;
3500     V9fsPDU *pdu = opaque;
3501 
3502     v9fs_string_init(&name);
3503     err = pdu_unmarshal(pdu, offset, "dsdddd", &fid, &name, &mode,
3504                         &major, &minor, &gid);
3505     if (err < 0) {
3506         goto out_nofid;
3507     }
3508     trace_v9fs_mknod(pdu->tag, pdu->id, fid, mode, major, minor);
3509 
3510     if (name_is_illegal(name.data)) {
3511         err = -ENOENT;
3512         goto out_nofid;
3513     }
3514 
3515     if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
3516         err = -EEXIST;
3517         goto out_nofid;
3518     }
3519 
3520     fidp = get_fid(pdu, fid);
3521     if (fidp == NULL) {
3522         err = -ENOENT;
3523         goto out_nofid;
3524     }
3525     err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, gid,
3526                         makedev(major, minor), mode, &stbuf);
3527     if (err < 0) {
3528         goto out;
3529     }
3530     err = stat_to_qid(pdu, &stbuf, &qid);
3531     if (err < 0) {
3532         goto out;
3533     }
3534     err = pdu_marshal(pdu, offset, "Q", &qid);
3535     if (err < 0) {
3536         goto out;
3537     }
3538     err += offset;
3539     trace_v9fs_mknod_return(pdu->tag, pdu->id,
3540                             qid.type, qid.version, qid.path);
3541 out:
3542     put_fid(pdu, fidp);
3543 out_nofid:
3544     pdu_complete(pdu, err);
3545     v9fs_string_free(&name);
3546 }
3547 
3548 /*
3549  * Implement posix byte range locking code
3550  * Server side handling of locking code is very simple, because 9p server in
3551  * QEMU can handle only one client. And most of the lock handling
3552  * (like conflict, merging) etc is done by the VFS layer itself, so no need to
3553  * do any thing in * qemu 9p server side lock code path.
3554  * So when a TLOCK request comes, always return success
3555  */
3556 static void coroutine_fn v9fs_lock(void *opaque)
3557 {
3558     V9fsFlock flock;
3559     size_t offset = 7;
3560     struct stat stbuf;
3561     V9fsFidState *fidp;
3562     int32_t fid, err = 0;
3563     V9fsPDU *pdu = opaque;
3564 
3565     v9fs_string_init(&flock.client_id);
3566     err = pdu_unmarshal(pdu, offset, "dbdqqds", &fid, &flock.type,
3567                         &flock.flags, &flock.start, &flock.length,
3568                         &flock.proc_id, &flock.client_id);
3569     if (err < 0) {
3570         goto out_nofid;
3571     }
3572     trace_v9fs_lock(pdu->tag, pdu->id, fid,
3573                     flock.type, flock.start, flock.length);
3574 
3575 
3576     /* We support only block flag now (that too ignored currently) */
3577     if (flock.flags & ~P9_LOCK_FLAGS_BLOCK) {
3578         err = -EINVAL;
3579         goto out_nofid;
3580     }
3581     fidp = get_fid(pdu, fid);
3582     if (fidp == NULL) {
3583         err = -ENOENT;
3584         goto out_nofid;
3585     }
3586     err = v9fs_co_fstat(pdu, fidp, &stbuf);
3587     if (err < 0) {
3588         goto out;
3589     }
3590     err = pdu_marshal(pdu, offset, "b", P9_LOCK_SUCCESS);
3591     if (err < 0) {
3592         goto out;
3593     }
3594     err += offset;
3595     trace_v9fs_lock_return(pdu->tag, pdu->id, P9_LOCK_SUCCESS);
3596 out:
3597     put_fid(pdu, fidp);
3598 out_nofid:
3599     pdu_complete(pdu, err);
3600     v9fs_string_free(&flock.client_id);
3601 }
3602 
3603 /*
3604  * When a TGETLOCK request comes, always return success because all lock
3605  * handling is done by client's VFS layer.
3606  */
3607 static void coroutine_fn v9fs_getlock(void *opaque)
3608 {
3609     size_t offset = 7;
3610     struct stat stbuf;
3611     V9fsFidState *fidp;
3612     V9fsGetlock glock;
3613     int32_t fid, err = 0;
3614     V9fsPDU *pdu = opaque;
3615 
3616     v9fs_string_init(&glock.client_id);
3617     err = pdu_unmarshal(pdu, offset, "dbqqds", &fid, &glock.type,
3618                         &glock.start, &glock.length, &glock.proc_id,
3619                         &glock.client_id);
3620     if (err < 0) {
3621         goto out_nofid;
3622     }
3623     trace_v9fs_getlock(pdu->tag, pdu->id, fid,
3624                        glock.type, glock.start, glock.length);
3625 
3626     fidp = get_fid(pdu, fid);
3627     if (fidp == NULL) {
3628         err = -ENOENT;
3629         goto out_nofid;
3630     }
3631     err = v9fs_co_fstat(pdu, fidp, &stbuf);
3632     if (err < 0) {
3633         goto out;
3634     }
3635     glock.type = P9_LOCK_TYPE_UNLCK;
3636     err = pdu_marshal(pdu, offset, "bqqds", glock.type,
3637                           glock.start, glock.length, glock.proc_id,
3638                           &glock.client_id);
3639     if (err < 0) {
3640         goto out;
3641     }
3642     err += offset;
3643     trace_v9fs_getlock_return(pdu->tag, pdu->id, glock.type, glock.start,
3644                               glock.length, glock.proc_id);
3645 out:
3646     put_fid(pdu, fidp);
3647 out_nofid:
3648     pdu_complete(pdu, err);
3649     v9fs_string_free(&glock.client_id);
3650 }
3651 
3652 static void coroutine_fn v9fs_mkdir(void *opaque)
3653 {
3654     V9fsPDU *pdu = opaque;
3655     size_t offset = 7;
3656     int32_t fid;
3657     struct stat stbuf;
3658     V9fsQID qid;
3659     V9fsString name;
3660     V9fsFidState *fidp;
3661     gid_t gid;
3662     int mode;
3663     int err = 0;
3664 
3665     v9fs_string_init(&name);
3666     err = pdu_unmarshal(pdu, offset, "dsdd", &fid, &name, &mode, &gid);
3667     if (err < 0) {
3668         goto out_nofid;
3669     }
3670     trace_v9fs_mkdir(pdu->tag, pdu->id, fid, name.data, mode, gid);
3671 
3672     if (name_is_illegal(name.data)) {
3673         err = -ENOENT;
3674         goto out_nofid;
3675     }
3676 
3677     if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
3678         err = -EEXIST;
3679         goto out_nofid;
3680     }
3681 
3682     fidp = get_fid(pdu, fid);
3683     if (fidp == NULL) {
3684         err = -ENOENT;
3685         goto out_nofid;
3686     }
3687     err = v9fs_co_mkdir(pdu, fidp, &name, mode, fidp->uid, gid, &stbuf);
3688     if (err < 0) {
3689         goto out;
3690     }
3691     err = stat_to_qid(pdu, &stbuf, &qid);
3692     if (err < 0) {
3693         goto out;
3694     }
3695     err = pdu_marshal(pdu, offset, "Q", &qid);
3696     if (err < 0) {
3697         goto out;
3698     }
3699     err += offset;
3700     trace_v9fs_mkdir_return(pdu->tag, pdu->id,
3701                             qid.type, qid.version, qid.path, err);
3702 out:
3703     put_fid(pdu, fidp);
3704 out_nofid:
3705     pdu_complete(pdu, err);
3706     v9fs_string_free(&name);
3707 }
3708 
3709 static void coroutine_fn v9fs_xattrwalk(void *opaque)
3710 {
3711     int64_t size;
3712     V9fsString name;
3713     ssize_t err = 0;
3714     size_t offset = 7;
3715     int32_t fid, newfid;
3716     V9fsFidState *file_fidp;
3717     V9fsFidState *xattr_fidp = NULL;
3718     V9fsPDU *pdu = opaque;
3719     V9fsState *s = pdu->s;
3720 
3721     v9fs_string_init(&name);
3722     err = pdu_unmarshal(pdu, offset, "dds", &fid, &newfid, &name);
3723     if (err < 0) {
3724         goto out_nofid;
3725     }
3726     trace_v9fs_xattrwalk(pdu->tag, pdu->id, fid, newfid, name.data);
3727 
3728     file_fidp = get_fid(pdu, fid);
3729     if (file_fidp == NULL) {
3730         err = -ENOENT;
3731         goto out_nofid;
3732     }
3733     xattr_fidp = alloc_fid(s, newfid);
3734     if (xattr_fidp == NULL) {
3735         err = -EINVAL;
3736         goto out;
3737     }
3738     v9fs_path_copy(&xattr_fidp->path, &file_fidp->path);
3739     if (!v9fs_string_size(&name)) {
3740         /*
3741          * listxattr request. Get the size first
3742          */
3743         size = v9fs_co_llistxattr(pdu, &xattr_fidp->path, NULL, 0);
3744         if (size < 0) {
3745             err = size;
3746             clunk_fid(s, xattr_fidp->fid);
3747             goto out;
3748         }
3749         /*
3750          * Read the xattr value
3751          */
3752         xattr_fidp->fs.xattr.len = size;
3753         xattr_fidp->fid_type = P9_FID_XATTR;
3754         xattr_fidp->fs.xattr.xattrwalk_fid = true;
3755         xattr_fidp->fs.xattr.value = g_malloc0(size);
3756         if (size) {
3757             err = v9fs_co_llistxattr(pdu, &xattr_fidp->path,
3758                                      xattr_fidp->fs.xattr.value,
3759                                      xattr_fidp->fs.xattr.len);
3760             if (err < 0) {
3761                 clunk_fid(s, xattr_fidp->fid);
3762                 goto out;
3763             }
3764         }
3765         err = pdu_marshal(pdu, offset, "q", size);
3766         if (err < 0) {
3767             goto out;
3768         }
3769         err += offset;
3770     } else {
3771         /*
3772          * specific xattr fid. We check for xattr
3773          * presence also collect the xattr size
3774          */
3775         size = v9fs_co_lgetxattr(pdu, &xattr_fidp->path,
3776                                  &name, NULL, 0);
3777         if (size < 0) {
3778             err = size;
3779             clunk_fid(s, xattr_fidp->fid);
3780             goto out;
3781         }
3782         /*
3783          * Read the xattr value
3784          */
3785         xattr_fidp->fs.xattr.len = size;
3786         xattr_fidp->fid_type = P9_FID_XATTR;
3787         xattr_fidp->fs.xattr.xattrwalk_fid = true;
3788         xattr_fidp->fs.xattr.value = g_malloc0(size);
3789         if (size) {
3790             err = v9fs_co_lgetxattr(pdu, &xattr_fidp->path,
3791                                     &name, xattr_fidp->fs.xattr.value,
3792                                     xattr_fidp->fs.xattr.len);
3793             if (err < 0) {
3794                 clunk_fid(s, xattr_fidp->fid);
3795                 goto out;
3796             }
3797         }
3798         err = pdu_marshal(pdu, offset, "q", size);
3799         if (err < 0) {
3800             goto out;
3801         }
3802         err += offset;
3803     }
3804     trace_v9fs_xattrwalk_return(pdu->tag, pdu->id, size);
3805 out:
3806     put_fid(pdu, file_fidp);
3807     if (xattr_fidp) {
3808         put_fid(pdu, xattr_fidp);
3809     }
3810 out_nofid:
3811     pdu_complete(pdu, err);
3812     v9fs_string_free(&name);
3813 }
3814 
3815 static void coroutine_fn v9fs_xattrcreate(void *opaque)
3816 {
3817     int flags, rflags = 0;
3818     int32_t fid;
3819     uint64_t size;
3820     ssize_t err = 0;
3821     V9fsString name;
3822     size_t offset = 7;
3823     V9fsFidState *file_fidp;
3824     V9fsFidState *xattr_fidp;
3825     V9fsPDU *pdu = opaque;
3826 
3827     v9fs_string_init(&name);
3828     err = pdu_unmarshal(pdu, offset, "dsqd", &fid, &name, &size, &flags);
3829     if (err < 0) {
3830         goto out_nofid;
3831     }
3832     trace_v9fs_xattrcreate(pdu->tag, pdu->id, fid, name.data, size, flags);
3833 
3834     if (flags & ~(P9_XATTR_CREATE | P9_XATTR_REPLACE)) {
3835         err = -EINVAL;
3836         goto out_nofid;
3837     }
3838 
3839     if (flags & P9_XATTR_CREATE) {
3840         rflags |= XATTR_CREATE;
3841     }
3842 
3843     if (flags & P9_XATTR_REPLACE) {
3844         rflags |= XATTR_REPLACE;
3845     }
3846 
3847     if (size > XATTR_SIZE_MAX) {
3848         err = -E2BIG;
3849         goto out_nofid;
3850     }
3851 
3852     file_fidp = get_fid(pdu, fid);
3853     if (file_fidp == NULL) {
3854         err = -EINVAL;
3855         goto out_nofid;
3856     }
3857     if (file_fidp->fid_type != P9_FID_NONE) {
3858         err = -EINVAL;
3859         goto out_put_fid;
3860     }
3861 
3862     /* Make the file fid point to xattr */
3863     xattr_fidp = file_fidp;
3864     xattr_fidp->fid_type = P9_FID_XATTR;
3865     xattr_fidp->fs.xattr.copied_len = 0;
3866     xattr_fidp->fs.xattr.xattrwalk_fid = false;
3867     xattr_fidp->fs.xattr.len = size;
3868     xattr_fidp->fs.xattr.flags = rflags;
3869     v9fs_string_init(&xattr_fidp->fs.xattr.name);
3870     v9fs_string_copy(&xattr_fidp->fs.xattr.name, &name);
3871     xattr_fidp->fs.xattr.value = g_malloc0(size);
3872     err = offset;
3873 out_put_fid:
3874     put_fid(pdu, file_fidp);
3875 out_nofid:
3876     pdu_complete(pdu, err);
3877     v9fs_string_free(&name);
3878 }
3879 
3880 static void coroutine_fn v9fs_readlink(void *opaque)
3881 {
3882     V9fsPDU *pdu = opaque;
3883     size_t offset = 7;
3884     V9fsString target;
3885     int32_t fid;
3886     int err = 0;
3887     V9fsFidState *fidp;
3888 
3889     err = pdu_unmarshal(pdu, offset, "d", &fid);
3890     if (err < 0) {
3891         goto out_nofid;
3892     }
3893     trace_v9fs_readlink(pdu->tag, pdu->id, fid);
3894     fidp = get_fid(pdu, fid);
3895     if (fidp == NULL) {
3896         err = -ENOENT;
3897         goto out_nofid;
3898     }
3899 
3900     v9fs_string_init(&target);
3901     err = v9fs_co_readlink(pdu, &fidp->path, &target);
3902     if (err < 0) {
3903         goto out;
3904     }
3905     err = pdu_marshal(pdu, offset, "s", &target);
3906     if (err < 0) {
3907         v9fs_string_free(&target);
3908         goto out;
3909     }
3910     err += offset;
3911     trace_v9fs_readlink_return(pdu->tag, pdu->id, target.data);
3912     v9fs_string_free(&target);
3913 out:
3914     put_fid(pdu, fidp);
3915 out_nofid:
3916     pdu_complete(pdu, err);
3917 }
3918 
3919 static CoroutineEntry *pdu_co_handlers[] = {
3920     [P9_TREADDIR] = v9fs_readdir,
3921     [P9_TSTATFS] = v9fs_statfs,
3922     [P9_TGETATTR] = v9fs_getattr,
3923     [P9_TSETATTR] = v9fs_setattr,
3924     [P9_TXATTRWALK] = v9fs_xattrwalk,
3925     [P9_TXATTRCREATE] = v9fs_xattrcreate,
3926     [P9_TMKNOD] = v9fs_mknod,
3927     [P9_TRENAME] = v9fs_rename,
3928     [P9_TLOCK] = v9fs_lock,
3929     [P9_TGETLOCK] = v9fs_getlock,
3930     [P9_TRENAMEAT] = v9fs_renameat,
3931     [P9_TREADLINK] = v9fs_readlink,
3932     [P9_TUNLINKAT] = v9fs_unlinkat,
3933     [P9_TMKDIR] = v9fs_mkdir,
3934     [P9_TVERSION] = v9fs_version,
3935     [P9_TLOPEN] = v9fs_open,
3936     [P9_TATTACH] = v9fs_attach,
3937     [P9_TSTAT] = v9fs_stat,
3938     [P9_TWALK] = v9fs_walk,
3939     [P9_TCLUNK] = v9fs_clunk,
3940     [P9_TFSYNC] = v9fs_fsync,
3941     [P9_TOPEN] = v9fs_open,
3942     [P9_TREAD] = v9fs_read,
3943 #if 0
3944     [P9_TAUTH] = v9fs_auth,
3945 #endif
3946     [P9_TFLUSH] = v9fs_flush,
3947     [P9_TLINK] = v9fs_link,
3948     [P9_TSYMLINK] = v9fs_symlink,
3949     [P9_TCREATE] = v9fs_create,
3950     [P9_TLCREATE] = v9fs_lcreate,
3951     [P9_TWRITE] = v9fs_write,
3952     [P9_TWSTAT] = v9fs_wstat,
3953     [P9_TREMOVE] = v9fs_remove,
3954 };
3955 
3956 static void coroutine_fn v9fs_op_not_supp(void *opaque)
3957 {
3958     V9fsPDU *pdu = opaque;
3959     pdu_complete(pdu, -EOPNOTSUPP);
3960 }
3961 
3962 static void coroutine_fn v9fs_fs_ro(void *opaque)
3963 {
3964     V9fsPDU *pdu = opaque;
3965     pdu_complete(pdu, -EROFS);
3966 }
3967 
3968 static inline bool is_read_only_op(V9fsPDU *pdu)
3969 {
3970     switch (pdu->id) {
3971     case P9_TREADDIR:
3972     case P9_TSTATFS:
3973     case P9_TGETATTR:
3974     case P9_TXATTRWALK:
3975     case P9_TLOCK:
3976     case P9_TGETLOCK:
3977     case P9_TREADLINK:
3978     case P9_TVERSION:
3979     case P9_TLOPEN:
3980     case P9_TATTACH:
3981     case P9_TSTAT:
3982     case P9_TWALK:
3983     case P9_TCLUNK:
3984     case P9_TFSYNC:
3985     case P9_TOPEN:
3986     case P9_TREAD:
3987     case P9_TAUTH:
3988     case P9_TFLUSH:
3989         return 1;
3990     default:
3991         return 0;
3992     }
3993 }
3994 
3995 void pdu_submit(V9fsPDU *pdu, P9MsgHeader *hdr)
3996 {
3997     Coroutine *co;
3998     CoroutineEntry *handler;
3999     V9fsState *s = pdu->s;
4000 
4001     pdu->size = le32_to_cpu(hdr->size_le);
4002     pdu->id = hdr->id;
4003     pdu->tag = le16_to_cpu(hdr->tag_le);
4004 
4005     if (pdu->id >= ARRAY_SIZE(pdu_co_handlers) ||
4006         (pdu_co_handlers[pdu->id] == NULL)) {
4007         handler = v9fs_op_not_supp;
4008     } else if (is_ro_export(&s->ctx) && !is_read_only_op(pdu)) {
4009         handler = v9fs_fs_ro;
4010     } else {
4011         handler = pdu_co_handlers[pdu->id];
4012     }
4013 
4014     qemu_co_queue_init(&pdu->complete);
4015     co = qemu_coroutine_create(handler, pdu);
4016     qemu_coroutine_enter(co);
4017 }
4018 
4019 /* Returns 0 on success, 1 on failure. */
4020 int v9fs_device_realize_common(V9fsState *s, const V9fsTransport *t,
4021                                Error **errp)
4022 {
4023     ERRP_GUARD();
4024     int i, len;
4025     struct stat stat;
4026     FsDriverEntry *fse;
4027     V9fsPath path;
4028     int rc = 1;
4029 
4030     assert(!s->transport);
4031     s->transport = t;
4032 
4033     /* initialize pdu allocator */
4034     QLIST_INIT(&s->free_list);
4035     QLIST_INIT(&s->active_list);
4036     for (i = 0; i < MAX_REQ; i++) {
4037         QLIST_INSERT_HEAD(&s->free_list, &s->pdus[i], next);
4038         s->pdus[i].s = s;
4039         s->pdus[i].idx = i;
4040     }
4041 
4042     v9fs_path_init(&path);
4043 
4044     fse = get_fsdev_fsentry(s->fsconf.fsdev_id);
4045 
4046     if (!fse) {
4047         /* We don't have a fsdev identified by fsdev_id */
4048         error_setg(errp, "9pfs device couldn't find fsdev with the "
4049                    "id = %s",
4050                    s->fsconf.fsdev_id ? s->fsconf.fsdev_id : "NULL");
4051         goto out;
4052     }
4053 
4054     if (!s->fsconf.tag) {
4055         /* we haven't specified a mount_tag */
4056         error_setg(errp, "fsdev with id %s needs mount_tag arguments",
4057                    s->fsconf.fsdev_id);
4058         goto out;
4059     }
4060 
4061     s->ctx.export_flags = fse->export_flags;
4062     s->ctx.fs_root = g_strdup(fse->path);
4063     s->ctx.exops.get_st_gen = NULL;
4064     len = strlen(s->fsconf.tag);
4065     if (len > MAX_TAG_LEN - 1) {
4066         error_setg(errp, "mount tag '%s' (%d bytes) is longer than "
4067                    "maximum (%d bytes)", s->fsconf.tag, len, MAX_TAG_LEN - 1);
4068         goto out;
4069     }
4070 
4071     s->tag = g_strdup(s->fsconf.tag);
4072     s->ctx.uid = -1;
4073 
4074     s->ops = fse->ops;
4075 
4076     s->ctx.fmode = fse->fmode;
4077     s->ctx.dmode = fse->dmode;
4078 
4079     QSIMPLEQ_INIT(&s->fid_list);
4080     qemu_co_rwlock_init(&s->rename_lock);
4081 
4082     if (s->ops->init(&s->ctx, errp) < 0) {
4083         error_prepend(errp, "cannot initialize fsdev '%s': ",
4084                       s->fsconf.fsdev_id);
4085         goto out;
4086     }
4087 
4088     /*
4089      * Check details of export path, We need to use fs driver
4090      * call back to do that. Since we are in the init path, we don't
4091      * use co-routines here.
4092      */
4093     if (s->ops->name_to_path(&s->ctx, NULL, "/", &path) < 0) {
4094         error_setg(errp,
4095                    "error in converting name to path %s", strerror(errno));
4096         goto out;
4097     }
4098     if (s->ops->lstat(&s->ctx, &path, &stat)) {
4099         error_setg(errp, "share path %s does not exist", fse->path);
4100         goto out;
4101     } else if (!S_ISDIR(stat.st_mode)) {
4102         error_setg(errp, "share path %s is not a directory", fse->path);
4103         goto out;
4104     }
4105 
4106     s->dev_id = stat.st_dev;
4107 
4108     /* init inode remapping : */
4109     /* hash table for variable length inode suffixes */
4110     qpd_table_init(&s->qpd_table);
4111     /* hash table for slow/full inode remapping (most users won't need it) */
4112     qpf_table_init(&s->qpf_table);
4113     /* hash table for quick inode remapping */
4114     qpp_table_init(&s->qpp_table);
4115     s->qp_ndevices = 0;
4116     s->qp_affix_next = 1; /* reserve 0 to detect overflow */
4117     s->qp_fullpath_next = 1;
4118 
4119     s->ctx.fst = &fse->fst;
4120     fsdev_throttle_init(s->ctx.fst);
4121 
4122     rc = 0;
4123 out:
4124     if (rc) {
4125         v9fs_device_unrealize_common(s);
4126     }
4127     v9fs_path_free(&path);
4128     return rc;
4129 }
4130 
4131 void v9fs_device_unrealize_common(V9fsState *s)
4132 {
4133     if (s->ops && s->ops->cleanup) {
4134         s->ops->cleanup(&s->ctx);
4135     }
4136     if (s->ctx.fst) {
4137         fsdev_throttle_cleanup(s->ctx.fst);
4138     }
4139     g_free(s->tag);
4140     qp_table_destroy(&s->qpd_table);
4141     qp_table_destroy(&s->qpp_table);
4142     qp_table_destroy(&s->qpf_table);
4143     g_free(s->ctx.fs_root);
4144 }
4145 
4146 typedef struct VirtfsCoResetData {
4147     V9fsPDU pdu;
4148     bool done;
4149 } VirtfsCoResetData;
4150 
4151 static void coroutine_fn virtfs_co_reset(void *opaque)
4152 {
4153     VirtfsCoResetData *data = opaque;
4154 
4155     virtfs_reset(&data->pdu);
4156     data->done = true;
4157 }
4158 
4159 void v9fs_reset(V9fsState *s)
4160 {
4161     VirtfsCoResetData data = { .pdu = { .s = s }, .done = false };
4162     Coroutine *co;
4163 
4164     while (!QLIST_EMPTY(&s->active_list)) {
4165         aio_poll(qemu_get_aio_context(), true);
4166     }
4167 
4168     co = qemu_coroutine_create(virtfs_co_reset, &data);
4169     qemu_coroutine_enter(co);
4170 
4171     while (!data.done) {
4172         aio_poll(qemu_get_aio_context(), true);
4173     }
4174 }
4175 
4176 static void __attribute__((__constructor__)) v9fs_set_fd_limit(void)
4177 {
4178     struct rlimit rlim;
4179     if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) {
4180         error_report("Failed to get the resource limit");
4181         exit(1);
4182     }
4183     open_fd_hw = rlim.rlim_cur - MIN(400, rlim.rlim_cur / 3);
4184     open_fd_rc = rlim.rlim_cur / 2;
4185 }
4186