xref: /openbmc/qemu/hw/9pfs/9p.c (revision 90191d07a6c0480802dfcaee99328b1c5e520579)
1 /*
2  * Virtio 9p backend
3  *
4  * Copyright IBM, Corp. 2010
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include "hw/virtio/virtio.h"
16 #include "hw/i386/pc.h"
17 #include "qemu/error-report.h"
18 #include "qemu/iov.h"
19 #include "qemu/sockets.h"
20 #include "virtio-9p.h"
21 #include "fsdev/qemu-fsdev.h"
22 #include "9p-xattr.h"
23 #include "coth.h"
24 #include "trace.h"
25 #include "migration/migration.h"
26 
/*
 * File descriptor accounting shared with the rest of the 9p backend.
 * NOTE(review): open_fd_hw presumably is a high-water mark and
 * total_open_fd a running count; both are maintained outside this
 * part of the file -- confirm against their users.
 */
int open_fd_hw;
int total_open_fd;
/* Upper bound on the number of fids one v9fs_reclaim_fd() pass collects. */
static int open_fd_rc;
30 
/*
 * Legacy 9P open-mode byte as decoded by omode_to_uflags(): the two low
 * bits select the access mode, the remaining values are modifier bits.
 */
enum {
    /* access mode (mode & 3) */
    Oread   = 0x00,
    Owrite  = 0x01,
    Ordwr   = 0x02,
    Oexec   = 0x03,
    /* modifier bits */
    Oexcl   = 0x04,
    Otrunc  = 0x10,
    Orexec  = 0x20,
    Orclose = 0x40,
    Oappend = 0x80,
};
42 
43 ssize_t pdu_marshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...)
44 {
45     ssize_t ret;
46     va_list ap;
47 
48     va_start(ap, fmt);
49     ret = virtio_pdu_vmarshal(pdu, offset, fmt, ap);
50     va_end(ap);
51 
52     return ret;
53 }
54 
55 ssize_t pdu_unmarshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...)
56 {
57     ssize_t ret;
58     va_list ap;
59 
60     va_start(ap, fmt);
61     ret = virtio_pdu_vunmarshal(pdu, offset, fmt, ap);
62     va_end(ap);
63 
64     return ret;
65 }
66 
67 static void pdu_push_and_notify(V9fsPDU *pdu)
68 {
69     virtio_9p_push_and_notify(pdu);
70 }
71 
72 static int omode_to_uflags(int8_t mode)
73 {
74     int ret = 0;
75 
76     switch (mode & 3) {
77     case Oread:
78         ret = O_RDONLY;
79         break;
80     case Ordwr:
81         ret = O_RDWR;
82         break;
83     case Owrite:
84         ret = O_WRONLY;
85         break;
86     case Oexec:
87         ret = O_RDONLY;
88         break;
89     }
90 
91     if (mode & Otrunc) {
92         ret |= O_TRUNC;
93     }
94 
95     if (mode & Oappend) {
96         ret |= O_APPEND;
97     }
98 
99     if (mode & Oexcl) {
100         ret |= O_EXCL;
101     }
102 
103     return ret;
104 }
105 
/* One row of the 9P2000.L -> host open(2) flag translation table. */
struct dotl_openflag_map {
    int dotl_flag;  /* P9_DOTL_* bit as sent by the client */
    int open_flag;  /* host O_* bit it maps to */
};
110 
111 static int dotl_to_open_flags(int flags)
112 {
113     int i;
114     /*
115      * We have same bits for P9_DOTL_READONLY, P9_DOTL_WRONLY
116      * and P9_DOTL_NOACCESS
117      */
118     int oflags = flags & O_ACCMODE;
119 
120     struct dotl_openflag_map dotl_oflag_map[] = {
121         { P9_DOTL_CREATE, O_CREAT },
122         { P9_DOTL_EXCL, O_EXCL },
123         { P9_DOTL_NOCTTY , O_NOCTTY },
124         { P9_DOTL_TRUNC, O_TRUNC },
125         { P9_DOTL_APPEND, O_APPEND },
126         { P9_DOTL_NONBLOCK, O_NONBLOCK } ,
127         { P9_DOTL_DSYNC, O_DSYNC },
128         { P9_DOTL_FASYNC, FASYNC },
129         { P9_DOTL_DIRECT, O_DIRECT },
130         { P9_DOTL_LARGEFILE, O_LARGEFILE },
131         { P9_DOTL_DIRECTORY, O_DIRECTORY },
132         { P9_DOTL_NOFOLLOW, O_NOFOLLOW },
133         { P9_DOTL_NOATIME, O_NOATIME },
134         { P9_DOTL_SYNC, O_SYNC },
135     };
136 
137     for (i = 0; i < ARRAY_SIZE(dotl_oflag_map); i++) {
138         if (flags & dotl_oflag_map[i].dotl_flag) {
139             oflags |= dotl_oflag_map[i].open_flag;
140         }
141     }
142 
143     return oflags;
144 }
145 
146 void cred_init(FsCred *credp)
147 {
148     credp->fc_uid = -1;
149     credp->fc_gid = -1;
150     credp->fc_mode = -1;
151     credp->fc_rdev = -1;
152 }
153 
154 static int get_dotl_openflags(V9fsState *s, int oflags)
155 {
156     int flags;
157     /*
158      * Filter the client open flags
159      */
160     flags = dotl_to_open_flags(oflags);
161     flags &= ~(O_NOCTTY | O_ASYNC | O_CREAT);
162     /*
163      * Ignore direct disk access hint until the server supports it.
164      */
165     flags &= ~O_DIRECT;
166     return flags;
167 }
168 
169 void v9fs_path_init(V9fsPath *path)
170 {
171     path->data = NULL;
172     path->size = 0;
173 }
174 
175 void v9fs_path_free(V9fsPath *path)
176 {
177     g_free(path->data);
178     path->data = NULL;
179     path->size = 0;
180 }
181 
182 void v9fs_path_copy(V9fsPath *lhs, V9fsPath *rhs)
183 {
184     v9fs_path_free(lhs);
185     lhs->data = g_malloc(rhs->size);
186     memcpy(lhs->data, rhs->data, rhs->size);
187     lhs->size = rhs->size;
188 }
189 
190 int v9fs_name_to_path(V9fsState *s, V9fsPath *dirpath,
191                       const char *name, V9fsPath *path)
192 {
193     int err;
194     err = s->ops->name_to_path(&s->ctx, dirpath, name, path);
195     if (err < 0) {
196         err = -errno;
197     }
198     return err;
199 }
200 
201 /*
202  * Return TRUE if s1 is an ancestor of s2.
203  *
204  * E.g. "a/b" is an ancestor of "a/b/c" but not of "a/bc/d".
205  * As a special case, We treat s1 as ancestor of s2 if they are same!
206  */
207 static int v9fs_path_is_ancestor(V9fsPath *s1, V9fsPath *s2)
208 {
209     if (!strncmp(s1->data, s2->data, s1->size - 1)) {
210         if (s2->data[s1->size - 1] == '\0' || s2->data[s1->size - 1] == '/') {
211             return 1;
212         }
213     }
214     return 0;
215 }
216 
217 static size_t v9fs_string_size(V9fsString *str)
218 {
219     return str->size;
220 }
221 
222 /*
223  * returns 0 if fid got re-opened, 1 if not, < 0 on error */
224 static int v9fs_reopen_fid(V9fsPDU *pdu, V9fsFidState *f)
225 {
226     int err = 1;
227     if (f->fid_type == P9_FID_FILE) {
228         if (f->fs.fd == -1) {
229             do {
230                 err = v9fs_co_open(pdu, f, f->open_flags);
231             } while (err == -EINTR && !pdu->cancelled);
232         }
233     } else if (f->fid_type == P9_FID_DIR) {
234         if (f->fs.dir == NULL) {
235             do {
236                 err = v9fs_co_opendir(pdu, f);
237             } while (err == -EINTR && !pdu->cancelled);
238         }
239     }
240     return err;
241 }
242 
243 static V9fsFidState *get_fid(V9fsPDU *pdu, int32_t fid)
244 {
245     int err;
246     V9fsFidState *f;
247     V9fsState *s = pdu->s;
248 
249     for (f = s->fid_list; f; f = f->next) {
250         BUG_ON(f->clunked);
251         if (f->fid == fid) {
252             /*
253              * Update the fid ref upfront so that
254              * we don't get reclaimed when we yield
255              * in open later.
256              */
257             f->ref++;
258             /*
259              * check whether we need to reopen the
260              * file. We might have closed the fd
261              * while trying to free up some file
262              * descriptors.
263              */
264             err = v9fs_reopen_fid(pdu, f);
265             if (err < 0) {
266                 f->ref--;
267                 return NULL;
268             }
269             /*
270              * Mark the fid as referenced so that the LRU
271              * reclaim won't close the file descriptor
272              */
273             f->flags |= FID_REFERENCED;
274             return f;
275         }
276     }
277     return NULL;
278 }
279 
280 static V9fsFidState *alloc_fid(V9fsState *s, int32_t fid)
281 {
282     V9fsFidState *f;
283 
284     for (f = s->fid_list; f; f = f->next) {
285         /* If fid is already there return NULL */
286         BUG_ON(f->clunked);
287         if (f->fid == fid) {
288             return NULL;
289         }
290     }
291     f = g_malloc0(sizeof(V9fsFidState));
292     f->fid = fid;
293     f->fid_type = P9_FID_NONE;
294     f->ref = 1;
295     /*
296      * Mark the fid as referenced so that the LRU
297      * reclaim won't close the file descriptor
298      */
299     f->flags |= FID_REFERENCED;
300     f->next = s->fid_list;
301     s->fid_list = f;
302 
303     return f;
304 }
305 
306 static int v9fs_xattr_fid_clunk(V9fsPDU *pdu, V9fsFidState *fidp)
307 {
308     int retval = 0;
309 
310     if (fidp->fs.xattr.copied_len == -1) {
311         /* getxattr/listxattr fid */
312         goto free_value;
313     }
314     /*
315      * if this is fid for setxattr. clunk should
316      * result in setxattr localcall
317      */
318     if (fidp->fs.xattr.len != fidp->fs.xattr.copied_len) {
319         /* clunk after partial write */
320         retval = -EINVAL;
321         goto free_out;
322     }
323     if (fidp->fs.xattr.len) {
324         retval = v9fs_co_lsetxattr(pdu, &fidp->path, &fidp->fs.xattr.name,
325                                    fidp->fs.xattr.value,
326                                    fidp->fs.xattr.len,
327                                    fidp->fs.xattr.flags);
328     } else {
329         retval = v9fs_co_lremovexattr(pdu, &fidp->path, &fidp->fs.xattr.name);
330     }
331 free_out:
332     v9fs_string_free(&fidp->fs.xattr.name);
333 free_value:
334     g_free(fidp->fs.xattr.value);
335     return retval;
336 }
337 
338 static int free_fid(V9fsPDU *pdu, V9fsFidState *fidp)
339 {
340     int retval = 0;
341 
342     if (fidp->fid_type == P9_FID_FILE) {
343         /* If we reclaimed the fd no need to close */
344         if (fidp->fs.fd != -1) {
345             retval = v9fs_co_close(pdu, &fidp->fs);
346         }
347     } else if (fidp->fid_type == P9_FID_DIR) {
348         if (fidp->fs.dir != NULL) {
349             retval = v9fs_co_closedir(pdu, &fidp->fs);
350         }
351     } else if (fidp->fid_type == P9_FID_XATTR) {
352         retval = v9fs_xattr_fid_clunk(pdu, fidp);
353     }
354     v9fs_path_free(&fidp->path);
355     g_free(fidp);
356     return retval;
357 }
358 
359 static int put_fid(V9fsPDU *pdu, V9fsFidState *fidp)
360 {
361     BUG_ON(!fidp->ref);
362     fidp->ref--;
363     /*
364      * Don't free the fid if it is in reclaim list
365      */
366     if (!fidp->ref && fidp->clunked) {
367         if (fidp->fid == pdu->s->root_fid) {
368             /*
369              * if the clunked fid is root fid then we
370              * have unmounted the fs on the client side.
371              * delete the migration blocker. Ideally, this
372              * should be hooked to transport close notification
373              */
374             if (pdu->s->migration_blocker) {
375                 migrate_del_blocker(pdu->s->migration_blocker);
376                 error_free(pdu->s->migration_blocker);
377                 pdu->s->migration_blocker = NULL;
378             }
379         }
380         return free_fid(pdu, fidp);
381     }
382     return 0;
383 }
384 
385 static V9fsFidState *clunk_fid(V9fsState *s, int32_t fid)
386 {
387     V9fsFidState **fidpp, *fidp;
388 
389     for (fidpp = &s->fid_list; *fidpp; fidpp = &(*fidpp)->next) {
390         if ((*fidpp)->fid == fid) {
391             break;
392         }
393     }
394     if (*fidpp == NULL) {
395         return NULL;
396     }
397     fidp = *fidpp;
398     *fidpp = fidp->next;
399     fidp->clunked = 1;
400     return fidp;
401 }
402 
403 void v9fs_reclaim_fd(V9fsPDU *pdu)
404 {
405     int reclaim_count = 0;
406     V9fsState *s = pdu->s;
407     V9fsFidState *f, *reclaim_list = NULL;
408 
409     for (f = s->fid_list; f; f = f->next) {
410         /*
411          * Unlink fids cannot be reclaimed. Check
412          * for them and skip them. Also skip fids
413          * currently being operated on.
414          */
415         if (f->ref || f->flags & FID_NON_RECLAIMABLE) {
416             continue;
417         }
418         /*
419          * if it is a recently referenced fid
420          * we leave the fid untouched and clear the
421          * reference bit. We come back to it later
422          * in the next iteration. (a simple LRU without
423          * moving list elements around)
424          */
425         if (f->flags & FID_REFERENCED) {
426             f->flags &= ~FID_REFERENCED;
427             continue;
428         }
429         /*
430          * Add fids to reclaim list.
431          */
432         if (f->fid_type == P9_FID_FILE) {
433             if (f->fs.fd != -1) {
434                 /*
435                  * Up the reference count so that
436                  * a clunk request won't free this fid
437                  */
438                 f->ref++;
439                 f->rclm_lst = reclaim_list;
440                 reclaim_list = f;
441                 f->fs_reclaim.fd = f->fs.fd;
442                 f->fs.fd = -1;
443                 reclaim_count++;
444             }
445         } else if (f->fid_type == P9_FID_DIR) {
446             if (f->fs.dir != NULL) {
447                 /*
448                  * Up the reference count so that
449                  * a clunk request won't free this fid
450                  */
451                 f->ref++;
452                 f->rclm_lst = reclaim_list;
453                 reclaim_list = f;
454                 f->fs_reclaim.dir = f->fs.dir;
455                 f->fs.dir = NULL;
456                 reclaim_count++;
457             }
458         }
459         if (reclaim_count >= open_fd_rc) {
460             break;
461         }
462     }
463     /*
464      * Now close the fid in reclaim list. Free them if they
465      * are already clunked.
466      */
467     while (reclaim_list) {
468         f = reclaim_list;
469         reclaim_list = f->rclm_lst;
470         if (f->fid_type == P9_FID_FILE) {
471             v9fs_co_close(pdu, &f->fs_reclaim);
472         } else if (f->fid_type == P9_FID_DIR) {
473             v9fs_co_closedir(pdu, &f->fs_reclaim);
474         }
475         f->rclm_lst = NULL;
476         /*
477          * Now drop the fid reference, free it
478          * if clunked.
479          */
480         put_fid(pdu, f);
481     }
482 }
483 
484 static int v9fs_mark_fids_unreclaim(V9fsPDU *pdu, V9fsPath *path)
485 {
486     int err;
487     V9fsState *s = pdu->s;
488     V9fsFidState *fidp, head_fid;
489 
490     head_fid.next = s->fid_list;
491     for (fidp = s->fid_list; fidp; fidp = fidp->next) {
492         if (fidp->path.size != path->size) {
493             continue;
494         }
495         if (!memcmp(fidp->path.data, path->data, path->size)) {
496             /* Mark the fid non reclaimable. */
497             fidp->flags |= FID_NON_RECLAIMABLE;
498 
499             /* reopen the file/dir if already closed */
500             err = v9fs_reopen_fid(pdu, fidp);
501             if (err < 0) {
502                 return -1;
503             }
504             /*
505              * Go back to head of fid list because
506              * the list could have got updated when
507              * switched to the worker thread
508              */
509             if (err == 0) {
510                 fidp = &head_fid;
511             }
512         }
513     }
514     return 0;
515 }
516 
517 static void virtfs_reset(V9fsPDU *pdu)
518 {
519     V9fsState *s = pdu->s;
520     V9fsFidState *fidp = NULL;
521 
522     /* Free all fids */
523     while (s->fid_list) {
524         fidp = s->fid_list;
525         s->fid_list = fidp->next;
526 
527         if (fidp->ref) {
528             fidp->clunked = 1;
529         } else {
530             free_fid(pdu, fidp);
531         }
532     }
533     if (fidp) {
534         /* One or more unclunked fids found... */
535         error_report("9pfs:%s: One or more uncluncked fids "
536                      "found during reset", __func__);
537     }
538 }
539 
/* QID type bits set by stat_to_qid() */
#define P9_QID_TYPE_DIR         0x80
#define P9_QID_TYPE_SYMLINK     0x02

/* 9P stat mode bits above the 0777 permission bits */
#define P9_STAT_MODE_DIR        0x80000000
#define P9_STAT_MODE_APPEND     0x40000000
#define P9_STAT_MODE_EXCL       0x20000000
#define P9_STAT_MODE_MOUNT      0x10000000
#define P9_STAT_MODE_AUTH       0x08000000
#define P9_STAT_MODE_TMP        0x04000000
#define P9_STAT_MODE_SYMLINK    0x02000000
#define P9_STAT_MODE_LINK       0x01000000
#define P9_STAT_MODE_DEVICE     0x00800000
#define P9_STAT_MODE_NAMED_PIPE 0x00200000
#define P9_STAT_MODE_SOCKET     0x00100000
#define P9_STAT_MODE_SETUID     0x00080000
#define P9_STAT_MODE_SETGID     0x00040000
#define P9_STAT_MODE_SETVTX     0x00010000

/* All mode bits that describe the file type */
#define P9_STAT_MODE_TYPE_BITS (P9_STAT_MODE_DIR |          \
                                P9_STAT_MODE_SYMLINK |      \
                                P9_STAT_MODE_LINK |         \
                                P9_STAT_MODE_DEVICE |       \
                                P9_STAT_MODE_NAMED_PIPE |   \
                                P9_STAT_MODE_SOCKET)
564 
565 /* This is the algorithm from ufs in spfs */
566 static void stat_to_qid(const struct stat *stbuf, V9fsQID *qidp)
567 {
568     size_t size;
569 
570     memset(&qidp->path, 0, sizeof(qidp->path));
571     size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path));
572     memcpy(&qidp->path, &stbuf->st_ino, size);
573     qidp->version = stbuf->st_mtime ^ (stbuf->st_size << 8);
574     qidp->type = 0;
575     if (S_ISDIR(stbuf->st_mode)) {
576         qidp->type |= P9_QID_TYPE_DIR;
577     }
578     if (S_ISLNK(stbuf->st_mode)) {
579         qidp->type |= P9_QID_TYPE_SYMLINK;
580     }
581 }
582 
583 static int fid_to_qid(V9fsPDU *pdu, V9fsFidState *fidp, V9fsQID *qidp)
584 {
585     struct stat stbuf;
586     int err;
587 
588     err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
589     if (err < 0) {
590         return err;
591     }
592     stat_to_qid(&stbuf, qidp);
593     return 0;
594 }
595 
596 V9fsPDU *pdu_alloc(V9fsState *s)
597 {
598     V9fsPDU *pdu = NULL;
599 
600     if (!QLIST_EMPTY(&s->free_list)) {
601         pdu = QLIST_FIRST(&s->free_list);
602         QLIST_REMOVE(pdu, next);
603         QLIST_INSERT_HEAD(&s->active_list, pdu, next);
604     }
605     return pdu;
606 }
607 
608 void pdu_free(V9fsPDU *pdu)
609 {
610     if (pdu) {
611         V9fsState *s = pdu->s;
612         /*
613          * Cancelled pdu are added back to the freelist
614          * by flush request .
615          */
616         if (!pdu->cancelled) {
617             QLIST_REMOVE(pdu, next);
618             QLIST_INSERT_HEAD(&s->free_list, pdu, next);
619         }
620     }
621 }
622 
623 /*
624  * We don't do error checking for pdu_marshal/unmarshal here
625  * because we always expect to have enough space to encode
626  * error details
627  */
628 static void pdu_complete(V9fsPDU *pdu, ssize_t len)
629 {
630     int8_t id = pdu->id + 1; /* Response */
631     V9fsState *s = pdu->s;
632 
633     if (len < 0) {
634         int err = -len;
635         len = 7;
636 
637         if (s->proto_version != V9FS_PROTO_2000L) {
638             V9fsString str;
639 
640             str.data = strerror(err);
641             str.size = strlen(str.data);
642 
643             len += pdu_marshal(pdu, len, "s", &str);
644             id = P9_RERROR;
645         }
646 
647         len += pdu_marshal(pdu, len, "d", err);
648 
649         if (s->proto_version == V9FS_PROTO_2000L) {
650             id = P9_RLERROR;
651         }
652         trace_v9fs_rerror(pdu->tag, pdu->id, err); /* Trace ERROR */
653     }
654 
655     /* fill out the header */
656     pdu_marshal(pdu, 0, "dbw", (int32_t)len, id, pdu->tag);
657 
658     /* keep these in sync */
659     pdu->size = len;
660     pdu->id = id;
661 
662     pdu_push_and_notify(pdu);
663 
664     /* Now wakeup anybody waiting in flush for this request */
665     qemu_co_queue_next(&pdu->complete);
666 
667     pdu_free(pdu);
668 }
669 
670 static mode_t v9mode_to_mode(uint32_t mode, V9fsString *extension)
671 {
672     mode_t ret;
673 
674     ret = mode & 0777;
675     if (mode & P9_STAT_MODE_DIR) {
676         ret |= S_IFDIR;
677     }
678 
679     if (mode & P9_STAT_MODE_SYMLINK) {
680         ret |= S_IFLNK;
681     }
682     if (mode & P9_STAT_MODE_SOCKET) {
683         ret |= S_IFSOCK;
684     }
685     if (mode & P9_STAT_MODE_NAMED_PIPE) {
686         ret |= S_IFIFO;
687     }
688     if (mode & P9_STAT_MODE_DEVICE) {
689         if (extension->size && extension->data[0] == 'c') {
690             ret |= S_IFCHR;
691         } else {
692             ret |= S_IFBLK;
693         }
694     }
695 
696     if (!(ret&~0777)) {
697         ret |= S_IFREG;
698     }
699 
700     if (mode & P9_STAT_MODE_SETUID) {
701         ret |= S_ISUID;
702     }
703     if (mode & P9_STAT_MODE_SETGID) {
704         ret |= S_ISGID;
705     }
706     if (mode & P9_STAT_MODE_SETVTX) {
707         ret |= S_ISVTX;
708     }
709 
710     return ret;
711 }
712 
713 static int donttouch_stat(V9fsStat *stat)
714 {
715     if (stat->type == -1 &&
716         stat->dev == -1 &&
717         stat->qid.type == -1 &&
718         stat->qid.version == -1 &&
719         stat->qid.path == -1 &&
720         stat->mode == -1 &&
721         stat->atime == -1 &&
722         stat->mtime == -1 &&
723         stat->length == -1 &&
724         !stat->name.size &&
725         !stat->uid.size &&
726         !stat->gid.size &&
727         !stat->muid.size &&
728         stat->n_uid == -1 &&
729         stat->n_gid == -1 &&
730         stat->n_muid == -1) {
731         return 1;
732     }
733 
734     return 0;
735 }
736 
737 static void v9fs_stat_init(V9fsStat *stat)
738 {
739     v9fs_string_init(&stat->name);
740     v9fs_string_init(&stat->uid);
741     v9fs_string_init(&stat->gid);
742     v9fs_string_init(&stat->muid);
743     v9fs_string_init(&stat->extension);
744 }
745 
746 static void v9fs_stat_free(V9fsStat *stat)
747 {
748     v9fs_string_free(&stat->name);
749     v9fs_string_free(&stat->uid);
750     v9fs_string_free(&stat->gid);
751     v9fs_string_free(&stat->muid);
752     v9fs_string_free(&stat->extension);
753 }
754 
755 static uint32_t stat_to_v9mode(const struct stat *stbuf)
756 {
757     uint32_t mode;
758 
759     mode = stbuf->st_mode & 0777;
760     if (S_ISDIR(stbuf->st_mode)) {
761         mode |= P9_STAT_MODE_DIR;
762     }
763 
764     if (S_ISLNK(stbuf->st_mode)) {
765         mode |= P9_STAT_MODE_SYMLINK;
766     }
767 
768     if (S_ISSOCK(stbuf->st_mode)) {
769         mode |= P9_STAT_MODE_SOCKET;
770     }
771 
772     if (S_ISFIFO(stbuf->st_mode)) {
773         mode |= P9_STAT_MODE_NAMED_PIPE;
774     }
775 
776     if (S_ISBLK(stbuf->st_mode) || S_ISCHR(stbuf->st_mode)) {
777         mode |= P9_STAT_MODE_DEVICE;
778     }
779 
780     if (stbuf->st_mode & S_ISUID) {
781         mode |= P9_STAT_MODE_SETUID;
782     }
783 
784     if (stbuf->st_mode & S_ISGID) {
785         mode |= P9_STAT_MODE_SETGID;
786     }
787 
788     if (stbuf->st_mode & S_ISVTX) {
789         mode |= P9_STAT_MODE_SETVTX;
790     }
791 
792     return mode;
793 }
794 
795 static int stat_to_v9stat(V9fsPDU *pdu, V9fsPath *name,
796                             const struct stat *stbuf,
797                             V9fsStat *v9stat)
798 {
799     int err;
800     const char *str;
801 
802     memset(v9stat, 0, sizeof(*v9stat));
803 
804     stat_to_qid(stbuf, &v9stat->qid);
805     v9stat->mode = stat_to_v9mode(stbuf);
806     v9stat->atime = stbuf->st_atime;
807     v9stat->mtime = stbuf->st_mtime;
808     v9stat->length = stbuf->st_size;
809 
810     v9fs_string_null(&v9stat->uid);
811     v9fs_string_null(&v9stat->gid);
812     v9fs_string_null(&v9stat->muid);
813 
814     v9stat->n_uid = stbuf->st_uid;
815     v9stat->n_gid = stbuf->st_gid;
816     v9stat->n_muid = 0;
817 
818     v9fs_string_null(&v9stat->extension);
819 
820     if (v9stat->mode & P9_STAT_MODE_SYMLINK) {
821         err = v9fs_co_readlink(pdu, name, &v9stat->extension);
822         if (err < 0) {
823             return err;
824         }
825     } else if (v9stat->mode & P9_STAT_MODE_DEVICE) {
826         v9fs_string_sprintf(&v9stat->extension, "%c %u %u",
827                 S_ISCHR(stbuf->st_mode) ? 'c' : 'b',
828                 major(stbuf->st_rdev), minor(stbuf->st_rdev));
829     } else if (S_ISDIR(stbuf->st_mode) || S_ISREG(stbuf->st_mode)) {
830         v9fs_string_sprintf(&v9stat->extension, "%s %lu",
831                 "HARDLINKCOUNT", (unsigned long)stbuf->st_nlink);
832     }
833 
834     str = strrchr(name->data, '/');
835     if (str) {
836         str += 1;
837     } else {
838         str = name->data;
839     }
840 
841     v9fs_string_sprintf(&v9stat->name, "%s", str);
842 
843     v9stat->size = 61 +
844         v9fs_string_size(&v9stat->name) +
845         v9fs_string_size(&v9stat->uid) +
846         v9fs_string_size(&v9stat->gid) +
847         v9fs_string_size(&v9stat->muid) +
848         v9fs_string_size(&v9stat->extension);
849     return 0;
850 }
851 
/* Bits of the getattr request/result mask, one per stat field */
#define P9_STATS_MODE          0x00000001ULL
#define P9_STATS_NLINK         0x00000002ULL
#define P9_STATS_UID           0x00000004ULL
#define P9_STATS_GID           0x00000008ULL
#define P9_STATS_RDEV          0x00000010ULL
#define P9_STATS_ATIME         0x00000020ULL
#define P9_STATS_MTIME         0x00000040ULL
#define P9_STATS_CTIME         0x00000080ULL
#define P9_STATS_INO           0x00000100ULL
#define P9_STATS_SIZE          0x00000200ULL
#define P9_STATS_BLOCKS        0x00000400ULL

/* Fields beyond the basic set */
#define P9_STATS_BTIME         0x00000800ULL
#define P9_STATS_GEN           0x00001000ULL
#define P9_STATS_DATA_VERSION  0x00002000ULL

#define P9_STATS_BASIC         0x000007ffULL /* Mask for fields up to BLOCKS */
#define P9_STATS_ALL           0x00003fffULL /* Mask for All fields above */
870 
871 
872 static void stat_to_v9stat_dotl(V9fsState *s, const struct stat *stbuf,
873                                 V9fsStatDotl *v9lstat)
874 {
875     memset(v9lstat, 0, sizeof(*v9lstat));
876 
877     v9lstat->st_mode = stbuf->st_mode;
878     v9lstat->st_nlink = stbuf->st_nlink;
879     v9lstat->st_uid = stbuf->st_uid;
880     v9lstat->st_gid = stbuf->st_gid;
881     v9lstat->st_rdev = stbuf->st_rdev;
882     v9lstat->st_size = stbuf->st_size;
883     v9lstat->st_blksize = stbuf->st_blksize;
884     v9lstat->st_blocks = stbuf->st_blocks;
885     v9lstat->st_atime_sec = stbuf->st_atime;
886     v9lstat->st_atime_nsec = stbuf->st_atim.tv_nsec;
887     v9lstat->st_mtime_sec = stbuf->st_mtime;
888     v9lstat->st_mtime_nsec = stbuf->st_mtim.tv_nsec;
889     v9lstat->st_ctime_sec = stbuf->st_ctime;
890     v9lstat->st_ctime_nsec = stbuf->st_ctim.tv_nsec;
891     /* Currently we only support BASIC fields in stat */
892     v9lstat->st_result_mask = P9_STATS_BASIC;
893 
894     stat_to_qid(stbuf, &v9lstat->qid);
895 }
896 
/*
 * Debug helper: print the @cnt entries of scatter/gather list @sg to
 * stdout as "sg[cnt]: {(base, len), ...}".
 */
static void print_sg(struct iovec *sg, int cnt)
{
    int i;

    printf("sg[%d]: {", cnt);
    for (i = 0; i < cnt; i++) {
        if (i) {
            printf(", ");
        }
        /* iov_len is a size_t, so it takes %zu rather than %zd */
        printf("(%p, %zu)", sg[i].iov_base, sg[i].iov_len);
    }
    printf("}\n");
}
910 
911 /* Will call this only for path name based fid */
912 static void v9fs_fix_path(V9fsPath *dst, V9fsPath *src, int len)
913 {
914     V9fsPath str;
915     v9fs_path_init(&str);
916     v9fs_path_copy(&str, dst);
917     v9fs_string_sprintf((V9fsString *)dst, "%s%s", src->data, str.data+len);
918     v9fs_path_free(&str);
919     /* +1 to include terminating NULL */
920     dst->size++;
921 }
922 
923 static inline bool is_ro_export(FsContext *ctx)
924 {
925     return ctx->export_flags & V9FS_RDONLY;
926 }
927 
928 static void v9fs_version(void *opaque)
929 {
930     ssize_t err;
931     V9fsPDU *pdu = opaque;
932     V9fsState *s = pdu->s;
933     V9fsString version;
934     size_t offset = 7;
935 
936     v9fs_string_init(&version);
937     err = pdu_unmarshal(pdu, offset, "ds", &s->msize, &version);
938     if (err < 0) {
939         offset = err;
940         goto out;
941     }
942     trace_v9fs_version(pdu->tag, pdu->id, s->msize, version.data);
943 
944     virtfs_reset(pdu);
945 
946     if (!strcmp(version.data, "9P2000.u")) {
947         s->proto_version = V9FS_PROTO_2000U;
948     } else if (!strcmp(version.data, "9P2000.L")) {
949         s->proto_version = V9FS_PROTO_2000L;
950     } else {
951         v9fs_string_sprintf(&version, "unknown");
952     }
953 
954     err = pdu_marshal(pdu, offset, "ds", s->msize, &version);
955     if (err < 0) {
956         offset = err;
957         goto out;
958     }
959     offset += err;
960     trace_v9fs_version_return(pdu->tag, pdu->id, s->msize, version.data);
961 out:
962     pdu_complete(pdu, offset);
963     v9fs_string_free(&version);
964 }
965 
966 static void v9fs_attach(void *opaque)
967 {
968     V9fsPDU *pdu = opaque;
969     V9fsState *s = pdu->s;
970     int32_t fid, afid, n_uname;
971     V9fsString uname, aname;
972     V9fsFidState *fidp;
973     size_t offset = 7;
974     V9fsQID qid;
975     ssize_t err;
976 
977     v9fs_string_init(&uname);
978     v9fs_string_init(&aname);
979     err = pdu_unmarshal(pdu, offset, "ddssd", &fid,
980                         &afid, &uname, &aname, &n_uname);
981     if (err < 0) {
982         goto out_nofid;
983     }
984     trace_v9fs_attach(pdu->tag, pdu->id, fid, afid, uname.data, aname.data);
985 
986     fidp = alloc_fid(s, fid);
987     if (fidp == NULL) {
988         err = -EINVAL;
989         goto out_nofid;
990     }
991     fidp->uid = n_uname;
992     err = v9fs_co_name_to_path(pdu, NULL, "/", &fidp->path);
993     if (err < 0) {
994         err = -EINVAL;
995         clunk_fid(s, fid);
996         goto out;
997     }
998     err = fid_to_qid(pdu, fidp, &qid);
999     if (err < 0) {
1000         err = -EINVAL;
1001         clunk_fid(s, fid);
1002         goto out;
1003     }
1004     err = pdu_marshal(pdu, offset, "Q", &qid);
1005     if (err < 0) {
1006         clunk_fid(s, fid);
1007         goto out;
1008     }
1009     err += offset;
1010     trace_v9fs_attach_return(pdu->tag, pdu->id,
1011                              qid.type, qid.version, qid.path);
1012     /*
1013      * disable migration if we haven't done already.
1014      * attach could get called multiple times for the same export.
1015      */
1016     if (!s->migration_blocker) {
1017         s->root_fid = fid;
1018         error_setg(&s->migration_blocker,
1019                    "Migration is disabled when VirtFS export path '%s' is mounted in the guest using mount_tag '%s'",
1020                    s->ctx.fs_root ? s->ctx.fs_root : "NULL", s->tag);
1021         migrate_add_blocker(s->migration_blocker);
1022     }
1023 out:
1024     put_fid(pdu, fidp);
1025 out_nofid:
1026     pdu_complete(pdu, err);
1027     v9fs_string_free(&uname);
1028     v9fs_string_free(&aname);
1029 }
1030 
1031 static void v9fs_stat(void *opaque)
1032 {
1033     int32_t fid;
1034     V9fsStat v9stat;
1035     ssize_t err = 0;
1036     size_t offset = 7;
1037     struct stat stbuf;
1038     V9fsFidState *fidp;
1039     V9fsPDU *pdu = opaque;
1040 
1041     err = pdu_unmarshal(pdu, offset, "d", &fid);
1042     if (err < 0) {
1043         goto out_nofid;
1044     }
1045     trace_v9fs_stat(pdu->tag, pdu->id, fid);
1046 
1047     fidp = get_fid(pdu, fid);
1048     if (fidp == NULL) {
1049         err = -ENOENT;
1050         goto out_nofid;
1051     }
1052     err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1053     if (err < 0) {
1054         goto out;
1055     }
1056     err = stat_to_v9stat(pdu, &fidp->path, &stbuf, &v9stat);
1057     if (err < 0) {
1058         goto out;
1059     }
1060     err = pdu_marshal(pdu, offset, "wS", 0, &v9stat);
1061     if (err < 0) {
1062         v9fs_stat_free(&v9stat);
1063         goto out;
1064     }
1065     trace_v9fs_stat_return(pdu->tag, pdu->id, v9stat.mode,
1066                            v9stat.atime, v9stat.mtime, v9stat.length);
1067     err += offset;
1068     v9fs_stat_free(&v9stat);
1069 out:
1070     put_fid(pdu, fidp);
1071 out_nofid:
1072     pdu_complete(pdu, err);
1073 }
1074 
1075 static void v9fs_getattr(void *opaque)
1076 {
1077     int32_t fid;
1078     size_t offset = 7;
1079     ssize_t retval = 0;
1080     struct stat stbuf;
1081     V9fsFidState *fidp;
1082     uint64_t request_mask;
1083     V9fsStatDotl v9stat_dotl;
1084     V9fsPDU *pdu = opaque;
1085     V9fsState *s = pdu->s;
1086 
1087     retval = pdu_unmarshal(pdu, offset, "dq", &fid, &request_mask);
1088     if (retval < 0) {
1089         goto out_nofid;
1090     }
1091     trace_v9fs_getattr(pdu->tag, pdu->id, fid, request_mask);
1092 
1093     fidp = get_fid(pdu, fid);
1094     if (fidp == NULL) {
1095         retval = -ENOENT;
1096         goto out_nofid;
1097     }
1098     /*
1099      * Currently we only support BASIC fields in stat, so there is no
1100      * need to look at request_mask.
1101      */
1102     retval = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1103     if (retval < 0) {
1104         goto out;
1105     }
1106     stat_to_v9stat_dotl(s, &stbuf, &v9stat_dotl);
1107 
1108     /*  fill st_gen if requested and supported by underlying fs */
1109     if (request_mask & P9_STATS_GEN) {
1110         retval = v9fs_co_st_gen(pdu, &fidp->path, stbuf.st_mode, &v9stat_dotl);
1111         switch (retval) {
1112         case 0:
1113             /* we have valid st_gen: update result mask */
1114             v9stat_dotl.st_result_mask |= P9_STATS_GEN;
1115             break;
1116         case -EINTR:
1117             /* request cancelled, e.g. by Tflush */
1118             goto out;
1119         default:
1120             /* failed to get st_gen: not fatal, ignore */
1121             break;
1122         }
1123     }
1124     retval = pdu_marshal(pdu, offset, "A", &v9stat_dotl);
1125     if (retval < 0) {
1126         goto out;
1127     }
1128     retval += offset;
1129     trace_v9fs_getattr_return(pdu->tag, pdu->id, v9stat_dotl.st_result_mask,
1130                               v9stat_dotl.st_mode, v9stat_dotl.st_uid,
1131                               v9stat_dotl.st_gid);
1132 out:
1133     put_fid(pdu, fidp);
1134 out_nofid:
1135     pdu_complete(pdu, retval);
1136 }
1137 
/*
 * Attribute flags: bits tested against V9fsIattr.valid in v9fs_setattr()
 * to decide which attributes of the file have to be changed.
 */
#define P9_ATTR_MODE       (1 << 0)
#define P9_ATTR_UID        (1 << 1)
#define P9_ATTR_GID        (1 << 2)
#define P9_ATTR_SIZE       (1 << 3)
#define P9_ATTR_ATIME      (1 << 4)
#define P9_ATTR_MTIME      (1 << 5)
#define P9_ATTR_CTIME      (1 << 6)
/* When set, use the supplied atime/mtime value instead of "now". */
#define P9_ATTR_ATIME_SET  (1 << 7)
#define P9_ATTR_MTIME_SET  (1 << 8)

/* Bits 0..6 (MODE through CTIME); the two *_SET modifiers are excluded. */
#define P9_ATTR_MASK    127
1150 
1151 static void v9fs_setattr(void *opaque)
1152 {
1153     int err = 0;
1154     int32_t fid;
1155     V9fsFidState *fidp;
1156     size_t offset = 7;
1157     V9fsIattr v9iattr;
1158     V9fsPDU *pdu = opaque;
1159 
1160     err = pdu_unmarshal(pdu, offset, "dI", &fid, &v9iattr);
1161     if (err < 0) {
1162         goto out_nofid;
1163     }
1164 
1165     fidp = get_fid(pdu, fid);
1166     if (fidp == NULL) {
1167         err = -EINVAL;
1168         goto out_nofid;
1169     }
1170     if (v9iattr.valid & P9_ATTR_MODE) {
1171         err = v9fs_co_chmod(pdu, &fidp->path, v9iattr.mode);
1172         if (err < 0) {
1173             goto out;
1174         }
1175     }
1176     if (v9iattr.valid & (P9_ATTR_ATIME | P9_ATTR_MTIME)) {
1177         struct timespec times[2];
1178         if (v9iattr.valid & P9_ATTR_ATIME) {
1179             if (v9iattr.valid & P9_ATTR_ATIME_SET) {
1180                 times[0].tv_sec = v9iattr.atime_sec;
1181                 times[0].tv_nsec = v9iattr.atime_nsec;
1182             } else {
1183                 times[0].tv_nsec = UTIME_NOW;
1184             }
1185         } else {
1186             times[0].tv_nsec = UTIME_OMIT;
1187         }
1188         if (v9iattr.valid & P9_ATTR_MTIME) {
1189             if (v9iattr.valid & P9_ATTR_MTIME_SET) {
1190                 times[1].tv_sec = v9iattr.mtime_sec;
1191                 times[1].tv_nsec = v9iattr.mtime_nsec;
1192             } else {
1193                 times[1].tv_nsec = UTIME_NOW;
1194             }
1195         } else {
1196             times[1].tv_nsec = UTIME_OMIT;
1197         }
1198         err = v9fs_co_utimensat(pdu, &fidp->path, times);
1199         if (err < 0) {
1200             goto out;
1201         }
1202     }
1203     /*
1204      * If the only valid entry in iattr is ctime we can call
1205      * chown(-1,-1) to update the ctime of the file
1206      */
1207     if ((v9iattr.valid & (P9_ATTR_UID | P9_ATTR_GID)) ||
1208         ((v9iattr.valid & P9_ATTR_CTIME)
1209          && !((v9iattr.valid & P9_ATTR_MASK) & ~P9_ATTR_CTIME))) {
1210         if (!(v9iattr.valid & P9_ATTR_UID)) {
1211             v9iattr.uid = -1;
1212         }
1213         if (!(v9iattr.valid & P9_ATTR_GID)) {
1214             v9iattr.gid = -1;
1215         }
1216         err = v9fs_co_chown(pdu, &fidp->path, v9iattr.uid,
1217                             v9iattr.gid);
1218         if (err < 0) {
1219             goto out;
1220         }
1221     }
1222     if (v9iattr.valid & (P9_ATTR_SIZE)) {
1223         err = v9fs_co_truncate(pdu, &fidp->path, v9iattr.size);
1224         if (err < 0) {
1225             goto out;
1226         }
1227     }
1228     err = offset;
1229 out:
1230     put_fid(pdu, fidp);
1231 out_nofid:
1232     pdu_complete(pdu, err);
1233 }
1234 
1235 static int v9fs_walk_marshal(V9fsPDU *pdu, uint16_t nwnames, V9fsQID *qids)
1236 {
1237     int i;
1238     ssize_t err;
1239     size_t offset = 7;
1240 
1241     err = pdu_marshal(pdu, offset, "w", nwnames);
1242     if (err < 0) {
1243         return err;
1244     }
1245     offset += err;
1246     for (i = 0; i < nwnames; i++) {
1247         err = pdu_marshal(pdu, offset, "Q", &qids[i]);
1248         if (err < 0) {
1249             return err;
1250         }
1251         offset += err;
1252     }
1253     return offset;
1254 }
1255 
1256 static void v9fs_walk(void *opaque)
1257 {
1258     int name_idx;
1259     V9fsQID *qids = NULL;
1260     int i, err = 0;
1261     V9fsPath dpath, path;
1262     uint16_t nwnames;
1263     struct stat stbuf;
1264     size_t offset = 7;
1265     int32_t fid, newfid;
1266     V9fsString *wnames = NULL;
1267     V9fsFidState *fidp;
1268     V9fsFidState *newfidp = NULL;
1269     V9fsPDU *pdu = opaque;
1270     V9fsState *s = pdu->s;
1271 
1272     err = pdu_unmarshal(pdu, offset, "ddw", &fid, &newfid, &nwnames);
1273     if (err < 0) {
1274         pdu_complete(pdu, err);
1275         return ;
1276     }
1277     offset += err;
1278 
1279     trace_v9fs_walk(pdu->tag, pdu->id, fid, newfid, nwnames);
1280 
1281     if (nwnames && nwnames <= P9_MAXWELEM) {
1282         wnames = g_malloc0(sizeof(wnames[0]) * nwnames);
1283         qids   = g_malloc0(sizeof(qids[0]) * nwnames);
1284         for (i = 0; i < nwnames; i++) {
1285             err = pdu_unmarshal(pdu, offset, "s", &wnames[i]);
1286             if (err < 0) {
1287                 goto out_nofid;
1288             }
1289             offset += err;
1290         }
1291     } else if (nwnames > P9_MAXWELEM) {
1292         err = -EINVAL;
1293         goto out_nofid;
1294     }
1295     fidp = get_fid(pdu, fid);
1296     if (fidp == NULL) {
1297         err = -ENOENT;
1298         goto out_nofid;
1299     }
1300     v9fs_path_init(&dpath);
1301     v9fs_path_init(&path);
1302     /*
1303      * Both dpath and path initially poin to fidp.
1304      * Needed to handle request with nwnames == 0
1305      */
1306     v9fs_path_copy(&dpath, &fidp->path);
1307     v9fs_path_copy(&path, &fidp->path);
1308     for (name_idx = 0; name_idx < nwnames; name_idx++) {
1309         err = v9fs_co_name_to_path(pdu, &dpath, wnames[name_idx].data, &path);
1310         if (err < 0) {
1311             goto out;
1312         }
1313         err = v9fs_co_lstat(pdu, &path, &stbuf);
1314         if (err < 0) {
1315             goto out;
1316         }
1317         stat_to_qid(&stbuf, &qids[name_idx]);
1318         v9fs_path_copy(&dpath, &path);
1319     }
1320     if (fid == newfid) {
1321         BUG_ON(fidp->fid_type != P9_FID_NONE);
1322         v9fs_path_copy(&fidp->path, &path);
1323     } else {
1324         newfidp = alloc_fid(s, newfid);
1325         if (newfidp == NULL) {
1326             err = -EINVAL;
1327             goto out;
1328         }
1329         newfidp->uid = fidp->uid;
1330         v9fs_path_copy(&newfidp->path, &path);
1331     }
1332     err = v9fs_walk_marshal(pdu, nwnames, qids);
1333     trace_v9fs_walk_return(pdu->tag, pdu->id, nwnames, qids);
1334 out:
1335     put_fid(pdu, fidp);
1336     if (newfidp) {
1337         put_fid(pdu, newfidp);
1338     }
1339     v9fs_path_free(&dpath);
1340     v9fs_path_free(&path);
1341 out_nofid:
1342     pdu_complete(pdu, err);
1343     if (nwnames && nwnames <= P9_MAXWELEM) {
1344         for (name_idx = 0; name_idx < nwnames; name_idx++) {
1345             v9fs_string_free(&wnames[name_idx]);
1346         }
1347         g_free(wnames);
1348         g_free(qids);
1349     }
1350 }
1351 
1352 static int32_t get_iounit(V9fsPDU *pdu, V9fsPath *path)
1353 {
1354     struct statfs stbuf;
1355     int32_t iounit = 0;
1356     V9fsState *s = pdu->s;
1357 
1358     /*
1359      * iounit should be multiples of f_bsize (host filesystem block size
1360      * and as well as less than (client msize - P9_IOHDRSZ))
1361      */
1362     if (!v9fs_co_statfs(pdu, path, &stbuf)) {
1363         iounit = stbuf.f_bsize;
1364         iounit *= (s->msize - P9_IOHDRSZ)/stbuf.f_bsize;
1365     }
1366     if (!iounit) {
1367         iounit = s->msize - P9_IOHDRSZ;
1368     }
1369     return iounit;
1370 }
1371 
1372 static void v9fs_open(void *opaque)
1373 {
1374     int flags;
1375     int32_t fid;
1376     int32_t mode;
1377     V9fsQID qid;
1378     int iounit = 0;
1379     ssize_t err = 0;
1380     size_t offset = 7;
1381     struct stat stbuf;
1382     V9fsFidState *fidp;
1383     V9fsPDU *pdu = opaque;
1384     V9fsState *s = pdu->s;
1385 
1386     if (s->proto_version == V9FS_PROTO_2000L) {
1387         err = pdu_unmarshal(pdu, offset, "dd", &fid, &mode);
1388     } else {
1389         uint8_t modebyte;
1390         err = pdu_unmarshal(pdu, offset, "db", &fid, &modebyte);
1391         mode = modebyte;
1392     }
1393     if (err < 0) {
1394         goto out_nofid;
1395     }
1396     trace_v9fs_open(pdu->tag, pdu->id, fid, mode);
1397 
1398     fidp = get_fid(pdu, fid);
1399     if (fidp == NULL) {
1400         err = -ENOENT;
1401         goto out_nofid;
1402     }
1403     BUG_ON(fidp->fid_type != P9_FID_NONE);
1404 
1405     err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1406     if (err < 0) {
1407         goto out;
1408     }
1409     stat_to_qid(&stbuf, &qid);
1410     if (S_ISDIR(stbuf.st_mode)) {
1411         err = v9fs_co_opendir(pdu, fidp);
1412         if (err < 0) {
1413             goto out;
1414         }
1415         fidp->fid_type = P9_FID_DIR;
1416         err = pdu_marshal(pdu, offset, "Qd", &qid, 0);
1417         if (err < 0) {
1418             goto out;
1419         }
1420         err += offset;
1421     } else {
1422         if (s->proto_version == V9FS_PROTO_2000L) {
1423             flags = get_dotl_openflags(s, mode);
1424         } else {
1425             flags = omode_to_uflags(mode);
1426         }
1427         if (is_ro_export(&s->ctx)) {
1428             if (mode & O_WRONLY || mode & O_RDWR ||
1429                 mode & O_APPEND || mode & O_TRUNC) {
1430                 err = -EROFS;
1431                 goto out;
1432             }
1433         }
1434         err = v9fs_co_open(pdu, fidp, flags);
1435         if (err < 0) {
1436             goto out;
1437         }
1438         fidp->fid_type = P9_FID_FILE;
1439         fidp->open_flags = flags;
1440         if (flags & O_EXCL) {
1441             /*
1442              * We let the host file system do O_EXCL check
1443              * We should not reclaim such fd
1444              */
1445             fidp->flags |= FID_NON_RECLAIMABLE;
1446         }
1447         iounit = get_iounit(pdu, &fidp->path);
1448         err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
1449         if (err < 0) {
1450             goto out;
1451         }
1452         err += offset;
1453     }
1454     trace_v9fs_open_return(pdu->tag, pdu->id,
1455                            qid.type, qid.version, qid.path, iounit);
1456 out:
1457     put_fid(pdu, fidp);
1458 out_nofid:
1459     pdu_complete(pdu, err);
1460 }
1461 
1462 static void v9fs_lcreate(void *opaque)
1463 {
1464     int32_t dfid, flags, mode;
1465     gid_t gid;
1466     ssize_t err = 0;
1467     ssize_t offset = 7;
1468     V9fsString name;
1469     V9fsFidState *fidp;
1470     struct stat stbuf;
1471     V9fsQID qid;
1472     int32_t iounit;
1473     V9fsPDU *pdu = opaque;
1474 
1475     v9fs_string_init(&name);
1476     err = pdu_unmarshal(pdu, offset, "dsddd", &dfid,
1477                         &name, &flags, &mode, &gid);
1478     if (err < 0) {
1479         goto out_nofid;
1480     }
1481     trace_v9fs_lcreate(pdu->tag, pdu->id, dfid, flags, mode, gid);
1482 
1483     fidp = get_fid(pdu, dfid);
1484     if (fidp == NULL) {
1485         err = -ENOENT;
1486         goto out_nofid;
1487     }
1488 
1489     flags = get_dotl_openflags(pdu->s, flags);
1490     err = v9fs_co_open2(pdu, fidp, &name, gid,
1491                         flags | O_CREAT, mode, &stbuf);
1492     if (err < 0) {
1493         goto out;
1494     }
1495     fidp->fid_type = P9_FID_FILE;
1496     fidp->open_flags = flags;
1497     if (flags & O_EXCL) {
1498         /*
1499          * We let the host file system do O_EXCL check
1500          * We should not reclaim such fd
1501          */
1502         fidp->flags |= FID_NON_RECLAIMABLE;
1503     }
1504     iounit =  get_iounit(pdu, &fidp->path);
1505     stat_to_qid(&stbuf, &qid);
1506     err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
1507     if (err < 0) {
1508         goto out;
1509     }
1510     err += offset;
1511     trace_v9fs_lcreate_return(pdu->tag, pdu->id,
1512                               qid.type, qid.version, qid.path, iounit);
1513 out:
1514     put_fid(pdu, fidp);
1515 out_nofid:
1516     pdu_complete(pdu, err);
1517     v9fs_string_free(&name);
1518 }
1519 
1520 static void v9fs_fsync(void *opaque)
1521 {
1522     int err;
1523     int32_t fid;
1524     int datasync;
1525     size_t offset = 7;
1526     V9fsFidState *fidp;
1527     V9fsPDU *pdu = opaque;
1528 
1529     err = pdu_unmarshal(pdu, offset, "dd", &fid, &datasync);
1530     if (err < 0) {
1531         goto out_nofid;
1532     }
1533     trace_v9fs_fsync(pdu->tag, pdu->id, fid, datasync);
1534 
1535     fidp = get_fid(pdu, fid);
1536     if (fidp == NULL) {
1537         err = -ENOENT;
1538         goto out_nofid;
1539     }
1540     err = v9fs_co_fsync(pdu, fidp, datasync);
1541     if (!err) {
1542         err = offset;
1543     }
1544     put_fid(pdu, fidp);
1545 out_nofid:
1546     pdu_complete(pdu, err);
1547 }
1548 
1549 static void v9fs_clunk(void *opaque)
1550 {
1551     int err;
1552     int32_t fid;
1553     size_t offset = 7;
1554     V9fsFidState *fidp;
1555     V9fsPDU *pdu = opaque;
1556     V9fsState *s = pdu->s;
1557 
1558     err = pdu_unmarshal(pdu, offset, "d", &fid);
1559     if (err < 0) {
1560         goto out_nofid;
1561     }
1562     trace_v9fs_clunk(pdu->tag, pdu->id, fid);
1563 
1564     fidp = clunk_fid(s, fid);
1565     if (fidp == NULL) {
1566         err = -ENOENT;
1567         goto out_nofid;
1568     }
1569     /*
1570      * Bump the ref so that put_fid will
1571      * free the fid.
1572      */
1573     fidp->ref++;
1574     err = put_fid(pdu, fidp);
1575     if (!err) {
1576         err = offset;
1577     }
1578 out_nofid:
1579     pdu_complete(pdu, err);
1580 }
1581 
1582 static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
1583                            uint64_t off, uint32_t max_count)
1584 {
1585     ssize_t err;
1586     size_t offset = 7;
1587     int read_count;
1588     int64_t xattr_len;
1589     V9fsVirtioState *v = container_of(s, V9fsVirtioState, state);
1590     VirtQueueElement *elem = &v->elems[pdu->idx];
1591 
1592     xattr_len = fidp->fs.xattr.len;
1593     read_count = xattr_len - off;
1594     if (read_count > max_count) {
1595         read_count = max_count;
1596     } else if (read_count < 0) {
1597         /*
1598          * read beyond XATTR value
1599          */
1600         read_count = 0;
1601     }
1602     err = pdu_marshal(pdu, offset, "d", read_count);
1603     if (err < 0) {
1604         return err;
1605     }
1606     offset += err;
1607 
1608     err = v9fs_pack(elem->in_sg, elem->in_num, offset,
1609                     ((char *)fidp->fs.xattr.value) + off,
1610                     read_count);
1611     if (err < 0) {
1612         return err;
1613     }
1614     offset += err;
1615     return offset;
1616 }
1617 
1618 static int v9fs_do_readdir_with_stat(V9fsPDU *pdu,
1619                                      V9fsFidState *fidp, uint32_t max_count)
1620 {
1621     V9fsPath path;
1622     V9fsStat v9stat;
1623     int len, err = 0;
1624     int32_t count = 0;
1625     struct stat stbuf;
1626     off_t saved_dir_pos;
1627     struct dirent *dent, *result;
1628 
1629     /* save the directory position */
1630     saved_dir_pos = v9fs_co_telldir(pdu, fidp);
1631     if (saved_dir_pos < 0) {
1632         return saved_dir_pos;
1633     }
1634 
1635     dent = g_malloc(sizeof(struct dirent));
1636 
1637     while (1) {
1638         v9fs_path_init(&path);
1639         err = v9fs_co_readdir_r(pdu, fidp, dent, &result);
1640         if (err || !result) {
1641             break;
1642         }
1643         err = v9fs_co_name_to_path(pdu, &fidp->path, dent->d_name, &path);
1644         if (err < 0) {
1645             goto out;
1646         }
1647         err = v9fs_co_lstat(pdu, &path, &stbuf);
1648         if (err < 0) {
1649             goto out;
1650         }
1651         err = stat_to_v9stat(pdu, &path, &stbuf, &v9stat);
1652         if (err < 0) {
1653             goto out;
1654         }
1655         /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
1656         len = pdu_marshal(pdu, 11 + count, "S", &v9stat);
1657         if ((len != (v9stat.size + 2)) || ((count + len) > max_count)) {
1658             /* Ran out of buffer. Set dir back to old position and return */
1659             v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
1660             v9fs_stat_free(&v9stat);
1661             v9fs_path_free(&path);
1662             g_free(dent);
1663             return count;
1664         }
1665         count += len;
1666         v9fs_stat_free(&v9stat);
1667         v9fs_path_free(&path);
1668         saved_dir_pos = dent->d_off;
1669     }
1670 out:
1671     g_free(dent);
1672     v9fs_path_free(&path);
1673     if (err < 0) {
1674         return err;
1675     }
1676     return count;
1677 }
1678 
1679 /*
1680  * Create a QEMUIOVector for a sub-region of PDU iovecs
1681  *
1682  * @qiov:       uninitialized QEMUIOVector
1683  * @skip:       number of bytes to skip from beginning of PDU
1684  * @size:       number of bytes to include
1685  * @is_write:   true - write, false - read
1686  *
1687  * The resulting QEMUIOVector has heap-allocated iovecs and must be cleaned up
1688  * with qemu_iovec_destroy().
1689  */
1690 static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu,
1691                                     size_t skip, size_t size,
1692                                     bool is_write)
1693 {
1694     QEMUIOVector elem;
1695     struct iovec *iov;
1696     unsigned int niov;
1697 
1698     virtio_init_iov_from_pdu(pdu, &iov, &niov, is_write);
1699 
1700     qemu_iovec_init_external(&elem, iov, niov);
1701     qemu_iovec_init(qiov, niov);
1702     qemu_iovec_concat(qiov, &elem, skip, size);
1703 }
1704 
1705 static void v9fs_read(void *opaque)
1706 {
1707     int32_t fid;
1708     uint64_t off;
1709     ssize_t err = 0;
1710     int32_t count = 0;
1711     size_t offset = 7;
1712     uint32_t max_count;
1713     V9fsFidState *fidp;
1714     V9fsPDU *pdu = opaque;
1715     V9fsState *s = pdu->s;
1716 
1717     err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &max_count);
1718     if (err < 0) {
1719         goto out_nofid;
1720     }
1721     trace_v9fs_read(pdu->tag, pdu->id, fid, off, max_count);
1722 
1723     fidp = get_fid(pdu, fid);
1724     if (fidp == NULL) {
1725         err = -EINVAL;
1726         goto out_nofid;
1727     }
1728     if (fidp->fid_type == P9_FID_DIR) {
1729 
1730         if (off == 0) {
1731             v9fs_co_rewinddir(pdu, fidp);
1732         }
1733         count = v9fs_do_readdir_with_stat(pdu, fidp, max_count);
1734         if (count < 0) {
1735             err = count;
1736             goto out;
1737         }
1738         err = pdu_marshal(pdu, offset, "d", count);
1739         if (err < 0) {
1740             goto out;
1741         }
1742         err += offset + count;
1743     } else if (fidp->fid_type == P9_FID_FILE) {
1744         QEMUIOVector qiov_full;
1745         QEMUIOVector qiov;
1746         int32_t len;
1747 
1748         v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset + 4, max_count, false);
1749         qemu_iovec_init(&qiov, qiov_full.niov);
1750         do {
1751             qemu_iovec_reset(&qiov);
1752             qemu_iovec_concat(&qiov, &qiov_full, count, qiov_full.size - count);
1753             if (0) {
1754                 print_sg(qiov.iov, qiov.niov);
1755             }
1756             /* Loop in case of EINTR */
1757             do {
1758                 len = v9fs_co_preadv(pdu, fidp, qiov.iov, qiov.niov, off);
1759                 if (len >= 0) {
1760                     off   += len;
1761                     count += len;
1762                 }
1763             } while (len == -EINTR && !pdu->cancelled);
1764             if (len < 0) {
1765                 /* IO error return the error */
1766                 err = len;
1767                 goto out;
1768             }
1769         } while (count < max_count && len > 0);
1770         err = pdu_marshal(pdu, offset, "d", count);
1771         if (err < 0) {
1772             goto out;
1773         }
1774         err += offset + count;
1775         qemu_iovec_destroy(&qiov);
1776         qemu_iovec_destroy(&qiov_full);
1777     } else if (fidp->fid_type == P9_FID_XATTR) {
1778         err = v9fs_xattr_read(s, pdu, fidp, off, max_count);
1779     } else {
1780         err = -EINVAL;
1781     }
1782     trace_v9fs_read_return(pdu->tag, pdu->id, count, err);
1783 out:
1784     put_fid(pdu, fidp);
1785 out_nofid:
1786     pdu_complete(pdu, err);
1787 }
1788 
1789 static size_t v9fs_readdir_data_size(V9fsString *name)
1790 {
1791     /*
1792      * Size of each dirent on the wire: size of qid (13) + size of offset (8)
1793      * size of type (1) + size of name.size (2) + strlen(name.data)
1794      */
1795     return 24 + v9fs_string_size(name);
1796 }
1797 
1798 static int v9fs_do_readdir(V9fsPDU *pdu,
1799                            V9fsFidState *fidp, int32_t max_count)
1800 {
1801     size_t size;
1802     V9fsQID qid;
1803     V9fsString name;
1804     int len, err = 0;
1805     int32_t count = 0;
1806     off_t saved_dir_pos;
1807     struct dirent *dent, *result;
1808 
1809     /* save the directory position */
1810     saved_dir_pos = v9fs_co_telldir(pdu, fidp);
1811     if (saved_dir_pos < 0) {
1812         return saved_dir_pos;
1813     }
1814 
1815     dent = g_malloc(sizeof(struct dirent));
1816 
1817     while (1) {
1818         err = v9fs_co_readdir_r(pdu, fidp, dent, &result);
1819         if (err || !result) {
1820             break;
1821         }
1822         v9fs_string_init(&name);
1823         v9fs_string_sprintf(&name, "%s", dent->d_name);
1824         if ((count + v9fs_readdir_data_size(&name)) > max_count) {
1825             /* Ran out of buffer. Set dir back to old position and return */
1826             v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
1827             v9fs_string_free(&name);
1828             g_free(dent);
1829             return count;
1830         }
1831         /*
1832          * Fill up just the path field of qid because the client uses
1833          * only that. To fill the entire qid structure we will have
1834          * to stat each dirent found, which is expensive
1835          */
1836         size = MIN(sizeof(dent->d_ino), sizeof(qid.path));
1837         memcpy(&qid.path, &dent->d_ino, size);
1838         /* Fill the other fields with dummy values */
1839         qid.type = 0;
1840         qid.version = 0;
1841 
1842         /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
1843         len = pdu_marshal(pdu, 11 + count, "Qqbs",
1844                           &qid, dent->d_off,
1845                           dent->d_type, &name);
1846         if (len < 0) {
1847             v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
1848             v9fs_string_free(&name);
1849             g_free(dent);
1850             return len;
1851         }
1852         count += len;
1853         v9fs_string_free(&name);
1854         saved_dir_pos = dent->d_off;
1855     }
1856     g_free(dent);
1857     if (err < 0) {
1858         return err;
1859     }
1860     return count;
1861 }
1862 
1863 static void v9fs_readdir(void *opaque)
1864 {
1865     int32_t fid;
1866     V9fsFidState *fidp;
1867     ssize_t retval = 0;
1868     size_t offset = 7;
1869     uint64_t initial_offset;
1870     int32_t count;
1871     uint32_t max_count;
1872     V9fsPDU *pdu = opaque;
1873 
1874     retval = pdu_unmarshal(pdu, offset, "dqd", &fid,
1875                            &initial_offset, &max_count);
1876     if (retval < 0) {
1877         goto out_nofid;
1878     }
1879     trace_v9fs_readdir(pdu->tag, pdu->id, fid, initial_offset, max_count);
1880 
1881     fidp = get_fid(pdu, fid);
1882     if (fidp == NULL) {
1883         retval = -EINVAL;
1884         goto out_nofid;
1885     }
1886     if (!fidp->fs.dir) {
1887         retval = -EINVAL;
1888         goto out;
1889     }
1890     if (initial_offset == 0) {
1891         v9fs_co_rewinddir(pdu, fidp);
1892     } else {
1893         v9fs_co_seekdir(pdu, fidp, initial_offset);
1894     }
1895     count = v9fs_do_readdir(pdu, fidp, max_count);
1896     if (count < 0) {
1897         retval = count;
1898         goto out;
1899     }
1900     retval = pdu_marshal(pdu, offset, "d", count);
1901     if (retval < 0) {
1902         goto out;
1903     }
1904     retval += count + offset;
1905     trace_v9fs_readdir_return(pdu->tag, pdu->id, count, retval);
1906 out:
1907     put_fid(pdu, fidp);
1908 out_nofid:
1909     pdu_complete(pdu, retval);
1910 }
1911 
1912 static int v9fs_xattr_write(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
1913                             uint64_t off, uint32_t count,
1914                             struct iovec *sg, int cnt)
1915 {
1916     int i, to_copy;
1917     ssize_t err = 0;
1918     int write_count;
1919     int64_t xattr_len;
1920     size_t offset = 7;
1921 
1922 
1923     xattr_len = fidp->fs.xattr.len;
1924     write_count = xattr_len - off;
1925     if (write_count > count) {
1926         write_count = count;
1927     } else if (write_count < 0) {
1928         /*
1929          * write beyond XATTR value len specified in
1930          * xattrcreate
1931          */
1932         err = -ENOSPC;
1933         goto out;
1934     }
1935     err = pdu_marshal(pdu, offset, "d", write_count);
1936     if (err < 0) {
1937         return err;
1938     }
1939     err += offset;
1940     fidp->fs.xattr.copied_len += write_count;
1941     /*
1942      * Now copy the content from sg list
1943      */
1944     for (i = 0; i < cnt; i++) {
1945         if (write_count > sg[i].iov_len) {
1946             to_copy = sg[i].iov_len;
1947         } else {
1948             to_copy = write_count;
1949         }
1950         memcpy((char *)fidp->fs.xattr.value + off, sg[i].iov_base, to_copy);
1951         /* updating vs->off since we are not using below */
1952         off += to_copy;
1953         write_count -= to_copy;
1954     }
1955 out:
1956     return err;
1957 }
1958 
1959 static void v9fs_write(void *opaque)
1960 {
1961     ssize_t err;
1962     int32_t fid;
1963     uint64_t off;
1964     uint32_t count;
1965     int32_t len = 0;
1966     int32_t total = 0;
1967     size_t offset = 7;
1968     V9fsFidState *fidp;
1969     V9fsPDU *pdu = opaque;
1970     V9fsState *s = pdu->s;
1971     QEMUIOVector qiov_full;
1972     QEMUIOVector qiov;
1973 
1974     err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &count);
1975     if (err < 0) {
1976         pdu_complete(pdu, err);
1977         return;
1978     }
1979     offset += err;
1980     v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, count, true);
1981     trace_v9fs_write(pdu->tag, pdu->id, fid, off, count, qiov_full.niov);
1982 
1983     fidp = get_fid(pdu, fid);
1984     if (fidp == NULL) {
1985         err = -EINVAL;
1986         goto out_nofid;
1987     }
1988     if (fidp->fid_type == P9_FID_FILE) {
1989         if (fidp->fs.fd == -1) {
1990             err = -EINVAL;
1991             goto out;
1992         }
1993     } else if (fidp->fid_type == P9_FID_XATTR) {
1994         /*
1995          * setxattr operation
1996          */
1997         err = v9fs_xattr_write(s, pdu, fidp, off, count,
1998                                qiov_full.iov, qiov_full.niov);
1999         goto out;
2000     } else {
2001         err = -EINVAL;
2002         goto out;
2003     }
2004     qemu_iovec_init(&qiov, qiov_full.niov);
2005     do {
2006         qemu_iovec_reset(&qiov);
2007         qemu_iovec_concat(&qiov, &qiov_full, total, qiov_full.size - total);
2008         if (0) {
2009             print_sg(qiov.iov, qiov.niov);
2010         }
2011         /* Loop in case of EINTR */
2012         do {
2013             len = v9fs_co_pwritev(pdu, fidp, qiov.iov, qiov.niov, off);
2014             if (len >= 0) {
2015                 off   += len;
2016                 total += len;
2017             }
2018         } while (len == -EINTR && !pdu->cancelled);
2019         if (len < 0) {
2020             /* IO error return the error */
2021             err = len;
2022             goto out_qiov;
2023         }
2024     } while (total < count && len > 0);
2025 
2026     offset = 7;
2027     err = pdu_marshal(pdu, offset, "d", total);
2028     if (err < 0) {
2029         goto out;
2030     }
2031     err += offset;
2032     trace_v9fs_write_return(pdu->tag, pdu->id, total, err);
2033 out_qiov:
2034     qemu_iovec_destroy(&qiov);
2035 out:
2036     put_fid(pdu, fidp);
2037 out_nofid:
2038     qemu_iovec_destroy(&qiov_full);
2039     pdu_complete(pdu, err);
2040 }
2041 
2042 static void v9fs_create(void *opaque)
2043 {
2044     int32_t fid;
2045     int err = 0;
2046     size_t offset = 7;
2047     V9fsFidState *fidp;
2048     V9fsQID qid;
2049     int32_t perm;
2050     int8_t mode;
2051     V9fsPath path;
2052     struct stat stbuf;
2053     V9fsString name;
2054     V9fsString extension;
2055     int iounit;
2056     V9fsPDU *pdu = opaque;
2057 
2058     v9fs_path_init(&path);
2059     v9fs_string_init(&name);
2060     v9fs_string_init(&extension);
2061     err = pdu_unmarshal(pdu, offset, "dsdbs", &fid, &name,
2062                         &perm, &mode, &extension);
2063     if (err < 0) {
2064         goto out_nofid;
2065     }
2066     trace_v9fs_create(pdu->tag, pdu->id, fid, name.data, perm, mode);
2067 
2068     fidp = get_fid(pdu, fid);
2069     if (fidp == NULL) {
2070         err = -EINVAL;
2071         goto out_nofid;
2072     }
2073     if (perm & P9_STAT_MODE_DIR) {
2074         err = v9fs_co_mkdir(pdu, fidp, &name, perm & 0777,
2075                             fidp->uid, -1, &stbuf);
2076         if (err < 0) {
2077             goto out;
2078         }
2079         err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2080         if (err < 0) {
2081             goto out;
2082         }
2083         v9fs_path_copy(&fidp->path, &path);
2084         err = v9fs_co_opendir(pdu, fidp);
2085         if (err < 0) {
2086             goto out;
2087         }
2088         fidp->fid_type = P9_FID_DIR;
2089     } else if (perm & P9_STAT_MODE_SYMLINK) {
2090         err = v9fs_co_symlink(pdu, fidp, &name,
2091                               extension.data, -1 , &stbuf);
2092         if (err < 0) {
2093             goto out;
2094         }
2095         err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2096         if (err < 0) {
2097             goto out;
2098         }
2099         v9fs_path_copy(&fidp->path, &path);
2100     } else if (perm & P9_STAT_MODE_LINK) {
2101         int32_t ofid = atoi(extension.data);
2102         V9fsFidState *ofidp = get_fid(pdu, ofid);
2103         if (ofidp == NULL) {
2104             err = -EINVAL;
2105             goto out;
2106         }
2107         err = v9fs_co_link(pdu, ofidp, fidp, &name);
2108         put_fid(pdu, ofidp);
2109         if (err < 0) {
2110             goto out;
2111         }
2112         err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2113         if (err < 0) {
2114             fidp->fid_type = P9_FID_NONE;
2115             goto out;
2116         }
2117         v9fs_path_copy(&fidp->path, &path);
2118         err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
2119         if (err < 0) {
2120             fidp->fid_type = P9_FID_NONE;
2121             goto out;
2122         }
2123     } else if (perm & P9_STAT_MODE_DEVICE) {
2124         char ctype;
2125         uint32_t major, minor;
2126         mode_t nmode = 0;
2127 
2128         if (sscanf(extension.data, "%c %u %u", &ctype, &major, &minor) != 3) {
2129             err = -errno;
2130             goto out;
2131         }
2132 
2133         switch (ctype) {
2134         case 'c':
2135             nmode = S_IFCHR;
2136             break;
2137         case 'b':
2138             nmode = S_IFBLK;
2139             break;
2140         default:
2141             err = -EIO;
2142             goto out;
2143         }
2144 
2145         nmode |= perm & 0777;
2146         err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2147                             makedev(major, minor), nmode, &stbuf);
2148         if (err < 0) {
2149             goto out;
2150         }
2151         err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2152         if (err < 0) {
2153             goto out;
2154         }
2155         v9fs_path_copy(&fidp->path, &path);
2156     } else if (perm & P9_STAT_MODE_NAMED_PIPE) {
2157         err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2158                             0, S_IFIFO | (perm & 0777), &stbuf);
2159         if (err < 0) {
2160             goto out;
2161         }
2162         err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2163         if (err < 0) {
2164             goto out;
2165         }
2166         v9fs_path_copy(&fidp->path, &path);
2167     } else if (perm & P9_STAT_MODE_SOCKET) {
2168         err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2169                             0, S_IFSOCK | (perm & 0777), &stbuf);
2170         if (err < 0) {
2171             goto out;
2172         }
2173         err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2174         if (err < 0) {
2175             goto out;
2176         }
2177         v9fs_path_copy(&fidp->path, &path);
2178     } else {
2179         err = v9fs_co_open2(pdu, fidp, &name, -1,
2180                             omode_to_uflags(mode)|O_CREAT, perm, &stbuf);
2181         if (err < 0) {
2182             goto out;
2183         }
2184         fidp->fid_type = P9_FID_FILE;
2185         fidp->open_flags = omode_to_uflags(mode);
2186         if (fidp->open_flags & O_EXCL) {
2187             /*
2188              * We let the host file system do O_EXCL check
2189              * We should not reclaim such fd
2190              */
2191             fidp->flags |= FID_NON_RECLAIMABLE;
2192         }
2193     }
2194     iounit = get_iounit(pdu, &fidp->path);
2195     stat_to_qid(&stbuf, &qid);
2196     err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
2197     if (err < 0) {
2198         goto out;
2199     }
2200     err += offset;
2201     trace_v9fs_create_return(pdu->tag, pdu->id,
2202                              qid.type, qid.version, qid.path, iounit);
2203 out:
2204     put_fid(pdu, fidp);
2205 out_nofid:
2206    pdu_complete(pdu, err);
2207    v9fs_string_free(&name);
2208    v9fs_string_free(&extension);
2209    v9fs_path_free(&path);
2210 }
2211 
2212 static void v9fs_symlink(void *opaque)
2213 {
2214     V9fsPDU *pdu = opaque;
2215     V9fsString name;
2216     V9fsString symname;
2217     V9fsFidState *dfidp;
2218     V9fsQID qid;
2219     struct stat stbuf;
2220     int32_t dfid;
2221     int err = 0;
2222     gid_t gid;
2223     size_t offset = 7;
2224 
2225     v9fs_string_init(&name);
2226     v9fs_string_init(&symname);
2227     err = pdu_unmarshal(pdu, offset, "dssd", &dfid, &name, &symname, &gid);
2228     if (err < 0) {
2229         goto out_nofid;
2230     }
2231     trace_v9fs_symlink(pdu->tag, pdu->id, dfid, name.data, symname.data, gid);
2232 
2233     dfidp = get_fid(pdu, dfid);
2234     if (dfidp == NULL) {
2235         err = -EINVAL;
2236         goto out_nofid;
2237     }
2238     err = v9fs_co_symlink(pdu, dfidp, &name, symname.data, gid, &stbuf);
2239     if (err < 0) {
2240         goto out;
2241     }
2242     stat_to_qid(&stbuf, &qid);
2243     err =  pdu_marshal(pdu, offset, "Q", &qid);
2244     if (err < 0) {
2245         goto out;
2246     }
2247     err += offset;
2248     trace_v9fs_symlink_return(pdu->tag, pdu->id,
2249                               qid.type, qid.version, qid.path);
2250 out:
2251     put_fid(pdu, dfidp);
2252 out_nofid:
2253     pdu_complete(pdu, err);
2254     v9fs_string_free(&name);
2255     v9fs_string_free(&symname);
2256 }
2257 
2258 static void v9fs_flush(void *opaque)
2259 {
2260     ssize_t err;
2261     int16_t tag;
2262     size_t offset = 7;
2263     V9fsPDU *cancel_pdu;
2264     V9fsPDU *pdu = opaque;
2265     V9fsState *s = pdu->s;
2266 
2267     err = pdu_unmarshal(pdu, offset, "w", &tag);
2268     if (err < 0) {
2269         pdu_complete(pdu, err);
2270         return;
2271     }
2272     trace_v9fs_flush(pdu->tag, pdu->id, tag);
2273 
2274     QLIST_FOREACH(cancel_pdu, &s->active_list, next) {
2275         if (cancel_pdu->tag == tag) {
2276             break;
2277         }
2278     }
2279     if (cancel_pdu) {
2280         cancel_pdu->cancelled = 1;
2281         /*
2282          * Wait for pdu to complete.
2283          */
2284         qemu_co_queue_wait(&cancel_pdu->complete);
2285         cancel_pdu->cancelled = 0;
2286         pdu_free(cancel_pdu);
2287     }
2288     pdu_complete(pdu, 7);
2289 }
2290 
2291 static void v9fs_link(void *opaque)
2292 {
2293     V9fsPDU *pdu = opaque;
2294     int32_t dfid, oldfid;
2295     V9fsFidState *dfidp, *oldfidp;
2296     V9fsString name;
2297     size_t offset = 7;
2298     int err = 0;
2299 
2300     v9fs_string_init(&name);
2301     err = pdu_unmarshal(pdu, offset, "dds", &dfid, &oldfid, &name);
2302     if (err < 0) {
2303         goto out_nofid;
2304     }
2305     trace_v9fs_link(pdu->tag, pdu->id, dfid, oldfid, name.data);
2306 
2307     dfidp = get_fid(pdu, dfid);
2308     if (dfidp == NULL) {
2309         err = -ENOENT;
2310         goto out_nofid;
2311     }
2312 
2313     oldfidp = get_fid(pdu, oldfid);
2314     if (oldfidp == NULL) {
2315         err = -ENOENT;
2316         goto out;
2317     }
2318     err = v9fs_co_link(pdu, oldfidp, dfidp, &name);
2319     if (!err) {
2320         err = offset;
2321     }
2322 out:
2323     put_fid(pdu, dfidp);
2324 out_nofid:
2325     v9fs_string_free(&name);
2326     pdu_complete(pdu, err);
2327 }
2328 
2329 /* Only works with path name based fid */
2330 static void v9fs_remove(void *opaque)
2331 {
2332     int32_t fid;
2333     int err = 0;
2334     size_t offset = 7;
2335     V9fsFidState *fidp;
2336     V9fsPDU *pdu = opaque;
2337 
2338     err = pdu_unmarshal(pdu, offset, "d", &fid);
2339     if (err < 0) {
2340         goto out_nofid;
2341     }
2342     trace_v9fs_remove(pdu->tag, pdu->id, fid);
2343 
2344     fidp = get_fid(pdu, fid);
2345     if (fidp == NULL) {
2346         err = -EINVAL;
2347         goto out_nofid;
2348     }
2349     /* if fs driver is not path based, return EOPNOTSUPP */
2350     if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) {
2351         err = -EOPNOTSUPP;
2352         goto out_err;
2353     }
2354     /*
2355      * IF the file is unlinked, we cannot reopen
2356      * the file later. So don't reclaim fd
2357      */
2358     err = v9fs_mark_fids_unreclaim(pdu, &fidp->path);
2359     if (err < 0) {
2360         goto out_err;
2361     }
2362     err = v9fs_co_remove(pdu, &fidp->path);
2363     if (!err) {
2364         err = offset;
2365     }
2366 out_err:
2367     /* For TREMOVE we need to clunk the fid even on failed remove */
2368     clunk_fid(pdu->s, fidp->fid);
2369     put_fid(pdu, fidp);
2370 out_nofid:
2371     pdu_complete(pdu, err);
2372 }
2373 
2374 static void v9fs_unlinkat(void *opaque)
2375 {
2376     int err = 0;
2377     V9fsString name;
2378     int32_t dfid, flags;
2379     size_t offset = 7;
2380     V9fsPath path;
2381     V9fsFidState *dfidp;
2382     V9fsPDU *pdu = opaque;
2383 
2384     v9fs_string_init(&name);
2385     err = pdu_unmarshal(pdu, offset, "dsd", &dfid, &name, &flags);
2386     if (err < 0) {
2387         goto out_nofid;
2388     }
2389     dfidp = get_fid(pdu, dfid);
2390     if (dfidp == NULL) {
2391         err = -EINVAL;
2392         goto out_nofid;
2393     }
2394     /*
2395      * IF the file is unlinked, we cannot reopen
2396      * the file later. So don't reclaim fd
2397      */
2398     v9fs_path_init(&path);
2399     err = v9fs_co_name_to_path(pdu, &dfidp->path, name.data, &path);
2400     if (err < 0) {
2401         goto out_err;
2402     }
2403     err = v9fs_mark_fids_unreclaim(pdu, &path);
2404     if (err < 0) {
2405         goto out_err;
2406     }
2407     err = v9fs_co_unlinkat(pdu, &dfidp->path, &name, flags);
2408     if (!err) {
2409         err = offset;
2410     }
2411 out_err:
2412     put_fid(pdu, dfidp);
2413     v9fs_path_free(&path);
2414 out_nofid:
2415     pdu_complete(pdu, err);
2416     v9fs_string_free(&name);
2417 }
2418 
2419 
2420 /* Only works with path name based fid */
2421 static int v9fs_complete_rename(V9fsPDU *pdu, V9fsFidState *fidp,
2422                                 int32_t newdirfid, V9fsString *name)
2423 {
2424     char *end;
2425     int err = 0;
2426     V9fsPath new_path;
2427     V9fsFidState *tfidp;
2428     V9fsState *s = pdu->s;
2429     V9fsFidState *dirfidp = NULL;
2430     char *old_name, *new_name;
2431 
2432     v9fs_path_init(&new_path);
2433     if (newdirfid != -1) {
2434         dirfidp = get_fid(pdu, newdirfid);
2435         if (dirfidp == NULL) {
2436             err = -ENOENT;
2437             goto out_nofid;
2438         }
2439         BUG_ON(dirfidp->fid_type != P9_FID_NONE);
2440         v9fs_co_name_to_path(pdu, &dirfidp->path, name->data, &new_path);
2441     } else {
2442         old_name = fidp->path.data;
2443         end = strrchr(old_name, '/');
2444         if (end) {
2445             end++;
2446         } else {
2447             end = old_name;
2448         }
2449         new_name = g_malloc0(end - old_name + name->size + 1);
2450         strncat(new_name, old_name, end - old_name);
2451         strncat(new_name + (end - old_name), name->data, name->size);
2452         v9fs_co_name_to_path(pdu, NULL, new_name, &new_path);
2453         g_free(new_name);
2454     }
2455     err = v9fs_co_rename(pdu, &fidp->path, &new_path);
2456     if (err < 0) {
2457         goto out;
2458     }
2459     /*
2460      * Fixup fid's pointing to the old name to
2461      * start pointing to the new name
2462      */
2463     for (tfidp = s->fid_list; tfidp; tfidp = tfidp->next) {
2464         if (v9fs_path_is_ancestor(&fidp->path, &tfidp->path)) {
2465             /* replace the name */
2466             v9fs_fix_path(&tfidp->path, &new_path, strlen(fidp->path.data));
2467         }
2468     }
2469 out:
2470     if (dirfidp) {
2471         put_fid(pdu, dirfidp);
2472     }
2473     v9fs_path_free(&new_path);
2474 out_nofid:
2475     return err;
2476 }
2477 
2478 /* Only works with path name based fid */
2479 static void v9fs_rename(void *opaque)
2480 {
2481     int32_t fid;
2482     ssize_t err = 0;
2483     size_t offset = 7;
2484     V9fsString name;
2485     int32_t newdirfid;
2486     V9fsFidState *fidp;
2487     V9fsPDU *pdu = opaque;
2488     V9fsState *s = pdu->s;
2489 
2490     v9fs_string_init(&name);
2491     err = pdu_unmarshal(pdu, offset, "dds", &fid, &newdirfid, &name);
2492     if (err < 0) {
2493         goto out_nofid;
2494     }
2495     fidp = get_fid(pdu, fid);
2496     if (fidp == NULL) {
2497         err = -ENOENT;
2498         goto out_nofid;
2499     }
2500     BUG_ON(fidp->fid_type != P9_FID_NONE);
2501     /* if fs driver is not path based, return EOPNOTSUPP */
2502     if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) {
2503         err = -EOPNOTSUPP;
2504         goto out;
2505     }
2506     v9fs_path_write_lock(s);
2507     err = v9fs_complete_rename(pdu, fidp, newdirfid, &name);
2508     v9fs_path_unlock(s);
2509     if (!err) {
2510         err = offset;
2511     }
2512 out:
2513     put_fid(pdu, fidp);
2514 out_nofid:
2515     pdu_complete(pdu, err);
2516     v9fs_string_free(&name);
2517 }
2518 
2519 static void v9fs_fix_fid_paths(V9fsPDU *pdu, V9fsPath *olddir,
2520                                V9fsString *old_name, V9fsPath *newdir,
2521                                V9fsString *new_name)
2522 {
2523     V9fsFidState *tfidp;
2524     V9fsPath oldpath, newpath;
2525     V9fsState *s = pdu->s;
2526 
2527 
2528     v9fs_path_init(&oldpath);
2529     v9fs_path_init(&newpath);
2530     v9fs_co_name_to_path(pdu, olddir, old_name->data, &oldpath);
2531     v9fs_co_name_to_path(pdu, newdir, new_name->data, &newpath);
2532 
2533     /*
2534      * Fixup fid's pointing to the old name to
2535      * start pointing to the new name
2536      */
2537     for (tfidp = s->fid_list; tfidp; tfidp = tfidp->next) {
2538         if (v9fs_path_is_ancestor(&oldpath, &tfidp->path)) {
2539             /* replace the name */
2540             v9fs_fix_path(&tfidp->path, &newpath, strlen(oldpath.data));
2541         }
2542     }
2543     v9fs_path_free(&oldpath);
2544     v9fs_path_free(&newpath);
2545 }
2546 
2547 static int v9fs_complete_renameat(V9fsPDU *pdu, int32_t olddirfid,
2548                                   V9fsString *old_name, int32_t newdirfid,
2549                                   V9fsString *new_name)
2550 {
2551     int err = 0;
2552     V9fsState *s = pdu->s;
2553     V9fsFidState *newdirfidp = NULL, *olddirfidp = NULL;
2554 
2555     olddirfidp = get_fid(pdu, olddirfid);
2556     if (olddirfidp == NULL) {
2557         err = -ENOENT;
2558         goto out;
2559     }
2560     if (newdirfid != -1) {
2561         newdirfidp = get_fid(pdu, newdirfid);
2562         if (newdirfidp == NULL) {
2563             err = -ENOENT;
2564             goto out;
2565         }
2566     } else {
2567         newdirfidp = get_fid(pdu, olddirfid);
2568     }
2569 
2570     err = v9fs_co_renameat(pdu, &olddirfidp->path, old_name,
2571                            &newdirfidp->path, new_name);
2572     if (err < 0) {
2573         goto out;
2574     }
2575     if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) {
2576         /* Only for path based fid  we need to do the below fixup */
2577         v9fs_fix_fid_paths(pdu, &olddirfidp->path, old_name,
2578                            &newdirfidp->path, new_name);
2579     }
2580 out:
2581     if (olddirfidp) {
2582         put_fid(pdu, olddirfidp);
2583     }
2584     if (newdirfidp) {
2585         put_fid(pdu, newdirfidp);
2586     }
2587     return err;
2588 }
2589 
2590 static void v9fs_renameat(void *opaque)
2591 {
2592     ssize_t err = 0;
2593     size_t offset = 7;
2594     V9fsPDU *pdu = opaque;
2595     V9fsState *s = pdu->s;
2596     int32_t olddirfid, newdirfid;
2597     V9fsString old_name, new_name;
2598 
2599     v9fs_string_init(&old_name);
2600     v9fs_string_init(&new_name);
2601     err = pdu_unmarshal(pdu, offset, "dsds", &olddirfid,
2602                         &old_name, &newdirfid, &new_name);
2603     if (err < 0) {
2604         goto out_err;
2605     }
2606 
2607     v9fs_path_write_lock(s);
2608     err = v9fs_complete_renameat(pdu, olddirfid,
2609                                  &old_name, newdirfid, &new_name);
2610     v9fs_path_unlock(s);
2611     if (!err) {
2612         err = offset;
2613     }
2614 
2615 out_err:
2616     pdu_complete(pdu, err);
2617     v9fs_string_free(&old_name);
2618     v9fs_string_free(&new_name);
2619 }
2620 
2621 static void v9fs_wstat(void *opaque)
2622 {
2623     int32_t fid;
2624     int err = 0;
2625     int16_t unused;
2626     V9fsStat v9stat;
2627     size_t offset = 7;
2628     struct stat stbuf;
2629     V9fsFidState *fidp;
2630     V9fsPDU *pdu = opaque;
2631 
2632     v9fs_stat_init(&v9stat);
2633     err = pdu_unmarshal(pdu, offset, "dwS", &fid, &unused, &v9stat);
2634     if (err < 0) {
2635         goto out_nofid;
2636     }
2637     trace_v9fs_wstat(pdu->tag, pdu->id, fid,
2638                      v9stat.mode, v9stat.atime, v9stat.mtime);
2639 
2640     fidp = get_fid(pdu, fid);
2641     if (fidp == NULL) {
2642         err = -EINVAL;
2643         goto out_nofid;
2644     }
2645     /* do we need to sync the file? */
2646     if (donttouch_stat(&v9stat)) {
2647         err = v9fs_co_fsync(pdu, fidp, 0);
2648         goto out;
2649     }
2650     if (v9stat.mode != -1) {
2651         uint32_t v9_mode;
2652         err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
2653         if (err < 0) {
2654             goto out;
2655         }
2656         v9_mode = stat_to_v9mode(&stbuf);
2657         if ((v9stat.mode & P9_STAT_MODE_TYPE_BITS) !=
2658             (v9_mode & P9_STAT_MODE_TYPE_BITS)) {
2659             /* Attempting to change the type */
2660             err = -EIO;
2661             goto out;
2662         }
2663         err = v9fs_co_chmod(pdu, &fidp->path,
2664                             v9mode_to_mode(v9stat.mode,
2665                                            &v9stat.extension));
2666         if (err < 0) {
2667             goto out;
2668         }
2669     }
2670     if (v9stat.mtime != -1 || v9stat.atime != -1) {
2671         struct timespec times[2];
2672         if (v9stat.atime != -1) {
2673             times[0].tv_sec = v9stat.atime;
2674             times[0].tv_nsec = 0;
2675         } else {
2676             times[0].tv_nsec = UTIME_OMIT;
2677         }
2678         if (v9stat.mtime != -1) {
2679             times[1].tv_sec = v9stat.mtime;
2680             times[1].tv_nsec = 0;
2681         } else {
2682             times[1].tv_nsec = UTIME_OMIT;
2683         }
2684         err = v9fs_co_utimensat(pdu, &fidp->path, times);
2685         if (err < 0) {
2686             goto out;
2687         }
2688     }
2689     if (v9stat.n_gid != -1 || v9stat.n_uid != -1) {
2690         err = v9fs_co_chown(pdu, &fidp->path, v9stat.n_uid, v9stat.n_gid);
2691         if (err < 0) {
2692             goto out;
2693         }
2694     }
2695     if (v9stat.name.size != 0) {
2696         err = v9fs_complete_rename(pdu, fidp, -1, &v9stat.name);
2697         if (err < 0) {
2698             goto out;
2699         }
2700     }
2701     if (v9stat.length != -1) {
2702         err = v9fs_co_truncate(pdu, &fidp->path, v9stat.length);
2703         if (err < 0) {
2704             goto out;
2705         }
2706     }
2707     err = offset;
2708 out:
2709     put_fid(pdu, fidp);
2710 out_nofid:
2711     v9fs_stat_free(&v9stat);
2712     pdu_complete(pdu, err);
2713 }
2714 
2715 static int v9fs_fill_statfs(V9fsState *s, V9fsPDU *pdu, struct statfs *stbuf)
2716 {
2717     uint32_t f_type;
2718     uint32_t f_bsize;
2719     uint64_t f_blocks;
2720     uint64_t f_bfree;
2721     uint64_t f_bavail;
2722     uint64_t f_files;
2723     uint64_t f_ffree;
2724     uint64_t fsid_val;
2725     uint32_t f_namelen;
2726     size_t offset = 7;
2727     int32_t bsize_factor;
2728 
2729     /*
2730      * compute bsize factor based on host file system block size
2731      * and client msize
2732      */
2733     bsize_factor = (s->msize - P9_IOHDRSZ)/stbuf->f_bsize;
2734     if (!bsize_factor) {
2735         bsize_factor = 1;
2736     }
2737     f_type  = stbuf->f_type;
2738     f_bsize = stbuf->f_bsize;
2739     f_bsize *= bsize_factor;
2740     /*
2741      * f_bsize is adjusted(multiplied) by bsize factor, so we need to
2742      * adjust(divide) the number of blocks, free blocks and available
2743      * blocks by bsize factor
2744      */
2745     f_blocks = stbuf->f_blocks/bsize_factor;
2746     f_bfree  = stbuf->f_bfree/bsize_factor;
2747     f_bavail = stbuf->f_bavail/bsize_factor;
2748     f_files  = stbuf->f_files;
2749     f_ffree  = stbuf->f_ffree;
2750     fsid_val = (unsigned int) stbuf->f_fsid.__val[0] |
2751                (unsigned long long)stbuf->f_fsid.__val[1] << 32;
2752     f_namelen = stbuf->f_namelen;
2753 
2754     return pdu_marshal(pdu, offset, "ddqqqqqqd",
2755                        f_type, f_bsize, f_blocks, f_bfree,
2756                        f_bavail, f_files, f_ffree,
2757                        fsid_val, f_namelen);
2758 }
2759 
2760 static void v9fs_statfs(void *opaque)
2761 {
2762     int32_t fid;
2763     ssize_t retval = 0;
2764     size_t offset = 7;
2765     V9fsFidState *fidp;
2766     struct statfs stbuf;
2767     V9fsPDU *pdu = opaque;
2768     V9fsState *s = pdu->s;
2769 
2770     retval = pdu_unmarshal(pdu, offset, "d", &fid);
2771     if (retval < 0) {
2772         goto out_nofid;
2773     }
2774     fidp = get_fid(pdu, fid);
2775     if (fidp == NULL) {
2776         retval = -ENOENT;
2777         goto out_nofid;
2778     }
2779     retval = v9fs_co_statfs(pdu, &fidp->path, &stbuf);
2780     if (retval < 0) {
2781         goto out;
2782     }
2783     retval = v9fs_fill_statfs(s, pdu, &stbuf);
2784     if (retval < 0) {
2785         goto out;
2786     }
2787     retval += offset;
2788 out:
2789     put_fid(pdu, fidp);
2790 out_nofid:
2791     pdu_complete(pdu, retval);
2792 }
2793 
2794 static void v9fs_mknod(void *opaque)
2795 {
2796 
2797     int mode;
2798     gid_t gid;
2799     int32_t fid;
2800     V9fsQID qid;
2801     int err = 0;
2802     int major, minor;
2803     size_t offset = 7;
2804     V9fsString name;
2805     struct stat stbuf;
2806     V9fsFidState *fidp;
2807     V9fsPDU *pdu = opaque;
2808 
2809     v9fs_string_init(&name);
2810     err = pdu_unmarshal(pdu, offset, "dsdddd", &fid, &name, &mode,
2811                         &major, &minor, &gid);
2812     if (err < 0) {
2813         goto out_nofid;
2814     }
2815     trace_v9fs_mknod(pdu->tag, pdu->id, fid, mode, major, minor);
2816 
2817     fidp = get_fid(pdu, fid);
2818     if (fidp == NULL) {
2819         err = -ENOENT;
2820         goto out_nofid;
2821     }
2822     err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, gid,
2823                         makedev(major, minor), mode, &stbuf);
2824     if (err < 0) {
2825         goto out;
2826     }
2827     stat_to_qid(&stbuf, &qid);
2828     err = pdu_marshal(pdu, offset, "Q", &qid);
2829     if (err < 0) {
2830         goto out;
2831     }
2832     err += offset;
2833     trace_v9fs_mknod_return(pdu->tag, pdu->id,
2834                             qid.type, qid.version, qid.path);
2835 out:
2836     put_fid(pdu, fidp);
2837 out_nofid:
2838     pdu_complete(pdu, err);
2839     v9fs_string_free(&name);
2840 }
2841 
2842 /*
2843  * Implement posix byte range locking code
2844  * Server side handling of locking code is very simple, because 9p server in
2845  * QEMU can handle only one client. And most of the lock handling
2846  * (like conflict, merging) etc is done by the VFS layer itself, so no need to
2847  * do any thing in * qemu 9p server side lock code path.
2848  * So when a TLOCK request comes, always return success
2849  */
2850 static void v9fs_lock(void *opaque)
2851 {
2852     int8_t status;
2853     V9fsFlock flock;
2854     size_t offset = 7;
2855     struct stat stbuf;
2856     V9fsFidState *fidp;
2857     int32_t fid, err = 0;
2858     V9fsPDU *pdu = opaque;
2859 
2860     status = P9_LOCK_ERROR;
2861     v9fs_string_init(&flock.client_id);
2862     err = pdu_unmarshal(pdu, offset, "dbdqqds", &fid, &flock.type,
2863                         &flock.flags, &flock.start, &flock.length,
2864                         &flock.proc_id, &flock.client_id);
2865     if (err < 0) {
2866         goto out_nofid;
2867     }
2868     trace_v9fs_lock(pdu->tag, pdu->id, fid,
2869                     flock.type, flock.start, flock.length);
2870 
2871 
2872     /* We support only block flag now (that too ignored currently) */
2873     if (flock.flags & ~P9_LOCK_FLAGS_BLOCK) {
2874         err = -EINVAL;
2875         goto out_nofid;
2876     }
2877     fidp = get_fid(pdu, fid);
2878     if (fidp == NULL) {
2879         err = -ENOENT;
2880         goto out_nofid;
2881     }
2882     err = v9fs_co_fstat(pdu, fidp, &stbuf);
2883     if (err < 0) {
2884         goto out;
2885     }
2886     status = P9_LOCK_SUCCESS;
2887 out:
2888     put_fid(pdu, fidp);
2889 out_nofid:
2890     err = pdu_marshal(pdu, offset, "b", status);
2891     if (err > 0) {
2892         err += offset;
2893     }
2894     trace_v9fs_lock_return(pdu->tag, pdu->id, status);
2895     pdu_complete(pdu, err);
2896     v9fs_string_free(&flock.client_id);
2897 }
2898 
2899 /*
2900  * When a TGETLOCK request comes, always return success because all lock
2901  * handling is done by client's VFS layer.
2902  */
2903 static void v9fs_getlock(void *opaque)
2904 {
2905     size_t offset = 7;
2906     struct stat stbuf;
2907     V9fsFidState *fidp;
2908     V9fsGetlock glock;
2909     int32_t fid, err = 0;
2910     V9fsPDU *pdu = opaque;
2911 
2912     v9fs_string_init(&glock.client_id);
2913     err = pdu_unmarshal(pdu, offset, "dbqqds", &fid, &glock.type,
2914                         &glock.start, &glock.length, &glock.proc_id,
2915                         &glock.client_id);
2916     if (err < 0) {
2917         goto out_nofid;
2918     }
2919     trace_v9fs_getlock(pdu->tag, pdu->id, fid,
2920                        glock.type, glock.start, glock.length);
2921 
2922     fidp = get_fid(pdu, fid);
2923     if (fidp == NULL) {
2924         err = -ENOENT;
2925         goto out_nofid;
2926     }
2927     err = v9fs_co_fstat(pdu, fidp, &stbuf);
2928     if (err < 0) {
2929         goto out;
2930     }
2931     glock.type = P9_LOCK_TYPE_UNLCK;
2932     err = pdu_marshal(pdu, offset, "bqqds", glock.type,
2933                           glock.start, glock.length, glock.proc_id,
2934                           &glock.client_id);
2935     if (err < 0) {
2936         goto out;
2937     }
2938     err += offset;
2939     trace_v9fs_getlock_return(pdu->tag, pdu->id, glock.type, glock.start,
2940                               glock.length, glock.proc_id);
2941 out:
2942     put_fid(pdu, fidp);
2943 out_nofid:
2944     pdu_complete(pdu, err);
2945     v9fs_string_free(&glock.client_id);
2946 }
2947 
2948 static void v9fs_mkdir(void *opaque)
2949 {
2950     V9fsPDU *pdu = opaque;
2951     size_t offset = 7;
2952     int32_t fid;
2953     struct stat stbuf;
2954     V9fsQID qid;
2955     V9fsString name;
2956     V9fsFidState *fidp;
2957     gid_t gid;
2958     int mode;
2959     int err = 0;
2960 
2961     v9fs_string_init(&name);
2962     err = pdu_unmarshal(pdu, offset, "dsdd", &fid, &name, &mode, &gid);
2963     if (err < 0) {
2964         goto out_nofid;
2965     }
2966     trace_v9fs_mkdir(pdu->tag, pdu->id, fid, name.data, mode, gid);
2967 
2968     fidp = get_fid(pdu, fid);
2969     if (fidp == NULL) {
2970         err = -ENOENT;
2971         goto out_nofid;
2972     }
2973     err = v9fs_co_mkdir(pdu, fidp, &name, mode, fidp->uid, gid, &stbuf);
2974     if (err < 0) {
2975         goto out;
2976     }
2977     stat_to_qid(&stbuf, &qid);
2978     err = pdu_marshal(pdu, offset, "Q", &qid);
2979     if (err < 0) {
2980         goto out;
2981     }
2982     err += offset;
2983     trace_v9fs_mkdir_return(pdu->tag, pdu->id,
2984                             qid.type, qid.version, qid.path, err);
2985 out:
2986     put_fid(pdu, fidp);
2987 out_nofid:
2988     pdu_complete(pdu, err);
2989     v9fs_string_free(&name);
2990 }
2991 
2992 static void v9fs_xattrwalk(void *opaque)
2993 {
2994     int64_t size;
2995     V9fsString name;
2996     ssize_t err = 0;
2997     size_t offset = 7;
2998     int32_t fid, newfid;
2999     V9fsFidState *file_fidp;
3000     V9fsFidState *xattr_fidp = NULL;
3001     V9fsPDU *pdu = opaque;
3002     V9fsState *s = pdu->s;
3003 
3004     v9fs_string_init(&name);
3005     err = pdu_unmarshal(pdu, offset, "dds", &fid, &newfid, &name);
3006     if (err < 0) {
3007         goto out_nofid;
3008     }
3009     trace_v9fs_xattrwalk(pdu->tag, pdu->id, fid, newfid, name.data);
3010 
3011     file_fidp = get_fid(pdu, fid);
3012     if (file_fidp == NULL) {
3013         err = -ENOENT;
3014         goto out_nofid;
3015     }
3016     xattr_fidp = alloc_fid(s, newfid);
3017     if (xattr_fidp == NULL) {
3018         err = -EINVAL;
3019         goto out;
3020     }
3021     v9fs_path_copy(&xattr_fidp->path, &file_fidp->path);
3022     if (name.data == NULL) {
3023         /*
3024          * listxattr request. Get the size first
3025          */
3026         size = v9fs_co_llistxattr(pdu, &xattr_fidp->path, NULL, 0);
3027         if (size < 0) {
3028             err = size;
3029             clunk_fid(s, xattr_fidp->fid);
3030             goto out;
3031         }
3032         /*
3033          * Read the xattr value
3034          */
3035         xattr_fidp->fs.xattr.len = size;
3036         xattr_fidp->fid_type = P9_FID_XATTR;
3037         xattr_fidp->fs.xattr.copied_len = -1;
3038         if (size) {
3039             xattr_fidp->fs.xattr.value = g_malloc(size);
3040             err = v9fs_co_llistxattr(pdu, &xattr_fidp->path,
3041                                      xattr_fidp->fs.xattr.value,
3042                                      xattr_fidp->fs.xattr.len);
3043             if (err < 0) {
3044                 clunk_fid(s, xattr_fidp->fid);
3045                 goto out;
3046             }
3047         }
3048         err = pdu_marshal(pdu, offset, "q", size);
3049         if (err < 0) {
3050             goto out;
3051         }
3052         err += offset;
3053     } else {
3054         /*
3055          * specific xattr fid. We check for xattr
3056          * presence also collect the xattr size
3057          */
3058         size = v9fs_co_lgetxattr(pdu, &xattr_fidp->path,
3059                                  &name, NULL, 0);
3060         if (size < 0) {
3061             err = size;
3062             clunk_fid(s, xattr_fidp->fid);
3063             goto out;
3064         }
3065         /*
3066          * Read the xattr value
3067          */
3068         xattr_fidp->fs.xattr.len = size;
3069         xattr_fidp->fid_type = P9_FID_XATTR;
3070         xattr_fidp->fs.xattr.copied_len = -1;
3071         if (size) {
3072             xattr_fidp->fs.xattr.value = g_malloc(size);
3073             err = v9fs_co_lgetxattr(pdu, &xattr_fidp->path,
3074                                     &name, xattr_fidp->fs.xattr.value,
3075                                     xattr_fidp->fs.xattr.len);
3076             if (err < 0) {
3077                 clunk_fid(s, xattr_fidp->fid);
3078                 goto out;
3079             }
3080         }
3081         err = pdu_marshal(pdu, offset, "q", size);
3082         if (err < 0) {
3083             goto out;
3084         }
3085         err += offset;
3086     }
3087     trace_v9fs_xattrwalk_return(pdu->tag, pdu->id, size);
3088 out:
3089     put_fid(pdu, file_fidp);
3090     if (xattr_fidp) {
3091         put_fid(pdu, xattr_fidp);
3092     }
3093 out_nofid:
3094     pdu_complete(pdu, err);
3095     v9fs_string_free(&name);
3096 }
3097 
3098 static void v9fs_xattrcreate(void *opaque)
3099 {
3100     int flags;
3101     int32_t fid;
3102     int64_t size;
3103     ssize_t err = 0;
3104     V9fsString name;
3105     size_t offset = 7;
3106     V9fsFidState *file_fidp;
3107     V9fsFidState *xattr_fidp;
3108     V9fsPDU *pdu = opaque;
3109 
3110     v9fs_string_init(&name);
3111     err = pdu_unmarshal(pdu, offset, "dsqd", &fid, &name, &size, &flags);
3112     if (err < 0) {
3113         goto out_nofid;
3114     }
3115     trace_v9fs_xattrcreate(pdu->tag, pdu->id, fid, name.data, size, flags);
3116 
3117     file_fidp = get_fid(pdu, fid);
3118     if (file_fidp == NULL) {
3119         err = -EINVAL;
3120         goto out_nofid;
3121     }
3122     /* Make the file fid point to xattr */
3123     xattr_fidp = file_fidp;
3124     xattr_fidp->fid_type = P9_FID_XATTR;
3125     xattr_fidp->fs.xattr.copied_len = 0;
3126     xattr_fidp->fs.xattr.len = size;
3127     xattr_fidp->fs.xattr.flags = flags;
3128     v9fs_string_init(&xattr_fidp->fs.xattr.name);
3129     v9fs_string_copy(&xattr_fidp->fs.xattr.name, &name);
3130     xattr_fidp->fs.xattr.value = g_malloc(size);
3131     err = offset;
3132     put_fid(pdu, file_fidp);
3133 out_nofid:
3134     pdu_complete(pdu, err);
3135     v9fs_string_free(&name);
3136 }
3137 
3138 static void v9fs_readlink(void *opaque)
3139 {
3140     V9fsPDU *pdu = opaque;
3141     size_t offset = 7;
3142     V9fsString target;
3143     int32_t fid;
3144     int err = 0;
3145     V9fsFidState *fidp;
3146 
3147     err = pdu_unmarshal(pdu, offset, "d", &fid);
3148     if (err < 0) {
3149         goto out_nofid;
3150     }
3151     trace_v9fs_readlink(pdu->tag, pdu->id, fid);
3152     fidp = get_fid(pdu, fid);
3153     if (fidp == NULL) {
3154         err = -ENOENT;
3155         goto out_nofid;
3156     }
3157 
3158     v9fs_string_init(&target);
3159     err = v9fs_co_readlink(pdu, &fidp->path, &target);
3160     if (err < 0) {
3161         goto out;
3162     }
3163     err = pdu_marshal(pdu, offset, "s", &target);
3164     if (err < 0) {
3165         v9fs_string_free(&target);
3166         goto out;
3167     }
3168     err += offset;
3169     trace_v9fs_readlink_return(pdu->tag, pdu->id, target.data);
3170     v9fs_string_free(&target);
3171 out:
3172     put_fid(pdu, fidp);
3173 out_nofid:
3174     pdu_complete(pdu, err);
3175 }
3176 
3177 static CoroutineEntry *pdu_co_handlers[] = {
3178     [P9_TREADDIR] = v9fs_readdir,
3179     [P9_TSTATFS] = v9fs_statfs,
3180     [P9_TGETATTR] = v9fs_getattr,
3181     [P9_TSETATTR] = v9fs_setattr,
3182     [P9_TXATTRWALK] = v9fs_xattrwalk,
3183     [P9_TXATTRCREATE] = v9fs_xattrcreate,
3184     [P9_TMKNOD] = v9fs_mknod,
3185     [P9_TRENAME] = v9fs_rename,
3186     [P9_TLOCK] = v9fs_lock,
3187     [P9_TGETLOCK] = v9fs_getlock,
3188     [P9_TRENAMEAT] = v9fs_renameat,
3189     [P9_TREADLINK] = v9fs_readlink,
3190     [P9_TUNLINKAT] = v9fs_unlinkat,
3191     [P9_TMKDIR] = v9fs_mkdir,
3192     [P9_TVERSION] = v9fs_version,
3193     [P9_TLOPEN] = v9fs_open,
3194     [P9_TATTACH] = v9fs_attach,
3195     [P9_TSTAT] = v9fs_stat,
3196     [P9_TWALK] = v9fs_walk,
3197     [P9_TCLUNK] = v9fs_clunk,
3198     [P9_TFSYNC] = v9fs_fsync,
3199     [P9_TOPEN] = v9fs_open,
3200     [P9_TREAD] = v9fs_read,
3201 #if 0
3202     [P9_TAUTH] = v9fs_auth,
3203 #endif
3204     [P9_TFLUSH] = v9fs_flush,
3205     [P9_TLINK] = v9fs_link,
3206     [P9_TSYMLINK] = v9fs_symlink,
3207     [P9_TCREATE] = v9fs_create,
3208     [P9_TLCREATE] = v9fs_lcreate,
3209     [P9_TWRITE] = v9fs_write,
3210     [P9_TWSTAT] = v9fs_wstat,
3211     [P9_TREMOVE] = v9fs_remove,
3212 };
3213 
3214 static void v9fs_op_not_supp(void *opaque)
3215 {
3216     V9fsPDU *pdu = opaque;
3217     pdu_complete(pdu, -EOPNOTSUPP);
3218 }
3219 
3220 static void v9fs_fs_ro(void *opaque)
3221 {
3222     V9fsPDU *pdu = opaque;
3223     pdu_complete(pdu, -EROFS);
3224 }
3225 
3226 static inline bool is_read_only_op(V9fsPDU *pdu)
3227 {
3228     switch (pdu->id) {
3229     case P9_TREADDIR:
3230     case P9_TSTATFS:
3231     case P9_TGETATTR:
3232     case P9_TXATTRWALK:
3233     case P9_TLOCK:
3234     case P9_TGETLOCK:
3235     case P9_TREADLINK:
3236     case P9_TVERSION:
3237     case P9_TLOPEN:
3238     case P9_TATTACH:
3239     case P9_TSTAT:
3240     case P9_TWALK:
3241     case P9_TCLUNK:
3242     case P9_TFSYNC:
3243     case P9_TOPEN:
3244     case P9_TREAD:
3245     case P9_TAUTH:
3246     case P9_TFLUSH:
3247         return 1;
3248     default:
3249         return 0;
3250     }
3251 }
3252 
3253 void pdu_submit(V9fsPDU *pdu)
3254 {
3255     Coroutine *co;
3256     CoroutineEntry *handler;
3257     V9fsState *s = pdu->s;
3258 
3259     if (pdu->id >= ARRAY_SIZE(pdu_co_handlers) ||
3260         (pdu_co_handlers[pdu->id] == NULL)) {
3261         handler = v9fs_op_not_supp;
3262     } else {
3263         handler = pdu_co_handlers[pdu->id];
3264     }
3265 
3266     if (is_ro_export(&s->ctx) && !is_read_only_op(pdu)) {
3267         handler = v9fs_fs_ro;
3268     }
3269     co = qemu_coroutine_create(handler);
3270     qemu_coroutine_enter(co, pdu);
3271 }
3272 
3273 /* Returns 0 on success, 1 on failure. */
3274 int v9fs_device_realize_common(V9fsState *s, Error **errp)
3275 {
3276     V9fsVirtioState *v = container_of(s, V9fsVirtioState, state);
3277     int i, len;
3278     struct stat stat;
3279     FsDriverEntry *fse;
3280     V9fsPath path;
3281     int rc = 1;
3282 
3283     /* initialize pdu allocator */
3284     QLIST_INIT(&s->free_list);
3285     QLIST_INIT(&s->active_list);
3286     for (i = 0; i < (MAX_REQ - 1); i++) {
3287         QLIST_INSERT_HEAD(&s->free_list, &v->pdus[i], next);
3288         v->pdus[i].s = s;
3289         v->pdus[i].idx = i;
3290     }
3291 
3292     v9fs_path_init(&path);
3293 
3294     fse = get_fsdev_fsentry(s->fsconf.fsdev_id);
3295 
3296     if (!fse) {
3297         /* We don't have a fsdev identified by fsdev_id */
3298         error_setg(errp, "9pfs device couldn't find fsdev with the "
3299                    "id = %s",
3300                    s->fsconf.fsdev_id ? s->fsconf.fsdev_id : "NULL");
3301         goto out;
3302     }
3303 
3304     if (!s->fsconf.tag) {
3305         /* we haven't specified a mount_tag */
3306         error_setg(errp, "fsdev with id %s needs mount_tag arguments",
3307                    s->fsconf.fsdev_id);
3308         goto out;
3309     }
3310 
3311     s->ctx.export_flags = fse->export_flags;
3312     s->ctx.fs_root = g_strdup(fse->path);
3313     s->ctx.exops.get_st_gen = NULL;
3314     len = strlen(s->fsconf.tag);
3315     if (len > MAX_TAG_LEN - 1) {
3316         error_setg(errp, "mount tag '%s' (%d bytes) is longer than "
3317                    "maximum (%d bytes)", s->fsconf.tag, len, MAX_TAG_LEN - 1);
3318         goto out;
3319     }
3320 
3321     s->tag = g_strdup(s->fsconf.tag);
3322     s->ctx.uid = -1;
3323 
3324     s->ops = fse->ops;
3325 
3326     s->fid_list = NULL;
3327     qemu_co_rwlock_init(&s->rename_lock);
3328 
3329     if (s->ops->init(&s->ctx) < 0) {
3330         error_setg(errp, "9pfs Failed to initialize fs-driver with id:%s"
3331                    " and export path:%s", s->fsconf.fsdev_id, s->ctx.fs_root);
3332         goto out;
3333     }
3334 
3335     /*
3336      * Check details of export path, We need to use fs driver
3337      * call back to do that. Since we are in the init path, we don't
3338      * use co-routines here.
3339      */
3340     if (s->ops->name_to_path(&s->ctx, NULL, "/", &path) < 0) {
3341         error_setg(errp,
3342                    "error in converting name to path %s", strerror(errno));
3343         goto out;
3344     }
3345     if (s->ops->lstat(&s->ctx, &path, &stat)) {
3346         error_setg(errp, "share path %s does not exist", fse->path);
3347         goto out;
3348     } else if (!S_ISDIR(stat.st_mode)) {
3349         error_setg(errp, "share path %s is not a directory", fse->path);
3350         goto out;
3351     }
3352     v9fs_path_free(&path);
3353 
3354     rc = 0;
3355 out:
3356     if (rc) {
3357         g_free(s->ctx.fs_root);
3358         g_free(s->tag);
3359         v9fs_path_free(&path);
3360     }
3361     return rc;
3362 }
3363 
3364 void v9fs_device_unrealize_common(V9fsState *s, Error **errp)
3365 {
3366     g_free(s->ctx.fs_root);
3367     g_free(s->tag);
3368 }
3369 
3370 static void __attribute__((__constructor__)) v9fs_set_fd_limit(void)
3371 {
3372     struct rlimit rlim;
3373     if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) {
3374         error_report("Failed to get the resource limit");
3375         exit(1);
3376     }
3377     open_fd_hw = rlim.rlim_cur - MIN(400, rlim.rlim_cur/3);
3378     open_fd_rc = rlim.rlim_cur/2;
3379 }
3380