xref: /openbmc/qemu/hw/9pfs/9p.c (revision 89260905)
1 /*
2  * Virtio 9p backend
3  *
4  * Copyright IBM, Corp. 2010
5  *
6  * Authors:
7  *  Anthony Liguori   <aliguori@us.ibm.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13 
14 #include "qemu/osdep.h"
15 #include <glib/gprintf.h>
16 #include "hw/virtio/virtio.h"
17 #include "qapi/error.h"
18 #include "qemu/error-report.h"
19 #include "qemu/iov.h"
20 #include "qemu/main-loop.h"
21 #include "qemu/sockets.h"
22 #include "virtio-9p.h"
23 #include "fsdev/qemu-fsdev.h"
24 #include "9p-xattr.h"
25 #include "coth.h"
26 #include "trace.h"
27 #include "migration/blocker.h"
28 #include "sysemu/qtest.h"
29 
/*
 * File descriptor accounting.
 *
 * open_fd_hw and total_open_fd have external linkage and are not updated in
 * this part of the file (presumably a high-water mark and a running count of
 * open descriptors -- confirm against the code that maintains them).
 */
int open_fd_hw;
int total_open_fd;
/* Upper bound on how many fids v9fs_reclaim_fd() collects in one pass. */
static int open_fd_rc;
33 
/*
 * Open mode bits as consumed by omode_to_uflags(): the low two bits select
 * the access mode, the remaining values are modifier flags.
 */
enum {
    Oread   = 0x00,     /* mapped to O_RDONLY */
    Owrite  = 0x01,     /* mapped to O_WRONLY */
    Ordwr   = 0x02,     /* mapped to O_RDWR */
    Oexec   = 0x03,     /* mapped to O_RDONLY */
    Oexcl   = 0x04,     /* mapped to O_EXCL */
    Otrunc  = 0x10,     /* mapped to O_TRUNC */
    Orexec  = 0x20,     /* not translated by omode_to_uflags() */
    Orclose = 0x40,     /* not translated by omode_to_uflags() */
    Oappend = 0x80,     /* mapped to O_APPEND */
};
45 
46 static ssize_t pdu_marshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...)
47 {
48     ssize_t ret;
49     va_list ap;
50 
51     va_start(ap, fmt);
52     ret = pdu->s->transport->pdu_vmarshal(pdu, offset, fmt, ap);
53     va_end(ap);
54 
55     return ret;
56 }
57 
58 static ssize_t pdu_unmarshal(V9fsPDU *pdu, size_t offset, const char *fmt, ...)
59 {
60     ssize_t ret;
61     va_list ap;
62 
63     va_start(ap, fmt);
64     ret = pdu->s->transport->pdu_vunmarshal(pdu, offset, fmt, ap);
65     va_end(ap);
66 
67     return ret;
68 }
69 
70 static int omode_to_uflags(int8_t mode)
71 {
72     int ret = 0;
73 
74     switch (mode & 3) {
75     case Oread:
76         ret = O_RDONLY;
77         break;
78     case Ordwr:
79         ret = O_RDWR;
80         break;
81     case Owrite:
82         ret = O_WRONLY;
83         break;
84     case Oexec:
85         ret = O_RDONLY;
86         break;
87     }
88 
89     if (mode & Otrunc) {
90         ret |= O_TRUNC;
91     }
92 
93     if (mode & Oappend) {
94         ret |= O_APPEND;
95     }
96 
97     if (mode & Oexcl) {
98         ret |= O_EXCL;
99     }
100 
101     return ret;
102 }
103 
/* One row of the translation table used by dotl_to_open_flags(). */
typedef struct DotlOpenflagMap {
    int dotl_flag;      /* P9_DOTL_* bit tested in the client's flags */
    int open_flag;      /* host open(2) flag OR-ed in when that bit is set */
} DotlOpenflagMap;
108 
109 static int dotl_to_open_flags(int flags)
110 {
111     int i;
112     /*
113      * We have same bits for P9_DOTL_READONLY, P9_DOTL_WRONLY
114      * and P9_DOTL_NOACCESS
115      */
116     int oflags = flags & O_ACCMODE;
117 
118     DotlOpenflagMap dotl_oflag_map[] = {
119         { P9_DOTL_CREATE, O_CREAT },
120         { P9_DOTL_EXCL, O_EXCL },
121         { P9_DOTL_NOCTTY , O_NOCTTY },
122         { P9_DOTL_TRUNC, O_TRUNC },
123         { P9_DOTL_APPEND, O_APPEND },
124         { P9_DOTL_NONBLOCK, O_NONBLOCK } ,
125         { P9_DOTL_DSYNC, O_DSYNC },
126         { P9_DOTL_FASYNC, FASYNC },
127         { P9_DOTL_DIRECT, O_DIRECT },
128         { P9_DOTL_LARGEFILE, O_LARGEFILE },
129         { P9_DOTL_DIRECTORY, O_DIRECTORY },
130         { P9_DOTL_NOFOLLOW, O_NOFOLLOW },
131         { P9_DOTL_NOATIME, O_NOATIME },
132         { P9_DOTL_SYNC, O_SYNC },
133     };
134 
135     for (i = 0; i < ARRAY_SIZE(dotl_oflag_map); i++) {
136         if (flags & dotl_oflag_map[i].dotl_flag) {
137             oflags |= dotl_oflag_map[i].open_flag;
138         }
139     }
140 
141     return oflags;
142 }
143 
144 void cred_init(FsCred *credp)
145 {
146     credp->fc_uid = -1;
147     credp->fc_gid = -1;
148     credp->fc_mode = -1;
149     credp->fc_rdev = -1;
150 }
151 
152 static int get_dotl_openflags(V9fsState *s, int oflags)
153 {
154     int flags;
155     /*
156      * Filter the client open flags
157      */
158     flags = dotl_to_open_flags(oflags);
159     flags &= ~(O_NOCTTY | O_ASYNC | O_CREAT);
160     /*
161      * Ignore direct disk access hint until the server supports it.
162      */
163     flags &= ~O_DIRECT;
164     return flags;
165 }
166 
167 void v9fs_path_init(V9fsPath *path)
168 {
169     path->data = NULL;
170     path->size = 0;
171 }
172 
173 void v9fs_path_free(V9fsPath *path)
174 {
175     g_free(path->data);
176     path->data = NULL;
177     path->size = 0;
178 }
179 
180 
181 void GCC_FMT_ATTR(2, 3)
182 v9fs_path_sprintf(V9fsPath *path, const char *fmt, ...)
183 {
184     va_list ap;
185 
186     v9fs_path_free(path);
187 
188     va_start(ap, fmt);
189     /* Bump the size for including terminating NULL */
190     path->size = g_vasprintf(&path->data, fmt, ap) + 1;
191     va_end(ap);
192 }
193 
194 void v9fs_path_copy(V9fsPath *dst, const V9fsPath *src)
195 {
196     v9fs_path_free(dst);
197     dst->size = src->size;
198     dst->data = g_memdup(src->data, src->size);
199 }
200 
201 int v9fs_name_to_path(V9fsState *s, V9fsPath *dirpath,
202                       const char *name, V9fsPath *path)
203 {
204     int err;
205     err = s->ops->name_to_path(&s->ctx, dirpath, name, path);
206     if (err < 0) {
207         err = -errno;
208     }
209     return err;
210 }
211 
212 /*
213  * Return TRUE if s1 is an ancestor of s2.
214  *
215  * E.g. "a/b" is an ancestor of "a/b/c" but not of "a/bc/d".
216  * As a special case, We treat s1 as ancestor of s2 if they are same!
217  */
218 static int v9fs_path_is_ancestor(V9fsPath *s1, V9fsPath *s2)
219 {
220     if (!strncmp(s1->data, s2->data, s1->size - 1)) {
221         if (s2->data[s1->size - 1] == '\0' || s2->data[s1->size - 1] == '/') {
222             return 1;
223         }
224     }
225     return 0;
226 }
227 
228 static size_t v9fs_string_size(V9fsString *str)
229 {
230     return str->size;
231 }
232 
233 /*
234  * returns 0 if fid got re-opened, 1 if not, < 0 on error */
235 static int coroutine_fn v9fs_reopen_fid(V9fsPDU *pdu, V9fsFidState *f)
236 {
237     int err = 1;
238     if (f->fid_type == P9_FID_FILE) {
239         if (f->fs.fd == -1) {
240             do {
241                 err = v9fs_co_open(pdu, f, f->open_flags);
242             } while (err == -EINTR && !pdu->cancelled);
243         }
244     } else if (f->fid_type == P9_FID_DIR) {
245         if (f->fs.dir.stream == NULL) {
246             do {
247                 err = v9fs_co_opendir(pdu, f);
248             } while (err == -EINTR && !pdu->cancelled);
249         }
250     }
251     return err;
252 }
253 
254 static V9fsFidState *coroutine_fn get_fid(V9fsPDU *pdu, int32_t fid)
255 {
256     int err;
257     V9fsFidState *f;
258     V9fsState *s = pdu->s;
259 
260     for (f = s->fid_list; f; f = f->next) {
261         BUG_ON(f->clunked);
262         if (f->fid == fid) {
263             /*
264              * Update the fid ref upfront so that
265              * we don't get reclaimed when we yield
266              * in open later.
267              */
268             f->ref++;
269             /*
270              * check whether we need to reopen the
271              * file. We might have closed the fd
272              * while trying to free up some file
273              * descriptors.
274              */
275             err = v9fs_reopen_fid(pdu, f);
276             if (err < 0) {
277                 f->ref--;
278                 return NULL;
279             }
280             /*
281              * Mark the fid as referenced so that the LRU
282              * reclaim won't close the file descriptor
283              */
284             f->flags |= FID_REFERENCED;
285             return f;
286         }
287     }
288     return NULL;
289 }
290 
291 static V9fsFidState *alloc_fid(V9fsState *s, int32_t fid)
292 {
293     V9fsFidState *f;
294 
295     for (f = s->fid_list; f; f = f->next) {
296         /* If fid is already there return NULL */
297         BUG_ON(f->clunked);
298         if (f->fid == fid) {
299             return NULL;
300         }
301     }
302     f = g_malloc0(sizeof(V9fsFidState));
303     f->fid = fid;
304     f->fid_type = P9_FID_NONE;
305     f->ref = 1;
306     /*
307      * Mark the fid as referenced so that the LRU
308      * reclaim won't close the file descriptor
309      */
310     f->flags |= FID_REFERENCED;
311     f->next = s->fid_list;
312     s->fid_list = f;
313 
314     v9fs_readdir_init(&f->fs.dir);
315     v9fs_readdir_init(&f->fs_reclaim.dir);
316 
317     return f;
318 }
319 
320 static int coroutine_fn v9fs_xattr_fid_clunk(V9fsPDU *pdu, V9fsFidState *fidp)
321 {
322     int retval = 0;
323 
324     if (fidp->fs.xattr.xattrwalk_fid) {
325         /* getxattr/listxattr fid */
326         goto free_value;
327     }
328     /*
329      * if this is fid for setxattr. clunk should
330      * result in setxattr localcall
331      */
332     if (fidp->fs.xattr.len != fidp->fs.xattr.copied_len) {
333         /* clunk after partial write */
334         retval = -EINVAL;
335         goto free_out;
336     }
337     if (fidp->fs.xattr.len) {
338         retval = v9fs_co_lsetxattr(pdu, &fidp->path, &fidp->fs.xattr.name,
339                                    fidp->fs.xattr.value,
340                                    fidp->fs.xattr.len,
341                                    fidp->fs.xattr.flags);
342     } else {
343         retval = v9fs_co_lremovexattr(pdu, &fidp->path, &fidp->fs.xattr.name);
344     }
345 free_out:
346     v9fs_string_free(&fidp->fs.xattr.name);
347 free_value:
348     g_free(fidp->fs.xattr.value);
349     return retval;
350 }
351 
352 static int coroutine_fn free_fid(V9fsPDU *pdu, V9fsFidState *fidp)
353 {
354     int retval = 0;
355 
356     if (fidp->fid_type == P9_FID_FILE) {
357         /* If we reclaimed the fd no need to close */
358         if (fidp->fs.fd != -1) {
359             retval = v9fs_co_close(pdu, &fidp->fs);
360         }
361     } else if (fidp->fid_type == P9_FID_DIR) {
362         if (fidp->fs.dir.stream != NULL) {
363             retval = v9fs_co_closedir(pdu, &fidp->fs);
364         }
365     } else if (fidp->fid_type == P9_FID_XATTR) {
366         retval = v9fs_xattr_fid_clunk(pdu, fidp);
367     }
368     v9fs_path_free(&fidp->path);
369     g_free(fidp);
370     return retval;
371 }
372 
373 static int coroutine_fn put_fid(V9fsPDU *pdu, V9fsFidState *fidp)
374 {
375     BUG_ON(!fidp->ref);
376     fidp->ref--;
377     /*
378      * Don't free the fid if it is in reclaim list
379      */
380     if (!fidp->ref && fidp->clunked) {
381         if (fidp->fid == pdu->s->root_fid) {
382             /*
383              * if the clunked fid is root fid then we
384              * have unmounted the fs on the client side.
385              * delete the migration blocker. Ideally, this
386              * should be hooked to transport close notification
387              */
388             if (pdu->s->migration_blocker) {
389                 migrate_del_blocker(pdu->s->migration_blocker);
390                 error_free(pdu->s->migration_blocker);
391                 pdu->s->migration_blocker = NULL;
392             }
393         }
394         return free_fid(pdu, fidp);
395     }
396     return 0;
397 }
398 
399 static V9fsFidState *clunk_fid(V9fsState *s, int32_t fid)
400 {
401     V9fsFidState **fidpp, *fidp;
402 
403     for (fidpp = &s->fid_list; *fidpp; fidpp = &(*fidpp)->next) {
404         if ((*fidpp)->fid == fid) {
405             break;
406         }
407     }
408     if (*fidpp == NULL) {
409         return NULL;
410     }
411     fidp = *fidpp;
412     *fidpp = fidp->next;
413     fidp->clunked = 1;
414     return fidp;
415 }
416 
417 void coroutine_fn v9fs_reclaim_fd(V9fsPDU *pdu)
418 {
419     int reclaim_count = 0;
420     V9fsState *s = pdu->s;
421     V9fsFidState *f, *reclaim_list = NULL;
422 
423     for (f = s->fid_list; f; f = f->next) {
424         /*
425          * Unlink fids cannot be reclaimed. Check
426          * for them and skip them. Also skip fids
427          * currently being operated on.
428          */
429         if (f->ref || f->flags & FID_NON_RECLAIMABLE) {
430             continue;
431         }
432         /*
433          * if it is a recently referenced fid
434          * we leave the fid untouched and clear the
435          * reference bit. We come back to it later
436          * in the next iteration. (a simple LRU without
437          * moving list elements around)
438          */
439         if (f->flags & FID_REFERENCED) {
440             f->flags &= ~FID_REFERENCED;
441             continue;
442         }
443         /*
444          * Add fids to reclaim list.
445          */
446         if (f->fid_type == P9_FID_FILE) {
447             if (f->fs.fd != -1) {
448                 /*
449                  * Up the reference count so that
450                  * a clunk request won't free this fid
451                  */
452                 f->ref++;
453                 f->rclm_lst = reclaim_list;
454                 reclaim_list = f;
455                 f->fs_reclaim.fd = f->fs.fd;
456                 f->fs.fd = -1;
457                 reclaim_count++;
458             }
459         } else if (f->fid_type == P9_FID_DIR) {
460             if (f->fs.dir.stream != NULL) {
461                 /*
462                  * Up the reference count so that
463                  * a clunk request won't free this fid
464                  */
465                 f->ref++;
466                 f->rclm_lst = reclaim_list;
467                 reclaim_list = f;
468                 f->fs_reclaim.dir.stream = f->fs.dir.stream;
469                 f->fs.dir.stream = NULL;
470                 reclaim_count++;
471             }
472         }
473         if (reclaim_count >= open_fd_rc) {
474             break;
475         }
476     }
477     /*
478      * Now close the fid in reclaim list. Free them if they
479      * are already clunked.
480      */
481     while (reclaim_list) {
482         f = reclaim_list;
483         reclaim_list = f->rclm_lst;
484         if (f->fid_type == P9_FID_FILE) {
485             v9fs_co_close(pdu, &f->fs_reclaim);
486         } else if (f->fid_type == P9_FID_DIR) {
487             v9fs_co_closedir(pdu, &f->fs_reclaim);
488         }
489         f->rclm_lst = NULL;
490         /*
491          * Now drop the fid reference, free it
492          * if clunked.
493          */
494         put_fid(pdu, f);
495     }
496 }
497 
498 static int coroutine_fn v9fs_mark_fids_unreclaim(V9fsPDU *pdu, V9fsPath *path)
499 {
500     int err;
501     V9fsState *s = pdu->s;
502     V9fsFidState *fidp, head_fid;
503 
504     head_fid.next = s->fid_list;
505     for (fidp = s->fid_list; fidp; fidp = fidp->next) {
506         if (fidp->path.size != path->size) {
507             continue;
508         }
509         if (!memcmp(fidp->path.data, path->data, path->size)) {
510             /* Mark the fid non reclaimable. */
511             fidp->flags |= FID_NON_RECLAIMABLE;
512 
513             /* reopen the file/dir if already closed */
514             err = v9fs_reopen_fid(pdu, fidp);
515             if (err < 0) {
516                 return err;
517             }
518             /*
519              * Go back to head of fid list because
520              * the list could have got updated when
521              * switched to the worker thread
522              */
523             if (err == 0) {
524                 fidp = &head_fid;
525             }
526         }
527     }
528     return 0;
529 }
530 
531 static void coroutine_fn virtfs_reset(V9fsPDU *pdu)
532 {
533     V9fsState *s = pdu->s;
534     V9fsFidState *fidp;
535 
536     /* Free all fids */
537     while (s->fid_list) {
538         /* Get fid */
539         fidp = s->fid_list;
540         fidp->ref++;
541 
542         /* Clunk fid */
543         s->fid_list = fidp->next;
544         fidp->clunked = 1;
545 
546         put_fid(pdu, fidp);
547     }
548 }
549 
/* QID type bits; stat_to_qid() sets them for directories and symlinks. */
#define P9_QID_TYPE_DIR         0x80
#define P9_QID_TYPE_SYMLINK     0x02

/*
 * Bits of the 9P stat mode word above the low 0777 permission bits.
 * v9mode_to_mode() and stat_to_v9mode() translate the DIR, SYMLINK,
 * DEVICE, NAMED_PIPE, SOCKET, SETUID, SETGID and SETVTX bits to and from
 * host st_mode bits; the other bits are not referenced in this part of
 * the file.
 */
#define P9_STAT_MODE_DIR        0x80000000
#define P9_STAT_MODE_APPEND     0x40000000
#define P9_STAT_MODE_EXCL       0x20000000
#define P9_STAT_MODE_MOUNT      0x10000000
#define P9_STAT_MODE_AUTH       0x08000000
#define P9_STAT_MODE_TMP        0x04000000
#define P9_STAT_MODE_SYMLINK    0x02000000
#define P9_STAT_MODE_LINK       0x01000000
#define P9_STAT_MODE_DEVICE     0x00800000
#define P9_STAT_MODE_NAMED_PIPE 0x00200000
#define P9_STAT_MODE_SOCKET     0x00100000
#define P9_STAT_MODE_SETUID     0x00080000
#define P9_STAT_MODE_SETGID     0x00040000
#define P9_STAT_MODE_SETVTX     0x00010000

/* Union of the mode bits above that describe a file type. */
#define P9_STAT_MODE_TYPE_BITS (P9_STAT_MODE_DIR |          \
                                P9_STAT_MODE_SYMLINK |      \
                                P9_STAT_MODE_LINK |         \
                                P9_STAT_MODE_DEVICE |       \
                                P9_STAT_MODE_NAMED_PIPE |   \
                                P9_STAT_MODE_SOCKET)
574 
575 /* This is the algorithm from ufs in spfs */
576 static void stat_to_qid(const struct stat *stbuf, V9fsQID *qidp)
577 {
578     size_t size;
579 
580     memset(&qidp->path, 0, sizeof(qidp->path));
581     size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path));
582     memcpy(&qidp->path, &stbuf->st_ino, size);
583     qidp->version = stbuf->st_mtime ^ (stbuf->st_size << 8);
584     qidp->type = 0;
585     if (S_ISDIR(stbuf->st_mode)) {
586         qidp->type |= P9_QID_TYPE_DIR;
587     }
588     if (S_ISLNK(stbuf->st_mode)) {
589         qidp->type |= P9_QID_TYPE_SYMLINK;
590     }
591 }
592 
593 static int coroutine_fn fid_to_qid(V9fsPDU *pdu, V9fsFidState *fidp,
594                                    V9fsQID *qidp)
595 {
596     struct stat stbuf;
597     int err;
598 
599     err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
600     if (err < 0) {
601         return err;
602     }
603     stat_to_qid(&stbuf, qidp);
604     return 0;
605 }
606 
607 V9fsPDU *pdu_alloc(V9fsState *s)
608 {
609     V9fsPDU *pdu = NULL;
610 
611     if (!QLIST_EMPTY(&s->free_list)) {
612         pdu = QLIST_FIRST(&s->free_list);
613         QLIST_REMOVE(pdu, next);
614         QLIST_INSERT_HEAD(&s->active_list, pdu, next);
615     }
616     return pdu;
617 }
618 
619 void pdu_free(V9fsPDU *pdu)
620 {
621     V9fsState *s = pdu->s;
622 
623     g_assert(!pdu->cancelled);
624     QLIST_REMOVE(pdu, next);
625     QLIST_INSERT_HEAD(&s->free_list, pdu, next);
626 }
627 
628 static void coroutine_fn pdu_complete(V9fsPDU *pdu, ssize_t len)
629 {
630     int8_t id = pdu->id + 1; /* Response */
631     V9fsState *s = pdu->s;
632     int ret;
633 
634     /*
635      * The 9p spec requires that successfully cancelled pdus receive no reply.
636      * Sending a reply would confuse clients because they would
637      * assume that any EINTR is the actual result of the operation,
638      * rather than a consequence of the cancellation. However, if
639      * the operation completed (succesfully or with an error other
640      * than caused be cancellation), we do send out that reply, both
641      * for efficiency and to avoid confusing the rest of the state machine
642      * that assumes passing a non-error here will mean a successful
643      * transmission of the reply.
644      */
645     bool discard = pdu->cancelled && len == -EINTR;
646     if (discard) {
647         trace_v9fs_rcancel(pdu->tag, pdu->id);
648         pdu->size = 0;
649         goto out_notify;
650     }
651 
652     if (len < 0) {
653         int err = -len;
654         len = 7;
655 
656         if (s->proto_version != V9FS_PROTO_2000L) {
657             V9fsString str;
658 
659             str.data = strerror(err);
660             str.size = strlen(str.data);
661 
662             ret = pdu_marshal(pdu, len, "s", &str);
663             if (ret < 0) {
664                 goto out_notify;
665             }
666             len += ret;
667             id = P9_RERROR;
668         }
669 
670         ret = pdu_marshal(pdu, len, "d", err);
671         if (ret < 0) {
672             goto out_notify;
673         }
674         len += ret;
675 
676         if (s->proto_version == V9FS_PROTO_2000L) {
677             id = P9_RLERROR;
678         }
679         trace_v9fs_rerror(pdu->tag, pdu->id, err); /* Trace ERROR */
680     }
681 
682     /* fill out the header */
683     if (pdu_marshal(pdu, 0, "dbw", (int32_t)len, id, pdu->tag) < 0) {
684         goto out_notify;
685     }
686 
687     /* keep these in sync */
688     pdu->size = len;
689     pdu->id = id;
690 
691 out_notify:
692     pdu->s->transport->push_and_notify(pdu);
693 
694     /* Now wakeup anybody waiting in flush for this request */
695     if (!qemu_co_queue_next(&pdu->complete)) {
696         pdu_free(pdu);
697     }
698 }
699 
700 static mode_t v9mode_to_mode(uint32_t mode, V9fsString *extension)
701 {
702     mode_t ret;
703 
704     ret = mode & 0777;
705     if (mode & P9_STAT_MODE_DIR) {
706         ret |= S_IFDIR;
707     }
708 
709     if (mode & P9_STAT_MODE_SYMLINK) {
710         ret |= S_IFLNK;
711     }
712     if (mode & P9_STAT_MODE_SOCKET) {
713         ret |= S_IFSOCK;
714     }
715     if (mode & P9_STAT_MODE_NAMED_PIPE) {
716         ret |= S_IFIFO;
717     }
718     if (mode & P9_STAT_MODE_DEVICE) {
719         if (extension->size && extension->data[0] == 'c') {
720             ret |= S_IFCHR;
721         } else {
722             ret |= S_IFBLK;
723         }
724     }
725 
726     if (!(ret&~0777)) {
727         ret |= S_IFREG;
728     }
729 
730     if (mode & P9_STAT_MODE_SETUID) {
731         ret |= S_ISUID;
732     }
733     if (mode & P9_STAT_MODE_SETGID) {
734         ret |= S_ISGID;
735     }
736     if (mode & P9_STAT_MODE_SETVTX) {
737         ret |= S_ISVTX;
738     }
739 
740     return ret;
741 }
742 
743 static int donttouch_stat(V9fsStat *stat)
744 {
745     if (stat->type == -1 &&
746         stat->dev == -1 &&
747         stat->qid.type == -1 &&
748         stat->qid.version == -1 &&
749         stat->qid.path == -1 &&
750         stat->mode == -1 &&
751         stat->atime == -1 &&
752         stat->mtime == -1 &&
753         stat->length == -1 &&
754         !stat->name.size &&
755         !stat->uid.size &&
756         !stat->gid.size &&
757         !stat->muid.size &&
758         stat->n_uid == -1 &&
759         stat->n_gid == -1 &&
760         stat->n_muid == -1) {
761         return 1;
762     }
763 
764     return 0;
765 }
766 
767 static void v9fs_stat_init(V9fsStat *stat)
768 {
769     v9fs_string_init(&stat->name);
770     v9fs_string_init(&stat->uid);
771     v9fs_string_init(&stat->gid);
772     v9fs_string_init(&stat->muid);
773     v9fs_string_init(&stat->extension);
774 }
775 
776 static void v9fs_stat_free(V9fsStat *stat)
777 {
778     v9fs_string_free(&stat->name);
779     v9fs_string_free(&stat->uid);
780     v9fs_string_free(&stat->gid);
781     v9fs_string_free(&stat->muid);
782     v9fs_string_free(&stat->extension);
783 }
784 
785 static uint32_t stat_to_v9mode(const struct stat *stbuf)
786 {
787     uint32_t mode;
788 
789     mode = stbuf->st_mode & 0777;
790     if (S_ISDIR(stbuf->st_mode)) {
791         mode |= P9_STAT_MODE_DIR;
792     }
793 
794     if (S_ISLNK(stbuf->st_mode)) {
795         mode |= P9_STAT_MODE_SYMLINK;
796     }
797 
798     if (S_ISSOCK(stbuf->st_mode)) {
799         mode |= P9_STAT_MODE_SOCKET;
800     }
801 
802     if (S_ISFIFO(stbuf->st_mode)) {
803         mode |= P9_STAT_MODE_NAMED_PIPE;
804     }
805 
806     if (S_ISBLK(stbuf->st_mode) || S_ISCHR(stbuf->st_mode)) {
807         mode |= P9_STAT_MODE_DEVICE;
808     }
809 
810     if (stbuf->st_mode & S_ISUID) {
811         mode |= P9_STAT_MODE_SETUID;
812     }
813 
814     if (stbuf->st_mode & S_ISGID) {
815         mode |= P9_STAT_MODE_SETGID;
816     }
817 
818     if (stbuf->st_mode & S_ISVTX) {
819         mode |= P9_STAT_MODE_SETVTX;
820     }
821 
822     return mode;
823 }
824 
825 static int coroutine_fn stat_to_v9stat(V9fsPDU *pdu, V9fsPath *path,
826                                        const char *basename,
827                                        const struct stat *stbuf,
828                                        V9fsStat *v9stat)
829 {
830     int err;
831 
832     memset(v9stat, 0, sizeof(*v9stat));
833 
834     stat_to_qid(stbuf, &v9stat->qid);
835     v9stat->mode = stat_to_v9mode(stbuf);
836     v9stat->atime = stbuf->st_atime;
837     v9stat->mtime = stbuf->st_mtime;
838     v9stat->length = stbuf->st_size;
839 
840     v9fs_string_free(&v9stat->uid);
841     v9fs_string_free(&v9stat->gid);
842     v9fs_string_free(&v9stat->muid);
843 
844     v9stat->n_uid = stbuf->st_uid;
845     v9stat->n_gid = stbuf->st_gid;
846     v9stat->n_muid = 0;
847 
848     v9fs_string_free(&v9stat->extension);
849 
850     if (v9stat->mode & P9_STAT_MODE_SYMLINK) {
851         err = v9fs_co_readlink(pdu, path, &v9stat->extension);
852         if (err < 0) {
853             return err;
854         }
855     } else if (v9stat->mode & P9_STAT_MODE_DEVICE) {
856         v9fs_string_sprintf(&v9stat->extension, "%c %u %u",
857                 S_ISCHR(stbuf->st_mode) ? 'c' : 'b',
858                 major(stbuf->st_rdev), minor(stbuf->st_rdev));
859     } else if (S_ISDIR(stbuf->st_mode) || S_ISREG(stbuf->st_mode)) {
860         v9fs_string_sprintf(&v9stat->extension, "%s %lu",
861                 "HARDLINKCOUNT", (unsigned long)stbuf->st_nlink);
862     }
863 
864     v9fs_string_sprintf(&v9stat->name, "%s", basename);
865 
866     v9stat->size = 61 +
867         v9fs_string_size(&v9stat->name) +
868         v9fs_string_size(&v9stat->uid) +
869         v9fs_string_size(&v9stat->gid) +
870         v9fs_string_size(&v9stat->muid) +
871         v9fs_string_size(&v9stat->extension);
872     return 0;
873 }
874 
/*
 * Attribute selection bits, one per stat field.  stat_to_v9stat_dotl()
 * reports P9_STATS_BASIC in st_result_mask.
 */
#define P9_STATS_MODE          0x00000001ULL
#define P9_STATS_NLINK         0x00000002ULL
#define P9_STATS_UID           0x00000004ULL
#define P9_STATS_GID           0x00000008ULL
#define P9_STATS_RDEV          0x00000010ULL
#define P9_STATS_ATIME         0x00000020ULL
#define P9_STATS_MTIME         0x00000040ULL
#define P9_STATS_CTIME         0x00000080ULL
#define P9_STATS_INO           0x00000100ULL
#define P9_STATS_SIZE          0x00000200ULL
#define P9_STATS_BLOCKS        0x00000400ULL

/* Fields beyond the basic set. */
#define P9_STATS_BTIME         0x00000800ULL
#define P9_STATS_GEN           0x00001000ULL
#define P9_STATS_DATA_VERSION  0x00002000ULL

#define P9_STATS_BASIC         0x000007ffULL /* Mask for fields up to BLOCKS */
#define P9_STATS_ALL           0x00003fffULL /* Mask for All fields above */
893 
894 
895 static void stat_to_v9stat_dotl(V9fsState *s, const struct stat *stbuf,
896                                 V9fsStatDotl *v9lstat)
897 {
898     memset(v9lstat, 0, sizeof(*v9lstat));
899 
900     v9lstat->st_mode = stbuf->st_mode;
901     v9lstat->st_nlink = stbuf->st_nlink;
902     v9lstat->st_uid = stbuf->st_uid;
903     v9lstat->st_gid = stbuf->st_gid;
904     v9lstat->st_rdev = stbuf->st_rdev;
905     v9lstat->st_size = stbuf->st_size;
906     v9lstat->st_blksize = stbuf->st_blksize;
907     v9lstat->st_blocks = stbuf->st_blocks;
908     v9lstat->st_atime_sec = stbuf->st_atime;
909     v9lstat->st_atime_nsec = stbuf->st_atim.tv_nsec;
910     v9lstat->st_mtime_sec = stbuf->st_mtime;
911     v9lstat->st_mtime_nsec = stbuf->st_mtim.tv_nsec;
912     v9lstat->st_ctime_sec = stbuf->st_ctime;
913     v9lstat->st_ctime_nsec = stbuf->st_ctim.tv_nsec;
914     /* Currently we only support BASIC fields in stat */
915     v9lstat->st_result_mask = P9_STATS_BASIC;
916 
917     stat_to_qid(stbuf, &v9lstat->qid);
918 }
919 
/*
 * Debug helper: print the @cnt entries of the scatter/gather list @sg to
 * stdout on one line, in the form
 *
 *     sg[<cnt>]: {(<base>, <len>), (<base>, <len>), ...}
 *
 * iov_len is a size_t and is therefore printed with %zu.
 */
static void print_sg(struct iovec *sg, int cnt)
{
    int i;

    printf("sg[%d]: {", cnt);
    for (i = 0; i < cnt; i++) {
        if (i) {
            printf(", ");
        }
        printf("(%p, %zu)", sg[i].iov_base, sg[i].iov_len);
    }
    printf("}\n");
}
933 
934 /* Will call this only for path name based fid */
935 static void v9fs_fix_path(V9fsPath *dst, V9fsPath *src, int len)
936 {
937     V9fsPath str;
938     v9fs_path_init(&str);
939     v9fs_path_copy(&str, dst);
940     v9fs_path_sprintf(dst, "%s%s", src->data, str.data + len);
941     v9fs_path_free(&str);
942 }
943 
944 static inline bool is_ro_export(FsContext *ctx)
945 {
946     return ctx->export_flags & V9FS_RDONLY;
947 }
948 
949 static void coroutine_fn v9fs_version(void *opaque)
950 {
951     ssize_t err;
952     V9fsPDU *pdu = opaque;
953     V9fsState *s = pdu->s;
954     V9fsString version;
955     size_t offset = 7;
956 
957     v9fs_string_init(&version);
958     err = pdu_unmarshal(pdu, offset, "ds", &s->msize, &version);
959     if (err < 0) {
960         goto out;
961     }
962     trace_v9fs_version(pdu->tag, pdu->id, s->msize, version.data);
963 
964     virtfs_reset(pdu);
965 
966     if (!strcmp(version.data, "9P2000.u")) {
967         s->proto_version = V9FS_PROTO_2000U;
968     } else if (!strcmp(version.data, "9P2000.L")) {
969         s->proto_version = V9FS_PROTO_2000L;
970     } else {
971         v9fs_string_sprintf(&version, "unknown");
972     }
973 
974     err = pdu_marshal(pdu, offset, "ds", s->msize, &version);
975     if (err < 0) {
976         goto out;
977     }
978     err += offset;
979     trace_v9fs_version_return(pdu->tag, pdu->id, s->msize, version.data);
980 out:
981     pdu_complete(pdu, err);
982     v9fs_string_free(&version);
983 }
984 
985 static void coroutine_fn v9fs_attach(void *opaque)
986 {
987     V9fsPDU *pdu = opaque;
988     V9fsState *s = pdu->s;
989     int32_t fid, afid, n_uname;
990     V9fsString uname, aname;
991     V9fsFidState *fidp;
992     size_t offset = 7;
993     V9fsQID qid;
994     ssize_t err;
995     Error *local_err = NULL;
996 
997     v9fs_string_init(&uname);
998     v9fs_string_init(&aname);
999     err = pdu_unmarshal(pdu, offset, "ddssd", &fid,
1000                         &afid, &uname, &aname, &n_uname);
1001     if (err < 0) {
1002         goto out_nofid;
1003     }
1004     trace_v9fs_attach(pdu->tag, pdu->id, fid, afid, uname.data, aname.data);
1005 
1006     fidp = alloc_fid(s, fid);
1007     if (fidp == NULL) {
1008         err = -EINVAL;
1009         goto out_nofid;
1010     }
1011     fidp->uid = n_uname;
1012     err = v9fs_co_name_to_path(pdu, NULL, "/", &fidp->path);
1013     if (err < 0) {
1014         err = -EINVAL;
1015         clunk_fid(s, fid);
1016         goto out;
1017     }
1018     err = fid_to_qid(pdu, fidp, &qid);
1019     if (err < 0) {
1020         err = -EINVAL;
1021         clunk_fid(s, fid);
1022         goto out;
1023     }
1024 
1025     /*
1026      * disable migration if we haven't done already.
1027      * attach could get called multiple times for the same export.
1028      */
1029     if (!s->migration_blocker) {
1030         error_setg(&s->migration_blocker,
1031                    "Migration is disabled when VirtFS export path '%s' is mounted in the guest using mount_tag '%s'",
1032                    s->ctx.fs_root ? s->ctx.fs_root : "NULL", s->tag);
1033         err = migrate_add_blocker(s->migration_blocker, &local_err);
1034         if (local_err) {
1035             error_free(local_err);
1036             error_free(s->migration_blocker);
1037             s->migration_blocker = NULL;
1038             clunk_fid(s, fid);
1039             goto out;
1040         }
1041         s->root_fid = fid;
1042     }
1043 
1044     err = pdu_marshal(pdu, offset, "Q", &qid);
1045     if (err < 0) {
1046         clunk_fid(s, fid);
1047         goto out;
1048     }
1049     err += offset;
1050 
1051     memcpy(&s->root_qid, &qid, sizeof(qid));
1052     trace_v9fs_attach_return(pdu->tag, pdu->id,
1053                              qid.type, qid.version, qid.path);
1054 out:
1055     put_fid(pdu, fidp);
1056 out_nofid:
1057     pdu_complete(pdu, err);
1058     v9fs_string_free(&uname);
1059     v9fs_string_free(&aname);
1060 }
1061 
1062 static void coroutine_fn v9fs_stat(void *opaque)
1063 {
1064     int32_t fid;
1065     V9fsStat v9stat;
1066     ssize_t err = 0;
1067     size_t offset = 7;
1068     struct stat stbuf;
1069     V9fsFidState *fidp;
1070     V9fsPDU *pdu = opaque;
1071     char *basename;
1072 
1073     err = pdu_unmarshal(pdu, offset, "d", &fid);
1074     if (err < 0) {
1075         goto out_nofid;
1076     }
1077     trace_v9fs_stat(pdu->tag, pdu->id, fid);
1078 
1079     fidp = get_fid(pdu, fid);
1080     if (fidp == NULL) {
1081         err = -ENOENT;
1082         goto out_nofid;
1083     }
1084     err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1085     if (err < 0) {
1086         goto out;
1087     }
1088     basename = g_path_get_basename(fidp->path.data);
1089     err = stat_to_v9stat(pdu, &fidp->path, basename, &stbuf, &v9stat);
1090     g_free(basename);
1091     if (err < 0) {
1092         goto out;
1093     }
1094     err = pdu_marshal(pdu, offset, "wS", 0, &v9stat);
1095     if (err < 0) {
1096         v9fs_stat_free(&v9stat);
1097         goto out;
1098     }
1099     trace_v9fs_stat_return(pdu->tag, pdu->id, v9stat.mode,
1100                            v9stat.atime, v9stat.mtime, v9stat.length);
1101     err += offset;
1102     v9fs_stat_free(&v9stat);
1103 out:
1104     put_fid(pdu, fidp);
1105 out_nofid:
1106     pdu_complete(pdu, err);
1107 }
1108 
1109 static void coroutine_fn v9fs_getattr(void *opaque)
1110 {
1111     int32_t fid;
1112     size_t offset = 7;
1113     ssize_t retval = 0;
1114     struct stat stbuf;
1115     V9fsFidState *fidp;
1116     uint64_t request_mask;
1117     V9fsStatDotl v9stat_dotl;
1118     V9fsPDU *pdu = opaque;
1119     V9fsState *s = pdu->s;
1120 
1121     retval = pdu_unmarshal(pdu, offset, "dq", &fid, &request_mask);
1122     if (retval < 0) {
1123         goto out_nofid;
1124     }
1125     trace_v9fs_getattr(pdu->tag, pdu->id, fid, request_mask);
1126 
1127     fidp = get_fid(pdu, fid);
1128     if (fidp == NULL) {
1129         retval = -ENOENT;
1130         goto out_nofid;
1131     }
1132     /*
1133      * Currently we only support BASIC fields in stat, so there is no
1134      * need to look at request_mask.
1135      */
1136     retval = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1137     if (retval < 0) {
1138         goto out;
1139     }
1140     stat_to_v9stat_dotl(s, &stbuf, &v9stat_dotl);
1141 
1142     /*  fill st_gen if requested and supported by underlying fs */
1143     if (request_mask & P9_STATS_GEN) {
1144         retval = v9fs_co_st_gen(pdu, &fidp->path, stbuf.st_mode, &v9stat_dotl);
1145         switch (retval) {
1146         case 0:
1147             /* we have valid st_gen: update result mask */
1148             v9stat_dotl.st_result_mask |= P9_STATS_GEN;
1149             break;
1150         case -EINTR:
1151             /* request cancelled, e.g. by Tflush */
1152             goto out;
1153         default:
1154             /* failed to get st_gen: not fatal, ignore */
1155             break;
1156         }
1157     }
1158     retval = pdu_marshal(pdu, offset, "A", &v9stat_dotl);
1159     if (retval < 0) {
1160         goto out;
1161     }
1162     retval += offset;
1163     trace_v9fs_getattr_return(pdu->tag, pdu->id, v9stat_dotl.st_result_mask,
1164                               v9stat_dotl.st_mode, v9stat_dotl.st_uid,
1165                               v9stat_dotl.st_gid);
1166 out:
1167     put_fid(pdu, fidp);
1168 out_nofid:
1169     pdu_complete(pdu, retval);
1170 }
1171 
/*
 * Attribute flags
 *
 * Bits of the "valid" field of V9fsIattr, as tested by v9fs_setattr().
 */
#define P9_ATTR_MODE       (1 << 0)     /* apply iattr.mode via chmod */
#define P9_ATTR_UID        (1 << 1)     /* apply iattr.uid via chown */
#define P9_ATTR_GID        (1 << 2)     /* apply iattr.gid via chown */
#define P9_ATTR_SIZE       (1 << 3)     /* truncate to iattr.size */
#define P9_ATTR_ATIME      (1 << 4)     /* update the access time */
#define P9_ATTR_MTIME      (1 << 5)     /* update the modification time */
#define P9_ATTR_CTIME      (1 << 6)     /* on its own: chown(-1, -1) */
#define P9_ATTR_ATIME_SET  (1 << 7)     /* use iattr.atime_*, not "now" */
#define P9_ATTR_MTIME_SET  (1 << 8)     /* use iattr.mtime_*, not "now" */

/*
 * 127 == 0x7f: bits 0..6, i.e. P9_ATTR_MODE through P9_ATTR_CTIME.
 * The two *_SET modifier bits are deliberately outside this mask.
 */
#define P9_ATTR_MASK    127
1184 
1185 static void coroutine_fn v9fs_setattr(void *opaque)
1186 {
1187     int err = 0;
1188     int32_t fid;
1189     V9fsFidState *fidp;
1190     size_t offset = 7;
1191     V9fsIattr v9iattr;
1192     V9fsPDU *pdu = opaque;
1193 
1194     err = pdu_unmarshal(pdu, offset, "dI", &fid, &v9iattr);
1195     if (err < 0) {
1196         goto out_nofid;
1197     }
1198 
1199     trace_v9fs_setattr(pdu->tag, pdu->id, fid,
1200                        v9iattr.valid, v9iattr.mode, v9iattr.uid, v9iattr.gid,
1201                        v9iattr.size, v9iattr.atime_sec, v9iattr.mtime_sec);
1202 
1203     fidp = get_fid(pdu, fid);
1204     if (fidp == NULL) {
1205         err = -EINVAL;
1206         goto out_nofid;
1207     }
1208     if (v9iattr.valid & P9_ATTR_MODE) {
1209         err = v9fs_co_chmod(pdu, &fidp->path, v9iattr.mode);
1210         if (err < 0) {
1211             goto out;
1212         }
1213     }
1214     if (v9iattr.valid & (P9_ATTR_ATIME | P9_ATTR_MTIME)) {
1215         struct timespec times[2];
1216         if (v9iattr.valid & P9_ATTR_ATIME) {
1217             if (v9iattr.valid & P9_ATTR_ATIME_SET) {
1218                 times[0].tv_sec = v9iattr.atime_sec;
1219                 times[0].tv_nsec = v9iattr.atime_nsec;
1220             } else {
1221                 times[0].tv_nsec = UTIME_NOW;
1222             }
1223         } else {
1224             times[0].tv_nsec = UTIME_OMIT;
1225         }
1226         if (v9iattr.valid & P9_ATTR_MTIME) {
1227             if (v9iattr.valid & P9_ATTR_MTIME_SET) {
1228                 times[1].tv_sec = v9iattr.mtime_sec;
1229                 times[1].tv_nsec = v9iattr.mtime_nsec;
1230             } else {
1231                 times[1].tv_nsec = UTIME_NOW;
1232             }
1233         } else {
1234             times[1].tv_nsec = UTIME_OMIT;
1235         }
1236         err = v9fs_co_utimensat(pdu, &fidp->path, times);
1237         if (err < 0) {
1238             goto out;
1239         }
1240     }
1241     /*
1242      * If the only valid entry in iattr is ctime we can call
1243      * chown(-1,-1) to update the ctime of the file
1244      */
1245     if ((v9iattr.valid & (P9_ATTR_UID | P9_ATTR_GID)) ||
1246         ((v9iattr.valid & P9_ATTR_CTIME)
1247          && !((v9iattr.valid & P9_ATTR_MASK) & ~P9_ATTR_CTIME))) {
1248         if (!(v9iattr.valid & P9_ATTR_UID)) {
1249             v9iattr.uid = -1;
1250         }
1251         if (!(v9iattr.valid & P9_ATTR_GID)) {
1252             v9iattr.gid = -1;
1253         }
1254         err = v9fs_co_chown(pdu, &fidp->path, v9iattr.uid,
1255                             v9iattr.gid);
1256         if (err < 0) {
1257             goto out;
1258         }
1259     }
1260     if (v9iattr.valid & (P9_ATTR_SIZE)) {
1261         err = v9fs_co_truncate(pdu, &fidp->path, v9iattr.size);
1262         if (err < 0) {
1263             goto out;
1264         }
1265     }
1266     err = offset;
1267     trace_v9fs_setattr_return(pdu->tag, pdu->id);
1268 out:
1269     put_fid(pdu, fidp);
1270 out_nofid:
1271     pdu_complete(pdu, err);
1272 }
1273 
1274 static int v9fs_walk_marshal(V9fsPDU *pdu, uint16_t nwnames, V9fsQID *qids)
1275 {
1276     int i;
1277     ssize_t err;
1278     size_t offset = 7;
1279 
1280     err = pdu_marshal(pdu, offset, "w", nwnames);
1281     if (err < 0) {
1282         return err;
1283     }
1284     offset += err;
1285     for (i = 0; i < nwnames; i++) {
1286         err = pdu_marshal(pdu, offset, "Q", &qids[i]);
1287         if (err < 0) {
1288             return err;
1289         }
1290         offset += err;
1291     }
1292     return offset;
1293 }
1294 
/*
 * Tell whether @name is unusable as a single path component: it is illegal
 * when it is the empty string or contains a '/'.
 */
static bool name_is_illegal(const char *name)
{
    const char *p;

    if (name[0] == '\0') {
        return true;
    }
    for (p = name; *p != '\0'; p++) {
        if (*p == '/') {
            return true;
        }
    }
    return false;
}
1299 
1300 static bool not_same_qid(const V9fsQID *qid1, const V9fsQID *qid2)
1301 {
1302     return
1303         qid1->type != qid2->type ||
1304         qid1->version != qid2->version ||
1305         qid1->path != qid2->path;
1306 }
1307 
/*
 * Handler for a walk request.
 *
 * Request payload: fid, newfid, a 16-bit name count and that many names.
 * Starting from the path bound to fid, each name is resolved in turn and
 * lstat()ed; the resulting qids are returned to the client.  On success the
 * final path is bound to newfid (a freshly allocated fid, or fid itself
 * when both numbers are equal).
 *
 * At most P9_MAXWELEM names are accepted (-EINVAL beyond that) and each
 * name must pass name_is_illegal() (-ENOENT otherwise).  With zero names
 * the request just binds newfid to the same path as fid.
 */
static void coroutine_fn v9fs_walk(void *opaque)
{
    int name_idx;
    V9fsQID *qids = NULL;
    int i, err = 0;
    V9fsPath dpath, path;
    uint16_t nwnames;
    struct stat stbuf;
    size_t offset = 7;
    int32_t fid, newfid;
    V9fsString *wnames = NULL;
    V9fsFidState *fidp;
    V9fsFidState *newfidp = NULL;
    V9fsPDU *pdu = opaque;
    V9fsState *s = pdu->s;
    V9fsQID qid;

    err = pdu_unmarshal(pdu, offset, "ddw", &fid, &newfid, &nwnames);
    if (err < 0) {
        /* nwnames is not valid here, so the common cleanup cannot be used */
        pdu_complete(pdu, err);
        return ;
    }
    offset += err;

    trace_v9fs_walk(pdu->tag, pdu->id, fid, newfid, nwnames);

    if (nwnames && nwnames <= P9_MAXWELEM) {
        /*
         * Zero-filled arrays: the cleanup code frees all nwnames strings,
         * including those never unmarshalled after an early failure.
         */
        wnames = g_new0(V9fsString, nwnames);
        qids   = g_new0(V9fsQID, nwnames);
        for (i = 0; i < nwnames; i++) {
            err = pdu_unmarshal(pdu, offset, "s", &wnames[i]);
            if (err < 0) {
                goto out_nofid;
            }
            if (name_is_illegal(wnames[i].data)) {
                err = -ENOENT;
                goto out_nofid;
            }
            offset += err;
        }
    } else if (nwnames > P9_MAXWELEM) {
        err = -EINVAL;
        goto out_nofid;
    }
    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -ENOENT;
        goto out_nofid;
    }

    v9fs_path_init(&dpath);
    v9fs_path_init(&path);

    /* qid of the starting point, compared against the root qid below */
    err = fid_to_qid(pdu, fidp, &qid);
    if (err < 0) {
        goto out;
    }

    /*
     * Both dpath and path initially point to fidp.
     * Needed to handle request with nwnames == 0
     */
    v9fs_path_copy(&dpath, &fidp->path);
    v9fs_path_copy(&path, &fidp->path);
    for (name_idx = 0; name_idx < nwnames; name_idx++) {
        /*
         * The step is skipped only when the current qid equals the root
         * qid AND the name is "..": in that case path and qid are left
         * unchanged, so ".." at the export root stays at the root.
         */
        if (not_same_qid(&pdu->s->root_qid, &qid) ||
            strcmp("..", wnames[name_idx].data)) {
            err = v9fs_co_name_to_path(pdu, &dpath, wnames[name_idx].data,
                                       &path);
            if (err < 0) {
                goto out;
            }

            err = v9fs_co_lstat(pdu, &path, &stbuf);
            if (err < 0) {
                goto out;
            }
            stat_to_qid(&stbuf, &qid);
            /* the component just resolved is the parent of the next one */
            v9fs_path_copy(&dpath, &path);
        }
        memcpy(&qids[name_idx], &qid, sizeof(qid));
    }
    if (fid == newfid) {
        /* re-pointing a fid in place is refused once it has been opened */
        if (fidp->fid_type != P9_FID_NONE) {
            err = -EINVAL;
            goto out;
        }
        /* the fid's path is replaced while holding the path write lock */
        v9fs_path_write_lock(s);
        v9fs_path_copy(&fidp->path, &path);
        v9fs_path_unlock(s);
    } else {
        newfidp = alloc_fid(s, newfid);
        if (newfidp == NULL) {
            err = -EINVAL;
            goto out;
        }
        /* the new fid inherits the uid of the fid it was walked from */
        newfidp->uid = fidp->uid;
        v9fs_path_copy(&newfidp->path, &path);
    }
    err = v9fs_walk_marshal(pdu, nwnames, qids);
    trace_v9fs_walk_return(pdu->tag, pdu->id, nwnames, qids);
out:
    put_fid(pdu, fidp);
    if (newfidp) {
        put_fid(pdu, newfidp);
    }
    v9fs_path_free(&dpath);
    v9fs_path_free(&path);
out_nofid:
    pdu_complete(pdu, err);
    /* same condition as the one under which the arrays were allocated */
    if (nwnames && nwnames <= P9_MAXWELEM) {
        for (name_idx = 0; name_idx < nwnames; name_idx++) {
            v9fs_string_free(&wnames[name_idx]);
        }
        g_free(wnames);
        g_free(qids);
    }
}
1426 
1427 static int32_t coroutine_fn get_iounit(V9fsPDU *pdu, V9fsPath *path)
1428 {
1429     struct statfs stbuf;
1430     int32_t iounit = 0;
1431     V9fsState *s = pdu->s;
1432 
1433     /*
1434      * iounit should be multiples of f_bsize (host filesystem block size
1435      * and as well as less than (client msize - P9_IOHDRSZ))
1436      */
1437     if (!v9fs_co_statfs(pdu, path, &stbuf)) {
1438         iounit = stbuf.f_bsize;
1439         iounit *= (s->msize - P9_IOHDRSZ)/stbuf.f_bsize;
1440     }
1441     if (!iounit) {
1442         iounit = s->msize - P9_IOHDRSZ;
1443     }
1444     return iounit;
1445 }
1446 
1447 static void coroutine_fn v9fs_open(void *opaque)
1448 {
1449     int flags;
1450     int32_t fid;
1451     int32_t mode;
1452     V9fsQID qid;
1453     int iounit = 0;
1454     ssize_t err = 0;
1455     size_t offset = 7;
1456     struct stat stbuf;
1457     V9fsFidState *fidp;
1458     V9fsPDU *pdu = opaque;
1459     V9fsState *s = pdu->s;
1460 
1461     if (s->proto_version == V9FS_PROTO_2000L) {
1462         err = pdu_unmarshal(pdu, offset, "dd", &fid, &mode);
1463     } else {
1464         uint8_t modebyte;
1465         err = pdu_unmarshal(pdu, offset, "db", &fid, &modebyte);
1466         mode = modebyte;
1467     }
1468     if (err < 0) {
1469         goto out_nofid;
1470     }
1471     trace_v9fs_open(pdu->tag, pdu->id, fid, mode);
1472 
1473     fidp = get_fid(pdu, fid);
1474     if (fidp == NULL) {
1475         err = -ENOENT;
1476         goto out_nofid;
1477     }
1478     if (fidp->fid_type != P9_FID_NONE) {
1479         err = -EINVAL;
1480         goto out;
1481     }
1482 
1483     err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
1484     if (err < 0) {
1485         goto out;
1486     }
1487     stat_to_qid(&stbuf, &qid);
1488     if (S_ISDIR(stbuf.st_mode)) {
1489         err = v9fs_co_opendir(pdu, fidp);
1490         if (err < 0) {
1491             goto out;
1492         }
1493         fidp->fid_type = P9_FID_DIR;
1494         err = pdu_marshal(pdu, offset, "Qd", &qid, 0);
1495         if (err < 0) {
1496             goto out;
1497         }
1498         err += offset;
1499     } else {
1500         if (s->proto_version == V9FS_PROTO_2000L) {
1501             flags = get_dotl_openflags(s, mode);
1502         } else {
1503             flags = omode_to_uflags(mode);
1504         }
1505         if (is_ro_export(&s->ctx)) {
1506             if (mode & O_WRONLY || mode & O_RDWR ||
1507                 mode & O_APPEND || mode & O_TRUNC) {
1508                 err = -EROFS;
1509                 goto out;
1510             }
1511         }
1512         err = v9fs_co_open(pdu, fidp, flags);
1513         if (err < 0) {
1514             goto out;
1515         }
1516         fidp->fid_type = P9_FID_FILE;
1517         fidp->open_flags = flags;
1518         if (flags & O_EXCL) {
1519             /*
1520              * We let the host file system do O_EXCL check
1521              * We should not reclaim such fd
1522              */
1523             fidp->flags |= FID_NON_RECLAIMABLE;
1524         }
1525         iounit = get_iounit(pdu, &fidp->path);
1526         err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
1527         if (err < 0) {
1528             goto out;
1529         }
1530         err += offset;
1531     }
1532     trace_v9fs_open_return(pdu->tag, pdu->id,
1533                            qid.type, qid.version, qid.path, iounit);
1534 out:
1535     put_fid(pdu, fidp);
1536 out_nofid:
1537     pdu_complete(pdu, err);
1538 }
1539 
1540 static void coroutine_fn v9fs_lcreate(void *opaque)
1541 {
1542     int32_t dfid, flags, mode;
1543     gid_t gid;
1544     ssize_t err = 0;
1545     ssize_t offset = 7;
1546     V9fsString name;
1547     V9fsFidState *fidp;
1548     struct stat stbuf;
1549     V9fsQID qid;
1550     int32_t iounit;
1551     V9fsPDU *pdu = opaque;
1552 
1553     v9fs_string_init(&name);
1554     err = pdu_unmarshal(pdu, offset, "dsddd", &dfid,
1555                         &name, &flags, &mode, &gid);
1556     if (err < 0) {
1557         goto out_nofid;
1558     }
1559     trace_v9fs_lcreate(pdu->tag, pdu->id, dfid, flags, mode, gid);
1560 
1561     if (name_is_illegal(name.data)) {
1562         err = -ENOENT;
1563         goto out_nofid;
1564     }
1565 
1566     if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
1567         err = -EEXIST;
1568         goto out_nofid;
1569     }
1570 
1571     fidp = get_fid(pdu, dfid);
1572     if (fidp == NULL) {
1573         err = -ENOENT;
1574         goto out_nofid;
1575     }
1576     if (fidp->fid_type != P9_FID_NONE) {
1577         err = -EINVAL;
1578         goto out;
1579     }
1580 
1581     flags = get_dotl_openflags(pdu->s, flags);
1582     err = v9fs_co_open2(pdu, fidp, &name, gid,
1583                         flags | O_CREAT, mode, &stbuf);
1584     if (err < 0) {
1585         goto out;
1586     }
1587     fidp->fid_type = P9_FID_FILE;
1588     fidp->open_flags = flags;
1589     if (flags & O_EXCL) {
1590         /*
1591          * We let the host file system do O_EXCL check
1592          * We should not reclaim such fd
1593          */
1594         fidp->flags |= FID_NON_RECLAIMABLE;
1595     }
1596     iounit =  get_iounit(pdu, &fidp->path);
1597     stat_to_qid(&stbuf, &qid);
1598     err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
1599     if (err < 0) {
1600         goto out;
1601     }
1602     err += offset;
1603     trace_v9fs_lcreate_return(pdu->tag, pdu->id,
1604                               qid.type, qid.version, qid.path, iounit);
1605 out:
1606     put_fid(pdu, fidp);
1607 out_nofid:
1608     pdu_complete(pdu, err);
1609     v9fs_string_free(&name);
1610 }
1611 
1612 static void coroutine_fn v9fs_fsync(void *opaque)
1613 {
1614     int err;
1615     int32_t fid;
1616     int datasync;
1617     size_t offset = 7;
1618     V9fsFidState *fidp;
1619     V9fsPDU *pdu = opaque;
1620 
1621     err = pdu_unmarshal(pdu, offset, "dd", &fid, &datasync);
1622     if (err < 0) {
1623         goto out_nofid;
1624     }
1625     trace_v9fs_fsync(pdu->tag, pdu->id, fid, datasync);
1626 
1627     fidp = get_fid(pdu, fid);
1628     if (fidp == NULL) {
1629         err = -ENOENT;
1630         goto out_nofid;
1631     }
1632     err = v9fs_co_fsync(pdu, fidp, datasync);
1633     if (!err) {
1634         err = offset;
1635     }
1636     put_fid(pdu, fidp);
1637 out_nofid:
1638     pdu_complete(pdu, err);
1639 }
1640 
1641 static void coroutine_fn v9fs_clunk(void *opaque)
1642 {
1643     int err;
1644     int32_t fid;
1645     size_t offset = 7;
1646     V9fsFidState *fidp;
1647     V9fsPDU *pdu = opaque;
1648     V9fsState *s = pdu->s;
1649 
1650     err = pdu_unmarshal(pdu, offset, "d", &fid);
1651     if (err < 0) {
1652         goto out_nofid;
1653     }
1654     trace_v9fs_clunk(pdu->tag, pdu->id, fid);
1655 
1656     fidp = clunk_fid(s, fid);
1657     if (fidp == NULL) {
1658         err = -ENOENT;
1659         goto out_nofid;
1660     }
1661     /*
1662      * Bump the ref so that put_fid will
1663      * free the fid.
1664      */
1665     fidp->ref++;
1666     err = put_fid(pdu, fidp);
1667     if (!err) {
1668         err = offset;
1669     }
1670 out_nofid:
1671     pdu_complete(pdu, err);
1672 }
1673 
1674 /*
1675  * Create a QEMUIOVector for a sub-region of PDU iovecs
1676  *
1677  * @qiov:       uninitialized QEMUIOVector
1678  * @skip:       number of bytes to skip from beginning of PDU
1679  * @size:       number of bytes to include
1680  * @is_write:   true - write, false - read
1681  *
1682  * The resulting QEMUIOVector has heap-allocated iovecs and must be cleaned up
1683  * with qemu_iovec_destroy().
1684  */
1685 static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu,
1686                                     size_t skip, size_t size,
1687                                     bool is_write)
1688 {
1689     QEMUIOVector elem;
1690     struct iovec *iov;
1691     unsigned int niov;
1692 
1693     if (is_write) {
1694         pdu->s->transport->init_out_iov_from_pdu(pdu, &iov, &niov, size + skip);
1695     } else {
1696         pdu->s->transport->init_in_iov_from_pdu(pdu, &iov, &niov, size + skip);
1697     }
1698 
1699     qemu_iovec_init_external(&elem, iov, niov);
1700     qemu_iovec_init(qiov, niov);
1701     qemu_iovec_concat(qiov, &elem, skip, size);
1702 }
1703 
1704 static int v9fs_xattr_read(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
1705                            uint64_t off, uint32_t max_count)
1706 {
1707     ssize_t err;
1708     size_t offset = 7;
1709     uint64_t read_count;
1710     QEMUIOVector qiov_full;
1711 
1712     if (fidp->fs.xattr.len < off) {
1713         read_count = 0;
1714     } else {
1715         read_count = fidp->fs.xattr.len - off;
1716     }
1717     if (read_count > max_count) {
1718         read_count = max_count;
1719     }
1720     err = pdu_marshal(pdu, offset, "d", read_count);
1721     if (err < 0) {
1722         return err;
1723     }
1724     offset += err;
1725 
1726     v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, read_count, false);
1727     err = v9fs_pack(qiov_full.iov, qiov_full.niov, 0,
1728                     ((char *)fidp->fs.xattr.value) + off,
1729                     read_count);
1730     qemu_iovec_destroy(&qiov_full);
1731     if (err < 0) {
1732         return err;
1733     }
1734     offset += err;
1735     return offset;
1736 }
1737 
/*
 * Fill the reply of a read on a directory fid with stat entries.
 *
 * Directory entries are read one by one; each is lstat()ed, converted to
 * a V9fsStat and marshalled ("S") behind the 4-byte count field of the
 * reply, until the directory is exhausted or the next entry would not fit
 * in @max_count bytes.
 *
 * Returns the number of bytes marshalled, or a negative value on error.
 * When an entry does not fit or cannot be marshalled, the directory is
 * seeked back to the position following the last entry that was emitted.
 */
static int coroutine_fn v9fs_do_readdir_with_stat(V9fsPDU *pdu,
                                                  V9fsFidState *fidp,
                                                  uint32_t max_count)
{
    V9fsPath path;
    V9fsStat v9stat;
    int len, err = 0;
    int32_t count = 0;
    struct stat stbuf;
    off_t saved_dir_pos;
    struct dirent *dent;

    /* save the directory position */
    saved_dir_pos = v9fs_co_telldir(pdu, fidp);
    if (saved_dir_pos < 0) {
        return saved_dir_pos;
    }

    while (1) {
        v9fs_path_init(&path);

        /*
         * The lock is held from before the entry is read until after it
         * has been marshalled.  Every "break" below leaves the loop with
         * the lock still held and with path initialised; both are released
         * after the loop.
         */
        v9fs_readdir_lock(&fidp->fs.dir);

        err = v9fs_co_readdir(pdu, fidp, &dent);
        if (err || !dent) {
            /* error, or no entry returned: presumably end of directory */
            break;
        }
        err = v9fs_co_name_to_path(pdu, &fidp->path, dent->d_name, &path);
        if (err < 0) {
            break;
        }
        err = v9fs_co_lstat(pdu, &path, &stbuf);
        if (err < 0) {
            break;
        }
        err = stat_to_v9stat(pdu, &path, dent->d_name, &stbuf, &v9stat);
        if (err < 0) {
            break;
        }
        /*
         * NOTE(review): the "+ 2" presumably accounts for the 16-bit size
         * field that precedes the stat data on the wire - confirm.
         */
        if ((count + v9stat.size + 2) > max_count) {
            v9fs_readdir_unlock(&fidp->fs.dir);

            /* Ran out of buffer. Set dir back to old position and return */
            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
            v9fs_stat_free(&v9stat);
            v9fs_path_free(&path);
            return count;
        }

        /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
        len = pdu_marshal(pdu, 11 + count, "S", &v9stat);

        v9fs_readdir_unlock(&fidp->fs.dir);

        if (len < 0) {
            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
            v9fs_stat_free(&v9stat);
            v9fs_path_free(&path);
            return len;
        }
        count += len;
        v9fs_stat_free(&v9stat);
        v9fs_path_free(&path);
        /* a later rewind resumes right after the entry just emitted */
        saved_dir_pos = dent->d_off;
    }

    v9fs_readdir_unlock(&fidp->fs.dir);

    v9fs_path_free(&path);
    if (err < 0) {
        return err;
    }
    return count;
}
1812 
/*
 * Handler for a read request.
 *
 * Request payload ("dqd"): fid, 64-bit offset, maximum byte count.
 * What is read depends on the type of the fid:
 *   - P9_FID_DIR:   stat entries of the directory, produced by
 *                   v9fs_do_readdir_with_stat(); offset 0 rewinds the
 *                   directory first
 *   - P9_FID_FILE:  file data, read with v9fs_co_preadv() directly into
 *                   the reply buffers
 *   - P9_FID_XATTR: part of the xattr value, see v9fs_xattr_read()
 * Any other fid type fails with -EINVAL.
 *
 * Reply payload: 32-bit count followed by that many bytes of data.
 */
static void coroutine_fn v9fs_read(void *opaque)
{
    int32_t fid;
    uint64_t off;
    ssize_t err = 0;
    int32_t count = 0;
    size_t offset = 7;
    uint32_t max_count;
    V9fsFidState *fidp;
    V9fsPDU *pdu = opaque;
    V9fsState *s = pdu->s;

    err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &max_count);
    if (err < 0) {
        goto out_nofid;
    }
    trace_v9fs_read(pdu->tag, pdu->id, fid, off, max_count);

    fidp = get_fid(pdu, fid);
    if (fidp == NULL) {
        err = -EINVAL;
        goto out_nofid;
    }
    if (fidp->fid_type == P9_FID_DIR) {

        if (off == 0) {
            v9fs_co_rewinddir(pdu, fidp);
        }
        /* the entries are marshalled behind the count field by the helper */
        count = v9fs_do_readdir_with_stat(pdu, fidp, max_count);
        if (count < 0) {
            err = count;
            goto out;
        }
        err = pdu_marshal(pdu, offset, "d", count);
        if (err < 0) {
            goto out;
        }
        err += offset + count;
    } else if (fidp->fid_type == P9_FID_FILE) {
        QEMUIOVector qiov_full;
        QEMUIOVector qiov;
        int32_t len;

        /*
         * qiov_full maps the max_count bytes of the reply that follow the
         * 4-byte count field; qiov is re-pointed at the not yet filled
         * tail of that area before every read.
         */
        v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset + 4, max_count, false);
        qemu_iovec_init(&qiov, qiov_full.niov);
        do {
            qemu_iovec_reset(&qiov);
            qemu_iovec_concat(&qiov, &qiov_full, count, qiov_full.size - count);
            if (0) {
                print_sg(qiov.iov, qiov.niov);
            }
            /* Loop in case of EINTR */
            do {
                len = v9fs_co_preadv(pdu, fidp, qiov.iov, qiov.niov, off);
                if (len >= 0) {
                    off   += len;
                    count += len;
                }
            } while (len == -EINTR && !pdu->cancelled);
            if (len < 0) {
                /* IO error return the error */
                err = len;
                goto out_free_iovec;
            }
        /* stop once the buffer is full or a read returned 0 bytes */
        } while (count < max_count && len > 0);
        err = pdu_marshal(pdu, offset, "d", count);
        if (err < 0) {
            goto out_free_iovec;
        }
        err += offset + count;
out_free_iovec:
        qemu_iovec_destroy(&qiov);
        qemu_iovec_destroy(&qiov_full);
    } else if (fidp->fid_type == P9_FID_XATTR) {
        err = v9fs_xattr_read(s, pdu, fidp, off, max_count);
    } else {
        err = -EINVAL;
    }
    /* not reached by the directory branch's error paths (they goto out) */
    trace_v9fs_read_return(pdu->tag, pdu->id, count, err);
out:
    put_fid(pdu, fidp);
out_nofid:
    pdu_complete(pdu, err);
}
1897 
1898 static size_t v9fs_readdir_data_size(V9fsString *name)
1899 {
1900     /*
1901      * Size of each dirent on the wire: size of qid (13) + size of offset (8)
1902      * size of type (1) + size of name.size (2) + strlen(name.data)
1903      */
1904     return 24 + v9fs_string_size(name);
1905 }
1906 
/*
 * Fill the reply of a readdir request with directory entries.
 *
 * Entries are read one by one and marshalled ("Qqbs": qid, offset of the
 * next entry, type, name) behind the 4-byte count field of the reply,
 * until the directory is exhausted or the next entry would not fit in
 * @max_count bytes.
 *
 * Returns the number of bytes marshalled, or a negative value on error.
 * When an entry does not fit or cannot be marshalled, the directory is
 * seeked back to the position following the last entry that was emitted.
 */
static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp,
                                        int32_t max_count)
{
    size_t size;
    V9fsQID qid;
    V9fsString name;
    int len, err = 0;
    int32_t count = 0;
    off_t saved_dir_pos;
    struct dirent *dent;

    /* save the directory position */
    saved_dir_pos = v9fs_co_telldir(pdu, fidp);
    if (saved_dir_pos < 0) {
        return saved_dir_pos;
    }

    while (1) {
        /*
         * The lock is held from before the entry is read until after it
         * has been marshalled.  A "break" leaves the loop with the lock
         * still held; it is released after the loop.
         */
        v9fs_readdir_lock(&fidp->fs.dir);

        err = v9fs_co_readdir(pdu, fidp, &dent);
        if (err || !dent) {
            /* error, or no entry returned: presumably end of directory */
            break;
        }
        v9fs_string_init(&name);
        v9fs_string_sprintf(&name, "%s", dent->d_name);
        /*
         * NOTE(review): the left-hand side is a size_t, so the signed
         * max_count is converted to unsigned for this comparison; a
         * negative max_count would behave like a very large limit.
         */
        if ((count + v9fs_readdir_data_size(&name)) > max_count) {
            v9fs_readdir_unlock(&fidp->fs.dir);

            /* Ran out of buffer. Set dir back to old position and return */
            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
            v9fs_string_free(&name);
            return count;
        }
        /*
         * Fill up just the path field of qid because the client uses
         * only that. To fill the entire qid structure we will have
         * to stat each dirent found, which is expensive
         */
        size = MIN(sizeof(dent->d_ino), sizeof(qid.path));
        memcpy(&qid.path, &dent->d_ino, size);
        /* Fill the other fields with dummy values */
        qid.type = 0;
        qid.version = 0;

        /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */
        len = pdu_marshal(pdu, 11 + count, "Qqbs",
                          &qid, dent->d_off,
                          dent->d_type, &name);

        v9fs_readdir_unlock(&fidp->fs.dir);

        if (len < 0) {
            v9fs_co_seekdir(pdu, fidp, saved_dir_pos);
            v9fs_string_free(&name);
            return len;
        }
        count += len;
        v9fs_string_free(&name);
        /* a later rewind resumes right after the entry just emitted */
        saved_dir_pos = dent->d_off;
    }

    v9fs_readdir_unlock(&fidp->fs.dir);

    if (err < 0) {
        return err;
    }
    return count;
}
1976 
1977 static void coroutine_fn v9fs_readdir(void *opaque)
1978 {
1979     int32_t fid;
1980     V9fsFidState *fidp;
1981     ssize_t retval = 0;
1982     size_t offset = 7;
1983     uint64_t initial_offset;
1984     int32_t count;
1985     uint32_t max_count;
1986     V9fsPDU *pdu = opaque;
1987 
1988     retval = pdu_unmarshal(pdu, offset, "dqd", &fid,
1989                            &initial_offset, &max_count);
1990     if (retval < 0) {
1991         goto out_nofid;
1992     }
1993     trace_v9fs_readdir(pdu->tag, pdu->id, fid, initial_offset, max_count);
1994 
1995     fidp = get_fid(pdu, fid);
1996     if (fidp == NULL) {
1997         retval = -EINVAL;
1998         goto out_nofid;
1999     }
2000     if (!fidp->fs.dir.stream) {
2001         retval = -EINVAL;
2002         goto out;
2003     }
2004     if (initial_offset == 0) {
2005         v9fs_co_rewinddir(pdu, fidp);
2006     } else {
2007         v9fs_co_seekdir(pdu, fidp, initial_offset);
2008     }
2009     count = v9fs_do_readdir(pdu, fidp, max_count);
2010     if (count < 0) {
2011         retval = count;
2012         goto out;
2013     }
2014     retval = pdu_marshal(pdu, offset, "d", count);
2015     if (retval < 0) {
2016         goto out;
2017     }
2018     retval += count + offset;
2019     trace_v9fs_readdir_return(pdu->tag, pdu->id, count, retval);
2020 out:
2021     put_fid(pdu, fidp);
2022 out_nofid:
2023     pdu_complete(pdu, retval);
2024 }
2025 
2026 static int v9fs_xattr_write(V9fsState *s, V9fsPDU *pdu, V9fsFidState *fidp,
2027                             uint64_t off, uint32_t count,
2028                             struct iovec *sg, int cnt)
2029 {
2030     int i, to_copy;
2031     ssize_t err = 0;
2032     uint64_t write_count;
2033     size_t offset = 7;
2034 
2035 
2036     if (fidp->fs.xattr.len < off) {
2037         err = -ENOSPC;
2038         goto out;
2039     }
2040     write_count = fidp->fs.xattr.len - off;
2041     if (write_count > count) {
2042         write_count = count;
2043     }
2044     err = pdu_marshal(pdu, offset, "d", write_count);
2045     if (err < 0) {
2046         return err;
2047     }
2048     err += offset;
2049     fidp->fs.xattr.copied_len += write_count;
2050     /*
2051      * Now copy the content from sg list
2052      */
2053     for (i = 0; i < cnt; i++) {
2054         if (write_count > sg[i].iov_len) {
2055             to_copy = sg[i].iov_len;
2056         } else {
2057             to_copy = write_count;
2058         }
2059         memcpy((char *)fidp->fs.xattr.value + off, sg[i].iov_base, to_copy);
2060         /* updating vs->off since we are not using below */
2061         off += to_copy;
2062         write_count -= to_copy;
2063     }
2064 out:
2065     return err;
2066 }
2067 
2068 static void coroutine_fn v9fs_write(void *opaque)
2069 {
2070     ssize_t err;
2071     int32_t fid;
2072     uint64_t off;
2073     uint32_t count;
2074     int32_t len = 0;
2075     int32_t total = 0;
2076     size_t offset = 7;
2077     V9fsFidState *fidp;
2078     V9fsPDU *pdu = opaque;
2079     V9fsState *s = pdu->s;
2080     QEMUIOVector qiov_full;
2081     QEMUIOVector qiov;
2082 
2083     err = pdu_unmarshal(pdu, offset, "dqd", &fid, &off, &count);
2084     if (err < 0) {
2085         pdu_complete(pdu, err);
2086         return;
2087     }
2088     offset += err;
2089     v9fs_init_qiov_from_pdu(&qiov_full, pdu, offset, count, true);
2090     trace_v9fs_write(pdu->tag, pdu->id, fid, off, count, qiov_full.niov);
2091 
2092     fidp = get_fid(pdu, fid);
2093     if (fidp == NULL) {
2094         err = -EINVAL;
2095         goto out_nofid;
2096     }
2097     if (fidp->fid_type == P9_FID_FILE) {
2098         if (fidp->fs.fd == -1) {
2099             err = -EINVAL;
2100             goto out;
2101         }
2102     } else if (fidp->fid_type == P9_FID_XATTR) {
2103         /*
2104          * setxattr operation
2105          */
2106         err = v9fs_xattr_write(s, pdu, fidp, off, count,
2107                                qiov_full.iov, qiov_full.niov);
2108         goto out;
2109     } else {
2110         err = -EINVAL;
2111         goto out;
2112     }
2113     qemu_iovec_init(&qiov, qiov_full.niov);
2114     do {
2115         qemu_iovec_reset(&qiov);
2116         qemu_iovec_concat(&qiov, &qiov_full, total, qiov_full.size - total);
2117         if (0) {
2118             print_sg(qiov.iov, qiov.niov);
2119         }
2120         /* Loop in case of EINTR */
2121         do {
2122             len = v9fs_co_pwritev(pdu, fidp, qiov.iov, qiov.niov, off);
2123             if (len >= 0) {
2124                 off   += len;
2125                 total += len;
2126             }
2127         } while (len == -EINTR && !pdu->cancelled);
2128         if (len < 0) {
2129             /* IO error return the error */
2130             err = len;
2131             goto out_qiov;
2132         }
2133     } while (total < count && len > 0);
2134 
2135     offset = 7;
2136     err = pdu_marshal(pdu, offset, "d", total);
2137     if (err < 0) {
2138         goto out_qiov;
2139     }
2140     err += offset;
2141     trace_v9fs_write_return(pdu->tag, pdu->id, total, err);
2142 out_qiov:
2143     qemu_iovec_destroy(&qiov);
2144 out:
2145     put_fid(pdu, fidp);
2146 out_nofid:
2147     qemu_iovec_destroy(&qiov_full);
2148     pdu_complete(pdu, err);
2149 }
2150 
2151 static void coroutine_fn v9fs_create(void *opaque)
2152 {
2153     int32_t fid;
2154     int err = 0;
2155     size_t offset = 7;
2156     V9fsFidState *fidp;
2157     V9fsQID qid;
2158     int32_t perm;
2159     int8_t mode;
2160     V9fsPath path;
2161     struct stat stbuf;
2162     V9fsString name;
2163     V9fsString extension;
2164     int iounit;
2165     V9fsPDU *pdu = opaque;
2166     V9fsState *s = pdu->s;
2167 
2168     v9fs_path_init(&path);
2169     v9fs_string_init(&name);
2170     v9fs_string_init(&extension);
2171     err = pdu_unmarshal(pdu, offset, "dsdbs", &fid, &name,
2172                         &perm, &mode, &extension);
2173     if (err < 0) {
2174         goto out_nofid;
2175     }
2176     trace_v9fs_create(pdu->tag, pdu->id, fid, name.data, perm, mode);
2177 
2178     if (name_is_illegal(name.data)) {
2179         err = -ENOENT;
2180         goto out_nofid;
2181     }
2182 
2183     if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
2184         err = -EEXIST;
2185         goto out_nofid;
2186     }
2187 
2188     fidp = get_fid(pdu, fid);
2189     if (fidp == NULL) {
2190         err = -EINVAL;
2191         goto out_nofid;
2192     }
2193     if (fidp->fid_type != P9_FID_NONE) {
2194         err = -EINVAL;
2195         goto out;
2196     }
2197     if (perm & P9_STAT_MODE_DIR) {
2198         err = v9fs_co_mkdir(pdu, fidp, &name, perm & 0777,
2199                             fidp->uid, -1, &stbuf);
2200         if (err < 0) {
2201             goto out;
2202         }
2203         err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2204         if (err < 0) {
2205             goto out;
2206         }
2207         v9fs_path_write_lock(s);
2208         v9fs_path_copy(&fidp->path, &path);
2209         v9fs_path_unlock(s);
2210         err = v9fs_co_opendir(pdu, fidp);
2211         if (err < 0) {
2212             goto out;
2213         }
2214         fidp->fid_type = P9_FID_DIR;
2215     } else if (perm & P9_STAT_MODE_SYMLINK) {
2216         err = v9fs_co_symlink(pdu, fidp, &name,
2217                               extension.data, -1 , &stbuf);
2218         if (err < 0) {
2219             goto out;
2220         }
2221         err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2222         if (err < 0) {
2223             goto out;
2224         }
2225         v9fs_path_write_lock(s);
2226         v9fs_path_copy(&fidp->path, &path);
2227         v9fs_path_unlock(s);
2228     } else if (perm & P9_STAT_MODE_LINK) {
2229         int32_t ofid = atoi(extension.data);
2230         V9fsFidState *ofidp = get_fid(pdu, ofid);
2231         if (ofidp == NULL) {
2232             err = -EINVAL;
2233             goto out;
2234         }
2235         err = v9fs_co_link(pdu, ofidp, fidp, &name);
2236         put_fid(pdu, ofidp);
2237         if (err < 0) {
2238             goto out;
2239         }
2240         err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2241         if (err < 0) {
2242             fidp->fid_type = P9_FID_NONE;
2243             goto out;
2244         }
2245         v9fs_path_write_lock(s);
2246         v9fs_path_copy(&fidp->path, &path);
2247         v9fs_path_unlock(s);
2248         err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
2249         if (err < 0) {
2250             fidp->fid_type = P9_FID_NONE;
2251             goto out;
2252         }
2253     } else if (perm & P9_STAT_MODE_DEVICE) {
2254         char ctype;
2255         uint32_t major, minor;
2256         mode_t nmode = 0;
2257 
2258         if (sscanf(extension.data, "%c %u %u", &ctype, &major, &minor) != 3) {
2259             err = -errno;
2260             goto out;
2261         }
2262 
2263         switch (ctype) {
2264         case 'c':
2265             nmode = S_IFCHR;
2266             break;
2267         case 'b':
2268             nmode = S_IFBLK;
2269             break;
2270         default:
2271             err = -EIO;
2272             goto out;
2273         }
2274 
2275         nmode |= perm & 0777;
2276         err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2277                             makedev(major, minor), nmode, &stbuf);
2278         if (err < 0) {
2279             goto out;
2280         }
2281         err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2282         if (err < 0) {
2283             goto out;
2284         }
2285         v9fs_path_write_lock(s);
2286         v9fs_path_copy(&fidp->path, &path);
2287         v9fs_path_unlock(s);
2288     } else if (perm & P9_STAT_MODE_NAMED_PIPE) {
2289         err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2290                             0, S_IFIFO | (perm & 0777), &stbuf);
2291         if (err < 0) {
2292             goto out;
2293         }
2294         err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2295         if (err < 0) {
2296             goto out;
2297         }
2298         v9fs_path_write_lock(s);
2299         v9fs_path_copy(&fidp->path, &path);
2300         v9fs_path_unlock(s);
2301     } else if (perm & P9_STAT_MODE_SOCKET) {
2302         err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, -1,
2303                             0, S_IFSOCK | (perm & 0777), &stbuf);
2304         if (err < 0) {
2305             goto out;
2306         }
2307         err = v9fs_co_name_to_path(pdu, &fidp->path, name.data, &path);
2308         if (err < 0) {
2309             goto out;
2310         }
2311         v9fs_path_write_lock(s);
2312         v9fs_path_copy(&fidp->path, &path);
2313         v9fs_path_unlock(s);
2314     } else {
2315         err = v9fs_co_open2(pdu, fidp, &name, -1,
2316                             omode_to_uflags(mode)|O_CREAT, perm, &stbuf);
2317         if (err < 0) {
2318             goto out;
2319         }
2320         fidp->fid_type = P9_FID_FILE;
2321         fidp->open_flags = omode_to_uflags(mode);
2322         if (fidp->open_flags & O_EXCL) {
2323             /*
2324              * We let the host file system do O_EXCL check
2325              * We should not reclaim such fd
2326              */
2327             fidp->flags |= FID_NON_RECLAIMABLE;
2328         }
2329     }
2330     iounit = get_iounit(pdu, &fidp->path);
2331     stat_to_qid(&stbuf, &qid);
2332     err = pdu_marshal(pdu, offset, "Qd", &qid, iounit);
2333     if (err < 0) {
2334         goto out;
2335     }
2336     err += offset;
2337     trace_v9fs_create_return(pdu->tag, pdu->id,
2338                              qid.type, qid.version, qid.path, iounit);
2339 out:
2340     put_fid(pdu, fidp);
2341 out_nofid:
2342    pdu_complete(pdu, err);
2343    v9fs_string_free(&name);
2344    v9fs_string_free(&extension);
2345    v9fs_path_free(&path);
2346 }
2347 
2348 static void coroutine_fn v9fs_symlink(void *opaque)
2349 {
2350     V9fsPDU *pdu = opaque;
2351     V9fsString name;
2352     V9fsString symname;
2353     V9fsFidState *dfidp;
2354     V9fsQID qid;
2355     struct stat stbuf;
2356     int32_t dfid;
2357     int err = 0;
2358     gid_t gid;
2359     size_t offset = 7;
2360 
2361     v9fs_string_init(&name);
2362     v9fs_string_init(&symname);
2363     err = pdu_unmarshal(pdu, offset, "dssd", &dfid, &name, &symname, &gid);
2364     if (err < 0) {
2365         goto out_nofid;
2366     }
2367     trace_v9fs_symlink(pdu->tag, pdu->id, dfid, name.data, symname.data, gid);
2368 
2369     if (name_is_illegal(name.data)) {
2370         err = -ENOENT;
2371         goto out_nofid;
2372     }
2373 
2374     if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
2375         err = -EEXIST;
2376         goto out_nofid;
2377     }
2378 
2379     dfidp = get_fid(pdu, dfid);
2380     if (dfidp == NULL) {
2381         err = -EINVAL;
2382         goto out_nofid;
2383     }
2384     err = v9fs_co_symlink(pdu, dfidp, &name, symname.data, gid, &stbuf);
2385     if (err < 0) {
2386         goto out;
2387     }
2388     stat_to_qid(&stbuf, &qid);
2389     err =  pdu_marshal(pdu, offset, "Q", &qid);
2390     if (err < 0) {
2391         goto out;
2392     }
2393     err += offset;
2394     trace_v9fs_symlink_return(pdu->tag, pdu->id,
2395                               qid.type, qid.version, qid.path);
2396 out:
2397     put_fid(pdu, dfidp);
2398 out_nofid:
2399     pdu_complete(pdu, err);
2400     v9fs_string_free(&name);
2401     v9fs_string_free(&symname);
2402 }
2403 
2404 static void coroutine_fn v9fs_flush(void *opaque)
2405 {
2406     ssize_t err;
2407     int16_t tag;
2408     size_t offset = 7;
2409     V9fsPDU *cancel_pdu = NULL;
2410     V9fsPDU *pdu = opaque;
2411     V9fsState *s = pdu->s;
2412 
2413     err = pdu_unmarshal(pdu, offset, "w", &tag);
2414     if (err < 0) {
2415         pdu_complete(pdu, err);
2416         return;
2417     }
2418     trace_v9fs_flush(pdu->tag, pdu->id, tag);
2419 
2420     if (pdu->tag == tag) {
2421         warn_report("the guest sent a self-referencing 9P flush request");
2422     } else {
2423         QLIST_FOREACH(cancel_pdu, &s->active_list, next) {
2424             if (cancel_pdu->tag == tag) {
2425                 break;
2426             }
2427         }
2428     }
2429     if (cancel_pdu) {
2430         cancel_pdu->cancelled = 1;
2431         /*
2432          * Wait for pdu to complete.
2433          */
2434         qemu_co_queue_wait(&cancel_pdu->complete, NULL);
2435         if (!qemu_co_queue_next(&cancel_pdu->complete)) {
2436             cancel_pdu->cancelled = 0;
2437             pdu_free(cancel_pdu);
2438         }
2439     }
2440     pdu_complete(pdu, 7);
2441 }
2442 
2443 static void coroutine_fn v9fs_link(void *opaque)
2444 {
2445     V9fsPDU *pdu = opaque;
2446     int32_t dfid, oldfid;
2447     V9fsFidState *dfidp, *oldfidp;
2448     V9fsString name;
2449     size_t offset = 7;
2450     int err = 0;
2451 
2452     v9fs_string_init(&name);
2453     err = pdu_unmarshal(pdu, offset, "dds", &dfid, &oldfid, &name);
2454     if (err < 0) {
2455         goto out_nofid;
2456     }
2457     trace_v9fs_link(pdu->tag, pdu->id, dfid, oldfid, name.data);
2458 
2459     if (name_is_illegal(name.data)) {
2460         err = -ENOENT;
2461         goto out_nofid;
2462     }
2463 
2464     if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
2465         err = -EEXIST;
2466         goto out_nofid;
2467     }
2468 
2469     dfidp = get_fid(pdu, dfid);
2470     if (dfidp == NULL) {
2471         err = -ENOENT;
2472         goto out_nofid;
2473     }
2474 
2475     oldfidp = get_fid(pdu, oldfid);
2476     if (oldfidp == NULL) {
2477         err = -ENOENT;
2478         goto out;
2479     }
2480     err = v9fs_co_link(pdu, oldfidp, dfidp, &name);
2481     if (!err) {
2482         err = offset;
2483     }
2484     put_fid(pdu, oldfidp);
2485 out:
2486     put_fid(pdu, dfidp);
2487 out_nofid:
2488     v9fs_string_free(&name);
2489     pdu_complete(pdu, err);
2490 }
2491 
2492 /* Only works with path name based fid */
2493 static void coroutine_fn v9fs_remove(void *opaque)
2494 {
2495     int32_t fid;
2496     int err = 0;
2497     size_t offset = 7;
2498     V9fsFidState *fidp;
2499     V9fsPDU *pdu = opaque;
2500 
2501     err = pdu_unmarshal(pdu, offset, "d", &fid);
2502     if (err < 0) {
2503         goto out_nofid;
2504     }
2505     trace_v9fs_remove(pdu->tag, pdu->id, fid);
2506 
2507     fidp = get_fid(pdu, fid);
2508     if (fidp == NULL) {
2509         err = -EINVAL;
2510         goto out_nofid;
2511     }
2512     /* if fs driver is not path based, return EOPNOTSUPP */
2513     if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) {
2514         err = -EOPNOTSUPP;
2515         goto out_err;
2516     }
2517     /*
2518      * IF the file is unlinked, we cannot reopen
2519      * the file later. So don't reclaim fd
2520      */
2521     err = v9fs_mark_fids_unreclaim(pdu, &fidp->path);
2522     if (err < 0) {
2523         goto out_err;
2524     }
2525     err = v9fs_co_remove(pdu, &fidp->path);
2526     if (!err) {
2527         err = offset;
2528     }
2529 out_err:
2530     /* For TREMOVE we need to clunk the fid even on failed remove */
2531     clunk_fid(pdu->s, fidp->fid);
2532     put_fid(pdu, fidp);
2533 out_nofid:
2534     pdu_complete(pdu, err);
2535 }
2536 
2537 static void coroutine_fn v9fs_unlinkat(void *opaque)
2538 {
2539     int err = 0;
2540     V9fsString name;
2541     int32_t dfid, flags, rflags = 0;
2542     size_t offset = 7;
2543     V9fsPath path;
2544     V9fsFidState *dfidp;
2545     V9fsPDU *pdu = opaque;
2546 
2547     v9fs_string_init(&name);
2548     err = pdu_unmarshal(pdu, offset, "dsd", &dfid, &name, &flags);
2549     if (err < 0) {
2550         goto out_nofid;
2551     }
2552 
2553     if (name_is_illegal(name.data)) {
2554         err = -ENOENT;
2555         goto out_nofid;
2556     }
2557 
2558     if (!strcmp(".", name.data)) {
2559         err = -EINVAL;
2560         goto out_nofid;
2561     }
2562 
2563     if (!strcmp("..", name.data)) {
2564         err = -ENOTEMPTY;
2565         goto out_nofid;
2566     }
2567 
2568     if (flags & ~P9_DOTL_AT_REMOVEDIR) {
2569         err = -EINVAL;
2570         goto out_nofid;
2571     }
2572 
2573     if (flags & P9_DOTL_AT_REMOVEDIR) {
2574         rflags |= AT_REMOVEDIR;
2575     }
2576 
2577     dfidp = get_fid(pdu, dfid);
2578     if (dfidp == NULL) {
2579         err = -EINVAL;
2580         goto out_nofid;
2581     }
2582     /*
2583      * IF the file is unlinked, we cannot reopen
2584      * the file later. So don't reclaim fd
2585      */
2586     v9fs_path_init(&path);
2587     err = v9fs_co_name_to_path(pdu, &dfidp->path, name.data, &path);
2588     if (err < 0) {
2589         goto out_err;
2590     }
2591     err = v9fs_mark_fids_unreclaim(pdu, &path);
2592     if (err < 0) {
2593         goto out_err;
2594     }
2595     err = v9fs_co_unlinkat(pdu, &dfidp->path, &name, rflags);
2596     if (!err) {
2597         err = offset;
2598     }
2599 out_err:
2600     put_fid(pdu, dfidp);
2601     v9fs_path_free(&path);
2602 out_nofid:
2603     pdu_complete(pdu, err);
2604     v9fs_string_free(&name);
2605 }
2606 
2607 
2608 /* Only works with path name based fid */
2609 static int coroutine_fn v9fs_complete_rename(V9fsPDU *pdu, V9fsFidState *fidp,
2610                                              int32_t newdirfid,
2611                                              V9fsString *name)
2612 {
2613     int err = 0;
2614     V9fsPath new_path;
2615     V9fsFidState *tfidp;
2616     V9fsState *s = pdu->s;
2617     V9fsFidState *dirfidp = NULL;
2618 
2619     v9fs_path_init(&new_path);
2620     if (newdirfid != -1) {
2621         dirfidp = get_fid(pdu, newdirfid);
2622         if (dirfidp == NULL) {
2623             err = -ENOENT;
2624             goto out_nofid;
2625         }
2626         if (fidp->fid_type != P9_FID_NONE) {
2627             err = -EINVAL;
2628             goto out;
2629         }
2630         err = v9fs_co_name_to_path(pdu, &dirfidp->path, name->data, &new_path);
2631         if (err < 0) {
2632             goto out;
2633         }
2634     } else {
2635         char *dir_name = g_path_get_dirname(fidp->path.data);
2636         V9fsPath dir_path;
2637 
2638         v9fs_path_init(&dir_path);
2639         v9fs_path_sprintf(&dir_path, "%s", dir_name);
2640         g_free(dir_name);
2641 
2642         err = v9fs_co_name_to_path(pdu, &dir_path, name->data, &new_path);
2643         v9fs_path_free(&dir_path);
2644         if (err < 0) {
2645             goto out;
2646         }
2647     }
2648     err = v9fs_co_rename(pdu, &fidp->path, &new_path);
2649     if (err < 0) {
2650         goto out;
2651     }
2652     /*
2653      * Fixup fid's pointing to the old name to
2654      * start pointing to the new name
2655      */
2656     for (tfidp = s->fid_list; tfidp; tfidp = tfidp->next) {
2657         if (v9fs_path_is_ancestor(&fidp->path, &tfidp->path)) {
2658             /* replace the name */
2659             v9fs_fix_path(&tfidp->path, &new_path, strlen(fidp->path.data));
2660         }
2661     }
2662 out:
2663     if (dirfidp) {
2664         put_fid(pdu, dirfidp);
2665     }
2666     v9fs_path_free(&new_path);
2667 out_nofid:
2668     return err;
2669 }
2670 
2671 /* Only works with path name based fid */
2672 static void coroutine_fn v9fs_rename(void *opaque)
2673 {
2674     int32_t fid;
2675     ssize_t err = 0;
2676     size_t offset = 7;
2677     V9fsString name;
2678     int32_t newdirfid;
2679     V9fsFidState *fidp;
2680     V9fsPDU *pdu = opaque;
2681     V9fsState *s = pdu->s;
2682 
2683     v9fs_string_init(&name);
2684     err = pdu_unmarshal(pdu, offset, "dds", &fid, &newdirfid, &name);
2685     if (err < 0) {
2686         goto out_nofid;
2687     }
2688 
2689     if (name_is_illegal(name.data)) {
2690         err = -ENOENT;
2691         goto out_nofid;
2692     }
2693 
2694     if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
2695         err = -EISDIR;
2696         goto out_nofid;
2697     }
2698 
2699     fidp = get_fid(pdu, fid);
2700     if (fidp == NULL) {
2701         err = -ENOENT;
2702         goto out_nofid;
2703     }
2704     if (fidp->fid_type != P9_FID_NONE) {
2705         err = -EINVAL;
2706         goto out;
2707     }
2708     /* if fs driver is not path based, return EOPNOTSUPP */
2709     if (!(pdu->s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT)) {
2710         err = -EOPNOTSUPP;
2711         goto out;
2712     }
2713     v9fs_path_write_lock(s);
2714     err = v9fs_complete_rename(pdu, fidp, newdirfid, &name);
2715     v9fs_path_unlock(s);
2716     if (!err) {
2717         err = offset;
2718     }
2719 out:
2720     put_fid(pdu, fidp);
2721 out_nofid:
2722     pdu_complete(pdu, err);
2723     v9fs_string_free(&name);
2724 }
2725 
2726 static int coroutine_fn v9fs_fix_fid_paths(V9fsPDU *pdu, V9fsPath *olddir,
2727                                            V9fsString *old_name,
2728                                            V9fsPath *newdir,
2729                                            V9fsString *new_name)
2730 {
2731     V9fsFidState *tfidp;
2732     V9fsPath oldpath, newpath;
2733     V9fsState *s = pdu->s;
2734     int err;
2735 
2736     v9fs_path_init(&oldpath);
2737     v9fs_path_init(&newpath);
2738     err = v9fs_co_name_to_path(pdu, olddir, old_name->data, &oldpath);
2739     if (err < 0) {
2740         goto out;
2741     }
2742     err = v9fs_co_name_to_path(pdu, newdir, new_name->data, &newpath);
2743     if (err < 0) {
2744         goto out;
2745     }
2746 
2747     /*
2748      * Fixup fid's pointing to the old name to
2749      * start pointing to the new name
2750      */
2751     for (tfidp = s->fid_list; tfidp; tfidp = tfidp->next) {
2752         if (v9fs_path_is_ancestor(&oldpath, &tfidp->path)) {
2753             /* replace the name */
2754             v9fs_fix_path(&tfidp->path, &newpath, strlen(oldpath.data));
2755         }
2756     }
2757 out:
2758     v9fs_path_free(&oldpath);
2759     v9fs_path_free(&newpath);
2760     return err;
2761 }
2762 
2763 static int coroutine_fn v9fs_complete_renameat(V9fsPDU *pdu, int32_t olddirfid,
2764                                                V9fsString *old_name,
2765                                                int32_t newdirfid,
2766                                                V9fsString *new_name)
2767 {
2768     int err = 0;
2769     V9fsState *s = pdu->s;
2770     V9fsFidState *newdirfidp = NULL, *olddirfidp = NULL;
2771 
2772     olddirfidp = get_fid(pdu, olddirfid);
2773     if (olddirfidp == NULL) {
2774         err = -ENOENT;
2775         goto out;
2776     }
2777     if (newdirfid != -1) {
2778         newdirfidp = get_fid(pdu, newdirfid);
2779         if (newdirfidp == NULL) {
2780             err = -ENOENT;
2781             goto out;
2782         }
2783     } else {
2784         newdirfidp = get_fid(pdu, olddirfid);
2785     }
2786 
2787     err = v9fs_co_renameat(pdu, &olddirfidp->path, old_name,
2788                            &newdirfidp->path, new_name);
2789     if (err < 0) {
2790         goto out;
2791     }
2792     if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) {
2793         /* Only for path based fid  we need to do the below fixup */
2794         err = v9fs_fix_fid_paths(pdu, &olddirfidp->path, old_name,
2795                                  &newdirfidp->path, new_name);
2796     }
2797 out:
2798     if (olddirfidp) {
2799         put_fid(pdu, olddirfidp);
2800     }
2801     if (newdirfidp) {
2802         put_fid(pdu, newdirfidp);
2803     }
2804     return err;
2805 }
2806 
2807 static void coroutine_fn v9fs_renameat(void *opaque)
2808 {
2809     ssize_t err = 0;
2810     size_t offset = 7;
2811     V9fsPDU *pdu = opaque;
2812     V9fsState *s = pdu->s;
2813     int32_t olddirfid, newdirfid;
2814     V9fsString old_name, new_name;
2815 
2816     v9fs_string_init(&old_name);
2817     v9fs_string_init(&new_name);
2818     err = pdu_unmarshal(pdu, offset, "dsds", &olddirfid,
2819                         &old_name, &newdirfid, &new_name);
2820     if (err < 0) {
2821         goto out_err;
2822     }
2823 
2824     if (name_is_illegal(old_name.data) || name_is_illegal(new_name.data)) {
2825         err = -ENOENT;
2826         goto out_err;
2827     }
2828 
2829     if (!strcmp(".", old_name.data) || !strcmp("..", old_name.data) ||
2830         !strcmp(".", new_name.data) || !strcmp("..", new_name.data)) {
2831         err = -EISDIR;
2832         goto out_err;
2833     }
2834 
2835     v9fs_path_write_lock(s);
2836     err = v9fs_complete_renameat(pdu, olddirfid,
2837                                  &old_name, newdirfid, &new_name);
2838     v9fs_path_unlock(s);
2839     if (!err) {
2840         err = offset;
2841     }
2842 
2843 out_err:
2844     pdu_complete(pdu, err);
2845     v9fs_string_free(&old_name);
2846     v9fs_string_free(&new_name);
2847 }
2848 
2849 static void coroutine_fn v9fs_wstat(void *opaque)
2850 {
2851     int32_t fid;
2852     int err = 0;
2853     int16_t unused;
2854     V9fsStat v9stat;
2855     size_t offset = 7;
2856     struct stat stbuf;
2857     V9fsFidState *fidp;
2858     V9fsPDU *pdu = opaque;
2859     V9fsState *s = pdu->s;
2860 
2861     v9fs_stat_init(&v9stat);
2862     err = pdu_unmarshal(pdu, offset, "dwS", &fid, &unused, &v9stat);
2863     if (err < 0) {
2864         goto out_nofid;
2865     }
2866     trace_v9fs_wstat(pdu->tag, pdu->id, fid,
2867                      v9stat.mode, v9stat.atime, v9stat.mtime);
2868 
2869     fidp = get_fid(pdu, fid);
2870     if (fidp == NULL) {
2871         err = -EINVAL;
2872         goto out_nofid;
2873     }
2874     /* do we need to sync the file? */
2875     if (donttouch_stat(&v9stat)) {
2876         err = v9fs_co_fsync(pdu, fidp, 0);
2877         goto out;
2878     }
2879     if (v9stat.mode != -1) {
2880         uint32_t v9_mode;
2881         err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
2882         if (err < 0) {
2883             goto out;
2884         }
2885         v9_mode = stat_to_v9mode(&stbuf);
2886         if ((v9stat.mode & P9_STAT_MODE_TYPE_BITS) !=
2887             (v9_mode & P9_STAT_MODE_TYPE_BITS)) {
2888             /* Attempting to change the type */
2889             err = -EIO;
2890             goto out;
2891         }
2892         err = v9fs_co_chmod(pdu, &fidp->path,
2893                             v9mode_to_mode(v9stat.mode,
2894                                            &v9stat.extension));
2895         if (err < 0) {
2896             goto out;
2897         }
2898     }
2899     if (v9stat.mtime != -1 || v9stat.atime != -1) {
2900         struct timespec times[2];
2901         if (v9stat.atime != -1) {
2902             times[0].tv_sec = v9stat.atime;
2903             times[0].tv_nsec = 0;
2904         } else {
2905             times[0].tv_nsec = UTIME_OMIT;
2906         }
2907         if (v9stat.mtime != -1) {
2908             times[1].tv_sec = v9stat.mtime;
2909             times[1].tv_nsec = 0;
2910         } else {
2911             times[1].tv_nsec = UTIME_OMIT;
2912         }
2913         err = v9fs_co_utimensat(pdu, &fidp->path, times);
2914         if (err < 0) {
2915             goto out;
2916         }
2917     }
2918     if (v9stat.n_gid != -1 || v9stat.n_uid != -1) {
2919         err = v9fs_co_chown(pdu, &fidp->path, v9stat.n_uid, v9stat.n_gid);
2920         if (err < 0) {
2921             goto out;
2922         }
2923     }
2924     if (v9stat.name.size != 0) {
2925         v9fs_path_write_lock(s);
2926         err = v9fs_complete_rename(pdu, fidp, -1, &v9stat.name);
2927         v9fs_path_unlock(s);
2928         if (err < 0) {
2929             goto out;
2930         }
2931     }
2932     if (v9stat.length != -1) {
2933         err = v9fs_co_truncate(pdu, &fidp->path, v9stat.length);
2934         if (err < 0) {
2935             goto out;
2936         }
2937     }
2938     err = offset;
2939 out:
2940     put_fid(pdu, fidp);
2941 out_nofid:
2942     v9fs_stat_free(&v9stat);
2943     pdu_complete(pdu, err);
2944 }
2945 
2946 static int v9fs_fill_statfs(V9fsState *s, V9fsPDU *pdu, struct statfs *stbuf)
2947 {
2948     uint32_t f_type;
2949     uint32_t f_bsize;
2950     uint64_t f_blocks;
2951     uint64_t f_bfree;
2952     uint64_t f_bavail;
2953     uint64_t f_files;
2954     uint64_t f_ffree;
2955     uint64_t fsid_val;
2956     uint32_t f_namelen;
2957     size_t offset = 7;
2958     int32_t bsize_factor;
2959 
2960     /*
2961      * compute bsize factor based on host file system block size
2962      * and client msize
2963      */
2964     bsize_factor = (s->msize - P9_IOHDRSZ)/stbuf->f_bsize;
2965     if (!bsize_factor) {
2966         bsize_factor = 1;
2967     }
2968     f_type  = stbuf->f_type;
2969     f_bsize = stbuf->f_bsize;
2970     f_bsize *= bsize_factor;
2971     /*
2972      * f_bsize is adjusted(multiplied) by bsize factor, so we need to
2973      * adjust(divide) the number of blocks, free blocks and available
2974      * blocks by bsize factor
2975      */
2976     f_blocks = stbuf->f_blocks/bsize_factor;
2977     f_bfree  = stbuf->f_bfree/bsize_factor;
2978     f_bavail = stbuf->f_bavail/bsize_factor;
2979     f_files  = stbuf->f_files;
2980     f_ffree  = stbuf->f_ffree;
2981     fsid_val = (unsigned int) stbuf->f_fsid.__val[0] |
2982                (unsigned long long)stbuf->f_fsid.__val[1] << 32;
2983     f_namelen = stbuf->f_namelen;
2984 
2985     return pdu_marshal(pdu, offset, "ddqqqqqqd",
2986                        f_type, f_bsize, f_blocks, f_bfree,
2987                        f_bavail, f_files, f_ffree,
2988                        fsid_val, f_namelen);
2989 }
2990 
2991 static void coroutine_fn v9fs_statfs(void *opaque)
2992 {
2993     int32_t fid;
2994     ssize_t retval = 0;
2995     size_t offset = 7;
2996     V9fsFidState *fidp;
2997     struct statfs stbuf;
2998     V9fsPDU *pdu = opaque;
2999     V9fsState *s = pdu->s;
3000 
3001     retval = pdu_unmarshal(pdu, offset, "d", &fid);
3002     if (retval < 0) {
3003         goto out_nofid;
3004     }
3005     fidp = get_fid(pdu, fid);
3006     if (fidp == NULL) {
3007         retval = -ENOENT;
3008         goto out_nofid;
3009     }
3010     retval = v9fs_co_statfs(pdu, &fidp->path, &stbuf);
3011     if (retval < 0) {
3012         goto out;
3013     }
3014     retval = v9fs_fill_statfs(s, pdu, &stbuf);
3015     if (retval < 0) {
3016         goto out;
3017     }
3018     retval += offset;
3019 out:
3020     put_fid(pdu, fidp);
3021 out_nofid:
3022     pdu_complete(pdu, retval);
3023 }
3024 
3025 static void coroutine_fn v9fs_mknod(void *opaque)
3026 {
3027 
3028     int mode;
3029     gid_t gid;
3030     int32_t fid;
3031     V9fsQID qid;
3032     int err = 0;
3033     int major, minor;
3034     size_t offset = 7;
3035     V9fsString name;
3036     struct stat stbuf;
3037     V9fsFidState *fidp;
3038     V9fsPDU *pdu = opaque;
3039 
3040     v9fs_string_init(&name);
3041     err = pdu_unmarshal(pdu, offset, "dsdddd", &fid, &name, &mode,
3042                         &major, &minor, &gid);
3043     if (err < 0) {
3044         goto out_nofid;
3045     }
3046     trace_v9fs_mknod(pdu->tag, pdu->id, fid, mode, major, minor);
3047 
3048     if (name_is_illegal(name.data)) {
3049         err = -ENOENT;
3050         goto out_nofid;
3051     }
3052 
3053     if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
3054         err = -EEXIST;
3055         goto out_nofid;
3056     }
3057 
3058     fidp = get_fid(pdu, fid);
3059     if (fidp == NULL) {
3060         err = -ENOENT;
3061         goto out_nofid;
3062     }
3063     err = v9fs_co_mknod(pdu, fidp, &name, fidp->uid, gid,
3064                         makedev(major, minor), mode, &stbuf);
3065     if (err < 0) {
3066         goto out;
3067     }
3068     stat_to_qid(&stbuf, &qid);
3069     err = pdu_marshal(pdu, offset, "Q", &qid);
3070     if (err < 0) {
3071         goto out;
3072     }
3073     err += offset;
3074     trace_v9fs_mknod_return(pdu->tag, pdu->id,
3075                             qid.type, qid.version, qid.path);
3076 out:
3077     put_fid(pdu, fidp);
3078 out_nofid:
3079     pdu_complete(pdu, err);
3080     v9fs_string_free(&name);
3081 }
3082 
3083 /*
3084  * Implement posix byte range locking code
3085  * Server side handling of locking code is very simple, because 9p server in
3086  * QEMU can handle only one client. And most of the lock handling
3087  * (like conflict, merging) etc is done by the VFS layer itself, so no need to
3088  * do any thing in * qemu 9p server side lock code path.
3089  * So when a TLOCK request comes, always return success
3090  */
3091 static void coroutine_fn v9fs_lock(void *opaque)
3092 {
3093     V9fsFlock flock;
3094     size_t offset = 7;
3095     struct stat stbuf;
3096     V9fsFidState *fidp;
3097     int32_t fid, err = 0;
3098     V9fsPDU *pdu = opaque;
3099 
3100     v9fs_string_init(&flock.client_id);
3101     err = pdu_unmarshal(pdu, offset, "dbdqqds", &fid, &flock.type,
3102                         &flock.flags, &flock.start, &flock.length,
3103                         &flock.proc_id, &flock.client_id);
3104     if (err < 0) {
3105         goto out_nofid;
3106     }
3107     trace_v9fs_lock(pdu->tag, pdu->id, fid,
3108                     flock.type, flock.start, flock.length);
3109 
3110 
3111     /* We support only block flag now (that too ignored currently) */
3112     if (flock.flags & ~P9_LOCK_FLAGS_BLOCK) {
3113         err = -EINVAL;
3114         goto out_nofid;
3115     }
3116     fidp = get_fid(pdu, fid);
3117     if (fidp == NULL) {
3118         err = -ENOENT;
3119         goto out_nofid;
3120     }
3121     err = v9fs_co_fstat(pdu, fidp, &stbuf);
3122     if (err < 0) {
3123         goto out;
3124     }
3125     err = pdu_marshal(pdu, offset, "b", P9_LOCK_SUCCESS);
3126     if (err < 0) {
3127         goto out;
3128     }
3129     err += offset;
3130     trace_v9fs_lock_return(pdu->tag, pdu->id, P9_LOCK_SUCCESS);
3131 out:
3132     put_fid(pdu, fidp);
3133 out_nofid:
3134     pdu_complete(pdu, err);
3135     v9fs_string_free(&flock.client_id);
3136 }
3137 
3138 /*
3139  * When a TGETLOCK request comes, always return success because all lock
3140  * handling is done by client's VFS layer.
3141  */
3142 static void coroutine_fn v9fs_getlock(void *opaque)
3143 {
3144     size_t offset = 7;
3145     struct stat stbuf;
3146     V9fsFidState *fidp;
3147     V9fsGetlock glock;
3148     int32_t fid, err = 0;
3149     V9fsPDU *pdu = opaque;
3150 
3151     v9fs_string_init(&glock.client_id);
3152     err = pdu_unmarshal(pdu, offset, "dbqqds", &fid, &glock.type,
3153                         &glock.start, &glock.length, &glock.proc_id,
3154                         &glock.client_id);
3155     if (err < 0) {
3156         goto out_nofid;
3157     }
3158     trace_v9fs_getlock(pdu->tag, pdu->id, fid,
3159                        glock.type, glock.start, glock.length);
3160 
3161     fidp = get_fid(pdu, fid);
3162     if (fidp == NULL) {
3163         err = -ENOENT;
3164         goto out_nofid;
3165     }
3166     err = v9fs_co_fstat(pdu, fidp, &stbuf);
3167     if (err < 0) {
3168         goto out;
3169     }
3170     glock.type = P9_LOCK_TYPE_UNLCK;
3171     err = pdu_marshal(pdu, offset, "bqqds", glock.type,
3172                           glock.start, glock.length, glock.proc_id,
3173                           &glock.client_id);
3174     if (err < 0) {
3175         goto out;
3176     }
3177     err += offset;
3178     trace_v9fs_getlock_return(pdu->tag, pdu->id, glock.type, glock.start,
3179                               glock.length, glock.proc_id);
3180 out:
3181     put_fid(pdu, fidp);
3182 out_nofid:
3183     pdu_complete(pdu, err);
3184     v9fs_string_free(&glock.client_id);
3185 }
3186 
3187 static void coroutine_fn v9fs_mkdir(void *opaque)
3188 {
3189     V9fsPDU *pdu = opaque;
3190     size_t offset = 7;
3191     int32_t fid;
3192     struct stat stbuf;
3193     V9fsQID qid;
3194     V9fsString name;
3195     V9fsFidState *fidp;
3196     gid_t gid;
3197     int mode;
3198     int err = 0;
3199 
3200     v9fs_string_init(&name);
3201     err = pdu_unmarshal(pdu, offset, "dsdd", &fid, &name, &mode, &gid);
3202     if (err < 0) {
3203         goto out_nofid;
3204     }
3205     trace_v9fs_mkdir(pdu->tag, pdu->id, fid, name.data, mode, gid);
3206 
3207     if (name_is_illegal(name.data)) {
3208         err = -ENOENT;
3209         goto out_nofid;
3210     }
3211 
3212     if (!strcmp(".", name.data) || !strcmp("..", name.data)) {
3213         err = -EEXIST;
3214         goto out_nofid;
3215     }
3216 
3217     fidp = get_fid(pdu, fid);
3218     if (fidp == NULL) {
3219         err = -ENOENT;
3220         goto out_nofid;
3221     }
3222     err = v9fs_co_mkdir(pdu, fidp, &name, mode, fidp->uid, gid, &stbuf);
3223     if (err < 0) {
3224         goto out;
3225     }
3226     stat_to_qid(&stbuf, &qid);
3227     err = pdu_marshal(pdu, offset, "Q", &qid);
3228     if (err < 0) {
3229         goto out;
3230     }
3231     err += offset;
3232     trace_v9fs_mkdir_return(pdu->tag, pdu->id,
3233                             qid.type, qid.version, qid.path, err);
3234 out:
3235     put_fid(pdu, fidp);
3236 out_nofid:
3237     pdu_complete(pdu, err);
3238     v9fs_string_free(&name);
3239 }
3240 
3241 static void coroutine_fn v9fs_xattrwalk(void *opaque)
3242 {
3243     int64_t size;
3244     V9fsString name;
3245     ssize_t err = 0;
3246     size_t offset = 7;
3247     int32_t fid, newfid;
3248     V9fsFidState *file_fidp;
3249     V9fsFidState *xattr_fidp = NULL;
3250     V9fsPDU *pdu = opaque;
3251     V9fsState *s = pdu->s;
3252 
3253     v9fs_string_init(&name);
3254     err = pdu_unmarshal(pdu, offset, "dds", &fid, &newfid, &name);
3255     if (err < 0) {
3256         goto out_nofid;
3257     }
3258     trace_v9fs_xattrwalk(pdu->tag, pdu->id, fid, newfid, name.data);
3259 
3260     file_fidp = get_fid(pdu, fid);
3261     if (file_fidp == NULL) {
3262         err = -ENOENT;
3263         goto out_nofid;
3264     }
3265     xattr_fidp = alloc_fid(s, newfid);
3266     if (xattr_fidp == NULL) {
3267         err = -EINVAL;
3268         goto out;
3269     }
3270     v9fs_path_copy(&xattr_fidp->path, &file_fidp->path);
3271     if (!v9fs_string_size(&name)) {
3272         /*
3273          * listxattr request. Get the size first
3274          */
3275         size = v9fs_co_llistxattr(pdu, &xattr_fidp->path, NULL, 0);
3276         if (size < 0) {
3277             err = size;
3278             clunk_fid(s, xattr_fidp->fid);
3279             goto out;
3280         }
3281         /*
3282          * Read the xattr value
3283          */
3284         xattr_fidp->fs.xattr.len = size;
3285         xattr_fidp->fid_type = P9_FID_XATTR;
3286         xattr_fidp->fs.xattr.xattrwalk_fid = true;
3287         xattr_fidp->fs.xattr.value = g_malloc0(size);
3288         if (size) {
3289             err = v9fs_co_llistxattr(pdu, &xattr_fidp->path,
3290                                      xattr_fidp->fs.xattr.value,
3291                                      xattr_fidp->fs.xattr.len);
3292             if (err < 0) {
3293                 clunk_fid(s, xattr_fidp->fid);
3294                 goto out;
3295             }
3296         }
3297         err = pdu_marshal(pdu, offset, "q", size);
3298         if (err < 0) {
3299             goto out;
3300         }
3301         err += offset;
3302     } else {
3303         /*
3304          * specific xattr fid. We check for xattr
3305          * presence also collect the xattr size
3306          */
3307         size = v9fs_co_lgetxattr(pdu, &xattr_fidp->path,
3308                                  &name, NULL, 0);
3309         if (size < 0) {
3310             err = size;
3311             clunk_fid(s, xattr_fidp->fid);
3312             goto out;
3313         }
3314         /*
3315          * Read the xattr value
3316          */
3317         xattr_fidp->fs.xattr.len = size;
3318         xattr_fidp->fid_type = P9_FID_XATTR;
3319         xattr_fidp->fs.xattr.xattrwalk_fid = true;
3320         xattr_fidp->fs.xattr.value = g_malloc0(size);
3321         if (size) {
3322             err = v9fs_co_lgetxattr(pdu, &xattr_fidp->path,
3323                                     &name, xattr_fidp->fs.xattr.value,
3324                                     xattr_fidp->fs.xattr.len);
3325             if (err < 0) {
3326                 clunk_fid(s, xattr_fidp->fid);
3327                 goto out;
3328             }
3329         }
3330         err = pdu_marshal(pdu, offset, "q", size);
3331         if (err < 0) {
3332             goto out;
3333         }
3334         err += offset;
3335     }
3336     trace_v9fs_xattrwalk_return(pdu->tag, pdu->id, size);
3337 out:
3338     put_fid(pdu, file_fidp);
3339     if (xattr_fidp) {
3340         put_fid(pdu, xattr_fidp);
3341     }
3342 out_nofid:
3343     pdu_complete(pdu, err);
3344     v9fs_string_free(&name);
3345 }
3346 
3347 static void coroutine_fn v9fs_xattrcreate(void *opaque)
3348 {
3349     int flags, rflags = 0;
3350     int32_t fid;
3351     uint64_t size;
3352     ssize_t err = 0;
3353     V9fsString name;
3354     size_t offset = 7;
3355     V9fsFidState *file_fidp;
3356     V9fsFidState *xattr_fidp;
3357     V9fsPDU *pdu = opaque;
3358 
3359     v9fs_string_init(&name);
3360     err = pdu_unmarshal(pdu, offset, "dsqd", &fid, &name, &size, &flags);
3361     if (err < 0) {
3362         goto out_nofid;
3363     }
3364     trace_v9fs_xattrcreate(pdu->tag, pdu->id, fid, name.data, size, flags);
3365 
3366     if (flags & ~(P9_XATTR_CREATE | P9_XATTR_REPLACE)) {
3367         err = -EINVAL;
3368         goto out_nofid;
3369     }
3370 
3371     if (flags & P9_XATTR_CREATE) {
3372         rflags |= XATTR_CREATE;
3373     }
3374 
3375     if (flags & P9_XATTR_REPLACE) {
3376         rflags |= XATTR_REPLACE;
3377     }
3378 
3379     if (size > XATTR_SIZE_MAX) {
3380         err = -E2BIG;
3381         goto out_nofid;
3382     }
3383 
3384     file_fidp = get_fid(pdu, fid);
3385     if (file_fidp == NULL) {
3386         err = -EINVAL;
3387         goto out_nofid;
3388     }
3389     if (file_fidp->fid_type != P9_FID_NONE) {
3390         err = -EINVAL;
3391         goto out_put_fid;
3392     }
3393 
3394     /* Make the file fid point to xattr */
3395     xattr_fidp = file_fidp;
3396     xattr_fidp->fid_type = P9_FID_XATTR;
3397     xattr_fidp->fs.xattr.copied_len = 0;
3398     xattr_fidp->fs.xattr.xattrwalk_fid = false;
3399     xattr_fidp->fs.xattr.len = size;
3400     xattr_fidp->fs.xattr.flags = rflags;
3401     v9fs_string_init(&xattr_fidp->fs.xattr.name);
3402     v9fs_string_copy(&xattr_fidp->fs.xattr.name, &name);
3403     xattr_fidp->fs.xattr.value = g_malloc0(size);
3404     err = offset;
3405 out_put_fid:
3406     put_fid(pdu, file_fidp);
3407 out_nofid:
3408     pdu_complete(pdu, err);
3409     v9fs_string_free(&name);
3410 }
3411 
3412 static void coroutine_fn v9fs_readlink(void *opaque)
3413 {
3414     V9fsPDU *pdu = opaque;
3415     size_t offset = 7;
3416     V9fsString target;
3417     int32_t fid;
3418     int err = 0;
3419     V9fsFidState *fidp;
3420 
3421     err = pdu_unmarshal(pdu, offset, "d", &fid);
3422     if (err < 0) {
3423         goto out_nofid;
3424     }
3425     trace_v9fs_readlink(pdu->tag, pdu->id, fid);
3426     fidp = get_fid(pdu, fid);
3427     if (fidp == NULL) {
3428         err = -ENOENT;
3429         goto out_nofid;
3430     }
3431 
3432     v9fs_string_init(&target);
3433     err = v9fs_co_readlink(pdu, &fidp->path, &target);
3434     if (err < 0) {
3435         goto out;
3436     }
3437     err = pdu_marshal(pdu, offset, "s", &target);
3438     if (err < 0) {
3439         v9fs_string_free(&target);
3440         goto out;
3441     }
3442     err += offset;
3443     trace_v9fs_readlink_return(pdu->tag, pdu->id, target.data);
3444     v9fs_string_free(&target);
3445 out:
3446     put_fid(pdu, fidp);
3447 out_nofid:
3448     pdu_complete(pdu, err);
3449 }
3450 
3451 static CoroutineEntry *pdu_co_handlers[] = {
3452     [P9_TREADDIR] = v9fs_readdir,
3453     [P9_TSTATFS] = v9fs_statfs,
3454     [P9_TGETATTR] = v9fs_getattr,
3455     [P9_TSETATTR] = v9fs_setattr,
3456     [P9_TXATTRWALK] = v9fs_xattrwalk,
3457     [P9_TXATTRCREATE] = v9fs_xattrcreate,
3458     [P9_TMKNOD] = v9fs_mknod,
3459     [P9_TRENAME] = v9fs_rename,
3460     [P9_TLOCK] = v9fs_lock,
3461     [P9_TGETLOCK] = v9fs_getlock,
3462     [P9_TRENAMEAT] = v9fs_renameat,
3463     [P9_TREADLINK] = v9fs_readlink,
3464     [P9_TUNLINKAT] = v9fs_unlinkat,
3465     [P9_TMKDIR] = v9fs_mkdir,
3466     [P9_TVERSION] = v9fs_version,
3467     [P9_TLOPEN] = v9fs_open,
3468     [P9_TATTACH] = v9fs_attach,
3469     [P9_TSTAT] = v9fs_stat,
3470     [P9_TWALK] = v9fs_walk,
3471     [P9_TCLUNK] = v9fs_clunk,
3472     [P9_TFSYNC] = v9fs_fsync,
3473     [P9_TOPEN] = v9fs_open,
3474     [P9_TREAD] = v9fs_read,
3475 #if 0
3476     [P9_TAUTH] = v9fs_auth,
3477 #endif
3478     [P9_TFLUSH] = v9fs_flush,
3479     [P9_TLINK] = v9fs_link,
3480     [P9_TSYMLINK] = v9fs_symlink,
3481     [P9_TCREATE] = v9fs_create,
3482     [P9_TLCREATE] = v9fs_lcreate,
3483     [P9_TWRITE] = v9fs_write,
3484     [P9_TWSTAT] = v9fs_wstat,
3485     [P9_TREMOVE] = v9fs_remove,
3486 };
3487 
3488 static void coroutine_fn v9fs_op_not_supp(void *opaque)
3489 {
3490     V9fsPDU *pdu = opaque;
3491     pdu_complete(pdu, -EOPNOTSUPP);
3492 }
3493 
3494 static void coroutine_fn v9fs_fs_ro(void *opaque)
3495 {
3496     V9fsPDU *pdu = opaque;
3497     pdu_complete(pdu, -EROFS);
3498 }
3499 
3500 static inline bool is_read_only_op(V9fsPDU *pdu)
3501 {
3502     switch (pdu->id) {
3503     case P9_TREADDIR:
3504     case P9_TSTATFS:
3505     case P9_TGETATTR:
3506     case P9_TXATTRWALK:
3507     case P9_TLOCK:
3508     case P9_TGETLOCK:
3509     case P9_TREADLINK:
3510     case P9_TVERSION:
3511     case P9_TLOPEN:
3512     case P9_TATTACH:
3513     case P9_TSTAT:
3514     case P9_TWALK:
3515     case P9_TCLUNK:
3516     case P9_TFSYNC:
3517     case P9_TOPEN:
3518     case P9_TREAD:
3519     case P9_TAUTH:
3520     case P9_TFLUSH:
3521         return 1;
3522     default:
3523         return 0;
3524     }
3525 }
3526 
3527 void pdu_submit(V9fsPDU *pdu, P9MsgHeader *hdr)
3528 {
3529     Coroutine *co;
3530     CoroutineEntry *handler;
3531     V9fsState *s = pdu->s;
3532 
3533     pdu->size = le32_to_cpu(hdr->size_le);
3534     pdu->id = hdr->id;
3535     pdu->tag = le16_to_cpu(hdr->tag_le);
3536 
3537     if (pdu->id >= ARRAY_SIZE(pdu_co_handlers) ||
3538         (pdu_co_handlers[pdu->id] == NULL)) {
3539         handler = v9fs_op_not_supp;
3540     } else if (is_ro_export(&s->ctx) && !is_read_only_op(pdu)) {
3541         handler = v9fs_fs_ro;
3542     } else {
3543         handler = pdu_co_handlers[pdu->id];
3544     }
3545 
3546     qemu_co_queue_init(&pdu->complete);
3547     co = qemu_coroutine_create(handler, pdu);
3548     qemu_coroutine_enter(co);
3549 }
3550 
3551 /* Returns 0 on success, 1 on failure. */
3552 int v9fs_device_realize_common(V9fsState *s, const V9fsTransport *t,
3553                                Error **errp)
3554 {
3555     int i, len;
3556     struct stat stat;
3557     FsDriverEntry *fse;
3558     V9fsPath path;
3559     int rc = 1;
3560 
3561     assert(!s->transport);
3562     s->transport = t;
3563 
3564     /* initialize pdu allocator */
3565     QLIST_INIT(&s->free_list);
3566     QLIST_INIT(&s->active_list);
3567     for (i = 0; i < MAX_REQ; i++) {
3568         QLIST_INSERT_HEAD(&s->free_list, &s->pdus[i], next);
3569         s->pdus[i].s = s;
3570         s->pdus[i].idx = i;
3571     }
3572 
3573     v9fs_path_init(&path);
3574 
3575     fse = get_fsdev_fsentry(s->fsconf.fsdev_id);
3576 
3577     if (!fse) {
3578         /* We don't have a fsdev identified by fsdev_id */
3579         error_setg(errp, "9pfs device couldn't find fsdev with the "
3580                    "id = %s",
3581                    s->fsconf.fsdev_id ? s->fsconf.fsdev_id : "NULL");
3582         goto out;
3583     }
3584 
3585     if (!s->fsconf.tag) {
3586         /* we haven't specified a mount_tag */
3587         error_setg(errp, "fsdev with id %s needs mount_tag arguments",
3588                    s->fsconf.fsdev_id);
3589         goto out;
3590     }
3591 
3592     s->ctx.export_flags = fse->export_flags;
3593     s->ctx.fs_root = g_strdup(fse->path);
3594     s->ctx.exops.get_st_gen = NULL;
3595     len = strlen(s->fsconf.tag);
3596     if (len > MAX_TAG_LEN - 1) {
3597         error_setg(errp, "mount tag '%s' (%d bytes) is longer than "
3598                    "maximum (%d bytes)", s->fsconf.tag, len, MAX_TAG_LEN - 1);
3599         goto out;
3600     }
3601 
3602     s->tag = g_strdup(s->fsconf.tag);
3603     s->ctx.uid = -1;
3604 
3605     s->ops = fse->ops;
3606 
3607     s->ctx.fmode = fse->fmode;
3608     s->ctx.dmode = fse->dmode;
3609 
3610     s->fid_list = NULL;
3611     qemu_co_rwlock_init(&s->rename_lock);
3612 
3613     if (s->ops->init(&s->ctx, errp) < 0) {
3614         error_prepend(errp, "cannot initialize fsdev '%s': ",
3615                       s->fsconf.fsdev_id);
3616         goto out;
3617     }
3618 
3619     /*
3620      * Check details of export path, We need to use fs driver
3621      * call back to do that. Since we are in the init path, we don't
3622      * use co-routines here.
3623      */
3624     if (s->ops->name_to_path(&s->ctx, NULL, "/", &path) < 0) {
3625         error_setg(errp,
3626                    "error in converting name to path %s", strerror(errno));
3627         goto out;
3628     }
3629     if (s->ops->lstat(&s->ctx, &path, &stat)) {
3630         error_setg(errp, "share path %s does not exist", fse->path);
3631         goto out;
3632     } else if (!S_ISDIR(stat.st_mode)) {
3633         error_setg(errp, "share path %s is not a directory", fse->path);
3634         goto out;
3635     }
3636 
3637     s->ctx.fst = &fse->fst;
3638     fsdev_throttle_init(s->ctx.fst);
3639 
3640     v9fs_path_free(&path);
3641 
3642     rc = 0;
3643 out:
3644     if (rc) {
3645         if (s->ops && s->ops->cleanup && s->ctx.private) {
3646             s->ops->cleanup(&s->ctx);
3647         }
3648         g_free(s->tag);
3649         g_free(s->ctx.fs_root);
3650         v9fs_path_free(&path);
3651     }
3652     return rc;
3653 }
3654 
3655 void v9fs_device_unrealize_common(V9fsState *s, Error **errp)
3656 {
3657     if (s->ops->cleanup) {
3658         s->ops->cleanup(&s->ctx);
3659     }
3660     fsdev_throttle_cleanup(s->ctx.fst);
3661     g_free(s->tag);
3662     g_free(s->ctx.fs_root);
3663 }
3664 
/*
 * Argument block handed from v9fs_reset() to the virtfs_co_reset()
 * coroutine.
 */
typedef struct VirtfsCoResetData {
    /* Stand-in PDU passed to virtfs_reset(); v9fs_reset() sets only .s */
    V9fsPDU pdu;
    /* Set to true by the coroutine once virtfs_reset() has returned */
    bool done;
} VirtfsCoResetData;
3669 
3670 static void coroutine_fn virtfs_co_reset(void *opaque)
3671 {
3672     VirtfsCoResetData *data = opaque;
3673 
3674     virtfs_reset(&data->pdu);
3675     data->done = true;
3676 }
3677 
3678 void v9fs_reset(V9fsState *s)
3679 {
3680     VirtfsCoResetData data = { .pdu = { .s = s }, .done = false };
3681     Coroutine *co;
3682 
3683     while (!QLIST_EMPTY(&s->active_list)) {
3684         aio_poll(qemu_get_aio_context(), true);
3685     }
3686 
3687     co = qemu_coroutine_create(virtfs_co_reset, &data);
3688     qemu_coroutine_enter(co);
3689 
3690     while (!data.done) {
3691         aio_poll(qemu_get_aio_context(), true);
3692     }
3693 }
3694 
3695 static void __attribute__((__constructor__)) v9fs_set_fd_limit(void)
3696 {
3697     struct rlimit rlim;
3698     if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) {
3699         error_report("Failed to get the resource limit");
3700         exit(1);
3701     }
3702     open_fd_hw = rlim.rlim_cur - MIN(400, rlim.rlim_cur/3);
3703     open_fd_rc = rlim.rlim_cur/2;
3704 }
3705