xref: /openbmc/qemu/qga/commands-linux.c (revision 5ab1c032)
1 /*
2  * QEMU Guest Agent Linux-specific command implementations
3  *
4  * Copyright IBM Corp. 2011
5  *
6  * Authors:
7  *  Michael Roth      <mdroth@linux.vnet.ibm.com>
8  *  Michal Privoznik  <mprivozn@redhat.com>
9  *
10  * This work is licensed under the terms of the GNU GPL, version 2 or later.
11  * See the COPYING file in the top-level directory.
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qapi/error.h"
16 #include "qga-qapi-commands.h"
17 #include "qapi/error.h"
18 #include "commands-common.h"
19 #include "cutils.h"
20 #include <mntent.h>
21 #include <sys/ioctl.h>
22 #include <mntent.h>
23 #include <linux/nvme_ioctl.h>
24 #include "block/nvme.h"
25 
26 #ifdef CONFIG_LIBUDEV
27 #include <libudev.h>
28 #endif
29 
30 #ifdef HAVE_GETIFADDRS
31 #include <net/if.h>
32 #endif
33 
34 #include <sys/statvfs.h>
35 
36 #if defined(CONFIG_FSFREEZE) || defined(CONFIG_FSTRIM)
/*
 * Look up the device numbers of the block device node @devpath.
 *
 * @devmajor and @devminor are always reset to 0 first and only receive
 * real values when @devpath is a block special file.
 *
 * Returns 0 on success, -2 when @devpath is a directory (callers treat
 * that as a bind mount), and -1 when stat() fails or the path is any
 * other kind of file.
 */
static int dev_major_minor(const char *devpath,
                           unsigned int *devmajor, unsigned int *devminor)
{
    struct stat sb;
    int rc = -1;

    *devmajor = 0;
    *devminor = 0;

    if (stat(devpath, &sb) < 0) {
        slog("failed to stat device file '%s': %s", devpath, strerror(errno));
    } else if (S_ISDIR(sb.st_mode)) {
        /* It is bind mount */
        rc = -2;
    } else if (S_ISBLK(sb.st_mode)) {
        *devmajor = major(sb.st_rdev);
        *devminor = minor(sb.st_rdev);
        rc = 0;
    }

    return rc;
}
60 
61 static bool build_fs_mount_list_from_mtab(FsMountList *mounts, Error **errp)
62 {
63     struct mntent *ment;
64     FsMount *mount;
65     char const *mtab = "/proc/self/mounts";
66     FILE *fp;
67     unsigned int devmajor, devminor;
68 
69     fp = setmntent(mtab, "r");
70     if (!fp) {
71         error_setg(errp, "failed to open mtab file: '%s'", mtab);
72         return false;
73     }
74 
75     while ((ment = getmntent(fp))) {
76         /*
77          * An entry which device name doesn't start with a '/' is
78          * either a dummy file system or a network file system.
79          * Add special handling for smbfs and cifs as is done by
80          * coreutils as well.
81          */
82         if ((ment->mnt_fsname[0] != '/') ||
83             (strcmp(ment->mnt_type, "smbfs") == 0) ||
84             (strcmp(ment->mnt_type, "cifs") == 0)) {
85             continue;
86         }
87         if (dev_major_minor(ment->mnt_fsname, &devmajor, &devminor) == -2) {
88             /* Skip bind mounts */
89             continue;
90         }
91 
92         mount = g_new0(FsMount, 1);
93         mount->dirname = g_strdup(ment->mnt_dir);
94         mount->devtype = g_strdup(ment->mnt_type);
95         mount->devmajor = devmajor;
96         mount->devminor = devminor;
97 
98         QTAILQ_INSERT_TAIL(mounts, mount, next);
99     }
100 
101     endmntent(fp);
102     return true;
103 }
104 
/*
 * Decode, in place, the backslash escapes found in a mount table name.
 *
 * "\\" becomes a single backslash and "\ooo" (first digit 0-3, the other
 * two 0-7) becomes the byte with that octal value.  Any other backslash
 * is copied unchanged.
 *
 * @name: buffer to rewrite
 * @len:  index of the last byte to process; the byte at name[len]
 *        (the terminating NUL when len is the string length) is copied
 *        as well, so the shortened result stays terminated.
 */
static void decode_mntname(char *name, int len)
{
    int rd = 0;
    int wr = 0;

    while (rd <= len) {
        char out = name[rd];
        int extra = 0;      /* additional input bytes consumed by an escape */

        if (out == '\\') {
            if (name[rd + 1] == '\\') {
                extra = 1;
            } else if (name[rd + 1] >= '0' && name[rd + 1] <= '3' &&
                       name[rd + 2] >= '0' && name[rd + 2] <= '7' &&
                       name[rd + 3] >= '0' && name[rd + 3] <= '7') {
                out = (name[rd + 1] - '0') * 64 +
                      (name[rd + 2] - '0') * 8 +
                      (name[rd + 3] - '0');
                extra = 3;
            }
        }

        /* wr never overtakes rd, so unread input is never clobbered */
        name[wr++] = out;
        rd += extra + 1;
    }
}
126 
127 /*
128  * Walk the mount table and build a list of local file systems
129  */
130 bool build_fs_mount_list(FsMountList *mounts, Error **errp)
131 {
132     FsMount *mount;
133     char const *mountinfo = "/proc/self/mountinfo";
134     FILE *fp;
135     char *line = NULL, *dash;
136     size_t n;
137     char check;
138     unsigned int devmajor, devminor;
139     int ret, dir_s, dir_e, type_s, type_e, dev_s, dev_e;
140 
141     fp = fopen(mountinfo, "r");
142     if (!fp) {
143         return build_fs_mount_list_from_mtab(mounts, errp);
144     }
145 
146     while (getline(&line, &n, fp) != -1) {
147         ret = sscanf(line, "%*u %*u %u:%u %*s %n%*s%n%c",
148                      &devmajor, &devminor, &dir_s, &dir_e, &check);
149         if (ret < 3) {
150             continue;
151         }
152         dash = strstr(line + dir_e, " - ");
153         if (!dash) {
154             continue;
155         }
156         ret = sscanf(dash, " - %n%*s%n %n%*s%n%c",
157                      &type_s, &type_e, &dev_s, &dev_e, &check);
158         if (ret < 1) {
159             continue;
160         }
161         line[dir_e] = 0;
162         dash[type_e] = 0;
163         dash[dev_e] = 0;
164         decode_mntname(line + dir_s, dir_e - dir_s);
165         decode_mntname(dash + dev_s, dev_e - dev_s);
166         if (devmajor == 0) {
167             /* btrfs reports major number = 0 */
168             if (strcmp("btrfs", dash + type_s) != 0 ||
169                 dev_major_minor(dash + dev_s, &devmajor, &devminor) < 0) {
170                 continue;
171             }
172         }
173 
174         mount = g_new0(FsMount, 1);
175         mount->dirname = g_strdup(line + dir_s);
176         mount->devtype = g_strdup(dash + type_s);
177         mount->devmajor = devmajor;
178         mount->devminor = devminor;
179 
180         QTAILQ_INSERT_TAIL(mounts, mount, next);
181     }
182     free(line);
183 
184     fclose(fp);
185     return true;
186 }
187 #endif /* CONFIG_FSFREEZE || CONFIG_FSTRIM */
188 
189 #ifdef CONFIG_FSFREEZE
190 /*
191  * Walk list of mounted file systems in the guest, and freeze the ones which
192  * are real local file systems.
193  */
194 int64_t qmp_guest_fsfreeze_do_freeze_list(bool has_mountpoints,
195                                           strList *mountpoints,
196                                           FsMountList mounts,
197                                           Error **errp)
198 {
199     struct FsMount *mount;
200     strList *list;
201     int fd, ret, i = 0;
202 
203     QTAILQ_FOREACH_REVERSE(mount, &mounts, next) {
204         /* To issue fsfreeze in the reverse order of mounts, check if the
205          * mount is listed in the list here */
206         if (has_mountpoints) {
207             for (list = mountpoints; list; list = list->next) {
208                 if (strcmp(list->value, mount->dirname) == 0) {
209                     break;
210                 }
211             }
212             if (!list) {
213                 continue;
214             }
215         }
216 
217         fd = qga_open_cloexec(mount->dirname, O_RDONLY, 0);
218         if (fd == -1) {
219             error_setg_errno(errp, errno, "failed to open %s", mount->dirname);
220             return -1;
221         }
222 
223         /* we try to cull filesystems we know won't work in advance, but other
224          * filesystems may not implement fsfreeze for less obvious reasons.
225          * these will report EOPNOTSUPP. we simply ignore these when tallying
226          * the number of frozen filesystems.
227          * if a filesystem is mounted more than once (aka bind mount) a
228          * consecutive attempt to freeze an already frozen filesystem will
229          * return EBUSY.
230          *
231          * any other error means a failure to freeze a filesystem we
232          * expect to be freezable, so return an error in those cases
233          * and return system to thawed state.
234          */
235         ret = ioctl(fd, FIFREEZE);
236         if (ret == -1) {
237             if (errno != EOPNOTSUPP && errno != EBUSY) {
238                 error_setg_errno(errp, errno, "failed to freeze %s",
239                                  mount->dirname);
240                 close(fd);
241                 return -1;
242             }
243         } else {
244             i++;
245         }
246         close(fd);
247     }
248     return i;
249 }
250 
251 int qmp_guest_fsfreeze_do_thaw(Error **errp)
252 {
253     int ret;
254     FsMountList mounts;
255     FsMount *mount;
256     int fd, i = 0, logged;
257     Error *local_err = NULL;
258 
259     QTAILQ_INIT(&mounts);
260     if (!build_fs_mount_list(&mounts, &local_err)) {
261         error_propagate(errp, local_err);
262         return -1;
263     }
264 
265     QTAILQ_FOREACH(mount, &mounts, next) {
266         logged = false;
267         fd = qga_open_cloexec(mount->dirname, O_RDONLY, 0);
268         if (fd == -1) {
269             continue;
270         }
271         /* we have no way of knowing whether a filesystem was actually unfrozen
272          * as a result of a successful call to FITHAW, only that if an error
273          * was returned the filesystem was *not* unfrozen by that particular
274          * call.
275          *
276          * since multiple preceding FIFREEZEs require multiple calls to FITHAW
277          * to unfreeze, continuing issuing FITHAW until an error is returned,
278          * in which case either the filesystem is in an unfreezable state, or,
279          * more likely, it was thawed previously (and remains so afterward).
280          *
281          * also, since the most recent successful call is the one that did
282          * the actual unfreeze, we can use this to provide an accurate count
283          * of the number of filesystems unfrozen by guest-fsfreeze-thaw, which
284          * may * be useful for determining whether a filesystem was unfrozen
285          * during the freeze/thaw phase by a process other than qemu-ga.
286          */
287         do {
288             ret = ioctl(fd, FITHAW);
289             if (ret == 0 && !logged) {
290                 i++;
291                 logged = true;
292             }
293         } while (ret == 0);
294         close(fd);
295     }
296 
297     free_fs_mount_list(&mounts);
298 
299     return i;
300 }
301 #endif /* CONFIG_FSFREEZE */
302 
303 #if defined(CONFIG_FSFREEZE)
304 
305 static char *get_pci_driver(char const *syspath, int pathlen, Error **errp)
306 {
307     char *path;
308     char *dpath;
309     char *driver = NULL;
310     char buf[PATH_MAX];
311     ssize_t len;
312 
313     path = g_strndup(syspath, pathlen);
314     dpath = g_strdup_printf("%s/driver", path);
315     len = readlink(dpath, buf, sizeof(buf) - 1);
316     if (len != -1) {
317         buf[len] = 0;
318         driver = g_path_get_basename(buf);
319     }
320     g_free(dpath);
321     g_free(path);
322     return driver;
323 }
324 
/*
 * qsort()-style comparator for unsigned int elements, ascending order.
 * Returns -1, 0 or 1.
 */
static int compare_uint(const void *_a, const void *_b)
{
    const unsigned int lhs = *(const unsigned int *)_a;
    const unsigned int rhs = *(const unsigned int *)_b;

    /* Difference of two 0/1 comparisons; avoids unsigned subtraction wrap */
    return (lhs > rhs) - (lhs < rhs);
}
332 
333 /* Walk the specified sysfs and build a sorted list of host or ata numbers */
334 static int build_hosts(char const *syspath, char const *host, bool ata,
335                        unsigned int *hosts, int hosts_max, Error **errp)
336 {
337     char *path;
338     DIR *dir;
339     struct dirent *entry;
340     int i = 0;
341 
342     path = g_strndup(syspath, host - syspath);
343     dir = opendir(path);
344     if (!dir) {
345         error_setg_errno(errp, errno, "opendir(\"%s\")", path);
346         g_free(path);
347         return -1;
348     }
349 
350     while (i < hosts_max) {
351         entry = readdir(dir);
352         if (!entry) {
353             break;
354         }
355         if (ata && sscanf(entry->d_name, "ata%d", hosts + i) == 1) {
356             ++i;
357         } else if (!ata && sscanf(entry->d_name, "host%d", hosts + i) == 1) {
358             ++i;
359         }
360     }
361 
362     qsort(hosts, i, sizeof(hosts[0]), compare_uint);
363 
364     g_free(path);
365     closedir(dir);
366     return i;
367 }
368 
369 /*
370  * Store disk device info for devices on the PCI bus.
371  * Returns true if information has been stored, or false for failure.
372  */
373 static bool build_guest_fsinfo_for_pci_dev(char const *syspath,
374                                            GuestDiskAddress *disk,
375                                            Error **errp)
376 {
377     unsigned int pci[4], host, hosts[8], tgt[3];
378     int i, nhosts = 0, pcilen;
379     GuestPCIAddress *pciaddr = disk->pci_controller;
380     bool has_ata = false, has_host = false, has_tgt = false;
381     char *p, *q, *driver = NULL;
382     bool ret = false;
383 
384     p = strstr(syspath, "/devices/pci");
385     if (!p || sscanf(p + 12, "%*x:%*x/%x:%x:%x.%x%n",
386                      pci, pci + 1, pci + 2, pci + 3, &pcilen) < 4) {
387         g_debug("only pci device is supported: sysfs path '%s'", syspath);
388         return false;
389     }
390 
391     p += 12 + pcilen;
392     while (true) {
393         driver = get_pci_driver(syspath, p - syspath, errp);
394         if (driver && (g_str_equal(driver, "ata_piix") ||
395                        g_str_equal(driver, "sym53c8xx") ||
396                        g_str_equal(driver, "virtio-pci") ||
397                        g_str_equal(driver, "ahci") ||
398                        g_str_equal(driver, "nvme") ||
399                        g_str_equal(driver, "xhci_hcd") ||
400                        g_str_equal(driver, "ehci-pci"))) {
401             break;
402         }
403 
404         g_free(driver);
405         if (sscanf(p, "/%x:%x:%x.%x%n",
406                           pci, pci + 1, pci + 2, pci + 3, &pcilen) == 4) {
407             p += pcilen;
408             continue;
409         }
410 
411         g_debug("unsupported driver or sysfs path '%s'", syspath);
412         return false;
413     }
414 
415     p = strstr(syspath, "/target");
416     if (p && sscanf(p + 7, "%*u:%*u:%*u/%*u:%u:%u:%u",
417                     tgt, tgt + 1, tgt + 2) == 3) {
418         has_tgt = true;
419     }
420 
421     p = strstr(syspath, "/ata");
422     if (p) {
423         q = p + 4;
424         has_ata = true;
425     } else {
426         p = strstr(syspath, "/host");
427         q = p + 5;
428     }
429     if (p && sscanf(q, "%u", &host) == 1) {
430         has_host = true;
431         nhosts = build_hosts(syspath, p, has_ata, hosts,
432                              ARRAY_SIZE(hosts), errp);
433         if (nhosts < 0) {
434             goto cleanup;
435         }
436     }
437 
438     pciaddr->domain = pci[0];
439     pciaddr->bus = pci[1];
440     pciaddr->slot = pci[2];
441     pciaddr->function = pci[3];
442 
443     if (strcmp(driver, "ata_piix") == 0) {
444         /* a host per ide bus, target*:0:<unit>:0 */
445         if (!has_host || !has_tgt) {
446             g_debug("invalid sysfs path '%s' (driver '%s')", syspath, driver);
447             goto cleanup;
448         }
449         for (i = 0; i < nhosts; i++) {
450             if (host == hosts[i]) {
451                 disk->bus_type = GUEST_DISK_BUS_TYPE_IDE;
452                 disk->bus = i;
453                 disk->unit = tgt[1];
454                 break;
455             }
456         }
457         if (i >= nhosts) {
458             g_debug("no host for '%s' (driver '%s')", syspath, driver);
459             goto cleanup;
460         }
461     } else if (strcmp(driver, "sym53c8xx") == 0) {
462         /* scsi(LSI Logic): target*:0:<unit>:0 */
463         if (!has_tgt) {
464             g_debug("invalid sysfs path '%s' (driver '%s')", syspath, driver);
465             goto cleanup;
466         }
467         disk->bus_type = GUEST_DISK_BUS_TYPE_SCSI;
468         disk->unit = tgt[1];
469     } else if (strcmp(driver, "virtio-pci") == 0) {
470         if (has_tgt) {
471             /* virtio-scsi: target*:0:0:<unit> */
472             disk->bus_type = GUEST_DISK_BUS_TYPE_SCSI;
473             disk->unit = tgt[2];
474         } else {
475             /* virtio-blk: 1 disk per 1 device */
476             disk->bus_type = GUEST_DISK_BUS_TYPE_VIRTIO;
477         }
478     } else if (strcmp(driver, "ahci") == 0) {
479         /* ahci: 1 host per 1 unit */
480         if (!has_host || !has_tgt) {
481             g_debug("invalid sysfs path '%s' (driver '%s')", syspath, driver);
482             goto cleanup;
483         }
484         for (i = 0; i < nhosts; i++) {
485             if (host == hosts[i]) {
486                 disk->unit = i;
487                 disk->bus_type = GUEST_DISK_BUS_TYPE_SATA;
488                 break;
489             }
490         }
491         if (i >= nhosts) {
492             g_debug("no host for '%s' (driver '%s')", syspath, driver);
493             goto cleanup;
494         }
495     } else if (strcmp(driver, "nvme") == 0) {
496         disk->bus_type = GUEST_DISK_BUS_TYPE_NVME;
497     } else if (strcmp(driver, "ehci-pci") == 0 || strcmp(driver, "xhci_hcd") == 0) {
498         disk->bus_type = GUEST_DISK_BUS_TYPE_USB;
499     } else {
500         g_debug("unknown driver '%s' (sysfs path '%s')", driver, syspath);
501         goto cleanup;
502     }
503 
504     ret = true;
505 
506 cleanup:
507     g_free(driver);
508     return ret;
509 }
510 
511 /*
512  * Store disk device info for non-PCI virtio devices (for example s390x
513  * channel I/O devices). Returns true if information has been stored, or
514  * false for failure.
515  */
516 static bool build_guest_fsinfo_for_nonpci_virtio(char const *syspath,
517                                                  GuestDiskAddress *disk,
518                                                  Error **errp)
519 {
520     unsigned int tgt[3];
521     char *p;
522 
523     if (!strstr(syspath, "/virtio") || !strstr(syspath, "/block")) {
524         g_debug("Unsupported virtio device '%s'", syspath);
525         return false;
526     }
527 
528     p = strstr(syspath, "/target");
529     if (p && sscanf(p + 7, "%*u:%*u:%*u/%*u:%u:%u:%u",
530                     &tgt[0], &tgt[1], &tgt[2]) == 3) {
531         /* virtio-scsi: target*:0:<target>:<unit> */
532         disk->bus_type = GUEST_DISK_BUS_TYPE_SCSI;
533         disk->bus = tgt[0];
534         disk->target = tgt[1];
535         disk->unit = tgt[2];
536     } else {
537         /* virtio-blk: 1 disk per 1 device */
538         disk->bus_type = GUEST_DISK_BUS_TYPE_VIRTIO;
539     }
540 
541     return true;
542 }
543 
544 /*
545  * Store disk device info for CCW devices (s390x channel I/O devices).
546  * Returns true if information has been stored, or false for failure.
547  */
548 static bool build_guest_fsinfo_for_ccw_dev(char const *syspath,
549                                            GuestDiskAddress *disk,
550                                            Error **errp)
551 {
552     unsigned int cssid, ssid, subchno, devno;
553     char *p;
554 
555     p = strstr(syspath, "/devices/css");
556     if (!p || sscanf(p + 12, "%*x/%x.%x.%x/%*x.%*x.%x/",
557                      &cssid, &ssid, &subchno, &devno) < 4) {
558         g_debug("could not parse ccw device sysfs path: %s", syspath);
559         return false;
560     }
561 
562     disk->ccw_address = g_new0(GuestCCWAddress, 1);
563     disk->ccw_address->cssid = cssid;
564     disk->ccw_address->ssid = ssid;
565     disk->ccw_address->subchno = subchno;
566     disk->ccw_address->devno = devno;
567 
568     if (strstr(p, "/virtio")) {
569         build_guest_fsinfo_for_nonpci_virtio(syspath, disk, errp);
570     }
571 
572     return true;
573 }
574 
575 /* Store disk device info specified by @sysfs into @fs */
576 static void build_guest_fsinfo_for_real_device(char const *syspath,
577                                                GuestFilesystemInfo *fs,
578                                                Error **errp)
579 {
580     GuestDiskAddress *disk;
581     GuestPCIAddress *pciaddr;
582     bool has_hwinf;
583 #ifdef CONFIG_LIBUDEV
584     struct udev *udev = NULL;
585     struct udev_device *udevice = NULL;
586 #endif
587 
588     pciaddr = g_new0(GuestPCIAddress, 1);
589     pciaddr->domain = -1;                       /* -1 means field is invalid */
590     pciaddr->bus = -1;
591     pciaddr->slot = -1;
592     pciaddr->function = -1;
593 
594     disk = g_new0(GuestDiskAddress, 1);
595     disk->pci_controller = pciaddr;
596     disk->bus_type = GUEST_DISK_BUS_TYPE_UNKNOWN;
597 
598 #ifdef CONFIG_LIBUDEV
599     udev = udev_new();
600     udevice = udev_device_new_from_syspath(udev, syspath);
601     if (udev == NULL || udevice == NULL) {
602         g_debug("failed to query udev");
603     } else {
604         const char *devnode, *serial;
605         devnode = udev_device_get_devnode(udevice);
606         if (devnode != NULL) {
607             disk->dev = g_strdup(devnode);
608         }
609         serial = udev_device_get_property_value(udevice, "ID_SERIAL");
610         if (serial != NULL && *serial != 0) {
611             disk->serial = g_strdup(serial);
612         }
613     }
614 
615     udev_unref(udev);
616     udev_device_unref(udevice);
617 #endif
618 
619     if (strstr(syspath, "/devices/pci")) {
620         has_hwinf = build_guest_fsinfo_for_pci_dev(syspath, disk, errp);
621     } else if (strstr(syspath, "/devices/css")) {
622         has_hwinf = build_guest_fsinfo_for_ccw_dev(syspath, disk, errp);
623     } else if (strstr(syspath, "/virtio")) {
624         has_hwinf = build_guest_fsinfo_for_nonpci_virtio(syspath, disk, errp);
625     } else {
626         g_debug("Unsupported device type for '%s'", syspath);
627         has_hwinf = false;
628     }
629 
630     if (has_hwinf || disk->dev || disk->serial) {
631         QAPI_LIST_PREPEND(fs->disk, disk);
632     } else {
633         qapi_free_GuestDiskAddress(disk);
634     }
635 }
636 
637 static void build_guest_fsinfo_for_device(char const *devpath,
638                                           GuestFilesystemInfo *fs,
639                                           Error **errp);
640 
641 /* Store a list of slave devices of virtual volume specified by @syspath into
642  * @fs */
643 static void build_guest_fsinfo_for_virtual_device(char const *syspath,
644                                                   GuestFilesystemInfo *fs,
645                                                   Error **errp)
646 {
647     Error *err = NULL;
648     DIR *dir;
649     char *dirpath;
650     struct dirent *entry;
651 
652     dirpath = g_strdup_printf("%s/slaves", syspath);
653     dir = opendir(dirpath);
654     if (!dir) {
655         if (errno != ENOENT) {
656             error_setg_errno(errp, errno, "opendir(\"%s\")", dirpath);
657         }
658         g_free(dirpath);
659         return;
660     }
661 
662     for (;;) {
663         errno = 0;
664         entry = readdir(dir);
665         if (entry == NULL) {
666             if (errno) {
667                 error_setg_errno(errp, errno, "readdir(\"%s\")", dirpath);
668             }
669             break;
670         }
671 
672         if (entry->d_type == DT_LNK) {
673             char *path;
674 
675             g_debug(" slave device '%s'", entry->d_name);
676             path = g_strdup_printf("%s/slaves/%s", syspath, entry->d_name);
677             build_guest_fsinfo_for_device(path, fs, &err);
678             g_free(path);
679 
680             if (err) {
681                 error_propagate(errp, err);
682                 break;
683             }
684         }
685     }
686 
687     g_free(dirpath);
688     closedir(dir);
689 }
690 
691 static bool is_disk_virtual(const char *devpath, Error **errp)
692 {
693     g_autofree char *syspath = realpath(devpath, NULL);
694 
695     if (!syspath) {
696         error_setg_errno(errp, errno, "realpath(\"%s\")", devpath);
697         return false;
698     }
699     return strstr(syspath, "/devices/virtual/block/") != NULL;
700 }
701 
702 /* Dispatch to functions for virtual/real device */
703 static void build_guest_fsinfo_for_device(char const *devpath,
704                                           GuestFilesystemInfo *fs,
705                                           Error **errp)
706 {
707     ERRP_GUARD();
708     g_autofree char *syspath = NULL;
709     bool is_virtual = false;
710 
711     syspath = realpath(devpath, NULL);
712     if (!syspath) {
713         if (errno != ENOENT) {
714             error_setg_errno(errp, errno, "realpath(\"%s\")", devpath);
715             return;
716         }
717 
718         /* ENOENT: This devpath may not exist because of container config */
719         if (!fs->name) {
720             fs->name = g_path_get_basename(devpath);
721         }
722         return;
723     }
724 
725     if (!fs->name) {
726         fs->name = g_path_get_basename(syspath);
727     }
728 
729     g_debug("  parse sysfs path '%s'", syspath);
730     is_virtual = is_disk_virtual(syspath, errp);
731     if (*errp != NULL) {
732         return;
733     }
734     if (is_virtual) {
735         build_guest_fsinfo_for_virtual_device(syspath, fs, errp);
736     } else {
737         build_guest_fsinfo_for_real_device(syspath, fs, errp);
738     }
739 }
740 
741 #ifdef CONFIG_LIBUDEV
742 
743 /*
744  * Wrapper around build_guest_fsinfo_for_device() for getting just
745  * the disk address.
746  */
747 static GuestDiskAddress *get_disk_address(const char *syspath, Error **errp)
748 {
749     g_autoptr(GuestFilesystemInfo) fs = NULL;
750 
751     fs = g_new0(GuestFilesystemInfo, 1);
752     build_guest_fsinfo_for_device(syspath, fs, errp);
753     if (fs->disk != NULL) {
754         return g_steal_pointer(&fs->disk->value);
755     }
756     return NULL;
757 }
758 
/*
 * Return a newly allocated copy of the udev "DM_NAME" property of the
 * device at @syspath, or NULL when udev cannot be queried or the
 * property is missing or empty.
 */
static char *get_alias_for_syspath(const char *syspath)
{
    struct udev *ctx = udev_new();
    struct udev_device *dev = NULL;
    char *alias_copy = NULL;

    if (ctx == NULL) {
        g_debug("failed to query udev");
    } else {
        dev = udev_device_new_from_syspath(ctx, syspath);
        if (dev == NULL) {
            g_debug("failed to query udev for path: %s", syspath);
        } else {
            const char *alias =
                udev_device_get_property_value(dev, "DM_NAME");

            /*
             * NULL means there was an error and empty string means there
             * is no alias. In case of no alias we return NULL instead of
             * empty string.
             */
            if (alias == NULL) {
                g_debug("failed to query udev for device alias for: %s",
                    syspath);
            } else if (*alias != 0) {
                alias_copy = g_strdup(alias);
            }
        }
    }

    udev_unref(ctx);
    udev_device_unref(dev);
    return alias_copy;
}
794 
/*
 * Return a newly allocated copy of the device node reported by udev for
 * the device at @syspath, or NULL when udev cannot be queried or reports
 * no node.
 */
static char *get_device_for_syspath(const char *syspath)
{
    struct udev *ctx = udev_new();
    struct udev_device *dev = NULL;
    char *devnode = NULL;

    if (ctx == NULL) {
        g_debug("failed to query udev");
    } else {
        dev = udev_device_new_from_syspath(ctx, syspath);
        if (dev == NULL) {
            g_debug("failed to query udev for path: %s", syspath);
        } else {
            devnode = g_strdup(udev_device_get_devnode(dev));
        }
    }

    udev_unref(ctx);
    udev_device_unref(dev);
    return devnode;
}
819 
820 static void get_disk_deps(const char *disk_dir, GuestDiskInfo *disk)
821 {
822     g_autofree char *deps_dir = NULL;
823     const gchar *dep;
824     GDir *dp_deps = NULL;
825 
826     /* List dependent disks */
827     deps_dir = g_strdup_printf("%s/slaves", disk_dir);
828     g_debug("  listing entries in: %s", deps_dir);
829     dp_deps = g_dir_open(deps_dir, 0, NULL);
830     if (dp_deps == NULL) {
831         g_debug("failed to list entries in %s", deps_dir);
832         return;
833     }
834     disk->has_dependencies = true;
835     while ((dep = g_dir_read_name(dp_deps)) != NULL) {
836         g_autofree char *dep_dir = NULL;
837         char *dev_name;
838 
839         /* Add dependent disks */
840         dep_dir = g_strdup_printf("%s/%s", deps_dir, dep);
841         dev_name = get_device_for_syspath(dep_dir);
842         if (dev_name != NULL) {
843             g_debug("  adding dependent device: %s", dev_name);
844             QAPI_LIST_PREPEND(disk->dependencies, dev_name);
845         }
846     }
847     g_dir_close(dp_deps);
848 }
849 
850 /*
851  * Detect partitions subdirectory, name is "<disk_name><number>" or
852  * "<disk_name>p<number>"
853  *
854  * @disk_name -- last component of /sys path (e.g. sda)
855  * @disk_dir -- sys path of the disk (e.g. /sys/block/sda)
856  * @disk_dev -- device node of the disk (e.g. /dev/sda)
857  */
858 static GuestDiskInfoList *get_disk_partitions(
859     GuestDiskInfoList *list,
860     const char *disk_name, const char *disk_dir,
861     const char *disk_dev)
862 {
863     GuestDiskInfoList *ret = list;
864     struct dirent *de_disk;
865     DIR *dp_disk = NULL;
866     size_t len = strlen(disk_name);
867 
868     dp_disk = opendir(disk_dir);
869     while ((de_disk = readdir(dp_disk)) != NULL) {
870         g_autofree char *partition_dir = NULL;
871         char *dev_name;
872         GuestDiskInfo *partition;
873 
874         if (!(de_disk->d_type & DT_DIR)) {
875             continue;
876         }
877 
878         if (!(strncmp(disk_name, de_disk->d_name, len) == 0 &&
879             ((*(de_disk->d_name + len) == 'p' &&
880             isdigit(*(de_disk->d_name + len + 1))) ||
881                 isdigit(*(de_disk->d_name + len))))) {
882             continue;
883         }
884 
885         partition_dir = g_strdup_printf("%s/%s",
886             disk_dir, de_disk->d_name);
887         dev_name = get_device_for_syspath(partition_dir);
888         if (dev_name == NULL) {
889             g_debug("Failed to get device name for syspath: %s",
890                 disk_dir);
891             continue;
892         }
893         partition = g_new0(GuestDiskInfo, 1);
894         partition->name = dev_name;
895         partition->partition = true;
896         partition->has_dependencies = true;
897         /* Add parent disk as dependent for easier tracking of hierarchy */
898         QAPI_LIST_PREPEND(partition->dependencies, g_strdup(disk_dev));
899 
900         QAPI_LIST_PREPEND(ret, partition);
901     }
902     closedir(dp_disk);
903 
904     return ret;
905 }
906 
907 static void get_nvme_smart(GuestDiskInfo *disk)
908 {
909     int fd;
910     GuestNVMeSmart *smart;
911     NvmeSmartLog log = {0};
912     struct nvme_admin_cmd cmd = {
913         .opcode = NVME_ADM_CMD_GET_LOG_PAGE,
914         .nsid = NVME_NSID_BROADCAST,
915         .addr = (uintptr_t)&log,
916         .data_len = sizeof(log),
917         .cdw10 = NVME_LOG_SMART_INFO | (1 << 15) /* RAE bit */
918                  | (((sizeof(log) >> 2) - 1) << 16)
919     };
920 
921     fd = qga_open_cloexec(disk->name, O_RDONLY, 0);
922     if (fd == -1) {
923         g_debug("Failed to open device: %s: %s", disk->name, g_strerror(errno));
924         return;
925     }
926 
927     if (ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd)) {
928         g_debug("Failed to get smart: %s: %s", disk->name, g_strerror(errno));
929         close(fd);
930         return;
931     }
932 
933     disk->smart = g_new0(GuestDiskSmart, 1);
934     disk->smart->type = GUEST_DISK_BUS_TYPE_NVME;
935 
936     smart = &disk->smart->u.nvme;
937     smart->critical_warning = log.critical_warning;
938     smart->temperature = lduw_le_p(&log.temperature); /* unaligned field */
939     smart->available_spare = log.available_spare;
940     smart->available_spare_threshold = log.available_spare_threshold;
941     smart->percentage_used = log.percentage_used;
942     smart->data_units_read_lo = le64_to_cpu(log.data_units_read[0]);
943     smart->data_units_read_hi = le64_to_cpu(log.data_units_read[1]);
944     smart->data_units_written_lo = le64_to_cpu(log.data_units_written[0]);
945     smart->data_units_written_hi = le64_to_cpu(log.data_units_written[1]);
946     smart->host_read_commands_lo = le64_to_cpu(log.host_read_commands[0]);
947     smart->host_read_commands_hi = le64_to_cpu(log.host_read_commands[1]);
948     smart->host_write_commands_lo = le64_to_cpu(log.host_write_commands[0]);
949     smart->host_write_commands_hi = le64_to_cpu(log.host_write_commands[1]);
950     smart->controller_busy_time_lo = le64_to_cpu(log.controller_busy_time[0]);
951     smart->controller_busy_time_hi = le64_to_cpu(log.controller_busy_time[1]);
952     smart->power_cycles_lo = le64_to_cpu(log.power_cycles[0]);
953     smart->power_cycles_hi = le64_to_cpu(log.power_cycles[1]);
954     smart->power_on_hours_lo = le64_to_cpu(log.power_on_hours[0]);
955     smart->power_on_hours_hi = le64_to_cpu(log.power_on_hours[1]);
956     smart->unsafe_shutdowns_lo = le64_to_cpu(log.unsafe_shutdowns[0]);
957     smart->unsafe_shutdowns_hi = le64_to_cpu(log.unsafe_shutdowns[1]);
958     smart->media_errors_lo = le64_to_cpu(log.media_errors[0]);
959     smart->media_errors_hi = le64_to_cpu(log.media_errors[1]);
960     smart->number_of_error_log_entries_lo =
961         le64_to_cpu(log.number_of_error_log_entries[0]);
962     smart->number_of_error_log_entries_hi =
963         le64_to_cpu(log.number_of_error_log_entries[1]);
964 
965     close(fd);
966 }
967 
968 static void get_disk_smart(GuestDiskInfo *disk)
969 {
970     if (disk->address
971         && (disk->address->bus_type == GUEST_DISK_BUS_TYPE_NVME)) {
972         get_nvme_smart(disk);
973     }
974 }
975 
976 GuestDiskInfoList *qmp_guest_get_disks(Error **errp)
977 {
978     GuestDiskInfoList *ret = NULL;
979     GuestDiskInfo *disk;
980     DIR *dp = NULL;
981     struct dirent *de = NULL;
982 
983     g_debug("listing /sys/block directory");
984     dp = opendir("/sys/block");
985     if (dp == NULL) {
986         error_setg_errno(errp, errno, "Can't open directory \"/sys/block\"");
987         return NULL;
988     }
989     while ((de = readdir(dp)) != NULL) {
990         g_autofree char *disk_dir = NULL, *line = NULL,
991             *size_path = NULL;
992         char *dev_name;
993         Error *local_err = NULL;
994         if (de->d_type != DT_LNK) {
995             g_debug("  skipping entry: %s", de->d_name);
996             continue;
997         }
998 
999         /* Check size and skip zero-sized disks */
1000         g_debug("  checking disk size");
1001         size_path = g_strdup_printf("/sys/block/%s/size", de->d_name);
1002         if (!g_file_get_contents(size_path, &line, NULL, NULL)) {
1003             g_debug("  failed to read disk size");
1004             continue;
1005         }
1006         if (g_strcmp0(line, "0\n") == 0) {
1007             g_debug("  skipping zero-sized disk");
1008             continue;
1009         }
1010 
1011         g_debug("  adding %s", de->d_name);
1012         disk_dir = g_strdup_printf("/sys/block/%s", de->d_name);
1013         dev_name = get_device_for_syspath(disk_dir);
1014         if (dev_name == NULL) {
1015             g_debug("Failed to get device name for syspath: %s",
1016                 disk_dir);
1017             continue;
1018         }
1019         disk = g_new0(GuestDiskInfo, 1);
1020         disk->name = dev_name;
1021         disk->partition = false;
1022         disk->alias = get_alias_for_syspath(disk_dir);
1023         QAPI_LIST_PREPEND(ret, disk);
1024 
1025         /* Get address for non-virtual devices */
1026         bool is_virtual = is_disk_virtual(disk_dir, &local_err);
1027         if (local_err != NULL) {
1028             g_debug("  failed to check disk path, ignoring error: %s",
1029                 error_get_pretty(local_err));
1030             error_free(local_err);
1031             local_err = NULL;
1032             /* Don't try to get the address */
1033             is_virtual = true;
1034         }
1035         if (!is_virtual) {
1036             disk->address = get_disk_address(disk_dir, &local_err);
1037             if (local_err != NULL) {
1038                 g_debug("  failed to get device info, ignoring error: %s",
1039                     error_get_pretty(local_err));
1040                 error_free(local_err);
1041                 local_err = NULL;
1042             }
1043         }
1044 
1045         get_disk_deps(disk_dir, disk);
1046         get_disk_smart(disk);
1047         ret = get_disk_partitions(ret, de->d_name, disk_dir, dev_name);
1048     }
1049 
1050     closedir(dp);
1051 
1052     return ret;
1053 }
1054 
1055 #endif
1056 
1057 /* Return a list of the disk device(s)' info which @mount lies on */
1058 static GuestFilesystemInfo *build_guest_fsinfo(struct FsMount *mount,
1059                                                Error **errp)
1060 {
1061     GuestFilesystemInfo *fs = g_malloc0(sizeof(*fs));
1062     struct statvfs buf;
1063     unsigned long used, nonroot_total, fr_size;
1064     char *devpath = g_strdup_printf("/sys/dev/block/%u:%u",
1065                                     mount->devmajor, mount->devminor);
1066 
1067     fs->mountpoint = g_strdup(mount->dirname);
1068     fs->type = g_strdup(mount->devtype);
1069     build_guest_fsinfo_for_device(devpath, fs, errp);
1070 
1071     if (statvfs(fs->mountpoint, &buf) == 0) {
1072         fr_size = buf.f_frsize;
1073         used = buf.f_blocks - buf.f_bfree;
1074         nonroot_total = used + buf.f_bavail;
1075         fs->used_bytes = used * fr_size;
1076         fs->total_bytes = nonroot_total * fr_size;
1077         fs->total_bytes_privileged = buf.f_blocks * fr_size;
1078 
1079         fs->has_total_bytes = true;
1080         fs->has_total_bytes_privileged = true;
1081         fs->has_used_bytes = true;
1082     }
1083 
1084     g_free(devpath);
1085 
1086     return fs;
1087 }
1088 
1089 GuestFilesystemInfoList *qmp_guest_get_fsinfo(Error **errp)
1090 {
1091     FsMountList mounts;
1092     struct FsMount *mount;
1093     GuestFilesystemInfoList *ret = NULL;
1094     Error *local_err = NULL;
1095 
1096     QTAILQ_INIT(&mounts);
1097     if (!build_fs_mount_list(&mounts, &local_err)) {
1098         error_propagate(errp, local_err);
1099         return NULL;
1100     }
1101 
1102     QTAILQ_FOREACH(mount, &mounts, next) {
1103         g_debug("Building guest fsinfo for '%s'", mount->dirname);
1104 
1105         QAPI_LIST_PREPEND(ret, build_guest_fsinfo(mount, &local_err));
1106         if (local_err) {
1107             error_propagate(errp, local_err);
1108             qapi_free_GuestFilesystemInfoList(ret);
1109             ret = NULL;
1110             break;
1111         }
1112     }
1113 
1114     free_fs_mount_list(&mounts);
1115     return ret;
1116 }
1117 #endif /* CONFIG_FSFREEZE */
1118 
1119 #if defined(CONFIG_FSTRIM)
1120 /*
1121  * Walk list of mounted file systems in the guest, and trim them.
1122  */
1123 GuestFilesystemTrimResponse *
1124 qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp)
1125 {
1126     GuestFilesystemTrimResponse *response;
1127     GuestFilesystemTrimResult *result;
1128     int ret = 0;
1129     FsMountList mounts;
1130     struct FsMount *mount;
1131     int fd;
1132     struct fstrim_range r;
1133 
1134     slog("guest-fstrim called");
1135 
1136     QTAILQ_INIT(&mounts);
1137     if (!build_fs_mount_list(&mounts, errp)) {
1138         return NULL;
1139     }
1140 
1141     response = g_malloc0(sizeof(*response));
1142 
1143     QTAILQ_FOREACH(mount, &mounts, next) {
1144         result = g_malloc0(sizeof(*result));
1145         result->path = g_strdup(mount->dirname);
1146 
1147         QAPI_LIST_PREPEND(response->paths, result);
1148 
1149         fd = qga_open_cloexec(mount->dirname, O_RDONLY, 0);
1150         if (fd == -1) {
1151             result->error = g_strdup_printf("failed to open: %s",
1152                                             strerror(errno));
1153             continue;
1154         }
1155 
1156         /* We try to cull filesystems we know won't work in advance, but other
1157          * filesystems may not implement fstrim for less obvious reasons.
1158          * These will report EOPNOTSUPP; while in some other cases ENOTTY
1159          * will be reported (e.g. CD-ROMs).
1160          * Any other error means an unexpected error.
1161          */
1162         r.start = 0;
1163         r.len = -1;
1164         r.minlen = has_minimum ? minimum : 0;
1165         ret = ioctl(fd, FITRIM, &r);
1166         if (ret == -1) {
1167             if (errno == ENOTTY || errno == EOPNOTSUPP) {
1168                 result->error = g_strdup("trim not supported");
1169             } else {
1170                 result->error = g_strdup_printf("failed to trim: %s",
1171                                                 strerror(errno));
1172             }
1173             close(fd);
1174             continue;
1175         }
1176 
1177         result->has_minimum = true;
1178         result->minimum = r.minlen;
1179         result->has_trimmed = true;
1180         result->trimmed = r.len;
1181         close(fd);
1182     }
1183 
1184     free_fs_mount_list(&mounts);
1185     return response;
1186 }
1187 #endif /* CONFIG_FSTRIM */
1188 
/*
 * File used by the sysfs suspend backend: it is read to look for the
 * keyword of the requested mode and written to request the suspend.
 */
#define LINUX_SYS_STATE_FILE "/sys/power/state"
/* Exit status of 'pm-is-supported' that is treated as "mode supported". */
#define SUSPEND_SUPPORTED 0
/* Presumably the matching "mode not supported" exit status - TODO confirm. */
#define SUSPEND_NOT_SUPPORTED 1

/*
 * Suspend modes.  The numeric values are used as indices into the
 * three-element per-backend argument tables below, so the order matters.
 */
typedef enum {
    SUSPEND_MODE_DISK = 0,
    SUSPEND_MODE_RAM = 1,
    SUSPEND_MODE_HYBRID = 2,
} SuspendMode;
1198 
1199 /*
1200  * Executes a command in a child process using g_spawn_sync,
1201  * returning an int >= 0 representing the exit status of the
1202  * process.
1203  *
1204  * If the program wasn't found in path, returns -1.
1205  *
1206  * If a problem happened when creating the child process,
1207  * returns -1 and errp is set.
1208  */
1209 static int run_process_child(const char *command[], Error **errp)
1210 {
1211     int exit_status, spawn_flag;
1212     GError *g_err = NULL;
1213     bool success;
1214 
1215     spawn_flag = G_SPAWN_SEARCH_PATH | G_SPAWN_STDOUT_TO_DEV_NULL |
1216                  G_SPAWN_STDERR_TO_DEV_NULL;
1217 
1218     success =  g_spawn_sync(NULL, (char **)command, NULL, spawn_flag,
1219                             NULL, NULL, NULL, NULL,
1220                             &exit_status, &g_err);
1221 
1222     if (success) {
1223         return WEXITSTATUS(exit_status);
1224     }
1225 
1226     if (g_err && (g_err->code != G_SPAWN_ERROR_NOENT)) {
1227         error_setg(errp, "failed to create child process, error '%s'",
1228                    g_err->message);
1229     }
1230 
1231     g_error_free(g_err);
1232     return -1;
1233 }
1234 
1235 static bool systemd_supports_mode(SuspendMode mode, Error **errp)
1236 {
1237     const char *systemctl_args[3] = {"systemd-hibernate", "systemd-suspend",
1238                                      "systemd-hybrid-sleep"};
1239     const char *cmd[4] = {"systemctl", "status", systemctl_args[mode], NULL};
1240     int status;
1241 
1242     status = run_process_child(cmd, errp);
1243 
1244     /*
1245      * systemctl status uses LSB return codes so we can expect
1246      * status > 0 and be ok. To assert if the guest has support
1247      * for the selected suspend mode, status should be < 4. 4 is
1248      * the code for unknown service status, the return value when
1249      * the service does not exist. A common value is status = 3
1250      * (program is not running).
1251      */
1252     if (status > 0 && status < 4) {
1253         return true;
1254     }
1255 
1256     return false;
1257 }
1258 
1259 static void systemd_suspend(SuspendMode mode, Error **errp)
1260 {
1261     Error *local_err = NULL;
1262     const char *systemctl_args[3] = {"hibernate", "suspend", "hybrid-sleep"};
1263     const char *cmd[3] = {"systemctl", systemctl_args[mode], NULL};
1264     int status;
1265 
1266     status = run_process_child(cmd, &local_err);
1267 
1268     if (status == 0) {
1269         return;
1270     }
1271 
1272     if ((status == -1) && !local_err) {
1273         error_setg(errp, "the helper program 'systemctl %s' was not found",
1274                    systemctl_args[mode]);
1275         return;
1276     }
1277 
1278     if (local_err) {
1279         error_propagate(errp, local_err);
1280     } else {
1281         error_setg(errp, "the helper program 'systemctl %s' returned an "
1282                    "unexpected exit status code (%d)",
1283                    systemctl_args[mode], status);
1284     }
1285 }
1286 
1287 static bool pmutils_supports_mode(SuspendMode mode, Error **errp)
1288 {
1289     Error *local_err = NULL;
1290     const char *pmutils_args[3] = {"--hibernate", "--suspend",
1291                                    "--suspend-hybrid"};
1292     const char *cmd[3] = {"pm-is-supported", pmutils_args[mode], NULL};
1293     int status;
1294 
1295     status = run_process_child(cmd, &local_err);
1296 
1297     if (status == SUSPEND_SUPPORTED) {
1298         return true;
1299     }
1300 
1301     if ((status == -1) && !local_err) {
1302         return false;
1303     }
1304 
1305     if (local_err) {
1306         error_propagate(errp, local_err);
1307     } else {
1308         error_setg(errp,
1309                    "the helper program '%s' returned an unexpected exit"
1310                    " status code (%d)", "pm-is-supported", status);
1311     }
1312 
1313     return false;
1314 }
1315 
1316 static void pmutils_suspend(SuspendMode mode, Error **errp)
1317 {
1318     Error *local_err = NULL;
1319     const char *pmutils_binaries[3] = {"pm-hibernate", "pm-suspend",
1320                                        "pm-suspend-hybrid"};
1321     const char *cmd[2] = {pmutils_binaries[mode], NULL};
1322     int status;
1323 
1324     status = run_process_child(cmd, &local_err);
1325 
1326     if (status == 0) {
1327         return;
1328     }
1329 
1330     if ((status == -1) && !local_err) {
1331         error_setg(errp, "the helper program '%s' was not found",
1332                    pmutils_binaries[mode]);
1333         return;
1334     }
1335 
1336     if (local_err) {
1337         error_propagate(errp, local_err);
1338     } else {
1339         error_setg(errp,
1340                    "the helper program '%s' returned an unexpected exit"
1341                    " status code (%d)", pmutils_binaries[mode], status);
1342     }
1343 }
1344 
1345 static bool linux_sys_state_supports_mode(SuspendMode mode, Error **errp)
1346 {
1347     const char *sysfile_strs[3] = {"disk", "mem", NULL};
1348     const char *sysfile_str = sysfile_strs[mode];
1349     char buf[32]; /* hopefully big enough */
1350     int fd;
1351     ssize_t ret;
1352 
1353     if (!sysfile_str) {
1354         error_setg(errp, "unknown guest suspend mode");
1355         return false;
1356     }
1357 
1358     fd = open(LINUX_SYS_STATE_FILE, O_RDONLY);
1359     if (fd < 0) {
1360         return false;
1361     }
1362 
1363     ret = read(fd, buf, sizeof(buf) - 1);
1364     close(fd);
1365     if (ret <= 0) {
1366         return false;
1367     }
1368     buf[ret] = '\0';
1369 
1370     if (strstr(buf, sysfile_str)) {
1371         return true;
1372     }
1373     return false;
1374 }
1375 
1376 static void linux_sys_state_suspend(SuspendMode mode, Error **errp)
1377 {
1378     g_autoptr(GError) local_gerr = NULL;
1379     const char *sysfile_strs[3] = {"disk", "mem", NULL};
1380     const char *sysfile_str = sysfile_strs[mode];
1381 
1382     if (!sysfile_str) {
1383         error_setg(errp, "unknown guest suspend mode");
1384         return;
1385     }
1386 
1387     if (!g_file_set_contents(LINUX_SYS_STATE_FILE, sysfile_str,
1388                              -1, &local_gerr)) {
1389         error_setg(errp, "suspend: cannot write to '%s': %s",
1390                    LINUX_SYS_STATE_FILE, local_gerr->message);
1391         return;
1392     }
1393 }
1394 
1395 static void guest_suspend(SuspendMode mode, Error **errp)
1396 {
1397     Error *local_err = NULL;
1398     bool mode_supported = false;
1399 
1400     if (systemd_supports_mode(mode, &local_err)) {
1401         mode_supported = true;
1402         systemd_suspend(mode, &local_err);
1403 
1404         if (!local_err) {
1405             return;
1406         }
1407     }
1408 
1409     error_free(local_err);
1410     local_err = NULL;
1411 
1412     if (pmutils_supports_mode(mode, &local_err)) {
1413         mode_supported = true;
1414         pmutils_suspend(mode, &local_err);
1415 
1416         if (!local_err) {
1417             return;
1418         }
1419     }
1420 
1421     error_free(local_err);
1422     local_err = NULL;
1423 
1424     if (linux_sys_state_supports_mode(mode, &local_err)) {
1425         mode_supported = true;
1426         linux_sys_state_suspend(mode, &local_err);
1427     }
1428 
1429     if (!mode_supported) {
1430         error_free(local_err);
1431         error_setg(errp,
1432                    "the requested suspend mode is not supported by the guest");
1433     } else {
1434         error_propagate(errp, local_err);
1435     }
1436 }
1437 
/*
 * Suspend the guest using SUSPEND_MODE_DISK.  Thin wrapper around
 * guest_suspend(); any failure is reported through @errp.
 */
void qmp_guest_suspend_disk(Error **errp)
{
    guest_suspend(SUSPEND_MODE_DISK, errp);
}
1442 
/*
 * Suspend the guest using SUSPEND_MODE_RAM.  Thin wrapper around
 * guest_suspend(); any failure is reported through @errp.
 */
void qmp_guest_suspend_ram(Error **errp)
{
    guest_suspend(SUSPEND_MODE_RAM, errp);
}
1447 
/*
 * Suspend the guest using SUSPEND_MODE_HYBRID.  Thin wrapper around
 * guest_suspend(); any failure is reported through @errp.
 */
void qmp_guest_suspend_hybrid(Error **errp)
{
    guest_suspend(SUSPEND_MODE_HYBRID, errp);
}
1452 
1453 /* Transfer online/offline status between @vcpu and the guest system.
1454  *
1455  * On input either @errp or *@errp must be NULL.
1456  *
1457  * In system-to-@vcpu direction, the following @vcpu fields are accessed:
1458  * - R: vcpu->logical_id
1459  * - W: vcpu->online
1460  * - W: vcpu->can_offline
1461  *
1462  * In @vcpu-to-system direction, the following @vcpu fields are accessed:
1463  * - R: vcpu->logical_id
1464  * - R: vcpu->online
1465  *
1466  * Written members remain unmodified on error.
1467  */
1468 static void transfer_vcpu(GuestLogicalProcessor *vcpu, bool sys2vcpu,
1469                           char *dirpath, Error **errp)
1470 {
1471     int fd;
1472     int res;
1473     int dirfd;
1474     static const char fn[] = "online";
1475 
1476     dirfd = open(dirpath, O_RDONLY | O_DIRECTORY);
1477     if (dirfd == -1) {
1478         error_setg_errno(errp, errno, "open(\"%s\")", dirpath);
1479         return;
1480     }
1481 
1482     fd = openat(dirfd, fn, sys2vcpu ? O_RDONLY : O_RDWR);
1483     if (fd == -1) {
1484         if (errno != ENOENT) {
1485             error_setg_errno(errp, errno, "open(\"%s/%s\")", dirpath, fn);
1486         } else if (sys2vcpu) {
1487             vcpu->online = true;
1488             vcpu->can_offline = false;
1489         } else if (!vcpu->online) {
1490             error_setg(errp, "logical processor #%" PRId64 " can't be "
1491                        "offlined", vcpu->logical_id);
1492         } /* otherwise pretend successful re-onlining */
1493     } else {
1494         unsigned char status;
1495 
1496         res = pread(fd, &status, 1, 0);
1497         if (res == -1) {
1498             error_setg_errno(errp, errno, "pread(\"%s/%s\")", dirpath, fn);
1499         } else if (res == 0) {
1500             error_setg(errp, "pread(\"%s/%s\"): unexpected EOF", dirpath,
1501                        fn);
1502         } else if (sys2vcpu) {
1503             vcpu->online = (status != '0');
1504             vcpu->can_offline = true;
1505         } else if (vcpu->online != (status != '0')) {
1506             status = '0' + vcpu->online;
1507             if (pwrite(fd, &status, 1, 0) == -1) {
1508                 error_setg_errno(errp, errno, "pwrite(\"%s/%s\")", dirpath,
1509                                  fn);
1510             }
1511         } /* otherwise pretend successful re-(on|off)-lining */
1512 
1513         res = close(fd);
1514         g_assert(res == 0);
1515     }
1516 
1517     res = close(dirfd);
1518     g_assert(res == 0);
1519 }
1520 
1521 GuestLogicalProcessorList *qmp_guest_get_vcpus(Error **errp)
1522 {
1523     GuestLogicalProcessorList *head, **tail;
1524     const char *cpu_dir = "/sys/devices/system/cpu";
1525     const gchar *line;
1526     g_autoptr(GDir) cpu_gdir = NULL;
1527     Error *local_err = NULL;
1528 
1529     head = NULL;
1530     tail = &head;
1531     cpu_gdir = g_dir_open(cpu_dir, 0, NULL);
1532 
1533     if (cpu_gdir == NULL) {
1534         error_setg_errno(errp, errno, "failed to list entries: %s", cpu_dir);
1535         return NULL;
1536     }
1537 
1538     while (local_err == NULL && (line = g_dir_read_name(cpu_gdir)) != NULL) {
1539         GuestLogicalProcessor *vcpu;
1540         int64_t id;
1541         if (sscanf(line, "cpu%" PRId64, &id)) {
1542             g_autofree char *path = g_strdup_printf("/sys/devices/system/cpu/"
1543                                                     "cpu%" PRId64 "/", id);
1544             vcpu = g_malloc0(sizeof *vcpu);
1545             vcpu->logical_id = id;
1546             vcpu->has_can_offline = true; /* lolspeak ftw */
1547             transfer_vcpu(vcpu, true, path, &local_err);
1548             QAPI_LIST_APPEND(tail, vcpu);
1549         }
1550     }
1551 
1552     if (local_err == NULL) {
1553         /* there's no guest with zero VCPUs */
1554         g_assert(head != NULL);
1555         return head;
1556     }
1557 
1558     qapi_free_GuestLogicalProcessorList(head);
1559     error_propagate(errp, local_err);
1560     return NULL;
1561 }
1562 
1563 int64_t qmp_guest_set_vcpus(GuestLogicalProcessorList *vcpus, Error **errp)
1564 {
1565     int64_t processed;
1566     Error *local_err = NULL;
1567 
1568     processed = 0;
1569     while (vcpus != NULL) {
1570         char *path = g_strdup_printf("/sys/devices/system/cpu/cpu%" PRId64 "/",
1571                                      vcpus->value->logical_id);
1572 
1573         transfer_vcpu(vcpus->value, false, path, &local_err);
1574         g_free(path);
1575         if (local_err != NULL) {
1576             break;
1577         }
1578         ++processed;
1579         vcpus = vcpus->next;
1580     }
1581 
1582     if (local_err != NULL) {
1583         if (processed == 0) {
1584             error_propagate(errp, local_err);
1585         } else {
1586             error_free(local_err);
1587         }
1588     }
1589 
1590     return processed;
1591 }
1592 
1593 
1594 static void ga_read_sysfs_file(int dirfd, const char *pathname, char *buf,
1595                                int size, Error **errp)
1596 {
1597     int fd;
1598     int res;
1599 
1600     errno = 0;
1601     fd = openat(dirfd, pathname, O_RDONLY);
1602     if (fd == -1) {
1603         error_setg_errno(errp, errno, "open sysfs file \"%s\"", pathname);
1604         return;
1605     }
1606 
1607     res = pread(fd, buf, size, 0);
1608     if (res == -1) {
1609         error_setg_errno(errp, errno, "pread sysfs file \"%s\"", pathname);
1610     } else if (res == 0) {
1611         error_setg(errp, "pread sysfs file \"%s\": unexpected EOF", pathname);
1612     }
1613     close(fd);
1614 }
1615 
1616 static void ga_write_sysfs_file(int dirfd, const char *pathname,
1617                                 const char *buf, int size, Error **errp)
1618 {
1619     int fd;
1620 
1621     errno = 0;
1622     fd = openat(dirfd, pathname, O_WRONLY);
1623     if (fd == -1) {
1624         error_setg_errno(errp, errno, "open sysfs file \"%s\"", pathname);
1625         return;
1626     }
1627 
1628     if (pwrite(fd, buf, size, 0) == -1) {
1629         error_setg_errno(errp, errno, "pwrite sysfs file \"%s\"", pathname);
1630     }
1631 
1632     close(fd);
1633 }
1634 
/*
 * Transfer online/offline status between @mem_blk and the guest system,
 * using the "state" and "removable" files in
 * /sys/devices/system/memory/memory<phys_index>/.
 *
 * On input either @errp or *@errp must be NULL.
 *
 * In system-to-@mem_blk direction (@sys2memblk == true), the following
 * @mem_blk fields are accessed, and failures are reported through @errp:
 * - R: mem_blk->phys_index
 * - W: mem_blk->online
 * - W: mem_blk->can_offline
 *
 * In @mem_blk-to-system direction (@sys2memblk == false), @result must not
 * be NULL; failures are reported through result->response and
 * result->error_code, and @errp is only set when @result is NULL.  The
 * following @mem_blk fields are read:
 * - R: mem_blk->phys_index
 * - R: mem_blk->online
 * (The previous header also listed can_offline as read in this direction;
 * no such read is visible in this function.)
 *
 * Written members remain unmodified on error.
 *
 * NOTE(review): the failure paths rely on errno still holding the value of
 * the failing call when 'out1' is reached, although g_free() and close() run
 * in between - confirm this is acceptable.
 */
static void transfer_memory_block(GuestMemoryBlock *mem_blk, bool sys2memblk,
                                  GuestMemoryBlockResponse *result,
                                  Error **errp)
{
    char *dirpath;
    int dirfd;
    char *status;
    Error *local_err = NULL;

    if (!sys2memblk) {
        DIR *dp;

        if (!result) {
            error_setg(errp, "Internal error, 'result' should not be NULL");
            return;
        }
        errno = 0;
        dp = opendir("/sys/devices/system/memory/");
        /*
         * If there is no 'memory' directory in sysfs, assume this VM does
         * not support onlining/offlining memory blocks.
         * NOTE(review): for any other opendir() failure result->response is
         * left as the caller initialised it while error_code is still set
         * at 'out1' - confirm that this is intended.
         */
        if (!dp) {
            if (errno == ENOENT) {
                result->response =
                    GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_NOT_SUPPORTED;
            }
            goto out1;
        }
        closedir(dp);
    }

    dirpath = g_strdup_printf("/sys/devices/system/memory/memory%" PRId64 "/",
                              mem_blk->phys_index);
    dirfd = open(dirpath, O_RDONLY | O_DIRECTORY);
    if (dirfd == -1) {
        if (sys2memblk) {
            error_setg_errno(errp, errno, "open(\"%s\")", dirpath);
        } else {
            /* a missing block directory is "not found", anything else a
             * generic failure */
            if (errno == ENOENT) {
                result->response = GUEST_MEMORY_BLOCK_RESPONSE_TYPE_NOT_FOUND;
            } else {
                result->response =
                    GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_FAILED;
            }
        }
        g_free(dirpath);
        goto out1;
    }
    g_free(dirpath);

    /* zero-filled 10-byte buffer; only its first 6 bytes are compared below */
    status = g_malloc0(10);
    ga_read_sysfs_file(dirfd, "state", status, 10, &local_err);
    if (local_err) {
        /* the "state" file may not exist on old kernels */
        if (errno == ENOENT) {
            error_free(local_err);
            if (sys2memblk) {
                /* without a state file: online and not offlinable */
                mem_blk->online = true;
                mem_blk->can_offline = false;
            } else if (!mem_blk->online) {
                result->response =
                    GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_NOT_SUPPORTED;
            }
        } else {
            if (sys2memblk) {
                error_propagate(errp, local_err);
            } else {
                error_free(local_err);
                result->response =
                    GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_FAILED;
            }
        }
        goto out2;
    }

    if (sys2memblk) {
        /* default used when nothing is read into it */
        char removable = '0';

        mem_blk->online = (strncmp(status, "online", 6) == 0);

        ga_read_sysfs_file(dirfd, "removable", &removable, 1, &local_err);
        if (local_err) {
            /* if no 'removable' file, it doesn't support offline mem blk */
            if (errno == ENOENT) {
                error_free(local_err);
                mem_blk->can_offline = false;
            } else {
                error_propagate(errp, local_err);
            }
        } else {
            mem_blk->can_offline = (removable != '0');
        }
    } else {
        /* only write when the requested state differs from the current one */
        if (mem_blk->online != (strncmp(status, "online", 6) == 0)) {
            const char *new_state = mem_blk->online ? "online" : "offline";

            ga_write_sysfs_file(dirfd, "state", new_state, strlen(new_state),
                                &local_err);
            if (local_err) {
                error_free(local_err);
                result->response =
                    GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_FAILED;
                goto out2;
            }

            result->response = GUEST_MEMORY_BLOCK_RESPONSE_TYPE_SUCCESS;
            result->has_error_code = false;
        } /* otherwise pretend successful re-(on|off)-lining */
    }
    g_free(status);
    close(dirfd);
    return;

    /* failure exits: out2 also releases the buffer and the directory fd */
out2:
    g_free(status);
    close(dirfd);
out1:
    /* in @mem_blk-to-system direction, report errno to the caller */
    if (!sys2memblk) {
        result->has_error_code = true;
        result->error_code = errno;
    }
}
1773 
1774 GuestMemoryBlockList *qmp_guest_get_memory_blocks(Error **errp)
1775 {
1776     GuestMemoryBlockList *head, **tail;
1777     Error *local_err = NULL;
1778     struct dirent *de;
1779     DIR *dp;
1780 
1781     head = NULL;
1782     tail = &head;
1783 
1784     dp = opendir("/sys/devices/system/memory/");
1785     if (!dp) {
1786         /* it's ok if this happens to be a system that doesn't expose
1787          * memory blocks via sysfs, but otherwise we should report
1788          * an error
1789          */
1790         if (errno != ENOENT) {
1791             error_setg_errno(errp, errno, "Can't open directory"
1792                              "\"/sys/devices/system/memory/\"");
1793         }
1794         return NULL;
1795     }
1796 
1797     /* Note: the phys_index of memory block may be discontinuous,
1798      * this is because a memblk is the unit of the Sparse Memory design, which
1799      * allows discontinuous memory ranges (ex. NUMA), so here we should
1800      * traverse the memory block directory.
1801      */
1802     while ((de = readdir(dp)) != NULL) {
1803         GuestMemoryBlock *mem_blk;
1804 
1805         if ((strncmp(de->d_name, "memory", 6) != 0) ||
1806             !(de->d_type & DT_DIR)) {
1807             continue;
1808         }
1809 
1810         mem_blk = g_malloc0(sizeof *mem_blk);
1811         /* The d_name is "memoryXXX",  phys_index is block id, same as XXX */
1812         mem_blk->phys_index = strtoul(&de->d_name[6], NULL, 10);
1813         mem_blk->has_can_offline = true; /* lolspeak ftw */
1814         transfer_memory_block(mem_blk, true, NULL, &local_err);
1815         if (local_err) {
1816             break;
1817         }
1818 
1819         QAPI_LIST_APPEND(tail, mem_blk);
1820     }
1821 
1822     closedir(dp);
1823     if (local_err == NULL) {
1824         /* there's no guest with zero memory blocks */
1825         if (head == NULL) {
1826             error_setg(errp, "guest reported zero memory blocks!");
1827         }
1828         return head;
1829     }
1830 
1831     qapi_free_GuestMemoryBlockList(head);
1832     error_propagate(errp, local_err);
1833     return NULL;
1834 }
1835 
1836 GuestMemoryBlockResponseList *
1837 qmp_guest_set_memory_blocks(GuestMemoryBlockList *mem_blks, Error **errp)
1838 {
1839     GuestMemoryBlockResponseList *head, **tail;
1840     Error *local_err = NULL;
1841 
1842     head = NULL;
1843     tail = &head;
1844 
1845     while (mem_blks != NULL) {
1846         GuestMemoryBlockResponse *result;
1847         GuestMemoryBlock *current_mem_blk = mem_blks->value;
1848 
1849         result = g_malloc0(sizeof(*result));
1850         result->phys_index = current_mem_blk->phys_index;
1851         transfer_memory_block(current_mem_blk, false, result, &local_err);
1852         if (local_err) { /* should never happen */
1853             goto err;
1854         }
1855 
1856         QAPI_LIST_APPEND(tail, result);
1857         mem_blks = mem_blks->next;
1858     }
1859 
1860     return head;
1861 err:
1862     qapi_free_GuestMemoryBlockResponseList(head);
1863     error_propagate(errp, local_err);
1864     return NULL;
1865 }
1866 
1867 GuestMemoryBlockInfo *qmp_guest_get_memory_block_info(Error **errp)
1868 {
1869     Error *local_err = NULL;
1870     char *dirpath;
1871     int dirfd;
1872     char *buf;
1873     GuestMemoryBlockInfo *info;
1874 
1875     dirpath = g_strdup_printf("/sys/devices/system/memory/");
1876     dirfd = open(dirpath, O_RDONLY | O_DIRECTORY);
1877     if (dirfd == -1) {
1878         error_setg_errno(errp, errno, "open(\"%s\")", dirpath);
1879         g_free(dirpath);
1880         return NULL;
1881     }
1882     g_free(dirpath);
1883 
1884     buf = g_malloc0(20);
1885     ga_read_sysfs_file(dirfd, "block_size_bytes", buf, 20, &local_err);
1886     close(dirfd);
1887     if (local_err) {
1888         g_free(buf);
1889         error_propagate(errp, local_err);
1890         return NULL;
1891     }
1892 
1893     info = g_new0(GuestMemoryBlockInfo, 1);
1894     info->size = strtol(buf, NULL, 16); /* the unit is bytes */
1895 
1896     g_free(buf);
1897 
1898     return info;
1899 }
1900 
1901 #define MAX_NAME_LEN 128
1902 static GuestDiskStatsInfoList *guest_get_diskstats(Error **errp)
1903 {
1904     GuestDiskStatsInfoList *head = NULL, **tail = &head;
1905     const char *diskstats = "/proc/diskstats";
1906     FILE *fp;
1907     size_t n;
1908     char *line = NULL;
1909 
1910     fp = fopen(diskstats, "r");
1911     if (fp  == NULL) {
1912         error_setg_errno(errp, errno, "open(\"%s\")", diskstats);
1913         return NULL;
1914     }
1915 
1916     while (getline(&line, &n, fp) != -1) {
1917         g_autofree GuestDiskStatsInfo *diskstatinfo = NULL;
1918         g_autofree GuestDiskStats *diskstat = NULL;
1919         char dev_name[MAX_NAME_LEN];
1920         unsigned int ios_pgr, tot_ticks, rq_ticks, wr_ticks, dc_ticks, fl_ticks;
1921         unsigned long rd_ios, rd_merges_or_rd_sec, rd_ticks_or_wr_sec, wr_ios;
1922         unsigned long wr_merges, rd_sec_or_wr_ios, wr_sec;
1923         unsigned long dc_ios, dc_merges, dc_sec, fl_ios;
1924         unsigned int major, minor;
1925         int i;
1926 
1927         i = sscanf(line, "%u %u %s %lu %lu %lu"
1928                    "%lu %lu %lu %lu %u %u %u %u"
1929                    "%lu %lu %lu %u %lu %u",
1930                    &major, &minor, dev_name,
1931                    &rd_ios, &rd_merges_or_rd_sec, &rd_sec_or_wr_ios,
1932                    &rd_ticks_or_wr_sec, &wr_ios, &wr_merges, &wr_sec,
1933                    &wr_ticks, &ios_pgr, &tot_ticks, &rq_ticks,
1934                    &dc_ios, &dc_merges, &dc_sec, &dc_ticks,
1935                    &fl_ios, &fl_ticks);
1936 
1937         if (i < 7) {
1938             continue;
1939         }
1940 
1941         diskstatinfo = g_new0(GuestDiskStatsInfo, 1);
1942         diskstatinfo->name = g_strdup(dev_name);
1943         diskstatinfo->major = major;
1944         diskstatinfo->minor = minor;
1945 
1946         diskstat = g_new0(GuestDiskStats, 1);
1947         if (i == 7) {
1948             diskstat->has_read_ios = true;
1949             diskstat->read_ios = rd_ios;
1950             diskstat->has_read_sectors = true;
1951             diskstat->read_sectors = rd_merges_or_rd_sec;
1952             diskstat->has_write_ios = true;
1953             diskstat->write_ios = rd_sec_or_wr_ios;
1954             diskstat->has_write_sectors = true;
1955             diskstat->write_sectors = rd_ticks_or_wr_sec;
1956         }
1957         if (i >= 14) {
1958             diskstat->has_read_ios = true;
1959             diskstat->read_ios = rd_ios;
1960             diskstat->has_read_sectors = true;
1961             diskstat->read_sectors = rd_sec_or_wr_ios;
1962             diskstat->has_read_merges = true;
1963             diskstat->read_merges = rd_merges_or_rd_sec;
1964             diskstat->has_read_ticks = true;
1965             diskstat->read_ticks = rd_ticks_or_wr_sec;
1966             diskstat->has_write_ios = true;
1967             diskstat->write_ios = wr_ios;
1968             diskstat->has_write_sectors = true;
1969             diskstat->write_sectors = wr_sec;
1970             diskstat->has_write_merges = true;
1971             diskstat->write_merges = wr_merges;
1972             diskstat->has_write_ticks = true;
1973             diskstat->write_ticks = wr_ticks;
1974             diskstat->has_ios_pgr = true;
1975             diskstat->ios_pgr = ios_pgr;
1976             diskstat->has_total_ticks = true;
1977             diskstat->total_ticks = tot_ticks;
1978             diskstat->has_weight_ticks = true;
1979             diskstat->weight_ticks = rq_ticks;
1980         }
1981         if (i >= 18) {
1982             diskstat->has_discard_ios = true;
1983             diskstat->discard_ios = dc_ios;
1984             diskstat->has_discard_merges = true;
1985             diskstat->discard_merges = dc_merges;
1986             diskstat->has_discard_sectors = true;
1987             diskstat->discard_sectors = dc_sec;
1988             diskstat->has_discard_ticks = true;
1989             diskstat->discard_ticks = dc_ticks;
1990         }
1991         if (i >= 20) {
1992             diskstat->has_flush_ios = true;
1993             diskstat->flush_ios = fl_ios;
1994             diskstat->has_flush_ticks = true;
1995             diskstat->flush_ticks = fl_ticks;
1996         }
1997 
1998         diskstatinfo->stats = g_steal_pointer(&diskstat);
1999         QAPI_LIST_APPEND(tail, diskstatinfo);
2000         diskstatinfo = NULL;
2001     }
2002     free(line);
2003     fclose(fp);
2004     return head;
2005 }
2006 
2007 GuestDiskStatsInfoList *qmp_guest_get_diskstats(Error **errp)
2008 {
2009     return guest_get_diskstats(errp);
2010 }
2011 
2012 GuestCpuStatsList *qmp_guest_get_cpustats(Error **errp)
2013 {
2014     GuestCpuStatsList *head = NULL, **tail = &head;
2015     const char *cpustats = "/proc/stat";
2016     int clk_tck = sysconf(_SC_CLK_TCK);
2017     FILE *fp;
2018     size_t n;
2019     char *line = NULL;
2020 
2021     fp = fopen(cpustats, "r");
2022     if (fp  == NULL) {
2023         error_setg_errno(errp, errno, "open(\"%s\")", cpustats);
2024         return NULL;
2025     }
2026 
2027     while (getline(&line, &n, fp) != -1) {
2028         GuestCpuStats *cpustat = NULL;
2029         GuestLinuxCpuStats *linuxcpustat;
2030         int i;
2031         unsigned long user, system, idle, iowait, irq, softirq, steal, guest;
2032         unsigned long nice, guest_nice;
2033         char name[64];
2034 
2035         i = sscanf(line, "%s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
2036                    name, &user, &nice, &system, &idle, &iowait, &irq, &softirq,
2037                    &steal, &guest, &guest_nice);
2038 
2039         /* drop "cpu 1 2 3 ...", get "cpuX 1 2 3 ..." only */
2040         if ((i == EOF) || strncmp(name, "cpu", 3) || (name[3] == '\0')) {
2041             continue;
2042         }
2043 
2044         if (i < 5) {
2045             slog("Parsing cpu stat from %s failed, see \"man proc\"", cpustats);
2046             break;
2047         }
2048 
2049         cpustat = g_new0(GuestCpuStats, 1);
2050         cpustat->type = GUEST_CPU_STATS_TYPE_LINUX;
2051 
2052         linuxcpustat = &cpustat->u.q_linux;
2053         linuxcpustat->cpu = atoi(&name[3]);
2054         linuxcpustat->user = user * 1000 / clk_tck;
2055         linuxcpustat->nice = nice * 1000 / clk_tck;
2056         linuxcpustat->system = system * 1000 / clk_tck;
2057         linuxcpustat->idle = idle * 1000 / clk_tck;
2058 
2059         if (i > 5) {
2060             linuxcpustat->has_iowait = true;
2061             linuxcpustat->iowait = iowait * 1000 / clk_tck;
2062         }
2063 
2064         if (i > 6) {
2065             linuxcpustat->has_irq = true;
2066             linuxcpustat->irq = irq * 1000 / clk_tck;
2067             linuxcpustat->has_softirq = true;
2068             linuxcpustat->softirq = softirq * 1000 / clk_tck;
2069         }
2070 
2071         if (i > 8) {
2072             linuxcpustat->has_steal = true;
2073             linuxcpustat->steal = steal * 1000 / clk_tck;
2074         }
2075 
2076         if (i > 9) {
2077             linuxcpustat->has_guest = true;
2078             linuxcpustat->guest = guest * 1000 / clk_tck;
2079         }
2080 
2081         if (i > 10) {
2082             linuxcpustat->has_guest = true;
2083             linuxcpustat->guest = guest * 1000 / clk_tck;
2084             linuxcpustat->has_guestnice = true;
2085             linuxcpustat->guestnice = guest_nice * 1000 / clk_tck;
2086         }
2087 
2088         QAPI_LIST_APPEND(tail, cpustat);
2089     }
2090 
2091     free(line);
2092     fclose(fp);
2093     return head;
2094 }
2095 
2096 static char *hex_to_ip_address(const void *hex_value, int is_ipv6)
2097 {
2098     if (is_ipv6) {
2099         char addr[INET6_ADDRSTRLEN];
2100         struct in6_addr in6;
2101         const char *hex_str = (const char *)hex_value;
2102         int i;
2103 
2104         for (i = 0; i < 16; i++) {
2105             if (sscanf(&hex_str[i * 2], "%02hhx", &in6.s6_addr[i]) != 1) {
2106                 return NULL;
2107             }
2108         }
2109         inet_ntop(AF_INET6, &in6, addr, INET6_ADDRSTRLEN);
2110 
2111         return g_strdup(addr);
2112     } else {
2113         unsigned int hex_int = *(unsigned int *)hex_value;
2114         unsigned int byte1 = (hex_int >> 24) & 0xFF;
2115         unsigned int byte2 = (hex_int >> 16) & 0xFF;
2116         unsigned int byte3 = (hex_int >> 8) & 0xFF;
2117         unsigned int byte4 = hex_int & 0xFF;
2118 
2119         return g_strdup_printf("%u.%u.%u.%u", byte4, byte3, byte2, byte1);
2120     }
2121 }
2122 
2123 GuestNetworkRouteList *qmp_guest_network_get_route(Error **errp)
2124 {
2125     GuestNetworkRouteList *head = NULL, **tail = &head;
2126     const char *route_files[] = {"/proc/net/route", "/proc/net/ipv6_route"};
2127     FILE *fp;
2128     size_t n = 0;
2129     char *line = NULL;
2130     int firstLine;
2131     int is_ipv6;
2132     int i;
2133     char iface[IFNAMSIZ];
2134 
2135     for (i = 0; i < 2; i++) {
2136         firstLine = 1;
2137         is_ipv6 = (i == 1);
2138         fp = fopen(route_files[i], "r");
2139         if (fp == NULL) {
2140             error_setg_errno(errp, errno, "open(\"%s\")", route_files[i]);
2141             continue;
2142         }
2143 
2144         while (getline(&line, &n, fp) != -1) {
2145             if (firstLine && !is_ipv6) {
2146                 firstLine = 0;
2147                 continue;
2148             }
2149             g_autoptr(GuestNetworkRoute) route = g_new0(GuestNetworkRoute, 1);
2150 
2151             if (is_ipv6) {
2152                 char destination[33], source[33], next_hop[33];
2153                 int des_prefixlen, src_prefixlen, metric, refcnt, use, flags;
2154                 if (sscanf(line, "%32s %x %32s %x %32s %x %x %x %x %s",
2155                            destination, &des_prefixlen, source,
2156                            &src_prefixlen, next_hop, &metric, &refcnt,
2157                            &use, &flags, iface) != 10) {
2158                     continue;
2159                 }
2160 
2161                 route->destination = hex_to_ip_address(destination, 1);
2162                 if (route->destination == NULL) {
2163                     continue;
2164                 }
2165                 route->iface = g_strdup(iface);
2166                 route->source = hex_to_ip_address(source, 1);
2167                 route->nexthop = hex_to_ip_address(next_hop, 1);
2168                 route->desprefixlen = g_strdup_printf("%d", des_prefixlen);
2169                 route->srcprefixlen = g_strdup_printf("%d", src_prefixlen);
2170                 route->metric = metric;
2171                 route->has_flags = true;
2172                 route->flags = flags;
2173                 route->has_refcnt = true;
2174                 route->refcnt = refcnt;
2175                 route->has_use = true;
2176                 route->use = use;
2177                 route->version = 6;
2178             } else {
2179                 unsigned int destination, gateway, mask, flags;
2180                 int refcnt, use, metric, mtu, window, irtt;
2181                 if (sscanf(line, "%s %X %X %x %d %d %d %X %d %d %d",
2182                            iface, &destination, &gateway, &flags, &refcnt,
2183                            &use, &metric, &mask, &mtu, &window, &irtt) != 11) {
2184                     continue;
2185                 }
2186 
2187                 route->destination = hex_to_ip_address(&destination, 0);
2188                 if (route->destination == NULL) {
2189                     continue;
2190                 }
2191                 route->iface = g_strdup(iface);
2192                 route->gateway = hex_to_ip_address(&gateway, 0);
2193                 route->mask = hex_to_ip_address(&mask, 0);
2194                 route->metric = metric;
2195                 route->has_flags = true;
2196                 route->flags = flags;
2197                 route->has_refcnt = true;
2198                 route->refcnt = refcnt;
2199                 route->has_use = true;
2200                 route->use = use;
2201                 route->has_mtu = true;
2202                 route->mtu = mtu;
2203                 route->has_window = true;
2204                 route->window = window;
2205                 route->has_irtt = true;
2206                 route->irtt = irtt;
2207                 route->version = 4;
2208             }
2209 
2210             QAPI_LIST_APPEND(tail, route);
2211             route = NULL;
2212         }
2213 
2214         fclose(fp);
2215     }
2216 
2217     free(line);
2218     return head;
2219 }
2220