xref: /openbmc/qemu/qga/commands-linux.c (revision d0f0cd5b)
1 /*
2  * QEMU Guest Agent Linux-specific command implementations
3  *
4  * Copyright IBM Corp. 2011
5  *
6  * Authors:
7  *  Michael Roth      <mdroth@linux.vnet.ibm.com>
8  *  Michal Privoznik  <mprivozn@redhat.com>
9  *
10  * This work is licensed under the terms of the GNU GPL, version 2 or later.
11  * See the COPYING file in the top-level directory.
12  */
13 
14 #include "qemu/osdep.h"
15 #include "qapi/error.h"
16 #include "qga-qapi-commands.h"
17 #include "qapi/error.h"
18 #include "qapi/qmp/qerror.h"
19 #include "commands-common.h"
20 #include "cutils.h"
21 #include <mntent.h>
22 #include <sys/ioctl.h>
23 #include <mntent.h>
24 #include <linux/nvme_ioctl.h>
25 #include "block/nvme.h"
26 
27 #ifdef CONFIG_LIBUDEV
28 #include <libudev.h>
29 #endif
30 
31 #ifdef HAVE_GETIFADDRS
32 #include <net/if.h>
33 #endif
34 
35 #include <sys/statvfs.h>
36 
37 #if defined(CONFIG_FSFREEZE) || defined(CONFIG_FSTRIM)
/*
 * Look up the device numbers of the block device at @devpath.
 *
 * @devmajor and @devminor are zeroed up front and are only filled in when
 * @devpath names a block device.
 *
 * Returns 0 on success, -2 when @devpath is a directory (a bind mount),
 * and -1 when stat() fails or @devpath is any other kind of file.
 */
static int dev_major_minor(const char *devpath,
                           unsigned int *devmajor, unsigned int *devminor)
{
    struct stat sb;

    *devmajor = *devminor = 0;

    if (stat(devpath, &sb) < 0) {
        slog("failed to stat device file '%s': %s", devpath, strerror(errno));
        return -1;
    }

    if (S_ISDIR(sb.st_mode)) {
        /* A directory as the mount source means this is a bind mount */
        return -2;
    }
    if (!S_ISBLK(sb.st_mode)) {
        return -1;
    }

    *devmajor = major(sb.st_rdev);
    *devminor = minor(sb.st_rdev);
    return 0;
}
61 
62 static bool build_fs_mount_list_from_mtab(FsMountList *mounts, Error **errp)
63 {
64     struct mntent *ment;
65     FsMount *mount;
66     char const *mtab = "/proc/self/mounts";
67     FILE *fp;
68     unsigned int devmajor, devminor;
69 
70     fp = setmntent(mtab, "r");
71     if (!fp) {
72         error_setg(errp, "failed to open mtab file: '%s'", mtab);
73         return false;
74     }
75 
76     while ((ment = getmntent(fp))) {
77         /*
78          * An entry which device name doesn't start with a '/' is
79          * either a dummy file system or a network file system.
80          * Add special handling for smbfs and cifs as is done by
81          * coreutils as well.
82          */
83         if ((ment->mnt_fsname[0] != '/') ||
84             (strcmp(ment->mnt_type, "smbfs") == 0) ||
85             (strcmp(ment->mnt_type, "cifs") == 0)) {
86             continue;
87         }
88         if (dev_major_minor(ment->mnt_fsname, &devmajor, &devminor) == -2) {
89             /* Skip bind mounts */
90             continue;
91         }
92 
93         mount = g_new0(FsMount, 1);
94         mount->dirname = g_strdup(ment->mnt_dir);
95         mount->devtype = g_strdup(ment->mnt_type);
96         mount->devmajor = devmajor;
97         mount->devminor = devminor;
98 
99         QTAILQ_INSERT_TAIL(mounts, mount, next);
100     }
101 
102     endmntent(fp);
103     return true;
104 }
105 
/*
 * Decode, in place, the backslash escapes found in a mount table name.
 *
 * "\\" collapses to a single backslash and "\NNN" (first digit 0-3, the
 * other two 0-7) becomes the byte with that octal value.  A backslash that
 * starts neither sequence is kept as is.
 *
 * @len is the length of @name without its terminator; the byte at
 * name[len] is processed too, so the terminator moves along with the
 * (possibly shorter) decoded text.
 */
static void decode_mntname(char *name, int len)
{
    int rd = 0;     /* next byte to read */
    int wr = 0;     /* next byte to write, never ahead of rd */

    while (rd <= len) {
        const char *in = name + rd;

        if (in[0] == '\\' && in[1] == '\\') {
            name[wr++] = '\\';
            rd += 2;
        } else if (in[0] == '\\' &&
                   in[1] >= '0' && in[1] <= '3' &&
                   in[2] >= '0' && in[2] <= '7' &&
                   in[3] >= '0' && in[3] <= '7') {
            name[wr++] = (in[1] - '0') * 64 +
                         (in[2] - '0') * 8 +
                         (in[3] - '0');
            rd += 4;
        } else {
            name[wr++] = in[0];
            rd++;
        }
    }
}
127 
/*
 * Walk the mount table and build a list of local file systems
 *
 * Each line of /proc/self/mountinfo that parses successfully is appended
 * to @mounts as a newly allocated FsMount.  If that file cannot be opened
 * the work is delegated to build_fs_mount_list_from_mtab().
 *
 * Returns true on success.  The only failure path visible here is the
 * one reported by the mtab fallback.
 */
bool build_fs_mount_list(FsMountList *mounts, Error **errp)
{
    FsMount *mount;
    char const *mountinfo = "/proc/self/mountinfo";
    FILE *fp;
    char *line = NULL, *dash;
    size_t n;
    char check;
    unsigned int devmajor, devminor;
    int ret, dir_s, dir_e, type_s, type_e, dev_s, dev_e;

    fp = fopen(mountinfo, "r");
    if (!fp) {
        return build_fs_mount_list_from_mtab(mounts, errp);
    }

    while (getline(&line, &n, fp) != -1) {
        /*
         * Skip two unsigned fields, read "major:minor", skip one more
         * word, then record the start/end offsets of the following word
         * (used below as the mount directory).  The trailing %c only
         * succeeds when something follows that word, which is why three
         * conversions are required.
         */
        ret = sscanf(line, "%*u %*u %u:%u %*s %n%*s%n%c",
                     &devmajor, &devminor, &dir_s, &dir_e, &check);
        if (ret < 3) {
            continue;
        }
        /* The remaining fields of interest come after a " - " separator */
        dash = strstr(line + dir_e, " - ");
        if (!dash) {
            continue;
        }
        /*
         * Record the offsets (relative to dash) of the two words after
         * the separator: file system type and device name.  Again %c
         * confirms that the line continues past them.
         */
        ret = sscanf(dash, " - %n%*s%n %n%*s%n%c",
                     &type_s, &type_e, &dev_s, &dev_e, &check);
        if (ret < 1) {
            continue;
        }
        /* Terminate the three words in place so they can be used as strings */
        line[dir_e] = 0;
        dash[type_e] = 0;
        dash[dev_e] = 0;
        decode_mntname(line + dir_s, dir_e - dir_s);
        decode_mntname(dash + dev_s, dev_e - dev_s);
        if (devmajor == 0) {
            /* btrfs reports major number = 0 */
            if (strcmp("btrfs", dash + type_s) != 0 ||
                dev_major_minor(dash + dev_s, &devmajor, &devminor) < 0) {
                continue;
            }
        }

        mount = g_new0(FsMount, 1);
        mount->dirname = g_strdup(line + dir_s);
        mount->devtype = g_strdup(dash + type_s);
        mount->devmajor = devmajor;
        mount->devminor = devminor;

        QTAILQ_INSERT_TAIL(mounts, mount, next);
    }
    /* getline() owns the buffer across iterations; release it once here */
    free(line);

    fclose(fp);
    return true;
}
188 #endif /* CONFIG_FSFREEZE || CONFIG_FSTRIM */
189 
190 #ifdef CONFIG_FSFREEZE
191 /*
192  * Walk list of mounted file systems in the guest, and freeze the ones which
193  * are real local file systems.
194  */
195 int64_t qmp_guest_fsfreeze_do_freeze_list(bool has_mountpoints,
196                                           strList *mountpoints,
197                                           FsMountList mounts,
198                                           Error **errp)
199 {
200     struct FsMount *mount;
201     strList *list;
202     int fd, ret, i = 0;
203 
204     QTAILQ_FOREACH_REVERSE(mount, &mounts, next) {
205         /* To issue fsfreeze in the reverse order of mounts, check if the
206          * mount is listed in the list here */
207         if (has_mountpoints) {
208             for (list = mountpoints; list; list = list->next) {
209                 if (strcmp(list->value, mount->dirname) == 0) {
210                     break;
211                 }
212             }
213             if (!list) {
214                 continue;
215             }
216         }
217 
218         fd = qga_open_cloexec(mount->dirname, O_RDONLY, 0);
219         if (fd == -1) {
220             error_setg_errno(errp, errno, "failed to open %s", mount->dirname);
221             return -1;
222         }
223 
224         /* we try to cull filesystems we know won't work in advance, but other
225          * filesystems may not implement fsfreeze for less obvious reasons.
226          * these will report EOPNOTSUPP. we simply ignore these when tallying
227          * the number of frozen filesystems.
228          * if a filesystem is mounted more than once (aka bind mount) a
229          * consecutive attempt to freeze an already frozen filesystem will
230          * return EBUSY.
231          *
232          * any other error means a failure to freeze a filesystem we
233          * expect to be freezable, so return an error in those cases
234          * and return system to thawed state.
235          */
236         ret = ioctl(fd, FIFREEZE);
237         if (ret == -1) {
238             if (errno != EOPNOTSUPP && errno != EBUSY) {
239                 error_setg_errno(errp, errno, "failed to freeze %s",
240                                  mount->dirname);
241                 close(fd);
242                 return -1;
243             }
244         } else {
245             i++;
246         }
247         close(fd);
248     }
249     return i;
250 }
251 
252 int qmp_guest_fsfreeze_do_thaw(Error **errp)
253 {
254     int ret;
255     FsMountList mounts;
256     FsMount *mount;
257     int fd, i = 0, logged;
258     Error *local_err = NULL;
259 
260     QTAILQ_INIT(&mounts);
261     if (!build_fs_mount_list(&mounts, &local_err)) {
262         error_propagate(errp, local_err);
263         return -1;
264     }
265 
266     QTAILQ_FOREACH(mount, &mounts, next) {
267         logged = false;
268         fd = qga_open_cloexec(mount->dirname, O_RDONLY, 0);
269         if (fd == -1) {
270             continue;
271         }
272         /* we have no way of knowing whether a filesystem was actually unfrozen
273          * as a result of a successful call to FITHAW, only that if an error
274          * was returned the filesystem was *not* unfrozen by that particular
275          * call.
276          *
277          * since multiple preceding FIFREEZEs require multiple calls to FITHAW
278          * to unfreeze, continuing issuing FITHAW until an error is returned,
279          * in which case either the filesystem is in an unfreezable state, or,
280          * more likely, it was thawed previously (and remains so afterward).
281          *
282          * also, since the most recent successful call is the one that did
283          * the actual unfreeze, we can use this to provide an accurate count
284          * of the number of filesystems unfrozen by guest-fsfreeze-thaw, which
285          * may * be useful for determining whether a filesystem was unfrozen
286          * during the freeze/thaw phase by a process other than qemu-ga.
287          */
288         do {
289             ret = ioctl(fd, FITHAW);
290             if (ret == 0 && !logged) {
291                 i++;
292                 logged = true;
293             }
294         } while (ret == 0);
295         close(fd);
296     }
297 
298     free_fs_mount_list(&mounts);
299 
300     return i;
301 }
302 #endif /* CONFIG_FSFREEZE */
303 
304 #if defined(CONFIG_FSFREEZE)
305 
306 static char *get_pci_driver(char const *syspath, int pathlen, Error **errp)
307 {
308     char *path;
309     char *dpath;
310     char *driver = NULL;
311     char buf[PATH_MAX];
312     ssize_t len;
313 
314     path = g_strndup(syspath, pathlen);
315     dpath = g_strdup_printf("%s/driver", path);
316     len = readlink(dpath, buf, sizeof(buf) - 1);
317     if (len != -1) {
318         buf[len] = 0;
319         driver = g_path_get_basename(buf);
320     }
321     g_free(dpath);
322     g_free(path);
323     return driver;
324 }
325 
/*
 * qsort()-style comparator for unsigned int elements.
 * Returns -1, 0 or 1 when *_a is below, equal to or above *_b.
 */
static int compare_uint(const void *_a, const void *_b)
{
    const unsigned int *lhs = _a;
    const unsigned int *rhs = _b;

    /* Comparing, not subtracting, avoids wrap-around for distant values */
    return (*lhs > *rhs) - (*lhs < *rhs);
}
333 
334 /* Walk the specified sysfs and build a sorted list of host or ata numbers */
335 static int build_hosts(char const *syspath, char const *host, bool ata,
336                        unsigned int *hosts, int hosts_max, Error **errp)
337 {
338     char *path;
339     DIR *dir;
340     struct dirent *entry;
341     int i = 0;
342 
343     path = g_strndup(syspath, host - syspath);
344     dir = opendir(path);
345     if (!dir) {
346         error_setg_errno(errp, errno, "opendir(\"%s\")", path);
347         g_free(path);
348         return -1;
349     }
350 
351     while (i < hosts_max) {
352         entry = readdir(dir);
353         if (!entry) {
354             break;
355         }
356         if (ata && sscanf(entry->d_name, "ata%d", hosts + i) == 1) {
357             ++i;
358         } else if (!ata && sscanf(entry->d_name, "host%d", hosts + i) == 1) {
359             ++i;
360         }
361     }
362 
363     qsort(hosts, i, sizeof(hosts[0]), compare_uint);
364 
365     g_free(path);
366     closedir(dir);
367     return i;
368 }
369 
370 /*
371  * Store disk device info for devices on the PCI bus.
372  * Returns true if information has been stored, or false for failure.
373  */
374 static bool build_guest_fsinfo_for_pci_dev(char const *syspath,
375                                            GuestDiskAddress *disk,
376                                            Error **errp)
377 {
378     unsigned int pci[4], host, hosts[8], tgt[3];
379     int i, nhosts = 0, pcilen;
380     GuestPCIAddress *pciaddr = disk->pci_controller;
381     bool has_ata = false, has_host = false, has_tgt = false;
382     char *p, *q, *driver = NULL;
383     bool ret = false;
384 
385     p = strstr(syspath, "/devices/pci");
386     if (!p || sscanf(p + 12, "%*x:%*x/%x:%x:%x.%x%n",
387                      pci, pci + 1, pci + 2, pci + 3, &pcilen) < 4) {
388         g_debug("only pci device is supported: sysfs path '%s'", syspath);
389         return false;
390     }
391 
392     p += 12 + pcilen;
393     while (true) {
394         driver = get_pci_driver(syspath, p - syspath, errp);
395         if (driver && (g_str_equal(driver, "ata_piix") ||
396                        g_str_equal(driver, "sym53c8xx") ||
397                        g_str_equal(driver, "virtio-pci") ||
398                        g_str_equal(driver, "ahci") ||
399                        g_str_equal(driver, "nvme") ||
400                        g_str_equal(driver, "xhci_hcd") ||
401                        g_str_equal(driver, "ehci-pci"))) {
402             break;
403         }
404 
405         g_free(driver);
406         if (sscanf(p, "/%x:%x:%x.%x%n",
407                           pci, pci + 1, pci + 2, pci + 3, &pcilen) == 4) {
408             p += pcilen;
409             continue;
410         }
411 
412         g_debug("unsupported driver or sysfs path '%s'", syspath);
413         return false;
414     }
415 
416     p = strstr(syspath, "/target");
417     if (p && sscanf(p + 7, "%*u:%*u:%*u/%*u:%u:%u:%u",
418                     tgt, tgt + 1, tgt + 2) == 3) {
419         has_tgt = true;
420     }
421 
422     p = strstr(syspath, "/ata");
423     if (p) {
424         q = p + 4;
425         has_ata = true;
426     } else {
427         p = strstr(syspath, "/host");
428         q = p + 5;
429     }
430     if (p && sscanf(q, "%u", &host) == 1) {
431         has_host = true;
432         nhosts = build_hosts(syspath, p, has_ata, hosts,
433                              ARRAY_SIZE(hosts), errp);
434         if (nhosts < 0) {
435             goto cleanup;
436         }
437     }
438 
439     pciaddr->domain = pci[0];
440     pciaddr->bus = pci[1];
441     pciaddr->slot = pci[2];
442     pciaddr->function = pci[3];
443 
444     if (strcmp(driver, "ata_piix") == 0) {
445         /* a host per ide bus, target*:0:<unit>:0 */
446         if (!has_host || !has_tgt) {
447             g_debug("invalid sysfs path '%s' (driver '%s')", syspath, driver);
448             goto cleanup;
449         }
450         for (i = 0; i < nhosts; i++) {
451             if (host == hosts[i]) {
452                 disk->bus_type = GUEST_DISK_BUS_TYPE_IDE;
453                 disk->bus = i;
454                 disk->unit = tgt[1];
455                 break;
456             }
457         }
458         if (i >= nhosts) {
459             g_debug("no host for '%s' (driver '%s')", syspath, driver);
460             goto cleanup;
461         }
462     } else if (strcmp(driver, "sym53c8xx") == 0) {
463         /* scsi(LSI Logic): target*:0:<unit>:0 */
464         if (!has_tgt) {
465             g_debug("invalid sysfs path '%s' (driver '%s')", syspath, driver);
466             goto cleanup;
467         }
468         disk->bus_type = GUEST_DISK_BUS_TYPE_SCSI;
469         disk->unit = tgt[1];
470     } else if (strcmp(driver, "virtio-pci") == 0) {
471         if (has_tgt) {
472             /* virtio-scsi: target*:0:0:<unit> */
473             disk->bus_type = GUEST_DISK_BUS_TYPE_SCSI;
474             disk->unit = tgt[2];
475         } else {
476             /* virtio-blk: 1 disk per 1 device */
477             disk->bus_type = GUEST_DISK_BUS_TYPE_VIRTIO;
478         }
479     } else if (strcmp(driver, "ahci") == 0) {
480         /* ahci: 1 host per 1 unit */
481         if (!has_host || !has_tgt) {
482             g_debug("invalid sysfs path '%s' (driver '%s')", syspath, driver);
483             goto cleanup;
484         }
485         for (i = 0; i < nhosts; i++) {
486             if (host == hosts[i]) {
487                 disk->unit = i;
488                 disk->bus_type = GUEST_DISK_BUS_TYPE_SATA;
489                 break;
490             }
491         }
492         if (i >= nhosts) {
493             g_debug("no host for '%s' (driver '%s')", syspath, driver);
494             goto cleanup;
495         }
496     } else if (strcmp(driver, "nvme") == 0) {
497         disk->bus_type = GUEST_DISK_BUS_TYPE_NVME;
498     } else if (strcmp(driver, "ehci-pci") == 0 || strcmp(driver, "xhci_hcd") == 0) {
499         disk->bus_type = GUEST_DISK_BUS_TYPE_USB;
500     } else {
501         g_debug("unknown driver '%s' (sysfs path '%s')", driver, syspath);
502         goto cleanup;
503     }
504 
505     ret = true;
506 
507 cleanup:
508     g_free(driver);
509     return ret;
510 }
511 
512 /*
513  * Store disk device info for non-PCI virtio devices (for example s390x
514  * channel I/O devices). Returns true if information has been stored, or
515  * false for failure.
516  */
517 static bool build_guest_fsinfo_for_nonpci_virtio(char const *syspath,
518                                                  GuestDiskAddress *disk,
519                                                  Error **errp)
520 {
521     unsigned int tgt[3];
522     char *p;
523 
524     if (!strstr(syspath, "/virtio") || !strstr(syspath, "/block")) {
525         g_debug("Unsupported virtio device '%s'", syspath);
526         return false;
527     }
528 
529     p = strstr(syspath, "/target");
530     if (p && sscanf(p + 7, "%*u:%*u:%*u/%*u:%u:%u:%u",
531                     &tgt[0], &tgt[1], &tgt[2]) == 3) {
532         /* virtio-scsi: target*:0:<target>:<unit> */
533         disk->bus_type = GUEST_DISK_BUS_TYPE_SCSI;
534         disk->bus = tgt[0];
535         disk->target = tgt[1];
536         disk->unit = tgt[2];
537     } else {
538         /* virtio-blk: 1 disk per 1 device */
539         disk->bus_type = GUEST_DISK_BUS_TYPE_VIRTIO;
540     }
541 
542     return true;
543 }
544 
545 /*
546  * Store disk device info for CCW devices (s390x channel I/O devices).
547  * Returns true if information has been stored, or false for failure.
548  */
549 static bool build_guest_fsinfo_for_ccw_dev(char const *syspath,
550                                            GuestDiskAddress *disk,
551                                            Error **errp)
552 {
553     unsigned int cssid, ssid, subchno, devno;
554     char *p;
555 
556     p = strstr(syspath, "/devices/css");
557     if (!p || sscanf(p + 12, "%*x/%x.%x.%x/%*x.%*x.%x/",
558                      &cssid, &ssid, &subchno, &devno) < 4) {
559         g_debug("could not parse ccw device sysfs path: %s", syspath);
560         return false;
561     }
562 
563     disk->ccw_address = g_new0(GuestCCWAddress, 1);
564     disk->ccw_address->cssid = cssid;
565     disk->ccw_address->ssid = ssid;
566     disk->ccw_address->subchno = subchno;
567     disk->ccw_address->devno = devno;
568 
569     if (strstr(p, "/virtio")) {
570         build_guest_fsinfo_for_nonpci_virtio(syspath, disk, errp);
571     }
572 
573     return true;
574 }
575 
576 /* Store disk device info specified by @sysfs into @fs */
577 static void build_guest_fsinfo_for_real_device(char const *syspath,
578                                                GuestFilesystemInfo *fs,
579                                                Error **errp)
580 {
581     GuestDiskAddress *disk;
582     GuestPCIAddress *pciaddr;
583     bool has_hwinf;
584 #ifdef CONFIG_LIBUDEV
585     struct udev *udev = NULL;
586     struct udev_device *udevice = NULL;
587 #endif
588 
589     pciaddr = g_new0(GuestPCIAddress, 1);
590     pciaddr->domain = -1;                       /* -1 means field is invalid */
591     pciaddr->bus = -1;
592     pciaddr->slot = -1;
593     pciaddr->function = -1;
594 
595     disk = g_new0(GuestDiskAddress, 1);
596     disk->pci_controller = pciaddr;
597     disk->bus_type = GUEST_DISK_BUS_TYPE_UNKNOWN;
598 
599 #ifdef CONFIG_LIBUDEV
600     udev = udev_new();
601     udevice = udev_device_new_from_syspath(udev, syspath);
602     if (udev == NULL || udevice == NULL) {
603         g_debug("failed to query udev");
604     } else {
605         const char *devnode, *serial;
606         devnode = udev_device_get_devnode(udevice);
607         if (devnode != NULL) {
608             disk->dev = g_strdup(devnode);
609         }
610         serial = udev_device_get_property_value(udevice, "ID_SERIAL");
611         if (serial != NULL && *serial != 0) {
612             disk->serial = g_strdup(serial);
613         }
614     }
615 
616     udev_unref(udev);
617     udev_device_unref(udevice);
618 #endif
619 
620     if (strstr(syspath, "/devices/pci")) {
621         has_hwinf = build_guest_fsinfo_for_pci_dev(syspath, disk, errp);
622     } else if (strstr(syspath, "/devices/css")) {
623         has_hwinf = build_guest_fsinfo_for_ccw_dev(syspath, disk, errp);
624     } else if (strstr(syspath, "/virtio")) {
625         has_hwinf = build_guest_fsinfo_for_nonpci_virtio(syspath, disk, errp);
626     } else {
627         g_debug("Unsupported device type for '%s'", syspath);
628         has_hwinf = false;
629     }
630 
631     if (has_hwinf || disk->dev || disk->serial) {
632         QAPI_LIST_PREPEND(fs->disk, disk);
633     } else {
634         qapi_free_GuestDiskAddress(disk);
635     }
636 }
637 
638 static void build_guest_fsinfo_for_device(char const *devpath,
639                                           GuestFilesystemInfo *fs,
640                                           Error **errp);
641 
642 /* Store a list of slave devices of virtual volume specified by @syspath into
643  * @fs */
644 static void build_guest_fsinfo_for_virtual_device(char const *syspath,
645                                                   GuestFilesystemInfo *fs,
646                                                   Error **errp)
647 {
648     Error *err = NULL;
649     DIR *dir;
650     char *dirpath;
651     struct dirent *entry;
652 
653     dirpath = g_strdup_printf("%s/slaves", syspath);
654     dir = opendir(dirpath);
655     if (!dir) {
656         if (errno != ENOENT) {
657             error_setg_errno(errp, errno, "opendir(\"%s\")", dirpath);
658         }
659         g_free(dirpath);
660         return;
661     }
662 
663     for (;;) {
664         errno = 0;
665         entry = readdir(dir);
666         if (entry == NULL) {
667             if (errno) {
668                 error_setg_errno(errp, errno, "readdir(\"%s\")", dirpath);
669             }
670             break;
671         }
672 
673         if (entry->d_type == DT_LNK) {
674             char *path;
675 
676             g_debug(" slave device '%s'", entry->d_name);
677             path = g_strdup_printf("%s/slaves/%s", syspath, entry->d_name);
678             build_guest_fsinfo_for_device(path, fs, &err);
679             g_free(path);
680 
681             if (err) {
682                 error_propagate(errp, err);
683                 break;
684             }
685         }
686     }
687 
688     g_free(dirpath);
689     closedir(dir);
690 }
691 
692 static bool is_disk_virtual(const char *devpath, Error **errp)
693 {
694     g_autofree char *syspath = realpath(devpath, NULL);
695 
696     if (!syspath) {
697         error_setg_errno(errp, errno, "realpath(\"%s\")", devpath);
698         return false;
699     }
700     return strstr(syspath, "/devices/virtual/block/") != NULL;
701 }
702 
703 /* Dispatch to functions for virtual/real device */
704 static void build_guest_fsinfo_for_device(char const *devpath,
705                                           GuestFilesystemInfo *fs,
706                                           Error **errp)
707 {
708     ERRP_GUARD();
709     g_autofree char *syspath = NULL;
710     bool is_virtual = false;
711 
712     syspath = realpath(devpath, NULL);
713     if (!syspath) {
714         if (errno != ENOENT) {
715             error_setg_errno(errp, errno, "realpath(\"%s\")", devpath);
716             return;
717         }
718 
719         /* ENOENT: This devpath may not exist because of container config */
720         if (!fs->name) {
721             fs->name = g_path_get_basename(devpath);
722         }
723         return;
724     }
725 
726     if (!fs->name) {
727         fs->name = g_path_get_basename(syspath);
728     }
729 
730     g_debug("  parse sysfs path '%s'", syspath);
731     is_virtual = is_disk_virtual(syspath, errp);
732     if (*errp != NULL) {
733         return;
734     }
735     if (is_virtual) {
736         build_guest_fsinfo_for_virtual_device(syspath, fs, errp);
737     } else {
738         build_guest_fsinfo_for_real_device(syspath, fs, errp);
739     }
740 }
741 
742 #ifdef CONFIG_LIBUDEV
743 
744 /*
745  * Wrapper around build_guest_fsinfo_for_device() for getting just
746  * the disk address.
747  */
748 static GuestDiskAddress *get_disk_address(const char *syspath, Error **errp)
749 {
750     g_autoptr(GuestFilesystemInfo) fs = NULL;
751 
752     fs = g_new0(GuestFilesystemInfo, 1);
753     build_guest_fsinfo_for_device(syspath, fs, errp);
754     if (fs->disk != NULL) {
755         return g_steal_pointer(&fs->disk->value);
756     }
757     return NULL;
758 }
759 
/*
 * Return a newly allocated copy of the "DM_NAME" udev property of the
 * device at @syspath, or NULL when udev cannot be queried or the
 * property is missing or empty.
 */
static char *get_alias_for_syspath(const char *syspath)
{
    struct udev_device *udevice = NULL;
    char *alias_copy = NULL;
    struct udev *udev = udev_new();

    if (udev == NULL) {
        g_debug("failed to query udev");
    } else {
        udevice = udev_device_new_from_syspath(udev, syspath);
        if (udevice == NULL) {
            g_debug("failed to query udev for path: %s", syspath);
        } else {
            const char *alias = udev_device_get_property_value(
                udevice, "DM_NAME");

            /*
             * NULL means there was an error and empty string means there
             * is no alias. In case of no alias we return NULL instead of
             * empty string.
             */
            if (alias == NULL) {
                g_debug("failed to query udev for device alias for: %s",
                    syspath);
            } else if (*alias != 0) {
                alias_copy = g_strdup(alias);
            }
        }
    }

    udev_unref(udev);
    udev_device_unref(udevice);
    return alias_copy;
}
795 
/*
 * Return a newly allocated copy of the device node reported by udev for
 * the device at @syspath, or NULL when udev cannot be queried or reports
 * no node.
 */
static char *get_device_for_syspath(const char *syspath)
{
    struct udev_device *udevice = NULL;
    char *devnode = NULL;
    struct udev *udev = udev_new();

    if (udev == NULL) {
        g_debug("failed to query udev");
    } else {
        udevice = udev_device_new_from_syspath(udev, syspath);
        if (udevice == NULL) {
            g_debug("failed to query udev for path: %s", syspath);
        } else {
            devnode = g_strdup(udev_device_get_devnode(udevice));
        }
    }

    udev_unref(udev);
    udev_device_unref(udevice);
    return devnode;
}
820 
821 static void get_disk_deps(const char *disk_dir, GuestDiskInfo *disk)
822 {
823     g_autofree char *deps_dir = NULL;
824     const gchar *dep;
825     GDir *dp_deps = NULL;
826 
827     /* List dependent disks */
828     deps_dir = g_strdup_printf("%s/slaves", disk_dir);
829     g_debug("  listing entries in: %s", deps_dir);
830     dp_deps = g_dir_open(deps_dir, 0, NULL);
831     if (dp_deps == NULL) {
832         g_debug("failed to list entries in %s", deps_dir);
833         return;
834     }
835     disk->has_dependencies = true;
836     while ((dep = g_dir_read_name(dp_deps)) != NULL) {
837         g_autofree char *dep_dir = NULL;
838         char *dev_name;
839 
840         /* Add dependent disks */
841         dep_dir = g_strdup_printf("%s/%s", deps_dir, dep);
842         dev_name = get_device_for_syspath(dep_dir);
843         if (dev_name != NULL) {
844             g_debug("  adding dependent device: %s", dev_name);
845             QAPI_LIST_PREPEND(disk->dependencies, dev_name);
846         }
847     }
848     g_dir_close(dp_deps);
849 }
850 
851 /*
852  * Detect partitions subdirectory, name is "<disk_name><number>" or
853  * "<disk_name>p<number>"
854  *
855  * @disk_name -- last component of /sys path (e.g. sda)
856  * @disk_dir -- sys path of the disk (e.g. /sys/block/sda)
857  * @disk_dev -- device node of the disk (e.g. /dev/sda)
858  */
859 static GuestDiskInfoList *get_disk_partitions(
860     GuestDiskInfoList *list,
861     const char *disk_name, const char *disk_dir,
862     const char *disk_dev)
863 {
864     GuestDiskInfoList *ret = list;
865     struct dirent *de_disk;
866     DIR *dp_disk = NULL;
867     size_t len = strlen(disk_name);
868 
869     dp_disk = opendir(disk_dir);
870     while ((de_disk = readdir(dp_disk)) != NULL) {
871         g_autofree char *partition_dir = NULL;
872         char *dev_name;
873         GuestDiskInfo *partition;
874 
875         if (!(de_disk->d_type & DT_DIR)) {
876             continue;
877         }
878 
879         if (!(strncmp(disk_name, de_disk->d_name, len) == 0 &&
880             ((*(de_disk->d_name + len) == 'p' &&
881             isdigit(*(de_disk->d_name + len + 1))) ||
882                 isdigit(*(de_disk->d_name + len))))) {
883             continue;
884         }
885 
886         partition_dir = g_strdup_printf("%s/%s",
887             disk_dir, de_disk->d_name);
888         dev_name = get_device_for_syspath(partition_dir);
889         if (dev_name == NULL) {
890             g_debug("Failed to get device name for syspath: %s",
891                 disk_dir);
892             continue;
893         }
894         partition = g_new0(GuestDiskInfo, 1);
895         partition->name = dev_name;
896         partition->partition = true;
897         partition->has_dependencies = true;
898         /* Add parent disk as dependent for easier tracking of hierarchy */
899         QAPI_LIST_PREPEND(partition->dependencies, g_strdup(disk_dev));
900 
901         QAPI_LIST_PREPEND(ret, partition);
902     }
903     closedir(dp_disk);
904 
905     return ret;
906 }
907 
908 static void get_nvme_smart(GuestDiskInfo *disk)
909 {
910     int fd;
911     GuestNVMeSmart *smart;
912     NvmeSmartLog log = {0};
913     struct nvme_admin_cmd cmd = {
914         .opcode = NVME_ADM_CMD_GET_LOG_PAGE,
915         .nsid = NVME_NSID_BROADCAST,
916         .addr = (uintptr_t)&log,
917         .data_len = sizeof(log),
918         .cdw10 = NVME_LOG_SMART_INFO | (1 << 15) /* RAE bit */
919                  | (((sizeof(log) >> 2) - 1) << 16)
920     };
921 
922     fd = qga_open_cloexec(disk->name, O_RDONLY, 0);
923     if (fd == -1) {
924         g_debug("Failed to open device: %s: %s", disk->name, g_strerror(errno));
925         return;
926     }
927 
928     if (ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd)) {
929         g_debug("Failed to get smart: %s: %s", disk->name, g_strerror(errno));
930         close(fd);
931         return;
932     }
933 
934     disk->smart = g_new0(GuestDiskSmart, 1);
935     disk->smart->type = GUEST_DISK_BUS_TYPE_NVME;
936 
937     smart = &disk->smart->u.nvme;
938     smart->critical_warning = log.critical_warning;
939     smart->temperature = lduw_le_p(&log.temperature); /* unaligned field */
940     smart->available_spare = log.available_spare;
941     smart->available_spare_threshold = log.available_spare_threshold;
942     smart->percentage_used = log.percentage_used;
943     smart->data_units_read_lo = le64_to_cpu(log.data_units_read[0]);
944     smart->data_units_read_hi = le64_to_cpu(log.data_units_read[1]);
945     smart->data_units_written_lo = le64_to_cpu(log.data_units_written[0]);
946     smart->data_units_written_hi = le64_to_cpu(log.data_units_written[1]);
947     smart->host_read_commands_lo = le64_to_cpu(log.host_read_commands[0]);
948     smart->host_read_commands_hi = le64_to_cpu(log.host_read_commands[1]);
949     smart->host_write_commands_lo = le64_to_cpu(log.host_write_commands[0]);
950     smart->host_write_commands_hi = le64_to_cpu(log.host_write_commands[1]);
951     smart->controller_busy_time_lo = le64_to_cpu(log.controller_busy_time[0]);
952     smart->controller_busy_time_hi = le64_to_cpu(log.controller_busy_time[1]);
953     smart->power_cycles_lo = le64_to_cpu(log.power_cycles[0]);
954     smart->power_cycles_hi = le64_to_cpu(log.power_cycles[1]);
955     smart->power_on_hours_lo = le64_to_cpu(log.power_on_hours[0]);
956     smart->power_on_hours_hi = le64_to_cpu(log.power_on_hours[1]);
957     smart->unsafe_shutdowns_lo = le64_to_cpu(log.unsafe_shutdowns[0]);
958     smart->unsafe_shutdowns_hi = le64_to_cpu(log.unsafe_shutdowns[1]);
959     smart->media_errors_lo = le64_to_cpu(log.media_errors[0]);
960     smart->media_errors_hi = le64_to_cpu(log.media_errors[1]);
961     smart->number_of_error_log_entries_lo =
962         le64_to_cpu(log.number_of_error_log_entries[0]);
963     smart->number_of_error_log_entries_hi =
964         le64_to_cpu(log.number_of_error_log_entries[1]);
965 
966     close(fd);
967 }
968 
969 static void get_disk_smart(GuestDiskInfo *disk)
970 {
971     if (disk->address
972         && (disk->address->bus_type == GUEST_DISK_BUS_TYPE_NVME)) {
973         get_nvme_smart(disk);
974     }
975 }
976 
977 GuestDiskInfoList *qmp_guest_get_disks(Error **errp)
978 {
979     GuestDiskInfoList *ret = NULL;
980     GuestDiskInfo *disk;
981     DIR *dp = NULL;
982     struct dirent *de = NULL;
983 
984     g_debug("listing /sys/block directory");
985     dp = opendir("/sys/block");
986     if (dp == NULL) {
987         error_setg_errno(errp, errno, "Can't open directory \"/sys/block\"");
988         return NULL;
989     }
990     while ((de = readdir(dp)) != NULL) {
991         g_autofree char *disk_dir = NULL, *line = NULL,
992             *size_path = NULL;
993         char *dev_name;
994         Error *local_err = NULL;
995         if (de->d_type != DT_LNK) {
996             g_debug("  skipping entry: %s", de->d_name);
997             continue;
998         }
999 
1000         /* Check size and skip zero-sized disks */
1001         g_debug("  checking disk size");
1002         size_path = g_strdup_printf("/sys/block/%s/size", de->d_name);
1003         if (!g_file_get_contents(size_path, &line, NULL, NULL)) {
1004             g_debug("  failed to read disk size");
1005             continue;
1006         }
1007         if (g_strcmp0(line, "0\n") == 0) {
1008             g_debug("  skipping zero-sized disk");
1009             continue;
1010         }
1011 
1012         g_debug("  adding %s", de->d_name);
1013         disk_dir = g_strdup_printf("/sys/block/%s", de->d_name);
1014         dev_name = get_device_for_syspath(disk_dir);
1015         if (dev_name == NULL) {
1016             g_debug("Failed to get device name for syspath: %s",
1017                 disk_dir);
1018             continue;
1019         }
1020         disk = g_new0(GuestDiskInfo, 1);
1021         disk->name = dev_name;
1022         disk->partition = false;
1023         disk->alias = get_alias_for_syspath(disk_dir);
1024         QAPI_LIST_PREPEND(ret, disk);
1025 
1026         /* Get address for non-virtual devices */
1027         bool is_virtual = is_disk_virtual(disk_dir, &local_err);
1028         if (local_err != NULL) {
1029             g_debug("  failed to check disk path, ignoring error: %s",
1030                 error_get_pretty(local_err));
1031             error_free(local_err);
1032             local_err = NULL;
1033             /* Don't try to get the address */
1034             is_virtual = true;
1035         }
1036         if (!is_virtual) {
1037             disk->address = get_disk_address(disk_dir, &local_err);
1038             if (local_err != NULL) {
1039                 g_debug("  failed to get device info, ignoring error: %s",
1040                     error_get_pretty(local_err));
1041                 error_free(local_err);
1042                 local_err = NULL;
1043             }
1044         }
1045 
1046         get_disk_deps(disk_dir, disk);
1047         get_disk_smart(disk);
1048         ret = get_disk_partitions(ret, de->d_name, disk_dir, dev_name);
1049     }
1050 
1051     closedir(dp);
1052 
1053     return ret;
1054 }
1055 
1056 #endif
1057 
1058 /* Return a list of the disk device(s)' info which @mount lies on */
1059 static GuestFilesystemInfo *build_guest_fsinfo(struct FsMount *mount,
1060                                                Error **errp)
1061 {
1062     GuestFilesystemInfo *fs = g_malloc0(sizeof(*fs));
1063     struct statvfs buf;
1064     unsigned long used, nonroot_total, fr_size;
1065     char *devpath = g_strdup_printf("/sys/dev/block/%u:%u",
1066                                     mount->devmajor, mount->devminor);
1067 
1068     fs->mountpoint = g_strdup(mount->dirname);
1069     fs->type = g_strdup(mount->devtype);
1070     build_guest_fsinfo_for_device(devpath, fs, errp);
1071 
1072     if (statvfs(fs->mountpoint, &buf) == 0) {
1073         fr_size = buf.f_frsize;
1074         used = buf.f_blocks - buf.f_bfree;
1075         nonroot_total = used + buf.f_bavail;
1076         fs->used_bytes = used * fr_size;
1077         fs->total_bytes = nonroot_total * fr_size;
1078         fs->total_bytes_privileged = buf.f_blocks * fr_size;
1079 
1080         fs->has_total_bytes = true;
1081         fs->has_total_bytes_privileged = true;
1082         fs->has_used_bytes = true;
1083     }
1084 
1085     g_free(devpath);
1086 
1087     return fs;
1088 }
1089 
1090 GuestFilesystemInfoList *qmp_guest_get_fsinfo(Error **errp)
1091 {
1092     FsMountList mounts;
1093     struct FsMount *mount;
1094     GuestFilesystemInfoList *ret = NULL;
1095     Error *local_err = NULL;
1096 
1097     QTAILQ_INIT(&mounts);
1098     if (!build_fs_mount_list(&mounts, &local_err)) {
1099         error_propagate(errp, local_err);
1100         return NULL;
1101     }
1102 
1103     QTAILQ_FOREACH(mount, &mounts, next) {
1104         g_debug("Building guest fsinfo for '%s'", mount->dirname);
1105 
1106         QAPI_LIST_PREPEND(ret, build_guest_fsinfo(mount, &local_err));
1107         if (local_err) {
1108             error_propagate(errp, local_err);
1109             qapi_free_GuestFilesystemInfoList(ret);
1110             ret = NULL;
1111             break;
1112         }
1113     }
1114 
1115     free_fs_mount_list(&mounts);
1116     return ret;
1117 }
1118 #endif /* CONFIG_FSFREEZE */
1119 
1120 #if defined(CONFIG_FSTRIM)
1121 /*
1122  * Walk list of mounted file systems in the guest, and trim them.
1123  */
1124 GuestFilesystemTrimResponse *
1125 qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **errp)
1126 {
1127     GuestFilesystemTrimResponse *response;
1128     GuestFilesystemTrimResult *result;
1129     int ret = 0;
1130     FsMountList mounts;
1131     struct FsMount *mount;
1132     int fd;
1133     struct fstrim_range r;
1134 
1135     slog("guest-fstrim called");
1136 
1137     QTAILQ_INIT(&mounts);
1138     if (!build_fs_mount_list(&mounts, errp)) {
1139         return NULL;
1140     }
1141 
1142     response = g_malloc0(sizeof(*response));
1143 
1144     QTAILQ_FOREACH(mount, &mounts, next) {
1145         result = g_malloc0(sizeof(*result));
1146         result->path = g_strdup(mount->dirname);
1147 
1148         QAPI_LIST_PREPEND(response->paths, result);
1149 
1150         fd = qga_open_cloexec(mount->dirname, O_RDONLY, 0);
1151         if (fd == -1) {
1152             result->error = g_strdup_printf("failed to open: %s",
1153                                             strerror(errno));
1154             continue;
1155         }
1156 
1157         /* We try to cull filesystems we know won't work in advance, but other
1158          * filesystems may not implement fstrim for less obvious reasons.
1159          * These will report EOPNOTSUPP; while in some other cases ENOTTY
1160          * will be reported (e.g. CD-ROMs).
1161          * Any other error means an unexpected error.
1162          */
1163         r.start = 0;
1164         r.len = -1;
1165         r.minlen = has_minimum ? minimum : 0;
1166         ret = ioctl(fd, FITRIM, &r);
1167         if (ret == -1) {
1168             if (errno == ENOTTY || errno == EOPNOTSUPP) {
1169                 result->error = g_strdup("trim not supported");
1170             } else {
1171                 result->error = g_strdup_printf("failed to trim: %s",
1172                                                 strerror(errno));
1173             }
1174             close(fd);
1175             continue;
1176         }
1177 
1178         result->has_minimum = true;
1179         result->minimum = r.minlen;
1180         result->has_trimmed = true;
1181         result->trimmed = r.len;
1182         close(fd);
1183     }
1184 
1185     free_fs_mount_list(&mounts);
1186     return response;
1187 }
1188 #endif /* CONFIG_FSTRIM */
1189 
/*
 * sysfs file used by the linux_sys_state_*() helpers below: it is read to
 * look for a sleep-state keyword and written to request a suspend.
 */
#define LINUX_SYS_STATE_FILE "/sys/power/state"
/* exit status of 'pm-is-supported' that pmutils_supports_mode() accepts */
#define SUSPEND_SUPPORTED 0
/* NOTE(review): presumably the matching "not supported" exit status - confirm */
#define SUSPEND_NOT_SUPPORTED 1

/*
 * Guest suspend modes.  The numeric values are used as indices into the
 * three-element per-mode string tables of the suspend helpers below, so
 * they must stay 0, 1 and 2.
 */
typedef enum {
    SUSPEND_MODE_DISK = 0,
    SUSPEND_MODE_RAM = 1,
    SUSPEND_MODE_HYBRID = 2,
} SuspendMode;
1199 
1200 /*
1201  * Executes a command in a child process using g_spawn_sync,
1202  * returning an int >= 0 representing the exit status of the
1203  * process.
1204  *
1205  * If the program wasn't found in path, returns -1.
1206  *
1207  * If a problem happened when creating the child process,
1208  * returns -1 and errp is set.
1209  */
1210 static int run_process_child(const char *command[], Error **errp)
1211 {
1212     int exit_status, spawn_flag;
1213     GError *g_err = NULL;
1214     bool success;
1215 
1216     spawn_flag = G_SPAWN_SEARCH_PATH | G_SPAWN_STDOUT_TO_DEV_NULL |
1217                  G_SPAWN_STDERR_TO_DEV_NULL;
1218 
1219     success =  g_spawn_sync(NULL, (char **)command, NULL, spawn_flag,
1220                             NULL, NULL, NULL, NULL,
1221                             &exit_status, &g_err);
1222 
1223     if (success) {
1224         return WEXITSTATUS(exit_status);
1225     }
1226 
1227     if (g_err && (g_err->code != G_SPAWN_ERROR_NOENT)) {
1228         error_setg(errp, "failed to create child process, error '%s'",
1229                    g_err->message);
1230     }
1231 
1232     g_error_free(g_err);
1233     return -1;
1234 }
1235 
1236 static bool systemd_supports_mode(SuspendMode mode, Error **errp)
1237 {
1238     const char *systemctl_args[3] = {"systemd-hibernate", "systemd-suspend",
1239                                      "systemd-hybrid-sleep"};
1240     const char *cmd[4] = {"systemctl", "status", systemctl_args[mode], NULL};
1241     int status;
1242 
1243     status = run_process_child(cmd, errp);
1244 
1245     /*
1246      * systemctl status uses LSB return codes so we can expect
1247      * status > 0 and be ok. To assert if the guest has support
1248      * for the selected suspend mode, status should be < 4. 4 is
1249      * the code for unknown service status, the return value when
1250      * the service does not exist. A common value is status = 3
1251      * (program is not running).
1252      */
1253     if (status > 0 && status < 4) {
1254         return true;
1255     }
1256 
1257     return false;
1258 }
1259 
1260 static void systemd_suspend(SuspendMode mode, Error **errp)
1261 {
1262     Error *local_err = NULL;
1263     const char *systemctl_args[3] = {"hibernate", "suspend", "hybrid-sleep"};
1264     const char *cmd[3] = {"systemctl", systemctl_args[mode], NULL};
1265     int status;
1266 
1267     status = run_process_child(cmd, &local_err);
1268 
1269     if (status == 0) {
1270         return;
1271     }
1272 
1273     if ((status == -1) && !local_err) {
1274         error_setg(errp, "the helper program 'systemctl %s' was not found",
1275                    systemctl_args[mode]);
1276         return;
1277     }
1278 
1279     if (local_err) {
1280         error_propagate(errp, local_err);
1281     } else {
1282         error_setg(errp, "the helper program 'systemctl %s' returned an "
1283                    "unexpected exit status code (%d)",
1284                    systemctl_args[mode], status);
1285     }
1286 }
1287 
1288 static bool pmutils_supports_mode(SuspendMode mode, Error **errp)
1289 {
1290     Error *local_err = NULL;
1291     const char *pmutils_args[3] = {"--hibernate", "--suspend",
1292                                    "--suspend-hybrid"};
1293     const char *cmd[3] = {"pm-is-supported", pmutils_args[mode], NULL};
1294     int status;
1295 
1296     status = run_process_child(cmd, &local_err);
1297 
1298     if (status == SUSPEND_SUPPORTED) {
1299         return true;
1300     }
1301 
1302     if ((status == -1) && !local_err) {
1303         return false;
1304     }
1305 
1306     if (local_err) {
1307         error_propagate(errp, local_err);
1308     } else {
1309         error_setg(errp,
1310                    "the helper program '%s' returned an unexpected exit"
1311                    " status code (%d)", "pm-is-supported", status);
1312     }
1313 
1314     return false;
1315 }
1316 
1317 static void pmutils_suspend(SuspendMode mode, Error **errp)
1318 {
1319     Error *local_err = NULL;
1320     const char *pmutils_binaries[3] = {"pm-hibernate", "pm-suspend",
1321                                        "pm-suspend-hybrid"};
1322     const char *cmd[2] = {pmutils_binaries[mode], NULL};
1323     int status;
1324 
1325     status = run_process_child(cmd, &local_err);
1326 
1327     if (status == 0) {
1328         return;
1329     }
1330 
1331     if ((status == -1) && !local_err) {
1332         error_setg(errp, "the helper program '%s' was not found",
1333                    pmutils_binaries[mode]);
1334         return;
1335     }
1336 
1337     if (local_err) {
1338         error_propagate(errp, local_err);
1339     } else {
1340         error_setg(errp,
1341                    "the helper program '%s' returned an unexpected exit"
1342                    " status code (%d)", pmutils_binaries[mode], status);
1343     }
1344 }
1345 
1346 static bool linux_sys_state_supports_mode(SuspendMode mode, Error **errp)
1347 {
1348     const char *sysfile_strs[3] = {"disk", "mem", NULL};
1349     const char *sysfile_str = sysfile_strs[mode];
1350     char buf[32]; /* hopefully big enough */
1351     int fd;
1352     ssize_t ret;
1353 
1354     if (!sysfile_str) {
1355         error_setg(errp, "unknown guest suspend mode");
1356         return false;
1357     }
1358 
1359     fd = open(LINUX_SYS_STATE_FILE, O_RDONLY);
1360     if (fd < 0) {
1361         return false;
1362     }
1363 
1364     ret = read(fd, buf, sizeof(buf) - 1);
1365     close(fd);
1366     if (ret <= 0) {
1367         return false;
1368     }
1369     buf[ret] = '\0';
1370 
1371     if (strstr(buf, sysfile_str)) {
1372         return true;
1373     }
1374     return false;
1375 }
1376 
1377 static void linux_sys_state_suspend(SuspendMode mode, Error **errp)
1378 {
1379     g_autoptr(GError) local_gerr = NULL;
1380     const char *sysfile_strs[3] = {"disk", "mem", NULL};
1381     const char *sysfile_str = sysfile_strs[mode];
1382 
1383     if (!sysfile_str) {
1384         error_setg(errp, "unknown guest suspend mode");
1385         return;
1386     }
1387 
1388     if (!g_file_set_contents(LINUX_SYS_STATE_FILE, sysfile_str,
1389                              -1, &local_gerr)) {
1390         error_setg(errp, "suspend: cannot write to '%s': %s",
1391                    LINUX_SYS_STATE_FILE, local_gerr->message);
1392         return;
1393     }
1394 }
1395 
1396 static void guest_suspend(SuspendMode mode, Error **errp)
1397 {
1398     Error *local_err = NULL;
1399     bool mode_supported = false;
1400 
1401     if (systemd_supports_mode(mode, &local_err)) {
1402         mode_supported = true;
1403         systemd_suspend(mode, &local_err);
1404 
1405         if (!local_err) {
1406             return;
1407         }
1408     }
1409 
1410     error_free(local_err);
1411     local_err = NULL;
1412 
1413     if (pmutils_supports_mode(mode, &local_err)) {
1414         mode_supported = true;
1415         pmutils_suspend(mode, &local_err);
1416 
1417         if (!local_err) {
1418             return;
1419         }
1420     }
1421 
1422     error_free(local_err);
1423     local_err = NULL;
1424 
1425     if (linux_sys_state_supports_mode(mode, &local_err)) {
1426         mode_supported = true;
1427         linux_sys_state_suspend(mode, &local_err);
1428     }
1429 
1430     if (!mode_supported) {
1431         error_free(local_err);
1432         error_setg(errp,
1433                    "the requested suspend mode is not supported by the guest");
1434     } else {
1435         error_propagate(errp, local_err);
1436     }
1437 }
1438 
/*
 * QMP command handler: suspend the guest using SUSPEND_MODE_DISK.
 * Any failure is reported through @errp by guest_suspend().
 */
void qmp_guest_suspend_disk(Error **errp)
{
    guest_suspend(SUSPEND_MODE_DISK, errp);
}
1443 
/*
 * QMP command handler: suspend the guest using SUSPEND_MODE_RAM.
 * Any failure is reported through @errp by guest_suspend().
 */
void qmp_guest_suspend_ram(Error **errp)
{
    guest_suspend(SUSPEND_MODE_RAM, errp);
}
1448 
/*
 * QMP command handler: suspend the guest using SUSPEND_MODE_HYBRID.
 * Any failure is reported through @errp by guest_suspend().
 */
void qmp_guest_suspend_hybrid(Error **errp)
{
    guest_suspend(SUSPEND_MODE_HYBRID, errp);
}
1453 
1454 /* Transfer online/offline status between @vcpu and the guest system.
1455  *
1456  * On input either @errp or *@errp must be NULL.
1457  *
1458  * In system-to-@vcpu direction, the following @vcpu fields are accessed:
1459  * - R: vcpu->logical_id
1460  * - W: vcpu->online
1461  * - W: vcpu->can_offline
1462  *
1463  * In @vcpu-to-system direction, the following @vcpu fields are accessed:
1464  * - R: vcpu->logical_id
1465  * - R: vcpu->online
1466  *
1467  * Written members remain unmodified on error.
1468  */
1469 static void transfer_vcpu(GuestLogicalProcessor *vcpu, bool sys2vcpu,
1470                           char *dirpath, Error **errp)
1471 {
1472     int fd;
1473     int res;
1474     int dirfd;
1475     static const char fn[] = "online";
1476 
1477     dirfd = open(dirpath, O_RDONLY | O_DIRECTORY);
1478     if (dirfd == -1) {
1479         error_setg_errno(errp, errno, "open(\"%s\")", dirpath);
1480         return;
1481     }
1482 
1483     fd = openat(dirfd, fn, sys2vcpu ? O_RDONLY : O_RDWR);
1484     if (fd == -1) {
1485         if (errno != ENOENT) {
1486             error_setg_errno(errp, errno, "open(\"%s/%s\")", dirpath, fn);
1487         } else if (sys2vcpu) {
1488             vcpu->online = true;
1489             vcpu->can_offline = false;
1490         } else if (!vcpu->online) {
1491             error_setg(errp, "logical processor #%" PRId64 " can't be "
1492                        "offlined", vcpu->logical_id);
1493         } /* otherwise pretend successful re-onlining */
1494     } else {
1495         unsigned char status;
1496 
1497         res = pread(fd, &status, 1, 0);
1498         if (res == -1) {
1499             error_setg_errno(errp, errno, "pread(\"%s/%s\")", dirpath, fn);
1500         } else if (res == 0) {
1501             error_setg(errp, "pread(\"%s/%s\"): unexpected EOF", dirpath,
1502                        fn);
1503         } else if (sys2vcpu) {
1504             vcpu->online = (status != '0');
1505             vcpu->can_offline = true;
1506         } else if (vcpu->online != (status != '0')) {
1507             status = '0' + vcpu->online;
1508             if (pwrite(fd, &status, 1, 0) == -1) {
1509                 error_setg_errno(errp, errno, "pwrite(\"%s/%s\")", dirpath,
1510                                  fn);
1511             }
1512         } /* otherwise pretend successful re-(on|off)-lining */
1513 
1514         res = close(fd);
1515         g_assert(res == 0);
1516     }
1517 
1518     res = close(dirfd);
1519     g_assert(res == 0);
1520 }
1521 
1522 GuestLogicalProcessorList *qmp_guest_get_vcpus(Error **errp)
1523 {
1524     GuestLogicalProcessorList *head, **tail;
1525     const char *cpu_dir = "/sys/devices/system/cpu";
1526     const gchar *line;
1527     g_autoptr(GDir) cpu_gdir = NULL;
1528     Error *local_err = NULL;
1529 
1530     head = NULL;
1531     tail = &head;
1532     cpu_gdir = g_dir_open(cpu_dir, 0, NULL);
1533 
1534     if (cpu_gdir == NULL) {
1535         error_setg_errno(errp, errno, "failed to list entries: %s", cpu_dir);
1536         return NULL;
1537     }
1538 
1539     while (local_err == NULL && (line = g_dir_read_name(cpu_gdir)) != NULL) {
1540         GuestLogicalProcessor *vcpu;
1541         int64_t id;
1542         if (sscanf(line, "cpu%" PRId64, &id)) {
1543             g_autofree char *path = g_strdup_printf("/sys/devices/system/cpu/"
1544                                                     "cpu%" PRId64 "/", id);
1545             vcpu = g_malloc0(sizeof *vcpu);
1546             vcpu->logical_id = id;
1547             vcpu->has_can_offline = true; /* lolspeak ftw */
1548             transfer_vcpu(vcpu, true, path, &local_err);
1549             QAPI_LIST_APPEND(tail, vcpu);
1550         }
1551     }
1552 
1553     if (local_err == NULL) {
1554         /* there's no guest with zero VCPUs */
1555         g_assert(head != NULL);
1556         return head;
1557     }
1558 
1559     qapi_free_GuestLogicalProcessorList(head);
1560     error_propagate(errp, local_err);
1561     return NULL;
1562 }
1563 
1564 int64_t qmp_guest_set_vcpus(GuestLogicalProcessorList *vcpus, Error **errp)
1565 {
1566     int64_t processed;
1567     Error *local_err = NULL;
1568 
1569     processed = 0;
1570     while (vcpus != NULL) {
1571         char *path = g_strdup_printf("/sys/devices/system/cpu/cpu%" PRId64 "/",
1572                                      vcpus->value->logical_id);
1573 
1574         transfer_vcpu(vcpus->value, false, path, &local_err);
1575         g_free(path);
1576         if (local_err != NULL) {
1577             break;
1578         }
1579         ++processed;
1580         vcpus = vcpus->next;
1581     }
1582 
1583     if (local_err != NULL) {
1584         if (processed == 0) {
1585             error_propagate(errp, local_err);
1586         } else {
1587             error_free(local_err);
1588         }
1589     }
1590 
1591     return processed;
1592 }
1593 
1594 
1595 static void ga_read_sysfs_file(int dirfd, const char *pathname, char *buf,
1596                                int size, Error **errp)
1597 {
1598     int fd;
1599     int res;
1600 
1601     errno = 0;
1602     fd = openat(dirfd, pathname, O_RDONLY);
1603     if (fd == -1) {
1604         error_setg_errno(errp, errno, "open sysfs file \"%s\"", pathname);
1605         return;
1606     }
1607 
1608     res = pread(fd, buf, size, 0);
1609     if (res == -1) {
1610         error_setg_errno(errp, errno, "pread sysfs file \"%s\"", pathname);
1611     } else if (res == 0) {
1612         error_setg(errp, "pread sysfs file \"%s\": unexpected EOF", pathname);
1613     }
1614     close(fd);
1615 }
1616 
1617 static void ga_write_sysfs_file(int dirfd, const char *pathname,
1618                                 const char *buf, int size, Error **errp)
1619 {
1620     int fd;
1621 
1622     errno = 0;
1623     fd = openat(dirfd, pathname, O_WRONLY);
1624     if (fd == -1) {
1625         error_setg_errno(errp, errno, "open sysfs file \"%s\"", pathname);
1626         return;
1627     }
1628 
1629     if (pwrite(fd, buf, size, 0) == -1) {
1630         error_setg_errno(errp, errno, "pwrite sysfs file \"%s\"", pathname);
1631     }
1632 
1633     close(fd);
1634 }
1635 
1636 /* Transfer online/offline status between @mem_blk and the guest system.
1637  *
1638  * On input either @errp or *@errp must be NULL.
1639  *
1640  * In system-to-@mem_blk direction, the following @mem_blk fields are accessed:
1641  * - R: mem_blk->phys_index
1642  * - W: mem_blk->online
1643  * - W: mem_blk->can_offline
1644  *
1645  * In @mem_blk-to-system direction, the following @mem_blk fields are accessed:
1646  * - R: mem_blk->phys_index
1647  * - R: mem_blk->online
1648  *-  R: mem_blk->can_offline
1649  * Written members remain unmodified on error.
1650  */
1651 static void transfer_memory_block(GuestMemoryBlock *mem_blk, bool sys2memblk,
1652                                   GuestMemoryBlockResponse *result,
1653                                   Error **errp)
1654 {
1655     char *dirpath;
1656     int dirfd;
1657     char *status;
1658     Error *local_err = NULL;
1659 
1660     if (!sys2memblk) {
1661         DIR *dp;
1662 
1663         if (!result) {
1664             error_setg(errp, "Internal error, 'result' should not be NULL");
1665             return;
1666         }
1667         errno = 0;
1668         dp = opendir("/sys/devices/system/memory/");
1669          /* if there is no 'memory' directory in sysfs,
1670          * we think this VM does not support online/offline memory block,
1671          * any other solution?
1672          */
1673         if (!dp) {
1674             if (errno == ENOENT) {
1675                 result->response =
1676                     GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_NOT_SUPPORTED;
1677             }
1678             goto out1;
1679         }
1680         closedir(dp);
1681     }
1682 
1683     dirpath = g_strdup_printf("/sys/devices/system/memory/memory%" PRId64 "/",
1684                               mem_blk->phys_index);
1685     dirfd = open(dirpath, O_RDONLY | O_DIRECTORY);
1686     if (dirfd == -1) {
1687         if (sys2memblk) {
1688             error_setg_errno(errp, errno, "open(\"%s\")", dirpath);
1689         } else {
1690             if (errno == ENOENT) {
1691                 result->response = GUEST_MEMORY_BLOCK_RESPONSE_TYPE_NOT_FOUND;
1692             } else {
1693                 result->response =
1694                     GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_FAILED;
1695             }
1696         }
1697         g_free(dirpath);
1698         goto out1;
1699     }
1700     g_free(dirpath);
1701 
1702     status = g_malloc0(10);
1703     ga_read_sysfs_file(dirfd, "state", status, 10, &local_err);
1704     if (local_err) {
1705         /* treat with sysfs file that not exist in old kernel */
1706         if (errno == ENOENT) {
1707             error_free(local_err);
1708             if (sys2memblk) {
1709                 mem_blk->online = true;
1710                 mem_blk->can_offline = false;
1711             } else if (!mem_blk->online) {
1712                 result->response =
1713                     GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_NOT_SUPPORTED;
1714             }
1715         } else {
1716             if (sys2memblk) {
1717                 error_propagate(errp, local_err);
1718             } else {
1719                 error_free(local_err);
1720                 result->response =
1721                     GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_FAILED;
1722             }
1723         }
1724         goto out2;
1725     }
1726 
1727     if (sys2memblk) {
1728         char removable = '0';
1729 
1730         mem_blk->online = (strncmp(status, "online", 6) == 0);
1731 
1732         ga_read_sysfs_file(dirfd, "removable", &removable, 1, &local_err);
1733         if (local_err) {
1734             /* if no 'removable' file, it doesn't support offline mem blk */
1735             if (errno == ENOENT) {
1736                 error_free(local_err);
1737                 mem_blk->can_offline = false;
1738             } else {
1739                 error_propagate(errp, local_err);
1740             }
1741         } else {
1742             mem_blk->can_offline = (removable != '0');
1743         }
1744     } else {
1745         if (mem_blk->online != (strncmp(status, "online", 6) == 0)) {
1746             const char *new_state = mem_blk->online ? "online" : "offline";
1747 
1748             ga_write_sysfs_file(dirfd, "state", new_state, strlen(new_state),
1749                                 &local_err);
1750             if (local_err) {
1751                 error_free(local_err);
1752                 result->response =
1753                     GUEST_MEMORY_BLOCK_RESPONSE_TYPE_OPERATION_FAILED;
1754                 goto out2;
1755             }
1756 
1757             result->response = GUEST_MEMORY_BLOCK_RESPONSE_TYPE_SUCCESS;
1758             result->has_error_code = false;
1759         } /* otherwise pretend successful re-(on|off)-lining */
1760     }
1761     g_free(status);
1762     close(dirfd);
1763     return;
1764 
1765 out2:
1766     g_free(status);
1767     close(dirfd);
1768 out1:
1769     if (!sys2memblk) {
1770         result->has_error_code = true;
1771         result->error_code = errno;
1772     }
1773 }
1774 
1775 GuestMemoryBlockList *qmp_guest_get_memory_blocks(Error **errp)
1776 {
1777     GuestMemoryBlockList *head, **tail;
1778     Error *local_err = NULL;
1779     struct dirent *de;
1780     DIR *dp;
1781 
1782     head = NULL;
1783     tail = &head;
1784 
1785     dp = opendir("/sys/devices/system/memory/");
1786     if (!dp) {
1787         /* it's ok if this happens to be a system that doesn't expose
1788          * memory blocks via sysfs, but otherwise we should report
1789          * an error
1790          */
1791         if (errno != ENOENT) {
1792             error_setg_errno(errp, errno, "Can't open directory"
1793                              "\"/sys/devices/system/memory/\"");
1794         }
1795         return NULL;
1796     }
1797 
1798     /* Note: the phys_index of memory block may be discontinuous,
1799      * this is because a memblk is the unit of the Sparse Memory design, which
1800      * allows discontinuous memory ranges (ex. NUMA), so here we should
1801      * traverse the memory block directory.
1802      */
1803     while ((de = readdir(dp)) != NULL) {
1804         GuestMemoryBlock *mem_blk;
1805 
1806         if ((strncmp(de->d_name, "memory", 6) != 0) ||
1807             !(de->d_type & DT_DIR)) {
1808             continue;
1809         }
1810 
1811         mem_blk = g_malloc0(sizeof *mem_blk);
1812         /* The d_name is "memoryXXX",  phys_index is block id, same as XXX */
1813         mem_blk->phys_index = strtoul(&de->d_name[6], NULL, 10);
1814         mem_blk->has_can_offline = true; /* lolspeak ftw */
1815         transfer_memory_block(mem_blk, true, NULL, &local_err);
1816         if (local_err) {
1817             break;
1818         }
1819 
1820         QAPI_LIST_APPEND(tail, mem_blk);
1821     }
1822 
1823     closedir(dp);
1824     if (local_err == NULL) {
1825         /* there's no guest with zero memory blocks */
1826         if (head == NULL) {
1827             error_setg(errp, "guest reported zero memory blocks!");
1828         }
1829         return head;
1830     }
1831 
1832     qapi_free_GuestMemoryBlockList(head);
1833     error_propagate(errp, local_err);
1834     return NULL;
1835 }
1836 
1837 GuestMemoryBlockResponseList *
1838 qmp_guest_set_memory_blocks(GuestMemoryBlockList *mem_blks, Error **errp)
1839 {
1840     GuestMemoryBlockResponseList *head, **tail;
1841     Error *local_err = NULL;
1842 
1843     head = NULL;
1844     tail = &head;
1845 
1846     while (mem_blks != NULL) {
1847         GuestMemoryBlockResponse *result;
1848         GuestMemoryBlock *current_mem_blk = mem_blks->value;
1849 
1850         result = g_malloc0(sizeof(*result));
1851         result->phys_index = current_mem_blk->phys_index;
1852         transfer_memory_block(current_mem_blk, false, result, &local_err);
1853         if (local_err) { /* should never happen */
1854             goto err;
1855         }
1856 
1857         QAPI_LIST_APPEND(tail, result);
1858         mem_blks = mem_blks->next;
1859     }
1860 
1861     return head;
1862 err:
1863     qapi_free_GuestMemoryBlockResponseList(head);
1864     error_propagate(errp, local_err);
1865     return NULL;
1866 }
1867 
1868 GuestMemoryBlockInfo *qmp_guest_get_memory_block_info(Error **errp)
1869 {
1870     Error *local_err = NULL;
1871     char *dirpath;
1872     int dirfd;
1873     char *buf;
1874     GuestMemoryBlockInfo *info;
1875 
1876     dirpath = g_strdup_printf("/sys/devices/system/memory/");
1877     dirfd = open(dirpath, O_RDONLY | O_DIRECTORY);
1878     if (dirfd == -1) {
1879         error_setg_errno(errp, errno, "open(\"%s\")", dirpath);
1880         g_free(dirpath);
1881         return NULL;
1882     }
1883     g_free(dirpath);
1884 
1885     buf = g_malloc0(20);
1886     ga_read_sysfs_file(dirfd, "block_size_bytes", buf, 20, &local_err);
1887     close(dirfd);
1888     if (local_err) {
1889         g_free(buf);
1890         error_propagate(errp, local_err);
1891         return NULL;
1892     }
1893 
1894     info = g_new0(GuestMemoryBlockInfo, 1);
1895     info->size = strtol(buf, NULL, 16); /* the unit is bytes */
1896 
1897     g_free(buf);
1898 
1899     return info;
1900 }
1901 
1902 #define MAX_NAME_LEN 128
1903 static GuestDiskStatsInfoList *guest_get_diskstats(Error **errp)
1904 {
1905     GuestDiskStatsInfoList *head = NULL, **tail = &head;
1906     const char *diskstats = "/proc/diskstats";
1907     FILE *fp;
1908     size_t n;
1909     char *line = NULL;
1910 
1911     fp = fopen(diskstats, "r");
1912     if (fp  == NULL) {
1913         error_setg_errno(errp, errno, "open(\"%s\")", diskstats);
1914         return NULL;
1915     }
1916 
1917     while (getline(&line, &n, fp) != -1) {
1918         g_autofree GuestDiskStatsInfo *diskstatinfo = NULL;
1919         g_autofree GuestDiskStats *diskstat = NULL;
1920         char dev_name[MAX_NAME_LEN];
1921         unsigned int ios_pgr, tot_ticks, rq_ticks, wr_ticks, dc_ticks, fl_ticks;
1922         unsigned long rd_ios, rd_merges_or_rd_sec, rd_ticks_or_wr_sec, wr_ios;
1923         unsigned long wr_merges, rd_sec_or_wr_ios, wr_sec;
1924         unsigned long dc_ios, dc_merges, dc_sec, fl_ios;
1925         unsigned int major, minor;
1926         int i;
1927 
1928         i = sscanf(line, "%u %u %s %lu %lu %lu"
1929                    "%lu %lu %lu %lu %u %u %u %u"
1930                    "%lu %lu %lu %u %lu %u",
1931                    &major, &minor, dev_name,
1932                    &rd_ios, &rd_merges_or_rd_sec, &rd_sec_or_wr_ios,
1933                    &rd_ticks_or_wr_sec, &wr_ios, &wr_merges, &wr_sec,
1934                    &wr_ticks, &ios_pgr, &tot_ticks, &rq_ticks,
1935                    &dc_ios, &dc_merges, &dc_sec, &dc_ticks,
1936                    &fl_ios, &fl_ticks);
1937 
1938         if (i < 7) {
1939             continue;
1940         }
1941 
1942         diskstatinfo = g_new0(GuestDiskStatsInfo, 1);
1943         diskstatinfo->name = g_strdup(dev_name);
1944         diskstatinfo->major = major;
1945         diskstatinfo->minor = minor;
1946 
1947         diskstat = g_new0(GuestDiskStats, 1);
1948         if (i == 7) {
1949             diskstat->has_read_ios = true;
1950             diskstat->read_ios = rd_ios;
1951             diskstat->has_read_sectors = true;
1952             diskstat->read_sectors = rd_merges_or_rd_sec;
1953             diskstat->has_write_ios = true;
1954             diskstat->write_ios = rd_sec_or_wr_ios;
1955             diskstat->has_write_sectors = true;
1956             diskstat->write_sectors = rd_ticks_or_wr_sec;
1957         }
1958         if (i >= 14) {
1959             diskstat->has_read_ios = true;
1960             diskstat->read_ios = rd_ios;
1961             diskstat->has_read_sectors = true;
1962             diskstat->read_sectors = rd_sec_or_wr_ios;
1963             diskstat->has_read_merges = true;
1964             diskstat->read_merges = rd_merges_or_rd_sec;
1965             diskstat->has_read_ticks = true;
1966             diskstat->read_ticks = rd_ticks_or_wr_sec;
1967             diskstat->has_write_ios = true;
1968             diskstat->write_ios = wr_ios;
1969             diskstat->has_write_sectors = true;
1970             diskstat->write_sectors = wr_sec;
1971             diskstat->has_write_merges = true;
1972             diskstat->write_merges = wr_merges;
1973             diskstat->has_write_ticks = true;
1974             diskstat->write_ticks = wr_ticks;
1975             diskstat->has_ios_pgr = true;
1976             diskstat->ios_pgr = ios_pgr;
1977             diskstat->has_total_ticks = true;
1978             diskstat->total_ticks = tot_ticks;
1979             diskstat->has_weight_ticks = true;
1980             diskstat->weight_ticks = rq_ticks;
1981         }
1982         if (i >= 18) {
1983             diskstat->has_discard_ios = true;
1984             diskstat->discard_ios = dc_ios;
1985             diskstat->has_discard_merges = true;
1986             diskstat->discard_merges = dc_merges;
1987             diskstat->has_discard_sectors = true;
1988             diskstat->discard_sectors = dc_sec;
1989             diskstat->has_discard_ticks = true;
1990             diskstat->discard_ticks = dc_ticks;
1991         }
1992         if (i >= 20) {
1993             diskstat->has_flush_ios = true;
1994             diskstat->flush_ios = fl_ios;
1995             diskstat->has_flush_ticks = true;
1996             diskstat->flush_ticks = fl_ticks;
1997         }
1998 
1999         diskstatinfo->stats = g_steal_pointer(&diskstat);
2000         QAPI_LIST_APPEND(tail, diskstatinfo);
2001         diskstatinfo = NULL;
2002     }
2003     free(line);
2004     fclose(fp);
2005     return head;
2006 }
2007 
2008 GuestDiskStatsInfoList *qmp_guest_get_diskstats(Error **errp)
2009 {
2010     return guest_get_diskstats(errp);
2011 }
2012 
2013 GuestCpuStatsList *qmp_guest_get_cpustats(Error **errp)
2014 {
2015     GuestCpuStatsList *head = NULL, **tail = &head;
2016     const char *cpustats = "/proc/stat";
2017     int clk_tck = sysconf(_SC_CLK_TCK);
2018     FILE *fp;
2019     size_t n;
2020     char *line = NULL;
2021 
2022     fp = fopen(cpustats, "r");
2023     if (fp  == NULL) {
2024         error_setg_errno(errp, errno, "open(\"%s\")", cpustats);
2025         return NULL;
2026     }
2027 
2028     while (getline(&line, &n, fp) != -1) {
2029         GuestCpuStats *cpustat = NULL;
2030         GuestLinuxCpuStats *linuxcpustat;
2031         int i;
2032         unsigned long user, system, idle, iowait, irq, softirq, steal, guest;
2033         unsigned long nice, guest_nice;
2034         char name[64];
2035 
2036         i = sscanf(line, "%s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
2037                    name, &user, &nice, &system, &idle, &iowait, &irq, &softirq,
2038                    &steal, &guest, &guest_nice);
2039 
2040         /* drop "cpu 1 2 3 ...", get "cpuX 1 2 3 ..." only */
2041         if ((i == EOF) || strncmp(name, "cpu", 3) || (name[3] == '\0')) {
2042             continue;
2043         }
2044 
2045         if (i < 5) {
2046             slog("Parsing cpu stat from %s failed, see \"man proc\"", cpustats);
2047             break;
2048         }
2049 
2050         cpustat = g_new0(GuestCpuStats, 1);
2051         cpustat->type = GUEST_CPU_STATS_TYPE_LINUX;
2052 
2053         linuxcpustat = &cpustat->u.q_linux;
2054         linuxcpustat->cpu = atoi(&name[3]);
2055         linuxcpustat->user = user * 1000 / clk_tck;
2056         linuxcpustat->nice = nice * 1000 / clk_tck;
2057         linuxcpustat->system = system * 1000 / clk_tck;
2058         linuxcpustat->idle = idle * 1000 / clk_tck;
2059 
2060         if (i > 5) {
2061             linuxcpustat->has_iowait = true;
2062             linuxcpustat->iowait = iowait * 1000 / clk_tck;
2063         }
2064 
2065         if (i > 6) {
2066             linuxcpustat->has_irq = true;
2067             linuxcpustat->irq = irq * 1000 / clk_tck;
2068             linuxcpustat->has_softirq = true;
2069             linuxcpustat->softirq = softirq * 1000 / clk_tck;
2070         }
2071 
2072         if (i > 8) {
2073             linuxcpustat->has_steal = true;
2074             linuxcpustat->steal = steal * 1000 / clk_tck;
2075         }
2076 
2077         if (i > 9) {
2078             linuxcpustat->has_guest = true;
2079             linuxcpustat->guest = guest * 1000 / clk_tck;
2080         }
2081 
2082         if (i > 10) {
2083             linuxcpustat->has_guest = true;
2084             linuxcpustat->guest = guest * 1000 / clk_tck;
2085             linuxcpustat->has_guestnice = true;
2086             linuxcpustat->guestnice = guest_nice * 1000 / clk_tck;
2087         }
2088 
2089         QAPI_LIST_APPEND(tail, cpustat);
2090     }
2091 
2092     free(line);
2093     fclose(fp);
2094     return head;
2095 }
2096 
/*
 * Convert an address taken from a /proc/net routing table into its textual
 * presentation form.
 *
 * @hexValue: if @is_ipv6 is non-zero, a NUL-terminated string of hexadecimal
 *            digits (two per address byte, 32 for a complete address);
 *            otherwise a pointer to an unsigned int holding the value of the
 *            8-digit hexadecimal field.
 * @is_ipv6:  selects between the two input forms above.
 *
 * For IPv6, byte pairs that are missing or cannot be parsed leave the
 * remaining address bytes at zero instead of indeterminate.
 *
 * Returns: a newly allocated string; the caller releases it with g_free().
 */
static char *hexToIPAddress(const void *hexValue, int is_ipv6)
{
    if (is_ipv6) {
        char addr[INET6_ADDRSTRLEN] = "";
        struct in6_addr in6;
        const char *hexStr = hexValue;
        size_t len = strlen(hexStr);
        size_t i;

        memset(&in6, 0, sizeof(in6));
        /* Only touch complete digit pairs that lie inside the string. */
        for (i = 0; i < 16 && 2 * i + 1 < len; i++) {
            if (sscanf(&hexStr[i * 2], "%2hhx", &in6.s6_addr[i]) != 1) {
                break;
            }
        }
        inet_ntop(AF_INET6, &in6, addr, sizeof(addr));

        return g_strdup(addr);
    } else {
        char addr[INET_ADDRSTRLEN] = "";
        struct in_addr in;
        unsigned int hexInt;

        /*
         * The integer is used as-is as the in-memory (network order) address,
         * so the octets come out in the right order on little-endian and
         * big-endian hosts alike; shifting the host integer apart only works
         * on little-endian machines.
         * NOTE(review): relies on the kernel printing the raw 32-bit address
         * word in /proc/net/route - see proc(5).
         */
        memcpy(&hexInt, hexValue, sizeof(hexInt));
        in.s_addr = hexInt;
        inet_ntop(AF_INET, &in, addr, sizeof(addr));

        return g_strdup(addr);
    }
}
2121 
2122 GuestNetworkRouteList *qmp_guest_network_get_route(Error **errp)
2123 {
2124     GuestNetworkRouteList *head = NULL, **tail = &head;
2125     const char *routeFiles[] = {"/proc/net/route", "/proc/net/ipv6_route"};
2126     FILE *fp;
2127     size_t n;
2128     char *line = NULL;
2129     int firstLine;
2130     int is_ipv6;
2131     int i;
2132 
2133     for (i = 0; i < 2; i++) {
2134         firstLine = 1;
2135         is_ipv6 = (i == 1);
2136         fp = fopen(routeFiles[i], "r");
2137         if (fp == NULL) {
2138             error_setg_errno(errp, errno, "open(\"%s\")", routeFiles[i]);
2139             free(line);
2140             continue;
2141         }
2142 
2143         while (getline(&line, &n, fp) != -1) {
2144             if (firstLine && !is_ipv6) {
2145                 firstLine = 0;
2146                 continue;
2147             }
2148             GuestNetworkRoute *route = NULL;
2149             GuestNetworkRoute *networkroute;
2150             char Iface[IFNAMSIZ];
2151             if (is_ipv6) {
2152                 char Destination[33], Source[33], NextHop[33];
2153                 int DesPrefixlen, SrcPrefixlen, Metric, RefCnt, Use, Flags;
2154 
2155                 /* Parse the line and extract the values */
2156                 if (sscanf(line, "%32s %x %32s %x %32s %x %x %x %x %s",
2157                            Destination, &DesPrefixlen, Source,
2158                            &SrcPrefixlen, NextHop, &Metric, &RefCnt,
2159                            &Use, &Flags, Iface) != 10) {
2160                     continue;
2161                 }
2162 
2163                 route = g_new0(GuestNetworkRoute, 1);
2164                 networkroute = route;
2165                 networkroute->iface = g_strdup(Iface);
2166                 networkroute->destination = hexToIPAddress(Destination, 1);
2167                 networkroute->metric = Metric;
2168                 networkroute->source = hexToIPAddress(Source, 1);
2169                 networkroute->desprefixlen = g_strdup_printf(
2170                     "%d", DesPrefixlen
2171                 );
2172                 networkroute->srcprefixlen = g_strdup_printf(
2173                     "%d", SrcPrefixlen
2174                 );
2175                 networkroute->nexthop = hexToIPAddress(NextHop, 1);
2176                 networkroute->has_flags = true;
2177                 networkroute->flags = Flags;
2178                 networkroute->has_refcnt = true;
2179                 networkroute->refcnt = RefCnt;
2180                 networkroute->has_use = true;
2181                 networkroute->use = Use;
2182                 networkroute->version = 6;
2183             } else {
2184                 unsigned int Destination, Gateway, Mask, Flags;
2185                 int RefCnt, Use, Metric, MTU, Window, IRTT;
2186 
2187                 /* Parse the line and extract the values */
2188                 if (sscanf(line, "%s %X %X %x %d %d %d %X %d %d %d",
2189                            Iface, &Destination, &Gateway, &Flags, &RefCnt,
2190                            &Use, &Metric, &Mask, &MTU, &Window, &IRTT) != 11) {
2191                     continue;
2192                 }
2193 
2194                 route = g_new0(GuestNetworkRoute, 1);
2195                 networkroute = route;
2196                 networkroute->iface = g_strdup(Iface);
2197                 networkroute->destination = hexToIPAddress(&Destination, 0);
2198                 networkroute->gateway = hexToIPAddress(&Gateway, 0);
2199                 networkroute->mask = hexToIPAddress(&Mask, 0);
2200                 networkroute->metric = Metric;
2201                 networkroute->has_flags = true;
2202                 networkroute->flags = Flags;
2203                 networkroute->has_refcnt = true;
2204                 networkroute->refcnt = RefCnt;
2205                 networkroute->has_use = true;
2206                 networkroute->use = Use;
2207                 networkroute->has_mtu = true;
2208                 networkroute->mtu = MTU;
2209                 networkroute->has_window = true;
2210                 networkroute->window = Window;
2211                 networkroute->has_irtt = true;
2212                 networkroute->irtt = IRTT;
2213                 networkroute->version = 4;
2214             }
2215 
2216             QAPI_LIST_APPEND(tail, route);
2217         }
2218 
2219         free(line);
2220         fclose(fp);
2221     }
2222 
2223     return head;
2224 }
2225